Example #1
def test_real(epi):
    """Roll out one episode of the trained agent in the real DoomTakeCover
    environment, record the frames, and export them as a video file.

    Args:
        epi: episode index, used only for logging and the output filename.
    """
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    rnn = RNNModel()
    rnn.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    env = DoomTakeCover(True)
    obs = env.reset()
    rnn.reset()

    frame_buffer = []
    for step in range(cfg.max_steps):
        frame_buffer.append(cv2.resize(obs, (256, 256)))

        # HWC uint8 frame -> normalized NCHW float tensor for the VAE.
        frame = torch.from_numpy(obs.transpose(2, 0, 1))
        frame = frame.unsqueeze(0).float() / 255.0
        mu, logvar, _, z = vae(frame)

        # Controller acts on the concatenated RNN hidden/cell state + latent.
        ctrl_in = torch.cat((rnn.hx.detach(), rnn.cx.detach(), z), dim=1)
        action = encode_action(controller(ctrl_in).item())

        rnn.step(z.unsqueeze(0), action.unsqueeze(0))
        obs, reward, done, _ = env.step(action.item())
        if done:
            break

    # NOTE(review): survival step count is reported as the episode reward.
    print('Episode {}: Real Reward {}'.format(epi, step))
    write_video(frame_buffer, 'real_{}.avi'.format(epi), (256, 256))
    os.system('mv real_{}.avi /home/bzhou/Dropbox/share'.format(epi))
Example #2
def slave(comm):
    """MPI worker: evaluate the current policy for several episodes and send
    the per-episode rewards back to the master process.

    Args:
        comm: MPI communicator; the reward array is sent to rank 0 with tag 1.
    """
    # Load all three World Models components, forcing the checkpoint tensors
    # onto CPU regardless of the device they were saved from.
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(False)

    rewards = []
    for epi in range(cfg.trials_per_pop * 4):
        obs = env.reset()
        model.reset()
        for step in range(cfg.max_steps):
            # HWC uint8 observation -> normalized NCHW float tensor.
            obs = torch.from_numpy(obs.transpose(
                2, 0, 1)).unsqueeze(0).float() / 255.0
            mu, logvar, _, z = vae(obs)

            # Controller input: RNN hidden/cell state plus VAE latent.
            inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
            y = controller(inp)
            y = y.item()
            action = encode_action(y)

            model.step(z.unsqueeze(0), action.unsqueeze(0))
            obs_next, reward, done, _ = env.step(action.item())
            obs = obs_next
            if done:
                break
        # Survival time (step count) is used as the episode reward.
        rewards.append(step)
        # BUGFIX: log message said 'Workder'; corrected to 'Worker' to match
        # the message printed below.
        print('Worker {} got reward {} at epi {}'.format(
            comm.rank, step, epi))
    rewards = np.array(rewards)
    comm.send(rewards, dest=0, tag=1)
    print('Worker {} sent rewards to master'.format(comm.rank))
Example #3
def test_rnn(epi):
    """Roll out one episode entirely inside the learned world model (the
    "dream" environment): the RNN's mixture-density output replaces the real
    environment's next observation, and decoded VAE frames are written to a
    video file.

    Args:
        epi: episode index, used only for logging and the output filename.
    """
    mus, logvars = load_init_z()

    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    model.reset()
    # Sample the starting latent from stored episode-start statistics.
    z = sample_init_z(mus, logvars)
    frames = []

    for step in range(cfg.max_steps):
        z = torch.from_numpy(z).float().unsqueeze(0)
        # Decode the current latent into an image for the output video.
        curr_frame = vae.decode(z).detach().numpy()

        # NCHW -> HWC, rescaled from [0, 1] back to [0, 255].
        frames.append(curr_frame.transpose(0, 2, 3, 1)[0] * 255.0)
        # cv2.imshow('game', frames[-1])
        # k = cv2.waitKey(33)

        # Controller input: RNN hidden/cell state plus current latent.
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        # The RNN predicts a mixture-of-Gaussians over the next latent and a
        # done logit.
        logmix, mu, logstd, done_p = model.step(z.unsqueeze(0),
                                                action.unsqueeze(0))

        # logmix = logmix - reduce_logsumexp(logmix)
        # Numerically stable log-softmax over mixture weights: subtract the
        # max before exponentiating, then add it back after the log-sum.
        logmix_max = logmix.max(dim=1, keepdim=True)[0]
        logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
            dim=1, keepdim=True).log() + logmix_max
        logmix = logmix - logmix_reduce_logsumexp

        # Adjust temperature
        # Dividing log-weights by the temperature sharpens (<1) or flattens
        # (>1) the mixture distribution before sampling a component.
        logmix = logmix / cfg.temperature
        logmix -= logmix.max(dim=1, keepdim=True)[0]
        logmix = F.softmax(logmix, dim=1)

        # Sample one Gaussian component per latent dimension.
        m = Categorical(logmix)
        idx = m.sample()

        # Gather the chosen component's mean/log-std for each dimension.
        new_mu = torch.FloatTensor([mu[i, j] for i, j in enumerate(idx)])
        new_logstd = torch.FloatTensor(
            [logstd[i, j] for i, j in enumerate(idx)])
        # Sample the next latent; sqrt(temperature) also scales the noise.
        z_next = new_mu + new_logstd.exp() * torch.randn_like(
            new_mu) * np.sqrt(cfg.temperature)

        z = z_next.detach().numpy()
        # done_p appears to be a logit; positive means "episode over".
        if done_p.squeeze().item() > 0:
            break

    frames = [cv2.resize(frame, (256, 256)) for frame in frames]

    # NOTE(review): survival step count is reported as the episode reward.
    print('Episode {}: RNN Reward {}'.format(epi, step))
    write_video(frames, 'rnn_{}.avi'.format(epi), (256, 256))
    os.system('mv rnn_{}.avi /home/bzhou/Dropbox/share'.format(epi))