def test_real(epi):
    """Roll out one episode in the real DoomTakeCover environment.

    Uses the trained VAE (observation -> latent z), the MDN-RNN (world
    model hidden state) and the controller (action selection), records
    the raw frames, and writes them out as a video file.

    Args:
        epi: Episode index; used only for logging and output filenames.
    """
    # Load trained components. map_location keeps this working on a
    # CPU-only machine even if the checkpoints were saved from a GPU
    # (same loading style as slave()).
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(True)
    obs = env.reset()
    model.reset()
    frames = []
    for step in range(cfg.max_steps):
        frames.append(cv2.resize(obs, (256, 256)))
        # HWC uint8 frame -> NCHW float tensor in [0, 1].
        obs = torch.from_numpy(
            obs.transpose(2, 0, 1)).unsqueeze(0).float() / 255.0
        mu, logvar, _, z = vae(obs)
        # Controller sees the RNN state (detached: no grads at test time)
        # concatenated with the current latent code.
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)
        # Advance the world model so its hidden state tracks the episode.
        model.step(z.unsqueeze(0), action.unsqueeze(0))
        obs_next, reward, done, _ = env.step(action.item())
        obs = obs_next
        if done:
            break
    # In take-cover, survival time (step count) is the episode reward.
    print('Episode {}: Real Reward {}'.format(epi, step))
    write_video(frames, 'real_{}.avi'.format(epi), (256, 256))
    os.system('mv real_{}.avi /home/bzhou/Dropbox/share'.format(epi))
def slave(comm):
    """MPI worker: evaluate the current controller over many episodes.

    Loads the trained VAE / MDN-RNN / controller (mapped to CPU), runs
    ``cfg.trials_per_pop * 4`` episodes in the real environment, and
    sends the per-episode rewards (survival step counts) back to the
    master process.

    Args:
        comm: MPI communicator; results are sent to rank 0 with tag 1.
    """
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(False)
    rewards = []
    for epi in range(cfg.trials_per_pop * 4):
        obs = env.reset()
        model.reset()
        for step in range(cfg.max_steps):
            # HWC uint8 frame -> NCHW float tensor in [0, 1].
            obs = torch.from_numpy(obs.transpose(
                2, 0, 1)).unsqueeze(0).float() / 255.0
            mu, logvar, _, z = vae(obs)
            # Controller input: detached RNN state + current latent code.
            inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
            y = controller(inp)
            y = y.item()
            action = encode_action(y)
            model.step(z.unsqueeze(0), action.unsqueeze(0))
            obs_next, reward, done, _ = env.step(action.item())
            obs = obs_next
            if done:
                break
        # Survival time (step count) serves as the episode reward.
        rewards.append(step)
        # Fixed typo: "Workder" -> "Worker".
        print('Worker {} got reward {} at epi {}'.format(
            comm.rank, step, epi))
    rewards = np.array(rewards)
    comm.send(rewards, dest=0, tag=1)
    print('Worker {} sent rewards to master'.format(comm.rank))
def test_rnn(epi):
    """Roll out one episode entirely inside the learned world model (a
    "dream"): no real environment, the MDN-RNN predicts the next latent.

    Each step: decode the current latent z to a frame for the video,
    pick an action with the controller, then sample the next z from the
    RNN's mixture-density output (with temperature adjustment) until the
    predicted done probability fires or ``cfg.max_steps`` is reached.

    Args:
        epi: Episode index; used only for logging and output filenames.
    """
    mus, logvars = load_init_z()
    # Load trained components. map_location keeps this working on a
    # CPU-only machine even if the checkpoints were saved from a GPU
    # (same loading style as slave()).
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    model.reset()
    z = sample_init_z(mus, logvars)
    frames = []
    for step in range(cfg.max_steps):
        z = torch.from_numpy(z).float().unsqueeze(0)
        # Decode the latent to an image purely for visualization.
        curr_frame = vae.decode(z).detach().numpy()
        frames.append(curr_frame.transpose(0, 2, 3, 1)[0] * 255.0)
        # cv2.imshow('game', frames[-1])
        # k = cv2.waitKey(33)
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)
        logmix, mu, logstd, done_p = model.step(z.unsqueeze(0),
                                                action.unsqueeze(0))

        # Normalize mixture logits: logmix -= logsumexp(logmix), computed
        # with the max-subtraction trick for numerical stability.
        logmix_max = logmix.max(dim=1, keepdim=True)[0]
        logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
            dim=1, keepdim=True).log() + logmix_max
        logmix = logmix - logmix_reduce_logsumexp

        # Temperature < 1 sharpens, > 1 flattens the mixture weights.
        logmix = logmix / cfg.temperature
        logmix -= logmix.max(dim=1, keepdim=True)[0]
        logmix = F.softmax(logmix, dim=1)

        # Sample one Gaussian component per latent dimension, then sample
        # z' from it; the sqrt(temperature) factor scales the noise.
        m = Categorical(logmix)
        idx = m.sample()
        new_mu = torch.FloatTensor([mu[i, j] for i, j in enumerate(idx)])
        new_logstd = torch.FloatTensor(
            [logstd[i, j] for i, j in enumerate(idx)])
        z_next = new_mu + new_logstd.exp() * torch.randn_like(
            new_mu) * np.sqrt(cfg.temperature)

        z = z_next.detach().numpy()
        if done_p.squeeze().item() > 0:
            break
    frames = [cv2.resize(frame, (256, 256)) for frame in frames]
    print('Episode {}: RNN Reward {}'.format(epi, step))
    write_video(frames, 'rnn_{}.avi'.format(epi), (256, 256))
    os.system('mv rnn_{}.avi /home/bzhou/Dropbox/share'.format(epi))