import numpy as np
import torch as T

# NOTE: MultiGoalEnv is the 2-D multi-goal point environment used by every
# snippet below; it is assumed to be importable from elsewhere in the project.


def plot(actor):
    """Roll out a deterministic actor for 50 episodes and plot the rollouts."""
    paths = []
    env = MultiGoalEnv()
    n_games = 50
    max_episode_length = 30
    for i in range(n_games):
        observation = env.reset(init_state=[-3, 0])
        episode_length = 0
        done = False
        score = 0
        path = {'infos': {'pos': []}}
        while not done:
            env.render()
            # Query the actor network directly for an action.
            action, _ = actor.forward(T.Tensor([observation]).to(actor.device))
            action = action.cpu().detach().numpy()[0]
            observation_, reward, done, info = env.step(action)
            path['infos']['pos'].append(observation)
            # Truncate episodes that exceed the step limit.
            if episode_length == max_episode_length:
                done = True
            episode_length += 1
            score += reward
            observation = observation_
        paths.append(path)
        env.render_rollouts(paths, fout="test_%d.png" % i)
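# The plotting helpers in this file all assume a small environment interface.
# The stub below is a non-authoritative sketch of that interface, with method
# names taken from the call sites above and below; the signatures and return
# values are assumptions, not the real environment:
class _MultiGoalEnvInterface:
    def reset(self, init_state=None):
        """Return the initial 2-D observation."""
    def step(self, action):
        """Return (observation, reward, done, info)."""
    def render(self, mode='human'):
        """Draw the current state of the environment."""
    def render_rollouts(self, paths, fout):
        """Save a figure of the collected trajectories to `fout`."""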
def plot(agent):
    """Variant of plot() for an agent that exposes choose_action()."""
    paths = []
    env = MultiGoalEnv()
    n_games = 50
    max_episode_length = 20
    for i in range(n_games):
        observation = env.reset(init_state=[0, 0])
        episode_length = 0
        done = False
        score = 0
        path = {'infos': {'pos': []}}
        while not done:
            env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            path['infos']['pos'].append(observation)
            if episode_length == max_episode_length:
                done = True
            episode_length += 1
            score += reward
            observation = observation_
        paths.append(path)
        score = score / 200  # scale the return before logging
        score_history.append(score)  # score_history is a module-level list
        avg_score = np.mean(score_history[-20:])
        env.render_rollouts(paths, fout="test_%d.png" % i)
def plot_paths(self, epoch):
    """Roll out the SVGD policy for 50 episodes and save one figure per epoch."""
    paths = []
    env = MultiGoalEnv()
    for episode in range(50):
        observation = env.reset()
        done = False
        step = 0
        path = {'infos': {'pos': []}}
        while not done and step < 30:
            # Sample in eval mode so stochastic layers (dropout, batch norm)
            # behave deterministically, then restore training mode.
            self.SVGD_Network.eval()
            actions = self.get_sample(observation, 1)
            self.SVGD_Network.train()
            observation, reward, done, _ = env.step(actions)
            path['infos']['pos'].append(observation)
            step += 1
        paths.append(path)
    print("saving figure..., epoch=", epoch)
    env.render_rollouts(paths, fout="test_%d.png" % epoch)
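# `get_sample` is called above but not shown. A minimal sketch, assuming the
# amortized SVGD sampler maps (state, noise) pairs to actions; `self.device`,
# `self.act_dim`, and the SVGD_Network call signature are assumptions:
def get_sample(self, observation, n_sample):
    obs = T.tensor(observation).float().to(self.device)
    obs = obs.unsqueeze(0).repeat(n_sample, 1)                # one row per particle
    noise = T.randn(n_sample, self.act_dim).to(self.device)  # latent xi ~ N(0, I)
    actions = self.SVGD_Network(obs, noise)                   # amortized sampler
    return actions.cpu().detach().numpy().squeeze()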
best_score = env.reward_range[0]
score_history = []
load_checkpoint = False
max_episode_length = 30

if load_checkpoint:
    agent.load_models()
    env.render(mode='human')

for i in range(n_games):
    observation = env.reset(init_state=[0, 0])
    episode_length = 0
    done = False
    score = 0
    while not done:
        env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        if episode_length == max_episode_length:
            done = True
        episode_length += 1
        score += reward
        observation = observation_
    # Agent hyperparameters (tail of the agent constructor call; its head is
    # not part of this snippet):
    replay_size=int(1e6), pi_lr=1e-3, q_lr=1e-3, batch_size=100,
    n_particles=16, gamma=0.99, polyak=0.995)

epochs = 100
update_after = 0
max_ep_len = 30
steps_per_epoch = 400

# Prepare for interaction with environment
total_steps = steps_per_epoch * epochs
o, ep_ret, ep_len = env.reset(), 0, 0

# Main loop: collect experience in env and update/log each epoch
n_particles = 16
epsilon = 0.8
for t in range(total_steps):
    # Anneal the exploration rate linearly down to 0.2.
    if epsilon > 0.2:
        epsilon -= 0.00001
    if np.random.uniform(0, 1) > epsilon:
        # Exploit: draw a set of candidate actions (particles) from the sampler.
        a = agent.get_sample(o, n_sample=n_particles)
        # ind = np.random.choice(np.array([i for i in range(0, n_particles)]))
        # a = a[ind]
        Q_values = agent.Q_Network(T.tensor(o).float().unsqueeze(0).to(
            agent.device))  # the snippet truncates mid-call; `agent.device` is assumed
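        # ---- hedged continuation sketch, not the original source ----
        # Presumably the sampled particle with the highest Q-value is the one
        # executed; the argmax selection and the explore branch below are
        # assumptions:
        a = a[T.argmax(Q_values).item()]
    else:
        # Explore: fall back to a uniformly random action.
        a = env.action_space.sample()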