        scores_window.append(score)   # update the rolling 100-episode window
        list_scores.append(score)     # record the score for every episode
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
    return list_scores


if __name__ == '__main__':
    print(opt)
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]   # the default brain controls the agents
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state_size, action_size = get_settings(env_info, brain)

    # build two agents and restore the same pretrained actor/critic weights into each
    agent1 = Agent(state_size, action_size, opt.seed)
    agent1.load_actor(opt.actor_model_path)
    agent1.load_critic(opt.critic_model_path)

    agent2 = Agent(state_size, action_size, opt.seed)
    agent2.load_actor(opt.actor_model_path)
    agent2.load_critic(opt.critic_model_path)

    scores = infer_agent(env, agent1, agent2, brain_name)
    env.close()
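# Both scripts call a get_settings(env_info, brain) helper that is not shown in
# this section. A minimal sketch, assuming the standard unityagents attributes
# (env_info.vector_observations and brain.vector_action_space_size); the helper
# name matches the calls above, but this body is an illustration, not the
# original implementation:

def get_settings(env_info, brain):
    """Return (state_size, action_size) for the loaded Unity environment."""
    states = env_info.vector_observations           # shape: (num_agents, state_size)
    state_size = states.shape[1]
    action_size = brain.vector_action_space_size    # dimension of the continuous action
    return state_size, action_size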
            # agent.step(state, action, reward, next_state, done)  # no learning step during inference
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)   # update the rolling 100-episode window
        scores.append(score)          # record the score for every episode
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
    return scores


if __name__ == '__main__':
    print(opt)
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state_size, action_size = get_settings(env_info, brain)

    agent = Agent(state_size, action_size, opt.seed)
    # load the pretrained weights in place; do not reassign the return value,
    # since load_actor/load_critic return None (see the two-agent script above)
    agent.load_actor(opt.actor_model_path)
    agent.load_critic(opt.critic_model_path)

    scores = infer_agent(env, agent, brain_name)
    env.close()
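# The Agent's load_actor/load_critic methods restore pretrained PyTorch weights.
# A minimal sketch, assuming the agent keeps its networks in actor_local and
# critic_local (hypothetical attribute names, not confirmed by this file) and
# that both methods load in place and return None, which is the behavior the
# scripts above depend on:

import torch

class Agent:
    # ... constructor, act(), and step() omitted in this sketch ...

    def load_actor(self, path):
        # restore actor weights from a checkpoint file; modifies self in place
        self.actor_local.load_state_dict(torch.load(path))

    def load_critic(self, path):
        # restore critic weights from a checkpoint file; modifies self in place
        self.critic_local.load_state_dict(torch.load(path))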