Code example #1
0
        # Record the episode's score twice: once in the rolling window used
        # for the running average, once in the full history that is returned.
        # NOTE(review): scores_window is presumably a bounded deque (e.g.
        # maxlen=100) so np.mean gives a recent-episodes average — the
        # definition is outside this snippet; confirm at the call site.
        scores_window.append(score)  # save most recent score
        list_scores.append(score)  # save most recent score
        # '\r' rewinds to the start of the line so successive episodes
        # overwrite the same console line (end="" suppresses the newline).
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)),
              end="")
        # Every 100th episode, print the same summary with a newline so a
        # permanent progress record is kept in the console history.
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
    # Return the per-episode score history for plotting/analysis.
    return list_scores


if __name__ == '__main__':

    # Echo the parsed command-line options before starting.
    print(opt)

    # Connect to the Unity simulation and resolve its default brain, then
    # derive the state/action dimensions from an initial reset.
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state_size, action_size = get_settings(env_info, brain)

    def _restored_agent():
        # Build one agent and restore it from the pre-trained
        # actor/critic checkpoints given on the command line.
        a = Agent(state_size, action_size, opt.seed)
        a.load_actor(opt.actor_model_path)
        a.load_critic(opt.critic_model_path)
        return a

    # Two identically-configured agents, both loaded from the same weights.
    agent1 = _restored_agent()
    agent2 = _restored_agent()

    # Run inference, collect the scores, and shut the simulator down.
    scores = infer_agent(env, agent1, agent2, brain_name)
    env.close()
Code example #2
0
            # Learning step intentionally disabled — this loop only runs
            # inference with a pre-trained agent.
            #agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        # Record the episode's score twice: once in the rolling window used
        # for the running average, once in the full history that is returned.
        # NOTE(review): scores_window is presumably a bounded deque (e.g.
        # maxlen=100); its definition is outside this snippet — confirm.
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        # '\r' rewinds to the start of the line so successive episodes
        # overwrite the same console line (end="" suppresses the newline).
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)),
              end="")
        # Every 100th episode, print the same summary with a newline so a
        # permanent progress record is kept in the console history.
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
    # Return the per-episode score history for plotting/analysis.
    return scores


if __name__ == '__main__':

    # Echo the parsed command-line options before starting.
    print(opt)

    # Connect to the Unity simulation and resolve its default brain, then
    # derive the state/action dimensions from an initial reset.
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state_size, action_size = get_settings(env_info, brain)

    # BUG FIX: load_actor()/load_critic() are used elsewhere in this file
    # purely for their in-place side effect (their return value is
    # discarded). Rebinding `agent` to their return value clobbered the
    # agent (with None), breaking the subsequent load_critic()/infer_agent()
    # calls — so call them without reassignment.
    agent = Agent(state_size, action_size, opt.seed)
    agent.load_actor(opt.actor_model_path)
    agent.load_critic(opt.critic_model_path)

    # Run inference, collect the scores, and shut the simulator down.
    scores = infer_agent(env, agent, brain_name)
    env.close()