# Driver script: inspect the environment, seed the RNGs, build a MADDPG
# object and run its test routine. The training and plotting calls are kept
# below as commented-out code.

# Environment information
info = Info(env)  # wrap the environment in the Info helper
info.print_info()  # print the environment information

# NOTE:
# Each observation is a stack of 3 states.
# Each game state has 8 variables,
# so each observation has size 24.

# Read the action size, state size and number of agents from the Info helper
action_size, state_size, num_agents = info.getInfo()

# Optional baseline run (disabled):
# baseline = Baseline(env, action_size, state_size)
# baseline.run()
seed = 6

# Seed Python's `random` module and torch before the MADDPG object is created;
# the same seed is also passed to MADDPG below.
random.seed(seed)
torch.manual_seed(seed)

# Create the maddpg object
maddpg = MADDPG(env, state_size, action_size, num_agents, seed)

# Train the agent (disabled):
# scores, average_scores_list = maddpg.train(n_episodes=5000)

# Plot the training scores (disabled; needs the results of the train call above):
# info.plotResults(scores, average_scores_list)

# Test the agent (presumably a previously saved best model, since training
# is disabled above — confirm against MADDPG.test)
maddpg.test(env, state_size)
Ejemplo n.º 2
0
if __name__ == "__main__":
    # Script entry point: load a checkpointed multi-agent Tennis agent, run
    # it for `n_episodes` test episodes and, when more than one episode was
    # played, print and plot the per-episode scores.

    # Configuration
    n_episodes = 1
    # The "{}" placeholder is passed through unformatted; presumably
    # `agent.load` fills it in per agent — confirm against TennisMultiAgent.
    checkpoint = "./checkpoints/checkpoint{}.pth"

    # Unity environment
    env = UnityEnvironment("./Tennis_Linux/Tennis.x86_64")

    try:
        # Agent
        agent = TennisMultiAgent(state_size=24, action_size=2, n_agents=2)
        agent.load(checkpoint)

        # MADDPG
        maddpg = MADDPG(env=env, agent=agent)
        scores = maddpg.test(n_episodes=n_episodes)
    finally:
        # Close the environment even when loading or testing raises, so the
        # Unity process is not left running.
        env.close()

    if n_episodes > 1:
        # Show results
        print(scores)
        print("Average score of {} episodes: {:.2f}".format(
            n_episodes, np.mean(scores)))

        # Plot scores against integer episode numbers 1..n_episodes.
        # (np.linspace(1, n_episodes + 1, n_episodes) produced fractional
        # x values that overshoot to n_episodes + 1.)
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(np.arange(1, n_episodes + 1), scores)
        ax.set_xlabel("Episodes")
        ax.set_ylabel("Score per episode")
        # NOTE(review): nothing visible here shows or saves the figure —
        # confirm whether plt.show()/fig.savefig(...) is expected to follow.
Ejemplo n.º 3
0
]
# Make the names usable as directory names: drop '[', ']' and spaces, and
# turn ',' into '_'. The slice assignment updates the existing list object
# in place.
model_names[:] = [
    raw.replace('[', '').replace(']', '').replace(' ', '').replace(',', '_')
    for raw in model_names
]
# An empty name corresponds to the standard argument set, i.e. {}.
model_names = [name or 'standard' for name in model_names]

# Model loop: for every argument set, create a directory named after the
# model, log the control arguments, then build, optionally load/train, and
# test a fresh MADDPG instance `control_args['repeat']` times.
for i in trange(len(args), desc='model', leave=True):
    model_dir = '{}/{}'.format(root, model_names[i])
    # os.mkdir raises if the directory already exists, so earlier results
    # are never silently overwritten.
    os.mkdir(model_dir)
    # Log the control arguments used for this model, one "key: value" per line
    with open('{}/cmd_config.txt'.format(model_dir), 'w') as f:
        for k, v in control_args.items():
            f.write(str(k) + ': ' + str(v) + '\n')
    arg = args[i]
    # Repeat loop: each repetition gets its own numbered sub-directory
    for n in trange(control_args['repeat'], desc='repeat', leave=True):
        # Named `run_dir` rather than `dir` to avoid shadowing the builtin.
        run_dir = '{}/{}'.format(model_dir, n)
        os.mkdir(run_dir)
        maddpg = MADDPG(env, **arg)
        # `dict.has_key` was removed in Python 3; `in` works on 2 and 3.
        if 'load' in control_args:
            model_path = control_args['load']
            maddpg.load_actor(model_path)
            maddpg.load_critic(model_path)
        if control_args['train']:
            maddpg.train(run_dir, control_args['save_interval'])
            maddpg.save(run_dir)
        maddpg.test(run_dir, n=control_args['n_test'])