# --- Environment info ---------------------------------------------------
# Build the info helper and dump a summary of the environment.
info = Info(env)
info.print_info()

# NOTE: each observation stacks 3 game states; a single game state has
# 8 variables, so one observation is 3 * 8 = 24 values.

# Pull the dimensions the agent needs from the environment.
action_size, state_size, num_agents = info.getInfo()

# Optional random-action baseline (left disabled):
# baseline = Baseline(env, action_size, state_size)
# baseline.run()

# Seed both RNG sources for reproducibility.
seed = 6
random.seed(seed)
torch.manual_seed(seed)

# --- Agent --------------------------------------------------------------
maddpg = MADDPG(env, state_size, action_size, num_agents, seed)

# Training run (disabled — a trained checkpoint is evaluated instead):
# scores, average_scores_list = maddpg.train(n_episodes=5000)
# info.plotResults(scores, average_scores_list)

# Evaluate the best saved agent.
maddpg.test(env, state_size)
if __name__ == "__main__":
    # Configuration: number of evaluation episodes and checkpoint template
    # ("{}" is filled in by agent.load with a per-network suffix — assumed;
    # TODO confirm against TennisMultiAgent.load).
    n_episodes = 1
    checkpoint = "./checkpoints/checkpoint{}.pth"

    # Unity environment
    env = UnityEnvironment("./Tennis_Linux/Tennis.x86_64")

    # Agent: 24-dim observations, 2-dim continuous actions, 2 agents.
    agent = TennisMultiAgent(state_size=24, action_size=2, n_agents=2)
    agent.load(checkpoint)

    # MADDPG wrapper: runs the loaded agent for n_episodes and returns
    # one score per episode.
    maddpg = MADDPG(env=env, agent=agent)
    scores = maddpg.test(n_episodes=n_episodes)

    # Close the environment before reporting results.
    env.close()

    if n_episodes > 1:
        # Show results
        print(scores)
        print("Average score of {} episodes: {:.2f}".format(
            n_episodes, np.mean(scores)))

        # Plot scores.
        # BUGFIX: np.linspace(1, n_episodes + 1, n_episodes) produced
        # non-integer x values ending past the last episode (n=2 gave
        # [1.0, 3.0]); use the integer episode indices 1..n_episodes.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(np.arange(1, n_episodes + 1), scores)
        ax.set_xlabel("Episodes")
        ax.set_ylabel("Score per episode")
] # handle invalid dir char for i in range(len(model_names)): model_names[i] = model_names[i].replace('[', '').replace(']', '').replace( ' ', '').replace(',', '_') # handle standard arg, i.e., {} model_names = ['standard' if name == '' else name for name in model_names] # model loop for i in trange(len(args), desc='model', leave=True): model_dir = '{}/{}'.format(root, model_names[i]) os.mkdir(model_dir) # log cmd with open('{}/cmd_config.txt'.format(model_dir), 'w') as f: for k, v in control_args.items(): f.write(str(k) + ': ' + str(v) + '\n') arg = args[i] # repeat loop for n in trange(control_args['repeat'], desc='repeat', leave=True): dir = '{}/{}'.format(model_dir, n) os.mkdir(dir) maddpg = MADDPG(env, **arg) if control_args.has_key('load'): model_path = control_args['load'] maddpg.load_actor(model_path) maddpg.load_critic(model_path) if control_args['train']: maddpg.train(dir, control_args['save_interval']) maddpg.save(dir) maddpg.test(dir, n=control_args['n_test'])