def test_agents_can_play_games_of_different_dimensions():
    """Smoke-test that each agent family can run end-to-end on a discrete
    (CartPole), a continuous (MountainCarContinuous), and a grid-world
    (Four_Rooms_Environment) task, and that the trainer reports results
    keyed by each agent's name.

    Relies on module-level ``config``, ``Trainer``, ``gym`` and the agent
    classes being importable from the surrounding file.
    """
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3

    def _run_and_assert(agents):
        # Construct the trainer only after config.environment has been set,
        # so the trainer can never capture a stale environment. (The original
        # first scenario built the Trainer before assigning the environment,
        # inconsistently with the other two scenarios.)
        trainer = Trainer(config, agents)
        results = trainer.run_games_for_agents()
        for agent in agents:
            assert agent.agent_name in results.keys()

    # Discrete action space.
    config.environment = gym.make("CartPole-v0")
    _run_and_assert([A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets,
                     DDQN_With_Prioritised_Experience_Replay, DQN])

    # Continuous action space.
    config.environment = gym.make("MountainCarContinuous-v0")
    _run_and_assert([SAC, TD3, PPO, DDPG])

    # Grid-world with stochastic actions.
    config.environment = Four_Rooms_Environment(
        15, 15, stochastic_actions_probability=0.25,
        random_start_user_place=True, random_goal_place=False)
    _run_and_assert([DDQN, SNN_HRL])
def run_train(args):
    """Train a single agent and return the trained instance.

    Creates a trial directory under ``args.logdir``, persists the run
    arguments as JSON, trains the requested agent via ``Trainer``, and
    prints a summary (score, final reward, convergence steps).

    Args:
        args: argparse-style namespace providing at least ``logdir``,
            ``agent`` and ``stage`` (plus whatever ``init_config`` reads).
            ``args.save_path`` is set as a side effect.

    Returns:
        The trained agent object (first trained agent of the trainer).
    """
    save_path = os.path.join(args.logdir, args.agent + '@cps' + str(args.stage))
    args.save_path = init_trial_path(save_path)
    # Persist the exact arguments of this run next to its results.
    with open(os.path.join(args.save_path, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    print(args)

    config = init_config(args)
    agent = get_agent(args.agent)
    trainer = Trainer(config, [agent])
    results = trainer.run_games_for_agents()

    agent_name = agent.agent_name
    # results[name][0][0] is assumed to be the per-episode reward series
    # for the first (only) run of this agent — TODO confirm against Trainer.
    res_age = np.array(results[agent_name][0][0])
    # Score blends the single best episode with the mean of the last 10.
    score = (max(res_age) + np.mean(res_age[-10:])) / 2
    converge_step_1 = _first_step_above(res_age, 0)
    converge_step_2 = _first_step_above(res_age, 100)

    print("Train Done")
    print(f"Agent={agent_name}, Score={score}, Path={args.save_path}")
    print(f"Final Reward={np.mean(res_age[-10:])}, Converge Step 0 ={converge_step_1}, Converge Step 100 ={converge_step_2},")
    agent = trainer.trained_agents[0][0]
    return agent


def _first_step_above(rewards, threshold):
    """Return the first index where ``rewards`` exceeds ``threshold``, or -1.

    Replaces the original copy-pasted ``np.where(...)[0]`` / length-check
    pattern that appeared twice in ``run_train``.
    """
    indices = np.where(rewards > threshold)[0]
    return indices[0] if len(indices) > 0 else -1
def test_agent_solve_bit_flipping_game():
    """Check that DQN achieves a non-negative best score after episode 50.

    NOTE(review): a second function with this exact name is defined later
    in the file; under pytest the later definition shadows this one, so
    this test may never run — confirm whether both are intended.
    """
    agents_under_test = [DQN]
    results = Trainer(config, agents_under_test).run_games_for_agents()
    for agent in agents_under_test:
        # results[name][0][1] holds the per-episode scores; look only at
        # episodes after a 50-episode warm-up.
        best_late_score = np.max(results[agent.agent_name][0][1][50:])
        assert best_late_score >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, best_late_score)
def test_agent_solve_bit_flipping_game():
    """Check that every DQN-family/PPO agent reaches a non-negative best
    score once the first 50 episodes are excluded.

    NOTE(review): this duplicates the name of an earlier test function in
    the file; the earlier one is shadowed by this definition under pytest —
    confirm whether both are intended to run.
    """
    agents_under_test = [PPO, DDQN, DQN_With_Fixed_Q_Targets,
                         DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER]
    results = Trainer(config, agents_under_test).run_games_for_agents()
    for agent in agents_under_test:
        # Per-episode scores live at results[name][0][1]; skip the
        # 50-episode warm-up before taking the maximum.
        best_late_score = np.max(results[agent.agent_name][0][1][50:])
        assert best_late_score >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, best_late_score)
"batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 400, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, #for O-H noise "theta": 0.15, #for O-H noise "sigma": 0.25, #for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True } } if __name__ == "__main__": # turn it on if you want to test specific network saved test = False dictPath = None AGENTS = [SAC_Discrete] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents(test, dictPath)
"batch_norm": False, "tau": 0.01, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [400, 300], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5 }, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.2, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "clip_rewards": False } } if __name__ == "__main__": AGENTS = [DDPG, HIRO] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents()