def test_agent_solve_bit_flipping_game():
    """Tests that agents can solve the Bit Flipping game"""
    AGENTS = [PPO, DDQN, DQN_With_Fixed_Q_Targets, DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])  # best score recorded after the first 50 episodes of the first run
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(agent.agent_name, agent_results)
def test_agents_can_play_games_of_different_dimensions():
    """Tests that agents can run on environments with different state and action dimensions"""
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3
    AGENTS = [A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets, DDQN_With_Prioritised_Experience_Replay, DQN]
    trainer = Trainer(config, AGENTS)
    config.environment = gym.make("CartPole-v0")
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [SAC, TD3, PPO, DDPG]
    config.environment = gym.make("MountainCarContinuous-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [DDQN, SNN_HRL]
    config.environment = Four_Rooms_Environment(15, 15, stochastic_actions_probability=0.25,
                                                random_start_user_place=True, random_goal_place=False)
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [np.mean([1.0, 5.0, 2.5]), np.mean([2.0, -33.0, 2.5]), np.mean([3.0, 55.0, 2.5])]
    std_results = [np.std([1.0, 5.0, 2.5]), np.std([2.0, -33.0, 2.5]), np.std([3.0, 55.0, 2.5])]
    mean_minus_1_std = [mean - std_val for mean, std_val in zip(mean_results, std_results)]
    mean_plus_1_std = [mean + std_val for mean, std_val in zip(mean_results, std_results)]

    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(results)
    mean_plus_3_std = [mean + 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    mean_minus_3_std = [mean - 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
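# For reference, a minimal standalone sketch of the computation the test above exercises,
# assuming get_mean_and_standard_deviation_difference_results averages across runs per
# episode and offsets by config.standard_deviation_results standard deviations. The helper
# name mean_and_std_bands is hypothetical and not part of the library.
def mean_and_std_bands(results, num_stds=1.0):
    """Returns (mean - num_stds * std, mean, mean + num_stds * std) per episode across runs."""
    results = np.array(results)  # shape: (num_runs, num_episodes)
    means = results.mean(axis=0)
    stds = results.std(axis=0)
    return list(means - num_stds * stds), list(means), list(means + num_stds * stds)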
"tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 10000, "batch_size": 256, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True } } if __name__ == "__main__": AGENTS = [ HRL ] #SAC_Discrete, SAC_Discrete, DDQN] #HRL] #, SNN_HRL, DQN, h_DQN] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents()
"tau": 0.1, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.4, "incremental_td_error": 1e-8, "update_every_n_steps": 3, "linear_hidden_units": [20, 20, 20], "final_layer_activation": "None", "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8, "clip_rewards": False } } trainer = Trainer(config, [DQN_HER]) config.hyperparameters = config.hyperparameters["DQN_Agents"] agent = DQN_HER(config) agent.reset_game() def test_initiation(): """Tests whether DQN_HER initiates correctly""" config.hyperparameters["batch_size"] = 64 agent = DQN_HER(config) agent.reset_game() assert agent.ordinary_buffer_batch_size == int(0.2 * 64) assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64) assert agent.q_network_local.input_dim == 8