Code Example #1
def test_agent_solve_bit_flipping_game():
    AGENTS = [PPO, DDQN, DQN_With_Fixed_Q_Targets, DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(agent.agent_name, agent_results)
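The snippet above relies on a module-level config object that the excerpt does not show. Below is a minimal sketch of the kind of setup it assumes; the import paths, the Bit_Flipping_Environment constructor argument, the seed, the episode budget, and the hyperparameter values are all assumptions for illustration, not the values used in the original test.

# Sketch only: module paths and all values below are assumed, not taken from the excerpt.
from utilities.data_structures.Config import Config                          # assumed path
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment   # assumed path

config = Config()
config.seed = 1                                     # assumed value
config.environment = Bit_Flipping_Environment(14)   # assumed environment size
config.num_episodes_to_run = 450                    # assumed episode budget
config.hyperparameters = {
    "DQN_Agents": {"batch_size": 128, "learning_rate": 0.001},           # assumed; the real test supplies the full set
    "Actor_Critic_Agents": {"batch_size": 128, "learning_rate": 0.001},  # assumed; the real test supplies the full set
}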
Code Example #2
def test_agents_can_play_games_of_different_dimensions():
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3
    AGENTS = [A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets, DDQN_With_Prioritised_Experience_Replay, DQN]
    config.environment = gym.make("CartPole-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [SAC, TD3, PPO, DDPG]
    config.environment = gym.make("MountainCarContinuous-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [DDQN, SNN_HRL]
    config.environment = Four_Rooms_Environment(15, 15, stochastic_actions_probability=0.25,
                                                random_start_user_place=True, random_goal_place=False)
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
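For reference, the expectation encoded by this test can be reproduced directly with numpy: a per-episode mean across runs plus bands at mean ± n standard deviations. The helper below is a standalone sketch of that computation, not the Trainer's own implementation.

import numpy as np

def mean_and_std_bands(results, num_stds=1.0):
    # Standalone sketch (not the library method): column-wise mean/std across runs,
    # then bands at mean - num_stds * std and mean + num_stds * std.
    arr = np.array(results)                        # shape (num_runs, num_episodes)
    means, stds = arr.mean(axis=0), arr.std(axis=0)
    return list(means - num_stds * stds), list(means), list(means + num_stds * stds)

lower, mean, upper = mean_and_std_bands([[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]])
# mean matches [np.mean([1.0, 5.0, 2.5]), np.mean([2.0, -33.0, 2.5]), np.mean([3.0, 55.0, 2.5])] above.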
Code Example #4
            "tau":
            0.005,
            "gradient_clipping_norm":
            5,
            "initialiser":
            "Xavier"
        },
        "min_steps_before_learning": 10000,
        "batch_size": 256,
        "discount_rate": 0.99,
        "mu": 0.0,  # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.25,  # for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}

if __name__ == "__main__":
    AGENTS = [HRL]  # alternative agents: SAC_Discrete, DDQN, SNN_HRL, DQN, h_DQN
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents()
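The "tau" value of 0.005 in the hyperparameters above is the kind of coefficient normally used for soft (Polyak) updates of a target network towards the local network. The function below is an illustrative sketch of that update rule under that assumption, not the repository's own implementation.

import torch.nn as nn

def soft_update(local_net: nn.Module, target_net: nn.Module, tau: float = 0.005):
    # Illustrative Polyak averaging: target <- tau * local + (1 - tau) * target.
    for target_param, local_param in zip(target_net.parameters(), local_net.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)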
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
        "clip_rewards": False
    }
}


trainer = Trainer(config, [DQN_HER])
config.hyperparameters = config.hyperparameters["DQN_Agents"]
agent = DQN_HER(config)
agent.reset_game()

def test_initiation():
    """Tests whether DQN_HER initiates correctly"""
    config.hyperparameters["batch_size"] = 64
    agent = DQN_HER(config)
    agent.reset_game()


    assert agent.ordinary_buffer_batch_size == int(0.2 * 64)
    assert agent.HER_buffer_batch_size == 64 - int(0.2 * 64)

    assert agent.q_network_local.input_dim == 8
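The two batch-size assertions follow from splitting each sampled batch between the ordinary replay buffer and the HER buffer. Assuming the split is driven by the HER_sample_proportion of 0.8 shown in the config fragment above (an inference from the numbers in the test, not a statement of the library's internals), the arithmetic works out as follows.

batch_size = 64
her_sample_proportion = 0.8                                                  # from the config fragment above (assumed to drive the split)
ordinary_buffer_batch_size = int((1 - her_sample_proportion) * batch_size)   # int(0.2 * 64) == 12
her_buffer_batch_size = batch_size - ordinary_buffer_batch_size              # 64 - 12 == 52
assert (ordinary_buffer_batch_size, her_buffer_batch_size) == (12, 52)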