Example #1
0
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
from Utilities.Data_Structures.Config import Config

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = gym.make("Pendulum-v0")
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "LOWER_LEVEL": {
        "max_lower_level_timesteps": 3,
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,