Esempio n. 1
0
def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
# from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
# from agents.policy_gradient_agents.PPO import PPO
# from environments.Four_Rooms_Environment import Four_Rooms_Environment
# from agents.hierarchical_agents.SNN_HRL import SNN_HRL
# from agents.actor_critic_agents.TD3 import TD3
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
import pickle

from Trainer import Trainer
from utilities.data_structures.Config import Config

trainer = Trainer(config=Config(), agents=None)

#
# trainer.visualise_set_of_preexisting_results(save_image_path="Four_Rooms_and_Long_Corridor.png", results_data_paths=["Long_Corridor_Results_Data.pkl", "Four_Rooms.pkl"],
#                                       plot_titles=["Long Corridor", "Four Rooms"], y_limits=[(0.0, 0.25), (-90.0, 100.25)])

trainer.visualise_preexisting_results(
    save_image_path="hrl_experiments/Taxi_graph_comparison.png",
    data_path="hrl_experiments/Taxi_data.pkl",
    title="Taxi v2",
    y_limits=(-800.0, 0.0))

# trainer.visualise_preexisting_results(save_image_path="Long_Corridor_Graph.png", data_path="Long_Corridor_Results_Data.pkl",
#                                       title="Long Corridor", y_limits=(0.0, 0.25))

# trainer.visualise_preexisting_results(save_image_path="Hopper_Results_Graph_Both_Agents.png", data_path="Hopper_Results_Data.pkl",
#                                       title="Hopper") #, y_limits=(0.0, 0.25))

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Cart_Pole_Results_Data.pkl",
#                                                                  "Mountain_Car_Results_Data.pkl"],
#                                              plot_titles=["Cart Pole (Discrete Actions)", "Mountain Car (Continuous Actions)"],
#                                              save_image_path="CartPole_and_MountainCar_Graph.png")

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Data_and_Graphs/Bit_Flipping_Results_Data.pkl",
#                                                                  "Data_and_Graphs/Fetch_Reach_Results_Data.pkl"],
#                                              plot_titles=["Bit Flipping", "Fetch Reach"],