import numpy as np

from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config


def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [np.mean([1.0, 5.0, 2.5]), np.mean([2.0, -33.0, 2.5]), np.mean([3.0, 55.0, 2.5])]
    std_results = [np.std([1.0, 5.0, 2.5]), np.std([2.0, -33.0, 2.5]), np.std([3.0, 55.0, 2.5])]
    mean_minus_1_std = [mean - std_val for mean, std_val in zip(mean_results, std_results)]
    mean_plus_1_std = [mean + std_val for mean, std_val in zip(mean_results, std_results)]

    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = \
        trainer.get_mean_and_standard_deviation_difference_results(results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = \
        trainer.get_mean_and_standard_deviation_difference_results(results)
    mean_plus_3_std = [mean + 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    mean_minus_3_std = [mean - 3.0 * std_val for mean, std_val in zip(mean_results, std_results)]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
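
# --- Illustrative sketch only (hypothetical helper, not the Trainer's actual code) ---
# The quantity the test above exercises: for each episode index, the mean score across
# runs together with mean +/- config.standard_deviation_results * standard deviation.
# A minimal numpy version, assuming `results` is a list of equally long per-run score lists:
def _reference_mean_and_std_difference(results, standard_deviation_results=1.0):
    episode_columns = list(zip(*results))  # scores at each episode index, across runs
    means = [np.mean(column) for column in episode_columns]
    stds = [np.std(column) for column in episode_columns]
    mean_minus_x_std = [m - standard_deviation_results * s for m, s in zip(means, stds)]
    mean_plus_x_std = [m + standard_deviation_results * s for m, s in zip(means, stds)]
    return mean_minus_x_std, means, mean_plus_x_std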
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.policy_gradient_agents.PPO import PPO
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
import gym

from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.environment = gym.make("Taxi-v2")
config.seed = 1
config.env_parameters = {}
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

linear_hidden_units = [32, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
gradient_clipping_norm = 5
update_every_n_steps = 1
import gym
import pytest

from drl.utilities.Utility_Functions import flatten_action_id_to_actions
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 1000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

linear_hidden_units = [10, 5]
learning_rate = 0.01
buffer_size = 40000
batch_size = 256
batch_norm = False
embedding_dimensionality = 15
gradient_clipping_norm = 5
update_every_n_steps = 1
import gym

from environments.Atari_Environment import make_atari_game
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# The loss is not drawing a random sample! Otherwise it wouldn't jump around that much.
linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 for Taxi
buffer_size = 1000000
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from drl.agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 1
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
import gym

from drl.agents.actor_critic_agents.A2C import A2C
from drl.agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from drl.agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from drl.agents.actor_critic_agents.A3C import A3C
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = "results/data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
import pickle

from drl.utilities.data_structures.Config import Config
from drl.agents.Trainer import Trainer

# A Trainer built with an empty Config and no agents is used here only for plotting
# previously saved results, not for training.
trainer = Trainer(config=Config(), agents=None)

# trainer.visualise_set_of_preexisting_results(save_image_path="Four_Rooms_and_Long_Corridor.png",
#                                              results_data_paths=["Long_Corridor_Results_Data.pkl", "Four_Rooms.pkl"],
#                                              plot_titles=["Long Corridor", "Four Rooms"],
#                                              y_limits=[(0.0, 0.25), (-90.0, 100.25)])

trainer.visualise_preexisting_results(save_image_path="hrl_experiments/Taxi_graph_comparison.png",
                                      data_path="hrl_experiments/Taxi_data.pkl",
                                      title="Taxi v2",
                                      y_limits=(-800.0, 0.0))

# trainer.visualise_preexisting_results(save_image_path="Long_Corridor_Graph.png",
#                                       data_path="Long_Corridor_Results_Data.pkl",
#                                       title="Long Corridor", y_limits=(0.0, 0.25))

# trainer.visualise_preexisting_results(save_image_path="Hopper_Results_Graph_Both_Agents.png",
#                                       data_path="Hopper_Results_Data.pkl",
#                                       title="Hopper")  # , y_limits=(0.0, 0.25)

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Cart_Pole_Results_Data.pkl",
#                                                                  "Mountain_Car_Results_Data.pkl"],
#                                              plot_titles=["Cart Pole (Discrete Actions)", "Mountain Car (Continuous Actions)"],
#                                              save_image_path="CartPole_and_MountainCar_Graph.png")

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Data_and_Graphs/Bit_Flipping_Results_Data.pkl",
#                                                                  "Data_and_Graphs/Fetch_Reach_Results_Data.pkl"],
#                                              plot_titles=["Bit Flipping", "Fetch Reach"],
import gym

from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.agents.hierarchical_agents.DIAYN import DIAYN
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
import os
import sys
from os.path import dirname, abspath
sys.path.append(dirname(dirname(abspath(__file__))))

import gym

from drl.agents.Trainer import Trainer
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.hierarchical_agents.HIRO import HIRO
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Reacher-v2")  # alternatives: "InvertedPendulum-v2", "Pendulum-v0"
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "HIRO": {
import gym

from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.DDPG_HER import DDPG_HER
from drl.utilities.data_structures.Config import Config
from drl.agents.Trainer import Trainer

config = Config()
config.seed = 1
config.environment = gym.make("FetchReach-v1")
config.num_episodes_to_run = 1000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
from gym.wrappers import FlattenDictWrapper

from drl.agents.DQN_agents.DQN_HER import DQN_HER
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
config.file_to_save_data_results = None  # "Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  # "Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
from drl.agents.DQN_agents.DDQN import DDQN
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1

height = 15
width = 15
random_goal_place = False
# State count: (height * width) possible agent cells, squared if the goal cell is also randomised.
num_possible_states = (height * width) ** (1 + 1 * random_goal_place)
embedding_dimensions = [[num_possible_states, 20]]
print("Num possible states ", num_possible_states)

config.environment = Four_Rooms_Environment(height, width,
                                            stochastic_actions_probability=0.0,
                                            random_start_user_place=True,
                                            random_goal_place=random_goal_place)

config.num_episodes_to_run = 1000
config.file_to_save_data_results = "Data_and_Graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
import gym

from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
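
# --- Hedged usage sketch (assumption, not confirmed by this file) ---
# Experiment configs like the ones above are typically handed to the Trainer together
# with a list of agent classes. Only the Trainer(config, agents) signature is confirmed
# elsewhere in this repo; the `run_games_for_agents()` call below is an assumed method
# name, so adjust it to whatever the Trainer actually exposes.
#
#     AGENTS = [PPO, DDPG, SAC, TD3]
#     trainer = Trainer(config, AGENTS)
#     trainer.run_games_for_agents()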