def test_get_mean_and_standard_deviation_difference_results():
    """Check the (mean - x*std, mean, mean + x*std) triple returned by Trainer.

    Expected values are computed column-wise over three result rows with
    ``np.mean`` / ``np.std``. The check is done twice: once with
    ``config.standard_deviation_results`` set to 1.0 and once with 3.0.
    """
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    # Column i holds the i-th entry of every row in `results`.
    columns = [[1.0, 5.0, 2.5], [2.0, -33.0, 2.5], [3.0, 55.0, 2.5]]
    expected_mean = [np.mean(column) for column in columns]
    expected_std = [np.std(column) for column in columns]
    mean_std_pairs = list(zip(expected_mean, expected_std))

    config = Config()

    def bands_for(num_std):
        # A fresh Trainer is built after the config has been updated.
        config.standard_deviation_results = num_std
        trainer = Trainer(config, [])
        return trainer.get_mean_and_standard_deviation_difference_results(
            results)

    # One standard deviation either side of the mean.
    lower_guess, mean_guess, upper_guess = bands_for(1.0)
    assert expected_mean == mean_guess
    assert [avg - dev for avg, dev in mean_std_pairs] == lower_guess
    assert [avg + dev for avg, dev in mean_std_pairs] == upper_guess

    # Three standard deviations either side of the mean.
    lower_guess, mean_guess, upper_guess = bands_for(3.0)
    assert expected_mean == mean_guess
    assert [avg - 3.0 * dev for avg, dev in mean_std_pairs] == lower_guess
    assert [avg + 3.0 * dev for avg, dev in mean_std_pairs] == upper_guess
예제 #2
0
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.policy_gradient_agents.PPO import PPO
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
import numpy as np
import torch

# Seed Python's `random`, NumPy and PyTorch RNGs with the same fixed value.
# NOTE(review): `import random` is not visible in this span — confirm it is
# imported earlier in the file.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

# Build the Config object for this Bit Flipping run.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
# Environment constructed with argument 4 (presumably the number of bits — confirm).
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
# Both visualisation flags are switched off.
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
예제 #3
0
import gym

from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

# Build the Config object for a run on the Gym "Taxi-v2" environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.environment = gym.make("Taxi-v2")
config.seed = 1
config.env_parameters = {}
config.num_episodes_to_run = 2000
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Module-level hyperparameter values; where they are consumed is outside this
# span (presumably a config.hyperparameters dict further down — confirm).
linear_hidden_units = [32, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
gradient_clipping_norm = 5
update_every_n_steps = 1
import gym
import pytest

from drl.utilities.Utility_Functions import flatten_action_id_to_actions
from drl.utilities.data_structures.Config import Config

# Build the Config object for a run on the Gym "Taxi-v2" environment.
# The attributes set below are read by project code that is not visible here
# — TODO confirm their exact semantics against the consumer.
config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 1000
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Module-level hyperparameter values; where they are consumed is outside this
# span (presumably a config.hyperparameters dict further down — confirm).
linear_hidden_units = [10, 5]
learning_rate = 0.01
buffer_size = 40000
batch_size = 256
batch_norm = False
embedding_dimensionality = 15
gradient_clipping_norm = 5
update_every_n_steps = 1
예제 #5
0
import gym
from environments.Atari_Environment import make_atari_game
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

# Build the Config object for a run on the Atari "SpaceInvaders-v0" game,
# created through the project's make_atari_game helper.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
# Relative paths for the results pickle and the results graph image.
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False


# NOTE(author): the loss does not appear to be computed on a random sample;
# otherwise it would not jump around this much. TODO investigate.

# Module-level hyperparameter values; where they are consumed is outside this span.
linear_hidden_units = [32, 32]
learning_rate = 0.005  # author's note: 0.001 for taxi
buffer_size = 1000000
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from drl.agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
import numpy as np
import torch

# Seed Python's `random`, NumPy and PyTorch RNGs with the same fixed value.
# NOTE(review): `import random` is not visible in this span — confirm it is
# imported earlier in the file.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

# Build the Config object for a single-episode, single-run Bit Flipping setup.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
# Environment constructed with argument 4 (presumably the number of bits — confirm).
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 1
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
# Both visualisation flags are switched off.
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {

    "DQN_Agents": {

        "learning_rate": 0.005,
예제 #7
0
import gym

from drl.agents.actor_critic_agents.A2C import A2C
from drl.agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from drl.agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from drl.agents.actor_critic_agents.A3C import A3C
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

# Build the Config object for a run on the Gym "CartPole-v0" environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 450
# Relative paths for the results pickle and the results graph image.
config.file_to_save_data_results = "results/data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
# Unlike the other flags here, GPU use is switched on for this run.
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
import pickle

from drl.utilities.data_structures.Config import Config
from Trainer import Trainer

# Trainer built from a default-constructed Config and no agents; in this span
# it is only used to call the visualisation method below.
trainer = Trainer(config=Config(), agents=None)

# Disabled alternative: plot two pre-existing result sets in one call.
# trainer.visualise_set_of_preexisting_results(save_image_path="Four_Rooms_and_Long_Corridor.png", results_data_paths=["Long_Corridor_Results_Data.pkl", "Four_Rooms.pkl"],
#                                       plot_titles=["Long Corridor", "Four Rooms"], y_limits=[(0.0, 0.25), (-90.0, 100.25)])

# Active call: passes the Taxi data pickle path, an output image path, a plot
# title and a (min, max) y_limits tuple. Presumably it loads the pickle and
# writes the graph image — TODO confirm against Trainer.
trainer.visualise_preexisting_results(
    save_image_path="hrl_experiments/Taxi_graph_comparison.png",
    data_path="hrl_experiments/Taxi_data.pkl",
    title="Taxi v2",
    y_limits=(-800.0, 0.0))

# trainer.visualise_preexisting_results(save_image_path="Long_Corridor_Graph.png", data_path="Long_Corridor_Results_Data.pkl",
#                                       title="Long Corridor", y_limits=(0.0, 0.25))

# trainer.visualise_preexisting_results(save_image_path="Hopper_Results_Graph_Both_Agents.png", data_path="Hopper_Results_Data.pkl",
#                                       title="Hopper") #, y_limits=(0.0, 0.25))

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Cart_Pole_Results_Data.pkl",
#                                                                  "Mountain_Car_Results_Data.pkl"],
#                                              plot_titles=["Cart Pole (Discrete Actions)", "Mountain Car (Continuous Actions)"],
#                                              save_image_path="CartPole_and_MountainCar_Graph.png")

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Data_and_Graphs/Bit_Flipping_Results_Data.pkl",
#                                                                  "Data_and_Graphs/Fetch_Reach_Results_Data.pkl"],
#                                              plot_titles=["Bit Flipping", "Fetch Reach"],
예제 #9
0
import gym
from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.agents.hierarchical_agents.DIAYN import DIAYN
from drl.utilities.data_structures.Config import Config

# Build the Config object for a run on the Gym "Walker2d-v2" environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
# Relative paths for the results pickle and the results graph image.
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
예제 #10
0
import os
import sys
from os.path import dirname, abspath
sys.path.append(dirname(dirname(abspath(__file__))))
import gym
from drl.agents.Trainer import Trainer
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.hierarchical_agents.HIRO import HIRO
from drl.utilities.data_structures.Config import Config
# Build the Config object for a run on the Gym "Reacher-v2" environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = gym.make("Reacher-v2") # other ids noted by the author: "InvertedPendulum-v2", "Pendulum-v0"
config.num_episodes_to_run = 1500
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False




config.hyperparameters = {
    "HIRO": {
import gym

from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.DDPG_HER import DDPG_HER
from drl.utilities.data_structures.Config import Config
from drl.agents.Trainer import Trainer

# Build the Config object for a run on the Gym "FetchReach-v1" environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = gym.make("FetchReach-v1")
config.num_episodes_to_run = 1000
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
예제 #12
0
from gym.wrappers import FlattenDictWrapper
from drl.agents.DQN_agents.DQN_HER import DQN_HER
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN

# Build the Config object for a Bit Flipping run.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
# Environment constructed with argument 14 (presumably the number of bits — confirm).
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
# Output paths are set to None; the trailing comments keep the author's
# alternative file names.
config.file_to_save_data_results = None  #"Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  #"Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
예제 #13
0
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
from drl.agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

# Build the Config object for a Long Corridor run.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
# The stochasticity value is stored on the config first and then read back
# from config.env_parameters when constructing the environment, so both use
# the same number.
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
# Relative paths for the results pickle and the results graph image.
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {

    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
from drl.agents.DQN_agents.DDQN import DDQN
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

# Build the Config object for a Four Rooms run.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1

# Grid dimensions passed to the environment constructor below.
height = 15
width = 15
random_goal_place = False
# The bool is used as an int in the exponent: height * width when
# random_goal_place is False, and (height * width) squared when it is True.
num_possible_states = (height * width)**(1 + 1 * random_goal_place)
# Presumably one [number of embeddings, embedding size] pair — TODO confirm
# against the code that consumes embedding_dimensions.
embedding_dimensions = [[num_possible_states, 20]]
print("Num possible states ", num_possible_states)

config.environment = Four_Rooms_Environment(
    height,
    width,
    stochastic_actions_probability=0.0,
    random_start_user_place=True,
    random_goal_place=random_goal_place)

config.num_episodes_to_run = 1000
# Relative paths for the results pickle and the results graph image.
config.file_to_save_data_results = "Data_and_Graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
예제 #15
0
import gym

from drl.agents.policy_gradient_agents.PPO import PPO
from drl.agents.actor_critic_agents.DDPG import DDPG
from drl.agents.actor_critic_agents.SAC import SAC
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

# Build the Config object for a run on the Gym "MountainCarContinuous-v0"
# environment.
# The attributes set below are read by project code that is not visible here
# (presumably Trainer) — TODO confirm their exact semantics there.
config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
# No output paths are configured for results data or the results graph.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,