Example #1

import numpy as np
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config


def test_get_mean_and_standard_deviation_difference_results():
    """Tests that get_mean_and_standard_deviation_difference_results method produces correct output"""
    results = [[1.0, 2.0, 3.0], [5.0, -33.0, 55.0], [2.5, 2.5, 2.5]]
    mean_results = [
        np.mean([1.0, 5.0, 2.5]),
        np.mean([2.0, -33.0, 2.5]),
        np.mean([3.0, 55.0, 2.5])
    ]
    std_results = [
        np.std([1.0, 5.0, 2.5]),
        np.std([2.0, -33.0, 2.5]),
        np.std([3.0, 55.0, 2.5])
    ]
    mean_minus_1_std = [
        mean - std_val for mean, std_val in zip(mean_results, std_results)
    ]
    mean_plus_1_std = [
        mean + std_val for mean, std_val in zip(mean_results, std_results)
    ]
    config = Config()
    config.standard_deviation_results = 1.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    assert mean_results == mean_results_guess
    assert mean_minus_1_std == mean_minus_x_std_guess
    assert mean_plus_1_std == mean_plus_x_std_guess

    config.standard_deviation_results = 3.0
    trainer = Trainer(config, [])
    mean_minus_x_std_guess, mean_results_guess, mean_plus_x_std_guess = trainer.get_mean_and_standard_deviation_difference_results(
        results)
    mean_plus_3_std = [
        mean + 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    mean_minus_3_std = [
        mean - 3.0 * std_val
        for mean, std_val in zip(mean_results, std_results)
    ]
    assert mean_results == mean_results_guess
    assert mean_minus_3_std == mean_minus_x_std_guess
    assert mean_plus_3_std == mean_plus_x_std_guess
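
For reference, a minimal sketch of what the method under test presumably computes, reconstructed from the assertions above rather than taken from agents/Trainer.py itself:

import numpy as np

def mean_and_std_band(results, x):
    """Hypothetical reconstruction: element-wise across runs, returns
    (mean - x * std, mean, mean + x * std), where x corresponds to
    config.standard_deviation_results."""
    per_episode = list(zip(*results))  # group scores by episode index
    means = [np.mean(scores) for scores in per_episode]
    stds = [np.std(scores) for scores in per_episode]
    lower = [m - x * s for m, s in zip(means, stds)]
    upper = [m + x * s for m, s in zip(means, stds)]
    return lower, means, upper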
Example #2

import gym, os
from agents.hierarchical_agents.DIAYN import DIAYN
from agents.hierarchical_agents.DBH import DBH
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.actor_critic_agents.SAC import SAC
from agents.DQN_agents.DDQN import DDQN
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
import argparse

config = Config()
parser = argparse.ArgumentParser()
parser.add_argument('--env',
                    action='store',
                    dest='environment',
                    default='SpaceInvaders-v0',
                    help='which environment to compare on')
parser.add_argument('--alg',
                    nargs='+',
                    action='store',
                    dest='algorithms',
                    default='SAC_Discrete',
                    help='which algorithms to compare')
parser.add_argument('--eval',
                    type=bool,
                    default=False,
                    action='store',
                    dest='evaluate',
                    help='set False for training and True for evaluating.')
parser.add_argument('--num_ep',
                    type=int,
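
A caveat on the --eval argument above: argparse's type=bool converts with bool(str), and any non-empty string, including "False", is truthy, so --eval False still evaluates to True. A safer alternative (a sketch, not part of the original script) is a store_true flag:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--eval',
                    action='store_true',
                    dest='evaluate',
                    help='pass this flag to evaluate; omit it to train')
args = parser.parse_args([])  # [] -> evaluate=False; ['--eval'] -> evaluate=True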
Example #3

import os
import sys
from os.path import dirname, abspath
sys.path.append(dirname(dirname(abspath(__file__))))
import gym
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from environments.DMP_Env_1D_dynamic import deep_mobile_printing_1d1r

config = Config()
config.seed = 1
config.environment = deep_mobile_printing_1d1r()
config.num_episodes_to_run = 10000
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.GPU = "cuda:0"
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
OUT_FILE_NAME = "SAC_1d" + "sin" + "_seed_" + str(config.seed)
config.save_model_path = "/mnt/NAS/home/WenyuHan/SNAC/SAC/1D/dynamic/" + OUT_FILE_NAME + "/"
config.file_to_save_data_results = "/mnt/NAS/home/WenyuHan/SNAC/SAC/1D/dynamic/" + OUT_FILE_NAME + "/" + "Results_Data.pkl"
config.file_to_save_results_graph = "/mnt/NAS/home/WenyuHan/SNAC/SAC/1D/dynamic/" + OUT_FILE_NAME + "/" + "Results_Graph.png"
if not os.path.exists(config.save_model_path):
    os.makedirs(config.save_model_path)
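
This snippet is cut off before any agent is trained; a minimal sketch of the usual continuation, assuming the Trainer API used throughout these examples (config.hyperparameters would also have to be populated first):

AGENTS = [SAC_Discrete]  # the agent imported above
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()  # config.runs_per_agent runs of config.num_episodes_to_run episodes each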
Example #4

from A3C import A3C
from agents.DQN_agents.DQN_HER import DQN_HER
from DDQN import DDQN
from environments.Four_Rooms_Environment import Four_Rooms_Environment
from hierarchical_agents.DIAYN import DIAYN
from hierarchical_agents.HRL import HRL
from hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Four_Rooms_Environment(
    15,
    15,
    stochastic_actions_probability=0.25,
    random_start_user_place=True,
    random_goal_place=False)

config.num_episodes_to_run = 200
config.file_to_save_data_results = "Data_and_Graphs/Four_Rooms.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Four_Rooms.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
Example #5

from gym.wrappers import FlattenDictWrapper
from agents.DQN_agents.DQN_HER import DQN_HER
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(14)
config.num_episodes_to_run = 4500
config.file_to_save_data_results = None  #"Data_and_Graphs/Bit_Flipping_Results_Data.pkl"
config.file_to_save_results_graph = None  #"Data_and_Graphs/Bit_Flipping_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
Example #6

import pickle

from Trainer import Trainer
from utilities.data_structures.Config import Config

trainer = Trainer(config=Config(), agents=None)

#
# trainer.visualise_set_of_preexisting_results(save_image_path="Four_Rooms_and_Long_Corridor.png", results_data_paths=["Long_Corridor_Results_Data.pkl", "Four_Rooms.pkl"],
#                                       plot_titles=["Long Corridor", "Four Rooms"], y_limits=[(0.0, 0.25), (-90.0, 100.25)])

trainer.visualise_preexisting_results(
    save_image_path="hrl_experiments/Taxi_graph_comparison.png",
    data_path="hrl_experiments/Taxi_data.pkl",
    title="Taxi v2",
    y_limits=(-800.0, 0.0))

# trainer.visualise_preexisting_results(save_image_path="Long_Corridor_Graph.png", data_path="Long_Corridor_Results_Data.pkl",
#                                       title="Long Corridor", y_limits=(0.0, 0.25))

# trainer.visualise_preexisting_results(save_image_path="Hopper_Results_Graph_Both_Agents.png", data_path="Hopper_Results_Data.pkl",
#                                       title="Hopper") #, y_limits=(0.0, 0.25))

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Cart_Pole_Results_Data.pkl",
#                                                                  "Mountain_Car_Results_Data.pkl"],
#                                              plot_titles=["Cart Pole (Discrete Actions)", "Mountain Car (Continuous Actions)"],
#                                              save_image_path="CartPole_and_MountainCar_Graph.png")

# trainer.visualise_set_of_preexisting_results(results_data_paths=["Data_and_Graphs/Bit_Flipping_Results_Data.pkl",
#                                                                  "Data_and_Graphs/Fetch_Reach_Results_Data.pkl"],
#                                              plot_titles=["Bit Flipping", "Fetch Reach"],
Example #7

import gym
import pytest

from utilities.Utility_Functions import flatten_action_id_to_actions
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 1000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.load_model = False

linear_hidden_units = [10, 5]
learning_rate = 0.01
buffer_size = 40000
batch_size = 256
batch_norm = False
embedding_dimensionality = 15
gradient_clipping_norm = 5
Example #8

import gym

from A2C import A2C
from Dueling_DDQN import Dueling_DDQN
from SAC_Discrete import SAC_Discrete
from agents.actor_critic_agents.A3C import A3C
from agents.policy_gradient_agents.PPO import PPO
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DDQN import DDQN
from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from agents.DQN_agents.DQN import DQN
from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets

config = Config()
config.seed = 1
config.environment = gym.make("CartPole-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = "data_and_graphs/Cart_Pole_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Cart_Pole_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

Example #9

import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))

import gym

from agents.actor_critic_agents.DDPG import DDPG
from agents.actor_critic_agents.DDPG_HER_Che import DDPG_HER_Che
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer

config = Config()
config.seed = 1
config.environment = gym.make("FetchReach-v1")
#config.environment = gym.make("FetchPush-v1")
#config.environment = gym.make("FetchPickAndPlace-v1")
#config.environment = gym.make("FetchSlide-v1")
config.num_episodes_to_run = 2000
#config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
#config.runs_per_agent = 3
config.runs_per_agent = 25
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
Example #10

import sys
sys.path.insert(0, '../')

from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer

from agents.actor_critic_agents import A2C, A3C, DDPG, DDPG_HER

from gym.core import Wrapper
from torch.cuda import is_available

config = Config()

config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 5

config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = is_available()
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
config.seed = 0
config.debug_mode = True
config.wandb_log = True
Example #11

from agents.DQN_agents.DDQN import DDQN
from agents.actor_critic_agents.DDPG import DDPG
from agents.actor_critic_agents.SAC import SAC
from agents.actor_critic_agents.SAC_Discrete import SAC_Discrete
from agents.actor_critic_agents.A3C import A3C
from agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from environments.VEC_Environment import VEC_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
import matplotlib.pyplot as plt
import numpy as np

config = Config()
config.seed = 1

config.num_episodes_to_run = 8000
# config.file_to_save_data_results = "results/data_and_graphs/VEC.pkl"
# config.file_to_save_results_graph = "results/data_and_graphs/VEC.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.device = "cuda:0"

config.hyperparameters = {
Example #12

import gym

from HRL import HRL
from hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from agents.hierarchical_agents.h_DQN import h_DQN

config = Config()
config.seed = 1
config.environment = gym.make("Taxi-v2")
config.env_parameters = {}
config.num_episodes_to_run = 10000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "HRL": {
        "linear_hidden_units": [10, 5],
        "learning_rate":
        0.01,
Example #13

from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer

from agents.DQN_agents import DQN, DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay, DRQN

import wandb
from gym.core import Wrapper
from torch.cuda import is_available

config = Config()

config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 5_000

config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.runs_per_agent = 1
config.use_GPU = is_available()
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
config.model = None
config.seed = 0

config.debug_mode = True
config.wandb_log = True
config.wandb_job_type = "testing"
config.wandb_entity = "rafael_piacsek"
config.wandb_tags = ["initial testing"]
config.wandb_model_log_freq = 1_000
Example #14

import gym
from agents.hierarchical_agents.DIAYN import DIAYN
from agents.actor_critic_agents.SAC import SAC
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.environment = gym.make("MountainCarContinuous-v0")
config.seed = 1
config.env_parameters = {}
config.num_episodes_to_run = 10000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

linear_hidden_units = [32, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
gradient_clipping_norm = 5
update_every_n_steps = 1
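
The loose variables above are presumably folded into config.hyperparameters before training; a hypothetical continuation in the style of the other examples (the exact grouping key depends on the agent, e.g. "Actor_Critic_Agents" or "DIAYN"):

config.hyperparameters = {
    "Actor_Critic_Agents": {  # hypothetical grouping key
        "learning_rate": learning_rate,
        "linear_hidden_units": linear_hidden_units,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "batch_norm": batch_norm,
        "embedding_dimensionality": embedding_dimensionality,
        "gradient_clipping_norm": gradient_clipping_norm,
        "update_every_n_steps": update_every_n_steps,
    }
}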
"""Tests for the hierarchical RL agent HIRO"""
import copy

import gym
import random
import numpy as np
import torch

from hierarchical_agents.HIRO import HIRO
from utilities.data_structures.Config import Config

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = gym.make("Pendulum-v0")
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
Example #16

from agents.hierarchical_agents.SNN_HRL import SNN_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from agents.hierarchical_agents.h_DQN import h_DQN
from environments.Long_Corridor_Environment import Long_Corridor_Environment

config = Config()
config.seed = 1
config.env_parameters = {"stochasticity_of_action_right": 0.5}
config.environment = Long_Corridor_Environment(
    stochasticity_of_action_right=config.env_parameters["stochasticity_of_action_right"])
config.num_episodes_to_run = 10000
config.file_to_save_data_results = "Data_and_Graphs/Long_Corridor_Results_Data.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/Long_Corridor_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.load_model = False

config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size":
Example #17

# import the agents and the trainer
from agents.DQN_agents.DQN_multi_agent import DQN
from agents.Trainer_multi_agent import Trainer
import sys
import torch
from utilities.data_structures.Config import Config  # used below but missing from the original snippet

routing_modes = [
    "Q_routing_2_hop", "Q_routing_1_hop", "Q_routing_0_hop", "Q_routing",
    "TTSPWRR", "TTSP"
]
network_names = ["5x6", "UES_Manhatan", "toronto"]

gpu_num = int(sys.argv[1])
algorithm_num = int(sys.argv[2])
network_num = int(sys.argv[3])

config = Config()

config.use_GPU = True
assert (torch.cuda.is_available())
config.device = torch.device(gpu_num)

config.routing_mode = routing_modes[algorithm_num]
network_name = network_names[network_num]

config.training_mode = True

config.does_need_network_state = config.routing_mode in [
    "Q_routing_2_hop", "Q_routing_1_hop", "Q_routing_0_hop"
]
config.does_need_network_state_embeding = config.routing_mode in [
    "Q_routing_2_hop", "Q_routing_1_hop"
Example #18

from environments.j2n6s300.DDPG_HER_env_Gazebo import j2n6s300_Environment
from agents.actor_critic_agents.DDPG_HER import DDPG_HER
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer
from datetime import datetime
import os

now = datetime.now()  # current date and time
date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
os.mkdir('Data_and_Graphs/results_' + date_str)
path = 'Data_and_Graphs/results_' + date_str + '/'

config = Config()
config.seed = 1
config.environment = j2n6s300_Environment(proxyID='Env1')
config.num_episodes_to_run = 1
config.file_to_save_config = path + "config.json"
config.file_to_save_data_results = path + "jaco_DDPG-HER.pkl"
config.file_to_save_results_graph = path + "jaco_DDPG-HER.png"
config.show_solution_score = False
config.visualise_results_while_training = True
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.load_model = False
config.load_model_path = "Models/model.pt"
config.save_model = True
Example #19

from environments.isc_environments.SimpleISC import SimpleISC
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer

from agents.DQN_agents.DQN import DQN
from agents.DQN_agents.DDQN import DDQN
from agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from agents.DQN_agents.DRQN import DRQN

from models.FCNN import FCNN

from gym.core import Wrapper

config = Config()

config.environment = Wrapper(SimpleISC(mode="DISCRETE"))
config.num_episodes_to_run = 50

config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl"
config.file_to_save_results_graph = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Graph.png"
config.show_solution_score = True
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = True
config.randomise_random_seed = False
config.save_model = False
Example #20

import gym
from agents.policy_gradient_agents.PPO import PPO
from agents.actor_critic_agents.DDPG import DDPG
from agents.actor_critic_agents.SAC import SAC
from agents.actor_critic_agents.TD3 import TD3
from agents.Trainer import Trainer
from agents.hierarchical_agents.DIAYN import DIAYN
from utilities.data_structures.Config import Config


config = Config()
config.seed = 1
config.environment = gym.make("Hopper-v2")
config.num_episodes_to_run = 1000
config.file_to_save_data_results = "data_and_graphs/Hopper_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Hopper_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.load_model = False


actor_critic_agent_hyperparameters = {
        "Actor": {
            "learning_rate": 0.0003,
Example #21

import gym

from agents.hierarchical_agents.HRL.HRL import HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.environment = gym.make("Taxi-v2")
config.seed = 1
config.env_parameters = {}
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

linear_hidden_units = [32, 32]
learning_rate = 0.01
buffer_size = 100000
batch_size = 256
batch_norm = False
embedding_dimensionality = 10
gradient_clipping_norm = 5
update_every_n_steps = 1
Example #22

import gym   # used below but missing from the original snippet
import time  # used below but missing from the original snippet
from agents.actor_critic_agents.A3C import A3C
from agents.policy_gradient_agents.PPO import PPO
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DDQN import DDQN
from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from agents.DQN_agents.DQN import DQN
from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from agents.policy_gradient_agents.REINFORCE import REINFORCE

## envs import ##
from environments.carla_enviroments import env_v1_ObstacleAvoidance

env_title = "ObstacleAvoidance-v0"

config = Config()
config.env_title = env_title
config.seed = 1
config.environment = gym.make(env_title)
config.num_episodes_to_run = 2000
config.show_solution_score = False
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True
config.log_loss = False
config.log_base = time.strftime("%Y%m%d%H%M%S", time.localtime())
Example #23

from agents.DQN_agents.DQN_HER import DQN_HER
from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from agents.DQN_agents.Dueling_DDQN import Dueling_DDQN
from agents.DQN_agents.DDQN import DDQN
from agents.hierarchical_agents.DIAYN import DIAYN
from agents.hierarchical_agents.h_DQN import h_DQN
from agents.hierarchical_agents.HIRO import HIRO
from agents.hierarchical_agents.SNN_HRL import SNN_HRL
from agents.policy_gradient_agents.PPO import PPO
from agents.policy_gradient_agents.REINFORCE import REINFORCE

from environments.FaceDiscreete import FaceEnvironementDiscreete
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1

config.environment = FaceEnvironementDiscreete(
    "../weights/blg_small_12_5e-06_5e-05_2_8_small_big_noisy_first_True_512")

config.num_episodes_to_run = 500
config.file_to_save_data_results = "Data_and_Graphs/FaceDiscreete.pkl"
config.file_to_save_results_graph = "Data_and_Graphs/FaceDiscreete.png"
config.show_solution_score = True
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = True
Example #24

import gym

from models.Trainer import Trainer
from models.actor_critic_agents.DDPG import DDPG
from models.actor_critic_agents.TD3 import TD3
from models.policy_gradient_agents.PPO import PPO
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.9,
Example #25

        self.logger.info("Learning rate {}".format(new_lr))


if __name__ == '__main__':
    from utilities.data_structures.Config import Config
    import gym
    ## envs import ##
    from environments.carla_enviroments import env_v1_ObstacleAvoidance

    # net = q_network_toa(n_action=4)
    # net.to('cuda')
    # input = torch.rand(size=(10, 3, 224, 224)).to('cuda')
    # q1, q2 = net(input)

    config = Config()
    config.seed = 1
    config.environment = gym.make("ObstacleAvoidance-v0")
    config.num_episodes_to_run = 2000
    config.file_to_save_data_results = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/carla_obstacle_avoidance/data.pkl"
    config.file_to_save_results_graph = "C:/my_project/Deep-Reinforcement-Learning-Algorithms-with-PyTorch/results/data_and_graphs/carla_obstacle_avoidance/data.png"
    config.show_solution_score = False
    config.visualise_individual_results = True
    config.visualise_overall_agent_results = True
    config.standard_deviation_results = 1.0
    config.runs_per_agent = 1
    config.use_GPU = True
    config.overwrite_existing_results_file = False
    config.randomise_random_seed = True
    config.save_model = True
Example #26

from agents.DQN_agents.DDQN import DDQN
from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay
from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
from agents.policy_gradient_agents.PPO import PPO
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
import random  # random.seed is called below but the import was missing
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 1
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.load_model = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 3,
Example #27

import gym
from agents.Trainer import Trainer
from agents.actor_critic_agents.DDPG import DDPG
from agents.hierarchical_agents.HIRO import HIRO
from utilities.data_structures.Config import Config
config = Config()
config.seed = 1
config.environment = gym.make("Reacher-v2")  # alternatives: "InvertedPendulum-v2", "Pendulum-v0"
config.num_episodes_to_run = 1500
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.load_model = False

config.hyperparameters = {
    "HIRO": {
        "LOWER_LEVEL": {
            "max_lower_level_timesteps": 5,
            "Actor": {
                "learning_rate": 0.001,
                "linear_hidden_units": [20, 20],
                "final_layer_activation": "TANH",
Example #28

import gym
from environments.Atari_Environment import make_atari_game
from agents.DQN_agents.DDQN import DDQN
from agents.hierarchical_agents.HRL.HRL import HRL
from agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Loss is not drawing a random sample! Otherwise it wouldn't jump around that much!!

linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 taxi
buffer_size = 1000000
batch_size = 256
Example #29

from environments.Bit_Flipping_Environment import Bit_Flipping_Environment  # uncommented: it is used below
# from agents.policy_gradient_agents.PPO import PPO
# from environments.Four_Rooms_Environment import Four_Rooms_Environment
# from agents.hierarchical_agents.SNN_HRL import SNN_HRL
# from agents.actor_critic_agents.TD3 import TD3
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
import random  # random.seed is called below but the import was missing
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
Example #30

"""
@author: kashishg
"""

from agents.DQN_agents.DQN_HER import DQN_HER
from environments.j2n6s300.HER_env_tf import j2n6s300_Environment
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from datetime import datetime


now = datetime.now() # current date and time
num_episodes_to_run = 500
eps_decay_rate_denom = round(num_episodes_to_run/6)

config = Config()
config.seed = 1
config.environment = j2n6s300_Environment()
config.num_episodes_to_run = num_episodes_to_run
config.file_to_save_data_results = "Data_and_Graphs/{}jaco.pkl".format(now.strftime("%Y-%m-%d_%H-%M-%S_"))
config.file_to_save_results_graph = "Data_and_Graphs/{}jaco.png".format(now.strftime("%Y-%m-%d_%H-%M-%S_"))
config.show_solution_score = False
config.visualise_results_while_training = True
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.load_model = False