Example #1
from drl.agents.hierarchical_agents.SNN_HRL import SNN_HRL
from drl.agents.actor_critic_agents.TD3 import TD3
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config
from drl.agents.DQN_agents.DQN import DQN
# Assumed import path for the environment instantiated below, mirroring the
# environments package used in Example #2; adjust to your actual layout.
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment
import random  # needed for random.seed below
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
Example #2
import gym
from environments.Atari_Environment import make_atari_game
from drl.agents.DQN_agents.DDQN import DDQN
from drl.agents.hierarchical_agents.HRL.HRL import HRL
from drl.agents.hierarchical_agents.HRL.Model_HRL import Model_HRL
from drl.agents.Trainer import Trainer
from drl.utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = make_atari_game("SpaceInvaders-v0")
config.env_parameters = {}
config.num_episodes_to_run = 500
config.file_to_save_data_results = "data_and_graphs/hrl_experiments/Space_Invaders_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/hrl_experiments/Space_Invaders.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 10
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False


# Note: the loss does not appear to be computed from a random sample; otherwise it wouldn't jump around this much.

linear_hidden_units = [32, 32]
learning_rate = 0.005  # 0.001 was used for the Taxi environment
buffer_size = 1000000
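Example #2 also ends before these hyperparameters are attached to the config or any agent is run. The sketch below shows the likely wiring; the "DQN_Agents" key is assumed by analogy with Example #1, and the Trainer calls carry the same assumptions as the sketch above.

config.hyperparameters = {
    "DQN_Agents": {  # key assumed by analogy with Example #1
        "linear_hidden_units": linear_hidden_units,
        "learning_rate": learning_rate,
        "buffer_size": buffer_size,
    }
}

trainer = Trainer(config, [DDQN])  # assumed Trainer API, as in the sketch above
trainer.run_games_for_agents()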