# from agents.policy_gradient_agents.PPO import PPO
# from environments.Four_Rooms_Environment import Four_Rooms_Environment
# from agents.hierarchical_agents.SNN_HRL import SNN_HRL
# from agents.actor_critic_agents.TD3 import TD3
from agents.Trainer import Trainer
from utilities.data_structures.Config import Config
from agents.DQN_agents.DQN import DQN
from environments.Bit_Flipping_Environment import Bit_Flipping_Environment  # path assumed to follow the environments.* pattern of the commented imports above
import random
import numpy as np
import torch

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 2000
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
        "epsilon": 0.1,
Code example #2
File: main.py  Project: JayakumarPawan/HRL-research
import os
import gym

# str_to_obj maps the algorithm names passed on the command line (args.algorithms)
# to their agent classes
str_to_obj = {
    'SAC': SAC,
    'DDQN': DDQN,
    'SAC_Discrete': SAC_Discrete,
    'DIAYN': DIAYN,
    'DBH': DBH
}
if args.rts:
    config.rts()
    AGENTS = [DDQN, SAC_Discrete, DIAYN, DBH]

else:
    AGENTS = [str_to_obj[i] for i in args.algorithms]
    config.environment_name = args.environment
    config.environment = gym.make(config.environment_name)
    config.eval = args.evaluate
    config.seed = args.seed
    config.num_episodes_to_run = args.num_episodes
    config.runs_per_agent = args.n_trials
    config.use_GPU = args.use_GPU
    config.save_results = args.save_results
    config.run_prefix = args.run_prefix
    config.train_existing_model = args.tem
    config.save_directory = 'results/{}'.format(config.run_prefix)
    if not os.path.exists(config.save_directory):
        os.makedirs(config.save_directory)
    config.visualise_overall_agent_results = True
    config.standard_deviation_results = 1.0

linear_hidden_units = [128, 128, 32]
learning_rate = 0.01
buffer_size = 100000
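
# A minimal sketch (not from this project) of how these hyperparameters and the
# selected AGENTS list might be wired into the config and run, assuming the same
# Trainer import and API as in the first example. The "Agents" grouping key and
# the exact hyperparameter layout are assumptions:
from agents.Trainer import Trainer  # assumed to match the first example's import

config.hyperparameters = {
    "Agents": {
        "linear_hidden_units": linear_hidden_units,
        "learning_rate": learning_rate,
        "buffer_size": buffer_size,
    }
}
trainer = Trainer(config, AGENTS)
trainer.run_games_for_agents()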