# Experiment configuration script: builds a Config object for a run on the
# Gym "Pendulum-v0" environment and starts filling in its hyperparameters.
# NOTE(review): this fragment begins mid-file; the imports that provide
# `torch`, `gym` and `Config` are not visible here.
torch.manual_seed(1)  # seed PyTorch's random number generator

config = Config()
config.seed = 1
config.environment = gym.make("Pendulum-v0")
config.num_episodes_to_run = 1500
# No output paths are set: both result-file settings are None.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = False
config.overwrite_existing_results_file = False
# NOTE(review): randomise_random_seed is True although seed is fixed to 1
# above -- confirm which of the two the consumer of this config honours.
config.randomise_random_seed = True
config.save_model = False

# Hyperparameters are nested by section name. Only the beginning of the
# "LOWER_LEVEL" section is present in this fragment.
config.hyperparameters = {
    "LOWER_LEVEL": {
        "max_lower_level_timesteps": 3,
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5
        },
        # The "Critic" learning rate (0.01) is ten times the "Actor" one (0.001).
        "Critic": {
            "learning_rate": 0.01,
import numpy as np
import torch

# Seed the Python, NumPy and PyTorch random number generators with the same
# value.
# NOTE(review): `random` is used here but no `import random` is visible in
# this fragment -- presumably it is imported earlier; confirm.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

# Configuration for a run on Bit_Flipping_Environment(4).
# NOTE(review): 1 episode, 1 run per agent and a batch size of 3 look like a
# minimal smoke-test setup rather than a real training run -- confirm.
config = Config()
config.seed = 1
config.environment = Bit_Flipping_Environment(4)
config.num_episodes_to_run = 1
# No output paths are set and both visualisation flags are off.
config.file_to_save_data_results = None
config.file_to_save_results_graph = None
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False

# Hyperparameters are nested by section name. Only the beginning of the
# "DQN_Agents" section is present in this fragment.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 3,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,