# Hyperparameter configuration for the embedded-observation agents.
# NOTE(review): `embedding_dimensions` is referenced, not defined, here — it must
# be assigned earlier in this script (confirm against the surrounding file).
config.hyperparameters = {
    "DQN_Agents": {
        "linear_hidden_units": [30, 10],
        "learning_rate": 0.01,
        "buffer_size": 40000,
        "batch_size": 256,
        "final_layer_activation": "None",
        "columns_of_data_to_be_embedded": [0],
        "embedding_dimensions": embedding_dimensions,
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "update_every_n_steps": 1,
        "epsilon_decay_rate_denominator": 10,
        "discount_rate": 0.99,
        # FIX: "learning_iterations": 1 appeared twice in this dict literal; in
        # Python the later duplicate silently wins, so the redundant copy was removed.
        "learning_iterations": 1,
        "tau": 0.01,
        "exploration_cycle_episodes_length": None,
        "clip_rewards": False
    },
    "SNN_HRL": {
        "SKILL_AGENT": {
            "num_skills": 20,
            "regularisation_weight": 1.5,
            "visitations_decay": 0.9999,
            "episodes_for_pretraining": 300,
            "batch_size": 256,
            "learning_rate": 0.001,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [embedding_dimensions[0], [20, 6]],
            "batch_norm": False,
            "gradient_clipping_norm": 2,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
            "tau": 0.01,
            "clip_rewards": False
        },
        "MANAGER": {
            "timesteps_before_changing_skill": 6,
            "linear_hidden_units": [10, 5],
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "batch_size": 256,
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": embedding_dimensions,
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 50,
            "discount_rate": 0.99,
            "learning_iterations": 1,
            "tau": 0.01,
            "clip_rewards": False
        }
    },
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
        "columns_of_data_to_be_embedded": [0],
        "embedding_dimensions": embedding_dimensions,
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 50.0,
        "normalise_rewards": True,
        "clip_rewards": False
    },
    "DIAYN": {
        "num_skills": 5,
        "DISCRIMINATOR": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 10],
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": embedding_dimensions
        },
        "AGENT": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 10]
        }
    },
    "HRL": {
        "linear_hidden_units": [10, 5],
        "learning_rate": 0.01,
        "buffer_size": 40000,
        "batch_size": 256,
        "final_layer_activation": "None",
        "columns_of_data_to_be_embedded": [0],
        "embedding_dimensions": embedding_dimensions,
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "update_every_n_steps": 1,
        "epsilon_decay_rate_denominator": 400,
        "discount_rate": 0.99,
        "learning_iterations": 1,
        "tau": 0.01
    }
}
# Actor-critic (SAC/TD3-style) hyperparameter configuration.
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [512, 512],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        # FIX: "discount_rate" was listed twice in this dict literal (both 0.99).
        # Duplicate keys are silently collapsed by Python (later one wins), so the
        # redundant second entry was removed.
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 1.0,
        "normalise_rewards": False,
        "exploration_worker_difference": 2.0,
        "clip_rewards": False,
        "Actor": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [512, 512, 512],
            "final_layer_activation": "Softmax",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "Critic": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [512, 512, 512],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 400,
        "batch_size": 100,
        "mu": 0.0,      # for O-H (Ornstein-Uhlenbeck) noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.25,  # for O-H noise
        "action_noise_std": 0.2,            # for TD3
        "action_noise_clipping_range": 0.5, # for TD3
        "update_every_n_steps": 3,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}
# Hyperparameters for the policy-gradient (PPO-style) agents and the
# actor-critic (DDPG/TD3/SAC) family.
config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.9,
        "batch_norm": False,
        "clip_epsilon": 0.2,
        "episodes_per_learning_round": 10,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.2,
        "epsilon_decay_rate_denominator": 1
    },
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.003,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": None,
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "Critic": {
            "learning_rate": 0.02,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 1000,  # for SAC only
        "batch_size": 256,
        "discount_rate": 0.99,
        "mu": 0.0,      # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.25,  # for O-H noise
        "action_noise_std": 0.2,            # for TD3
        "action_noise_clipping_range": 0.5, # for TD3
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": True,
        "do_evaluation_iterations": True
    }
}
# Flat hyperparameter dict; most values come from variables defined earlier in
# the script (learning_rate, buffer_size, sequitur_k, ...).
config.hyperparameters = {
    "linear_hidden_units": linear_hidden_units,
    "learning_rate": learning_rate,
    "buffer_size": buffer_size,
    "batch_size": batch_size,
    "final_layer_activation": "None",
    "columns_of_data_to_be_embedded": [0],
    # Observation index is embedded; size comes from the discrete state space.
    "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]],
    "batch_norm": batch_norm,
    "gradient_clipping_norm": gradient_clipping_norm,
    "update_every_n_steps": update_every_n_steps,
    "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator,
    "discount_rate": discount_rate,
    "learning_iterations": learning_iterations,
    "tau": tau,
    "sequitur_k": sequitur_k,
    "action_length_reward_bonus": 0.1,
    "episodes_to_run_with_no_exploration": 10,
    "pre_training_learning_iterations_multiplier": 0.1,
    "copy_over_hidden_layers": True,
    "use_global_list_of_best_performing_actions": True
}
# Two-level hierarchical-agent configuration ("LOWER_LEVEL" / "HIGHER_LEVEL"),
# each level carrying its own Actor/Critic sub-dicts plus shared noise/TD3 knobs.
# NOTE(review): "number_goal_candidates": 8 appears inside HIGHER_LEVEL's "Actor"
# dict as well as at both level roots — the Actor-level copy looks misplaced;
# confirm against the consumer before removing it.
config.hyperparameters = { "LOWER_LEVEL": { "max_lower_level_timesteps": 3, "Actor": { "learning_rate": 0.001, "linear_hidden_units": [20, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [20, 20], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5, }, "batch_size": 256, "discount_rate": 0.9, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "number_goal_candidates": 8 }, "HIGHER_LEVEL": { "Actor": { "learning_rate": 0.001, "linear_hidden_units": [20, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "number_goal_candidates": 8 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [20, 20], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5 }, "batch_size": 256, "discount_rate": 0.9, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "number_goal_candidates": 8 }, }
config.randomise_random_seed = False config.save_model = False config.model = None config.seed = 0 config.debug_mode = True config.wandb_log = True config.wandb_job_type = "testing" config.wandb_entity = "rafael_piacsek" config.wandb_tags = ["initial testing"] config.wandb_model_log_freq = 1_000 config.hyperparameters = dict( # y_range=(-1, 14), HER_sample_proportion=0.8, alpha_prioritised_replay=0.6, batch_norm=False, batch_size=64, beta_prioritised_replay=0.1, buffer_size=1_000_000, clip_rewards=False, discount_rate=0.999, epsilon=1.0, epsilon_decay_rate_denominator=(config.num_episodes_to_run * 0.01) // (1 - 0.01), final_layer_activation="softmax", gradient_clipping_norm=5, incremental_td_error=1e-8, learning_iterations=1, learning_rate=0.01, random_episodes_to_run=0, tau=1e-2,
# Actor-critic hyperparameters for a HER-capable continuous-control agent.
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [50, 50],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [50, 50, 50],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 30000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        "update_every_n_steps": 10,
        "learning_updates_per_learning_session": 10,
        "HER_sample_proportion": 0.8,
        "clip_rewards": False
    }
}
# Configuration for h-DQN (CONTROLLER / META_CONTROLLER) and SNN-HRL
# (SKILL_AGENT / MANAGER). Embedding sizes are derived from the discrete
# observation space of config.environment.
# NOTE(review): inside SKILL_AGENT a commented-out block of DQN-style parameters
# is kept alongside the active PPO-style parameters (learning_iterations_per_round,
# clip_epsilon, ...) — presumably an earlier variant retained for reference.
config.hyperparameters = { "h_DQN": { "CONTROLLER": { "batch_size": 256, "learning_rate": 0.01, "buffer_size": 40000, "linear_hidden_units": [20, 10], "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0, 1], "embedding_dimensions": [[ config.environment.observation_space.n, max(4, int(config.environment.observation_space.n / 10.0)) ], [ config.environment.observation_space.n, max(4, int(config.environment.observation_space.n / 10.0)) ]], "batch_norm": False, "gradient_clipping_norm": 5, "update_every_n_steps": 1, "epsilon_decay_rate_denominator": 1500, "discount_rate": 0.999, "learning_iterations": 1 }, "META_CONTROLLER": { "batch_size": 256, "learning_rate": 0.001, "buffer_size": 40000, "linear_hidden_units": [20, 10], "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[ config.environment.observation_space.n, max(4, int(config.environment.observation_space.n / 10.0)) ]], "batch_norm": False, "gradient_clipping_norm": 5, "update_every_n_steps": 1, "epsilon_decay_rate_denominator": 2500, "discount_rate": 0.999, "learning_iterations": 1 } }, "SNN_HRL": { "SKILL_AGENT": { "num_skills": 2, "regularisation_weight": 1.5, "visitations_decay": 0.99, "episodes_for_pretraining": 2000, # "batch_size": 256, # "learning_rate": 0.01, # "buffer_size": 40000, # "linear_hidden_units": [20, 10], # "final_layer_activation": "None", # "columns_of_data_to_be_embedded": [0, 1], # "embedding_dimensions": [[config.environment.observation_space.n, # max(4, int(config.environment.observation_space.n / 10.0))], # [6, 4]], # "batch_norm": False, # "gradient_clipping_norm": 5, # "update_every_n_steps": 1, # "epsilon_decay_rate_denominator": 50, # "discount_rate": 0.999, # "learning_iterations": 1 "learning_rate": 0.05, "linear_hidden_units": [20, 20], "final_layer_activation": "SOFTMAX", "learning_iterations_per_round": 5, "discount_rate": 0.99, "batch_norm": False, "clip_epsilon": 0.1, "episodes_per_learning_round": 4, 
"normalise_rewards": True, "gradient_clipping_norm": 7.0, "mu": 0.0, # only required for continuous action games "theta": 0.0, # only required for continuous action games "sigma": 0.0, # only required for continuous action games "epsilon_decay_rate_denominator": 1.0 }, "MANAGER": { "timesteps_before_changing_skill": 4, "linear_hidden_units": [10, 5], "learning_rate": 0.01, "buffer_size": 40000, "batch_size": 256, "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[ config.environment.observation_space.n, max(4, int(config.environment.observation_space.n / 10.0)) ]], "batch_norm": False, "gradient_clipping_norm": 5, "update_every_n_steps": 1, "epsilon_decay_rate_denominator": 1000, "discount_rate": 0.999, "learning_iterations": 1 } } }
# DIAYN configuration (DISCRIMINATOR / AGENT / MANAGER sub-dicts) plus a
# standalone Actor_Critic_Agents section. Several values come from variables
# defined earlier (discriminator_learning_rate, linear_hidden_units,
# timesteps_to_give_up_control_for, num_skills, num_unsupservised_episodes).
# NOTE(review): the key "num_unsupservised_episodes" looks like a typo of
# "num_unsupervised_episodes", but it must stay as-is unless every consumer
# reading this key is renamed in lockstep — verify before changing.
config.hyperparameters = { "DIAYN": { "DISCRIMINATOR": { "final_layer_activation": None, "learning_rate": discriminator_learning_rate, "linear_hidden_units": linear_hidden_units, "gradient_clipping_norm": 5, }, "AGENT": { "clip_rewards": False, "do_evaluation_iterations": False, "learning_rate": 0.005, "linear_hidden_units": [20, 10], "final_layer_activation": ["SOFTMAX", None], "gradient_clipping_norm": 5.0, "epsilon_decay_rate_denominator": 1.0, "normalise_rewards": True, "exploration_worker_difference": 2.0, "min_steps_before_learning": 10000, "batch_size": 256, "discount_rate": 0.99, # questionable... "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "use_GPU": config.use_GPU, "Actor": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": None, "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, "Critic": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": None, "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, }, "MANAGER": { "timesteps_to_give_up_control_for": timesteps_to_give_up_control_for, "learning_rate": 0.01, "batch_size": 256, "buffer_size": 40000, "epsilon": 1.0, "epsilon_decay_rate_denominator": 1, "discount_rate": 0.99, "tau": 0.01, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.1, "incremental_td_error": 1e-8, "update_every_n_steps": 1, "linear_hidden_units": [30, 15], "final_layer_activation": "None", "batch_norm": False, "gradient_clipping_norm": 0.7, "learning_iterations": 1, "clip_rewards": False }, "num_skills": num_skills, "num_unsupservised_episodes": num_unsupservised_episodes, "final_layer_activation": None }, "Actor_Critic_Agents": { 
'batch_size': 256, "clip_rewards": False, 'automatically_tune_entropy_hyperparameter': True, 'entropy_term_weight': .3, 'add_extra_noise': False, 'learning_updates_per_learning_session': 1, 'min_steps_before_learning': 10000, 'update_every_n_steps': 1, 'discount_rate': .99, 'do_evaluation_iterations': False, "Actor": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": None, "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, "Critic": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": None, "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier", }, } }
# Combined configuration covering DQN_Agents, Stochastic_Policy_Search_Agents,
# Policy_Gradient_Agents, Actor_Critic_Agents (with nested Actor/Critic), and
# SNN_HRL (SKILL_AGENT / MANAGER). The very small values ("batch_size": 3,
# "min_steps_before_learning": 4, "episodes_for_pretraining": 7) suggest this
# is a fast-test configuration — TODO confirm from the calling context.
config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 64, "buffer_size": 40000, "epsilon": 0.1, "epsilon_decay_rate_denominator": 200, "discount_rate": 0.99, "tau": 0.1, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.4, "incremental_td_error": 1e-8, "update_every_n_steps": 3, "linear_hidden_units": [20, 20, 20], "final_layer_activation": "None", "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8, "clip_rewards": False, "learning_iterations": 1 }, "Stochastic_Policy_Search_Agents": { "policy_network_type": "Linear", "noise_scale_start": 1e-2, "noise_scale_min": 1e-3, "noise_scale_max": 2.0, "noise_scale_growth_factor": 2.0, "stochastic_action_decision": False, "num_policies": 10, "episodes_per_policy": 1, "num_policies_to_keep": 5, "clip_rewards": False }, "Policy_Gradient_Agents": { "learning_rate": 0.01, "linear_hidden_units": [20], "final_layer_activation": "SOFTMAX", "learning_iterations_per_round": 7, "discount_rate": 0.99, "batch_norm": False, "clip_epsilon": 0.1, "episodes_per_learning_round": 7, "normalise_rewards": False, "gradient_clipping_norm": 5, "mu": 0.0, #only required for continuous action games "theta": 0.0, #only required for continuous action games "sigma": 0.0, #only required for continuous action games "epsilon_decay_rate_denominator": 1, "clip_rewards": False }, "Actor_Critic_Agents": { "learning_rate": 0.0005, "linear_hidden_units": [150, 30, 30, 30], "final_layer_activation": ["SOFTMAX", None], "gradient_clipping_norm": 25.0, "discount_rate": 0.99, "epsilon_decay_rate_denominator": 10.0, "normalise_rewards": False, "automatically_tune_entropy_hyperparameter": True, "add_extra_noise": False, "min_steps_before_learning": 4, "do_evaluation_iterations": True, "clip_rewards": False, "Actor": { "learning_rate": 0.001, "linear_hidden_units": [20, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 
0.01, "linear_hidden_units": [20, 20], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5 }, "batch_size": 3, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "HER_sample_proportion": 0.8, "exploration_worker_difference": 1.0 }, "SNN_HRL": { "SKILL_AGENT": { "num_skills": 20, "regularisation_weight": 1.5, "visitations_decay": 0.9999, "episodes_for_pretraining": 7, "batch_size": 256, "learning_rate": 0.001, "buffer_size": 40000, "linear_hidden_units": [20, 10], "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0, 1], "embedding_dimensions": [[300, 10], [20, 6]], "batch_norm": False, "gradient_clipping_norm": 2, "update_every_n_steps": 1, "epsilon_decay_rate_denominator": 500, "discount_rate": 0.999, "learning_iterations": 1, "tau": 0.01, "clip_rewards": False }, "MANAGER": { "timesteps_before_changing_skill": 6, "linear_hidden_units": [10, 5], "learning_rate": 0.01, "buffer_size": 40000, "batch_size": 3, "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[300, 10]], "batch_norm": False, "gradient_clipping_norm": 5, "update_every_n_steps": 1, "epsilon_decay_rate_denominator": 50, "discount_rate": 0.99, "learning_iterations": 1, "tau": 0.01, "clip_rewards": False } } }
# Hyperparameters for PPO-style policy gradients, the actor-critic family, and
# DIAYN. `actor_critic_agent_hyperparameters` and `manager_hyperparameters`
# are dicts defined earlier in the script and are shared by reference here.
config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.9,
        "batch_norm": False,
        "clip_epsilon": 0.2,
        "episodes_per_learning_round": 10,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.2,
        "epsilon_decay_rate_denominator": 1,
        "clip_rewards": False
    },
    "Actor_Critic_Agents": actor_critic_agent_hyperparameters,
    "DIAYN": {
        "DISCRIMINATOR": {
            "learning_rate": 0.001,
            "linear_hidden_units": [32, 32],
            "final_layer_activation": None,
            "gradient_clipping_norm": 5
        },
        "AGENT": actor_critic_agent_hyperparameters,
        "MANAGER": manager_hyperparameters,
        "num_skills": 10,
        # NOTE(review): key spelling ("unsupservised") kept as-is — consumers
        # presumably read this exact key; verify before renaming.
        "num_unsupservised_episodes": 500
    }
}
# HIRO configuration (LOWER_LEVEL / HIGHER_LEVEL, each with Actor/Critic)
# alongside a flat Actor_Critic_Agents section whose values the inline comment
# attributes to the TD3 paper (arxiv.org/pdf/1802.09477.pdf).
config.hyperparameters = { "HIRO": { "LOWER_LEVEL": { "max_lower_level_timesteps": 5, "Actor": { "learning_rate": 0.001, "linear_hidden_units": [20, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [20, 20], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5 }, "batch_size": 256, "discount_rate": 0.9, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "clip_rewards": False }, "HIGHER_LEVEL": { "Actor": { "learning_rate": 0.001, "linear_hidden_units": [20, 20], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [20, 20], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5 }, "batch_size": 256, "discount_rate": 0.9, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 20, "learning_updates_per_learning_session": 10, "clip_rewards": False }, }, "Actor_Critic_Agents": { # hyperparameters taken from https://arxiv.org/pdf/1802.09477.pdf "Actor": { "learning_rate": 0.001, "linear_hidden_units": [400, 300], "final_layer_activation": "TANH", "batch_norm": False, "tau": 0.01, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [400, 300], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5 }, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise 
"sigma": 0.2, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "clip_rewards": False } }
# Minimal DQN+HER test harness: tiny batch size (3) and a single run, with all
# visualisation disabled. The trainer is built, then the agent is constructed
# directly from the flattened "DQN_Agents" sub-dict and its game state reset.
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 3,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
        "clip_rewards": False
    }
}
trainer = Trainer(config, [DQN_HER])
# The agent expects the flat per-agent dict, not the outer keyed dict.
config.hyperparameters = config.hyperparameters["DQN_Agents"]
agent = DQN_HER(config)
agent.reset_game()
# Run configuration comparing DQN+HER against plain DQN over 3 runs per agent.
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "y_range": (-1, 14),
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
        "learning_iterations": 1,
        "clip_rewards": False
    }
}

if __name__ == '__main__':
    AGENTS = [DQN_HER, DQN]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents()
# Large-network configuration for DQN_Agents and Actor_Critic_Agents; the inline
# comment attributes the actor-critic values to the TD3 paper
# (arxiv.org/pdf/1802.09477.pdf). Note the very small DQN learning rate (2e-5)
# paired with wide layers ([1000, 800] / [1200, 1200]).
config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.00002, "batch_size": 256, "buffer_size": 100000, "epsilon_decay_rate_denominator": 150, "discount_rate": 0.99, "incremental_td_error": 1e-8, "update_every_n_steps": 1, "linear_hidden_units": [1000, 800], "final_layer_activation": None, "batch_norm": False, "gradient_clipping_norm": 5, "HER_sample_proportion": 0.8, "learning_iterations": 1, "clip_rewards": False, "tau": 0.01 }, "Actor_Critic_Agents": { # hyperparameters taken from https://arxiv.org/pdf/1802.09477.pdf "Actor": { "learning_rate": 0.0008, "linear_hidden_units": [1200, 1200], "final_layer_activation": "Softmax", "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.0008, "linear_hidden_units": [1200, 1200], "final_layer_activation": None, "batch_norm": False, "buffer_size": 100000, "tau": 0.005, "gradient_clipping_norm": 5 }, "min_steps_before_learning": 5000, "batch_size": 256, "discount_rate": 0.99, "mu": 0.0, #for O-H noise "theta": 0.15, #for O-H noise "sigma": 0.25, #for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True, "clip_rewards": False } }
# Hyperparameters for DQN, stochastic policy search, policy-gradient, and
# actor-critic agent families.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.01,
        "batch_size": 256,
        "buffer_size": 40000,
        "epsilon": 1.0,
        "epsilon_decay_rate_denominator": 1,
        "discount_rate": 0.99,
        "tau": 0.01,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.1,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 0.7,
        "learning_iterations": 1,
        "clip_rewards": False
    },
    "Stochastic_Policy_Search_Agents": {
        "policy_network_type": "Linear",
        "noise_scale_start": 1e-2,
        "noise_scale_min": 1e-3,
        "noise_scale_max": 2.0,
        "noise_scale_growth_factor": 2.0,
        "stochastic_action_decision": False,
        "num_policies": 10,
        "episodes_per_policy": 1,
        "num_policies_to_keep": 5,
        "clip_rewards": False
    },
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [20, 20],
        "final_layer_activation": "SOFTMAX",
        "learning_iterations_per_round": 5,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.1,
        "episodes_per_learning_round": 4,
        "normalise_rewards": True,
        "gradient_clipping_norm": 7.0,
        "mu": 0.0,    # only required for continuous action games
        "theta": 0.0, # only required for continuous action games
        "sigma": 0.0, # only required for continuous action games
        "epsilon_decay_rate_denominator": 1.0,
        "clip_rewards": False
    },
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        # FIX: "discount_rate" was listed twice in this dict literal (both 0.99);
        # the later duplicate silently overwrote this one, so it was removed.
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 1.0,
        "normalise_rewards": True,
        "exploration_worker_difference": 2.0,
        "clip_rewards": False,
        "Actor": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [64, 64],
            "final_layer_activation": "Softmax",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "Critic": {
            "learning_rate": 0.0003,
            "linear_hidden_units": [64, 64],
            "final_layer_activation": None,
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 400,
        "batch_size": 256,
        "mu": 0.0,      # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.25,  # for O-H noise
        "action_noise_std": 0.2,            # for TD3
        "action_noise_clipping_range": 0.5, # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}
# Configuration for HRL, DQN_Agents, and Actor_Critic_Agents. The HRL and DQN
# sections are driven almost entirely by variables defined earlier in the script
# (linear_hidden_units, sequitur_k, tau, ...); embedding sizes are derived from
# the discrete observation space of config.environment.
config.hyperparameters = { "HRL": { "linear_hidden_units": linear_hidden_units, "learning_rate": learning_rate, "buffer_size": buffer_size, "batch_size": batch_size, "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": batch_norm, "gradient_clipping_norm": gradient_clipping_norm, "update_every_n_steps": update_every_n_steps, "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator, "discount_rate": discount_rate, "learning_iterations": learning_iterations, "tau": tau, "sequitur_k": sequitur_k, "action_length_reward_bonus": action_length_reward_bonus, "pre_training_learning_iterations_multiplier": pre_training_learning_iterations_multiplier, "episodes_to_run_with_no_exploration": episodes_to_run_with_no_exploration, "action_balanced_replay_buffer": action_balanced_replay_buffer, "copy_over_hidden_layers": copy_over_hidden_layers }, "DQN_Agents": { "linear_hidden_units": linear_hidden_units, "learning_rate": learning_rate, "buffer_size": buffer_size, "batch_size": batch_size, "final_layer_activation": "None", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": batch_norm, "gradient_clipping_norm": gradient_clipping_norm, "update_every_n_steps": update_every_n_steps, "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator, "discount_rate": discount_rate, "learning_iterations": learning_iterations, "tau": tau, }, "Actor_Critic_Agents": { "Actor": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": "Softmax", "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "Critic": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], 
"final_layer_activation": None, "columns_of_data_to_be_embedded": [0], "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 10000, "batch_size": 256, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True } }
# Variant of the HRL/DQN/Actor-Critic configuration in which the observation
# embedding lines are commented out (networks consume raw observations instead)
# and many extra HRL macro-action options are exposed via variables defined
# earlier in the script (use_relative_counts, abandon_ship, episodes_per_round,
# add_1_macro_action_at_a_time, ...).
config.hyperparameters = { "HRL": { "linear_hidden_units": linear_hidden_units, "learning_rate": learning_rate, "buffer_size": buffer_size, "batch_size": batch_size, "final_layer_activation": "None", # "columns_of_data_to_be_embedded": [0], # "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": batch_norm, "gradient_clipping_norm": gradient_clipping_norm, "update_every_n_steps": update_every_n_steps, "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator, "discount_rate": discount_rate, "learning_iterations": learning_iterations, "tau": tau, "sequitur_k": sequitur_k, "use_relative_counts": use_relative_counts, "action_length_reward_bonus": action_length_reward_bonus, "pre_training_learning_iterations_multiplier": pre_training_learning_iterations_multiplier, "episodes_to_run_with_no_exploration": episodes_to_run_with_no_exploration, "action_balanced_replay_buffer": action_balanced_replay_buffer, "copy_over_hidden_layers": copy_over_hidden_layers, "random_episodes_to_run": random_episodes_to_run, "only_train_new_actions": only_train_new_actions, "only_train_final_layer": only_train_final_layer, "num_top_results_to_use": num_top_results_to_use, "action_frequency_required_in_top_results": action_frequency_required_in_top_results, "reduce_macro_action_appearance_cutoff_throughout_training": reduce_macro_action_appearance_cutoff_throughout_training, "add_1_macro_action_at_a_time": add_1_macro_action_at_a_time, "calculate_q_values_as_increments": calculate_q_values_as_increments, "episodes_per_round": episodes_per_round, "abandon_ship": abandon_ship, "clip_rewards": clip_rewards }, "DQN_Agents": { "linear_hidden_units": linear_hidden_units, "learning_rate": learning_rate, "buffer_size": buffer_size, "batch_size": batch_size, "final_layer_activation": "None", # "columns_of_data_to_be_embedded": [0], # "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": 
batch_norm, "gradient_clipping_norm": gradient_clipping_norm, "update_every_n_steps": update_every_n_steps, "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator, "discount_rate": discount_rate, "learning_iterations": learning_iterations, "tau": tau, "clip_rewards": clip_rewards }, "Actor_Critic_Agents": { "Actor": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": "Softmax", # "columns_of_data_to_be_embedded": [0], # "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": False, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "Critic": { "learning_rate": 0.0003, "linear_hidden_units": [64, 64], "final_layer_activation": None, # "columns_of_data_to_be_embedded": [0], # "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": False, "buffer_size": 1000000, "tau": 0.005, "gradient_clipping_norm": 5, "initialiser": "Xavier" }, "min_steps_before_learning": 10000, "batch_size": 256, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.25, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "automatically_tune_entropy_hyperparameter": True, "entropy_term_weight": None, "add_extra_noise": False, "do_evaluation_iterations": True, "clip_rewards": clip_rewards } }
config.resume = False config.resume_path = '' config.backbone_pretrain = True config.hyperparameters = { "DQN_Agents": { "learning_rate": 1e-2, "batch_size": 8, "buffer_size": 4000, "epsilon": 1.0, "epsilon_decay_rate_denominator": 1.0, "discount_rate": 0.99, "tau": 0.01, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.1, "incremental_td_error": 1e-8, "update_every_n_steps": 1, "gradient_clipping_norm": None, "learning_iterations": 1, "clip_rewards": False, ## useless } } if __name__ == "__main__": # AGENTS = [SAC_Discrete, DDQN, Dueling_DDQN, DQN, DQN_With_Fixed_Q_Targets, # DDQN_With_Prioritised_Experience_Replay, A2C, PPO, A3C ] AGENTS = [DQN_With_Fixed_Q_Targets_2_EYE] trainer = Trainer(config, AGENTS)