Beispiel #1
0
def test_add_default_hyperparameters_if_not_overriden():
    """Check that Trainer fills in hyperparameters the caller left out.

    Three scenarios are run in order: one agent group with no settings, two
    agent groups with no settings, and one group that overrides a default
    value and supplies an extra key. In every scenario the dictionary returned
    by ``add_default_hyperparameters_if_not_overriden`` must equal the
    expected one.
    """
    config = Config()

    expected_defaults = {
        "output_activation": "None",
        "hidden_activations": "relu",
        "dropout": 0.0,
        "initialiser": "default",
        "batch_norm": False,
        "columns_of_data_to_be_embedded": [],
        "embedding_dimensions": [],
        "y_range": (),
    }
    # Identical to the defaults apart from one overridden value and one
    # additional, caller-supplied key.
    expected_with_overrides = dict(expected_defaults)
    expected_with_overrides["output_activation"] = "YESSS!!"
    expected_with_overrides["helo"] = 20

    # Each entry pairs the hyperparameters handed to the trainer with the
    # dictionary expected back.
    scenarios = (
        (
            {"DQN_Agents": {}},
            {"DQN_Agents": expected_defaults},
        ),
        (
            {"DQN_Agents": {}, "Test": {}},
            {"DQN_Agents": expected_defaults, "Test": expected_defaults},
        ),
        (
            {"DQN_Agents": {"helo": 20, "output_activation": "YESSS!!"}},
            {"DQN_Agents": expected_with_overrides},
        ),
    )

    for supplied, expected in scenarios:
        config.hyperparameters = supplied
        trainer = Trainer(config, [])
        config.hyperparameters = trainer.add_default_hyperparameters_if_not_overriden(config.hyperparameters)
        assert config.hyperparameters == expected
# Two-level hyperparameter set. Both levels carry the same Actor/Critic and
# training values; only "LOWER_LEVEL" defines "max_lower_level_timesteps".
config.hyperparameters = {
    "LOWER_LEVEL": {
        "max_lower_level_timesteps": 3,
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        # for O-H noise
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        # for TD3
        "action_noise_std": 0.2,
        "action_noise_clipping_range": 0.5,
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
    },
    "HIGHER_LEVEL": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        # for O-H noise
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        # for TD3
        "action_noise_std": 0.2,
        "action_noise_clipping_range": 0.5,
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
    },
}
# Hyperparameters for the "h_DQN" and "SNN_HRL" entries. Every embedding
# specification that depends on the environment pairs the number of discrete
# observations with an embedding size of one tenth of that number, but never
# less than 4.
config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 1500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
        "META_CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.001,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 2500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
    },
    "SNN_HRL": {
        "SKILL_AGENT": {
            "num_skills": 2,
            "regularisation_weight": 1.5,
            "visitations_decay": 0.99,
            "episodes_for_pretraining": 2000,
            # Disabled alternative settings, kept for reference:
            # "batch_size": 256,
            # "learning_rate": 0.01,
            # "buffer_size": 40000,
            # "linear_hidden_units": [20, 10],
            # "final_layer_activation": "None",
            # "columns_of_data_to_be_embedded": [0, 1],
            # "embedding_dimensions": [[config.environment.observation_space.n,
            #                           max(4, int(config.environment.observation_space.n / 10.0))],
            #                          [6, 4]],
            # "batch_norm": False,
            # "gradient_clipping_norm": 5,
            # "update_every_n_steps": 1,
            # "epsilon_decay_rate_denominator": 50,
            # "discount_rate": 0.999,
            # "learning_iterations": 1
            "learning_rate": 0.05,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "SOFTMAX",
            "learning_iterations_per_round": 5,
            "discount_rate": 0.99,
            "batch_norm": False,
            "clip_epsilon": 0.1,
            "episodes_per_learning_round": 4,
            "normalise_rewards": True,
            "gradient_clipping_norm": 7.0,
            # only required for continuous action games
            "mu": 0.0,
            "theta": 0.0,
            "sigma": 0.0,
            "epsilon_decay_rate_denominator": 1.0,
        },
        "MANAGER": {
            "timesteps_before_changing_skill": 4,
            "linear_hidden_units": [10, 5],
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "batch_size": 256,
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 1000,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
    },
}
Beispiel #4
0
# Run-level switches: show per-run and overall result plots, run each agent
# three times, and stay on the CPU.
config.visualise_individual_results = True
config.visualise_overall_results = True
config.runs_per_agent = 3
config.use_GPU = False

# Only the "DQN_Agents" group is configured in this script.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 500,
        "discount_rate": 0.98,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "nn_layers": 2,
        "nn_start_units": 256,
        "nn_unit_decay": 1.0,
        "final_layer_activation": None,
        "batch_norm": False,
        "gradient_clipping_norm": 5,
    },
}

if __name__ == "__main__":
    # Hand both agent classes to the runner together with the shared config.
    AGENTS = [DQN_HER_Agent, DQN_Agent]
    run_games_for_agents(config, AGENTS)
Beispiel #5
0
# One settings dictionary per agent family, keyed by the family name.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 128,
        "buffer_size": 40000,
        "epsilon": 1.0,
        "epsilon_decay_rate_denominator": 3,
        "discount_rate": 0.99,
        "tau": 0.01,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.1,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
    },
    "Stochastic_Policy_Search_Agents": {
        "policy_network_type": "Linear",
        "noise_scale_start": 1e-2,
        "noise_scale_min": 1e-3,
        "noise_scale_max": 2.0,
        "noise_scale_growth_factor": 2.0,
        "stochastic_action_decision": False,
        "num_policies": 10,
        "episodes_per_policy": 1,
        "num_policies_to_keep": 5,
    },
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [20, 20],
        "final_layer_activation": "SOFTMAX",
        "learning_iterations_per_round": 5,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.1,
        "episodes_per_learning_round": 4,
        "normalise_rewards": True,
        "gradient_clipping_norm": 7.0,
        # only required for continuous action games
        "mu": 0.0,
        "theta": 0.0,
        "sigma": 0.0,
        "epsilon_decay_rate_denominator": 1.0,
    },
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 50.0,
        "normalise_rewards": True,
    },
}
# Run-level switches: no plots, fixed random seed, a single run per agent,
# CPU only.
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.randomise_random_seed = False
config.runs_per_agent = 1
config.use_GPU = False

config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 3,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
    },
}


# Let the trainer fill in any defaults that were not set above, then keep only
# the "DQN_Agents" section as the active hyperparameters.
trainer = Trainer(config, [DQN_HER])
config.hyperparameters = trainer.add_default_hyperparameters_if_not_overriden(config.hyperparameters)
config.hyperparameters = config.hyperparameters["DQN_Agents"]
Beispiel #7
0
# One settings dictionary per agent family, keyed by the family name.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.005,
        "batch_size": 64,
        "buffer_size": 40000,
        "epsilon": 0.1,
        "epsilon_decay_rate_denominator": 200,
        "discount_rate": 0.99,
        "tau": 0.1,
        "alpha_prioritised_replay": 0.6,
        "beta_prioritised_replay": 0.4,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 3,
        "linear_hidden_units": [20, 20, 20],
        "final_layer_activation": "None",
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
    },
    "Stochastic_Policy_Search_Agents": {
        "policy_network_type": "Linear",
        "noise_scale_start": 1e-2,
        "noise_scale_min": 1e-3,
        "noise_scale_max": 2.0,
        "noise_scale_growth_factor": 2.0,
        "stochastic_action_decision": False,
        "num_policies": 10,
        "episodes_per_policy": 1,
        "num_policies_to_keep": 5,
    },
    "Policy_Gradient_Agents": {
        "learning_rate": 0.01,
        "linear_hidden_units": [20],
        "final_layer_activation": "SOFTMAX",
        "learning_iterations_per_round": 7,
        "discount_rate": 0.99,
        "batch_norm": False,
        "clip_epsilon": 0.1,
        "episodes_per_learning_round": 7,
        "normalise_rewards": False,
        "gradient_clipping_norm": 5,
        # only required for continuous action games
        "mu": 0.0,
        "theta": 0.0,
        "sigma": 0.0,
        "epsilon_decay_rate_denominator": 1,
    },
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 50.0,
        "normalise_rewards": True,
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "batch_size": 3,
        # for O-H noise
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        # for TD3
        "action_noise_std": 0.2,
        "action_noise_clipping_range": 0.5,
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
        "HER_sample_proportion": 0.8,
        "exploration_worker_difference": 1.0,
    },
    "SNN_HRL": {
        "SKILL_AGENT": {
            "num_skills": 20,
            "regularisation_weight": 1.5,
            "visitations_decay": 0.9999,
            "episodes_for_pretraining": 7,
            "batch_size": 256,
            "learning_rate": 0.001,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [[300, 10], [20, 6]],
            "batch_norm": False,
            "gradient_clipping_norm": 2,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
            "tau": 0.01,
        },
        "MANAGER": {
            "timesteps_before_changing_skill": 6,
            "linear_hidden_units": [10, 5],
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "batch_size": 3,
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [[300, 10]],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 50,
            "discount_rate": 0.99,
            "learning_iterations": 1,
            "tau": 0.01,
        },
    },
}
Beispiel #8
0
# Settings for the "Policy_Gradient_Agents" and "Actor_Critic_Agents" groups.
config.hyperparameters = {
    "Policy_Gradient_Agents": {
        "learning_rate": 0.05,
        "linear_hidden_units": [30, 15],
        "final_layer_activation": "TANH",
        "learning_iterations_per_round": 10,
        "discount_rate": 0.9,
        "batch_norm": False,
        "clip_epsilon": 0.2,
        "episodes_per_learning_round": 10,
        "normalise_rewards": True,
        "gradient_clipping_norm": 5,
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.2,
        "epsilon_decay_rate_denominator": 1,
    },
    "Actor_Critic_Agents": {
        "Actor": {
            "learning_rate": 0.001,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "TANH",
            "batch_norm": False,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [20, 20],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
        },
        "batch_size": 256,
        "discount_rate": 0.9,
        # for O-H noise
        "mu": 0.0,
        "theta": 0.15,
        "sigma": 0.25,
        # for TD3
        "action_noise_std": 0.2,
        "action_noise_clipping_range": 0.5,
        "update_every_n_steps": 20,
        "learning_updates_per_learning_session": 10,
    },
}
Beispiel #9
0
# Settings for the "DQN_Agents", "SNN_HRL" and "Actor_Critic_Agents" groups.
# Every embedding specification that depends on the environment pairs the
# number of discrete observations with an embedding size of one tenth of that
# number, but never less than 4.
config.hyperparameters = {
    "DQN_Agents": {
        "linear_hidden_units": [10, 5],
        "learning_rate": 0.01,
        "buffer_size": 40000,
        "batch_size": 256,
        "final_layer_activation": "None",
        "columns_of_data_to_be_embedded": [0],
        "embedding_dimensions": [
            [
                config.environment.observation_space.n,
                max(4, int(config.environment.observation_space.n / 10.0)),
            ],
        ],
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "update_every_n_steps": 1,
        "epsilon_decay_rate_denominator": 400,
        "discount_rate": 0.99,
        "learning_iterations": 1,
        "tau": 0.01,
    },
    "SNN_HRL": {
        "SKILL_AGENT": {
            "num_skills": 20,
            "regularisation_weight": 1.5,
            "visitations_decay": 0.9999,
            "episodes_for_pretraining": 300,
            "batch_size": 256,
            "learning_rate": 0.001,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
                [20, 6],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 2,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
            "tau": 0.01,
        },
        "MANAGER": {
            "timesteps_before_changing_skill": 6,
            "linear_hidden_units": [10, 5],
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "batch_size": 256,
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 50,
            "discount_rate": 0.99,
            "learning_iterations": 1,
            "tau": 0.01,
        },
    },
    "Actor_Critic_Agents": {
        "learning_rate": 0.005,
        "linear_hidden_units": [20, 10],
        "columns_of_data_to_be_embedded": [0],
        "embedding_dimensions": [
            [
                config.environment.observation_space.n,
                max(4, int(config.environment.observation_space.n / 10.0)),
            ],
        ],
        "final_layer_activation": ["SOFTMAX", None],
        "gradient_clipping_norm": 5.0,
        "discount_rate": 0.99,
        "epsilon_decay_rate_denominator": 50.0,
        "normalise_rewards": True,
    },
}
Beispiel #10
0
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "max_lower_level_timesteps": 4

    }

# Take a shallow copy before adding the extra key. Plain assignment would make
# both names refer to the same dictionary, so "number_goal_candidates" would
# also appear in DDPG_hyperparameters, which is reused below for "LOWER_LEVEL"
# and "Actor_Critic_Agents". Nested dictionaries are still shared between the
# two, exactly as they were before.
higher_level_hyperparameters = dict(DDPG_hyperparameters)
higher_level_hyperparameters["number_goal_candidates"] = 10

config.hyperparameters = {
    "HIRO": {
        "LOWER_LEVEL": DDPG_hyperparameters,
        "HIGHER_LEVEL": higher_level_hyperparameters,
    },
    "Actor_Critic_Agents": DDPG_hyperparameters,
}


# Show the assembled HIRO settings when the module is loaded.
print(config.hyperparameters["HIRO"])


if __name__ == "__main__":

    #
    AGENTS = [HIRO, DDPG]
    trainer = Trainer(config, AGENTS)
# Hyperparameters for the "h_DQN" and "SNN_HRL" entries. Every embedding
# specification that depends on the environment pairs the number of discrete
# observations with an embedding size of one tenth of that number, but never
# less than 4.
config.hyperparameters = {
    "h_DQN": {
        "CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 1500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
        "META_CONTROLLER": {
            "batch_size": 256,
            "learning_rate": 0.001,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 2500,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
    },
    "SNN_HRL": {
        "SKILL_AGENT": {
            "num_skills": 2,
            "regularisation_weight": 1.5,
            "visitations_decay": 0.9999,
            "episodes_for_pretraining": 2000,
            "batch_size": 256,
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "linear_hidden_units": [20, 10],
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0, 1],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
                [6, 4],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 50,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
        "MANAGER": {
            "timesteps_before_changing_skill": 4,
            "linear_hidden_units": [10, 5],
            "learning_rate": 0.01,
            "buffer_size": 40000,
            "batch_size": 256,
            "final_layer_activation": "None",
            "columns_of_data_to_be_embedded": [0],
            "embedding_dimensions": [
                [
                    config.environment.observation_space.n,
                    max(4, int(config.environment.observation_space.n / 10.0)),
                ],
            ],
            "batch_norm": False,
            "gradient_clipping_norm": 5,
            "update_every_n_steps": 1,
            "epsilon_decay_rate_denominator": 1000,
            "discount_rate": 0.999,
            "learning_iterations": 1,
        },
    },
}
Beispiel #12
0
# Run-level switches: three runs per agent on the CPU, keep any existing
# results file, randomise the seed, and do not save the model.
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False


# Only the "DQN_Agents" group is configured in this script.
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.001,
        "batch_size": 128,
        "buffer_size": 100000,
        "epsilon_decay_rate_denominator": 150,
        "discount_rate": 0.999,
        "incremental_td_error": 1e-8,
        "update_every_n_steps": 1,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "y_range": (-1, 14),
        "batch_norm": False,
        "gradient_clipping_norm": 5,
        "HER_sample_proportion": 0.8,
    },
}

if __name__ == "__main__":
    # Build a trainer for both agent classes and run them with the shared config.
    AGENTS = [DQN_HER, DQN]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents()