Code Example #1
    def test_log_float_param(self):
        param = hp.LogFloatParam("variable", 1e-5, 1e-1)
        n = 10000
        num_success = 0
        threshold = 1e-3
        for _ in range(n):
            if param.generate() > threshold:
                num_success += 1
        p = 0.5
        self.assertTrue(is_binomial_trial_likely(n, p, num_success))
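This test checks that LogFloatParam samples log-uniformly: over the range 1e-5 to 1e-1 the geometric midpoint is 1e-3, so roughly half of the 10,000 draws should exceed that threshold (p = 0.5). The sampler itself is not shown in these excerpts; a minimal log-uniform sampler sketch (my own illustration using numpy, not the hp module's actual implementation) could look like this:

import numpy as np

class LogUniformParam:
    """Illustrative stand-in: draws values uniformly in log-space."""

    def __init__(self, name, min_value, max_value):
        self.name = name
        self.log_min = np.log(min_value)
        self.log_max = np.log(max_value)

    def generate(self):
        # Sample uniformly between log(min) and log(max), then exponentiate.
        return float(np.exp(np.random.uniform(self.log_min, self.log_max)))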
Code Example #2
def main():
    num_hyperparameters = 40
    layer_norm = True
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ])
    for seed in range(num_hyperparameters):
        params_dict = sweeper.generate_random_hyperparameters()
        variant = dict(
            algo_params=dict(batch_size=128,
                             n_epochs=50,
                             epoch_length=1000,
                             eval_samples=1000,
                             replay_pool_size=1000000,
                             min_pool_size=256,
                             max_path_length=1000,
                             qf_weight_decay=0.00,
                             n_updates_per_time_step=5,
                             soft_target_tau=0.01,
                             **params_dict),
            env_params=dict(
                env_id='cart',
                normalize_env=True,
                gym_name="",
            ),
            policy_params=dict(layer_norm=layer_norm),
            qf_params=dict(layer_norm=layer_norm),
        )
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
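In this sweep, each call to generate_random_hyperparameters() presumably returns a flat dict such as {'qf_learning_rate': 3.1e-4, 'policy_learning_rate': 2.4e-5, ...}, which is unpacked into algo_params via **params_dict alongside the fixed settings. A hypothetical minimal sweeper along those lines (not the real RandomHyperparameterSweeper) is sketched below:

class RandomSweeperSketch:
    """Hypothetical stand-in: one fresh sample per parameter per call."""

    def __init__(self, params):
        self.params = params

    def generate_random_hyperparameters(self):
        return {p.name: p.generate() for p in self.params}

# Usage sketch: each call yields one random configuration.
# sweeper = RandomSweeperSketch([LogUniformParam("qf_learning_rate", 1e-5, 1e-1)])
# params_dict = sweeper.generate_random_hyperparameters()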
Code Example #3
    def test_sweep_hyperparameters(self):
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LinearFloatParam("v1", -10, 10),
            hp.LogFloatParam("v2", 1e-5, 1e-1),
        ])
        n = 100
        num_successes = np.zeros((2, 2))
        threshold_v1 = 0
        threshold_v2 = 1e-3

        def update_success(v1, v2):
            success_v1 = int(v1 > threshold_v1)
            success_v2 = int(v2 > threshold_v2)
            num_successes[success_v1, success_v2] += 1

        sweeper.sweep_hyperparameters(update_success, n)
        p = 0.25
        for i in range(2):
            for j in range(2):
                self.assertTrue(
                    is_binomial_trial_likely(n, p, num_successes[i, j]))
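is_binomial_trial_likely is not shown in these excerpts; from its usage it appears to test whether an observed success count is statistically plausible for n Bernoulli trials with success probability p. One possible check (a hypothetical helper using a normal approximation, not the project's actual implementation) is:

import math

def binomial_count_is_plausible(n, p, num_successes, num_std=4.0):
    """True if num_successes lies within num_std standard deviations of the
    binomial mean n*p (normal approximation to the binomial distribution)."""
    mean = n * p
    std = math.sqrt(n * p * (1 - p))
    return abs(num_successes - mean) <= num_std * std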
Code Example #4
         load_trials=True,
         num_rounds=500,
         num_evals_per_round=1,
     )
 elif run_mode == 'random':
     hyperparameters = [
         # hyp.EnumParam('qf_params.dropout', [True, False]),
         hyp.EnumParam('algo_params.qf_criterion_class', [
             HuberLoss,
             nn.MSELoss,
         ]),
         hyp.EnumParam('qf_params.hidden_sizes', [
             [100, 100],
             [800, 600, 400],
         ]),
         hyp.LogFloatParam('algo_params.qf_weight_decay', 1e-5, 1e-2),
     ]
     sweeper = hyp.RandomHyperparameterSweeper(
         hyperparameters,
         default_kwargs=variant,
     )
     for _ in range(num_configurations):
         for exp_id in range(n_seeds):
             seed = random.randint(0, 10000)
             variant = sweeper.generate_random_hyperparameters()
             run_experiment(
                 experiment,
                 exp_prefix=exp_prefix,
                 seed=seed,
                 mode=mode,
                 variant=variant,
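Examples #4 through #6 pass dotted parameter names such as 'algo_params.qf_weight_decay' together with default_kwargs=variant, so the sweeper presumably writes each sampled value into the corresponding nested slot of a copy of the default variant. A hypothetical helper showing that kind of dotted-path merge (an assumption about the behavior, not the library's code):

def set_dotted(config, dotted_key, value):
    """Write value into nested dicts following a path like
    'algo_params.qf_weight_decay' (illustration of the presumed merge)."""
    node = config
    keys = dotted_key.split('.')
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value

# e.g. set_dotted(variant, 'algo_params.qf_weight_decay', 3e-4) updates only
# that one nested entry and leaves the rest of variant untouched.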
Code Example #5
             )
 if run_mode == 'random':
     for (
         rnn_cell,
         output_activation,
     ) in [
         (LSTMCell, F.tanh),
         (LSTMCell, ptu.clip1),
         (GRUCell, F.tanh),
         (GRUCell, ptu.clip1),
     ]:
         variant['policy_params']['cell_class'] = rnn_cell
         variant['policy_params']['output_activation'] = output_activation
         hyperparameters = [
             hyp.LogIntParam('memory_dim', 4, 400),
             hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-5, 1e-2),
             hyp.LogFloatParam(
                 'algo_params.write_policy_learning_rate', 1e-5, 1e-3
             ),
             hyp.LogFloatParam(
                 'algo_params.action_policy_learning_rate', 1e-5, 1e-3
             ),
             # hyp.EnumParam(
             #     'algo_params.action_policy_optimize_bellman', [True, False],
             # ),
             # hyp.EnumParam(
             #     'algo_params.use_action_policy_params_for_entire_policy',
             #     [True, False],
             # ),
             # hyp.EnumParam(
             #     'algo_params.write_policy_optimizes', ['both', 'qf', 'bellman']
Code Example #6
         for i in range(n_seeds):
             seed = random.randint(0, 10000)
             run_experiment(
                 experiment,
                 exp_prefix=exp_prefix,
                 seed=seed,
                 mode=mode,
                 variant=variant,
                 exp_id=exp_id,
                 sync_s3_log=True,
                 sync_s3_pkl=True,
                 periodic_sync_interval=600,
             )
 if run_mode == 'random':
     hyperparameters = [
         hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1),
         hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1),
         hyp.LogIntParam('qf_hidden_sizes', 10, 1000),
     ]
     sweeper = hyp.RandomHyperparameterSweeper(
         hyperparameters,
         default_kwargs=variant,
     )
     for _ in range(num_configurations):
         for exp_id in range(n_seeds):
             seed = random.randint(0, 10000)
             variant = sweeper.generate_random_hyperparameters()
             run_experiment(
                 experiment,
                 exp_prefix=exp_prefix,
                 seed=seed,
Code Example #7
    def get_launch_settings(algo_name):
        """
        Return a dictionary of the form
        {
            'algo_params': algo_params to pass to run_algorithm
            'variant': variant to pass to run_algorithm
        }
        :param algo_name: Name of the algorithm to run.
        :return:
        """
        sweeper = hp.RandomHyperparameterSweeper()
        algo_params = {}
        if algo_name == 'ddpg' or algo_name == 'mddpg':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-3),
                hp.LogFloatParam("reward_scale", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-5, 1e-2),
            ])
            algo_params = get_ddpg_params()
            algo_params['render'] = render
            variant = {
                'qf_params':
                dict(
                    embedded_hidden_sizes=(100, ),
                    observation_hidden_sizes=(100, ),
                    hidden_nonlinearity=tf.nn.relu,
                ),
                'policy_params':
                dict(
                    observation_hidden_sizes=(100, 100),
                    hidden_nonlinearity=tf.nn.relu,
                )
            }
            if algo_name == 'ddpg':
                algorithm_launcher = my_ddpg_launcher
                variant['Algorithm'] = 'DDPG'
                variant['policy_params']['output_nonlinearity'] = tf.nn.tanh
            else:
                algorithm_launcher = mem_ddpg_launcher
                variant['Algorithm'] = 'Memory-DDPG'
        elif algo_name == 'naf':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("reward_scale", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-6, 1e-1),
                hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
            ])
            algo_params = get_my_naf_params()
            algo_params['render'] = render
            algorithm_launcher = naf_launcher
            variant = {
                'Algorithm': 'NAF',
                'exploration_strategy_params': {
                    'sigma': 0.15
                },
            }
        elif algo_name == 'random':
            algorithm_launcher = random_action_launcher
            variant = {'Algorithm': 'Random'}
        elif algo_name == 'bptt':
            algorithm_launcher = bptt_launcher
            variant = {'Algorithm': 'BPTT'}
        else:
            raise Exception("Algo name not recognized: " + algo_name)

        # bn_sweeper = hp.RandomHyperparameterSweeper([
        #     hp.EnumParam("decay", [0.9, 0.99, 0.999, 0.9999]),
        #     hp.LogFloatParam("epsilon", 1e-3, 1e-7),
        #     hp.EnumParam("enable_offset", [True, False]),
        #     hp.EnumParam("enable_scale", [True, False]),
        # ])
        bn_sweeper = None
        return {
            'sweeper': sweeper,
            'batch_norm_sweeper': bn_sweeper,
            'variant': variant,
            'algo_params': algo_params,
            'algorithm_launcher': algorithm_launcher,
            'batch_norm_params': BATCH_NORM_PARAMS
        }
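get_launch_settings only assembles the pieces; the caller is expected to combine them. A hypothetical driver (not part of the excerpt) that samples configurations from the returned sweeper and launches them might look like:

def launch_random_search(algo_name, num_configurations, exp_prefix):
    settings = get_launch_settings(algo_name)
    sweeper = settings['sweeper']
    for exp_id in range(num_configurations):
        # Start from the fixed algo_params and overlay freshly sampled values.
        algo_params = dict(settings['algo_params'])
        algo_params.update(sweeper.generate_random_hyperparameters())
        run_experiment(
            settings['algorithm_launcher'],
            exp_prefix=exp_prefix,
            seed=exp_id,
            variant=dict(settings['variant'], algo_params=algo_params),
        )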
Code Example #8
    def get_launch_settings(algo_name):
        """
        Return a dictionary of the form
        {
            'algo_params': algo_params to pass to run_algorithm
            'variant': variant to pass to run_algorithm
        }
        :param algo_name: Name of the algorithm to run.
        :return:
        """
        sweeper = hp.RandomHyperparameterSweeper()
        algo_params = {}
        if algo_name == 'ddpg':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-3),
                hp.LogFloatParam("scale_reward", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-5, 1e-2),
            ])
            algo_params = get_ddpg_params()
            algo_params['render'] = render
            algorithm_launcher = my_ddpg_launcher
            variant = {
                'Algorithm': 'DDPG',
                'qf_params': dict(
                    embedded_hidden_sizes=(400, 300),
                    observation_hidden_sizes=(200, 200),
                    hidden_nonlinearity=tf.nn.relu,
                ),
                'policy_params': dict(
                    observation_hidden_sizes=(200, 200),
                    hidden_nonlinearity=tf.nn.tanh,
                    output_nonlinearity=tf.nn.tanh,
                )
            }
        elif algo_name == 'shane-ddpg':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("soft_target_tau", 0.005, 0.1),
                hp.LogFloatParam("scale_reward", 10.0, 0.01),
                hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
            ])
            algo_params = get_ddpg_params()
            if algo_params['min_pool_size'] <= algo_params['batch_size']:
                algo_params['min_pool_size'] = algo_params['batch_size'] + 1
            algorithm_launcher = shane_ddpg_launcher
            variant = {
                'Algorithm': 'Shane-DDPG',
                'policy_params': dict(
                    hidden_sizes=(100, 100),
                    hidden_nonlinearity=tf.nn.relu,
                    output_nonlinearity=tf.nn.tanh,
                ),
                'qf_params': dict(
                    hidden_sizes=(100, 100),
                ),
            }
        elif algo_name == 'qddpg':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("soft_target_tau", 0.005, 0.1),
                hp.LogFloatParam("scale_reward", 10.0, 0.01),
                hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
                hp.LogFloatParam("qf_learning_rate", 1e-6, 1e-2),
                hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-2),
            ])
            algo_params = get_ddpg_params()
            algorithm_launcher = quadratic_ddpg_launcher
            variant = {
                'Algorithm': 'QuadraticDDPG',
                'qf_params': dict(),
                'policy_params': dict(
                    observation_hidden_sizes=(100, 100),
                    hidden_nonlinearity=tf.nn.relu,
                    output_nonlinearity=tf.nn.tanh,
                )
            }
        elif algo_name == 'oat':
            algo_params = get_ddpg_params()
            algorithm_launcher = oat_qddpg_launcher
            variant = {
                'Algorithm': 'QuadraticOptimalActionTargetDDPG',
                'qf_params': dict(),
                'policy_params': dict(
                    observation_hidden_sizes=(100, 100),
                    hidden_nonlinearity=tf.nn.relu,
                    output_nonlinearity=tf.nn.tanh,
                )
            }
        elif algo_name == 'naf':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("scale_reward", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-6, 1e-1),
                hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
            ])
            algo_params = get_my_naf_params()
            algo_params['render'] = render
            algorithm_launcher = naf_launcher
            variant = {
                'Algorithm': 'NAF',
                'exploration_strategy_params': {
                    'sigma': 0.15
                },
            }
        elif algo_name == 'random':
            algorithm_launcher = random_action_launcher
            variant = {'Algorithm': 'Random'}
        elif algo_name == 'idle':
            # TODO: implement idle launcher
            algorithm_launcher = None
            variant = {'Algorithm': 'Idle'}
        elif algo_name == 'rl-vpg':
            algorithm_launcher = rllab_vpg_launcher
            algo_params = dict(
                batch_size=BATCH_SIZE,
                max_path_length=MAX_PATH_LENGTH,
                n_itr=N_EPOCHS,
                discount=DISCOUNT,
                optimizer_args=dict(
                    tf_optimizer_args=dict(
                        learning_rate=BATCH_LEARNING_RATE,
                    )
                ),
            )
            variant = {'Algorithm': 'rllab-VPG'}
        elif algo_name == 'rl-trpo':
            algorithm_launcher = rllab_trpo_launcher
            algo_params = dict(
                batch_size=BATCH_SIZE,
                max_path_length=MAX_PATH_LENGTH,
                n_itr=N_EPOCHS,
                discount=DISCOUNT,
                step_size=BATCH_LEARNING_RATE,
            )
            variant = {'Algorithm': 'rllab-TRPO'}
        elif algo_name == 'tf-trpo':
            algorithm_launcher = tf_trpo_launcher
            algo_params = dict(
                batch_size=BATCH_SIZE,
                max_path_length=MAX_PATH_LENGTH,
                n_itr=N_EPOCHS,
                discount=DISCOUNT,
                step_size=BATCH_LEARNING_RATE,
            )
            variant = {'Algorithm': 'tf-TRPO'}
        elif algo_name == 'rl-ddpg':
            algorithm_launcher = rllab_ddpg_launcher
            algo_params = get_ddpg_params()
            if algo_params['min_pool_size'] <= algo_params['batch_size']:
                algo_params['min_pool_size'] = algo_params['batch_size'] + 1
            variant = {'Algorithm': 'rllab-DDPG'}
        else:
            raise Exception("Algo name not recognized: " + algo_name)

        # bn_sweeper = hp.RandomHyperparameterSweeper([
        #     hp.EnumParam("decay", [0.9, 0.99, 0.999, 0.9999]),
        #     hp.LogFloatParam("epsilon", 1e-3, 1e-7),
        #     hp.EnumParam("enable_offset", [True, False]),
        #     hp.EnumParam("enable_scale", [True, False]),
        # ])
        bn_sweeper = None
        return {
            'sweeper': sweeper,
            'batch_norm_sweeper': bn_sweeper,
            'variant': variant,
            'algo_params': algo_params,
            'algorithm_launcher': algorithm_launcher,
            'batch_norm_params': BATCH_NORM_PARAMS
        }