def run_experiment_with_multiple_seeds(variant):
    """Run ``get_ocm_score`` once per seed and return the mean score.

    The experiment is launched ``n_seeds`` times (``n_seeds`` is a
    module-level name) through ``run_experiment_here``. Run ``i`` uses
    seed ``base + i``, where ``base`` is the integer value of
    ``variant['seed']`` when this function is entered.

    Args:
        variant: Experiment configuration mapping. Must contain ``'seed'``
            (any value ``int()`` accepts) and ``'exp_prefix'``. Note that
            ``variant['seed']`` is overwritten in place with the string
            seed of each run, so on return it holds the last run's seed.

    Returns:
        ``np.mean`` of the values returned by the individual runs.
    """
    # Read the base seed exactly once. Re-reading variant['seed'] inside the
    # loop would compound the offsets (base, base+1, base+3, base+6, ...)
    # because that entry is overwritten on every iteration.
    base_seed = int(variant['seed'])
    scores = []
    for i in range(n_seeds):
        variant['seed'] = str(base_seed + i)
        scores.append(
            run_experiment_here(
                get_ocm_score,
                exp_prefix=variant['exp_prefix'],
                variant=variant,
                exp_id=i,
            )
        )
    return np.mean(scores)
Exemple #2
0
    args_dict['run_experiment_kwargs']['variant']['rl_variant']['algo_kwargs'][
        'base_kwargs']['num_epochs'] = 300

    method_call = args_dict['method_call']
    run_experiment_kwargs = args_dict['run_experiment_kwargs']
    output_dir = args_dict['output_dir']
    run_mode = args_dict.get('mode', None)
    if run_mode and run_mode in ['slurm_singularity', 'sss']:
        import os
        run_experiment_kwargs['variant']['slurm-job-id'] = os.environ.get(
            'SLURM_JOB_ID', None)
    if run_mode and run_mode == 'ec2':
        try:
            import urllib.request
            instance_id = urllib.request.urlopen(
                'http://169.254.169.254/latest/meta-data/instance-id').read(
                ).decode()
            run_experiment_kwargs['variant']['EC2_instance_id'] = instance_id
        except Exception as e:
            print("Could not get instance ID. Error was...")
            print(e)
        # Do this in case base_log_dir was already set
        run_experiment_kwargs['base_log_dir'] = output_dir
        run_experiment_here(method_call,
                            include_exp_prefix_sub_dir=False,
                            **run_experiment_kwargs)
    else:
        run_experiment_here(method_call,
                            log_dir=output_dir,
                            **run_experiment_kwargs)
def example(*_):
    """Assemble a DDPG learner on ``HalfCheetahEnv`` and train it.

    Any positional arguments are accepted and ignored.
    """
    environment = HalfCheetahEnv()
    exploration = OUStrategy(env_spec=environment.spec)
    critic = FeedForwardCritic(
        name_or_scope="critic", env_spec=environment.spec
    )
    actor = FeedForwardPolicy(
        name_or_scope="actor", env_spec=environment.spec
    )
    # Keyword settings forwarded unchanged to the DDPG constructor.
    hyperparameters = dict(
        n_epochs=25,
        batch_size=1024,
        replay_pool_size=10000,
    )
    DDPG(environment, exploration, actor, critic, **hyperparameters).train()


if __name__ == "__main__":
    # Script entry point: hand `example` to the experiment launcher with a
    # fixed experiment prefix and seed.
    launch_options = {"exp_prefix": "ddpg-half-cheetah", "seed": 2}
    run_experiment_here(example, **launch_options)
                max_path_length=max_path_length,
                # qf_learning_rate=1e-1,
                # policy_learning_rate=1e-1,
            )
            variant = dict(
                H=H,
                num_values=num_values,
                exp_prefix=exp_prefix,
                ddpg_params=ddpg_params,
            )
            for seed in range(n_seeds):
                variant['seed'] = seed
                variant['exp_id'] = exp_id

                if USE_EC2:
                    run_experiment(
                        run_linear_ocm_exp,
                        exp_prefix=exp_prefix,
                        seed=seed,
                        mode="ec2",
                        variant=variant,
                    )
                else:
                    run_experiment_here(
                        run_linear_ocm_exp,
                        exp_prefix=exp_prefix,
                        variant=variant,
                        exp_id=exp_id,
                        seed=seed,
                    )