Example #1
# Module paths below are assumed from the upstream rllab / sandbox.rocky.tf examples
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import stub, run_experiment_lite
from sandbox.rocky.tf.algos.trpo import TRPO
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import (
    ConjugateGradientOptimizer, FiniteDifferenceHvp)
from sandbox.rocky.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy
import sandbox.rocky.tf.core.layers as L

# Stub the module so that the calls below are only recorded and later executed
# inside run_experiment_lite
stub(globals())

env = TfEnv(normalize(CartpoleEnv()))

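# Recurrent Gaussian policy; the hidden layer is an LSTM (a GRU layer could be
# swapped in via the commented-out argument below)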
policy = GaussianLSTMPolicy(
    name="policy",
    env_spec=env.spec,
    lstm_layer_cls=L.TfBasicLSTMLayer,
    # gru_layer_cls=L.GRULayer,
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

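# TRPO with a conjugate-gradient optimizer whose Hessian-vector products are
# approximated by finite differences (a common choice for recurrent policies)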
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=10,
    discount=0.99,
    step_size=0.01,
    optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
)
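# Because of stub(globals()), algo.train() is not executed here; run_experiment_lite
# serializes the stubbed call and runs it in a separate process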
run_experiment_lite(
    algo.train(),
    n_parallel=4,
    seed=1,
)
Example #2
            n_itr=n_itr,
            optimizer_args={
                'init_learning_rate': step_sizes[step_i],
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * step_sizes[step_i]
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            })

        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=4,
            exp_prefix='trpopoint2d_test',
            exp_name='test',
            #plot=True,
        )
        import pdb
        pdb.set_trace()
        # get return from the experiment
        with open('data/local/trpopoint2d-test/test/progress.csv', 'r') as f:
            reader = csv.reader(f, delimiter=',')
            i = 0
            row = None
            returns = []
            for row in reader:
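
The excerpt above is cut off inside the CSV loop. As a minimal self-contained sketch (not the original author's loop body), the per-iteration returns could be read back from rllab's progress.csv like this; the 'AverageReturn' column name is an assumption about which metric is logged:

import csv

def read_returns(path, column='AverageReturn'):
    # Parse the progress log written during training, one value per iteration
    with open(path, 'r') as f:
        return [float(row[column]) for row in csv.DictReader(f)]

returns = read_returns('data/local/trpopoint2d-test/test/progress.csv')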
Example #3
            reset_arg=goal,
            optimizer_args={
                'init_learning_rate': step_sizes[step_i],
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * step_sizes[step_i]
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            })

        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Keep the snapshot parameters for every iteration
            snapshot_mode="all",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=goal_i,
            exp_prefix='cheetahdirec_test',
            exp_name='test' + str(run_id),
            plot=True,
        )
        # get return from the experiment
        with open(
                'data/local/cheetahdirec-test/test' + str(run_id) +
                '/progress.csv', 'r') as f:
            reader = csv.reader(f, delimiter=',')
            i = 0
            row = None
            returns = []
            for row in reader:
Example #4
            reset_arg=goal,
            optimizer_args={
                'init_learning_rate': step_sizes[step_i],
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * step_sizes[step_i]
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            })

        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Keep the snapshot parameters for every iteration
            snapshot_mode="all",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=2,  #1  # don't set the seed for oracle, since it's already deterministic.
            exp_prefix='cheetah_test',
            exp_name='test' + str(run_id),
            plot=True,
        )
        # get return from the experiment
        import csv
        with open(
                'data/local/cheetah-test/test' + str(run_id) + '/progress.csv',
                'r') as f:
            reader = csv.reader(f, delimiter=',')
            i = 0
            row = None
            returns = []
Example #5
def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

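    # Hyperparameter grid: vg.variants() below expands to the Cartesian product of
    # all values added here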
    vg = VariantGenerator()
    vg.add('env', ['HalfCheetahEnvRandParams'])  # Reacher5DofMAMLEnvRandParams, HalfCheetahMAMLEnvRandParams
    vg.add('log_scale_limit', [0.1, 0.3, 0.5])
    vg.add('fast_lr', [0.1])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('meta_step_size', [0.01])
    vg.add('fast_batch_size', [20])
    vg.add('seed', [1, 11])  #TODO add [21, 31, 41]
    vg.add('discount', [0.99])
    vg.add('n_iter', [500])
    vg.add('path_length', [100])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(100, 100)])
    vg.add('trainable_step_size', [True, False])
    vg.add('bias_transform', [False])
    vg.add('policy', ['MAMLImprovedGaussianMLPPolicy'])

    variants = vg.variants()

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] / 2)  # use half of the instance's vCPUs
    else:
        n_parallel = 12

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "trpo_maml_train_env_%s_%.3f_%.3f_%i_id_%i" % (
            v['env'], v['log_scale_limit'], v['meta_step_size'], v['seed'],
            exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # configure instance

            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command="python3",
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
Example #6
        max_path_length=max_path_length,
        meta_batch_size=v['meta_batch_size'],
        num_grad_updates=num_grad_updates,
        n_itr=800,
        use_maml=use_maml,
        step_size=v['meta_step_size'],
        plot=False,
    )
    direc = 'direc' if direc else ''

    run_experiment_lite(
        algo.train(),
        exp_prefix='trpo_maml_cheetah' + direc + str(max_path_length),
        exp_name='maml' + str(int(use_maml)) + '_fbs' + str(v['fast_batch_size']) +
        '_mbs' + str(v['meta_batch_size']) + '_flr_' + str(v['fast_lr']) +
        '_mlr' + str(v['meta_step_size']),
        # Number of parallel workers for sampling
        n_parallel=8,
        # Keep a parameter snapshot every snapshot_gap iterations
        snapshot_mode="gap",
        snapshot_gap=25,
        sync_s3_pkl=True,
        python_command=sys.executable,
        # Specifies the seed for the experiment. If this is not provided, a random seed
        # will be used
        seed=v["seed"],
        mode="local",
        #mode="ec2",
        variant=v,
        # plot=True,
        # terminate_machine=False,
    )
Example #7
def run_evaluation(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exp_prefix_dir',
        type=str,
        help='path to the dump directory which contains folders with '
        'the train results, i.e. the params.pkl and variant.json files')
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=
        'Number of parallel workers to perform rollouts. 0 => don\'t start any workers'
    )
    parser.add_argument('--num_sampled_envs',
                        type=int,
                        default=5,
                        help='number of environments with sampled parameters')

    args = parser.parse_args(argv[1:])

    # ----------------------- EVALUATION ---------------------------------------

    exp_prefix = os.path.basename(args.exp_prefix_dir)
    eval_exp_prefix = exp_prefix + '-eval'
    evaluation_runs = eval.prepare_evaluation_runs(
        args.exp_prefix_dir,
        EXP_PREFIX,
        num_sampled_envs=args.num_sampled_envs)
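    # evaluation_runs yields (eval_exp_name, variant) pairs, iterated below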

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO',
                                                      len(evaluation_runs)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    for eval_exp_name, v in evaluation_runs:

        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_eval_task,
            exp_prefix=eval_exp_prefix,
            exp_name=eval_exp_name,
            # Number of parallel workers for sampling
            n_parallel=args.n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command="python3",  #TODO
            mode=args.mode,
            use_cloudpickle=True,
            periodic_sync=True,
            variant=v,
            # plot=True,
            # terminate_machine=False,
        )
Example #8
            elif 'linear' in bas:
                baseline = LinearFeatureBaseline(env_spec=env.spec)
            else:
                baseline = GaussianMLPBaseline(env_spec=env.spec)
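            # MAML-TRPO meta-training; batch_size is the number of trajectories per task
            # for the inner gradient update(s) and meta_batch_size the number of tasks
            # sampled per meta-iteration (roles inferred from the argument names)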
            algo = MAMLTRPO(
                env=env,
                policy=policy,
                baseline=baseline,
                batch_size=fast_batch_size,  # number of trajs for grad update
                max_path_length=max_path_length,
                meta_batch_size=meta_batch_size,
                num_grad_updates=num_grad_updates,
                n_itr=100,
                use_maml=use_maml,
                step_size=meta_step_size,
                plot=False,
            )
            run_experiment_lite(
                algo.train(),
                n_parallel=1,
                snapshot_mode="last",
                python_command=interpreter_path,
                seed=1,
                exp_prefix='trpo_maml_point100',
                exp_name='trpomaml' + str(int(use_maml)) + '_fbs' +
                str(fast_batch_size) + '_mbs' + str(meta_batch_size) +
                '_flr_' + str(fast_learning_rate) + 'metalr_' +
                str(meta_step_size) + '_step1' + str(num_grad_updates),
                plot=False,
            )
Example #9
    if oracle:
        exp_name = 'oracleenv'
    else:
        exp_name = 'randenv'
    if direc:
        exp_prefix = 'trpo_maml_cheetahdirec' + str(max_path_length)
    else:
        exp_prefix = 'bugfix_trpo_maml_cheetah' + str(max_path_length)

    run_experiment_lite(
        algo.train(),
        exp_prefix=exp_prefix,
        exp_name=exp_name,
        # Number of parallel workers for sampling
        n_parallel=1,
        # Keep a parameter snapshot every snapshot_gap iterations
        #snapshot_mode="last",
        snapshot_mode="gap",
        snapshot_gap=25,
        sync_s3_pkl=True,
        python_command=sys.executable,
        # Specifies the seed for the experiment. If this is not provided, a random seed
        # will be used
        seed=v["seed"],
        mode="local",
        #mode="ec2",
        variant=v,
        # plot=True,
        # terminate_machine=False,
    )