# Example #1
def main():
    """Launch the Circle/Fast experiment sweep, one run per variant."""
    global policy
    global baseline

    # Restore a previously trained policy/baseline when a snapshot is given.
    args = parse_arguments()
    use_pretrained = args.network is not None
    if use_pretrained:
        snapshot = joblib.load(args.network)
        policy = snapshot['policy']
        baseline = snapshot['baseline']

    # Build the cross product of experiment settings.
    vg = VariantGenerator()

    # Fixed settings for this experiment (do not change)
    vg.add('trajectory', ['Circle'])
    vg.add('objective', ['Fast'])
    vg.add('algo', ['TRPO'] if args.algo == 'trpo' else ['CPO'])

    # Tunable settings
    #   model_type choices: 'BrushTireModel', 'LinearTireModel'
    #   robot_type choices: 'MRZR', 'RCCar'
    # Note: CPO has no notion of a target velocity, but the value still
    #       shapes the initial-state distribution; see get_initial_state()
    #       in envs/circle/circle_env.py for more information.
    vg.add('seed', [100, 200, 300, 400, 500])
    vg.add('target_velocity', [1.0])
    vg.add('radius', [1.0])
    vg.add('dt', [0.1])
    vg.add('eps', [0.05])
    vg.add('model_type', ['BrushTireModel'])
    vg.add('robot_type', ['RCCar'])
    vg.add('mu_s', [1.37])
    vg.add('mu_k', [1.96])
    vg.add('pretrained', [use_pretrained])
    print('Number of Configurations: ', len(vg.variants()))

    # Launch one run per variant.
    for variant in vg.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=variant,
                            n_parallel=4,
                            snapshot_mode='last',
                            seed=variant['seed'])
def main():
    """Launch the Straight/TargetVelocity experiment sweep."""
    global policy
    global baseline

    # Restore a previously trained policy/baseline when a snapshot is given.
    args = parse_arguments()
    use_pretrained = args.network is not None
    if use_pretrained:
        snapshot = joblib.load(args.network)
        policy = snapshot['policy']
        baseline = snapshot['baseline']

    # Build the cross product of experiment settings.
    vg = VariantGenerator()

    # Fixed settings for this experiment (do not change)
    vg.add('trajectory', ['Straight'])
    vg.add('objective', ['TargetVelocity'])
    vg.add('algo', ['TRPO'] if args.algo == 'trpo' else ['CPO'])

    # Tunable settings
    #   model_type choices: 'BrushTireModel', 'LinearTireModel'
    #   robot_type choices: 'MRZR', 'RCCar'
    vg.add('seed', [102, 201, 54, 304])
    vg.add('target_velocity', [1.0])
    vg.add('dt', [0.02])
    vg.add('model_type', ['BrushTireModel'])
    vg.add('robot_type', ['RCCar'])
    vg.add('mu_s', [1.37])
    vg.add('mu_k', [1.96])
    vg.add('use_ros', [False])
    vg.add('pretrained', [use_pretrained])
    print('Number of Configurations: ', len(vg.variants()))

    # Launch one run per variant.
    for variant in vg.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=variant,
                            n_parallel=4,
                            snapshot_mode='last',
                            seed=variant['seed'])
# Example #3
def main():
    """Launch one experiment per generated parameter combination."""
    vg = VariantGenerator()
    vg.add('target_velocity', [0.7])
    vg.add('seed', [100])

    variants = vg.variants()
    print('Number of Configurations: ', len(variants))

    # One run_experiment_lite launch per variant.
    for variant in variants:
        run_experiment_lite(stub_method_call=run_task,
                            variant=variant,
                            n_parallel=1,
                            snapshot_mode='last',
                            seed=variant['seed'])
# Example #4
def experiment(variant):
    """Build an env sampler from ``variant['env_specs']`` and train meta-SAC.

    Variant keys used:
      - 'env_specs': dict of env parameters; list-valued entries are swept
        into a full cross product, scalar entries are held constant in
        every generated env spec.
      - 'net_size': hidden-layer width used for all networks.
      - 'algo_params': kwargs forwarded to MetaSoftActorCritic; its
        'concat_env_params_to_obs' flag decides whether the sampled env's
        meta-params are appended to the observation vector.

    Returns 1 on completion (script-style success marker).
    """
    # Expand list-valued env_specs entries into the full cross product.
    env_specs = variant['env_specs']
    env_specs_vg = VariantGenerator()
    env_spec_constants = {}
    for key, value in env_specs.items():
        if isinstance(value, list):
            env_specs_vg.add(key, value)
        else:
            env_spec_constants[key] = value

    env_specs_list = []
    for es in env_specs_vg.variants():
        # VariantGenerator attaches bookkeeping under '_hidden_keys'.
        # Fix: pop with a default instead of `del`, so a missing key
        # (e.g. a different VariantGenerator version) cannot KeyError.
        es.pop('_hidden_keys', None)
        es.update(env_spec_constants)
        env_specs_list.append(es)
    print(env_specs_list)

    print(env_specs_list[0])
    env_sampler = EnvSampler(env_specs_list)

    # Set up like the non-meta version: probe one env for dimensions.
    sample_env, _ = env_sampler()
    if variant['algo_params']['concat_env_params_to_obs']:
        meta_params_dim = sample_env.env_meta_params.shape[0]
    else:
        meta_params_dim = 0
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    net_size = variant['net_size']
    qf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + action_dim + meta_params_dim,
        output_size=1,
    )
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + meta_params_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size],
        obs_dim=obs_dim + meta_params_dim,
        action_dim=action_dim,
    )
    algorithm = MetaSoftActorCritic(env_sampler=env_sampler,
                                    policy=policy,
                                    qf=qf,
                                    vf=vf,
                                    **variant['algo_params'])
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
# Example #5
        report.add_text(
            'Outer iteration: {}, disc loss: {}, gen loss: {}'.format(
                outer_iter, dloss, gloss))
        generated_samples, _ = gan.sample_generator(50)
        report.add_image(plot_samples(generated_samples))
        report.add_image(plot_dicriminator(gan))

        report.save()


if __name__ == '__main__':
    # Single-variant sweep; hyperparameters previously explored here
    # included generator_init, generator/discriminator iteration counts
    # and learning rates.
    vg = VariantGenerator()
    vg.add('outer_iters', [500])

    variants = vg.variants(randomized=False)
    for variant in variants:
        run_experiment_lite(
            stub_method_call=run_task,
            mode='local',
            n_parallel=1,
            # Keep only the final iteration's snapshot.
            snapshot_mode="last",
            seed=int(time.time()),
            exp_prefix='debug_simple_circle_gan',
            variant=variant,
        )
# Example #6
def experiment(variant):
    """Train neural-process meta-SAC over a family of environments.

    Variant keys used:
      - 'on_the_fly': if True, envs are sampled lazily from the raw spec
        via OnTheFlyEnvSampler; otherwise list-valued spec entries are
        expanded into an explicit cross product up front.
      - 'env_specs': dict of env parameters; list values are swept,
        scalars are constant.
      - 'net_size': hidden-layer width for all networks.
      - 'algo_params': kwargs for NPMetaSoftActorCritic, including
        'latent_repr_mode' and 'num_latent_samples'.

    Returns 1 on completion (script-style success marker).
    """
    # we have to generate the combinations for the env_specs
    if variant['on_the_fly']:
        # we have to generate the combinations for the env_specs
        env_specs = variant['env_specs']
        env_sampler = OnTheFlyEnvSampler(env_specs)
    else:
        env_specs = variant['env_specs']
        env_specs_vg = VariantGenerator()
        env_spec_constants = {}
        env_spec_ranges = {}
        # Split entries: lists become swept axes, scalars stay fixed.
        for k, v in env_specs.items():
            if isinstance(v, list):
                env_specs_vg.add(k, v)
                env_spec_ranges[k] = v
            else:
                env_spec_constants[k] = v

        env_specs_list = []
        for es in env_specs_vg.variants():
            # '_hidden_keys' is VariantGenerator bookkeeping, not a spec.
            del es['_hidden_keys']
            es.update(env_spec_constants)
            env_specs_list.append(es)

        env_sampler = EnvSampler(env_specs_list)

    # set up the neural process
    # NOTE(review): reads module-level `exp_specs`, not `variant` —
    # confirm that global is populated by the launcher before this runs.
    np_path = exp_specs['neural_process_load_path']
    if np_path == '':
        # Training a neural process from scratch here is unsupported.
        raise NotImplementedError()
    else:
        neural_process = joblib.load(np_path)['neural_process']

    # set up similar to non-meta version: probe one env for dimensions.
    sample_env, _ = env_sampler()
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    # Extra observation dims carry the latent task representation.
    if variant['algo_params']['latent_repr_mode'] == 'concat_params':
        # presumably the latent distribution's parameters (2 * z_dim,
        # e.g. mean and variance) — TODO confirm against neural_process.
        extra_obs_dim = 2 * neural_process.z_dim
    else: # concat samples
        extra_obs_dim = variant['algo_params']['num_latent_samples'] * neural_process.z_dim

    net_size = variant['net_size']
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + extra_obs_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size],
        obs_dim=obs_dim + extra_obs_dim,
        action_dim=action_dim,
    )
    qf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + action_dim + extra_obs_dim,
        output_size=1,
    )
    algorithm = NPMetaSoftActorCritic(
        env_sampler=env_sampler,
        neural_process=neural_process,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )

    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
# Example #7
def main(train_bool, manual_edit_params, load_saved_params, saved_config_path):
    """Configure and launch roach GBAC/MAML runs via run_experiment_lite.

    Args:
        train_bool: True for training, False for testing/rollouts. When
            training, each variant's testing section is overwritten with
            its training section so the saved config can be reused for
            testing unchanged.
        manual_edit_params: when True, the vg.add(...) overrides below are
            merged back into each variant's nested config before launch.
        load_saved_params: when True, merge the YAML at saved_config_path
            over the default config (training or testing section only,
            depending on train_bool).
        saved_config_path: path to a previously saved YAML config file.
    """
    #################################
    ######## Set parameters #########
    #################################

    # Read in the default config. Fix: the original `yaml.load(open(...))`
    # leaked the file handle and omitted a Loader (an error on PyYAML >= 6);
    # safe_load suffices for a plain scalar/list/dict config.
    with open("../config.yaml") as config_file:
        config = yaml.safe_load(config_file)

    if load_saved_params:
        with open(saved_config_path, "r") as saved_file:
            saved_config = yaml.safe_load(saved_file)
        if train_bool:
            # Replace the training config only.
            config["training"] = recursive_dict_merge(config["training"],
                                                      saved_config["training"])
        else:
            # Replace the testing config only.
            config["testing"] = recursive_dict_merge(config["testing"],
                                                     saved_config["testing"])
            # NOTE(review): drops into an interactive IPython shell here —
            # looks like leftover debugging; confirm before removing.
            IPython.embed()

    vg = VariantGenerator()
    vg.add('config', [config])

    if manual_edit_params:
        # For testing, you must fill out:
        vg.add('previous_dynamics_model', [
            "/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_11_optimization/_ubs_23_ulr_0.0num_updates1_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/model_aggIter0_epoch45"
        ])
        vg.add('restore_previous_dynamics_model', [True])

        # For testing, please customize these:
        vg.add('num_steps_per_rollout', [110])
        vg.add('desired_shape_for_rollout', ["straight"])
        vg.add('save_rollout_run_num', [1])
        vg.add('dynamic_evaluation', [True])

        vg.add('meta_batch_size', [64])
        vg.add('meta_lr', [0.001])
        vg.add('update_batch_size', [23])

        vg.add('max_runs_per_surface', [5])  # full dataset is 396
        vg.add('num_updates', [1])
        vg.add('update_lr', [0.1])
        vg.add("task_list", [["all"]])
        vg.add('max_epochs', [50])
        vg.add('num_sgd_steps', [1])

        # Aggregation
        vg.add('ratio_new', [0.9])
        vg.add('curr_agg_iter', [0])  # 0, 1, 2, etc.

        # Misc
        vg.add('horizon', [5])
        vg.add('use_reg', [True])
        vg.add('seed', [0])
        vg.add('nonlinearity', ['relu'])
        if config['training']['use_reg']:
            vg.add('regularization_weight', [0.001])
        vg.add('use_clip', [True])
        vg.add("weight_initializer", ["xavier"])
        vg.add("dim_hidden", [[500, 500]])
        vg.add('optimizer', ["adam"])
        vg.add('dim_bias', [5])
        vg.add('use_momentum', [False])
        vg.add('learn_inner_loss', [False])

    for v in vg.variants():
        # Stagger launches so per-run timestamps/log dirs stay distinct.
        time.sleep(1.)
        if manual_edit_params:
            # Fold the overrides above back into the nested config dict.
            _v = v.copy()
            del _v['config'], _v['_hidden_keys']
            v['config'] = replace_in_dict(v['config'], _v)

        if train_bool:
            # Want the testing parameters to match the training parameters,
            # so this saved config can be loaded directly for testing.
            v['config']['testing'] = recursive_dict_merge(
                v['config']['testing'], v['config']['training'])

        # Output folder for rollouts/videos of this run.
        v['exp_name'] = "/home/anagabandi/roach_workspace/src/gbac_roach/videos/de/shell_shift"

        v['train_bool'] = train_bool
        run_experiment_lite(
            run,
            sync_s3_pkl=True,
            periodic_sync=True,
            variant=v,
            snapshot_mode="all",
            mode="local",
            use_cloudpickle=True,
            exp_name=v['exp_name'],
            use_gpu=False,
            pre_commands=[
                "yes | pip install tensorflow=='1.4.1'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v['config']['seed'])
def run_experiment(argv):
    """Sweep TRPO cassie-mujoco training variants and launch them.

    argv follows sys.argv conventions (argv[0] is ignored). Launches one
    run_experiment_lite job per variant, locally or on EC2 depending on
    --mode. Relies on module-level globals: EXP_PREFIX, ec2_instance,
    subnets, config, instantiate_class_stings, run_train_task.
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()
    vg.add('n_itr', [5000])
    vg.add('fixed_gains', [True, False])
    vg.add('stability_cost_coef', [0.0, 0.01])
    vg.add('ctrl_cost_coef', [0, 0.0005, 0.001, 0.005])
    vg.add('alive_bonus', [0, 1])
    vg.add('step_size', [0.02])
    vg.add('seed', [1, 11])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('batch_size', [50000])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])

    variants = vg.variants()

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = info['vCPU']
    else:
        n_parallel = 12

    if args.mode == 'ec2':

        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    # Random ids keep concurrently launched runs from colliding on exp_name.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "trpo_train_cassie_mujoco_%.3f_%i_%i_id_%i" % (
            v['step_size'], v['batch_size'], v['seed'], exp_id)
        v = instantiate_class_stings(v)

        # Fix: the per-run AWS region/key/image/security-group setup only
        # applies in ec2 mode; it previously ran unconditionally, making
        # local runs depend on (and mutate) EC2-only globals. This matches
        # the guarded pattern used by the other launchers in this file.
        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command="python3",
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
# Example #9
def run_experiment(argv):
    """Sweep model-based MPC training variants and launch them.

    argv follows sys.argv conventions (argv[0] is ignored). Launches one
    run_experiment_lite job per variant, locally or on EC2 depending on
    --mode. Relies on module-level globals: EXP_PREFIX, ec2_instance,
    NUM_EC2_SUBNETS, config, cheapest_subnets, instantiate_class_stings,
    run_train_task.
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx',
                        type=int,
                        default=4,
                        help='Number of tasks per GPU')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()

    vg.add('seed', [22, 33])

    # env spec
    vg.add('env', ['HalfCheetahEnvRandParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('path_length', [200, 500])

    # Model-based MAML algo spec
    vg.add('n_itr', [100])
    vg.add('step_size', [0.01])
    vg.add('discount', [0.99])

    vg.add('batch_size_env_samples', [4000])
    vg.add('initial_random_samples', [4000])
    vg.add('num_models', [5, 10])
    vg.add('n_candidates', [1000])
    vg.add('horizon', [10])

    # neural network configuration
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('dynamic_model_epochs', [(200, 200)])
    vg.add('weight_normalization_model', [True])
    vg.add('reinit_model_cycle', [0])

    vg.add('valid_split_ratio', [0.2])
    vg.add('rolling_average_persitency', [0.99])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # Fix: a `default_dict` of run_experiment_lite defaults used to be
    # built here but was never read anywhere; removed as dead code.

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] /
                         2)  # make the default 4 if not using ec2
    else:
        n_parallel = 6

    if args.mode == 'ec2':

        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        subnets = cheapest_subnets(ec2_instance, num_subnets=NUM_EC2_SUBNETS)

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    # Random ids keep concurrently launched runs from colliding on exp_name.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "model_based_mpc_train_env_%s_%i_%i_%i_id_%i" % (
            v['env'], v['path_length'], v['batch_size_env_samples'], v['seed'],
            exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # Spread runs across the cheapest subnets/regions.
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            snapshot_mode="gap",
            snapshot_gap=5,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'", "pip list",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def run_experiment(argv):
    """Sweep ACKTR training variants across rand-param envs and launch them.

    argv follows sys.argv conventions (argv[0] is ignored). Relies on
    module-level globals: EXP_PREFIX, ec2_instance, subnets, config,
    instantiate_class_stings, run_train_task.
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()
    vg.add('env', [
        'HalfCheetahEnvRandParams', 'AntEnvRandParams',
        'WalkerEnvRandomParams', 'SwimmerEnvRandParams', 'HopperEnvRandParams',
        'PR2EnvRandParams'
    ])
    vg.add('total_timesteps', [int(10**8)])
    vg.add('seed', [31, 41, 32])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('batch_size', [5000])
    vg.add('num_timesteps', [10**7])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(32, 32)])

    variants = vg.variants()
    from pprint import pprint
    pprint(variants)

    # ----------------------- AWS configuration ------------------------------
    if args.mode == 'ec2':
        # Fix: `info` was read here without ever being assigned (NameError
        # in ec2 mode); look it up the way the sibling launchers do.
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] /
                         2)  # make the default 4 if not using ec2
    else:
        n_parallel = 6

    # Fix: this banner was gated on mode == 'ecs' (apparent typo for
    # 'ec2') and therefore never ran.
    if args.mode == 'ec2':
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, parallel {} on the subnets: '.
            format(config.AWS_INSTANCE_TYPE, config.AWS_SPOT_PRICE,
                   n_parallel), *subnets)

    # ----------------------- TRAINING ---------------------------------------
    # Random ids keep concurrently launched runs from colliding on exp_name.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "acktr_%s_%i_%i_id_%i" % (v['env'], v['batch_size'],
                                             v['seed'], exp_id)

        v['exp_name'] = exp_name
        v['exp_prefix'] = EXP_PREFIX

        v = instantiate_class_stings(v)

        # Fix: per-run AWS region/key/image/security-group setup only
        # applies in ec2 mode (matches the other launchers in this file);
        # it previously ran unconditionally.
        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            sync_s3_pkl=True,
            periodic_sync=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command="python3",
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle",
                "yes | pip install gym==0.10.5"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
# Example #11
    # policy initialization
    vg.add('output_gain', [0.1])
    vg.add('policy_init_std', [1])
    vg.add('learn_std', [False]) #2
    vg.add('adaptive_std', [False])
    vg.add('discount', [0.998])
    vg.add('seed_with', ['only_goods'])
    vg.add('seed', [args.seed])

    if args.scratch_dir:
        vg.add('scratch_dir', [args.scratch_dir])

    exp_prefix = 'ant-startgen-smartreplay4'
    print("\n" + "**********" * 10 + "\nexp_prefix: {}\nvariants: {}".format(exp_prefix, vg.size))

    variants = vg.variants()
    assert len(variants) == 1
    vv = variants[0]

    run_experiment_lite(
        # use_cloudpickle=False,
        stub_method_call=run_task,
        variant=vv,
        mode='local',
        n_parallel=8,
        snapshot_mode="last",
        seed=vv['seed'],
        exp_prefix=exp_prefix,
        # exp_name=exp_name,
        log_dir=args.log_dir,
    )
# Example #12
def experiment(variant):
    """Train meta-SAC over envs expanded from ``variant['env_specs']``.

    List-valued env_specs entries are swept into a cross product; scalar
    entries are held constant in every generated spec. A normalizer that
    maps each swept env parameter into roughly [-1, 1] is injected into
    algo_params under 'env_params_normalizer'. The SAC flavor (single-Q
    vs. twin-Q) is chosen by the module-level ``exp_specs['use_new_sac']``
    flag. Returns 1 on completion (script-style success marker).
    """
    # we have to generate the combinations for the env_specs
    env_specs = variant['env_specs']
    env_specs_vg = VariantGenerator()
    env_spec_constants = {}
    env_spec_ranges = {}
    # Split entries: lists become swept axes, scalars stay fixed.
    for k, v in env_specs.items():
        if isinstance(v, list):
            env_specs_vg.add(k, v)
            env_spec_ranges[k] = v
        else:
            env_spec_constants[k] = v

    env_specs_list = []
    for es in env_specs_vg.variants():
        # '_hidden_keys' is VariantGenerator bookkeeping, not a spec.
        del es['_hidden_keys']
        es.update(env_spec_constants)
        env_specs_list.append(es)

    env_sampler = EnvSampler(env_specs_list)

    # make the normalizer function for the env_params
    # (keys sorted so the parameter-vector layout is deterministic)
    mean = []
    half_diff = []
    for k in sorted(env_spec_ranges.keys()):
        r = env_spec_ranges[k]
        if len(r) == 1:
            # Single-point range: center at 0, scale by the value itself.
            # NOTE(review): a half_diff of 0 here (r[0] == 0, or
            # r[0] == r[1] below) would divide by zero in the normalizer —
            # confirm the spec ranges exclude that.
            mean.append(0)
            half_diff.append(r[0])
        else:
            mean.append((r[0] + r[1]) / 2.0)
            half_diff.append((r[1] - r[0]) / 2.0)
    mean = np.array(mean)
    half_diff = np.array(half_diff)

    def env_params_normalizer(params):
        # Affine map: range midpoint -> 0, range endpoints -> +/-1.
        return (params - mean) / half_diff

    variant['algo_params']['env_params_normalizer'] = env_params_normalizer

    # set up similar to non-meta version: probe one env for dimensions.
    sample_env, _ = env_sampler()
    if variant['algo_params']['concat_env_params_to_obs']:
        meta_params_dim = sample_env.env_meta_params.shape[0]
    else:
        meta_params_dim = 0
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    net_size = variant['net_size']
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + meta_params_dim,
        output_size=1,
    )
    # NOTE(review): branches on module-level `exp_specs`, not `variant` —
    # confirm the launcher defines this global before calling.
    if exp_specs['use_new_sac']:
        # Twin Q-networks for the newer SAC variant.
        qf1 = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        qf2 = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        policy = ReparamTanhMultivariateGaussianPolicy(
            hidden_sizes=[net_size, net_size],
            obs_dim=obs_dim + meta_params_dim,
            action_dim=action_dim,
        )
        algorithm = NewMetaSoftActorCritic(env_sampler=env_sampler,
                                           policy=policy,
                                           qf1=qf1,
                                           qf2=qf2,
                                           vf=vf,
                                           **variant['algo_params'])
    else:
        # Original single-Q SAC variant.
        policy = TanhGaussianPolicy(
            hidden_sizes=[net_size, net_size],
            obs_dim=obs_dim + meta_params_dim,
            action_dim=action_dim,
        )
        qf = FlattenMlp(
            hidden_sizes=[net_size, net_size],
            input_size=obs_dim + action_dim + meta_params_dim,
            output_size=1,
        )
        algorithm = MetaSoftActorCritic(env_sampler=env_sampler,
                                        policy=policy,
                                        qf=qf,
                                        vf=vf,
                                        **variant['algo_params'])

    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
def run_experiment(argv):
    """Enumerate PPO-MAML hyperparameter variants and launch one run each.

    Args:
        argv: Full command-line argument list; ``argv[1:]`` is parsed for
            ``--mode`` ('local' runs on this machine, 'ec2' launches on an
            AWS EC2 cluster).
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()
    vg.add('env', ['SawyerPushAndReachXYZEnv'])
    vg.add('fix_goal', [False])
    vg.add('goal_slack', [0.0, 0.05, 0.1])
    vg.add('init_slack', [0.0, 0.05])
    vg.add('reward_type', ['puck_distance_hand_distance_after_success'])

    vg.add('seed', [1, 10])
    vg.add('n_itr', [1001])
    vg.add('fast_lr', [0.1])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('fast_batch_size', [20])

    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])
    vg.add('trainable_step_size', [False])
    vg.add('bias_transform', [False])
    vg.add('entropy_bonus', [0])

    # PPO-MAML params
    vg.add('clip_eps', [0.5])
    vg.add('clip_outer', [True])
    vg.add('target_outer_step', [0])
    vg.add('init_outer_kl_penalty', [0])
    vg.add('adaptive_outer_kl_penalty', [False])
    vg.add('target_inner_step', [1e-2])
    vg.add('init_inner_kl_penalty', [1e-3])
    vg.add('adaptive_inner_kl_penalty', [True])
    vg.add('max_epochs', [5])
    vg.add('num_batches', [1])
    vg.add('parallel_sampler', [True])

    variants = vg.variants()

    # ----------------------- AWS configuration ------------------------------
    # Single branch on the run mode: on EC2, size the sampler pool to the
    # instance's vCPU count and configure the spot request before launching;
    # locally, fall back to a fixed worker count.
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = info['vCPU']

        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))
    else:
        n_parallel = 8

    # ----------------------- TRAINING ---------------------------------------
    # Draw a distinct random id per variant so experiment names never collide.
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "%s_%s_%.1f_%.3f_%i_%i_id_%i" % (
            EXP_PREFIX, v['env'], v['clip_eps'], v['target_inner_step'],
            v['max_epochs'], v['seed'], exp_id)

        v = instantiate_class_stings(v)

        # Spread runs across subnets; the region / key / AMI / security group
        # are all derived from the chosen subnet's region prefix.
        subnet = random.choice(subnets)
        config.AWS_REGION_NAME = subnet[:-1]
        config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
            config.AWS_REGION_NAME]
        config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
            config.AWS_REGION_NAME]
        config.AWS_SECURITY_GROUP_IDS = \
            config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Keep snapshots only every `snapshot_gap` iterations
            snapshot_mode="gap",
            snapshot_gap=200,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided,
            # a random seed will be used
            seed=v["seed"],
            python_command="python3",
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
# 예제 #14 (Example #14)
# 0
def main(config_path, extra_config):
	"""Load a YAML config, enumerate MAML hyperparameter variants, and
	launch one training run per variant via ``run_experiment_lite``.

	Args:
		config_path: Path to the base YAML configuration file.
		extra_config: Dict of overrides merged into the loaded config
			via ``replace_in_dict``.
	"""
	#################################
	## INIT config and vars
	#################################

	# Read the config inside a context manager so the file handle is
	# closed, and use safe_load so arbitrary YAML tags cannot execute
	# code (plain yaml.load without a Loader is deprecated/unsafe).
	with open(config_path) as config_file:
		config = yaml.safe_load(config_file)
	config = replace_in_dict(config, extra_config)

	vg = VariantGenerator()
	vg.add('config', [config])
	vg.add('meta_batch_size', [64])
	vg.add('meta_lr', [0.001])

	vg.add('update_batch_size', [16])
	vg.add('update_lr', [1.0])  # candidates previously tried: 1.0, 0.1, 0.01, 0.001
	vg.add('num_updates', [3])

	vg.add('max_epochs', [50])
	vg.add('horizon', [5])

	vg.add('curr_agg_iter', [0])
	# 'use_reg' only changes the save filename; the config.yaml value must
	# agree with it when True.
	vg.add('use_reg', [True])
	vg.add('seed', [0])
	vg.add('nonlinearity', ['relu'])
	if config['training']['use_reg']:
		vg.add('regularization_weight', [0.000000001])  # no reg for carp on carp: 1e-9

	vg.add('use_clip', [True])
	vg.add("weight_initializer", ["truncated_normal"])
	vg.add("dim_hidden", [[800], [800, 800]])
	vg.add("task_list", [["all"]])

	for v in vg.variants():
		# Stagger launches slightly so each run gets a distinct timestamp.
		time.sleep(1.)

		# Fold every variant key (except bookkeeping entries) back into the
		# nested config dict so the task sees the chosen hyperparameters.
		_v = v.copy()
		del _v['config'], _v['_hidden_keys']
		v['config'] = replace_in_dict(v['config'], _v)

		# Experiment name encodes the key hyperparameters of this variant.
		v['exp_name'] = ("MAML_roach_copy/Tuesday_night_optimization/"
						 + "_ubs_" + str(v['config']['training']['update_batch_size'])
						 + "_ulr_" + str(v['config']['training']['update_lr'])
						 + "num_updates" + str(v['config']['training']['num_updates'])
						 + "_layers_" + str(len(v['config']['model']['dim_hidden']))
						 + "_x" + str((v['config']['model']['dim_hidden'])[0]))

		run_experiment_lite(
			run,
			sync_s3_pkl=True,
			periodic_sync=True,
			variant=v,
			snapshot_mode="all",
			mode="local",
			use_cloudpickle=True,
			exp_name=v['exp_name'],
			use_gpu=False,
			pre_commands=["yes | pip install tensorflow=='1.4.1'",
						  "yes | pip install --upgrade cloudpickle"],
			seed=v['config']['seed']
		)