Example #1
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 100
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    env = TfEnv(normalize(PointEnvRandGoal()))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    algo.train()
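
For reference, a minimal sketch of how this experiment() function might be driven. The variant keys below are exactly the ones read above; the values, and the setup() and log paths, are illustrative assumptions rather than the original launch script.

# Hypothetical driver for experiment() above; values are placeholders.
variant = dict(
    seed=1,
    n_parallel=4,
    log_dir='/tmp/maml_point/',
    flr=0.1,                    # fast (inner-loop) learning rate
    fbs=20,                     # trajectories per inner gradient update
    mlr=0.01,                   # meta (outer) step size
    hidden_sizes=(100, 100),
)
experiment(variant)
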
Example #2
def experiment(variant):

    fast_learning_rate = variant['fast_learning_rate']

    fast_batch_size = variant['fast_batch_size']
    meta_batch_size = variant['meta_batch_size']
    max_path_length = variant['max_path_length']
    num_grad_updates = variant['num_grad_updates']
    meta_step_size = variant['meta_step_size']

    env = TfEnv(BallEnv())

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    algo.train()
Example #3
def main():
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True # pylint: disable=E1101
  with tf.Session(config=config):
  # env = TfEnv(normalize(GridWorldEnvRand('four-state')))
    env = DummyVecEnv([make_env])
    policy = MAMLCategoricalMLPPolicy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=fast_learning_rate,
            prob_network=nature_cnn,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100,100),
        )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=fast_batch_size, # number of trajs for grad update
            max_path_length=max_path_length,
            meta_batch_size=meta_batch_size,
            num_grad_updates=num_grad_updates,
            n_itr=800,
            use_maml=use_maml,
            step_size=meta_step_size,
            plot=False,
        )
    run_experiment_lite(
            algo.train(),
            n_parallel=4,
            snapshot_mode="last",
            seed=1,
            exp_prefix='trpo_maml_4state',
            exp_name='trpo_maml'+str(int(use_maml))+'_fbs'+str(fast_batch_size)+'_mbs'+str(meta_batch_size)+'_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) +'_step1'+str(num_grad_updates),
            plot=False,
        )
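
This snippet references several names defined elsewhere in the original script (make_env, nature_cnn, and the hyperparameters passed to MAMLTRPO). A hedged sketch of the module-level settings it appears to assume; the values are placeholders, not taken from the source.

# Assumed module-level hyperparameters for the snippet above (illustrative only).
use_maml = True
fast_learning_rate = 0.1   # inner-loop ("fast") step size
fast_batch_size = 20       # trajectories per inner gradient update
meta_batch_size = 40       # tasks sampled per meta-update
max_path_length = 100
num_grad_updates = 1       # inner gradient steps per task
meta_step_size = 0.01      # outer (TRPO) step size
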
Example #4
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']

    setup(seed, n_parallel, log_dir)
    expertDataLoc = variant['expertDataLoc']
    expertDataItr = variant['expertDataItr']

    fast_learning_rate = variant['flr']

    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']

    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_v1.pkl'

    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/PickPlace_60X30.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))
    envType = variant['envType']

    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'

        baseEnv = SawyerPickPlaceEnv(tasks=tasks)

    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
        numExpertPolicies=20,
        expertDataInfo={
            'expert_loc': expertDataLoc,
            'expert_itr': expertDataItr
        })

    algo.train()
Example #5
     hidden_nonlinearity=tf.nn.relu,
     hidden_sizes=(100,100),
 )
 if bas == 'zero':
     baseline = ZeroBaseline(env_spec=env.spec)
 elif 'linear' in bas:
     baseline = LinearFeatureBaseline(env_spec=env.spec)
 else:
     baseline = GaussianMLPBaseline(env_spec=env.spec)
 algo = MAMLTRPO(
     env=env,
     policy=policy,
     baseline=baseline,
     batch_size=fast_batch_size, # number of trajs for grad update
     max_path_length=max_path_length,
     meta_batch_size=meta_batch_size,
     num_grad_updates=num_grad_updates,
     n_itr=100,
     use_maml=use_maml,
     step_size=meta_step_size,
     plot=False,
 )
 run_experiment_lite(
     algo.train(),
     n_parallel=1,
     snapshot_mode="last",
     python_command='python3',
     seed=1,
     exp_prefix='vpg_maml_point100',
     exp_name='trpomaml'+str(int(use_maml))+'_fbs'+str(fast_batch_size)+'_mbs'+str(meta_batch_size)+'_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) +'_step1'+str(num_grad_updates),
     plot=False,
Example #6
                        baseline = ZeroBaseline(env_spec=env.spec)
                    elif 'linear' in bas:
                        baseline = LinearFeatureBaseline(env_spec=env.spec)
                    else:
                        baseline = GaussianMLPBaseline(env_spec=env.spec)
                    algo = MAMLTRPO(
                        env=env,
                        policy=policy,
                        baseline=baseline,
                        batch_size=fast_batch_size,
                        max_path_length=max_path_length,
                        meta_batch_size=meta_batch_size,
                        num_grad_updates=num_grad_updates,
                        n_itr=n_itr,
                        use_maml=use_maml,
                        use_pooled_goals=True,
                        step_size=meta_step_size,
                        plot=False,
                        pre_std_modifier=pre_std_modifier,
                        post_std_modifier_train=post_std_modifier_train,
                        post_std_modifier_test=post_std_modifier_test,
                        # goals_pool_to_load=R7DOF_GOALS_LOCATION,
                        # goals_pickle_to=R7DOF_GOALS_LOCATION,
                        # goals_pool_size=200,

                    )
                    run_experiment_lite(
                        algo.train(),
                        n_parallel=1, #10, If you use more than 1, your std modifiers may not work
                        snapshot_mode="last",
                        python_command='python3',
Example #7
policy = MAMLCategoricalMLPPolicy(
    name="policy",
    env_spec=env.spec,
    grad_step_size=0.1,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
)
baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = MAMLTRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=20,  # number of trajs for grad update
    max_path_length=int(args.n),
    meta_batch_size=40,
    num_grad_updates=1,
    n_itr=args.iters,
    use_maml=True,
    step_size=0.01)

run_experiment_lite(
    algo.train(),
    exp_prefix=args.expt_name,
    exp_name='run_{}'.format(args.seed),
    n_parallel=1,
    snapshot_mode="gap",
    snapshot_gap=20,
    python_command='python3',
    seed=args.seed,
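
The args.* fields used here (args.n, args.iters, args.expt_name, args.seed; args.use_maml appears in a later example) would come from command-line parsing. A possible argparse setup, with flag names and defaults that are assumptions rather than the original script's:

# Hypothetical CLI parsing matching the args.* fields referenced in these examples.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--n', type=int, default=100, help='max path length per rollout')
parser.add_argument('--iters', type=int, default=800, help='number of meta-iterations (n_itr)')
parser.add_argument('--expt_name', type=str, default='maml_trpo')
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--use_maml', type=int, default=1)
args = parser.parse_args()
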
Example #8
     baseline = ZeroBaseline(env_spec=env.spec)
 elif 'linear' in bas:
     baseline = LinearFeatureBaseline(
         env_spec=env.spec)
 else:
     baseline = GaussianMLPBaseline(
         env_spec=env.spec)
 algo = MAMLTRPO(
     env=env,
     policy=policy,
     baseline=baseline,
     batch_size=fast_batch_size,  # number of trajs for alpha grad update
     max_path_length=max_path_length,
     meta_batch_size=meta_batch_size,  # number of tasks sampled for beta grad update
     num_grad_updates=num_grad_updates,  # number of alpha grad updates
     n_itr=100,
     use_maml=use_maml,
     step_size=meta_step_size,
     plot=False,
     pre_std_modifier=pre_std_modifier,
     post_std_modifier_train=post_std_modifier_train,
     post_std_modifier_test=post_std_modifier_test)
 run_experiment_lite(
     algo.train(),
     n_parallel=1,
     snapshot_mode="last",
     python_command='python3',
     seed=seed,
     exp_prefix='maml_trpo_push100',
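
The comments above ("alpha grad update", "beta grad update") refer to MAML's two levels of optimization: num_grad_updates inner ("alpha") steps per task, taken with grad_step_size/fast_learning_rate on fast_batch_size trajectories, followed by one outer ("beta") update across meta_batch_size tasks with step_size/meta_step_size. A toy, first-order sketch of that structure on a scalar parameter; the real MAMLTRPO uses policy-gradient losses and a TRPO outer step rather than plain gradient descent.

# Toy illustration of the alpha/beta update structure only; losses and "tasks" are stand-ins.
import numpy as np

rng = np.random.default_rng(0)

def inner_loss_grad(theta, target):
    # gradient of 0.5 * (theta - target) ** 2 with respect to theta
    return theta - target

def maml_step(theta, alpha=0.1, beta=0.01, meta_batch_size=20, num_grad_updates=1):
    meta_grad = 0.0
    for _ in range(meta_batch_size):            # tasks sampled for the beta (meta) update
        target = rng.normal()                   # sample a "task"
        theta_i = theta
        for _ in range(num_grad_updates):       # alpha (inner) updates
            theta_i -= alpha * inner_loss_grad(theta_i, target)
        # first-order MAML: use the post-update gradient as the meta-gradient contribution
        meta_grad += inner_loss_grad(theta_i, target)
    return theta - beta * meta_grad / meta_batch_size

theta = 0.0
for _ in range(100):
    theta = maml_step(theta)
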
Example #9
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    tasksFile = '/root/code/multiworld/multiworld/envs/goals/Door_60X20X20.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))

    baseEnv = SawyerDoorOpenEnv(tasks=tasks)

    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))

    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    # import os

    # saveDir = variant['saveDir']

    # if os.path.isdir(saveDir)==False:
    #     os.mkdir(saveDir)

    # logger.set_snapshot_dir(saveDir)
    # #logger.set_snapshot_gap(20)
    # logger.add_tabular_output(saveDir+'progress.csv')

    algo.train()
Example #10
 if bas == 'zero':
     baseline = ZeroBaseline(env_spec=env.spec)
 elif 'linear' in bas:
     baseline = LinearFeatureBaseline(env_spec=env.spec)
 else:
     baseline = GaussianMLPBaseline(env_spec=env.spec)
 algo = MAMLTRPO(
     env=env,
     policy=policy,
     baseline=baseline,
     batch_size=fast_batch_size,  # number of trajs for alpha grad update
     max_path_length=max_path_length,
     meta_batch_size=meta_batch_size,  # number of tasks sampled for beta grad update
     num_grad_updates=num_grad_updates,  # number of alpha grad updates
     n_itr=100, #100
     use_maml=use_maml,
     step_size=meta_step_size,
     plot=False,
     pre_std_modifier=pre_std_modifier,
     post_std_modifier_train=post_std_modifier_train,
     post_std_modifier_test=post_std_modifier_test,
     meta_train_on_expert_traj=False,
     goals_pool_to_load=POINT_GOALS_LOCATION[".local"],
     # goals_pickle_to=POINT_GOALS_LOCATION[".local"],
     # goals_pool_size=1000,
 )
 run_experiment_lite(
     algo.train(),
     n_parallel=1,
     snapshot_mode="last",
     python_command='python3',
     seed=1,
Example #11
        env = TfEnv(env)
        policy = MAMLGaussianMLPPolicy(  # random policy
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=20,  # number of trajs for grad update
            max_path_length=20,
            meta_batch_size=1,
            num_grad_updates=1,
            n_itr=n_itr,
            use_maml=True,
            step_size=step_sizes[step_i],
            plot=False,
        )

        run_experiment_lite(
            algo.meta_online_train(),
            # Number of parallel workers for sampling
            n_parallel=2,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="all",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
Example #12
policy = MAMLGaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    grad_step_size=0.1,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
)
baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = MAMLTRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=20,  # number of trajs for grad update
    max_path_length=200,
    meta_batch_size=40,
    num_grad_updates=1,
    n_itr=800,
    use_maml=bool(args.use_maml),
    step_size=0.01,
    plot=False,
)

run_experiment_lite(
    algo.train(),
    exp_prefix=args.expt_name,
    exp_name='run_{}'.format(args.seed),
    n_parallel=8,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="gap",
    snapshot_gap=25,
Example #13
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=v['fast_lr'],  # learning rate of policy
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=v['fast_batch_size'],  # Number of samples per iteration.
        max_path_length=max_path_length,  # Maximum length of a single rollout.
        meta_batch_size=v['meta_batch_size'],  # Number of tasks sampled per meta-update
        num_grad_updates=num_grad_updates,  # Number of fast gradient updates
        n_itr=n_itr,  # Number of iterations.
        use_maml=use_maml,
        step_size=v['meta_step_size'],  # learning rate of meta-update
        plot=False,
    )
    direc = 'direc22' if direc else 'slope'

    run_experiment_lite(
        algo.train(),
        exp_prefix='trpo_maml_cheetah' + direc + str(max_path_length),
        exp_name='maml' + str(int(use_maml)) + '_fbs' +
        str(v['fast_batch_size']) + '_mbs' + str(v['meta_batch_size']) +
        '_flr_' + str(v['fast_lr']) + '_mlr' + str(v['meta_step_size']),
        # Number of parallel workers for sampling
Example #14
     baseline = ZeroBaseline(env_spec=env.spec)
 elif 'linear' in bas:
     baseline = LinearFeatureBaseline(env_spec=env.spec)
 else:
     baseline = GaussianMLPBaseline(env_spec=env.spec)
 algo = MAMLTRPO(
     env=env,
     policy=None,
     load_policy="/home/rosen/maml_rl/data/local/PU-TR/PU_TRrelu.f0.0_081018_15_42/itr_799.pkl",
     baseline=baseline,
     batch_size=fast_batch_size,
     max_path_length=max_path_length,
     meta_batch_size=meta_batch_size,
     num_grad_updates=num_grad_updates,
     n_itr=n_itr,
     use_maml=use_maml,
     use_pooled_goals=True,
     step_size=meta_step_size,
     plot=False,
     pre_std_modifier=pre_std_modifier,
     post_std_modifier_train=post_std_modifier_train,
     post_std_modifier_test=post_std_modifier_test,
     goals_pool_to_load=PUSHER_GOALS_LOCATION,
     # goals_pickle_to=PUSHER_GOALS_LOCATION,
     goals_pool_size=200,
 )
 run_experiment_lite(
     algo.train(),
     n_parallel=1,  # 10; if you use more than 1, your std modifiers may not work
     snapshot_mode="all",
Example #15
                hidden_nonlinearity=tf.nn.relu,
                hidden_sizes=(100, 100),
            )

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = MAMLTRPO(
                env=env,
                policy=policy,
                baseline=baseline,
                batch_size=fast_batch_size,  # number of trajs for grad update
                max_path_length=max_path_length,
                meta_batch_size=meta_batch_size,
                num_grad_updates=num_grad_updates,
                n_itr=500,
                use_maml=use_maml,
                step_size=meta_step_size,
                numExpertPolicies=20,
                num_imSteps=num_imSteps,
                expertDataInfo={
                    'expert_loc': expertDataLoc,
                    'expert_itr': expertDataItr
                },
                plot=False,
            )
            """run_experiment_lite(
                algo.train(),
                n_parallel=4,
                snapshot_mode="all",
               # python_command='python3',
                seed=1,
Example #16
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=v['fast_lr'],
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=v['fast_batch_size'],  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=v['meta_batch_size'],
        num_grad_updates=num_grad_updates,
        n_itr=800,
        use_maml=use_maml,
        step_size=v['meta_step_size'],
        plot=False,
    )
    direc = 'direc' if direc else ''

    run_experiment_lite(
        algo.train(),
        exp_prefix='trpo_maml_cheetah' + direc + str(max_path_length),
        exp_name='maml' + str(int(use_maml)) + '_fbs' +
        str(v['fast_batch_size']) + '_mbs' + str(v['meta_batch_size']) +
        '_flr_' + str(v['fast_lr']) + '_mlr' + str(v['meta_step_size']),
        # Number of parallel workers for sampling
Example #17
def experiment(variant):

    seed = variant['seed']

    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']

    if regionSize == '20X20':

        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_6_8.pkl'

    else:
        assert regionSize == '60X30'

        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_60X30.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']

    if envType == 'Push':

        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'

        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(
        FlatGoalEnv(baseEnv,
                    obs_keys=['state_observation', 'state_desired_goal']))

    env = TfEnv(NormalizedBoxEnv(env))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=None,
        load_policy=variant['init_param_file'],
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    import os

    saveDir = variant['saveDir']

    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)

    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()