def run_task(variant):
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from sandbox.rocky.tf.algos.vpg import VPG
    from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
    from rllab.envs.box2d.cartpole_env import CartpoleEnv
    from sandbox.rocky.tf.envs.base import TfEnv

    env_name = variant['Environment']
    if env_name == 'Cartpole':
        env = TfEnv(CartpoleEnv())
    else:
        raise ValueError('Unsupported environment: {}'.format(env_name))
    policy = GaussianMLPPolicy(name="policy",
                               env_spec=env.spec,
                               hidden_sizes=(100, 100))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algorithm = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        n_itr=100,
        start_itr=0,
        batch_size=1000,
        max_path_length=1000,
        discount=0.99,
    )
    algorithm.train()
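For reference, run_task only needs a variant dict carrying the 'Environment' key; a minimal sketch of calling it directly (the __main__ guard below is illustrative and not part of the original snippet):

# Illustrative direct call; in the rllab workflow this function would normally
# be handed to run_experiment_lite rather than invoked like this.
if __name__ == "__main__":
    run_task({'Environment': 'Cartpole'})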
Example #2
def experiment(variant, saveDir):

    initial_params_file = variant['initial_params_file']
    goalIndex = variant['goalIndex']
    init_step_size = variant['init_step_size']

    baseEnv = SawyerPickPlace_finnMAMLEnv()
    env = TfEnv(NormalizedBoxEnv(baseEnv))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(
        env=env,
        policy=None,
        load_policy=initial_params_file,
        baseline=baseline,
        batch_size=10000,  # 2x
        max_path_length=150,
        n_itr=10,
        reset_arg=goalIndex,
        optimizer_args={
            'init_learning_rate': init_step_size,
            'tf_optimizer_args': {
                'learning_rate': 0.5 * init_step_size
            },
            'tf_optimizer_cls': tf.train.GradientDescentOptimizer
        })

    import os

    saveDir = variant['saveDir']

    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)

    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()
Example #3
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=4000,  # 2x
            max_path_length=100,
            n_itr=n_itr,
            optimizer_args={
                'init_learning_rate': step_sizes[step_i],
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * step_sizes[step_i]
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            }
        )


        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=4,
            exp_prefix='trpopoint2d_test',
            exp_name='test',
            #plot=True,
        )
        import pdb; pdb.set_trace()
        # get return from the experiment
        with open('data/local/trpopoint2d-test/test/progress.csv', 'r') as f:
            reader = csv.reader(f, delimiter=',')
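The snippet is truncated at this point; presumably the reader is used to pull the logged returns back out of progress.csv. A rough sketch of that step, assuming rllab's usual 'AverageReturn' column name (an assumption, not shown in the original):

# Sketch only: read the logged returns back out of progress.csv.
# The 'AverageReturn' column name is assumed.
import csv

with open('data/local/trpopoint2d-test/test/progress.csv', 'r') as f:
    rows = list(csv.DictReader(f))
    returns = [float(row['AverageReturn']) for row in rows]
print(returns[-1])  # return from the final iteration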
Example #4
from sandbox.rocky.tf.algos.vpg import VPG
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc.instrument import stub, run_experiment_lite

env = TfEnv(normalize(CartpoleEnv()))

policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(env=env,
           policy=policy,
           baseline=baseline,
           batch_size=10000,
           max_path_length=100,
           n_itr=4,
           discount=0.99,
           optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, )))
algo.train()
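stub and run_experiment_lite are imported above but never used. For comparison, the stubbed launch pattern (the same one visible in the run_experiment_lite fragment earlier on this page) would call stub(globals()) right after the imports and then hand the deferred algo.train() call to the runner; a sketch under that assumption, with placeholder experiment names:

# Sketch only: assumes stub(globals()) was called immediately after the
# imports, so the objects above are stubs and algo.train() is a deferred call.
run_experiment_lite(
    algo.train(),
    n_parallel=1,          # parallel sampling workers
    snapshot_mode="last",  # keep only the last iteration's snapshot
    seed=1,                # fixed seed; omit for a random one
    exp_prefix='vpg_cartpole',  # placeholder names
    exp_name='test',
)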
Example #5
def experiment(variant):

    seed = variant['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    initial_params_file = variant['initial_params_file']
    goalIndex = variant['goalIndex']
    init_step_size = variant['init_step_size']

    regionSize = variant['regionSize']
    mode = variant['mode']

    if 'docker' in mode:
        taskFilePrefix = '/root/code'
    else:
        taskFilePrefix = '/home/russellm'

    if variant['valRegionSize'] is not None:
        valRegionSize = variant['valRegionSize']

        tasksFile = taskFilePrefix + '/multiworld/multiworld/envs/goals/pickPlace_' + valRegionSize + '_val.pkl'

    else:
        tasksFile = taskFilePrefix + '/multiworld/multiworld/envs/goals/pickPlace_' + regionSize + '.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)

    env = FinnMamlEnv(
        FlatGoalEnv(baseEnv,
                    obs_keys=['state_observation', 'state_desired_goal']))
    env = TfEnv(NormalizedBoxEnv(env))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(
        env=env,
        policy=None,
        load_policy=initial_params_file,
        baseline=baseline,
        batch_size=7500,  # 2x
        max_path_length=150,
        n_itr=10,
        reset_arg=goalIndex,
        optimizer_args={
            'init_learning_rate': init_step_size,
            'tf_optimizer_args': {
                'learning_rate': 0.1 * init_step_size
            },
            'tf_optimizer_cls': tf.train.GradientDescentOptimizer
        })
    import os
    saveDir = variant['saveDir']
    currPath = ''
    for _dir in saveDir.split('/'):
        currPath += _dir + '/'
        if not os.path.isdir(currPath):
            os.mkdir(currPath)

    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')
    algo.train()
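Every key this experiment function reads can be seen above; a hypothetical variant dict, where all values are placeholders and the real paths and region names depend on the goal files and checkpoints available:

# Hypothetical variant: every key below is read by experiment() above,
# but the values are placeholders, not taken from the original code.
variant = dict(
    seed=1,
    initial_params_file='/path/to/initial_params.pkl',
    goalIndex=0,
    init_step_size=0.01,
    regionSize='small',        # placeholder region name
    valRegionSize=None,        # or a validation region name
    mode='local',              # anything without 'docker' uses the local prefix
    envType='PickPlace',       # or 'Push'
    saveDir='/tmp/vpg_finetune/',  # trailing slash; 'progress.csv' is appended directly
)
experiment(variant)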
Example #6
from sandbox.rocky.tf.algos.vpg import VPG
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc.instrument import stub, run_experiment_lite

env = TfEnv(normalize(CartpoleEnv()))

policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=10000,
    max_path_length=100,
    n_itr=40,
    discount=0.99,
    optimizer_args=dict(
        tf_optimizer_args=dict(
            learning_rate=0.01,
        )
    )
)
algo.train()