def run_task(variant):
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from sandbox.rocky.tf.algos.vpg import VPG
    from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
    from rllab.envs.box2d.cartpole_env import CartpoleEnv
    from sandbox.rocky.tf.envs.base import TfEnv

    env_name = variant['Environment']
    if env_name == 'Cartpole':
        env = TfEnv(CartpoleEnv())
    policy = GaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        hidden_sizes=(100, 100))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algorithm = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        n_itr=100,
        start_itr=0,
        batch_size=1000,
        max_path_length=1000,
        discount=0.99,
    )
    algorithm.train()
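# Hypothetical usage sketch (not part of the original launcher): run_task only reads
# the 'Environment' key of the variant, and only 'Cartpole' is handled above.
if __name__ == '__main__':
    example_variant = {'Environment': 'Cartpole'}  # key/value taken from the branch above
    run_task(example_variant)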
import os

import tensorflow as tf

from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.misc import logger
from sandbox.rocky.tf.algos.vpg import VPG
from sandbox.rocky.tf.envs.base import TfEnv
# SawyerPickPlace_finnMAMLEnv and NormalizedBoxEnv are project-specific wrappers;
# import them from your multiworld checkout.


def experiment(variant, saveDir):
    initial_params_file = variant['initial_params_file']
    goalIndex = variant['goalIndex']
    init_step_size = variant['init_step_size']

    baseEnv = SawyerPickPlace_finnMAMLEnv()
    env = TfEnv(NormalizedBoxEnv(baseEnv))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    # VPG adaptation: the policy is loaded from initial_params_file rather than built here.
    algo = VPG(
        env=env,
        policy=None,
        load_policy=initial_params_file,
        baseline=baseline,
        batch_size=10000,  # 2x
        max_path_length=150,
        n_itr=10,
        reset_arg=goalIndex,
        optimizer_args={
            'init_learning_rate': init_step_size,
            'tf_optimizer_args': {
                'learning_rate': 0.5 * init_step_size
            },
            'tf_optimizer_cls': tf.train.GradientDescentOptimizer
        })

    # The saveDir argument is overridden by the value stored in the variant.
    saveDir = variant['saveDir']
    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)
    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()
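# Hypothetical invocation sketch: the keys below are exactly those read by experiment()
# above; the values (file path, goal index, step size, output directory) are placeholders.
example_variant = {
    'initial_params_file': '/path/to/pretrained/params.pkl',  # placeholder path
    'goalIndex': 0,
    'init_step_size': 0.1,          # placeholder adaptation step size
    'saveDir': '/tmp/vpg_adapt/',   # trailing slash expected by the logger calls above
}
experiment(example_variant, example_variant['saveDir'])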
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    load_policy=initial_params_file,
    baseline=baseline,
    batch_size=4000,  # 2x
    max_path_length=100,
    n_itr=n_itr,
    optimizer_args={
        'init_learning_rate': step_sizes[step_i],
        'tf_optimizer_args': {'learning_rate': 0.5 * step_sizes[step_i]},
        'tf_optimizer_cls': tf.train.GradientDescentOptimizer
    }
)

run_experiment_lite(
    algo.train(),
    # Number of parallel workers for sampling
    n_parallel=4,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="last",
    # Specifies the seed for the experiment. If this is not provided, a random seed
    # will be used
    seed=4,
    exp_prefix='trpopoint2d_test',
    exp_name='test',
    # plot=True,
)

import pdb; pdb.set_trace()

# get return from the experiment
with open('data/local/trpopoint2d-test/test/progress.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
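# Hypothetical continuation sketch: one way to pull the learning curve out of the CSV
# opened above. The 'AverageReturn' column name is an assumption about what this
# experiment logs; adjust it to the headers actually present in progress.csv.
import csv

with open('data/local/trpopoint2d-test/test/progress.csv', 'r') as f:
    rows = list(csv.DictReader(f))
returns = [float(row['AverageReturn']) for row in rows]  # assumed column name
print('final average return:', returns[-1])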
from sandbox.rocky.tf.algos.vpg import VPG
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc.instrument import stub, run_experiment_lite

env = TfEnv(normalize(CartpoleEnv()))
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=10000,
    max_path_length=100,
    n_itr=4,
    discount=0.99,
    optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01)))
algo.train()
import os
import pickle
import random

import numpy as np
import tensorflow as tf

from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.misc import logger
from sandbox.rocky.tf.algos.vpg import VPG
from sandbox.rocky.tf.envs.base import TfEnv
# SawyerPushEnv, SawyerPickPlaceEnv, FinnMamlEnv, FlatGoalEnv and NormalizedBoxEnv
# are project-specific; import them from your multiworld checkout.


def experiment(variant):
    # Seed TensorFlow, NumPy, and the stdlib RNG for reproducibility.
    seed = variant['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    initial_params_file = variant['initial_params_file']
    goalIndex = variant['goalIndex']
    init_step_size = variant['init_step_size']
    regionSize = variant['regionSize']
    mode = variant['mode']

    if 'docker' in mode:
        taskFilePrefix = '/root/code'
    else:
        taskFilePrefix = '/home/russellm'

    # Validation goals come from a separate task file when valRegionSize is given.
    if variant['valRegionSize'] is not None:
        valRegionSize = variant['valRegionSize']
        tasksFile = taskFilePrefix + '/multiworld/multiworld/envs/goals/pickPlace_' + valRegionSize + '_val.pkl'
    else:
        tasksFile = taskFilePrefix + '/multiworld/multiworld/envs/goals/pickPlace_' + regionSize + '.pkl'
    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(
        FlatGoalEnv(baseEnv, obs_keys=['state_observation', 'state_desired_goal']))
    env = TfEnv(NormalizedBoxEnv(env))

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    # VPG adaptation: the policy is loaded from initial_params_file rather than built here.
    algo = VPG(
        env=env,
        policy=None,
        load_policy=initial_params_file,
        baseline=baseline,
        batch_size=7500,  # 2x
        max_path_length=150,
        n_itr=10,
        reset_arg=goalIndex,
        optimizer_args={
            'init_learning_rate': init_step_size,
            'tf_optimizer_args': {
                'learning_rate': 0.1 * init_step_size
            },
            'tf_optimizer_cls': tf.train.GradientDescentOptimizer
        })

    # Create saveDir (and any missing parent directories) before logging to it.
    saveDir = variant['saveDir']
    currPath = ''
    for _dir in saveDir.split('/'):
        currPath += _dir + '/'
        if not os.path.isdir(currPath):
            os.mkdir(currPath)
    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()
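# Hypothetical invocation sketch: the keys below mirror exactly what experiment() reads
# above; all values are placeholders.
example_variant = {
    'seed': 1,
    'initial_params_file': '/path/to/maml/init_params.pkl',  # placeholder path
    'goalIndex': 0,
    'init_step_size': 0.1,
    'regionSize': '60X20',       # placeholder region identifier used in the task-file name
    'valRegionSize': None,       # set to a string to load the *_val.pkl task file instead
    'mode': 'local',             # anything without 'docker' uses the /home/russellm prefix
    'envType': 'PickPlace',      # or 'Push'
    'saveDir': '/tmp/vpg_adapt_run0/',
}
experiment(example_variant)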
from sandbox.rocky.tf.algos.vpg import VPG
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc.instrument import stub, run_experiment_lite

env = TfEnv(normalize(CartpoleEnv()))
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 32 hidden units.
    hidden_sizes=(32, 32)
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=10000,
    max_path_length=100,
    n_itr=40,
    discount=0.99,
    optimizer_args=dict(
        tf_optimizer_args=dict(
            learning_rate=0.01,
        )
    )
)
algo.train()