def load_expert_policies(self, sess):
    # Build one Gaussian MLP policy per expert task and restore its pickled
    # mean/std parameters into the current TF graph.
    for task in range(self.numExpertPolicies):
        print("###### LOADING EXPERT " + str(task) + " ######")
        policy = GaussianMLPPolicy(
            name='expert' + str(task),
            env_spec=self.env.spec,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100))
        weights = pickle.load(
            open(
                self.expertDataLoc + "Task_" + str(task) + "/itr_" +
                str(self.expertDataItr) + ".pkl", 'rb'))
        for key in policy.mean_params:
            sess.run(
                tf.assign(policy.mean_params[key],
                          weights['mean_params'][key]))
        sess.run(tf.assign(policy.std_params, weights['std_params']))
        self.expertPolicies[task] = policy
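# Usage sketch (not part of the original file): 'experiment' is a hypothetical
# instance of the class that owns load_expert_policies, with numExpertPolicies,
# expertDataLoc, expertDataItr, env, and expertPolicies already configured.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    experiment.load_expert_policies(sess)   # restores all expert weights via tf.assign
    expert0 = experiment.expertPolicies[0]  # e.g., query the task-0 expert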
if task_var == 1:  # assumed branch header; the file's earlier branches are not shown here
    exp_prefix = 'posticml_trpo_maml_ant' + str(max_path_length)
    if oracle:
        env = TfEnv(normalize(AntEnvOracle()))
    else:
        env = TfEnv(normalize(AntEnvRand()))
elif task_var == 2:
    task_var = 'pos'
    exp_prefix = 'posticml_trpo_maml_antpos_' + str(max_path_length)
    if oracle:
        env = TfEnv(normalize(AntEnvRandGoalOracle()))
    else:
        env = TfEnv(normalize(AntEnvRandGoal()))
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=max_path_length * 100,  # number of trajectories per gradient update
    max_path_length=max_path_length,
    n_itr=2000,
    use_maml=use_maml,
    step_size=0.01,
    plot=False,
)
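# Launch sketch (assumed; mirrors the run_experiment_lite calls used by the
# other scripts in this section). Seed and parallelism values are illustrative.
run_experiment_lite(
    algo.train(),
    exp_prefix=exp_prefix,
    n_parallel=4,
    snapshot_mode="last",
    seed=1,
)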
for step_i, initial_params_file in zip(range(len(step_sizes)),
                                       initial_params_files):
    avg_returns = []
    for goal in goals:
        goal = list(goal)
        # Oracle runs receive the goal as input, so one iteration suffices;
        # otherwise fine-tune for a few iterations.
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(PointEnvRandGoalOracle(goal=goal))
            n_itr = 1
        else:
            env = normalize(PointEnvRandGoal(goal=goal))
            n_itr = 5
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # randomly initialized; replaced below when loading params
            name='policy',
            env_spec=env.spec,
            hidden_sizes=(100, 100),
        )
        if initial_params_file is not None:
            policy = None
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=4000,  # 2x
            max_path_length=100,
            n_itr=n_itr,
        )
all_avg_returns = []
for step_i, initial_params_file in zip(range(len(step_sizes)),
                                       initial_params_files):
    avg_returns = []
    for goal_i, goal in zip(range(len(goals)), goals):
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(AntEnvDirecOracle())
            n_itr = 1
        else:
            env = normalize(AntEnvRandDirec())
            n_itr = 4
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # randomly initialized; replaced below when loading params
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )
        if initial_params_file is not None:
            policy = None
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=8000,
            max_path_length=200,
            n_itr=n_itr,
        )
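# Aggregation sketch (assumed): the truncated loops above are expected to
# append each goal's average return to avg_returns and each step size's list
# to all_avg_returns; a summary like the one below then compares step sizes.
import numpy as np
for step_i, returns in enumerate(all_avg_returns):
    print('step size', step_sizes[step_i],
          'mean return', np.mean(returns), '+/-', np.std(returns))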
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import stub, run_experiment_lite
# Imports assumed to complete the snippet; paths follow the other scripts here.
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from sandbox.rocky.tf.algos.trpo import TRPO
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.policies.minimal_gauss_mlp_policy import GaussianMLPPolicy
from examples.point_env_randgoal_oracle import PointEnvRandGoalOracle

stub(globals())

import tensorflow as tf

# env = normalize(PointEnvRandGoal())
env = normalize(PointEnvRandGoalOracle())
# env = normalize(HalfCheetahEnv())
# env = normalize(Walker2DEnv())
env = TfEnv(env)
policy = GaussianMLPPolicy(
    name='policy',
    env_spec=env.spec,
    # The policy has two hidden layers, each with 100 hidden units.
    # hidden_sizes=(32, 32)
    # hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=500,  # was 4k
    max_path_length=5,
    n_itr=100,
    discount=0.99,
    step_size=0.01,
)
from rllab.misc.instrument import stub, run_experiment_lite

stub(globals())

# env = normalize(SwimmerEnv())
env = normalize(SwimmerRandGoalOracleEnv())
# env = normalize(SwimmerRandGoalEnv())
max_path_length = 100
# env = normalize(HalfCheetahEnv())
# env = normalize(Walker2DEnv())
if use_tf:
    env = TfEnv(env)
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        # The policy has two hidden layers, each with 100 hidden units.
        # hidden_sizes=(32, 32)
        hidden_sizes=(100, 100))
else:
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The policy has two hidden layers, each with 100 hidden units.
        hidden_sizes=(100, 100))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=max_path_length * 10,  # was 4k
)
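# Context sketch (assumed): the snippet above relies on a use_tf flag and a
# matching GaussianMLPPolicy import defined earlier in the file, e.g.:
use_tf = True
if use_tf:
    from sandbox.rocky.tf.envs.base import TfEnv
    from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
else:
    from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy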
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from examples.point_env import PointEnv
from examples.point_env_randgoal import PointEnvRandGoal
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import stub, run_experiment_lite
# from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
# from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.policies.minimal_gauss_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.algos.vpg import VPG  # needed below; path assumed to match the sandbox layout

stub(globals())

env = TfEnv(normalize(PointEnv()))
# env = TfEnv(normalize(PointEnvRandGoal()))
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    # plot=True,
)
run_experiment_lite(
    algo.train(),
    n_parallel=1,
    snapshot_mode="last",
    seed=1,
    # plot=True,
)
import math

for step_i, initial_params_file in zip(range(len(step_sizes)),
                                       initial_params_files):
    avg_returns = []
    for goal in goals:
        # Goals are directions in radians; report them in degrees.
        print('goal =', math.degrees(goal))
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(AntEnvOracle())
            n_itr = 1
        else:
            env = normalize(CellRobotRandDirectEnv())
            n_itr = 5
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # randomly initialized; replaced below when loading params
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.sigmoid,
            hidden_sizes=(64, 64),
        )
        if initial_params_file is not None:
            policy = None
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=400,  # 2x
        )
stub(globals())

oracle = False
random = True
if oracle:
    env = TfEnv(normalize(SwimmerRandGoalOracleEnv()))
    batch_size = 200
elif random:
    env = TfEnv(normalize(SwimmerRandGoalEnv()))
    batch_size = 200
else:
    env = TfEnv(normalize(SwimmerEnv()))
    batch_size = 20
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_sizes=(100, 100),
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
# baseline = ZeroBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=500 * batch_size,
    max_path_length=500,
    n_itr=500,
    # plot=True,
    optimizer_args={'tf_optimizer_args': {'learning_rate': 1e-3}},
)
run_experiment_lite(  # call completed with assumed arguments, following the other launcher scripts
    algo.train(),
    n_parallel=1,
    snapshot_mode="last",
    seed=1,
)