# Experiment setup: seed every random number generator in use, then build the
# environment, the MAML policy and the baseline selected by `bas`.
#
# NOTE(review): this fragment had lost its line breaks, so everything after the
# `###` marker was being parsed as a comment. The statement layout below is the
# reconstructed structure -- confirm against the enclosing scope.
rd.seed(seed)
###
# Fold the seed into the range accepted by np.random.seed(), which raises
# ValueError for values outside [0, 2**32 - 1].
seed %= 4294967294
global seed_
seed_ = seed
rd.seed(seed)  # seed again, now with the normalised value
np.random.seed(seed)
try:
    import tensorflow as tf

    tf.set_random_seed(seed)
except Exception as e:
    # Best effort: a failing TensorFlow import/seed call is reported, not fatal.
    # The policy below still references `tf`, so a failed import will surface
    # there as a NameError.
    print(e)
print('using seed %s' % (str(seed)))

env = TfEnv(normalize(PointEnvRandGoal()))
policy = MAMLGaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    grad_step_size=fast_learning_rate,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
    std_modifier=pre_std_modifier,
)

# Baseline selection: exactly 'zero', any value containing 'linear', otherwise
# the Gaussian MLP baseline.
if bas == 'zero':
    baseline = ZeroBaseline(env_spec=env.spec)
elif 'linear' in bas:
    baseline = LinearFeatureBaseline(env_spec=env.spec)
else:
    baseline = GaussianMLPBaseline(env_spec=env.spec)
#expert_policy = PointEnvExpertPolicy(env_spec=env.spec)
# Evaluation sweep (fragment). For every (step index, initial-params file) pair
# and every goal it builds a goal-specific point environment wrapped in TfEnv,
# a Gaussian MLP policy with two hidden layers of 100 units, a linear-feature
# baseline, and then starts constructing a VPG learner.
#   * When the params file is not None and contains 'oracle', the environment
#     is PointEnvRandGoalOracle and n_itr is 1; otherwise the environment is
#     PointEnvRandGoal and n_itr is 5.
#   * When a params file is given, the freshly built policy is replaced by
#     None -- presumably the learner then loads the policy from that file;
#     TODO confirm against the VPG arguments that follow.
#   * `exp_names` prefixes each entry of `names` with `gen_name`.
# NOTE(review): line breaks and indentation were lost in this fragment and the
# VPG(...) call is cut off, so the code is left untouched here; the loop and
# branch nesting must be restored from the original file.
names = ['maml', 'maml0', 'random', 'oracle'] exp_names = [gen_name + name for name in names] all_avg_returns = [] for step_i, initial_params_file in zip(range(len(step_sizes)), initial_params_files): avg_returns = [] for goal in goals: goal = list(goal) if initial_params_file is not None and 'oracle' in initial_params_file: env = normalize(PointEnvRandGoalOracle(goal=goal)) n_itr = 1 else: env = normalize(PointEnvRandGoal(goal=goal)) n_itr = 5 env = TfEnv(env) policy = GaussianMLPPolicy( # random policy name='policy', env_spec=env.spec, hidden_sizes=(100, 100), ) if initial_params_file is not None: policy = None baseline = LinearFeatureBaseline(env_spec=env.spec) algo = VPG( env=env, policy=policy,
# Meta-experiment setup: seed every random number generator in use, build one
# wrapped environment per goal, then create the central MAML policy and its
# baseline.
#
# NOTE(review): this fragment had lost its line breaks, so the leading `###`
# turned the whole setup into a comment. The statement layout below is the
# reconstructed structure -- confirm against the enclosing scope.
###
# Fold the seed into the range accepted by np.random.seed(), which raises
# ValueError for values outside [0, 2**32 - 1].
seed %= 4294967294
global seed_
seed_ = seed
rd.seed(seed)
np.random.seed(seed)
try:
    import tensorflow as tf

    tf.set_random_seed(seed)
except Exception as e:
    # Best effort: a failing TensorFlow import/seed call is reported, not fatal.
    # The policy below still references `tf`, so a failed import will surface
    # there as a NameError.
    print(e)
print('using seed %s' % (str(seed)))

# `env` (no fixed goal) supplies the spec for the policy and baseline; the
# per-goal environments are kept both raw and wrapped.
env = TfEnv(normalize(PointEnvRandGoal()))
base_partitions = [PointEnvRandGoal(goal=goal) for goal in goals]
partitions = [TfEnv(normalize(part_env)) for part_env in base_partitions]

metaPolicy = MAMLGaussianMLPPolicy(
    name="central_policy",
    env_spec=env.spec,
    grad_step_size=fast_learning_rate,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
    std_modifier=pre_std_modifier,
    num_tasks=meta_batch_size,
    updateMode=updateMode,
)
metaBaseline = LinearFeatureBaseline(env_spec=env.spec)