예제 #1
0
                                    # Seed `rd` with the raw seed. It is seeded again below once the
                                    # seed has been reduced, so this first call is superseded.
                                    # (`rd` is presumably the stdlib `random` module -- confirm against
                                    # the imports, which are outside this excerpt.)
                                    rd.seed(seed)

                                    ### Seed every random number generator from one reduced seed value.
                                    # Reduce the seed modulo 4294967294 (2**32 - 2); presumably this keeps
                                    # it inside the range np.random.seed accepts -- TODO confirm.
                                    seed %= 4294967294
                                    # Publish the reduced seed through the module-level name `seed_`.
                                    global seed_
                                    seed_ = seed
                                    rd.seed(seed)
                                    np.random.seed(seed)
                                    # Best-effort TensorFlow seeding: any exception raised by the import
                                    # or by set_random_seed is printed and execution continues.
                                    # NOTE(review): `tf.nn.relu` is used below, so if this import fails
                                    # and `tf` is not bound elsewhere, policy construction will raise.
                                    try:
                                        import tensorflow as tf

                                        tf.set_random_seed(seed)
                                    except Exception as e:
                                        print(e)
                                    print('using seed %s' % (str(seed)))
                                    # Environment: PointEnvRandGoal wrapped by normalize, then by TfEnv.
                                    env = TfEnv(normalize(PointEnvRandGoal()))
                                    # MAML Gaussian MLP policy built from the environment spec.
                                    # `fast_learning_rate` is passed as the policy's grad_step_size and
                                    # `pre_std_modifier` as its std_modifier; both come from the
                                    # enclosing scope, which is not visible here.
                                    policy = MAMLGaussianMLPPolicy(
                                        name="policy",
                                        env_spec=env.spec,
                                        grad_step_size=fast_learning_rate,
                                        hidden_nonlinearity=tf.nn.relu,
                                        hidden_sizes=(100, 100),
                                        std_modifier=pre_std_modifier,
                                    )
                                    # Choose the baseline from `bas`: exactly 'zero' -> ZeroBaseline;
                                    # any value containing the substring 'linear' -> LinearFeatureBaseline;
                                    # everything else -> GaussianMLPBaseline.
                                    if bas == 'zero':
                                        baseline = ZeroBaseline(env_spec=env.spec)
                                    elif 'linear' in bas:
                                        baseline = LinearFeatureBaseline(env_spec=env.spec)
                                    else:
                                        baseline = GaussianMLPBaseline(env_spec=env.spec)
                                    #expert_policy = PointEnvExpertPolicy(env_spec=env.spec)
# Short labels; each is prefixed with `gen_name` to build the entries of `exp_names`.
names = ['maml', 'maml0', 'random', 'oracle']

exp_names = [gen_name + name for name in names]

# Accumulator for results across the outer loop. The code that appends to it
# is outside this excerpt -- presumably one `avg_returns` list per iteration.
all_avg_returns = []
# Pair a running index with each initial-parameters file. zip stops at the
# shorter of `step_sizes` and `initial_params_files`.
for step_i, initial_params_file in zip(range(len(step_sizes)),
                                       initial_params_files):
    avg_returns = []
    for goal in goals:
        # Materialize the goal as a list before handing it to the environment.
        goal = list(goal)

        # A params file whose name contains 'oracle' selects the oracle
        # environment and a single iteration; every other case (including
        # None) uses the plain random-goal environment with five iterations.
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(PointEnvRandGoalOracle(goal=goal))
            n_itr = 1
        else:
            env = normalize(PointEnvRandGoal(goal=goal))
            n_itr = 5
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # random policy
            name='policy',
            env_spec=env.spec,
            hidden_sizes=(100, 100),
        )

        # When a params file is given, the freshly built policy is dropped.
        # NOTE(review): presumably the algorithm then loads its policy from
        # `initial_params_file`; that code is not visible here -- confirm.
        if initial_params_file is not None:
            policy = None

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
                                    ### Seed every random number generator from one reduced seed value.
                                    # Reduce the seed modulo 4294967294 (2**32 - 2); presumably this keeps
                                    # it inside the range np.random.seed accepts -- TODO confirm.
                                    seed %= 4294967294
                                    # Publish the reduced seed through the module-level name `seed_`.
                                    global seed_
                                    seed_ = seed
                                    # `rd` is presumably the stdlib `random` module -- confirm against
                                    # the imports, which are outside this excerpt.
                                    rd.seed(seed)
                                    np.random.seed(seed)
                                    # Best-effort TensorFlow seeding: any exception raised by the import
                                    # or by set_random_seed is printed and execution continues.
                                    # NOTE(review): `tf.nn.relu` is used below, so if this import fails
                                    # and `tf` is not bound elsewhere, policy construction will raise.
                                    try:
                                        import tensorflow as tf

                                        tf.set_random_seed(seed)
                                    except Exception as e:
                                        print(e)
                                    print('using seed %s' % (str(seed)))


                                    # Environment built without an explicit goal; its spec is what the
                                    # meta policy and meta baseline below are constructed from.
                                    env = TfEnv(normalize(PointEnvRandGoal()))
                                    # One environment per entry of `goals`, each wrapped by normalize
                                    # and then by TfEnv.
                                    base_partitions = [PointEnvRandGoal(goal = goal) for goal in goals]                                       
                                    partitions = [TfEnv(normalize(part_env)) for part_env in base_partitions]

                                    # MAML Gaussian MLP policy named "central_policy".
                                    # `fast_learning_rate`, `pre_std_modifier`, `meta_batch_size` and
                                    # `updateMode` come from the enclosing scope, which is not visible
                                    # here; `meta_batch_size` is passed as the policy's num_tasks.
                                    metaPolicy = MAMLGaussianMLPPolicy(
                                        name="central_policy",
                                        env_spec=env.spec,
                                        grad_step_size=fast_learning_rate,
                                        hidden_nonlinearity=tf.nn.relu,
                                        hidden_sizes=(100, 100),
                                        std_modifier=pre_std_modifier,
                                        num_tasks = meta_batch_size,
                                        updateMode = updateMode
                                    )
                                    
                                    # Linear-feature baseline built from the same environment spec.
                                    metaBaseline = LinearFeatureBaseline(env_spec=env.spec)