Example #1
    # Requires (as in the rllab / MAML-RL codebase): tensorflow as tf, TfEnv,
    # normalize, GaussianMLPPolicy, and the AntEnv* task environments.
    oracle = v['oracle']  # `v` is the experiment's variant dict

    # Pick the task distribution: 0 = target direction, 1 = target velocity,
    # 2 = goal position. Each case selects either the oracle (task-conditioned)
    # environment or the task-randomized one, and a matching experiment prefix.
    if task_var == 0:
        task_var = 'direc'
        exp_prefix = 'bugfix_trpo_maml_antdirec' + str(max_path_length)
        if oracle:
            env = TfEnv(normalize(AntEnvDirecOracle()))
        else:
            env = TfEnv(normalize(AntEnvRandDirec()))
    elif task_var == 1:
        task_var = 'vel'
        exp_prefix = 'posticml_trpo_maml_ant' + str(max_path_length)
        if oracle:
            env = TfEnv(normalize(AntEnvOracle()))
        else:
            env = TfEnv(normalize(AntEnvRand()))
    elif task_var == 2:
        task_var = 'pos'
        exp_prefix = 'posticml_trpo_maml_antpos_' + str(max_path_length)
        if oracle:
            env = TfEnv(normalize(AntEnvRandGoalOracle()))
        else:
            env = TfEnv(normalize(AntEnvRandGoal()))

    # Two-layer ReLU Gaussian MLP policy over the chosen environment.
    policy = GaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )
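
For context, the snippet above runs once per variant of an experiment sweep, with `v` supplying the settings. A minimal sketch of such a driver loop, assuming rllab's VariantGenerator (the variant name mirrors the snippet; the concrete values are illustrative, not the original script):

from rllab.misc.instrument import VariantGenerator

vg = VariantGenerator()
vg.add('oracle', [True, False])  # mirrors v['oracle'] above

max_path_length = 200  # illustrative horizon; also used in exp_prefix
task_var = 2           # 0 = direction, 1 = velocity, 2 = goal position

for v in vg.variants():
    # body as in Example #1: builds env, exp_prefix, and policy per variant
    ...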
Example #2
# Requires (as in the rllab / MAML-RL codebase): tensorflow as tf, TfEnv,
# normalize, AntEnvRand, AntEnvOracle, GaussianMLPPolicy,
# LinearFeatureBaseline, and VPG. file1/file2/file3 are paths to
# pre-trained policy snapshots defined earlier in the script.
step_sizes = [0.1, 0.2, 1.0, 0.0]  # one test-time step size per params file
initial_params_files = [file1, file2, None, file3]

all_avg_returns = []
for step_i, initial_params_file in enumerate(initial_params_files):
    avg_returns = []

    for goal in goals:  # `goals`: the sampled evaluation tasks, defined earlier

        # Oracle runs are task-conditioned, so a single iteration suffices;
        # otherwise run several adaptation iterations.
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(AntEnvOracle())
            n_itr = 1
        else:
            env = normalize(AntEnvRand())
            n_itr = 4
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # randomly initialized policy
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )

        if initial_params_file is not None:
            # discard the fresh policy; the algorithm loads the pre-trained
            # weights from the params file instead
            policy = None
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
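            # Truncated in the source listing. A plausible completion, assuming
            # the modified rllab VPG from the MAML codebase (its `load_policy`
            # argument and the values below are assumptions, not the original):
            policy=policy,                    # None when adapting a pre-trained policy
            load_policy=initial_params_file,  # assumed snapshot-loading argument
            baseline=baseline,
            max_path_length=200,              # illustrative
            n_itr=n_itr,                      # 1 for oracle runs, 4 otherwise
        )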