Code Example #1
import gym
import numpy as np
from gym.wrappers import Monitor

# The two imports below assume the pytorch-maml-rl package layout
from maml_rl.metalearner import MetaLearner
from maml_rl.sampler import BatchSampler

# ENV_NAME, ENV_PREFIX, META_POLICY_PATH, BASELINE_PATH, TEST_TASKS,
# EVAL_STEPS and GRAD_STEPS, as well as the load_meta_learner_params and
# evaluate helpers, are assumed to be defined elsewhere in the script.


def main():
    env = gym.make(ENV_NAME)
    # Record evaluation rollouts as videos (gym.wrappers.Monitor, gym < 0.21)
    env = Monitor(env, f"./videos/{ENV_PREFIX}", force=True)
    policy, baseline = load_meta_learner_params(META_POLICY_PATH,
                                                BASELINE_PATH, env)
    # The sampler collects rollouts in parallel workers; no meta-optimizer
    # is needed at evaluation time, hence optimizer=None
    sampler = BatchSampler(env_name=ENV_NAME, batch_size=20, num_workers=2)
    learner = MetaLearner(sampler, policy, baseline, optimizer=None)

    for task in TEST_TASKS:
        returns = []

        for i in range(1, EVAL_STEPS + 1):
            if i % 10 == 0:
                print(f"Evaluation-step: {i}")

            # Evaluate each adaptation budget (number of inner gradient steps)
            for grad_steps in GRAD_STEPS:

                env.reset_task(task)
                # Reload the meta-trained parameters so every evaluation
                # starts its adaptation from the same meta-policy
                learner.policy, learner.baseline = load_meta_learner_params(
                    META_POLICY_PATH, BASELINE_PATH, env)

                # Sample a batch of transitions
                sampler.reset_task(task)
                episodes = sampler.sample(learner.policy)
                # Inner-loop adaptation: one or more policy-gradient steps
                for _ in range(grad_steps):
                    new_params = learner.adapt(episodes)
                    learner.policy.set_params_with_name(new_params)
                # Roll out the adapted policy on the task and record the return
                returns.append(evaluate(env, task, learner.policy))

        print("========EVAL RESULTS=======")
        print(f"Task: {task}")
        print(f"Returns: {returns}")
        print(f"Average Return: {np.mean(returns)}")
        print("===========================")
Code Example #2
# The start of this call is cut off in the original; the leading line is
# reconstructed on the assumption that it is the pytorch-maml-rl
# MetaLearner constructor (sampler, policy and baseline built earlier)
metalearner = MetaLearner(sampler, policy, baseline,
                          gamma=args.gamma,
                          fast_lr=args.fast_lr,
                          tau=args.tau,
                          device=args.device)

env = gym.make(args.env_name)

# new task!
episodes = []

# randomly sample a task
test_task = sampler.sample_tasks(num_tasks=1)

# ...or set a specific task manually:
#test_task = []
#test_task.append({'velocity': 1.9})
sampler.reset_task(test_task[0])

print("new task: ", test_task[0], ", where 1 is forward")

#task = env.unwrapped.sample_tasks(1)
env.unwrapped.reset_task(test_task[0])
observations = env.reset()
print("new task: ", env.step([1])[3]['task'], ", where 1 is forward")
_theta = env.step([1])[3]['task']
degrees = 180 * _theta['theta'] / np.pi
print("new task in degrees: ", degrees)

# the_model is presumably the meta-trained policy network
train_episodes = metalearner.sampler.sample(the_model,
                                            gamma=args.gamma,
                                            device=args.device)
print("number of train episodes: ", len(train_episodes))