Example #1
import gym
import numpy as np
from gym.wrappers import Monitor  # classic Gym video recorder (gym < 0.20)

# BatchSampler, MetaLearner, load_meta_learner_params, evaluate, and the
# upper-case constants are assumed to be defined elsewhere in the project.


def main():
    env = gym.make(ENV_NAME)
    # Record evaluation videos; force=True overwrites earlier recordings
    env = Monitor(env, f"./videos/{ENV_PREFIX}", force=True)
    policy, baseline = load_meta_learner_params(META_POLICY_PATH,
                                                BASELINE_PATH, env)
    sampler = BatchSampler(env_name=ENV_NAME, batch_size=20, num_workers=2)
    # No optimizer: the learner is only used for test-time adaptation
    learner = MetaLearner(sampler, policy, baseline, optimizer=None)

    for task in TEST_TASKS:
        returns = []

        for i in range(1, EVAL_STEPS + 1):
            if i % 10 == 0:
                print(f"Evaluation step: {i}")

            for grad_steps in GRAD_STEPS:
                env.reset_task(task)
                # Re-load the meta-learned initialization for each trial
                learner.policy, learner.baseline = load_meta_learner_params(
                    META_POLICY_PATH, BASELINE_PATH, env)

                # Sample a batch of transitions from the current task
                sampler.reset_task(task)
                episodes = sampler.sample(learner.policy)
                # Take `grad_steps` adaptation (gradient) steps on the batch
                for _ in range(grad_steps):
                    new_params = learner.adapt(episodes)
                    learner.policy.set_params_with_name(new_params)
                returns.append(evaluate(env, task, learner.policy))

        print("========EVAL RESULTS=======")
        print(f"Task: {task}")
        print(f"Returns: {returns}")
        print(f"Average Return: {np.mean(returns)}")
        print("===========================")
Example #2
import gym
import cherry as ch
from gym.wrappers import Monitor  # classic Gym video recorder (gym < 0.20)

# ENV_SIZE, RENDER, and get_random_action are assumed to be defined elsewhere,
# and the 'MiniGrid-Empty-v0' task to be registered (e.g. via gym_minigrid).


def main():
    env = gym.make('MiniGrid-Empty-v0', size=ENV_SIZE)
    env = ch.envs.Torch(env)   # cast observations/actions to torch tensors
    env = ch.envs.Runner(env)  # cherry wrapper that collects rollouts via run()
    env = Monitor(env,
                  "./vid",
                  video_callable=lambda episode_id: True,  # record every episode
                  force=True)

    # Roll out a random policy on a few sampled task configurations
    for task_config in env.sample_tasks(4):
        env.reset_task(task_config)
        env.reset()
        # run() returns the collected transitions (unused here)
        transitions = env.run(get_random_action, episodes=5, render=RENDER)
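
The rollout calls a `get_random_action` function that is not shown. A minimal stand-in (an assumption, not the repo's definition) simply ignores the observation and samples from the action space, since cherry's `Runner` invokes the policy with the current state at each step:

def get_random_action(state):
    # HYPOTHETICAL sketch: act uniformly at random, ignoring the state.
    # Assumes `env` (or at least its action space) is visible at module scope.
    return env.action_space.sample()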