def main():
    """Evaluate a trained meta-learner (MAML-style) on each held-out task.

    For every task in TEST_TASKS, repeatedly: reload the meta-trained
    policy/baseline, sample a fresh batch of episodes for the task, take
    `grad_steps` adaptation steps, then record the adapted policy's return.
    Prints a per-task summary at the end of each task's evaluation.

    NOTE(review): nesting below is reconstructed from a whitespace-mangled
    source line — confirm loop structure against the original repository.
    """
    env = gym.make(ENV_NAME)
    # Record evaluation videos alongside the run.
    env = Monitor(env, f"./videos/{ENV_PREFIX}", force=True)

    policy, baseline = load_meta_learner_params(META_POLICY_PATH, BASELINE_PATH, env)
    sampler = BatchSampler(env_name=ENV_NAME, batch_size=20, num_workers=2)
    # optimizer=None: evaluation only, no meta-updates are performed here.
    learner = MetaLearner(sampler, policy, baseline, optimizer=None)

    for task in TEST_TASKS:
        returns = []
        for i in range(1, EVAL_STEPS + 1):
            for n_adapt in GRAD_STEPS:
                if i % 10 == 0:
                    print(f"Evaluation-step: {i}")

                env.reset_task(task)
                # Restore the pristine meta-parameters before each adaptation
                # trial so earlier gradient steps do not leak into this one.
                learner.policy, learner.baseline = load_meta_learner_params(
                    META_POLICY_PATH, BASELINE_PATH, env)

                # Sample a batch of transitions for the current task.
                sampler.reset_task(task)
                episodes = sampler.sample(learner.policy)

                # Inner-loop adaptation: n_adapt gradient steps on the batch.
                for _ in range(n_adapt):
                    adapted = learner.adapt(episodes)
                    learner.policy.set_params_with_name(adapted)

                returns.append(evaluate(env, task, learner.policy))

        print("========EVAL RESULTS=======")
        print(f"Task: {task}")
        print(f"Returns: {returns}")
        print(f"Average Return: {np.mean(returns)}")
        print("===========================")
def main():
    """Roll out random actions on sampled MiniGrid tasks, recording video.

    Builds a MiniGrid environment wrapped for torch tensors (ch.envs.Torch),
    episode running (ch.envs.Runner), and video capture (Monitor), then for
    each of 4 sampled task configurations runs 5 episodes with a random
    action policy.
    """
    env_name = 'MiniGrid-Empty-v0'
    base_env = gym.make(env_name, size=ENV_SIZE)
    # Wrap: tensor conversion -> episode runner -> video recorder.
    wrapped = ch.envs.Torch(base_env)
    wrapped = ch.envs.Runner(wrapped)
    # video_callable always True: record every episode.
    env = Monitor(env=wrapped, directory="./vid",
                  video_callable=lambda episode_id: True, force=True)

    for task_config in env.sample_tasks(4):
        env.reset_task(task_config)
        env.reset()
        # Result of the rollout is not used further; episodes run for
        # their side effects (rendering / video capture).
        transition = env.run(get_random_action, episodes=5, render=RENDER)