Esempio n. 1
0
def test_agent(env_name, policy_update_filename, frame_stack):
    """Roll out a saved policy for one rendered episode and log the outcome.

    Args:
        env_name: Environment identifier forwarded to ``make_env``.
        policy_update_filename: Path handed to ``torch.load``. The loaded
            object must provide ``eval()`` and a callable ``policy``
            attribute that maps an observation tensor to an object with a
            ``sample()`` method.
        frame_stack: Forwarded to ``make_env`` as the ``frame_stack`` keyword.

    Returns:
        None. The accumulated reward and the step count are written to the
        debug log.
    """
    # NOTE(review): torch.load unpickles its input, which can execute
    # arbitrary code -- only point this at checkpoint files you trust.
    policy_update = torch.load(policy_update_filename)
    policy_update.eval()
    logger.debug(f"Loaded : {policy_update}")
    policy = policy_update.policy

    env = make_env(env_name, 1, frame_stack=frame_stack)
    total_reward = 0
    steps = 0
    try:
        obs = env.reset()
        done = np.array([False])
        # Pure inference: no autograd bookkeeping is needed during the rollout.
        with torch.no_grad():
            # Keep stepping until every entry of ``done`` is true.
            while not done.all():
                env.render()
                dist = policy(torch.from_numpy(obs).float())
                act = dist.sample()
                obs, rew, done, _ = env.step(act.numpy())
                total_reward += rew
                steps += 1

        logger.debug(f"Total reward : {total_reward}")
        logger.debug(f"Episode length : {steps}")
    finally:
        # Release the environment (and its render window) even if the
        # rollout raised part-way through.
        env.close()
Esempio n. 2
0
    parser.add_argument("--env-name",
                        "--env",
                        type=str,
                        default="CarRacing-v0")
    parser.add_argument("--num-envs",
                        type=int,
                        default=multiprocessing.cpu_count())
    parser.add_argument("--n-episodes", type=int, default=4)

    parser.add_argument("--alpha", type=float, default=0.5)
    parser.add_argument("--epochs", type=int, default=1000)
    args = parser.parse_args()

    logger.info("Using RCRC formulation.")

    env = make_env(args.env_name, args.num_envs)
    if isinstance(env.action_space, Discrete):
        n_acts = env.action_space.n
    elif isinstance(env.action_space, Box):
        assert (
            len(env.action_space.shape) == 1
        ), f"This example only works for envs with Box(n,) not {env.action_space} action spaces."
        n_acts = env.action_space.shape[0]

    fixed_model = FixedRandomModel(args.alpha)

    update = RCRCUpdate(fixed_model, args.num_envs, n_acts)
    update.train()

    solve(
        args.env_name,
Esempio n. 3
0
 def test_cartpole_v0(self):
     """Check that ``solve`` returns True on CartPole-v0.

     Builds the environment via ``make_env(name, 1)``, obtains the update
     model from ``self.get_update_model`` and passes both to ``solve``
     together with ``logdir``.
     """
     name = "CartPole-v0"
     cartpole_env = make_env(name, 1)
     update_model = self.get_update_model(cartpole_env)
     solved = solve(name, cartpole_env, update_model, logdir)
     self.assertEqual(solved, True)
Esempio n. 4
0
 def test_lunar_lander_v2(self):
     """Check that ``solve`` returns True on LunarLander-v2 with epochs=500.

     Builds the environment via ``make_env(name, 1)``, obtains the update
     model from ``self.get_update_model`` and passes both to ``solve``
     together with ``logdir``.
     """
     name = "LunarLander-v2"
     lander_env = make_env(name, 1)
     update_model = self.get_update_model(lander_env)
     solved = solve(name, lander_env, update_model, logdir, epochs=500)
     self.assertEqual(solved, True)