Example #1
from typing import Tuple

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.type_aliases import GymEnv


def play(
    model: BaseAlgorithm,
    environment: GymEnv,
    episode_max_steps: int = 100,
    visualize: bool = True,
) -> Tuple[float, int]:
    """
    Play one episode in the given environment.

    :param model: agent that chooses the actions
    :param environment: environment in which the agent acts
    :param episode_max_steps: maximum number of agent moves in a single episode
    :param visualize: whether to render each step of the episode
    :return: episode reward and number of moves taken to finish the episode
    """
    reward = 0.0
    is_done = False
    step = 0
    state = environment.reset()

    while not is_done and step < episode_max_steps:
        if visualize:
            environment.render()
            print()
        action, _ = model.predict(state)
        state, reward, is_done, info = environment.step(action)
        step += 1

    return reward, step
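
A minimal usage sketch for play, assuming a Gym CartPole-v1 environment and a briefly trained PPO model (the environment, model, and training budget are illustrative, not part of the original snippet):

# Hypothetical usage of play() with a freshly trained PPO agent.
import gym
from stable_baselines3 import PPO

env = gym.make("CartPole-v1")
model = PPO("MlpPolicy", env, verbose=0)
model.learn(total_timesteps=10_000)  # illustrative training budget
episode_reward, n_moves = play(model, env, episode_max_steps=200)
print(f"Reward: {episode_reward}, moves: {n_moves}")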
Example #2
import math
import time
from time import sleep
from typing import Union

from stable_baselines3.common.base_class import BaseAlgorithm


def test(test_agent: BaseAlgorithm,
         max_episodes: Union[int, float] = math.inf,
         max_duration: Union[int, float] = math.inf,
         verbose: bool = True) -> None:
    """Test a model on a specific environment using a given agent. It will
    render the result in the default viewer.

    .. note::
        This function can be terminated early using CTRL+C.

    :param test_agent: Agent to test.
    :param max_episodes: Max number of episodes to run. Can be infinite.
                         Optional: infinite by default.
    :param max_duration: Max total duration of the episodes. Can be infinite.
                         Optional: infinite by default.
    :param verbose: Whether or not to print information about what is going on.
                    Optional: True by default.
    """
    # Check user arguments
    if math.isinf(max_episodes) and math.isinf(max_duration):
        raise ValueError(
            "Either 'max_episodes' or 'max_duration' must be finite.")

    # Get environment timestep
    step_dt = test_agent.eval_env.envs[0].step_dt

    try:
        t_init, t_cur = time.time(), time.time()
        num_episodes = 0
        while (num_episodes < max_episodes) and \
                (t_cur - t_init < max_duration):
            obs = test_agent.eval_env.reset()
            cum_step, cum_reward = 0, 0.0
            done = False
            while not done:
                # Update state
                action, _ = test_agent.predict(obs)
                obs, reward, done, _ = test_agent.eval_env.step(action)
                cum_step += 1
                cum_reward += reward[0]

                # Render the current state in default viewer
                test_agent.eval_env.render()
                sleep(max(0.0, step_dt - (time.time() - t_cur)))
                t_cur = time.time()

                # Break the simulation if max duration reached
                if t_cur - t_init > max_duration:
                    break
            num_episodes += 1

            # Print the simulation final state
            if done and verbose:
                print(f"Episode length: {cum_step} - Cumulative reward: "
                      f"{cum_reward}")
    except KeyboardInterrupt:
        if verbose:
            print("Interrupting testing...")
Example #3
from stable_baselines3.common.base_class import BaseAlgorithm

# `RelativeExchange` is assumed to be the project's own trading environment.
def evaluate(environment: RelativeExchange, model: BaseAlgorithm) -> None:
    total_pnl = 0
    total_reward = 0
    done = False
    state = environment.reset()
    while not done:
        action, _ = model.predict(state, deterministic=True)
        state, reward, done, info = environment.step(action)
        total_reward += reward
        total_pnl += info['pnl']

    environment.clean_up()
    print(f'PnL: {total_pnl} / Reward: {total_reward}')
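
A hypothetical call, assuming RelativeExchange is the project's trading environment and model was trained on it (the constructor arguments are project-specific and omitted here):

# Hypothetical usage: evaluate a trained model on a fresh exchange instance.
exchange = RelativeExchange()  # constructor arguments are project-specific
evaluate(exchange, model)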
Example #4
from statistics import mean, stdev

from gym import Env
from stable_baselines3.common.base_class import BaseAlgorithm


def evaluate(model: BaseAlgorithm, env: Env, n_episodes: int = 100):
    episodical_rewards = []

    for _ in range(n_episodes):
        obs = env.reset()
        episodical_reward = 0
        done = False

        while not done:
            # NOTE: the indexing below assumes a vectorized env (batch of size 1).
            action, _ = model.predict(obs)
            obs, rewards, dones, _ = env.step(action)
            episodical_reward += rewards[0]
            done = dones[0]
        episodical_rewards.append(episodical_reward)

    return mean(episodical_rewards), stdev(episodical_rewards)
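
Since this evaluate indexes rewards[0] and dones[0], it expects a vectorized environment. A usage sketch, assuming a single-env VecEnv around CartPole-v1 (all names are illustrative):

# Hypothetical usage with a single-env VecEnv.
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

vec_env = make_vec_env("CartPole-v1", n_envs=1)
model = PPO("MlpPolicy", vec_env).learn(total_timesteps=10_000)
avg_reward, std_reward = evaluate(model, vec_env, n_episodes=20)
print(f"Mean reward: {avg_reward:.1f} +/- {std_reward:.1f}")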
Example #5

from typing import Tuple, Union

import gym
import numpy as np
from tqdm import tqdm

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecEnv


def evaluate(
    model: BaseAlgorithm,
    env: Union[VecEnv, gym.Env],
    number_of_episodes: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Evaluate for a given number of episodes."""
    rewards = []
    episode_lengths = []
    for _ in tqdm(range(number_of_episodes)):
        state = env.reset()
        reward_cum = 0
        steps = 0
        while True:
            actions = model.predict(state)[0]
            state, reward, done, _ = env.step(actions)
            reward_cum += reward
            steps += 1
            if np.any(done):
                break
        rewards.append(reward_cum)
        episode_lengths.append(steps)

    return np.array(rewards), np.array(episode_lengths)
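
This variant accepts a plain Gym env or a VecEnv; with several parallel environments the first done ends the measured episode. A usage sketch under those assumptions (environment and model are illustrative):

# Hypothetical usage with a 4-env VecEnv.
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

vec_env = make_vec_env("CartPole-v1", n_envs=4)
model = PPO("MlpPolicy", vec_env).learn(total_timesteps=10_000)
rewards, lengths = evaluate(model, vec_env, number_of_episodes=10)
print(rewards.mean(axis=0), lengths.mean())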