from typing import Tuple

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.type_aliases import GymEnv


def play(
    model: BaseAlgorithm,
    environment: GymEnv,
    episode_max_steps: int = 100,
    visualize: bool = True,
) -> Tuple[float, int]:
    """
    Plays one game in the given environment.

    :param model: agent taking the actions
    :param environment: environment in which the agent moves
    :param episode_max_steps: maximal number of agent moves in a single episode
    :param visualize: whether to render the environment at every step
    :return: episode reward and number of moves taken to finish the episode
    """
    episode_reward = 0.0
    is_done = False
    step = 0
    state = environment.reset()
    while not is_done and step < episode_max_steps:
        if visualize:
            environment.render()
        action, _ = model.predict(state)
        state, reward, is_done, info = environment.step(action)
        episode_reward += reward
        step += 1
    return episode_reward, step
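# Usage sketch (an assumption, not part of the snippet above): train a small
# PPO agent with stable-baselines3 and watch it play one CartPole episode.
# Assumes the classic Gym API where `step` returns a 4-tuple, matching `play`.
import gym
from stable_baselines3 import PPO

cartpole_env = gym.make("CartPole-v1")
ppo_model = PPO("MlpPolicy", cartpole_env, verbose=0)
ppo_model.learn(total_timesteps=10_000)

episode_reward, n_steps = play(ppo_model, cartpole_env, episode_max_steps=500)
print(f"Episode reward: {episode_reward} in {n_steps} steps")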
import math
import time
from time import sleep

from stable_baselines3.common.base_class import BaseAlgorithm


def test(test_agent: BaseAlgorithm,
         max_episodes: int = math.inf,
         max_duration: int = math.inf,
         verbose: bool = True) -> None:
    """Test a model on a specific environment using a given agent. It will
    render the result in the default viewer.

    .. note::
        This function can be terminated early using CTRL+C.

    :param test_agent: Testing agent.
    :param max_episodes: Max number of episodes to run. Can be infinite.
                         Optional: infinite by default.
    :param max_duration: Max total duration of the episodes. Can be infinite.
                         Optional: infinite by default.
    :param verbose: Whether or not to print information about what is going on.
                    Optional: True by default.
    """
    # Check user arguments
    if math.isinf(max_episodes) and math.isinf(max_duration):
        raise ValueError(
            "Either 'max_episodes' or 'max_duration' must be finite.")

    # Get environment timestep
    step_dt = test_agent.eval_env.envs[0].step_dt

    try:
        t_init, t_cur = time.time(), time.time()
        num_episodes = 0
        while (num_episodes < max_episodes) and \
                (t_cur - t_init < max_duration):
            obs = test_agent.eval_env.reset()
            cum_step, cum_reward = 0, 0.0
            done = False
            while not done:
                # Update state
                action, _ = test_agent.predict(obs)
                obs, reward, done, _ = test_agent.eval_env.step(action)
                cum_step += 1
                cum_reward += reward[0]

                # Render the current state in default viewer, pacing the loop
                # so that rendering roughly matches the environment timestep
                test_agent.eval_env.render()
                sleep(max(step_dt - (time.time() - t_cur), 0.0))
                t_cur = time.time()

                # Break the simulation if max duration reached
                if t_cur - t_init > max_duration:
                    break
            num_episodes += 1

            # Print the simulation final state
            if done and verbose:
                print(f"Episode length: {cum_step} - Cumulative reward: "
                      f"{cum_reward}")
    except KeyboardInterrupt:
        if verbose:
            print("Interrupting testing...")
from stable_baselines3.common.base_class import BaseAlgorithm


def evaluate(environment: RelativeExchange, model: BaseAlgorithm) -> None:
    """Run the model for one full episode and print total PnL and reward.

    ``RelativeExchange`` is a custom trading environment assumed to expose
    ``info['pnl']`` and a ``clean_up()`` method.
    """
    total_pnl = 0.0
    total_reward = 0.0
    done = False
    state = environment.reset()
    while not done:
        action, _ = model.predict(state, deterministic=True)
        state, reward, done, info = environment.step(action)
        total_reward += reward
        total_pnl += info['pnl']
    environment.clean_up()
    print(f'Pnl: {total_pnl} / Reward: {total_reward}')
from statistics import mean, stdev

from gym import Env
from stable_baselines3.common.base_class import BaseAlgorithm


def evaluate(model: BaseAlgorithm, env: Env, n_episodes: int = 100):
    """Return the mean and standard deviation of the episode reward over
    ``n_episodes`` episodes (assumes a vectorized environment)."""
    episodical_rewards = []
    for _ in range(n_episodes):
        obs = env.reset()
        episodical_reward = 0
        done = False
        while not done:
            action, _ = model.predict(obs)
            obs, rewards, dones, _ = env.step(action)
            episodical_reward += rewards[0]
            done = dones[0]
        episodical_rewards.append(episodical_reward)
    return mean(episodical_rewards), stdev(episodical_rewards)
from typing import Tuple, Union

import gym
import numpy as np
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecEnv
from tqdm import tqdm


def evaluate(
    model: BaseAlgorithm,
    env: Union[VecEnv, gym.Env],
    number_of_episodes: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Evaluate for a given number of episodes."""
    rewards = []
    episode_lengths = []
    for _ in tqdm(range(number_of_episodes)):
        state = env.reset()
        reward_cum = 0
        steps = 0
        while True:
            actions = model.predict(state)[0]
            state, reward, done, _ = env.step(actions)
            reward_cum += reward
            steps += 1
            if np.any(done):
                break
        rewards.append(reward_cum)
        episode_lengths.append(steps)
    return np.array(rewards), np.array(episode_lengths)
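# Usage sketch (hypothetical names `trained_model` and `eval_env`): evaluate
# a trained stable-baselines3 model over 20 episodes and summarise the
# returned reward and episode-length arrays.
episode_rewards, episode_lengths = evaluate(trained_model, eval_env, 20)
print(f"Mean reward: {episode_rewards.mean():.2f} "
      f"+/- {episode_rewards.std():.2f} "
      f"(mean length: {episode_lengths.mean():.1f} steps)")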