def rollout(env, policy):
    """Run `policy` in `env` for one episode and return the recorded Episode.

    Each recorded step pairs a state with the action chosen in that state
    and the reward that was received on the transition *into* that state
    (0 for the initial state, since nothing preceded it). After the
    environment reports a terminal timestep, one extra step is appended
    holding the last state, ``None`` as the action, and the last reward.

    Args:
        env: Object exposing ``reset()`` and ``step(action)``; the value
            returned by ``reset()`` must have a ``.state`` attribute and
            the value returned by ``step()`` must have ``.state``,
            ``.reward`` and ``.terminal`` attributes.
        policy: Object exposing ``get_action(state)``.

    Returns:
        The ``Episode`` populated via ``add_step`` with one ``EpisodeStep``
        per action taken, plus the closing action-less step.
    """
    observation = env.reset().state
    incoming_reward = 0  # No reward has been earned before the first action.
    trajectory = Episode()

    # Only timesteps returned by step() are checked for termination, so at
    # least one action is always taken.
    while True:
        chosen_action = policy.get_action(observation)
        trajectory.add_step(
            EpisodeStep(observation, chosen_action, incoming_reward)
        )

        outcome = env.step(chosen_action)
        observation, incoming_reward = outcome.state, outcome.reward
        if outcome.terminal:
            break

    # Close the episode with the last state reached and the reward earned
    # by reaching it; there is no action to record for it.
    trajectory.add_step(EpisodeStep(observation, None, incoming_reward))
    return trajectory