def rollout(env, policy):
	"""Run one episode of ``policy`` in ``env`` and return the recorded steps.

	The environment is reset, then the policy is queried for an action in
	each state until the environment returns a timestep flagged as terminal.

	Every recorded ``EpisodeStep`` holds a state, the action picked in that
	state, and the reward that came with the transition *into* that state.
	The very first step therefore carries a reward of 0, and the last
	recorded step (the state the episode ended in) carries ``None`` as its
	action.

	Args:
		env: Object exposing ``reset()`` (result has a ``state`` attribute)
			and ``step(action)`` (result has ``state``, ``reward`` and
			``terminal`` attributes).
		policy: Object exposing ``get_action(state)``.

	Returns:
		The ``Episode`` the steps were added to.
	"""
	current_state = env.reset().state
	incoming_reward = 0  # nothing has been earned before the first action
	trajectory = Episode()

	# The loop body always runs at least once; termination is checked only
	# after the environment has been stepped.
	while True:
		chosen_action = policy.get_action(current_state)
		trajectory.add_step(
			EpisodeStep(current_state, chosen_action, incoming_reward)
		)

		outcome = env.step(chosen_action)
		current_state, incoming_reward = outcome.state, outcome.reward
		if outcome.terminal:
			break

	# Record the state the episode ended in together with the last reward;
	# no action is taken from it, hence None.
	trajectory.add_step(EpisodeStep(current_state, None, incoming_reward))
	return trajectory