"""Compare the uniform and UCB rollout agents on Pacman's 'testClassic' layout.

The UCB rollout agent attempts to minimize cumulative regret over its pull
budget. Unlike uniform rollout, it doesn't spend much time on non-promising
arms.
"""

from agents import *
from dealers import pacman_dealer


def main():
    """Build both rollout agents and run them for 15 trials on one dealer."""
    # Both agents get the same depth and pull budget; only the UCB agent
    # takes the extra constant `c`.
    uniform_agent = uniform_rollout_agent.UniformRolloutAgent(
        depth=1,
        num_pulls=100,
    )
    ucb_agent = ucb_rollout_agent.UCBRolloutAgent(
        depth=1,
        num_pulls=100,
        c=1.0,
    )

    dealer = pacman_dealer.Dealer(layout_repr='testClassic')
    contenders = [uniform_agent, ucb_agent]
    dealer.run(agents=contenders, num_trials=15)


if __name__ == '__main__':
    main()
"""Run one uniform rollout agent on Pacman, then on the 'MsPacman-v0' environment.

Although to us humans, Pacman and Ms. Pacman are nearly identical games, the
latter's simulator has a much finer grid. This multiplies the depth required
to have the same level of lookahead.
"""

from agents import *
from dealers import pacman_dealer, openai_dealer


def main():
    """Play a single shown trial on each simulator with the same agent."""
    agent = uniform_rollout_agent.UniformRolloutAgent(depth=0, num_pulls=100)

    # Identical run settings are used for both dealers.
    # NOTE(review): multiprocess_mode='' presumably turns multiprocessing off;
    # confirm against Dealer.run.
    run_settings = {
        'agents': [agent],
        'num_trials': 1,
        'multiprocess_mode': '',
        'show_moves': True,
    }

    classic_dealer = pacman_dealer.Dealer(layout_repr='originalClassic')
    classic_dealer.run(**run_settings)

    # be patient - Ms. Pacman has much more overhead!
    ms_pacman_dealer = openai_dealer.Dealer(env_name='MsPacman-v0')
    ms_pacman_dealer.run(**run_settings)


if __name__ == '__main__':
    main()