Example #1
0
from agents import *
from dealers import pacman_dealer
"""
The UCB rollout agent attempts to minimize cumulative regret over its pull budget. Unlike uniform rollout, it spends
few of its pulls on unpromising arms.
"""

if __name__ == '__main__':
    # Both agents get the same lookahead depth and pull budget, so the run
    # differs only in how each one allocates its pulls.
    uniform_agent = uniform_rollout_agent.UniformRolloutAgent(num_pulls=100, depth=1)
    ucb_agent = ucb_rollout_agent.UCBRolloutAgent(num_pulls=100, depth=1, c=1.0)

    # Run both agents on the 'testClassic' layout for 15 trials.
    dealer = pacman_dealer.Dealer(layout_repr='testClassic')
    contenders = [uniform_agent, ucb_agent]
    dealer.run(agents=contenders, num_trials=15)
from agents import *
from dealers import pacman_dealer, openai_dealer
"""
Although Pacman and Ms. Pacman look nearly identical to a human player, the latter's simulator uses a much finer grid,
which multiplies the depth required to achieve the same level of lookahead.
"""

if __name__ == '__main__':
    # One depth-0 uniform rollout agent is reused, unchanged, on both simulators.
    rollout = uniform_rollout_agent.UniformRolloutAgent(num_pulls=100, depth=0)

    # Settings shared by both runs: a single trial with the moves shown.
    # NOTE(review): the empty multiprocess_mode presumably disables
    # multiprocessing - confirm against the dealer implementation.
    single_shown_trial = {
        'num_trials': 1,
        'multiprocess_mode': '',
        'show_moves': True,
    }

    # First, the classic Pacman simulator on the 'originalClassic' layout.
    classic_dealer = pacman_dealer.Dealer(layout_repr='originalClassic')
    classic_dealer.run(agents=[rollout], **single_shown_trial)

    # Then Ms. Pacman through the OpenAI dealer. This one has considerably
    # more overhead, so expect it to take longer.
    ms_pacman_dealer = openai_dealer.Dealer(env_name='MsPacman-v0')
    ms_pacman_dealer.run(agents=[rollout], **single_shown_trial)