def main(argv):
    """
    Entry point for a capture game.
    The args are a blind pass of `sys.argv` with the executable stripped.
    """
    initLogging()

    # Build the full game configuration from the command-line arguments.
    opts = readCommand(argv)

    replayPath = opts['replay']
    if replayPath is None:
        # Normal path: run one or more live games.
        return runGames(**opts)

    # Special case: recorded games don't use the runGames method.
    logging.info('Replaying recorded game %s.' % replayPath)

    # NOTE(review): pickle.load on a user-supplied path executes arbitrary code
    # if the file is untrusted — replay files are assumed to be self-recorded.
    with open(replayPath, 'rb') as fh:
        record = pickle.load(fh)

    record['display'] = opts['display']
    replayGame(**record)
    return
def main(argv):
    """
    Entry point for the crawler simulation.
    The args are a blind pass of `sys.argv`.
    """
    initLogging()

    # Parse the step limit from the raw arguments,
    # then hand the simulation's exit status straight to the OS.
    stepLimit = _load_args(argv)
    exitCode = run(max_steps = stepLimit)
    sys.exit(exitCode)
def main():
    """
    Entry point for the eightpuzzle simulation.
    """
    initLogging()

    puzzle = createRandomEightPuzzle(25)
    print('A random puzzle:\n' + str(puzzle))

    problem = EightPuzzleSearchProblem(puzzle)
    path = search.bfs(problem)
    print('BFS found a path of %d moves: %s' % (len(path), str(path)))

    # Replay the solution one action at a time, pausing for a keypress between states.
    state = puzzle
    for stepNum, action in enumerate(path, start = 1):
        state = state.result(action)
        plural = ("", "s")[stepNum > 1]
        print('After %d move%s: %s' % (stepNum, plural, action) + '\n' + str(state))

        # Wait for a key stroke before showing the next state.
        input('Press return for the next state...')
def main(argv):
    """
    Entry point for the gridworld simulation.
    The args are a blind pass of `sys.argv` with the executable stripped.

    Builds the MDP and environment from the parsed options, selects a display
    adapter and an agent, optionally shows pre-training values, runs the
    requested number of episodes, and finally shows post-learning values.

    Raises:
        ValueError: if an unknown agent type is requested without manual control.
    """
    initLogging()
    opts = parseOptions(argv)

    ###########################
    # GET THE GRIDWORLD
    ###########################

    mdp = _getGridWorld(opts.grid)
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    display = TextGridworldDisplay(mdp)
    if not opts.textGraphics and not opts.nullGraphics:
        # Imported lazily so text-only runs do not require a GUI toolkit.
        from pacai.ui.gridworld.gui import GraphicsGridworldDisplay
        display = GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)

    display.start()

    ###########################
    # GET THE AGENT
    ###########################

    a = None
    if (opts.agent == 'value'):
        a = ValueIterationAgent(0, mdp, opts.discount, opts.iters)
    elif (opts.agent == 'q'):
        qLearnOpts = {
            'gamma': opts.discount,
            'alpha': opts.learningRate,
            'epsilon': opts.epsilon,
            'actionFn': lambda state: mdp.getPossibleActions(state),
        }
        a = QLearningAgent(0, **qLearnOpts)
    elif (opts.agent == 'random'):
        # No reason to use the random agent without episodes.
        if (opts.episodes == 0):
            opts.episodes = 10

        class RandomMDPAgent:
            # An agent that ignores all values and picks a uniformly random legal action.

            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))

            def getValue(self, state):
                return 0.0

            def getQValue(self, state, action):
                return 0.0

            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'

            def update(self, state, action, nextState, reward):
                pass

        a = RandomMDPAgent()
    else:
        if (not opts.manual):
            # BUGFIX: the original `raise 'Unknown agent type: ' + opts.agent`
            # raised a str, which is itself a TypeError in Python 3.
            raise ValueError('Unknown agent type: ' + opts.agent)

    ###########################
    # RUN EPISODES
    ###########################

    # Display q/v values before simulation of episodes.
    if (not opts.manual and opts.agent == 'value'):
        if (opts.valueSteps):
            # Show how the value estimates evolve, one iteration at a time.
            for i in range(opts.iters):
                tempAgent = ValueIterationAgent(0, mdp, opts.discount, i)
                display.displayValues(tempAgent, message='VALUES AFTER ' + str(i) + ' ITERATIONS')
                display.pause()

        display.displayValues(a, message='VALUES AFTER ' + str(opts.iters) + ' ITERATIONS')
        display.pause()
        display.displayQValues(a, message='Q-VALUES AFTER ' + str(opts.iters) + ' ITERATIONS')
        display.pause()

    # Figure out what to display each time step (if anything).
    displayCallback = lambda x: None
    if (not opts.nullGraphics):
        if (opts.manual and opts.agent is None):
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if (opts.agent == 'random'):
                displayCallback = lambda state: display.displayValues(
                        a, state, 'CURRENT VALUES')
            elif (opts.agent == 'value'):
                displayCallback = lambda state: display.displayValues(
                        a, state, 'CURRENT VALUES')
            elif (opts.agent == 'q'):
                displayCallback = lambda state: display.displayQValues(
                        a, state, 'CURRENT Q-VALUES')

    messageCallback = lambda x: print(x)
    if (opts.nullGraphics):
        messageCallback = lambda x: None

    # Figure out whether to wait for a key press after each time step.
    pauseCallback = lambda: None
    if (opts.pause):
        pauseCallback = lambda: display.pause()

    # Figure out whether the user wants manual control (for debugging and demos).
    if (opts.manual):
        decisionCallback = lambda state: getUserAction(state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # Run episodes.
    if (opts.episodes > 0):
        logging.debug('RUNNING ' + str(opts.episodes) + ' EPISODES')

    returns = 0
    for episode in range(1, opts.episodes + 1):
        returns += runEpisode(a, env, opts.discount, decisionCallback,
                displayCallback, messageCallback, pauseCallback, episode)

    if (opts.episodes > 0):
        logging.debug('AVERAGE RETURNS FROM START STATE:'
                + str((returns + 0.0) / opts.episodes))

    # Display post-learning values / q-values.
    if (opts.agent == 'q' and not opts.manual):
        display.displayQValues(a, message='Q-VALUES AFTER ' + str(opts.episodes) + ' EPISODES')
        display.pause()
        display.displayValues(a, message='VALUES AFTER ' + str(opts.episodes) + ' EPISODES')
        display.pause()