Example 1
def main(argv):
    """
    Entry point for a capture game.
    The args are a blind pass of `sys.argv` with the executable stripped.
    """

    initLogging()

    # Get game components based on input
    options = readCommand(argv)

    # Special case: recorded games don't use the runGames method.
    if (options['replay'] is not None):
        logging.info('Replaying recorded game %s.' % options['replay'])

        with open(options['replay'], 'rb') as file:
            recorded = pickle.load(file)

        recorded['display'] = options['display']
        replayGame(**recorded)

        return

    return runGames(**options)
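
# A hedged sketch (not part of the original module) of the usual wiring for
# this entry point: the executable name is stripped from `sys.argv`, matching
# the docstring above. Assumes `sys` is imported at module level.
if __name__ == '__main__':
    main(sys.argv[1:])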
Example 2
def main(argv):
    """
    Entry point for the crawler simulation.
    The args are a blind pass of `sys.argv`.
    """

    initLogging()
    max_steps = _load_args(argv)
    sys.exit(run(max_steps=max_steps))
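
# A minimal sketch of what `_load_args` might look like; the real parser in
# the source module may differ. The `--max-steps` flag and its default are
# illustrative assumptions.
def _load_args_sketch(argv):
    import argparse

    parser = argparse.ArgumentParser(description='Run the crawler simulation.')
    parser.add_argument('--max-steps', dest='max_steps', type=int, default=1000,
            help='Maximum number of simulation steps.')

    # `argv` is a blind pass of `sys.argv`, so drop the executable name.
    return parser.parse_args(argv[1:]).max_steps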
Example 3
def main():
    """
    Entry point for the eightpuzzle simulation.
    """

    initLogging()

    puzzle = createRandomEightPuzzle(25)
    print('A random puzzle:\n' + str(puzzle))

    problem = EightPuzzleSearchProblem(puzzle)
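    # BFS returns a list of actions (moves) from the start state to a goal.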
    path = search.bfs(problem)
    print('BFS found a path of %d moves: %s' % (len(path), str(path)))
    curr = puzzle
    i = 1
    for a in path:
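        # 'result' returns the successor puzzle state after applying the move.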
        curr = curr.result(a)
        plural = 's' if i > 1 else ''
        print('After %d move%s: %s\n%s' % (i, plural, a, curr))

        input('Press return for the next state...')  # Wait for a keystroke.
        i += 1
Example 4
def main(argv):
    """
    Entry point for the gridworld simulation
    The args are a blind pass of `sys.argv` with the executable stripped.
    """

    initLogging()

    opts = parseOptions(argv)

    ###########################
    # GET THE GRIDWORLD
    ###########################

    mdp = _getGridWorld(opts.grid)
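    # 'livingReward' is the reward received on each non-terminal step, and
    # 'noise' is the chance an action slips to an unintended direction.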
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    display = TextGridworldDisplay(mdp)
    if not opts.textGraphics and not opts.nullGraphics:
        from pacai.ui.gridworld.gui import GraphicsGridworldDisplay
        display = GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)

    display.start()

    ###########################
    # GET THE AGENT
    ###########################

    a = None
    if (opts.agent == 'value'):
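        # Value iteration runs opts.iters rounds of the Bellman update:
        #   V_{k+1}(s) = max_a sum_{s'} T(s, a, s') * (R(s, a, s') + gamma * V_k(s'))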
        a = ValueIterationAgent(0, mdp, opts.discount, opts.iters)
    elif (opts.agent == 'q'):
        qLearnOpts = {
            'gamma': opts.discount,
            'alpha': opts.learningRate,
            'epsilon': opts.epsilon,
            'actionFn': lambda state: mdp.getPossibleActions(state),
        }
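        # These hyperparameters drive the standard Q-learning update:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        # with epsilon controlling epsilon-greedy exploration.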
        a = QLearningAgent(0, **qLearnOpts)
    elif (opts.agent == 'random'):
        # No reason to use the random agent without episodes.
        if (opts.episodes == 0):
            opts.episodes = 10

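        # A stand-in agent that matches the interface the episode loop expects
        # (getAction / getValue / getQValue / getPolicy / update).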
        class RandomMDPAgent:
            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))

            def getValue(self, state):
                return 0.0

            def getQValue(self, state, action):
                return 0.0

            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'

            def update(self, state, action, nextState, reward):
                pass

        a = RandomMDPAgent()
    else:
        if (not opts.manual):
            raise ValueError('Unknown agent type: ' + opts.agent)

    ###########################
    # RUN EPISODES
    ###########################

    # Display q/v values before simulation of episodes.
    if (not opts.manual and opts.agent == 'value'):
        if (opts.valueSteps):
            for i in range(opts.iters):
                tempAgent = ValueIterationAgent(0, mdp, opts.discount, i)
                display.displayValues(tempAgent,
                                      message='VALUES AFTER ' + str(i) +
                                      ' ITERATIONS')
                display.pause()

        display.displayValues(a,
                              message='VALUES AFTER ' + str(opts.iters) +
                              ' ITERATIONS')
        display.pause()
        display.displayQValues(a,
                               message='Q-VALUES AFTER ' + str(opts.iters) +
                               ' ITERATIONS')
        display.pause()

    # Figure out what to display each time step (if anything).
    displayCallback = lambda x: None
    if (not opts.nullGraphics):
        if (opts.manual and opts.agent is None):
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if (opts.agent in ('random', 'value')):
                displayCallback = lambda state: display.displayValues(
                    a, state, 'CURRENT VALUES')
            elif (opts.agent == 'q'):
                displayCallback = lambda state: display.displayQValues(
                    a, state, 'CURRENT Q-VALUES')

    messageCallback = lambda x: print(x)
    if (opts.nullGraphics):
        messageCallback = lambda x: None

    # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
    pauseCallback = lambda: None
    if (opts.pause):
        pauseCallback = lambda: display.pause()

    # Figure out whether the user wants manual control (for debugging and demos).
    if (opts.manual):
        decisionCallback = lambda state: getUserAction(
            state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # Run episodes.
    if (opts.episodes > 0):
        logging.debug('RUNNING ' + str(opts.episodes) + ' EPISODES')

    returns = 0
    for episode in range(1, opts.episodes + 1):
        returns += runEpisode(a, env, opts.discount, decisionCallback,
                              displayCallback, messageCallback, pauseCallback,
                              episode)

    if (opts.episodes > 0):
        logging.debug('AVERAGE RETURNS FROM START STATE: '
                      + str(returns / opts.episodes))

    # Display post-learning values / q-values.
    if (opts.agent == 'q' and not opts.manual):
        display.displayQValues(a,
                               message='Q-VALUES AFTER ' + str(opts.episodes) +
                               ' EPISODES')
        display.pause()
        display.displayValues(a,
                              message='VALUES AFTER ' + str(opts.episodes) +
                              ' EPISODES')
        display.pause()