Beispiel #1
0
def _runEpisode(world, maxSteps):
    """Advance `world` until it reports done or `maxSteps` updates have run.

    Returns the number of `world.update()` calls that were made (0 when
    `maxSteps` is not positive).
    """
    taken = 0
    for taken in range(1, maxSteps + 1):
        world.update()
        if world.isDone():
            break
    return taken


def _runAStar(world, inpathfile, outpathfile):
    """Obtain a path (searched or loaded) and animate it until the window closes.

    When `inpathfile` is empty an A* search is run on `world` and, if
    `outpathfile` is given, the path is handed to `savePath`. Otherwise the
    path is read with `loadPath` and the results are an empty dict.

    This function does not return: it loops on pygame events and calls
    `sys.exit(0)` when a QUIT event arrives.
    """
    # TODO write a search agent to handle all this
    if not inpathfile:
        problem = search.ProblemImpl(world)
        s = planner.search.AStar(problem)
        s.search()
        path = s.getPath()
        results = s.getResults()
        if outpathfile:
            # NOTE(review): `path` is rebound to whatever savePath returns;
            # if savePath returns None the visualization gets no path.
            # Confirm savePath's return value before changing this.
            path = s.savePath(outpathfile, path)
    else:
        path = loadPath(inpathfile)
        results = {}

    vi = Visualization(world)
    vi.setPath(path)
    vi.setResults(results)
    vi.animatePath = True

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit(0)

        vi.update()


def _runQLearn(world, qlconfig, steps=150, exploitEpisodes=2):
    """Train a Q-learning agent on `world`, then run it with epsilon set to 0.

    `qlconfig` must provide the keys "alpha", "gamma" and "episodes".
    `steps` caps the number of world updates per episode and
    `exploitEpisodes` is the number of traced episodes run after training.
    """
    problem = learning.RLProblemImpl(world)

    # NOTE(review): the return value was never used; the call is kept in
    # case it has side effects -- confirm and remove if it does not.
    problem.actions(13)

    policy = qlearning.EpsilonGreedyPolicy(0.05)
    ai = qlearning.QLearn(problem, policy, qlconfig["alpha"], qlconfig["gamma"])

    # place agent in random position
    npc = learning.LearnAgent('@', getRandomLocation(problem), random.randrange(4), ai)
    world.addAgent(npc)

    # Training: after every episode the agent is moved to a new random location.
    for _ in range(qlconfig["episodes"]):
        _runEpisode(world, steps)
        npc.setLocation(getRandomLocation(problem))

    # Exploitation: tracing on, exploration off.
    print("Exploit:\n")
    ai.trace = True
    policy.epsilon = 0.0
    for episode in range(1, exploitEpisodes + 1):
        print("Episode= %d" % episode)
        taken = _runEpisode(world, steps)
        print("Number of steps= %d" % taken)
        npc.setLocation(getRandomLocation(problem))

    npc._ai.viResults()


def main(argv):
    """Build the grid world from the maze file and run the requested command.

    `argv` is passed to `parseArgv`, which yields the command, the maze file,
    the input/output path files and the Q-learning configuration. The
    command 'astar' runs the path visualization and 'qlearn' runs Q-learning;
    any other command only constructs the world.
    """
    cmd, mazefile, inpathfile, outpathfile, qlconfig = parseArgv(argv)

    world = environment.GridWorld(Cell, None, None, mazefile)

    if cmd == 'astar':
        _runAStar(world, inpathfile, outpathfile)
    elif cmd == 'qlearn':
        _runQLearn(world, qlconfig)