def main(argv):
    """Entry point: run either an A* planning demo or a Q-learning demo.

    Parses the command line into a command name, a maze file, optional
    input/output path files, and a Q-learning config dict, then builds a
    GridWorld and dispatches on the command:

    * ``astar``  -- plan a path (or load a saved one) and visualize it in a
      pygame loop until the window is closed.
    * ``qlearn`` -- train a Q-learning agent for the configured number of
      episodes, then run two greedy (epsilon=0) exploitation episodes with
      tracing on and print the per-episode step counts.

    :param argv: raw command-line arguments, forwarded to ``parseArgv``.
    """
    cmd, mazefile, inpathfile, outpathfile, qlconfig = parseArgv(argv)
    world = environment.GridWorld(Cell, None, None, mazefile)

    if cmd == 'astar':
        # TODO: write a search agent to handle all this
        if not inpathfile:
            # No precomputed path supplied: run A* search from scratch.
            problem = search.ProblemImpl(world)
            s = planner.search.AStar(problem)
            s.search()
            path = s.getPath()
            results = s.getResults()
            if outpathfile:
                # NOTE(review): savePath appears to return the (possibly
                # normalized) path it wrote -- confirm against its definition.
                path = s.savePath(outpathfile, path)
        else:
            # Replay a previously saved path; no search stats available.
            path = loadPath(inpathfile)
            results = {}

        vi = Visualization(world)
        vi.setPath(path)
        vi.setResults(results)
        vi.animatePath = True
        # Standard pygame event loop: run until the window is closed.
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit(0)
            vi.update()

    elif cmd == 'qlearn':
        problem = learning.RLProblemImpl(world)
        # Result intentionally discarded; kept in case actions() has
        # side effects -- TODO confirm and remove if it is a pure query.
        problem.actions(13)
        policy = qlearning.EpsilonGreedyPolicy(0.05)
        ai = qlearning.QLearn(problem, policy,
                              qlconfig["alpha"], qlconfig["gamma"])

        # Place the agent at a random position with a random heading.
        npc = learning.LearnAgent('@', getRandomLocation(problem),
                                  random.randrange(4), ai)
        world.addAgent(npc)

        steps = 150  # cap on steps per episode
        # Training phase: epsilon-greedy exploration for the configured
        # number of episodes, respawning the agent after each episode.
        for x in range(qlconfig["episodes"]):
            for step in range(steps):
                world.update()
                if world.isDone():
                    break
            loc = getRandomLocation(problem)
            npc.setLocation(loc)

        # Exploitation phase: greedy policy (epsilon=0) with tracing on.
        print("Exploit:\n")
        ai.trace = True
        policy.epsilon = 0.0
        for x in range(2):
            print("Episode= %d" % (x+1))
            for step in range(steps):
                world.update()
                if world.isDone():
                    break
            # `step` holds the index of the last executed step (or
            # steps-1 if the episode hit the cap without finishing).
            print("Number of steps= %d" % (step+1))
            loc = getRandomLocation(problem)
            npc.setLocation(loc)
        npc._ai.viResults()