from collections import defaultdict
from time import clock
# BURLAP classes and the assignment helpers (MapPrinter, visualizeInitialGridWorld,
# runEvals, getAllStates, ...) are assumed to be imported by the surrounding script.

env = SimulatedEnvironment(domain, rf, tf, initialState)

# Print the map that is being analyzed
print "/////{} Grid World Analysis/////\n".format(world)
MapPrinter().printMap(MapPrinter.matrixToMap(userMap))
visualizeInitialGridWorld(domain, gen, env)

hashingFactory = SimpleHashableStateFactory()
increment = MAX_ITERATIONS / NUM_INTERVALS
timing = defaultdict(list)
rewards = defaultdict(list)
steps = defaultdict(list)
convergence = defaultdict(list)
allStates = getAllStates(domain, rf, tf, initialState)

# Value Iteration
iterations = range(1, MAX_ITERATIONS + 1)
# maxDelta = -1 can never be reached, so each runVI() call below performs
# exactly one full sweep (maxIterations = 1) over the reachable state space
vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
vi.setDebugCode(0)
vi.performReachabilityFrom(initialState)
vi.toggleUseCachedTransitionDynamics(False)
print "//{} Value Iteration Analysis//".format(world)
timing['Value'].append(0)
for nIter in iterations:
    startTime = clock()
    vi.runVI()
    # record cumulative planning time; the leading 0 entry seeds the running sum
    timing['Value'].append(timing['Value'][-1] + clock() - startTime)
    p = vi.planFromState(initialState)
    convergence['Value'].append(vi.latestDelta)
    # evaluate the policy with evalTrials rollouts
    runEvals(initialState, p, rewards['Value'], steps['Value'])
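# getAllStates is an assignment helper whose definition is not part of this
# excerpt. A minimal sketch, assuming BURLAP's StateReachability utility (the
# real helper may instead run a planner over the domain; rf and tf are kept
# only to match the call signature above):
from burlap.behavior.singleagent.auxiliary import StateReachability

def getAllStates(domain, rf, tf, initialState):
    # enumerate every state reachable from the initial state
    return StateReachability.getReachableStates(initialState, domain,
                                                SimpleHashableStateFactory())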
hashingFactory = SimpleHashableStateFactory()
increment = MAX_ITERATIONS / NUM_INTERVALS
timing = defaultdict(list)
rewards = defaultdict(list)
steps = defaultdict(list)
convergence = defaultdict(list)
policy_converged = defaultdict(list)
last_policy = defaultdict(list)

# Value Iteration
iterations = range(1, MAX_ITERATIONS + 1)
print "//Easy Value Iteration Analysis//"
for nIter in iterations:
    startTime = clock()
    # The unattainable delta threshold (-1) guarantees that value iteration
    # runs the full nIter sweeps, for a fair comparison with the other algorithms.
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, nIter)
    vi.setDebugCode(0)
    # run planning from our initial state
    p = vi.planFromState(initialState)
    timing['Value'].append((clock() - startTime) * 1000)  # milliseconds
    convergence['Value'].append(vi.latestDelta)
    # evaluate the policy with evalTrials rollouts
    runEvals(initialState, p, rewards['Value'], steps['Value'])
    if nIter == 1:
        simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                               "Value Iteration {}".format(nIter))

MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
print "\n\n\n"
simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                       "Value Iteration {}".format(nIter))
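# runEvals is another helper not shown in these excerpts. A plausible sketch,
# assuming BURLAP's Policy.evaluateBehavior(state, rf, tf, maxSteps), a
# module-level evalTrials count, and module-level rf/tf (all assumptions, not
# the author's confirmed implementation; the variant in the last excerpt also
# threads rf, tf, and evalTrials explicitly). It rolls the greedy policy out
# evalTrials times and records mean episode reward and mean episode length.
def runEvals(initialState, policy, rewardsList, stepsList):
    r, s = [], []
    for _ in range(evalTrials):                   # evalTrials assumed defined globally
        ea = policy.evaluateBehavior(initialState, rf, tf, 300)  # cap rollouts at 300 steps
        r.append(sum(ea.rewardSequence))          # total (undiscounted) episode reward
        s.append(ea.numTimeSteps())
    rewardsList.append(sum(r) / float(len(r)))
    stepsList.append(sum(s) / float(len(s)))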
hashingFactory = SimpleHashableStateFactory()
increment = MAX_ITERATIONS / NUM_INTERVALS
timing = defaultdict(list)
rewards = defaultdict(list)
steps = defaultdict(list)
convergence = defaultdict(list)
policy_converged = defaultdict(list)
last_policy = defaultdict(list)

# Value Iteration
iterations = range(1, MAX_ITERATIONS + 1)
print "//Hard Value Iteration Analysis//"
for nIter in iterations:
    startTime = clock()
    # The unattainable delta threshold (-1) guarantees that value iteration
    # runs the full nIter sweeps, for a fair comparison with the other algorithms.
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, nIter)
    vi.setDebugCode(0)
    # run planning from our initial state
    p = vi.planFromState(initialState)
    timing['Value'].append((clock() - startTime) * 1000)  # milliseconds
    convergence['Value'].append(vi.latestDelta)
    # evaluate the policy with evalTrials rollouts
    runEvals(initialState, p, rewards['Value'], steps['Value'])
    if nIter == 1:
        simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                               "Value Iteration {}".format(nIter))
    if nIter == MAX_ITERATIONS / 2:
        simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                               "Value Iteration {}".format(nIter))

MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
print "\n\n\n"
simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                       "Value Iteration {}".format(nIter))
dumpCSV(iterations, timing['Value'], rewards['Value'], steps['Value'],
        convergence['Value'], world, 'Value')
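# dumpCSV persists the learning curves. Its definition is not part of this
# excerpt; a minimal sketch using Python's csv module, matching the call
# above (the column names are assumptions, and other excerpts pass slightly
# different arguments):
import csv

def dumpCSV(iters, times, rewards, steps, convergence, world, method):
    fname = '{} {}.csv'.format(world, method)
    assert len(iters) == len(times) == len(rewards) == len(steps) == len(convergence)
    with open(fname, 'wb') as f:                  # 'wb' for the csv module on Python 2
        out = csv.writer(f)
        out.writerow(['iter', 'time', 'reward', 'steps', 'convergence'])
        out.writerows(zip(iters, times, rewards, steps, convergence))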
# Print the map that is being analyzed
print "/////Grid World {}x{} Analysis/////\n".format(n, n)
MapPrinter().printMap(MapPrinter.matrixToMap(userMap))
# visualizeInitialGridWorld(domain, gen, env)  # disabled for the size sweep

hashingFactory = SimpleHashableStateFactory()
increment = MAX_ITERATIONS / NUM_INTERVALS

# Value Iteration
iterations = defaultdict(list)
timing = defaultdict(list)
rewards = defaultdict(list)
steps = defaultdict(list)
print "//Size Value Iteration Analysis//"
startTime = clock()
# here VI is allowed to converge (delta < 1e-6) or stop at MAX_ITERATIONS,
# and the number of sweeps actually needed is recorded per grid size
vi = ValueIteration(domain, rf, tf, discount, hashingFactory, 1e-6, MAX_ITERATIONS)
vi.toggleUseCachedTransitionDynamics(False)
# run planning from our initial state
vi.setDebugCode(0)
p = vi.planFromState(initialState)
timing['Value'].append(clock() - startTime)
iterations['Value'].append(vi.numIterations)
# evaluate the policy with one rollout and visualize the trajectory
runEvals(initialState, p, rewards['Value'], steps['Value'])
MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
print "\n\n"
# simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration")
# the final argument (n == 2) presumably tells dumpCSV to write the CSV
# header on the first, smallest grid size
dumpCSV(iterations['Value'], timing['Value'], rewards['Value'], steps['Value'],
        [n], 'Value', n == 2)
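# The block above is parameterized by the grid size n. A hypothetical driver
# (buildSquareMap and the size list are illustrative, not from the assignment
# code; the world-construction calls mirror the vIteration function below)
# would regenerate the world and rerun the analysis for each size:
def buildSquareMap(n):
    # open n x n room of empty cells (0); the real maps likely place walls/goals
    return [[0] * n for _ in range(n)]

for n in [2, 4, 8, 16]:                           # sizes chosen for illustration
    userMap = buildSquareMap(n)
    gen = BasicGridWorld(userMap, n, n)
    domain = gen.generateDomain()
    initialState = gen.getExampleState(domain)
    rf = BasicRewardFunction(n, n, userMap)
    tf = BasicTerminalFunction(n, n)
    # ... then execute the size analysis above for this n ...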
def vIteration(world, userMap, maxX, maxY, discount=0.99, MAX_ITERATIONS=100):
    gen = BasicGridWorld(userMap, maxX, maxY)
    domain = gen.generateDomain()
    initialState = gen.getExampleState(domain)
    rf = BasicRewardFunction(maxX, maxY, userMap)
    tf = BasicTerminalFunction(maxX, maxY)
    env = SimulatedEnvironment(domain, rf, tf, initialState)
    visualizeInitialGridWorld(domain, gen, env)

    hashingFactory = SimpleHashableStateFactory()
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)
    allStates = getAllStates(domain, rf, tf, initialState)

    print "*** {} Value Iteration Analysis".format(world)
    iterations = range(1, MAX_ITERATIONS + 1)
    # maxDelta = -1 and maxIterations = 1: each runVI() call performs one sweep
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
    vi.setDebugCode(0)
    vi.performReachabilityFrom(initialState)
    vi.toggleUseCachedTransitionDynamics(False)
    timing['Value'].append(0)
    for nIter in iterations:
        startTime = clock()
        vi.runVI()
        p = vi.planFromState(initialState)
        endTime = clock()
        timing['Value'].append((endTime - startTime) * 1000)  # milliseconds
        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with a single rollout
        runEvals(initialState, p, rewards['Value'], steps['Value'], rf, tf, evalTrials=1)
        if nIter == 1 or nIter == 50:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                                   "Value Iteration {}".format(nIter))
    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                           "Value Iteration {}".format(nIter))
    dumpPolicyMap(MapPrinter.printPolicyMap(allStates, p, gen.getMap()),
                  world + ' Value Iteration Policy Map.pkl')
    # timing['Value'][1:] drops the 0 seed so all dumped columns have equal length
    dumpCSV(nIter, timing['Value'][1:], rewards['Value'], steps['Value'],
            convergence['Value'], world, 'Value')
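# dumpPolicyMap serializes the printed policy map to the .pkl file named above.
# A minimal sketch, assuming the first argument is a picklable structure
# returned by MapPrinter.printPolicyMap (an assumption; the method may also
# print as a side effect and return None):
import pickle

def dumpPolicyMap(policyMap, fname):
    with open(fname, 'wb') as f:
        pickle.dump(policyMap, f)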