def valueIteration(defaultReward):
    """Run value iteration over the World grid and print the result.

    A ``World`` is created and its ``default_Reward`` is set to
    *defaultReward*.  The value grid is then swept repeatedly; after each
    sweep ``convergence`` is called with the grid and the copy taken before
    the sweep, and the loop ends once it returns a true value.  Values are
    updated in place, so cells visited later in a sweep read values already
    written earlier in the same sweep.

    The final grid and the number of sweeps are printed; nothing is returned.

    Args:
        defaultReward: Value assigned to ``World.default_Reward`` before the
            sweeps start.
    """
    from World import World

    discountedValue = 0.9

    instance = World()
    instance.default_Reward = defaultReward
    rowCount = instance.world_Row
    colCount = instance.world_Column

    # Each intended action maps to the moves that may actually be taken and
    # the weight of each move (the weights of every action sum to 1).
    actions = {
        "right": {"right": 0.8, "down": 0.2},
        "left": {"left": 1.0},
        "up": {"up": 0.8, "left": 0.2},
        "down": {"down": 1.0},
    }

    valueGrid = [[0 for _ in range(colCount)] for _ in range(rowCount)]

    iterations = 0
    stop = False
    while not stop:
        iterations += 1
        # Snapshot taken before the sweep; only the convergence check reads it.
        previousValueGrid = copyMatrix(valueGrid, rowCount, colCount)
        for row in range(rowCount):
            for col in range(colCount):
                if instance.isWalls(row, col):
                    # Wall cells are never updated and keep their initial 0.
                    continue
                # Collect one value per action instead of writing into a
                # fixed four-slot list, so the maximum is always taken over
                # exactly the actions that are defined.
                valueActions = []
                for outcomes in actions.values():
                    total = 0.0
                    for move, weight in outcomes.items():
                        # Moves rejected by isWithinWorld contribute nothing.
                        if instance.isWithinWorld(move, row, col):
                            target = instance.newPosition(move, row, col)
                            total += weight * valueGrid[target[0]][target[1]]
                    valueActions.append(
                        instance.getRewards(row, col) + (discountedValue * total)
                    )
                valueGrid[row][col] = max(valueActions)
        stop = convergence(valueGrid, previousValueGrid, rowCount, colCount)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(valueGrid)
    print("The number of iterations is " + str(iterations))
def valueIteration(defaultReward):
    """Compute state values for the World grid by repeated sweeps and print them.

    Creates a ``World``, stores *defaultReward* in its ``default_Reward``
    attribute and then repeatedly updates every non-wall cell to the best
    value over all actions.  A copy of the grid is taken before each sweep
    and passed, together with the updated grid, to ``convergence``; sweeping
    stops when that call returns a true value.  Updates are written straight
    into the grid being read, so a sweep sees its own earlier writes.

    Prints the final grid followed by the number of sweeps.  Returns None.

    Args:
        defaultReward: Value assigned to ``World.default_Reward`` before the
            first sweep.
    """
    from World import World

    discountedValue = 0.9
    instance = World()
    instance.default_Reward = defaultReward

    # intended action -> {move actually taken: weight}; weights sum to 1.
    actions = {
        'right': {'right': 0.8, 'down': 0.2},
        'left': {'left': 1.0},
        'up': {'up': 0.8, 'left': 0.2},
        'down': {'down': 1.0},
    }

    # Every cell starts at 0; wall cells are never written again.
    valueGrid = [[0] * instance.world_Column for _ in range(instance.world_Row)]

    def actionValue(row, col, outcomes):
        # Reward of the cell plus the discounted, weighted value of the cells
        # the action can lead to; moves rejected by isWithinWorld are skipped.
        total = 0.0
        for move, weight in outcomes.items():
            if instance.isWithinWorld(move, row, col):
                target = instance.newPosition(move, row, col)
                total += weight * valueGrid[target[0]][target[1]]
        return instance.getRewards(row, col) + (discountedValue * total)

    iterations = 0
    stop = False
    while not stop:
        iterations += 1
        # Pre-sweep copy, used only by the convergence check below.
        previousValueGrid = copyMatrix(valueGrid, instance.world_Row, instance.world_Column)
        for row in range(instance.world_Row):
            for col in range(instance.world_Column):
                if not instance.isWalls(row, col):
                    # max() runs over exactly the defined actions, so no
                    # placeholder zeros can take part in the comparison.
                    valueGrid[row][col] = max(
                        actionValue(row, col, outcomes)
                        for outcomes in actions.values()
                    )
        stop = convergence(valueGrid, previousValueGrid, instance.world_Row, instance.world_Column)

    # Parenthesised single-argument print works on both Python 2 and 3.
    print(valueGrid)
    print("The number of iterations is " + str(iterations))