Example #1
def valueIteration(defaultReward):
    discountedValue = 0.9
    from World import World

    instance = World()
    instance.default_Reward = defaultReward

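    # Each action maps to a distribution over the moves that can actually
    # happen: e.g. "right" succeeds with probability 0.8 but slips "down"
    # with probability 0.2.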
    actions = {
        "right": {"right": 0.8, "down": 0.2},
        "left": {"left": 1.0},
        "up": {"up": 0.8, "left": 0.2},
        "down": {"down": 1.0},
    }
    # initialize the value function to zero for every state
    valueGrid = [[0 for _ in range(instance.world_Column)] for _ in range(instance.world_Row)]
    previousValueGrid = [[0 for _ in range(instance.world_Column)] for _ in range(instance.world_Row)]

    iterations = 0
    stop = False

    while not stop:
        iterations += 1
        previousValueGrid = copyMatrix(valueGrid, instance.world_Row, instance.world_Column)
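        # sweep every state; updates happen in place, so later cells in the
        # sweep already see values updated during this pass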
        for row in range(instance.world_Row):
            for col in range(instance.world_Column):
                # score every action available in this state
                valueActions = [0, 0, 0, 0]
                count = 0
                if not instance.isWalls(row, col):
                    for key, pairs in actions.items():

                        # expected value of the next state under this action
                        total = 0.0
                        for action, value in pairs.items():

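                            # moves that would leave the grid contribute nothing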
                            if instance.isWithinWorld(action, row, col):
                                newCoordinates = instance.newPosition(action, row, col)
                                total += value * valueGrid[newCoordinates[0]][newCoordinates[1]]

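                        # Bellman backup: R(s) + gamma * expected next-state value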
                        valueActions[count] = instance.getRewards(row, col) + (discountedValue * total)
                        count += 1

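                # keep the best action's value; wall cells stay at 0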
                valueGrid[row][col] = max(valueActions)

        stop = convergence(valueGrid, previousValueGrid, instance.world_Row, instance.world_Column)

    print(valueGrid)
    print("The number of iterations is " + str(iterations))
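The function runs value iteration on a gridworld MDP: each sweep applies the Bellman optimality backup V(s) <- R(s) + gamma * max_a sum_{s'} P(s'|s,a) * V(s') to every non-wall cell, and the loop stops once successive value grids agree. It relies on two helpers, copyMatrix and convergence, that are defined elsewhere in the project and not shown on this page. A minimal sketch of what they plausibly look like, assuming convergence compares cells against a small absolute tolerance (the 1e-4 threshold is an assumption, not taken from the source):

def copyMatrix(grid, rows, cols):
    # deep-copy a rows x cols list-of-lists so the in-place sweep
    # cannot mutate the snapshot taken at the start of the iteration
    return [[grid[r][c] for c in range(cols)] for r in range(rows)]


def convergence(current, previous, rows, cols, tolerance=1e-4):
    # declare convergence once no cell changed by more than the tolerance
    return all(
        abs(current[r][c] - previous[r][c]) <= tolerance
        for r in range(rows)
        for c in range(cols)
    )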
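The World class comes from the project's own World module, which is also not shown here. Judging from the calls the example makes, a stand-in with the same interface might look like the sketch below; the 3x4 grid size, wall position, and terminal rewards are illustrative assumptions loosely modeled on the classic 4x3 gridworld, not taken from the source.

class World:
    def __init__(self):
        self.world_Row = 3
        self.world_Column = 4
        self.default_Reward = -0.04  # overwritten by valueIteration's argument
        self._walls = {(1, 1)}  # assumed single interior wall
        self._goals = {(0, 3): 1.0, (1, 3): -1.0}  # assumed terminal rewards
        self._moves = {"up": (-1, 0), "down": (1, 0), "left": (0, -1), "right": (0, 1)}

    def isWalls(self, row, col):
        return (row, col) in self._walls

    def newPosition(self, action, row, col):
        dr, dc = self._moves[action]
        return (row + dr, col + dc)

    def isWithinWorld(self, action, row, col):
        r, c = self.newPosition(action, row, col)
        return 0 <= r < self.world_Row and 0 <= c < self.world_Column

    def getRewards(self, row, col):
        return self._goals.get((row, col), self.default_Reward)

With a stand-in like this in place, calling valueIteration(-0.04) runs sweeps until the convergence test fires, then prints the final value grid and the iteration count.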