Example #1
def generateVPIHuntersBoard(seed=None):
    width = 11
    height = 11
    foodHouseLeft = util.flipCoin(PROB_FOOD_LEFT)

    layoutTextGrid = [[' ' for _ in xrange(width)] for _ in xrange(height)]
    layoutTextGrid[0] = ['%' for _ in xrange(width)]
    layoutTextGrid[-1] = layoutTextGrid[0][:]
    for i in xrange(height):
        layoutTextGrid[i][0] = layoutTextGrid[i][-1] = '%'
    possibleLocations = pickPossibleLocations(width, height)
    # (foodX, foodY), (ghostX, ghostY) = tuple(random.sample(possibleLocations, 2))

    bottomLeft, topLeft, bottomRight, topRight = tuple(possibleLocations)

    foodX, foodY = topLeft
    ghostX, ghostY = topRight
    if not foodHouseLeft:
        (foodX, foodY), (ghostX, ghostY) = (ghostX, ghostY), (foodX, foodY)

    layoutTextGrid[-foodY-1][foodX] = '.'
    layoutTextGrid[-ghostY-1][ghostX] = 'G'
    for foodWallX, foodWallY in buildHouseAroundCenter(foodX, foodY):
        if util.flipCoin(PROB_FOOD_RED):
            layoutTextGrid[-foodWallY-1][foodWallX] = 'R'
        else:
            layoutTextGrid[-foodWallY-1][foodWallX] = 'B'
    for ghostWallX, ghostWallY in buildHouseAroundCenter(ghostX, ghostY):
        if util.flipCoin(PROB_GHOST_RED):
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'R'
        else:
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'B'
    layoutTextGrid[5][5] = 'P'
    layoutTextRowList = [''.join(row) for row in layoutTextGrid]
    return layoutTextRowList
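
Every snippet on this page leans on util.flipCoin(p). A minimal sketch of such a helper, assuming it simply compares a uniform random draw against p (as the Berkeley Pacman util module is commonly implemented), could look like this:

import random

def flipCoin(p):
    # Assumed behaviour: return True with probability p, False otherwise.
    r = random.random()
    return r < p
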
def mutate(crossed, prob, maxLength, threshold=1e6):


	for k,history in enumerate(crossed):
		
		for i,ele in enumerate(history):
			
			if util.flipCoin(prob):
				#if util.flipCoin(threshold * 1./fitness(ele,s_belief,t_belief,source_M, M_proj))
				if util.flipCoin(0.8):
					if util.flipCoin(0.5):
						mutated = list(ele)
						mutated[0] = random.choice(Actions)
						crossed[k][i] = tuple(mutated)
					else:
						mutated = list(ele)
						mutated[1] = Obs[np.random.choice(range(len(Obs)),1,p=Obs_p)[0]]
						crossed[k][i] = tuple(mutated)
				else:
					mutated = list(ele)
					mutated[0] = random.choice(Actions)
					mutated[1] = Obs[np.random.choice(range(len(Obs)),1,p=Obs_p)[0]]
					crossed[k][i] = tuple(mutated)
		if util.flipCoin(prob):
			if util.flipCoin(0.5) and len(history) < maxLength:
				mutated = [0,0]
				#mutated[0] = random.choice(Actions)
				#mutated[1] = Obs[np.random.choice(range(len(Obs)),1,p=Obs_p)[0]]
				mutated =  history[-1]
				crossed[k] = history + [tuple(mutated)] 

			elif len(history)>=maxLength-1:
				crossed[k].pop()
	return crossed
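
A hedged usage sketch for mutate: the Actions, Obs and Obs_p globals below are hypothetical stand-ins for whatever the surrounding module actually defines, and mutate is assumed to live in a module where util, random and numpy (as np) are already imported.

# Hypothetical module-level names that mutate() reads (stand-ins only).
Actions = ['North', 'South', 'East', 'West']
Obs = ['hit', 'miss']
Obs_p = [0.3, 0.7]

# Each history is a list of (action, observation) genes.
crossed = [
    [('North', 'miss'), ('East', 'hit')],
    [('South', 'miss'), ('West', 'miss'), ('North', 'hit')],
]

mutated_population = mutate(crossed, prob=0.2, maxLength=5)
print(mutated_population)
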
Example #3
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
      return None

    # pick random one of the legal actions if
    # flipCoin returns true (exploration)
    if util.flipCoin(self.epsilon):
      return random.choice(legalActions)

    # otherwise, pick the best move determined by the policy (exploitation)
    return self.getPolicy(state)
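
The epsilon-greedy pattern above recurs in almost every example that follows. Stripped of the agent class, it can be sketched as a standalone helper; random.random() stands in for util.flipCoin and the policy action is passed in rather than computed:

import random

def epsilonGreedy(legalActions, epsilon, policyAction):
    # Terminal state: no legal actions means no action.
    if not legalActions:
        return None
    # Explore with probability epsilon, otherwise exploit the supplied policy action.
    if random.random() < epsilon:
        return random.choice(legalActions)
    return policyAction

Each getAction below is essentially this helper with policyAction supplied by getPolicy or computeActionFromQValues.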
Example #4
def makeGrid(gridString):
  walk=False
  if gridString[0][0]=='W':
    walk=True
  if walk:
    obstacleProb=0.0
  else:
    obstacleProb=0.2
  width, height = 10, 3
  grid = Grid(width, height)
  for h in range(height):
    if walk:
      grid[width-1][h]=10
    elif gridString[0][0]=='C':
      grid[width-1][h]=35
    else:
      grid[width-1][h]=2
  for x in range(0,width-1):
    for y in range(0,height):
      if util.flipCoin(obstacleProb):
        grid[x][y]=-2
      else:
        #grid[x][y]='-1'
        grid[x][y]=' '
  grid[0][0]='S'
  return grid    
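
The obstacle-placement idea inside makeGrid can be sketched on its own; here the Grid class is replaced by a plain list of lists and 0.2 is the assumed obstacle probability:

import random

def scatterObstacles(width=10, height=3, obstacleProb=0.2):
    # Mark each cell as an obstacle (-2) with probability obstacleProb,
    # leaving the rest blank, much like the inner loops of makeGrid.
    grid = [[' ' for _ in range(height)] for _ in range(width)]
    for x in range(width):
        for y in range(height):
            if random.random() < obstacleProb:  # stand-in for util.flipCoin
                grid[x][y] = -2
    return grid
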
Example #5
    def chooseAction(self, state):
        #return random.choice( state.getLegalActions( self.index ) )
        if not self.firstTurnComplete:
            self.registerInitialState(state)
            self.firstTurnComplete = True
        

        """
        Picks among the actions with the highest Q(s,a).
        """
        actions = state.getLegalActions(self.index)
    
        if util.flipCoin(self.explorationRate):
            return random.choice(actions)
        
        # You can profile your evaluation time by uncommenting these lines
        # start = time.time()
        values = [(a, self.evaluate(state, a)) for a in actions]
        # print 'eval time for agent %d: %.4f' % (self.index, time.time() - start)
    
    
        #print 'VALUES: ' + str(values)  
        maxValue = max(values, key=lambda val: val[1])[1]
        bestActions = [a for a, v in values if v == maxValue]
    
        action = random.choice(bestActions)
                
        self.update(state, action, self.getSuccessor(state, action))
      
                
        #print 'Features: ' + str(self.getFeatures)
        #print 'Weights: ' + str(self.weights)
        #print 'Action: ' + str(action) + ' - ' + str(self.getPosition(state)) + '--->' + str(self.getPosition(self.getSuccessor(state, action)))
        return action
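
The max-value and tie-breaking step in chooseAction can also be written as a small helper; a sketch, assuming values is a non-empty list of (action, value) pairs as built above:

import random

def pickBestAction(values):
    # Collect every action whose value ties the maximum, then break ties at random.
    maxValue = max(v for _, v in values)
    bestActions = [a for a, v in values if v == maxValue]
    return random.choice(bestActions)
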
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None

    """Description:
    If the flip of the coin is favorable it will chose a random action, else it will get the best
    """
    """ YOUR CODE HERE """
    if not legalActions:
      return None

    if util.flipCoin(self.epsilon):
      action = random.choice(legalActions)
    else:
      action = self.getPolicy(state)
    """ END CODE """

    return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legal_actions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        # util.raiseNotDefined()

        # return action
        if not legal_actions:
            return None

        # using flip coin fuction for adding randomness when
        # choosing action
        if util.flipCoin(self.epsilon):
            action = random.choice(legal_actions)
        else:
            action = self.computeActionFromQValues(state)

        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if len(legalActions) == 0:
            return None

        # epsilon greedy: exploration or exploitation
        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = self.computeActionFromQValues(state)

        return action
Example #9
 def playDefense(self,state, offensePlayerIndex):
     if state.isWinner(offensePlayerIndex):
         return True
     if util.flipCoin(self.naiveFactor):
         return True
     else:
         return False
Example #10
 def getAction(self, state):
     legalActions = self.getLegalActions(state)
     
     if (util.flipCoin(self.epsilon)):               # Epsilon chance to choose random Hit or Stand, or follow policy. Epsilon 0 = Always policy
         return random.choice(legalActions)
   
     return self.getPolicy(state)
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        
        # Pick Action
        if state not in self.LegalActions:
            self.LegalActions[state] = self.getLegalActions(state)
        legalActions = self.LegalActions[state]
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        
        return self.getPolicy(state)
        
        "*** YOUR CODE HERE ***"
        util.raiseNotDefined()

        return action
Example #12
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if not self.qTable.has_key(state):
            
            self.qTable[state] = {}
            for action in legalActions:
                self.qTable[state][action] = 0
        if len(legalActions) == 0:
            return None
        coin = util.flipCoin(self.epsilon)
        if coin == True :
            action = random.choice(legalActions)
        else:
            v = -9999
            for act in legalActions:
                if self.qTable[state][act] > v:
                    v = self.qTable[state][act]
                    action = act
                

        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legal_actions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if len(legal_actions) > 0:
            if util.flipCoin(self.epsilon):
                action = min([(self.state_uses[(state, act)], act) for act in legal_actions])[1]
                print action
                if (state, action) not in self.state_uses:
                    self.state_uses[(state, action)] = 0
                self.state_uses[(state, action)] += 1
            else:
                action = self.getPolicy(state)
                if (state, action) not in self.state_uses:
                    self.state_uses[(state, action)] = 0
                self.state_uses[(state, action)] += 1

        return action
Example #14
 def getAction(self, state):
     # Acc to some probability, we take a random action
     # Otherwise, we follow the best action available
     if util.flipCoin(self.epsilon):
       return random.choice(self.getLegalActions(state))
     else:
       return self.getPolicy(state)
Example #15
    def getAction(self, hitOrnot,position):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        targetlist=[]
        legalDistance=[]
        target=()
        legalDistance =self.getAllpossibleDistance(position) 
#        print"GA position is", position
        if legalDistance:
            if util.flipCoin(self.epsilon):
#                print "length",len(legalDistance)
                random_Distance = random.randint(0, len(legalDistance)-1)
                shoot_distance=legalDistance[random_Distance]
#                print "GA shoot_distance:",shoot_distance
                targetlist=self.findLocationWithShootDistance(position,shoot_distance)
#                print"GA TARGET LIST",targetlist,"len is",len(targetlist)
                randomTarget=random.randint(0, len(targetlist)-1)
                target=targetlist[randomTarget]
                print "shoot randomly at",target,self.q_shot_counter
            else:
                target = self.getPolicy(hitOrnot,position)
        return target
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        #Calculate probability for taking actionS
        if len(legalActions) > 0:
            #Using probability from self.epsilon
            if util.flipCoin( self.epsilon):
                #Get random action from list using random.choice
                action = random.choice( legalActions)
            else:
                #Get action from the policy (pi).
                action = self.getPolicy( state)
        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action

        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if len(legalActions) == 0:
            return None
        else:
            prob = util.flipCoin(self.epsilon)
            if prob:
                return random.choice(legalActions)
            else:
                q = util.Counter()
                for a in legalActions:
                    """if self.getQValue(state, a) > result or action == None :
                    action = a
                    result = self.getQValue(state, a)"""
                    q[state, a] = self.getQValue(state, a)
                return q.argMax()[1]
Example #18
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        action = self.computeActionFromQValues(state)
        legal_actions=self.getLegalActions(state)
        if len(legal_actions)<=1:
            return action
        #suboptimal_actions.remove(action)


        # if state not in self.visit:
        #     self.visit[state]=0
        # self.visit[state]+=1

        if util.flipCoin(self.epsilon):#/self.visit[state]
            return random.choice(legal_actions)
        return action
Example #19
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"

        # check whether there are legal actions; if not, return None
        if not legalActions:
            return action

        # flip the coin with epsilon to decide whether to return a random action or the best policy action
        if util.flipCoin(self.epsilon):
            # return a random action; if nothing comes back, return None
            return random.choice(legalActions) or None

        # return the best policy action
        return self.getPolicy(state)
Example #20
 def getAction(self, state):
   """
     Compute the action to take in the current state.  With
     probability self.epsilon, we should take a random action and
     take the best policy action otherwise.  Note that if there are
     no legal actions, which is the case at the terminal state, you
     should choose None as the action.
   
     HINT: You might want to use util.flipCoin(prob)
     HINT: To pick randomly from a list, use random.choice(list)
   """  
   # Pick Action
   legalActions = self.getLegalActions(state)
   action = None
   "*** YOUR CODE HERE ***"
   if len(legalActions) < 1: 
       return None
   else:
       randomAction = util.flipCoin(self.epsilon) #epsilon = prob of true; 1-epsilon = prob false
       if randomAction: 
           action = random.choice(legalActions)
       else:
           action = self.getPolicy(state)
           print "action", action
       return action
    def getAction(self, state):
        """
            Compute the action to take in the current state.  With
            probability self.epsilon, we should take a random action and
            take the best policy action otherwise.  Note that if there are
            no legal actions, which is the case at the terminal state, you
            should choose None as the action.

            HINT: You might want to use util.flipCoin(prob)
            HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if (self.isTerminal(state)):
            return action
        prob = self.epsilon
        if util.flipCoin(prob):
            action = random.choice(legalActions)
        else:
            action = self.getPolicy(state)

        self.doAction(state,action)
        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        qValLegalActions = util.Counter()
        for legalAction in legalActions:
            qValLegalAction = self.getQValue(state, legalAction)
            qValLegalActions[legalAction] = qValLegalAction

        action = None
        if len(qValLegalActions) > 0:
            action = qValLegalActions.argMax()
            if util.flipCoin(self.epsilon):
                action = random.choice(legalActions)

        return action
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    actionsToExplore = []
    
    for action in legalActions:
        if self.getQValue(state,action) == 0:
            actionsToExplore.append(action)
    
    action = None
    
    chooseRandom=util.flipCoin(self.epsilon)
   
    if (chooseRandom):
        if (len(actionsToExplore) > 0):
            action = random.choice(actionsToExplore)
        else:
            action = random.choice(legalActions)
    else:
        action = self.getPolicy(state)
    
    return action
Example #24
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"

        #if terminal state return None
        if len(legalActions)==0:
            return None
        #check random true or false
        
        randomOrNot = util.flipCoin(self.epsilon)
        if randomOrNot:
            # Choose east, west, north, south? how do I get the list?
            return random.choice(legalActions)
          
        else: 
            #best policy action get policy or compute action from q values? 
            return self.computeActionFromQValues(state)
        
        util.raiseNotDefined()
Example #25
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.
    
      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """  
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    #OUR CODE HERE

    if legalActions is None or len(legalActions) == 0:
        return None

    #So do we take a random action or not?
    if util.flipCoin(self.epsilon): #lyee says: no idea what epsilon is!
      #We will take a random action
      action= random.choice(legalActions)
    else:
      #We follow the policy
      action = self.getPolicy(state) #lyee fix: kendall previously had just getPolicy.. I added the 'self' part. hope that's what kendall meant D:
    return action
Example #26
 def getAction(self, state):
   """
     What action to take in the current state. With
     probability self.epsilon, we should take a random
     action and take the best policy action otherwise.
   
     After you choose an action make sure to
     inform your parent self.doAction(state,action) 
     This is done for you, just don't clobber it
      
     HINT: you might want to use util.flipCoin
     here..... (see util.py)
   """  
   # Pick Action
   action = None
   epsilon = self.epsilon
   take_random_action = util.flipCoin(epsilon)
   list_of_actions = self.getLegalActions(state)
   if take_random_action:
       
       action = random.choice(list_of_actions)
   else:
       action = self.getPolicy(state)
   #return action
   # Need to inform parent of action for Pacman
   self.doAction(state,action)    
   return action
Example #27
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    "*** YOUR CODE HERE ***"
    s = state
    legalActions = self.getLegalActions(state)
    #print 'LEGAL:'+str(legalActions)
    if len(legalActions)==0:
      #print 'NONE'
      return None

    action = None

    if util.flipCoin(self.epsilon):
      action = random.choice(legalActions)
    else:
      action = self.getPolicy(s)
      #print action
    #print 'return' +str(action)  
    return action
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    "*** YOUR CODE HERE ***"
    if len(legalActions) == 0:
        #print "No legal actions"
        action = None
    elif util.flipCoin(self.epsilon):
        #print "Random Choice of Action"
        action = random.choice(legalActions)
    else:
        #print "Choice of action based on Policy"
        action = self.getPolicy(state)
    #print "Action:", action
    return action
Example #29
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    actions = self.getLegalActions(state)
    bestQValue = -99999999
    bestActions = []
    for action in actions:
       q = self.getQValue(state, action)
       if q == bestQValue:
          bestActions.append(action)
       elif q > bestQValue:
          bestActions = [action]
          bestQValue = q
    if len(bestActions) == 0:
          return None

    # Pick Action
    legalActions = self.getLegalActions(state)
    action = None
    if legalActions:
        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = random.choice(bestActions)
    return action
  def getAction(self, state):
    """
      Compute the action to take in the current state.  With
      probability self.epsilon, we should take a random action and
      take the best policy action otherwise.  Note that if there are
      no legal actions, which is the case at the terminal state, you
      should choose None as the action.

      HINT: You might want to use util.flipCoin(prob)
      HINT: To pick randomly from a list, use random.choice(list)
    """
    # Pick Action
    legalActions = self.getLegalActions(state)
    "*** YOUR CODE HERE ***"

    # terminal state
    if not legalActions:
      return None
    else:
      # pick if we should explore by flipping a coin
      goRandom = util.flipCoin(self.epsilon)
      if goRandom:
        # randomly choose an action
        return random.choice(legalActions)
      else:
        # choose the best action
        return self.getPolicy(state)
Example #31
    def chooseAction(self, gameState):
        """
		@version 1.2.1

		"""
        self.observationHistory.append(gameState)

        actions = gameState.getLegalActions(self.index)

        if len(actions) == 0:
            return None

        if util.flipCoin(self.learningRate):
            action = random.choice(actions)
            print("now do exploitation")
        else:
            action = self.computeActionFromQValues(gameState)

        self.stepLeft -= 1

        if self.considerBack:
            #double check actions or bestActions as argument
            action = self.updateCarryFood(
                gameState, self.backToSafetyPosition(gameState, actions))

        self.lastState = gameState
        # action = self.updateCarryFood(gameState, random.choice(bestActions))
        self.lastAction = action

        # self.updateWeights(self.lastState, self.lastAction, gameState)
        print "above all, I choose ", action
        print "============================="
        print "=============ends============"
        print "============================="

        return self.updateCarryFood(gameState, action)
Example #32
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        ################################################
        # 0039026 #
        ###########
        """Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)"""

        #util.flipCoin(p)
        #print util.flipCoin(10)
        #print self.epsilon
        explore = util.flipCoin(self.epsilon)

        if not explore:
            #print "COINhere false ----------------"
            action = self.computeActionFromQValues(state)
        else:
            #print "COINhere true ----------------"
            action = random.choice(legalActions)

        return action
Example #33
    def getAction(self, state):
        legalActions = self.getLegalActions(state)
        action = None

        "end game"
        if not self.getLegalActions(state):
            return action

        if self.new_episode:
            "Analyze state"
            self.state_discription = self.getStateDiscription(state)
            self.new_episode = False

        "Explore or Exploit"
        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            "predict based on current state"

            values = self.first_model.predict(
                np.array([self.state_discription]))

            actions = copy.deepcopy(ACTIONS)
            actions = [
                action
                for _, action in sorted(zip(values, actions), reverse=True)
            ]
            for a in actions:
                if a in legalActions:
                    action = a
                    break

        if action not in legalActions:
            action = ACTIONS[4]
        self.doAction(state, action)
        return action
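
The rank-then-filter step above (order actions by predicted value, then take the first legal one) can be isolated into a helper; a sketch, assuming values and actions are parallel lists of scores and action names:

def bestLegalAction(values, actions, legalActions):
    # Order actions from highest to lowest predicted value, then return
    # the first one that is actually legal in the current state.
    ranked = [a for _, a in sorted(zip(values, actions), reverse=True)]
    for a in ranked:
        if a in legalActions:
            return a
    return None
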
Example #34
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if len(legalActions) == 0:
            return action

        if util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = self.computeActionFromQValues(state)

        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        isHeads = util.flipCoin(self.epsilon)

        if len(legalActions) == 0:
            return None

        if isHeads:
            #print "Taking the random choice"
            return random.choice(legalActions)
        else:
            #print "Taking the known policy"
            return self.getPolicy(state)
Example #36
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        #util.raiseNotDefined()
        if len(legalActions) != 0:  # if there are no legal actions, action stays None
            if util.flipCoin(self.epsilon):  # explore with probability epsilon
                action = random.choice(legalActions)  # pick a random legal action
            else:
                action = self.getPolicy(state)
        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """

        "*** YOUR CODE HERE ***"

        # Pick Action
        random_choice = util.flipCoin(self.epsilon)
        legalActions = self.getLegalActions(state)
        action = self.getPolicy(state)

        if random_choice:
            if not legalActions:
                return None
            return random.choice(legalActions)
        return action
Example #38
    def getAction(self, state, withEpsilon=True):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.
        """
        #        print "getAction QLearningAgentt"

        # Pick Action
        print(self.epsilon)
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        if len(legalActions) == 0:
            return action

        if withEpsilon and util.flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = self.computeActionFromQValues(state)

        return action
Example #39
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action

        actionList = self.getLegalActions(state)

        #returns true with self.epsilon probability
        probability = util.flipCoin(self.epsilon)

        #if true, explore
        if probability:
            return random.choice(actionList)
        #if false, exploit the learned Q-values
        else:
            return self.computeActionFromQValues(state)
Example #40
    def chooseAction(self, state):
        # Append game state to observation history...
        self.observationHistory.append(state)
        # Pick Action
        legalActions = state.getLegalActions(self.index)
        action = None
        if (DEBUG):
            print self.newline()
            print "AGENT " + str(self.index) + " choosing action!"
        if len(legalActions):
            if util.flipCoin(self.epsilon) and self.isTraining():
                action = random.choice(legalActions)
                if (DEBUG):
                    print "ACTION CHOSE FROM RANDOM: " + action
            else:
                action = self.computeActionFromQValues(state)
                if (DEBUG):
                    print "ACTION CHOSE FROM Q VALUES: " + action

        self.lastAction = action

        foodLeft = len(self.getFood(state).asList())
        # Prioritize going back to start if we have <= 2 pellets left
        if foodLeft <= 2:
            bestDist = 9999
            for a in legalActions:
                successor = self.getSuccessor(state, a)
                pos2 = successor.getAgentPosition(self.index)
                dist = self.getMazeDistance(self.start, pos2)
                if dist < bestDist:
                    action = a
                    bestDist = dist

        if (DEBUG):
            print "AGENT " + str(self.index) + " chose action " + action + "!"
        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        possible = util.flipCoin(self.epsilon)
        # print('This is the possibility', possible)  
        if possible:
          action = random.choice(legalActions)
        else:
          action = self.getPolicy(state)
        # util.raiseNotDefined()

        return action
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        "*** YOUR CODE HERE ***"
        #Local Declarations
        action = None
        actionList = self.getLegalActions(state)
        #Error check, make sure list of actions is not empty
        if not (actionList):
            return None
        #Check the probability coin flip; choose action randomly or from computed Q-values
        if (util.flipCoin(self.epsilon)):
            action = random.choice(actionList)
        else:
            action = self.computeActionFromQValues(state)
        return action
Example #43
def generateRandomHuntersBoard(seed=None, width=None, height=None):
    """Note that this is constructing a string, so indexing is [-y-1][x] rather than [x][y]"""
    random.seed(seed)

    leftHouseTop  = util.flipCoin(PROB_LEFT_TOP)

    if not width or not height:
        width = random.randrange(11, 20, 4)
        height = random.randrange(11, 16, 4)
    layoutTextGrid = [[' ' for _ in range(width)] for _ in range(height)]
    layoutTextGrid[0] = ['%' for _ in range(width)]
    layoutTextGrid[-1] = layoutTextGrid[0][:]
    for i in range(height):
        layoutTextGrid[i][0] = layoutTextGrid[i][-1] = '%'
    possibleLocations = pickPossibleLocations(width, height)
    # (foodX, foodY), (ghostX, ghostY) = tuple(random.sample(possibleLocations, 2))

    bottomLeft, topLeft, bottomRight, topRight = tuple(possibleLocations)

    if leftHouseTop:
        foodX, foodY = topLeft
        ghostX, ghostY = bottomRight if util.flipCoin(PROB_OPPOSITE_CORNERS) else topRight
    else:
        foodX, foodY = bottomLeft
        ghostX, ghostY = topRight if util.flipCoin(PROB_OPPOSITE_CORNERS) else bottomRight
    if not util.flipCoin(PROB_FOOD_LEFT):
        (foodX, foodY), (ghostX, ghostY) = (ghostX, ghostY), (foodX, foodY)

    layoutTextGrid[-foodY-1][foodX] = '.'
    layoutTextGrid[-ghostY-1][ghostX] = 'G'
    for foodWallX, foodWallY in buildHouseAroundCenter(foodX, foodY):
        if util.flipCoin(PROB_FOOD_RED):
            layoutTextGrid[-foodWallY-1][foodWallX] = 'R'
        else:
            layoutTextGrid[-foodWallY-1][foodWallX] = 'B'
    for ghostWallX, ghostWallY in buildHouseAroundCenter(ghostX, ghostY):
        if util.flipCoin(PROB_GHOST_RED):
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'R'
        else:
            layoutTextGrid[-ghostWallY-1][ghostWallX] = 'B'
    layoutTextGrid[-2][1] = 'P'
    layoutTextRowList = [''.join(row) for row in layoutTextGrid]
    return layoutTextRowList
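
A brief usage sketch for the generator above; the module-level probabilities (PROB_LEFT_TOP, PROB_OPPOSITE_CORNERS, PROB_FOOD_LEFT, PROB_FOOD_RED, PROB_GHOST_RED) and the helpers pickPossibleLocations and buildHouseAroundCenter are assumed to be defined alongside it, as the snippet implies:

# Build a reproducible 11x11 board and print it row by row.
board = generateRandomHuntersBoard(seed=0, width=11, height=11)
for row in board:
    print(row)
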
Example #44
 def mutate(self, p):
     newString = ""
     for i in xrange(self.n):
         if util.flipCoin(p): newString += str(int(not int(self.string[i])))
         else: newString += self.string[i]
     self.string = newString
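
A self-contained sketch of the bit-flip mutation above: the surrounding class is reduced to the two attributes the method touches, and random.random() stands in for util.flipCoin.

import random

class BitString(object):
    def __init__(self, bits):
        self.string = bits
        self.n = len(bits)

    def mutate(self, p):
        # Flip each bit independently with probability p.
        newString = ""
        for i in range(self.n):
            if random.random() < p:
                newString += str(int(not int(self.string[i])))
            else:
                newString += self.string[i]
        self.string = newString

genome = BitString("101100")
genome.mutate(0.1)
print(genome.string)
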
    def getAction(self, state):
        """
          Compute the action to take in the current state.
          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """

        action = None
        legalActions = self.getLegalActions(state)

        # List of actions, excluding those that are known to simply keep the agent in the same state
        possibleActions = []

        # List of actions and their respective Q values
        possibleActionQValues = util.Counter()

        # List of actions and the absolute values of their respective Q values,
        # excluding actions that would cause the agent to backtrack to the previous state
        possibleActionsNoBacktrack = util.Counter()
        absPossibleActionsNoBacktrack = util.Counter()

        # Assemble lists of actions that are permitted depending on the circumstances
        for action in legalActions:

            if (state, action) not in self.forbiddenActions:
                possibleActionQValues[action] = self.getQValue(state, action)
                possibleActions.append(action)

                if not self.isBacktrackAction(action):
                    possibleActionsNoBacktrack[action] = self.getQValue(
                        state, action)
                    absPossibleActionsNoBacktrack[action] = abs(
                        self.getQValue(state, action))

        if len(possibleActionQValues) > 0:

            print "goalMode: ", self.goalMode
            print "epsilon 1: ", self.epsilon1
            print "epsilon 2: ", self.epsilon2
            print "meanQValue: ", self.getValue(state)
            print "possibleActions: ", possibleActions
            print "possibleActionQValues: ", possibleActionQValues

            # Training to populate Q table
            if self.goalMode == 'maxQMode':
                if util.flipCoin(0.5):
                    # action = possibleActionsNoBacktrack.argMax()
                    action = possibleActionQValues.argMax()
                    # action = random.choice(possibleActions)
                else:
                    action = random.choice(possibleActions)

            elif self.goalMode == 'minQMode':
                if util.flipCoin(self.epsilon1):
                    # action = possibleActionsNoBacktrack.argMin()
                    action = possibleActionQValues.argMin()
                    # action = random.choice(possibleActions)
                else:
                    action = random.choice(possibleActions)
                    print "Random: ", action

            # Training to find deceptive path
            else:

                largestQValue = possibleActionQValues.argMax()

                print "Equilibrium state: ", state, self.isEquilibriumState(
                    state)

                # If agent has already found an equidistant state with the largest-Q-value action seen so far,
                # then continue to the true goal
                if self.maxQValuePolicy:
                    action = possibleActionsNoBacktrack.argMax()

                # Otherwise, keep searching for the equidistant state that has the largest-Q-value action.
                else:

                    # If the agent has arrived at (what was thought to be) the LDP, and found that this state
                    # no longer has at least one positively valued action and at least one negatively valued action,
                    # then forget about this state.
                    if self.lastDeceptivePoint is not None and state == self.lastDeceptivePoint[
                            0] and not self.isEquilibriumState(state):
                        self.lastDeceptivePoint = None

                    # If an equidistant state has been found...
                    if self.isEquilibriumState(state):

                        # If the agent has arrived at an equidistant state that has the largest-Q-value action
                        # seen so far (or if the agent has arrived at what is currently thought to be the LDP),
                        # then update the details of the likeliest LDP candidate...
                        if self.lastDeceptivePoint is None\
                                or possibleActionQValues.get(largestQValue) > self.lastDeceptivePoint[1]\
                                or state == self.lastDeceptivePoint[0]:

                            self.lastDeceptivePoint = (
                                state,
                                possibleActionQValues.get(largestQValue))

                            # Now head directly to the true goal, with probability 1 - epsilon2...
                            if util.flipCoin(1 - self.epsilon2):
                                self.maxQValuePolicy = True
                                action = possibleActionsNoBacktrack.argMax()

                            # Or continue searching for equidistant states that might have a larger Q value
                            else:
                                action = absPossibleActionsNoBacktrack.argMin()

                            if self.epsilon2 >= 1.0 / float(
                                    self.phaseTwoEpisodes):
                                self.epsilon2 -= 1.0 / float(
                                    self.phaseTwoEpisodes)

                        # If this equidistant state does NOT have the largest-Q-value action
                        # of all equidistant states seen so far, then keep searching for such an equidistant state
                        else:
                            action = absPossibleActionsNoBacktrack.argMin()

                    # Otherwise, keep searching for an equidistant state:
                    else:

                        if self.getValue(state) > 0:
                            action = possibleActionsNoBacktrack.argMin()
                        elif self.getValue(state) < 0:
                            action = possibleActionsNoBacktrack.argMax()
                        else:
                            action = absPossibleActionsNoBacktrack.argMin()

        print "self.lastDeceptivePoint: ", self.lastDeceptivePoint

        return action
 def getAction(self, state):
     if util.flipCoin(self.epsilon):
         return random.choice(self.legalActions)
     return self.computeActionFromQValues(state)
Example #47
    def getAction(self, state):
        """
		  Compute the action to take in the current state.  With
		  probability self.epsilon, we should take a random action and
		  take the best policy action otherwise.  Note that if there are
		  no legal actions, which is the case at the terminal state, you
		  should choose None as the action.

		  HINT: You might want to use util.flipCoin(prob)
		  HINT: To pick randomly from a list, use random.choice(list)
		"""
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"

        if not legalActions:
            action = None

        elif util.flipCoin(self.epsilon):
            action = random.choice(legalActions)

        else:
            action = self.computeActionFromQValues(state)
        # if action not in legalActions:
        # 	print("WTFFFF")
        action_copy = action

        if self.shield:
            safe = False
            legal_qval = []
            # print(legalActions)
            # legalActions.remove(action)

            for ac in legalActions:
                if ac != action:
                    legal_qval.append((ac, self.getQValue(state, ac)))

            legal_qval.sort(key=lambda x: x[1], reverse=True)
            i = 0
            self.discarded = []
            # print(legal_qval)
            while not safe and len(legalActions) != 0:
                # if action not in legalActions:
                # 	print("WTFFFFF")
                px, py = state.getPacmanPosition()
                if action == 'East':
                    px += 1
                elif action == 'West':
                    px -= 1
                elif action == 'North':
                    py += 1
                elif action == 'South':
                    py -= 1

                num_ghosts = len(state.data.agentStates) - 1

                safe = True
                for j in range(num_ghosts):
                    ghostpos = state.getGhostPosition(j + 1)
                    dist = manhattanDistance((px, py), ghostpos)
                    # print(dist)
                    if dist < 2.0:
                        safe = False
                        self.discarded.append(action)
                        legalActions.remove(action)
                        break
                # if action == 'Stop':
                # 	safe = False
                # print(action)
                if not safe and i < len(legal_qval):
                    action = legal_qval[i][0]
                i += 1

            if len(legalActions) == 0:
                action = action_copy

        return action
Example #48
	def chooseOfAction(self, gameState):
		# Pick Action
		ghost=[]
		ghostIndex = 0
		opAgents = CaptureAgent.getOpponents(self,gameState)
		currentPos = gameState.getAgentPosition(self.index)
		# Get ghost locations and states if observable
		if opAgents:
			for opponent in opAgents:
				opPos = gameState.getAgentPosition(opponent)
				opIsPacman = gameState.getAgentState(opponent).isPacman
				
				if opPos and not opIsPacman: 
					dis = abs(currentPos[0]-opPos[0])+abs(currentPos[1]-opPos[1])
					if dis<=6:
						ghost.append(opPos)
						ghostIndex = opponent
		if   len(self.getFood(gameState).asList())>2:
			
			if len(ghost) ==0 :
				if gameState.getAgentState(self.index).numCarrying>1 and  gameState.data.timeleft<200:
					self.weights =self.weights4
					print("444444444444444444444")
				else:
					self.weights = self.weights1
					print("111111111111111111111")
			else:

				if min([self.getMazeDistance(gameState.getAgentPosition(self.index),a) for a in ghost])>6:
					self.weights = self.weights1
					print("111111111111111111111")
				else:
					if gameState.getAgentState(ghostIndex).scaredTimer<10:
						if gameState.data.timeleft<200 :
							if gameState.getAgentState(self.index).numCarrying>2:
								self.weights = self.weights3
								print("33333333333333333333")
							else:
								self.weights = self.weights2
								print("2222222222222222222222")
						else:
							if gameState.getAgentState(self.index).numCarrying>10:
								if self.red:
									middle = int((gameState.data.layout.width - 2)/2 )
								else:
									middle = int((gameState.data.layout.width - 2)/2 + 1)
								if abs(gameState.getAgentPosition(self.index)[0]-middle) < middle/2:
									self.weights = self.weights3
									print("33333333333333333333")
								else :
									self.weights = self.weights2
									print("2222222222222222222222")
							else:
								self.weights = self.weights2
								print("2222222222222222222222")
					else :
						self.weights = self.weights1
						print("111111111111111111111")

		else :
			if len(ghost) ==0:
				self.weights = self.weights4
				print("44444444444444444444")
			else:
				if gameState.getAgentState(ghostIndex).scaredTimer<10:
					self.weights = self.weights3
					print("33333333333333333333")
				else: 
					self.weights = self.weights4
					print("44444444444444444444")
		legalActions = gameState.getLegalActions(self.index)
		legalActions.remove(Directions.STOP)
		
		action = None

		if len(legalActions) != 0:
			prob = util.flipCoin(self.epsilon)
			if prob:
				action = random.choice(legalActions)

			else:
				if self.weights ==self.weights1:
					action = self.getSafePolicy(gameState)
			
				else:
					action = self.getPolicy(gameState)

		if self.weights == self.weights2:

			food = self.getFeatures(gameState,action)["closest-food"]
			ghost = self.getFeatures(gameState,action)["closest-ghosts"]
			print(food*100,ghost*100)
		if not gameState.getAgentState(self.index).isPacman:
			if self.red:
				if  self.finish:
					self.mode =2 
			else:
				if self.finish:
					self.mode = 2
		return action
    def getAction(self, state):

        ############## Build network map ##############
        # This builds a network of positions pacman can move to based on a state.
        # Required for search algorithms. This is specific to this maze.
        if not self.map_built:  # If map isn't built, build it
            self.w_pos = self.wall_pos(state)  # Get wall locations
            self.map_graph = self.map_graph(self.w_pos)  # Get network
            self.map_built = True  # Turn off map building

        # Update weights
        if self.played:  # Have we played yet?
            if self.learning:

                ############## Get Reward of state ##############
                self.r = self.reward_signal(state.getScore(), self.old_score)
                self.old_score = state.getScore()  # Update the old score for next time

                ############## Extract legal moves ##############
                # Get legal actions & remove STOP
                legal = state.getLegalPacmanActions()
                if Directions.STOP in legal:
                    legal.remove(Directions.STOP)

                # Convert available actions in form of int
                available_actions = self.AvailableActions(legal)

                ############## Calculate Max Q(s', a') ##############
                # Coordinates for grids around pacman
                south = (state.getPacmanPosition()[0],
                         state.getPacmanPosition()[1] - 1)
                east = (state.getPacmanPosition()[0] + 1,
                        state.getPacmanPosition()[1])
                west = (state.getPacmanPosition()[0] - 1,
                        state.getPacmanPosition()[1])
                north = (state.getPacmanPosition()[0],
                         state.getPacmanPosition()[1] + 1)
                directions = [south, east, west, north]

                possible_directions = []  # List with +1 moves to examine in legal
                direction_integer = []  # Mapping grid examined with direction

                # Examine available actions and store grids to look at for pacman
                if 0 in available_actions:
                    possible_directions.append(east)
                    direction_integer.append(0)
                if 1 in available_actions:
                    possible_directions.append(south)
                    direction_integer.append(1)
                if 2 in available_actions:
                    possible_directions.append(west)
                    direction_integer.append(2)
                if 3 in available_actions:
                    possible_directions.append(north)
                    direction_integer.append(3)

                # Stores Q(s', a') values, and their respective function scores
                Q_values = []
                f1_score = []
                f2_score = []

                # Looking at possible actions and compute Q values
                for i in possible_directions:
                    f1 = self.Distance_to_food(state, state.getFood(), i,
                                               self.map_graph,
                                               state.getGhostPositions())
                    f2 = self.Distance_to_Ghost(state,
                                                state.getGhostPositions(), i,
                                                self.map_graph)

                    f1_score.append(f1)
                    f2_score.append(f2)

                    Q = self.weights[0] + self.weights[1] * f1 + self.weights[2] * f2
                    Q_values.append(Q)

                # Choose the best action
                index = Q_values.index(max(Q_values))
                action = self.IntToMove(direction_integer[index])

                ############## Weight Updates ##############
                # UPDATE THE WEIGHTS

                difference = self.r + self.gamma * max(Q_values) - self.Qsa
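                # 'difference' is the TD error r + gamma * max_a' Q(s', a') - Q(s, a);
                # each weight below moves by alpha * difference * f_i(s, a), the
                # standard approximate Q-learning update.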

                self.weights[0] = self.weights[0] + self.alpha * difference
                self.weights[1] = self.weights[1] + self.alpha * difference * f1_score[index]
                self.weights[2] = self.weights[2] + self.alpha * difference * f2_score[index]

                # Save the chosen action's previous function scores
                self.Qsa = max(Q_values)  # This will be Q(s,a) after the move has been made

                # Save f values in case of death
                self.f1_death = f1_score[index]
                self.f2_death = f2_score[index]

        else:

            ############## Extract legal moves ##############
            # Get legal actions & remove STOP
            legal = state.getLegalPacmanActions()
            if Directions.STOP in legal:
                legal.remove(Directions.STOP)

            # Convert available actions in form of int
            available_actions = self.AvailableActions(legal)

            ############## Calculate Max Q(s', a') ##############
            # Coordinates for grids around pacman
            south = (state.getPacmanPosition()[0],
                     state.getPacmanPosition()[1] - 1)
            east = (state.getPacmanPosition()[0] + 1,
                    state.getPacmanPosition()[1])
            west = (state.getPacmanPosition()[0] - 1,
                    state.getPacmanPosition()[1])
            north = (state.getPacmanPosition()[0],
                     state.getPacmanPosition()[1] + 1)
            directions = [south, east, west, north]

            possible_directions = []  # List with +1 moves to examine in legal
            direction_integer = []  # Mapping grid examined with direction

            # Examine available actions and store grids to look at for pacman
            if 0 in available_actions:
                possible_directions.append(east)
                direction_integer.append(0)
            if 1 in available_actions:
                possible_directions.append(south)
                direction_integer.append(1)
            if 2 in available_actions:
                possible_directions.append(west)
                direction_integer.append(2)
            if 3 in available_actions:
                possible_directions.append(north)
                direction_integer.append(3)

            # Stores Q(s', a') values, and their respective function scores
            Q_values = []
            f1_score = []
            f2_score = []

            # Looking at possible actions and compute Q values
            for i in possible_directions:

                f1 = self.Distance_to_food(state, state.getFood(), i,
                                           self.map_graph,
                                           state.getGhostPositions())
                f2 = self.Distance_to_Ghost(state, state.getGhostPositions(),
                                            i, self.map_graph)

                f1_score.append(f1)
                f2_score.append(f2)

                Q = self.weights[0] + self.weights[1] * f1 + self.weights[2] * f2
                Q_values.append(Q)

            # Choose the best action
            index = Q_values.index(max(Q_values))
            action = self.IntToMove(direction_integer[index])

            # Save the function values
            self.Qsa = max(Q_values)  # This will be Q(s,a) after the move has been made

            # Begin learning
            if self.learning:
                self.played = True

        # Exploration function
        if util.flipCoin(self.epsilon) and self.learning:
            choices = range(0, len(Q_values))
            index = random.choice(choices)
            action = self.IntToMove(direction_integer[index])
            self.Qsa = Q_values[index]  # This will be Q(s,a) after the move has been made
        print self.weights

        return action
for i in range(20):
    nasa = open('i%d.txt' % i, 'r')
    skipLine = 9
    lineIndex = 0
    result = {}
    p = [0, 0, 0, 0]

    for line in nasa:
        if lineIndex > skipLine:
            line = line.strip()
            v = line.split(' ')
            r = [int(v[0]), int(v[1]), int(v[2]), float(v[6])]

            r[-1] *= random.choice(surface)
            if not util.flipCoin(random.choice(health_prob)):
                r[-1] = 0
            r[-1] = int(r[-1])

            if p[1] not in result:
                result[p[1]] = {}

            if p[2] not in result[p[1]]:
                result[p[1]][p[2]] = {}

            if p[3] not in result[p[1]][p[2]]:
                result[p[1]][p[2]][p[3]] = {}

            if r[3] not in result[p[1]][p[2]][p[3]]:
                result[p[1]][p[2]][p[3]][r[3]] = 0
Beispiel #51
0
    def getAction(self, state):

        ############## Build network map ##############
        # This builds a network of positions pacman can move to based on a state.
        # Required for search algorithms. This is specific to this maze.
        if not self.map_built:  # If map isn't built, build it
            self.w_pos = self.wall_pos(state)  # Get wall locations
            self.map_graph = self.map_graph(self.w_pos)  # Get network
            self.map_built = True  # Turn off map building

        ############## Extract legal moves ##############
        # Get legal actions & remove STOP
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal:
            legal.remove(Directions.STOP)

        # Convert available actions in form of int
        available_actions = self.AvailableActions(legal)

        ############## Extract State information S' ##############
        # Get CURRENT state info S' (in form of key)
        self.s_cur = self.key(state.getPacmanPosition(),
                              state.getGhostPositions(), state.getFood(),
                              self.map_graph, state)

        ############## Update Q-Table ##############
        # If this is not the first action we make in a game. If it is, skip to "else"
        if self.played:
            # If this is the first time we have seen that state, initialize key-value pairs of all possible actions to 0. If not the dictionary will be empty and not function. Allows us to add states as we see them.
            for i in available_actions:
                if self.Q.get((self.s_cur, i)) == None:
                    self.Q[self.s_cur, i] = 0

            # Get the current reward (R')
            self.r_cur = self.reward_signal(state.getScore(), self.old_score)

            # Update old score
            self.old_score = state.getScore()

            # Increment the state/action pair that we were in previously. (Nsa += 1)
            self.Nsa[(self.s, self.a)] += 1

            # Calculate alpha adjustment based on Nsa (if activated)
            if self.alpha_adjustment:
                self.alpha = self.adjusted_alpha(self.Nsa[(self.s, self.a)])
            else:  # Use regular alpha if not active
                self.alpha = self.alpha

            # Update the Q Table for previous state/action pair
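            # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))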
            self.Q[(self.s,
                    self.a)] = self.Q[(self.s, self.a)] + self.alpha * (
                        self.r + self.gamma * max(self.Q[(self.s_cur, i)]
                                                  for i in available_actions) -
                        self.Q[(self.s, self.a)])

        else:
            # This code is only run once at the beginning of each game.

            # Initialize the current reward for starting
            self.r_cur = state.getScore()
            self.old_score = state.getScore()  # "Old score" is the same as the current score at t = 0.
            # Initialize playing state. We will not come here again until the new game.
            self.played = True
            # Ensure dictionary is not empty for current starting position and available actions.
            # They are initialized to 0.
            for i in available_actions:
                if self.Q.get((self.s_cur, i)) == None:
                    self.Q[self.s_cur, i] = 0

        ############## Update S, R ##############
        # Adjust state, and reward. We have already updated the Q table so this will only be relevant at the next table update (given we survived an extra move)
        self.s = self.s_cur
        self.r = self.r_cur

        ############## Chosing argmax Q(s', a') and updating A ##############

        self.scores = []  # Will keep track of all rewards for each action in legal

        ############## If using function exploration
        if self.function_exploration:
            ## Adjust action (need the arg max Q(s', a'))
            # Obtaining the action which maximizes the rewards. Examine all possible actions
            # and store their Q-values in a list
            for i in available_actions:
                # If the state-action pair hasn't been visited enough times, incentivise it with the large optimistic reward (only while learning)
                if (self.Nsa[(self.s_cur, i)] < self.Ne) and self.learning:
                    self.scores.append(self.L_Reward)
                # If it has, get the true calculated utility of that state
                else:
                    self.scores.append(self.Q[(self.s_cur, i)])
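            # The loop above implements an optimistic exploration function f(u, n):
            # report a large reward while N(s, a) < Ne, and the learned Q-value otherwise.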

            # Count how many scores equal the max score. This lets us break ties randomly when several unseen state-action pairs share the optimistic reward.
            counter = 0  # Serves as a counter and index for max score
            max_score_index = []
            for i in self.scores:
                if i == max(self.scores):
                    max_score_index.append(counter)
                counter += 1

            # Pick the index of the highest score: chosen at random when several actions tie for the max, otherwise the single element in the list. This maps the score back to the action that produced it.
            if len(max_score_index) > 1:
                max_ = random.choice(max_score_index)
            else:
                max_ = max_score_index[0]

            # Map the index corresponding to the highest score back to its respective action in available_actions
            self.a = available_actions[max_]
            # Convert int action to actual action and return move.
            action = self.IntToMove(self.a)

        ############## If using epsilon exploration (not used)
        if self.epsilon_exploration:
            for i in available_actions:
                self.scores.append(self.Q[(self.s_cur, i)])

            # If less than epsilon, and we're learning, make a random choice
            if util.flipCoin(self.epsilon) and self.learning:
                self.a = random.choice(available_actions)
            else:
                max_ = self.scores.index(max(self.scores))
                self.a = available_actions[max_]
            action = self.IntToMove(self.a)

        ############## Return Action Arg Max Q(S', A') ##############
        return action
Beispiel #52
0
 def __le__(self, other):
     return util.flipCoin(0.5)
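     # Defining __le__ as a coin flip makes comparisons between such objects random, a common trick for breaking ties arbitrarily.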
Beispiel #53
0
    def chooseAction(self, state):
        start = time.time()

        self.debugDraw([(0, 0)], [0, 0, 0], clear=True)
        self_agent = state.getAgentState(self.index)
        actions = state.getLegalActions(self.index)
        food = state.getBlueFood().count(
            True) if self.isOnRedTeam else state.getRedFood().count(True)

        # Particle filtering
        self.observeState()

        particle_filtering_time = time.time() - start

        # If we're carrying enough, just go home!
        if self_agent.numCarrying >= 3:
            return self.returnHome(state)

        # Otherwise, run minimax
        elif self.use_minimax:
            max_score = -99999
            max_action = None
            alpha = -99999
            beta = 99999
            for action in actions:
                # Update successor ghost positions to be the max pos in our particle distributions
                successor = state.generateSuccessor(self.index, action)
                ghosts = self.getBeliefDistribution().argMax()
                successor = self.setGhostPositions(successor, ghosts,
                                                   self.getOpponents(state))

                time_depth = 1 - particle_filtering_time - 0.2
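                # (time_depth budgets the remaining search time: roughly one second
                # minus the time already spent on particle filtering and a 0.2 s safety margin)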
                result = self.minimax(successor, start, 1, alpha, beta, 1,
                                      time_depth)
                if result >= max_score:
                    max_score = result
                    max_action = action

                if max_score > beta:
                    return max_action

                alpha = max(alpha, max_score)

            action = max_action

        # Or compute action from q-values
        else:
            action = random.choice(actions) if util.flipCoin(
                self.epsilon) else self.computeActionFromQValues(state)

        # Q-learning
        if self.learn:
            reward = self.getReward(
                state.generateSuccessor(self.index, action), state)
            self.update(state, action,
                        state.generateSuccessor(self.index, action), reward)

        # Draw particle distribution
        # self.drawBeliefs()

        # Update particles
        self.elapseTime(state)

        end = time.time()
        if end - start > 1: print("Overtime --> total time was ", end - start)

        return action
Beispiel #54
0
 def actionSelector(self, gameState):
     if util.flipCoin(self.epsilon):
         return random.choice(self.getLegalActions(gameState))
     return self.getPolicy(gameState)
  def chooseAction(self,gameState):
    start = time.time()
   
    
    actions = gameState.getLegalActions(self.index)
    action = None
    foodLeft = len(self.getFood(gameState).asList())

    myCurrentPos =  gameState.getAgentState(self.index).getPosition()
    InitialPosition = gameState.getInitialAgentPosition(self.index)
    enemies = []
    enemyGhost = []
    enemyPacman = []
    for opponent in self.getOpponents(gameState):
      enemy = gameState.getAgentState(opponent)
      enemies.append(enemy)
    #print"enemies", enemies
    enemyGhost = [a for a in enemies if not a.isPacman and a.getPosition() != None]
    enemyPacman = [a for a in enemies if a.isPacman and a.getPosition() != None]
    
    ghostPositions = []
    disToG = 6666
    

    ranges = []
    enemyGhostPosition = [Ghost.getPosition() for Ghost in enemyGhost]
    enemyPacmanPosition = [Pacman.getPosition() for Pacman in enemyPacman]
    



    mid = gameState.data.layout.width / 2

    if gameState.isOnRedTeam(self.index):
      mid = mid - 1
    else:
      mid = mid + 1

    legalPositions = [p for p in gameState.getWalls().asList(False) if p[1] > 1]
    border = [p for p in legalPositions if p[0] == mid]
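    # Walkable cells along our side of the dividing line; used below as retreat targets for the A* search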

    if len(enemyGhostPosition) >0 and not gameState.getAgentState(self.index).isPacman:
      disToG = min([self.getMazeDistance(myCurrentPos, ghostPos) for ghostPos in enemyGhostPosition])
    
    print 'position in action', myCurrentPos
    print 'If Astar111', self.aSt
    if disToG < 5:
      randomPoint = random.choice(border)
      while randomPoint == myCurrentPos:
        randomPoint = random.choice(border)
      self.farPoint = randomPoint
      self.aSt = True
      print 'If Astar', self.aSt
      print 'current pacman position', myCurrentPos
      self.stopAction = True
    
    actionList = []
    if myCurrentPos != self.farPoint:
      
      if self.aSt:
        bestDist = 9999
        #for action2 in actions:
        #successor = self.getSuccessor(gameState, action2,self.farPoint)
        #pos2 = successor.getAgentPosition(self.index)
        #action3 = None
        print 'self.actionList outside if-else', self.actionList
        if len(self.actionList) == 0:
          #self.actionList.remove('Stop')
          self.actionList = self.aStar(gameState,self.farPoint,myCurrentPos,start)
          
          #self.actionList = action3
          print 'self.actionList', self.actionList
          if len(self.actionList) != 0:
            if self.actionList[0] == 9999:
              self.aSt = False
              self.skipAstar = True
              self.actionList.remove(9999)
              print 'skip astar first'
        elif self.actionList[0] == 9999:
          self.aSt = False
          self.skipAstar = True
          self.actionList.remove(9999)
          print 'skip a star'
        else:
          #actionList = self.actionList
          #if len(self.actionList)>0:
          bestAction = self.actionList[0]
          print 'eval time in A STAR for agent %d: %.4f' % (self.index, time.time() - start)
          print 'bestAction', bestAction
            
            
          self.actionList.remove(bestAction)

          if myCurrentPos == InitialPosition or len(self.actionList) == 0:
            self.aSt = False
          else:
            #for move in actions:
            if bestAction in actions:
              return bestAction
            else:
              self.aSt = False
            #return bestAction
      else:
        self.aSt = False
        #bestDist = dist
        #print 'bestAction', bestAction
        #return bestAction
    else:
      self.aSt = False

    if len(actions) != 0:
      probability = util.flipCoin(self.epsilon)
      if probability:
        #print"random"
        action = random.choice(actions)
      else:
        action = self.getPolicy(gameState)
    
    # Note: this unconditionally overrides the epsilon-greedy choice above, so exploration is effectively disabled here
    action = self.getPolicy(gameState)
    """if self.stopAction and not self.skipAstar:
      print'stop for a sec'
      action = Directions.STOP
      self.stopAction = False"""
      
    """elif self.aSt:
      action = Directions.STOP
      self.stopAction = False
      print'stop second time'"""
    print"final action",action
    print 'eval time in Q-Learning for agent %d: %.4f' % (self.index, time.time() - start)
    return action
 def p(): return n0tf0rn00bs.flipCoin(mutationProbability)
 q = np.array([z if p() else 1 - z for z in chromosome])
Beispiel #57
0
    def chooseAction(self, gameState):
        """
        Picks the best of all legal actions based on their estimated Q values, which are computed
        with a linear combination of the feature values and their weights.
        This is the function that is called at every turn; any other functions that should be called
        each turn should be called from here.
        """
        # Only update weights if we are currently training
        if TRAINING:
            # First, update weights based on reward received from the move we just took
            s_prime = self.getCurrentObservation()
            a = self.lastAction

            agentName = "Offensive Agent" if isinstance(self, OffensiveDummyAgent) else "Defensive Agent"
            showOutput = (DEBUG_OFFENSE_ONLY and agentName == "Offensive Agent") or (DEBUG_DEFENSE_ONLY and agentName == "Defensive Agent")
            if showOutput:
                print(agentName + " action just taken: " + str(a))

            s = self.getPreviousObservation() if len(
                self.observationHistory) > 1 else s_prime

            reward = self.getReward(s, a, s_prime) # sets self.justDied = 25
            self.updateWeights(s, a, s_prime, reward)

        # Choose our next action!
        actions = gameState.getLegalActions(self.index)

        qValuesOfNextActions = [self.evaluatePotentialNextState(
            gameState, a) for a in actions]
        maxValue = max(qValuesOfNextActions)
        bestActions = [a for a, v in zip(
            actions, qValuesOfNextActions) if v == maxValue]

        # If there are 2 (or fewer) pellets left, the game is pretty much over, so the best action will be 
        # the one that moves us closer to where we initially started
        foodLeft = len(self.getFood(gameState).asList())
        if foodLeft <= 2:
            actionChoice = self.getActionToGoBackHome(gameState, actions)
            self.lastAction = actionChoice

            agentName = "Offensive Agent" if isinstance(self, OffensiveDummyAgent) else "Defensive Agent"
            showOutput = (DEBUG_OFFENSE_ONLY and agentName == "Offensive Agent") or (DEBUG_DEFENSE_ONLY and agentName == "Defensive Agent")
            if showOutput:
                print(agentName + " CHOOSING ACTION: " + str(actionChoice))

            return actionChoice

        # Do a coin flip with probability self.epsilon to choose randomly instead of optimally, only if TRAINING
        coin_flip = util.flipCoin(self.epsilon)
        if coin_flip and TRAINING:
            actionChoice = random.choice(actions)
            self.lastAction = actionChoice

            agentName = "Offensive Agent" if isinstance(self, OffensiveDummyAgent) else "Defensive Agent"
            showOutput = (DEBUG_OFFENSE_ONLY and agentName == "Offensive Agent") or (DEBUG_DEFENSE_ONLY and agentName == "Defensive Agent")
            if showOutput:
                print(agentName + " CHOOSING ACTION: " + str(actionChoice))

            return actionChoice

        # In all other cases, choose the best action based on computed Q values
        # If multiple actions are tied, break ties randomly.
        actionChoice = random.choice(bestActions)
        self.lastAction = actionChoice

        agentName = "Offensive Agent" if isinstance(self, OffensiveDummyAgent) else "Defensive Agent"
        showOutput = (DEBUG_OFFENSE_ONLY and agentName == "Offensive Agent") or (DEBUG_DEFENSE_ONLY and agentName == "Defensive Agent")
        if showOutput:
            print(agentName + " CHOOSING ACTION: " + str(actionChoice))

        return actionChoice
Beispiel #58
0
            messageCallback = lambda x: None

        else:

            if opts.manual and opts.agent == None:
                displayCallback = lambda state: display.displayNullValues(state)
            else:
                if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state,
                                                                                                 "CURRENT VALUES")
                if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state,
                                                                                                "CURRENT VALUES")
                if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state,
                                                                                             "CURRENT Q-VALUES")

        if episode < a.phaseOneEpisodes:
            if util.flipCoin(0.5):
                goalMode = 'maxQMode'
            else:
                goalMode = 'minQMode'
        else:
            goalMode = 'deceptiveMode'
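        # goalMode selected above: phase-one episodes alternate randomly between maximising and minimising Q, after which the deceptive mode is used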

        # Run episode according to goal mode
        returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback,
                              episode, goalMode)
        a.episodesSoFar += 1
        a.maxQValuePolicy = False
        a.stepCount = 0

    if episodes > 0:
        print
Beispiel #59
0
    first = False
else:
    # alpha = (alpha) / (alpha + count / 100)   # decay learning rate
    count += 1
    #if count % 300 == 0:
    #	Q_values = util.Counter()
    status = getStatus(input, output)  # win or tie or lost
    if status == 'win':
        reward = 1
    elif status == 'tie':
        reward = 0
    else:
        reward = -1
    change = getChange(input, last_input)  # + or - or o
    new_state = (status, change)
    new_q = max([Q_values[(new_state, action)] for action in actions])
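    # Q-learning update in convex-combination form: Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))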
    Q_values[(state, output)] = (1.0 - alpha) * Q_values[(state, output)] + \
           alpha * (reward + gamma * new_q)
    state = new_state
    if count < 20:  #count % 100 == 1:
        f.write('{0} {1} {2} {3}\n'.format(input, output, reward, Q_values))
    if util.flipCoin(epsilon):  # epsilon greedy
        output = rd.choice(actions)
    else:
        value = max([Q_values[(state, action)] for action in actions])
        output = rd.choice([
            action for action in actions if Q_values[(state, action)] == value
        ])
        # TODO, exploration
f.close()
Beispiel #60
0
    def MCTS(self, curState):
        # First, determine the tactics and set the timer
        startTime = time.time()
        QValues = util.Counter()
        Values = util.Counter()
        tactic = self.getTactics(curState, self.index)
        print(self.index, "  ", tactic)  # FIXME for debug purpose

        # Do the main loop of MCTS
        fringe = PriorityQueue()
        fringe.push(curState, 0)
        tempActions = Deque()
        bestActions = Deque()
        pathAndReward = util.Stack()
        bestReward = float("-inf")
        expandedStates = util.Counter()
        nextStates = util.Counter()
        curDepth = 0
        while fringe.isEmpty() is False:
            state = fringe.pop()
            topPos = state.getAgentPosition(self.index)
            if curDepth >= self.depth:
                # Backpropagation
                cumulativeReward = 0
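                # Walk the sampled path backwards, giving each state its discounted return V(s) = r(s) + discount * V(next)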
                while pathAndReward.isEmpty() is False:
                    state, reward = pathAndReward.pop()
                    cumulativeReward = reward + cumulativeReward * self.discount
                    Values[state] = cumulativeReward
                if cumulativeReward > bestReward:
                    bestReward = cumulativeReward
                    bestActions = tempActions
                    # print(bestActions.list)
                    # util.pause()
                (priority, _, _) = fringe.peekPriority()
                depthDiff = curDepth + priority
                curDepth = -priority
                for _ in range(depthDiff):
                    tempActions.popBack()
            else:
                reward = self.getReward(state)
                pathAndReward.push((state, reward))
                if expandedStates[state] > 0:
                    # Not only calculate Q(s, a), should consider V(s) for some descendants
                    expandedStates[state] += 1
                    actionProb = util.Counter()
                    for action in nextStates[state]:
                        nextState = nextStates[state][action]
                        # If next state is expanded, use V(s)
                        if expandedStates[nextState] > 0:
                            actionProb[action] = Values[nextState]
                        # If next state is not expanded, use Q(s, a)
                        else:
                            actionProb[action] = QValues[topPos][action]
                    # Calculate probability according to Q(s, a) or V(s)
                    actionProb = softmax(actionProb)
                else:
                    # If the state has not been expanded, expand the state
                    expandedStates[state] += 1
                    legalActions = state.getLegalActions(self.index)
                    actionProb = util.Counter()
                    for action in legalActions:
                        # print(self.getQValue(topState, action, tactic), QValues[topPos][action])
                        if QValues[topPos] == 0:
                            QValues[topPos] = util.Counter()
                        QValues[topPos][action] = self.getQValue(
                            state, action, tactic)[0]
                        actionProb[action] = QValues[topPos][action]
                        if nextStates[state] == 0:
                            nextStates[state] = util.Counter()
                        nextStates[state][action] = self.getNextState(
                            state, action)
                    actionProb = softmax(
                        actionProb
                    )  # Calculate probability according to Q(s, a)
                # Choose action according to action probability
                flip = random.random()
                cumulative = 0
                chosenAction = "Error"  # Marking error
                if util.flipCoin(self.epsilon):
                    for prob in actionProb:
                        if cumulative <= flip <= cumulative + actionProb[prob]:
                            chosenAction = prob
                            break
                        else:
                            cumulative += actionProb[prob]
                else:
                    chosenAction = actionProb.argMax()
                tempActions.push((chosenAction, QValues[topPos][chosenAction]))
                nextState = nextStates[state][chosenAction]

                # Determine whether to do a back track
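                # (the chance of re-queuing the root state decays exponentially with the current depth)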
                if util.flipCoin(1 / exp(.4 * (curDepth + self.bias))):
                    fringe.push(curState, -curDepth)
                curDepth += 1
                fringe.push(nextState, -curDepth)
            endTime = time.time()
            if endTime - startTime > self.timeInterval:
                break
        self.actionsChosen = bestActions