def observationFunction(self, state):
    if self.lastState is not None:
        # This variant passes a constant zero reward; the commented line
        # would instead charge a small living penalty per step.
        reward = 0.0
        # reward -= 1.
        self.observeTransition(self.lastState, self.lastAction, state,
                               reward)
    return CaptureAgent.observationFunction(self, state)
def observationFunction(self, state):
    """
    This is where we ended up after our last action.
    The simulation should somehow ensure this is called.
    """
    CaptureAgent.observationFunction(self, state)
    if self.lastState is not None:
        # The reward is the score change produced by the last move.
        reward = state.getScore() - self.lastState.getScore()
        if reward != 0:
            print(reward)
        print("weights:", self.weights)
        # Pass the reward on to the Q-learning update.
        self.observeTransition(self.lastState, self.lastAction, state,
                               reward)
    return state
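
Each variant assumes the agent recorded self.lastState and self.lastAction when it chose its previous move; otherwise there is no transition to score. Below is a minimal sketch of that bookkeeping, assuming an epsilon-greedy chooseAction in the capture framework; epsilon and getQValue are illustrative assumptions, not taken from these examples.

import random

def chooseAction(self, gameState):
    # Epsilon-greedy over legal moves (illustrative policy choice,
    # not from the snippets above).
    actions = gameState.getLegalActions(self.index)
    if random.random() < self.epsilon:
        action = random.choice(actions)
    else:
        action = max(actions, key=lambda a: self.getQValue(gameState, a))
    # Remember where we acted from, so observationFunction can later
    # score the transition (lastState, lastAction) -> nextState.
    self.lastState = gameState
    self.lastAction = action
    return action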
Example no. 3
def observationFunction(self, currentGameState):
    if self.lastState:
        # Reward is the score change since the last step.
        rewardChange = (currentGameState.getScore() -
                        self.lastState.getScore())
        # Apply a 1-step Q-value update for the observed transition.
        self.observeTransition(self.lastState, self.lastAction,
                               currentGameState, rewardChange)
    return CaptureAgent.observationFunction(self, currentGameState)
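
For example, if the agent's previous move raised the game score from 2 to 5 by capturing food, rewardChange is 3; a move that leaves the score untouched yields a reward of 0, which is also why the constant-zero variant in the first snippet learns nothing from the score.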
Example no. 4
def observationFunction(self, state):
    """
    This is where we ended up after our last action.
    The simulation should somehow ensure this is called.
    """
    if self.lastState is not None:
        reward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state, reward)

    return CaptureAgent.observationFunction(self, state)
Example no. 5
def observationFunction(self, state):
    """
    This is where we ended up after our last action.
    The simulation should somehow ensure this is called.
    """
    if self.lastState is not None:
        reward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state, reward)

    return CaptureAgent.observationFunction(self, state)
Example no. 6
def observationFunction(self, state):
    # Update only once a previous observation exists and the agent is
    # still in its training phase.
    if not self.has_no_observation and \
       self.getPreviousObservation() is not None and \
       self.isInTraining():
        self.makeUpdate()
    return CaptureAgent.observationFunction(self, state)

def observationFunction(self, state):
    if self.lastState is not None:
        reward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state,
                               reward)
    return CaptureAgent.observationFunction(self, state)
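
None of the snippets show observeTransition itself. The following is a minimal sketch of what it plausibly delegates to, assuming the approximate Q-learning update style used in the Berkeley Pacman projects; discount, alpha, index, getFeatures, and getQValue are assumed attributes and helpers, and self.weights is assumed to default missing keys to 0 (e.g., a util.Counter).

def observeTransition(self, state, action, nextState, deltaReward):
    # Hand the observed transition to the one-step Q-learning update.
    self.update(state, action, nextState, deltaReward)

def update(self, state, action, nextState, reward):
    # Approximate Q-learning: move each feature weight toward the
    # TD target  reward + discount * max_a' Q(nextState, a').
    nextValue = max(
        (self.getQValue(nextState, a)
         for a in nextState.getLegalActions(self.index)),
        default=0.0)  # terminal states have no legal actions
    difference = (reward + self.discount * nextValue
                  - self.getQValue(state, action))
    for feature, value in self.getFeatures(state, action).items():
        self.weights[feature] += self.alpha * difference * value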