예제 #1
0
 def _update(self):
     """Advance the maze one step, then refresh self.state and self.reward.

     self.state is a length-4 vector in which exactly one entry is set to 1:
         index 0: robot is at the start position and self.goUp is true
         index 1: robot is at the start position and self.goUp is false
         index 2: robot is not at the start position and its second
                  coordinate equals self.length + 1
         index 3: robot is anywhere else

     NOTE(review): an earlier description of this encoding listed the
     meanings of index 2 and index 3 the other way around; the list above
     follows what the code does -- confirm which one was intended.

     self.reward is only overwritten here when the robot's second
     coordinate equals self.length + 1 and its first coordinate is exactly
     2 away from the goal's first coordinate. In that case the robot is
     moved onto the goal and the reward becomes self.bangReward. In every
     other case this method leaves self.reward as it is after
     Maze._update(self).
     """
     Maze._update(self)

     # state: start from all zeros, then switch on a single slot
     self.state = zeros(4)
     if self.perseus == self.initPos[0]:
         hot = 0 if self.goUp else 1
     elif self.perseus[1] == self.length + 1:
         hot = 2
     else:
         hot = 3
     self.state[hot] = 1

     # reward: a bad choice was taken when the robot sits two cells away
     # from the goal (first coordinate) in the column self.length + 1
     if (self.perseus[1] == self.length + 1
             and abs(self.perseus[0] - self.goal[0]) == 2):
         self.perseus = self.goal
         self.reward = self.bangReward
예제 #2
0
 def _update(self):
     """Advance the maze one step, then derive self.reward and self.state.

     self.reward becomes 1. when self.goal equals self.perseus and 0
     otherwise; reaching the goal does not trigger a reset here.
     self.state becomes a one-element array holding a single number
     computed from the two coordinates of self.perseus.
     """
     Maze._update(self)

     # reward: 1. on the goal, 0 everywhere else
     self.reward = 1. if self.goal == self.perseus else 0

     # state: fold the two coordinates into one number
     # NOTE(review): the first coordinate is scaled by mazeTable.shape[0].
     # If perseus is a (row, col) index into mazeTable, a collision-free
     # flat index would scale by shape[1] instead; the two only agree for
     # square tables -- confirm the coordinate layout before changing it.
     scaled_first = self.perseus[0] * self.mazeTable.shape[0]
     self.state = array([scaled_first + self.perseus[1]])