Example 1
 def update_chain(self, state, action, reward):
     # Hash the state chain so it can be used as a dictionary key
     lhstate = listhash(state)
     if lhstate not in self.q:
         self.q[lhstate] = dict()
     if action not in self.q[lhstate]:
         # First time this (state, action) pair is seen: store the raw reward
         self.q[lhstate][action] = reward
     else:
         # Blend the stored value with the new reward using the
         # learning rate and discount
         val = self.q[lhstate][action]
         self.q[lhstate][action] = val + self.learning_rate * \
             (reward - self.discount * val)
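The listhash helper and the class these methods belong to are not shown in the examples. As a rough, hypothetical sketch of how update_chain might be wired up (the listhash implementation and the ChainAgent class below are assumptions for illustration, not the original code):

import hashlib

def listhash(items):
    # Hypothetical helper: a stable hash over a sequence of observations,
    # assumed to turn a (possibly unhashable) state chain into a dict key.
    return hashlib.md5(repr(list(items)).encode("utf-8")).hexdigest()

class ChainAgent:
    # Minimal stand-in for the agent class (an assumption), holding the
    # Q-table, the recent state chain, and the update hyperparameters.
    def __init__(self, learning_rate=0.1, discount=0.9):
        self.q = dict()
        self.chain = []
        self.learning_rate = learning_rate
        self.discount = discount

    def update_chain(self, state, action, reward):
        lhstate = listhash(state)
        if lhstate not in self.q:
            self.q[lhstate] = dict()
        if action not in self.q[lhstate]:
            self.q[lhstate][action] = reward
        else:
            val = self.q[lhstate][action]
            self.q[lhstate][action] = val + self.learning_rate * \
                (reward - self.discount * val)

agent = ChainAgent()
agent.update_chain(["s0", "s1"], "right", 1.0)   # first visit stores the reward
agent.update_chain(["s0", "s1"], "right", 0.5)   # later visits blend it in
print(agent.q)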
Example 2
 def update_chain(self, state, action, reward):
     # Hash the state chain so it can be used as a dictionary key
     lhstate = listhash(state)
     if lhstate not in self.q:
         self.q[lhstate] = dict()
     if action not in self.q[lhstate]:
         # First time this (state, action) pair is seen: store the raw reward
         self.q[lhstate][action] = reward
     else:
         # Blend the stored value with the new reward using the
         # learning rate and discount
         val = self.q[lhstate][action]
         self.q[lhstate][action] = val + self.learning_rate * \
             (reward - self.discount * val)
Example 3
 def get_greedy_action(self, available_actions):
     # Do a tree search in the previously seen states
     # that match the current state
     best_action = None
     best_value = None
     for state in sublists(self.chain):
         lhstate = listhash(state)
         if lhstate in self.q:
             s = self.q[lhstate]
             for a in available_actions:
                 if a in s:
                     val = s[a]
                     # Guard against comparing with None on the first match
                     if best_value is None or val > best_value:
                         best_action = a
                         best_value = val
     return best_action
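The sublists helper is likewise not included. A plausible reading, and purely an assumption, is that it yields the suffixes of the recent state chain, so the greedy lookup consults both longer (more specific) and shorter histories and keeps the best-valued action across all of them; a minimal sketch:

def sublists(chain):
    # Hypothetical helper: yield the suffixes of the state chain,
    # longest first.
    for start in range(len(chain)):
        yield chain[start:]

# With chain = ["a", "b", "c"], the lookup would consult the Q entries
# for ["a", "b", "c"], ["b", "c"] and ["c"].
for state in sublists(["a", "b", "c"]):
    print(state)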
Example 4
 def get_greedy_action(self, available_actions):
     # Do a tree search in the previously seen states
     # that match the current state
     best_action = None
     best_value = None
     for state in sublists(self.chain):
         lhstate = listhash(state)
         if lhstate in self.q:
             s = self.q[lhstate]
             for a in available_actions:
                 if a in s:
                     val = s[a]
                     # Guard against comparing with None on the first match
                     if best_value is None or val > best_value:
                         best_action = a
                         best_value = val
     return best_action