Esempio n. 1
0
 def receive_reward(self, reward):
     """Propagate ``reward`` to every sub-chain of ``self.chain``.

     Each sequence yielded by ``sublists(self.chain)`` is split into its
     first element (the action) and the remaining elements (the state),
     and the triple is handed to ``self.update_chain``. Once all
     sub-chains are processed, ``self.on_frame_end()`` is called.

     NOTE(review): the exact sequences produced by ``sublists`` are not
     visible here -- confirm each one is non-empty, since the first
     element is indexed unconditionally.
     """
     for history in sublists(self.chain):
         # The head of the sequence is the action; the previous moves
         # (everything after the head) are treated as the current state.
         self.update_chain(history[1:], history[0], reward)
     self.on_frame_end()
Esempio n. 2
0
 def receive_reward(self, reward):
     """Propagate ``reward`` to every sub-chain of ``self.chain``.

     Each sequence yielded by ``sublists(self.chain)`` is split into its
     first element (the action) and the remaining elements (the state),
     and the triple is handed to ``self.update_chain``. Once all
     sub-chains are processed, ``self.on_frame_end()`` is called.

     NOTE(review): the exact sequences produced by ``sublists`` are not
     visible here -- confirm each one is non-empty, since the first
     element is indexed unconditionally.
     """
     for history in sublists(self.chain):
         # The head of the sequence is the action; the previous moves
         # (everything after the head) are treated as the current state.
         self.update_chain(history[1:], history[0], reward)
     self.on_frame_end()
Esempio n. 3
0
 def get_greedy_action(self, available_actions):
     """Return the available action with the highest stored value.

     Every sequence yielded by ``sublists(self.chain)`` is hashed with
     ``listhash`` and looked up in ``self.q``. For each match, the values
     stored for the actions in ``available_actions`` are compared, and
     the action with the largest value seen across all matches wins.
     Ties keep the action that was encountered first.

     Returns:
         The best action found, or ``None`` when no matching state holds
         a value for any of the available actions.
     """
     # Do a tree search in the previously seen states
     # that match the current state
     best_action = None
     best_value = None
     for state in sublists(self.chain):
         state_key = listhash(state)
         if state_key not in self.q:
             continue
         action_values = self.q[state_key]
         for action in available_actions:
             if action not in action_values:
                 continue
             value = action_values[action]
             # Check for None explicitly: ordering comparisons against
             # None raise TypeError on Python 3, so the first candidate
             # must be accepted without comparing it to the sentinel.
             if best_value is None or value > best_value:
                 best_action = action
                 best_value = value
     return best_action
Esempio n. 4
0
 def get_greedy_action(self, available_actions):
     """Return the available action with the highest stored value.

     Every sequence yielded by ``sublists(self.chain)`` is hashed with
     ``listhash`` and looked up in ``self.q``. For each match, the values
     stored for the actions in ``available_actions`` are compared, and
     the action with the largest value seen across all matches wins.
     Ties keep the action that was encountered first.

     Returns:
         The best action found, or ``None`` when no matching state holds
         a value for any of the available actions.
     """
     # Do a tree search in the previously seen states
     # that match the current state
     best_action = None
     best_value = None
     for state in sublists(self.chain):
         state_key = listhash(state)
         if state_key not in self.q:
             continue
         action_values = self.q[state_key]
         for action in available_actions:
             if action not in action_values:
                 continue
             value = action_values[action]
             # Check for None explicitly: ordering comparisons against
             # None raise TypeError on Python 3, so the first candidate
             # must be accepted without comparing it to the sentinel.
             if best_value is None or value > best_value:
                 best_action = action
                 best_value = value
     return best_action