def receive_reward(self, reward):
    """Apply ``reward`` to every sub-chain of ``self.chain``, then end the frame.

    For each sequence yielded by ``sublists(self.chain)``, the first
    element is treated as the action and the remaining elements as the
    state that preceded it; the triple is handed to ``self.update_chain``.
    """
    for history in sublists(self.chain):
        # The earlier moves (everything after the head) act as the state
        # in which the head move was taken.
        previous_moves = history[1:]
        move = history[0]
        self.update_chain(previous_moves, move, reward)
    # NOTE(review): the original formatting was lost; this assumes the
    # frame-end hook runs once per reward, after all updates -- confirm.
    self.on_frame_end()
def get_greedy_action(self, available_actions):
    """Return the available action with the highest stored value.

    Searches the previously seen states that match the current state:
    every sequence yielded by ``sublists(self.chain)`` is hashed with
    ``listhash`` and looked up in ``self.q``. For each matching entry, the
    values of the actions in ``available_actions`` are compared.

    Args:
        available_actions: iterable of candidate actions; it is iterated
            once per matching state.

    Returns:
        The action with the strictly greatest stored value across all
        matching states (the first one encountered wins ties), or ``None``
        when no matching state holds a value for any available action.
    """
    best_action = None
    best_value = None
    for state in sublists(self.chain):
        state_key = listhash(state)
        if state_key not in self.q:
            continue
        action_values = self.q[state_key]
        for action in available_actions:
            if action not in action_values:
                continue
            value = action_values[action]
            # Explicit None check: ordering a value against None raises
            # TypeError on Python 3, so the first candidate must be
            # accepted without comparing it to the initial None.
            if best_value is None or value > best_value:
                best_action = action
                best_value = value
    return best_action