def update_q(self, q, new_state: GameState, previous_state: GameState, action_taken: tuple, reward: int) -> dict:
    """Apply one temporal-difference update to a single entry of the Q-table.

    The entry for ``(previous_state, action_taken)`` is moved towards the
    bootstrapped target using::

        Q[s, a] <- Q[s, a] + alpha * (reward + gamma * V(s') - Q[s, a])

    where ``V(s')`` is whatever ``self.get_max_value(new_state.value(), q)``
    returns (presumably the best known value of the next state -- confirm
    against that helper).

    Args:
        q: Nested mapping ``{state_hash: {action: value}}``; mutated in place.
        new_state: State reached after the action; only ``.value()`` is used.
        previous_state: State the action was taken from; only ``.value()``
            is used, as the outer key into ``q``.
        action_taken: Action key. ``None`` is stored under ``("end", "end")``.
        reward: Reward observed for the transition.

    Returns:
        The same ``q`` object that was passed in, after the update.
    """
    state_key = previous_state.value()

    # A missing action is recorded under a fixed sentinel key.
    action_key = ("end", "end") if action_taken is None else action_taken

    # Create the per-state table on first visit, and seed a never-seen
    # action with the observed reward before the update is applied.
    action_values = q.setdefault(state_key, {})
    old_value = action_values.setdefault(action_key, reward)

    # Read alpha first, then evaluate the target, mirroring the original
    # evaluation order. The lookup for the next state happens only after the
    # entries above exist in q.
    learning_rate = self.alpha
    td_target = reward + (self.gamma * self.get_max_value(new_state.value(), q))
    td_error = td_target - old_value

    q[state_key][action_key] = old_value + learning_rate * td_error
    return q