Exemple #1
0
    def update_q(self, q, new_state: GameState, previous_state: GameState, action_taken: tuple, reward: int) -> dict:
        """
        Apply one temporal-difference update to a single Q-table entry.

        The rule applied is:

            Q[s, a] <- Q[s, a] + alpha * (r + gamma * max_value(s') - Q[s, a])

        where ``s`` is ``previous_state.value()``, ``a`` is ``action_taken``,
        ``r`` is ``reward`` and ``max_value(s')`` is whatever
        ``self.get_max_value(new_state.value(), q)`` returns (presumably the
        best stored value for the new state -- confirm against that helper).

        :param q: nested table ``{state_key: {action: value}}``; mutated in place.
        :param new_state: state reached after the action; only ``value()`` is used.
        :param previous_state: state the action was taken from; only ``value()`` is used.
        :param action_taken: action key, or ``None`` (stored as ``("end", "end")``).
        :param reward: reward observed for the transition.
        :return: the same ``q`` object that was passed in.
        """
        state_key = previous_state.value()

        # A missing action is filed under a fixed sentinel key.
        action_key = ("end", "end") if action_taken is None else action_taken

        # Entries that have never been seen start out at the immediate reward.
        old_estimate = q.setdefault(state_key, {}).setdefault(action_key, reward)

        best_next = self.get_max_value(new_state.value(), q)
        td_target = reward + (self.gamma * best_next)
        q[state_key][action_key] = old_estimate + self.alpha * (td_target - old_estimate)

        return q