Example no. 1
class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, params):
        self.Q = Table(mdp_info.size)
        self._lambda = params['algorithm_params']['lambda']

        trace = params['algorithm_params'].get('trace', 'replacing')
        self.e = EligibilityTrace(self.Q.shape, trace)
        super(SARSALambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                                  params)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self._next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self._next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()
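
The constructor above reads its hyperparameters from a nested params dictionary. Below is a minimal sketch of the entries this excerpt actually accesses; the values are hypothetical, and the TD base class (not shown) may require further keys, e.g. for the learning rate.

# Hypothetical params dict for the SARSALambdaDiscrete constructor above;
# only the keys read in this excerpt are included.
params = {
    'algorithm_params': {
        'lambda': .9,          # eligibility trace decay coefficient
        'trace': 'replacing',  # type of eligibility trace
    }
}
# agent = SARSALambdaDiscrete(policy, mdp_info, params)  # policy and mdp_info assumed given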
Example no. 2
class QLambdaDiscrete(TD):
    """
    Discrete version of the Q(lambda) algorithm.

    """
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super(QLambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                              learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        if not absorbing:
            a_max = np.argmax(self.Q[next_state, :])
            q_next = self.Q[next_state, a_max]
        else:
            q_next = 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        # Watkins-style trace cut: decay the traces only when the action to be
        # taken in next_state is greedy w.r.t. Q, otherwise reset them.
        if not absorbing:
            self.next_action = self.draw_action(next_state)
            if self.Q[next_state, self.next_action] == q_next:
                self.e.table *= self.mdp_info.gamma * self._lambda
            else:
                self.e.reset()

    def episode_start(self):
        self.e.reset()
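
Besides the trace cut, the difference with respect to SARSA(lambda) lies in the TD target: Q(lambda) bootstraps from the greedy action in next_state, while SARSA(lambda) bootstraps from the action the policy actually draws. A toy numpy illustration with assumed values (not taken from the examples):

import numpy as np

Q_next = np.array([.2, 1., .5])  # Q(next_state, .), assumed values
reward, gamma = 1., .99
next_action = 2                  # action the policy happens to draw

sarsa_target = reward + gamma * Q_next[next_action]  # on-policy target (SARSA(lambda))
q_target = reward + gamma * Q_next.max()             # greedy target (Q(lambda))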
Example no. 3
class SARSALambda(TD):
    """
    The SARSA(lambda) algorithm for finite MDPs.

    """
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super().__init__(self.Q, policy, mdp_info, learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()

        super().episode_start()
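
A usage sketch for the class above. The support objects are assumed to follow a mushroom_rl-style API; the import paths and signatures below are assumptions and may differ across library versions.

# Sketch only: module paths and signatures are assumptions, not confirmed API.
from mushroom_rl.core import MDPInfo
from mushroom_rl.policy import EpsGreedy
from mushroom_rl.utils.parameters import Parameter
from mushroom_rl.utils.spaces import Discrete

mdp_info = MDPInfo(Discrete(9), Discrete(4), gamma=.99, horizon=100)  # toy 9-state, 4-action MDP
policy = EpsGreedy(epsilon=Parameter(value=.1))
agent = SARSALambda(policy, mdp_info, learning_rate=Parameter(value=.1),
                    lambda_coeff=.9, trace='replacing')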
Example no. 4
class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, learning_rate, lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super(SARSALambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                                  learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()
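
All four examples delegate the bookkeeping of the traces to an EligibilityTrace helper that is not shown. The sketch below illustrates, on a bare numpy table, what the two trace types named by the trace argument are presumed to do; it is an assumption about that helper, not its actual implementation.

import numpy as np

e = np.zeros((9, 4))             # one trace per state-action pair
state, action = 3, 1

e[state, action] = 1.            # 'replacing' trace: clamp the visited pair to 1
# e[state, action] += 1.         # 'accumulating' trace: add 1 on every visit

gamma, lambda_coeff = .99, .9
e *= gamma * lambda_coeff        # decay applied after each Q-table update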