Example no. 1
class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, params):
        self.Q = Table(mdp_info.size)
        self._lambda = params['algorithm_params']['lambda']

        trace = params['algorithm_params'].get('trace', 'replacing')
        self.e = EligibilityTrace(self.Q.shape, trace)
        super(SARSALambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                                  params)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self._next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self._next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()
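
The constructor above reads its hyperparameters from a nested params dictionary. Below is a minimal sketch of the entries this excerpt actually accesses; the values are hypothetical, and the TD base class (not shown) may require further keys, e.g. for the learning rate.

# Hypothetical params dict for the SARSALambdaDiscrete constructor above;
# only the keys read in this excerpt are included.
params = {
    'algorithm_params': {
        'lambda': .9,          # eligibility trace decay coefficient
        'trace': 'replacing',  # type of eligibility trace
    }
}
# agent = SARSALambdaDiscrete(policy, mdp_info, params)  # policy and mdp_info assumed given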
Example no. 2
class QLambdaDiscrete(TD):
    """
    Discrete version of the Q(lambda) algorithm.

    """
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super(QLambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                              learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        if not absorbing:
            a_max = np.argmax(self.Q[next_state, :])
            q_next = self.Q[next_state, a_max]
        else:
            q_next = 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        # Watkins-style trace cut: decay the traces only when the action to be
        # taken in next_state is greedy w.r.t. Q, otherwise reset them.
        if not absorbing:
            self.next_action = self.draw_action(next_state)
            if self.Q[next_state, self.next_action] == q_next:
                self.e.table *= self.mdp_info.gamma * self._lambda
            else:
                self.e.reset()

    def episode_start(self):
        self.e.reset()
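
Besides the trace cut, the difference with respect to SARSA(lambda) lies in the TD target: Q(lambda) bootstraps from the greedy action in next_state, while SARSA(lambda) bootstraps from the action the policy actually draws. A toy numpy illustration with assumed values (not taken from the examples):

import numpy as np

Q_next = np.array([.2, 1., .5])  # Q(next_state, .), assumed values
reward, gamma = 1., .99
next_action = 2                  # action the policy happens to draw

sarsa_target = reward + gamma * Q_next[next_action]  # on-policy target (SARSA(lambda))
q_target = reward + gamma * Q_next.max()             # greedy target (Q(lambda))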
Example no. 3
class SARSALambda(TD):
    """
    The SARSA(lambda) algorithm for finite MDPs.

    """
    def __init__(self,
                 policy,
                 mdp_info,
                 learning_rate,
                 lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super().__init__(self.Q, policy, mdp_info, learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()

        super().episode_start()
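
A usage sketch for the class above. The support objects are assumed to follow a mushroom_rl-style API; the import paths and signatures below are assumptions and may differ across library versions.

# Sketch only: module paths and signatures are assumptions, not confirmed API.
from mushroom_rl.core import MDPInfo
from mushroom_rl.policy import EpsGreedy
from mushroom_rl.utils.parameters import Parameter
from mushroom_rl.utils.spaces import Discrete

mdp_info = MDPInfo(Discrete(9), Discrete(4), gamma=.99, horizon=100)  # toy 9-state, 4-action MDP
policy = EpsGreedy(epsilon=Parameter(value=.1))
agent = SARSALambda(policy, mdp_info, learning_rate=Parameter(value=.1),
                    lambda_coeff=.9, trace='replacing')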
Example no. 4
class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, learning_rate, lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super(SARSALambdaDiscrete, self).__init__(self.Q, policy, mdp_info,
                                                  learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        self.e.reset()
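
All four examples delegate the bookkeeping of the traces to an EligibilityTrace helper that is not shown. The sketch below illustrates, on a bare numpy table, what the two trace types named by the trace argument are presumed to do; it is an assumption about that helper, not its actual implementation.

import numpy as np

e = np.zeros((9, 4))             # one trace per state-action pair
state, action = 3, 1

e[state, action] = 1.            # 'replacing' trace: clamp the visited pair to 1
# e[state, action] += 1.         # 'accumulating' trace: add 1 on every visit

gamma, lambda_coeff = .99, .9
e *= gamma * lambda_coeff        # decay applied after each Q-table update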