# Import paths below assume the `mushroom` package layout; adjust them to
# wherever TD, EligibilityTrace and Table live in your install.
from mushroom.algorithms.value.td import TD
from mushroom.utils.eligibility_trace import EligibilityTrace
from mushroom.utils.table import Table


class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, learning_rate, lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)

        super().__init__(self.Q, policy, mdp_info, learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        # SARSA is on-policy: the bootstrap uses the action actually
        # drawn for the next step. Absorbing states bootstrap from 0.
        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        # TD error: delta = r + gamma * Q(s', a') - Q(s, a).
        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        # Update every state-action pair in proportion to its trace,
        # then decay all traces by gamma * lambda.
        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        # Traces must not leak across episodes.
        self.e.reset()

        super().episode_start()
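# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the class). Module paths and
# constructor signatures for Core, GridWorld, EpsGreedy and Parameter are
# assumptions about the surrounding `mushroom` library and may need to be
# adapted to your version of it.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from mushroom.core import Core
    from mushroom.environments import GridWorld
    from mushroom.policy import EpsGreedy
    from mushroom.utils.parameters import Parameter

    # A small grid world with a single goal state.
    mdp = GridWorld(height=3, width=3, goal=(2, 2), start=(0, 0))

    # Epsilon-greedy exploration and a constant learning rate.
    pi = EpsGreedy(epsilon=Parameter(value=.1))
    learning_rate = Parameter(value=.1)

    agent = SARSALambdaDiscrete(pi, mdp.info, learning_rate,
                                lambda_coeff=.9, trace='replacing')

    # Fit after every sample, as required by an online TD method.
    core = Core(agent, mdp)
    core.learn(n_episodes=100, n_steps_per_fit=1)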