# Import paths below assume the `mushroom` package layout; adjust them to
# wherever TD, EligibilityTrace and Table live in your install.
from mushroom.algorithms.value.td import TD
from mushroom.utils.eligibility_trace import EligibilityTrace
from mushroom.utils.table import Table


class SARSALambdaDiscrete(TD):
    """
    Discrete version of the SARSA(lambda) algorithm.

    """
    def __init__(self, policy, mdp_info, learning_rate, lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)

        super().__init__(self.Q, policy, mdp_info, learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        q_current = self.Q[state, action]

        # SARSA is on-policy: the bootstrap uses the action actually
        # drawn for the next step. Absorbing states bootstrap from 0.
        self.next_action = self.draw_action(next_state)
        q_next = self.Q[next_state, self.next_action] if not absorbing else 0.

        # TD error: delta = r + gamma * Q(s', a') - Q(s, a).
        delta = reward + self.mdp_info.gamma * q_next - q_current
        self.e.update(state, action)

        # Update every state-action pair in proportion to its trace,
        # then decay all traces by gamma * lambda.
        self.Q.table += self.alpha(state, action) * delta * self.e.table
        self.e.table *= self.mdp_info.gamma * self._lambda

    def episode_start(self):
        # Traces must not leak across episodes.
        self.e.reset()

        super().episode_start()
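# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the class). Module paths and
# constructor signatures for Core, GridWorld, EpsGreedy and Parameter are
# assumptions about the surrounding `mushroom` library and may need to be
# adapted to your version of it.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from mushroom.core import Core
    from mushroom.environments import GridWorld
    from mushroom.policy import EpsGreedy
    from mushroom.utils.parameters import Parameter

    # A small grid world with a single goal state.
    mdp = GridWorld(height=3, width=3, goal=(2, 2), start=(0, 0))

    # Epsilon-greedy exploration and a constant learning rate.
    pi = EpsGreedy(epsilon=Parameter(value=.1))
    learning_rate = Parameter(value=.1)

    agent = SARSALambdaDiscrete(pi, mdp.info, learning_rate,
                                lambda_coeff=.9, trace='replacing')

    # Fit after every sample, as required by an online TD method.
    core = Core(agent, mdp)
    core.learn(n_episodes=100, n_steps_per_fit=1)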