class SARSALambdaContinuous(TD):
    """
    SARSA(lambda) algorithm, continuous version.

    The Q-function is held by a ``Regressor`` and its weights are updated
    along an eligibility trace ``e`` that has one entry per weight.

    """
    def __init__(self, mdp_info, policy, approximator, learning_rate,
                 lambda_coeff, features, approximator_params=None):
        """
        Constructor.

        Args:
            mdp_info: forwarded unchanged to the ``TD`` constructor;
            policy: forwarded unchanged to the ``TD`` constructor;
            approximator: first argument given to ``Regressor`` to build
                the Q-function approximator;
            learning_rate: forwarded unchanged to the ``TD`` constructor;
            lambda_coeff (float): eligibility trace coefficient;
            features: forwarded unchanged to the ``TD`` constructor;
            approximator_params (dict, None): keyword arguments given to
                ``Regressor``; an empty dictionary is used when None.

        """
        if approximator_params is None:
            self._approximator_params = dict()
        else:
            self._approximator_params = approximator_params

        self.Q = Regressor(approximator, **self._approximator_params)
        # One trace entry per approximator weight, initially all zeros.
        self.e = np.zeros(self.Q.weights_size)
        self._lambda = lambda_coeff

        self._add_save_attr(
            _approximator_params='pickle',
            Q='pickle',
            _lambda='numpy',
            e='numpy'
        )

        super().__init__(mdp_info, policy, self.Q, learning_rate, features)

    def _update(self, state, action, reward, next_state, absorbing):
        """
        Apply one SARSA(lambda) step to the weights of the approximator.

        Args:
            state: state in which ``action`` was taken;
            action: action taken in ``state``;
            reward: reward obtained by the transition;
            next_state: state reached by the transition;
            absorbing: when true, the value of ``next_state`` is taken
                to be zero.

        """
        gamma = self.mdp_info.gamma

        state_features = self.phi(state)
        q_current = self.Q.predict(state_features, action)

        alpha = self.alpha(state, action)

        # Decay the trace by gamma * lambda, then add the derivative of the
        # approximator at the current state-action pair.
        trace_decay = gamma * self._lambda
        self.e = trace_decay * self.e + self.Q.diff(state_features, action)

        # The next action is always drawn and stored, even when the next
        # state is absorbing.
        self.next_action = self.draw_action(next_state)
        next_state_features = self.phi(next_state)
        if absorbing:
            q_next = 0.
        else:
            q_next = self.Q.predict(next_state_features, self.next_action)

        td_target = reward + gamma * q_next
        td_error = td_target - q_current

        # Move the weights along the trace, scaled by the learning rate and
        # the temporal-difference error.
        weights = self.Q.get_weights()
        weights += alpha * td_error * self.e
        self.Q.set_weights(weights)

    def episode_start(self):
        """
        Zero the eligibility trace, then run the parent ``episode_start``.

        """
        self.e = np.zeros(self.Q.weights_size)

        super().episode_start()
import numpy as np
from matplotlib import pyplot as plt

from mushroom_rl.approximators import Regressor
from mushroom_rl.approximators.parametric import LinearApproximator

# Script: fit a linear regressor on noisy samples of a line, print the
# fitted weights and the result of ``diff``, then plot data and fit.

# Inputs 0..9 arranged as a (10, 1) column.
x = np.arange(10)[:, np.newaxis]

# Targets follow y = 2 * x + intercept, perturbed by standard normal noise.
intercept = 10
noise = np.random.randn(10, 1) * 1
y = 2 * x + intercept + noise

# Feature matrix: a column of ones next to the inputs.
phi = np.concatenate([np.ones((10, 1)), x], axis=1)

regressor = Regressor(
    LinearApproximator,
    input_shape=(2, ),
    output_shape=(1, ),
)
regressor.fit(phi, y)

weights = regressor.get_weights()
print('Weights: ' + str(weights))

# Value returned by ``diff`` for the single input [[5.]].
gradient = regressor.diff(np.array([[5.]]))
print('Gradient: ' + str(gradient))

# Raw samples as points, fitted model as a line.
plt.scatter(x, y)
predictions = regressor.predict(phi)
plt.plot(x, predictions)
plt.show()