Example #1
    def __init__(self,
                 mdp_info,
                 policy,
                 learning_rate,
                 off_policy=False,
                 beta=None,
                 delta=None):
        """
        Constructor.

        Args:
            off_policy (bool, False): whether to use the off policy setting or
                the online one;
            beta (Parameter, None): beta coefficient;
            delta (Parameter, None): delta coefficient.

        """
        self.off_policy = off_policy
        if delta is not None and beta is None:
            self.delta = delta
            self.beta = None
        elif delta is None and beta is not None:
            self.delta = None
            self.beta = beta
        else:
            raise ValueError('delta or beta parameters needed.')

        self.Q = Table(mdp_info.size)
        self.Q_tilde = Table(mdp_info.size)
        self.R_tilde = Table(mdp_info.size)
        super().__init__(mdp_info, policy, self.Q, learning_rate)
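
The constructor above allocates one Table per estimate (Q, Q_tilde, R_tilde), each shaped by mdp_info.size, i.e. (n_states, n_actions) for a finite MDP. Below is a minimal sketch of what such a table holds, using only operations already shown elsewhere in this listing (construction, assigning the underlying .table array, reading shape and values); the module path is an assumption based on MushroomRL's layout.

import numpy as np
from mushroom_rl.utils.table import Table  # assumed module path

# Action-value table for 10 states and 3 actions, as in the tests below.
Q = Table((10, 3))
Q.table = np.random.randn(10, 3)   # .table is the underlying numpy array

print(Q.shape)          # (10, 3)
print(Q.table[2, 1])    # value of action 1 in state 2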
Example #2
def test_boltzmann():
    np.random.seed(88)
    beta = Parameter(0.1)
    pi = Boltzmann(beta)

    Q = Table((10, 3))
    Q.table = np.random.randn(10, 3)

    pi.set_q(Q)

    s = np.array([2])
    a = np.array([1])

    p_s = pi(s)
    p_s_test = np.array([0.30676679, 0.36223227, 0.33100094])
    assert np.allclose(p_s, p_s_test)

    p_sa = pi(s, a)
    p_sa_test = np.array([0.36223227])
    assert np.allclose(p_sa, p_sa_test)

    a = pi.draw_action(s)
    a_test = 2
    assert a.item() == a_test

    beta_2 = LinearParameter(0.2, 0.1, 2)
    pi.set_beta(beta_2)
    p_sa_2 = pi(s, a)
    assert p_sa_2 < p_sa

    pi.update(s, a)
    p_sa_3 = pi(s, a)
    p_sa_3_test = np.array([0.33100094])
    assert np.allclose(p_sa_3, p_sa_3_test)
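
The probabilities checked above follow from the Boltzmann (softmax) rule pi(a|s) = exp(beta * Q(s, a)) / sum_a' exp(beta * Q(s, a')). The following is a small re-derivation of the expected values in plain NumPy, not MushroomRL code:

import numpy as np

def boltzmann_probs(q_row, beta):
    # pi(a|s) proportional to exp(beta * Q(s, a)); shift by the max for
    # numerical stability before exponentiating.
    z = beta * q_row
    e = np.exp(z - z.max())
    return e / e.sum()

# With beta = 0.1 and q_row = Q.table[2] from the test above, this returns
# [0.30676679, 0.36223227, 0.33100094], i.e. p_s_test.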
Example #3
    def __init__(self,
                 value,
                 exponential=False,
                 min_value=None,
                 tol=1.,
                 window=100,
                 size=(1, )):
        """
        Constructor.

        Args:
            tol (float): value of the variance of the target variable such that
                the parameter value is 0.5;
            window (int): number of samples kept to estimate the variance of
                the target variable.

        """
        self._exponential = exponential
        self._tol = tol
        self._weights_var = Table(size)
        self._samples = Table(size + (window, ))
        self._index = Table(size, dtype=int)
        self._window = window
        self._parameter_value = Table(size)

        self._add_save_attr(
            _exponential='primitive',
            _tol='primitive',
            _weights_var='mushroom',
            _samples='mushroom',
            _index='mushroom',
            _window='primitive',
            _parameter_value='mushroom',
        )

        super(WindowedVarianceParameter, self).__init__(value, min_value, size)
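
The buffers above (a circular _samples table of length window per entry, advanced by _index) support estimating the variance of the target over the most recent window samples; tol then fixes the variance at which the resulting parameter value equals 0.5, as the docstring states. A generic, hedged sketch of that rolling-window variance bookkeeping, not the library's code:

import numpy as np

def windowed_variance(samples, index, new_sample, window):
    # Store the newest target sample in a circular buffer of length `window`
    # and return the variance of the samples collected so far.
    samples[int(index) % window] = new_sample
    n = min(int(index) + 1, window)
    return samples[:n].var(), index + 1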
Example #4
def test_eps_greedy():
    np.random.seed(88)
    eps = Parameter(0.1)
    pi = EpsGreedy(eps)

    Q = Table((10, 3))
    Q.table = np.random.randn(10, 3)

    pi.set_q(Q)

    s = np.array([2])
    a = np.array([1])

    p_s = pi(s)
    p_s_test = np.array([0.03333333, 0.93333333, 0.03333333])
    assert np.allclose(p_s, p_s_test)

    p_sa = pi(s, a)
    p_sa_test = np.array([0.93333333])
    assert np.allclose(p_sa, p_sa_test)

    a = pi.draw_action(s)
    a_test = 1
    assert a.item() == a_test

    eps_2 = LinearParameter(0.2, 0.1, 2)
    pi.set_epsilon(eps_2)
    p_sa_2 = pi(s, a)
    assert p_sa_2 < p_sa

    pi.update(s, a)
    pi.update(s, a)
    p_sa_3 = pi(s, a)
    print(eps_2.get_value())
    assert p_sa_3 == p_sa
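
The expected values here come from the standard epsilon-greedy distribution: the greedy action receives 1 - eps + eps/n and every other action eps/n, which for eps = 0.1 and n = 3 gives 0.93333... and 0.03333..., matching p_s_test. The final assertion holds because LinearParameter(0.2, 0.1, 2) decays to its 0.1 threshold after the two pi.update calls, so the distribution is back to the one obtained with the original Parameter(0.1). A small sketch of the distribution, again plain NumPy rather than library code:

import numpy as np

def eps_greedy_probs(q_row, eps):
    n = len(q_row)
    probs = np.full(n, eps / n)           # exploration mass, split uniformly
    probs[np.argmax(q_row)] += 1. - eps   # remaining mass on the greedy action
    return probs

print(eps_greedy_probs(np.array([-0.5, 1.2, 0.3]), 0.1))
# [0.03333333 0.93333333 0.03333333]  -> matches p_s_test above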
Example #5
    def __init__(self,
                 value,
                 exponential=False,
                 min_value=None,
                 tol=1.,
                 size=(1, )):
        """
        Constructor.

        Args:
            tol (float): value of the variance of the target variable such that
                the parameter value is 0.5.

        """
        self._exponential = exponential
        self._tol = tol
        self._weights_var = Table(size)
        self._x = Table(size)
        self._x2 = Table(size)
        self._parameter_value = Table(size)

        super().__init__(value, min_value, size)

        self._add_save_attr(
            _exponential='primitive',
            _tol='primitive',
            _weights_var='mushroom',
            _x='mushroom',
            _x2='mushroom',
            _parameter_value='mushroom',
        )
Example #6
    def __init__(self, value, min_value=None, max_value=None, size=(1, )):
        """
        Constructor.

        Args:
            value (float): initial value of the parameter;
            min_value (float, None): minimum value that the parameter can reach
                when decreasing;
            max_value (float, None): maximum value that the parameter can reach
                when increasing;
            size (tuple, (1,)): shape of the matrix of parameters; this shape
                can be used to have a single parameter for each state or
                state-action tuple.

        """
        self._initial_value = value
        self._min_value = min_value
        self._max_value = max_value
        self._n_updates = Table(size)

        self._add_save_attr(
            _initial_value='primitive',
            _min_value='primitive',
            _max_value='primitive',
            _n_updates='mushroom',
        )
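
The _n_updates Table is what lets a decaying parameter keep an independent schedule per state or per state-action pair when size is larger than (1,). A hedged usage sketch follows; the indexed update/get_value calls are assumed from MushroomRL's Parameter API, while the positional LinearParameter arguments are the same ones used in test_eps_greedy above.

import numpy as np
from mushroom_rl.utils.parameters import LinearParameter  # assumed module path

# One learning rate per state-action pair of a (10, 3) table, decaying
# linearly from 0.2 to 0.1 over 2 updates of each individual entry.
alpha = LinearParameter(0.2, 0.1, 2, size=(10, 3))

s, a = np.array([2]), np.array([1])
print(alpha.get_value(s, a))  # 0.2: this entry has not been updated yet
alpha.update(s, a)            # assumed: counts one update for this entry only
alpha.update(s, a)
print(alpha.get_value(s, a))  # 0.1: decayed to the threshold for (s, a)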
Example #7
    def __init__(self, mdp_info, policy, learning_rate):
        Q = Table(mdp_info.size)
        self.old_q = deepcopy(Q)

        self._add_save_attr(old_q='mushroom')

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #8
def test_td_policy():
    Q = Table((10, 3))
    pi = TDPolicy()

    pi.set_q(Q)

    assert Q == pi.get_q()
Example #9
    def __init__(self, mdp_info, policy, learning_rate):
        self.Q = Table(mdp_info.size)
        self.old_q = deepcopy(self.Q)

        self._add_save_attr(Q='pickle', old_q='pickle')

        super().__init__(mdp_info, policy, self.Q, learning_rate)
Example #10
    def __init__(self, mdp_info, policy, learning_rate, sampling=True,
                 precision=1000):
        """
        Constructor.

        Args:
            sampling (bool, True): use the approximated version to speed up
                the computation;
            precision (int, 1000): number of samples to use in the approximated
                version.

        """
        self.Q = Table(mdp_info.size)
        self._sampling = sampling
        self._precision = precision

        self._add_save_attr(
            Q='pickle',
            _sampling='numpy',
            _precision='numpy',
            _n_updates='pickle',
            _sigma='pickle',
            _Q='pickle',
            _Q2='pickle',
            _weights_var='pickle',
            _w='numpy'
        )

        super().__init__(mdp_info, policy, self.Q, learning_rate)

        self._n_updates = Table(mdp_info.size)
        self._sigma = Table(mdp_info.size, initial_value=1e10)
        self._Q = Table(mdp_info.size)
        self._Q2 = Table(mdp_info.size)
        self._weights_var = Table(mdp_info.size)
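
This constructor keeps running mean and variance statistics (_Q, _Q2, _sigma, _weights_var) for a weighted Q-learning style update, and sampling/precision control the approximated computation mentioned in the docstring. Below is a hedged sketch of the kind of computation those two arguments refer to, namely estimating the weighted maximum of the next-state action values by Monte Carlo sampling; it illustrates the idea and is not the library's exact code.

import numpy as np

def sampled_weighted_max(q_row, sigma_row, precision=1000):
    # Draw `precision` joint samples of the action values from independent
    # Gaussians N(Q(s', a), sigma(s', a)) and count how often each action
    # is the maximiser; the frequencies approximate the weights w.
    n_actions = len(q_row)
    samples = np.random.normal(q_row, sigma_row, size=(precision, n_actions))
    w = np.bincount(samples.argmax(axis=1), minlength=n_actions) / precision
    return w @ q_row   # weighted estimate of the maximum action value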
Example #11
def test_mellowmax():
    np.random.seed(88)
    omega = Parameter(3)
    pi = Mellowmax(omega)

    Q = Table((10, 3))
    Q.table = np.random.randn(10, 3)

    pi.set_q(Q)

    s = np.array([2])
    a = np.array([1])

    p_s = pi(s)
    p_s_test = np.array([0.08540336, 0.69215916, 0.22243748])
    assert np.allclose(p_s, p_s_test)

    p_sa = pi(s, a)
    p_sa_test = np.array([0.69215916])
    assert np.allclose(p_sa, p_sa_test)

    a = pi.draw_action(s)
    a_test = 2
    assert a.item() == a_test

    try:
        beta = Parameter(0.1)
        pi.set_beta(beta)
    except RuntimeError:
        pass
    else:
        assert False

    try:
        pi.update(s, a)
    except RuntimeError:
        pass
    else:
        assert False
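
Mellowmax replaces the hard max over action values with the operator mm_omega(Q(s, .)) = log((1/n) * sum_a exp(omega * Q(s, a))) / omega, and the policy derives its inverse temperature from that operator internally for each state, which is why both set_beta and update raise RuntimeError in this test. A small sketch of the operator itself (the standard definition, not the library's implementation):

import numpy as np

def mellowmax(q_row, omega):
    # mm_omega(q) = log( mean_a exp(omega * q_a) ) / omega,
    # computed with a logsumexp-style shift for numerical stability.
    z = omega * q_row
    m = z.max()
    return (m + np.log(np.mean(np.exp(z - m)))) / omega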
Example #12
    def __init__(self, mdp_info, policy, learning_rate, beta):
        """
        Constructor.

        Args:
            beta (Parameter): beta coefficient.

        """
        self.Q = Table(mdp_info.size)
        self._rho = 0.
        self.beta = beta

        super().__init__(mdp_info, policy, self.Q, learning_rate)
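
Here _rho holds a running estimate of the average reward and beta is its learning rate, as in Schwartz's R-Learning. A heavily hedged sketch of one common formulation of that update (the library's exact rule may differ):

def r_learning_step(Q, rho, s, a, r, s_next, alpha, beta):
    # One common R-Learning formulation: the TD error is computed against
    # the average-reward estimate rho instead of a discounted return.
    delta = r - rho + Q[s_next].max() - Q[s, a]
    Q[s, a] += alpha * delta
    if Q[s, a] == Q[s].max():        # rho is refined only on greedy steps
        rho += beta * delta
    return rho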
Example #13
    def __init__(self,
                 mdp_info,
                 policy,
                 learning_rate,
                 off_policy=False,
                 beta=None,
                 delta=None):
        """
        Constructor.

        Args:
            off_policy (bool, False): whether to use the off policy setting or
                the online one;
            beta ([float, Parameter], None): beta coefficient;
            delta ([float, Parameter], None): delta coefficient.

        """
        self.off_policy = off_policy
        if delta is not None and beta is None:
            self.delta = to_parameter(delta)
            self.beta = None
        elif delta is None and beta is not None:
            self.delta = None
            self.beta = to_parameter(beta)
        else:
            raise ValueError('delta or beta parameters needed.')

        Q = Table(mdp_info.size)
        self.Q_tilde = Table(mdp_info.size)
        self.R_tilde = Table(mdp_info.size)

        self._add_save_attr(off_policy='primitive',
                            delta='mushroom',
                            beta='mushroom',
                            Q_tilde='mushroom',
                            R_tilde='mushroom')

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #14
    def __init__(self, mdp_info, policy, learning_rate, beta):
        """
        Constructor.

        Args:
            beta (Parameter): beta coefficient.

        """
        Q = Table(mdp_info.size)
        self._rho = 0.
        self.beta = beta

        self._add_save_attr(_rho='primitive', beta='pickle')

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #15
    def __init__(self, mdp_info, policy, learning_rate, beta):
        """
        Constructor.

        Args:
            beta ((float, Parameter)): beta coefficient.

        """
        Q = Table(mdp_info.size)
        self._rho = 0.
        self._beta = to_parameter(beta)

        self._add_save_attr(_rho='primitive', _beta='mushroom')

        super().__init__(mdp_info, policy, Q, learning_rate)
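
Compared with the previous two versions, this one accepts either a float or a Parameter for beta and normalises it with to_parameter. A short hedged sketch of that behaviour; the module path is assumed from MushroomRL's layout.

from mushroom_rl.utils.parameters import Parameter, to_parameter  # assumed path

beta = to_parameter(0.3)               # plain numbers get wrapped ...
assert isinstance(beta, Parameter)

beta = to_parameter(Parameter(0.3))    # ... while Parameter instances pass through
assert isinstance(beta, Parameter)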
Example #16
    def __init__(self,
                 mdp_info,
                 policy,
                 learning_rate,
                 lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        self.Q = Table(mdp_info.size)
        self._lambda = lambda_coeff

        self.e = EligibilityTrace(self.Q.shape, trace)
        super().__init__(mdp_info, policy, self.Q, learning_rate)
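
The trace argument selects how the eligibility of the visited state-action pair is refreshed: a replacing trace resets it to 1 while an accumulating trace adds 1, and all entries then decay by gamma * lambda between steps. A minimal sketch of standard tabular SARSA(lambda) bookkeeping, shown with plain arrays rather than the EligibilityTrace class:

import numpy as np

def sarsa_lambda_step(Q, e, s, a, r, s_next, a_next, alpha, gamma, lam,
                      trace='replacing'):
    # Standard tabular SARSA(lambda) step with replacing or accumulating traces.
    delta = r + gamma * Q[s_next, a_next] - Q[s, a]
    if trace == 'replacing':
        e[s, a] = 1.
    else:                        # 'accumulating'
        e[s, a] += 1.
    Q += alpha * delta * e       # every eligible pair shares the TD error
    e *= gamma * lam             # traces decay between steps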
Example #17
    def __init__(self, mdp_info, policy, learning_rate, lambda_coeff,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coeff ((float, Parameter)): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        Q = Table(mdp_info.size)
        self._lambda = to_parameter(lambda_coeff)

        self.e = EligibilityTrace(Q.shape, trace)
        self._add_save_attr(
            _lambda='mushroom',
            e='mushroom'
        )

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #18
    def __init__(self, mdp_info, policy, learning_rate, lambda_coef,
                 trace='replacing'):
        """
        Constructor.

        Args:
            lambda_coef (float): eligibility trace coefficient;
            trace (str, 'replacing'): type of eligibility trace to use.

        """
        Q = Table(mdp_info.size)
        self._lambda = lambda_coef

        self.e = EligibilityTrace(Q.shape, trace)
        self._add_save_attr(
            _lambda='primitive',
            e='pickle'
        )

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #19
    def __init__(self, mdp_info, policy, learning_rate):
        Q = Table(mdp_info.size)

        super().__init__(mdp_info, policy, Q, learning_rate)
Example #20
    def __init__(self, mdp_info, policy, learning_rate):
        self.Q = Table(mdp_info.size)
        self.old_q = deepcopy(self.Q)

        super().__init__(mdp_info, policy, self.Q, learning_rate)