Example #1
def test_AHC_lambda_continuous_environment_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = classic.AHC(environment, lambd=0.2)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) != 0.0
Example #2
def test_FSARSA_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FSARSA(environment)
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) != 0.0
Example #3
def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) != 0.0
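The test above (like Examples #4, #8, #16 and #20) receives algorithm and lambd as function arguments, so it is meant to be driven by pytest parametrization. A minimal sketch of such a decorator follows, assuming the classic module exposes Q, SARSA and R classes; the class names and lambda values are assumptions, not taken from this excerpt.

# Hypothetical parametrization sketch -- class names and lambda values are
# assumptions; the real test module defines its own parameter sets.
@pytest.mark.parametrize('algorithm, lambd', [
    (classic.Q, 0.0),
    (classic.SARSA, 0.0),
    (classic.R, 0.2),
])
def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    ...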
Example #4
def test_CMACQ_CMACSARSA_CMACR_lambda_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) != 0.0
Example #5
def test_CMACAHC_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) != 0.0
Example #6
def test_Q_SARSA_R_continuous_environment_get_greedy_actions(algorithm):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment)
    environment_state = 2
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for i in range(algorithm.Q.shape[0]):
        algorithm.Q[i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])
Example #7
def test_CMACAHC_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.mi[l].shape[0]):
            algorithm.mi[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])
Example #8
def test_CMACQ_CMACSARSA_CMACR_get_greedy_actions(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.q[l].shape[0]):
            algorithm.q[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])
Example #9
def test_FQ_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    # environment state approximated for the variables (-0.5, -0.5)
    environment_state = [np.array([0.5, 0.5, 0.0]), np.array([1.0, 0.0])]
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    # all possible actions are greedy because all q values are zero
    assert list(greedy_actions) == algorithm.actions
    # now the second action's q value is modified to make it greedy
    algorithm.q[1][1][0] = 1.0
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert greedy_actions == algorithm.actions[1]
Example #10
def test_learning():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    steps_per_episode, e = alg.learn()
    assert len(steps_per_episode) == alg.episodes == 1
    assert steps_per_episode[0] == len(e.steps)
    steps_per_episode, e = alg.learn(23)
    assert len(steps_per_episode) == alg.episodes == 24
Example #11
def test_init_algorithm():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4, 0.5)
    assert alg.environment == env
    assert len(alg.actions) == len(env.actions)
    assert alg.lambd == 0.1
    assert alg.epsilon == 0.2
    assert alg.gamma == 0.3
    assert alg.alpha == 0.4
Example #12
def test_FQ_get_greedy_actions_invalid_environment_state_given():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    with pytest.raises(ValueError):
        algorithm.get_greedy_actions(np.ones((2,2)))
Example #13
def test_get_action():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    for i in range(100):
        assert alg.get_action() in alg.actions
Example #14
def test_state():
    env = FakeContinuousEnvironment(101)
    assert env.state == (0.0, 1.0)
    env.approximate_with(FakeApproximator)
    assert env.state == 1
Example #15
def test_learning_with_stopping_when_learned():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    steps_per_episode, e = alg.learn(100, True, spe_gte=0, wsize=13)
    assert alg.is_learned(0, 0, 13)
    assert len(steps_per_episode) == 13
Example #16
def test_is_learned(steps_per_episode, spe_lte, spe_gte, wsize, expected):
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    alg.steps_per_episode = steps_per_episode
    assert alg.is_learned(spe_lte, spe_gte, wsize) is expected
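test_is_learned also expects its arguments from a parametrize decorator. A hedged sketch follows; the parameter tuples are illustrative assumptions modelled on Example #15 (where spe_gte=0 and wsize=13 stop learning after 13 episodes), not values taken from this excerpt.

# Hypothetical cases -- the exact semantics of spe_lte/spe_gte/wsize belong to
# the real is_learned implementation; these tuples only assume its signature.
@pytest.mark.parametrize('steps_per_episode, spe_lte, spe_gte, wsize, expected', [
    ([], 0, 0, 13, False),       # assumed: no episode history yet
    ([5] * 13, 0, 0, 13, True),  # assumed: a full window, as in Example #15
])
def test_is_learned(steps_per_episode, spe_lte, spe_gte, wsize, expected):
    ...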
Example #17
def test_states():
    env = FakeContinuousEnvironment(101)
    with pytest.raises(AttributeError):
        env.states
    env.approximate_with(FakeApproximator)
    assert env.states == 2
Example #18
import pytest

from rltoolbox.tests.fakes import (FakeClassicalAlgorithm, FakeCMACAlgorithm,
                                   FakeFuzzyAlgorithm,
                                   FakeContinuousEnvironment,
                                   FakeGridNoWallsEnvironment,
                                   FakeGridWithWallsEnvironment)
from rltoolbox.approximator import (TableApproximator, CMACApproximator,
                                    FuzzyApproximator)


@pytest.mark.parametrize('env', [
    FakeGridNoWallsEnvironment(),
    FakeGridWithWallsEnvironment(),
    FakeContinuousEnvironment().approximate_with(TableApproximator)
])
def test_classical_algorithm_setting_proper_environment(env):
    a = FakeClassicalAlgorithm(env,
                               lambd=0.0,
                               epsilon=0.0,
                               gamma=0.0,
                               alpha=0.0)


@pytest.mark.parametrize(
    'env', [FakeContinuousEnvironment().approximate_with(CMACApproximator)])
def test_classical_algorithm_setting_wrong_environment(env):
    with pytest.raises(TypeError):
        a = FakeClassicalAlgorithm(env,
                                   lambd=0.0,
                                   epsilon=0.0,
                                   gamma=0.0,
                                   alpha=0.0)
Example #19
def test_done():
    env = FakeContinuousEnvironment(101, [[1.0, 2.0], []])
    assert env.done is False
    for i in range(101):
        env.do_action(0)
    assert env.done is True
Example #20
def test_algorithm_name(lambd, name):
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, lambd, 0.1, 0.2, 0.3)
    assert alg.name == name
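test_algorithm_name likewise takes lambd and name from a parametrize decorator. The mapping below is purely hypothetical; the actual strings depend on how FakeAlgorithm builds its name.

# Hypothetical mapping -- the real format is defined by FakeAlgorithm.name.
@pytest.mark.parametrize('lambd, name', [
    (0, 'FakeAlgorithm'),         # assumed: plain name without eligibility traces
    (0.5, 'FakeAlgorithm(0.5)'),  # assumed: lambda appended for lambda variants
])
def test_algorithm_name(lambd, name):
    ...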