def test_AHC_lambda_continuous_environment_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = classic.AHC(environment, lambd=0.2)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) != 0.0


def test_FSARSA_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FSARSA(environment)
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) != 0.0


def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) != 0.0


def test_CMACQ_CMACSARSA_CMACR_lambda_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) != 0.0


def test_CMACAHC_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) != 0.0


def test_Q_SARSA_R_continuous_environment_get_greedy_actions(algorithm):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment)
    environment_state = 2
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for i in range(algorithm.Q.shape[0]):
        algorithm.Q[i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


def test_CMACAHC_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.mi[l].shape[0]):
            algorithm.mi[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


def test_CMACQ_CMACSARSA_CMACR_get_greedy_actions(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.q[l].shape[0]):
            algorithm.q[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


def test_FQ_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    # environment state approximated for variables = (-0.5, -0.5)
    environment_state = [np.array([0.5, 0.5, 0.0]), np.array([1.0, 0.0])]
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    # all possible actions are greedy, because all q values are zero
    assert list(greedy_actions) == algorithm.actions
    # now the second action's q is modified to make it greedy
    algorithm.q[1][1][0] = 1.0
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert greedy_actions == algorithm.actions[1]


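# The hand-built `environment_state` in test_FQ_get_greedy_actions is the
# list of per-variable membership vectors for the state (-0.5, -0.5). A
# minimal sketch of the idea with triangular fuzzy sets (illustrative only,
# not the FuzzyApproximator API; the set centers below are assumptions that
# happen to reproduce the vectors used above):
def _triangular_memberships(value, centers, width=1.0):
    """Degree of membership of `value` in each triangular fuzzy set."""
    return np.array([max(0.0, 1.0 - abs(value - c) / width) for c in centers])

# _triangular_memberships(-0.5, [-1.0, 0.0, 1.0])  ->  [0.5, 0.5, 0.0]
# _triangular_memberships(-0.5, [-0.5, 0.5])       ->  [1.0, 0.0]

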
def test_learning():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    steps_per_episode, e = alg.learn()
    assert len(steps_per_episode) == alg.episodes == 1
    assert steps_per_episode[0] == len(e.steps)
    steps_per_episode, e = alg.learn(23)
    assert len(steps_per_episode) == alg.episodes == 24


def test_init_algorithm():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4, 0.5)
    assert alg.environment == env
    assert len(alg.actions) == len(env.actions)
    assert alg.lambd == 0.1
    assert alg.epsilon == 0.2
    assert alg.gamma == 0.3
    assert alg.alpha == 0.4


def test_FQ_get_greedy_actions_invalid_environment_state_given():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    # a 2x2 ndarray is not a list of per-variable membership vectors
    with pytest.raises(ValueError):
        algorithm.get_greedy_actions(np.ones((2, 2)))


def test_get_action():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    for _ in range(100):
        assert alg.get_action() in alg.actions


def test_state():
    env = FakeContinuousEnvironment(101)
    assert env.state == (0.0, 1.0)
    env.approximate_with(FakeApproximator)
    assert env.state == 1


def test_learning_with_stopping_when_learned():
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    steps_per_episode, e = alg.learn(100, True, spe_gte=0, wsize=13)
    assert alg.is_learned(0, 0, 13)
    assert len(steps_per_episode) == 13


def test_is_learned(steps_per_episode, spe_lte, spe_gte, wsize, expected):
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, 0.1, 0.2, 0.3, 0.4)
    alg.steps_per_episode = steps_per_episode
    assert alg.is_learned(spe_lte, spe_gte, wsize) is expected


def test_states():
    env = FakeContinuousEnvironment(101)
    with pytest.raises(AttributeError):
        env.states
    env.approximate_with(FakeApproximator)
    assert env.states == 2


import pytest

from rltoolbox.tests.fakes import (FakeClassicalAlgorithm, FakeCMACAlgorithm,
                                   FakeFuzzyAlgorithm,
                                   FakeContinuousEnvironment,
                                   FakeGridNoWallsEnvironment,
                                   FakeGridWithWallsEnvironment)
from rltoolbox.approximator import (TableApproximator, CMACApproximator,
                                    FuzzyApproximator)


@pytest.mark.parametrize('env', [
    FakeGridNoWallsEnvironment(),
    FakeGridWithWallsEnvironment(),
    FakeContinuousEnvironment().approximate_with(TableApproximator),
])
def test_classical_algorithm_setting_proper_environment(env):
    a = FakeClassicalAlgorithm(env, lambd=0.0, epsilon=0.0,
                               gamma=0.0, alpha=0.0)


@pytest.mark.parametrize(
    'env', [FakeContinuousEnvironment().approximate_with(CMACApproximator)])
def test_classical_algorithm_setting_wrong_environment(env):
    with pytest.raises(TypeError):
        a = FakeClassicalAlgorithm(env, lambd=0.0, epsilon=0.0,
                                   gamma=0.0, alpha=0.0)


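# Note: the tests above that take `algorithm` and `lambd` arguments are
# parametrized following the same convention as the decorators in this
# module. A hypothetical sketch of such a decorator (the actual class/value
# combinations live with the tests themselves and are not reproduced here):
#
# @pytest.mark.parametrize('algorithm,lambd', [
#     (classic.Q, 0.0),
#     (classic.SARSA, 0.5),
# ])

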
def test_done():
    env = FakeContinuousEnvironment(101, [[1.0, 2.0], []])
    assert env.done is False
    for _ in range(101):
        env.do_action(0)
    assert env.done is True


def test_algorithm_name(lambd, name):
    env = FakeContinuousEnvironment()
    alg = FakeAlgorithm(env, lambd, 0.1, 0.2, 0.3)
    assert alg.name == name