# numpy and pytest are required by the assertions below; the project-specific
# names (FakeContinuousEnvironment, FakeApproximator, the approximator classes,
# the classic/cmac/fuzzy algorithm modules, Environment and fuzzy_sets) are
# presumed to be imported from the package's own test helpers.
import numpy as np
import pytest


def test_FSARSA_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FSARSA(environment)
    # Q-values start out all zero before learning.
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    # One learning episode must have updated at least some Q-values.
    assert sum([algorithm.q[i].sum() for i in algorithm.actions]) != 0.0


def test_AHC_lambda_continuous_environment_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = classic.AHC(environment, lambd=0.2)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.mi[:, i].sum() for i in algorithm.actions]) != 0.0


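# The test below accepts `algorithm` and `lambd` arguments, so it is presumably
# driven by pytest parametrization; a hedged sketch (the concrete classes from
# the `classic` module are assumptions based on the test name):
# @pytest.mark.parametrize("algorithm, lambd",
#                          [(classic.Q, 0.0), (classic.SARSA, 0.2), (classic.R, 0.2)])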
def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    assert sum([algorithm.Q[:, i].sum() for i in algorithm.actions]) != 0.0


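# As above, `algorithm` and `lambd` are presumably injected by a parametrize
# decorator; sketch only, with the CMAC classes assumed from the test name:
# @pytest.mark.parametrize("algorithm, lambd",
#                          [(cmac.CMACQ, 0.0), (cmac.CMACSARSA, 0.2), (cmac.CMACR, 0.2)])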
def test_CMACQ_CMACSARSA_CMACR_lambda_run_learning_episode(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.q[l][:, i].sum() for i in algorithm.actions]) != 0.0


def test_CMACAHC_0_run_learning_episode():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) == 0.0
    env = algorithm.run_learning_episode()
    assert isinstance(env, Environment)
    for l in range(algorithm.n_layers):
        assert sum([algorithm.mi[l][:, i].sum() for i in algorithm.actions]) != 0.0


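# The test below expects only an `algorithm` argument, again presumably
# parametrized; sketch with assumed `classic` classes:
# @pytest.mark.parametrize("algorithm", [classic.Q, classic.SARSA, classic.R])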
def test_Q_SARSA_R_continuous_environment_get_greedy_actions(algorithm):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    algorithm = algorithm(environment)
    environment_state = 2
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    # With an all-zero Q table every action is greedy.
    assert np.array_equal(greedy_actions, algorithm.actions)
    for i in range(algorithm.Q.shape[0]):
        algorithm.Q[i][0] = 1
    # Raising the first action's value in every state makes it the only greedy one.
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


def test_CMACAHC_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = cmac.CMACAHC(environment)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.mi[l].shape[0]):
            algorithm.mi[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


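# `algorithm` and `lambd` for the test below are presumably parametrized as in
# the run_learning_episode variant above; sketch with assumed cmac classes:
# @pytest.mark.parametrize("algorithm, lambd",
#                          [(cmac.CMACQ, 0.0), (cmac.CMACSARSA, 0.2), (cmac.CMACR, 0.2)])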
def test_CMACQ_CMACSARSA_CMACR_get_greedy_actions(algorithm, lambd):
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    algorithm = algorithm(environment, lambd=lambd)
    environment_state = (0, 0)
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)
    for l in range(algorithm.n_layers):
        for i in range(algorithm.q[l].shape[0]):
            algorithm.q[l][i][0] = 1
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])


def test_FQ_get_greedy_actions():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    # Environment state approximated for variables = (-0.5, -0.5).
    environment_state = [np.array([0.5, 0.5, 0.0]), np.array([1.0, 0.0])]
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    # All possible actions are greedy because all q values are zeros.
    assert list(greedy_actions) == algorithm.actions
    # Now the second action's q is modified to make it the greedy one.
    algorithm.q[1][1][0] = 1.0
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert greedy_actions == algorithm.actions[1]


def test_state():
    env = FakeContinuousEnvironment(101)
    assert env.state == (0.0, 1.0)
    env.approximate_with(FakeApproximator)
    assert env.state == 1


def test_states():
    env = FakeContinuousEnvironment(101)
    # Before an approximator is attached the `states` attribute does not exist.
    with pytest.raises(AttributeError):
        env.states
    env.approximate_with(FakeApproximator)
    assert env.states == 2


def test_FQ_get_greedy_actions_invalid_environment_state_given():
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    algorithm = fuzzy.FQ(environment)
    with pytest.raises(ValueError):
        algorithm.get_greedy_actions(np.ones((2, 2)))