Ejemplo n.º 1
0
    def test_should_apply_reinforcement_learning(self, cfg):
        """Check the values of ``r`` and ``ir`` after a reinforcement update.

        A single classifier is created with ``reward=34.29`` and
        ``immediate_reward=11.29``, wrapped in a ``ClassifiersList``, and
        passed to ``ClassifiersList.apply_reinforcement_learning`` together
        with ``0``, ``28.79``, ``cfg.beta`` and ``cfg.gamma``.  Afterwards
        ``r`` must lie within 0.1 of 33.94 and ``ir`` within 0.1 of 10.74.

        NOTE(review): ``0`` and ``28.79`` are presumably the obtained reward
        and the maximum fitness -- confirm against the method signature.
        """
        # given: a population holding exactly one classifier
        classifier = Classifier(
            reward=34.29, immediate_reward=11.29, cfg=cfg)
        population = ClassifiersList(classifier)

        # when: the update is invoked through the class, not the instance
        ClassifiersList.apply_reinforcement_learning(
            population, 0, 28.79, cfg.beta, cfg.gamma)

        # then: both estimates were updated on the original object
        assert abs(33.94 - classifier.r) < 0.1
        assert abs(10.74 - classifier.ir) < 0.1
Ejemplo n.º 2
0
    def test_should_apply_reinforcement_learning(self, cfg):
        """Check the values of ``r`` and ``ir`` after a reinforcement update.

        A classifier built from ``cfg`` gets ``r = 34.29`` and ``ir = 11.29``
        and is appended to an initially empty ``ClassifiersList``.  After
        calling ``population.apply_reinforcement_learning(0, 28.79)`` the
        first element of the population must have ``r`` within 0.1 of 33.94
        and ``ir`` within 0.1 of 10.74.

        NOTE(review): ``0`` and ``28.79`` are presumably the obtained reward
        and the maximum fitness -- confirm against the method signature.
        """
        # given: one classifier with known reward estimates
        classifier = Classifier(cfg=cfg)
        classifier.r = 34.29
        classifier.ir = 11.29

        population = ClassifiersList(cfg=cfg)
        population.append(classifier)

        # when
        population.apply_reinforcement_learning(0, 28.79)

        # then: inspect the classifier as stored in the population
        updated = population[0]
        assert abs(33.94 - updated.r) < 0.1
        assert abs(10.74 - updated.ir) < 0.1
Ejemplo n.º 3
0
    # Play some games
    for g in range(GAMES):
        action_set = ClassifiersList(cfg=cfg)
        prev_state, action, reward, done = None, None, None, False
        state = board.reset()
        moves = 0

        while not done:
            player = determine_player(moves)  # Determine player

            match_set = ClassifiersList.form_match_set(population, state, cfg)

            if moves > 0:
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, match_set)
                action_set.apply_reinforcement_learning(
                    reward, match_set.get_maximum_fitness())
                if cfg.do_ga:
                    action_set.apply_ga(ALL_MOVES + moves, population,
                                        match_set, state)

            # Determine best action
            action = match_set.choose_action(cfg.epsilon)

            action_set = ClassifiersList.form_action_set(
                match_set, action, cfg)

            prev_state = state
            state, reward, done, debug = board.step(action)

            if done:
                action_set.apply_alp(prev_state, action, state,