Example #1
    def test_apply_ga(self, cfg):
        # given
        cl_1 = Classifier(condition='#1#1#1#1', numerosity=12, cfg=cfg)
        cl_2 = Classifier(condition='0#0#0#0#', numerosity=9, cfg=cfg)
        action_set = ClassifiersList(*[cl_1, cl_2], cfg=cfg)
        match_set = ClassifiersList(*[cl_1, cl_2], cfg=cfg)
        population = ClassifiersList(*[cl_1, cl_2], cfg=cfg)

        random_sequence = (
            [
                0.1, 0.6,            # parent selection
                0.1, 0.5, 0.5, 0.5,  # mutation of child1
                0.5, 0.1, 0.5, 0.5,  # mutation of child2
                0.1,                 # do crossover
            ]
            + [0.5] * 12 + [0.2] + [0.5] * 8
            + [0.2] + [0.5] * 20 + [0.2] + [0.5] * 20
        )

        # when
        action_set.apply_ga(101,
                            population,
                            match_set,
                            None,
                            randomfunc=RandomMock(random_sequence),
                            samplefunc=SampleMock([0, 4]))

        # then
        modified_parent1 = Classifier(condition='#1#1#1#1',
                                      numerosity=10,
                                      tga=101,
                                      cfg=cfg)

        modified_parent2 = Classifier(condition='0#0#0#0#',
                                      numerosity=8,
                                      tga=101,
                                      cfg=cfg)

        child1 = Classifier(condition='0####1#1',
                            quality=0.25,
                            talp=101,
                            tga=101,
                            cfg=cfg)

        child2 = Classifier(condition='###10#0#',
                            quality=0.25,
                            talp=101,
                            tga=101,
                            cfg=cfg)

        expected_population = ClassifiersList(
            *[modified_parent1, modified_parent2, child1, child2], cfg=cfg)

        # This may occasionally fail because one RNG function is not mocked
        assert expected_population == population
        assert expected_population == match_set
        assert expected_population == action_set
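
The test pins down apply_ga's behavior by replacing its random functions with deterministic stand-ins. The real RandomMock and SampleMock ship with the project's test helpers; the sketch below is only inferred from how the test invokes them (randomfunc is called with no arguments, samplefunc returns a preset pick), so treat the names and signatures as assumptions.

class RandomMock:
    """Deterministic stand-in for random.random(): hands out the seeded
    floats one by one, in order (assumed behavior)."""

    def __init__(self, sequence):
        self._values = iter(sequence)

    def __call__(self):
        return next(self._values)


class SampleMock:
    """Deterministic stand-in for random.sample(): ignores its arguments
    and always returns the preset result (assumed behavior)."""

    def __init__(self, result):
        self._result = result

    def __call__(self, *args, **kwargs):
        return self._result
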
Example #2
    def _run_trial_explore(self, env, time,
                           current_trial=None) -> TrialMetrics:

        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        action_set = ClassifiersList()
        done = False

        prev_M_best_fitness = 0
        was_greedy = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifiersList.apply_alp(
                    self.population,
                    match_set,
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    match_set.get_maximum_fitness(),
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        match_set,
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            action, was_greedy = self._epsilon_greedy(match_set)
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = Perception(state)
            prev_M_best_fitness = match_set.get_maximum_fitness()

            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            # End of trial: one final ALP/RL/GA pass with an empty match
            # set, since there is no successor state to match against
            if done:
                ClassifiersList.apply_alp(
                    self.population,
                    ClassifiersList(),
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    0,
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        ClassifiersList(),
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            steps += 1

        return TrialMetrics(steps, last_reward)
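
TrialMetrics bundles the step count and the final reward, which lets a caller chain explore trials while advancing the global time counter. Below is a minimal sketch of such a driver, assuming it lives on the same agent class; the method name explore and the metrics list are illustrative, not the library's API.

    def explore(self, env, n_trials):
        """Run consecutive explore trials, advancing the global time
        counter by the steps each trial consumed (a sketch; the real
        driver in the library may differ)."""
        current_time = 0
        metrics = []
        for trial in range(n_trials):
            m = self._run_trial_explore(env, current_time, trial)
            current_time += m.steps
            metrics.append(m)
        return metrics
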
Example #3
        prev_state, action, reward, done = None, None, None, False
        state = board.reset()
        moves = 0

        while not done:
            player = determine_player(moves)  # whose turn it is this move

            match_set = ClassifiersList.form_match_set(population, state, cfg)

            if moves > 0:
                # Learn from the previous move now that its outcome is known
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, match_set)
                action_set.apply_reinforcement_learning(
                    reward, match_set.get_maximum_fitness())
                if cfg.do_ga:
                    action_set.apply_ga(ALL_MOVES + moves, population,
                                        match_set, state)

            # Determine best action
            action = match_set.choose_action(cfg.epsilon)

            action_set = ClassifiersList.form_action_set(
                match_set, action, cfg)

            prev_state = state
            state, reward, done, debug = board.step(action)

            if done:
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, None)
                action_set.apply_reinforcement_learning(reward, 0)

            moves += 1
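
determine_player and ALL_MOVES are defined outside this excerpt. For a two-player board game, determine_player presumably alternates on move parity; a guessed sketch (the player symbols are assumptions):

def determine_player(moves: int) -> str:
    # Hypothetical helper: players alternate on even/odd move numbers.
    return 'X' if moves % 2 == 0 else 'O'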