Example #1
    def test_should_return_all_possible_actions(self, cfg):
        # given
        population = ClassifiersList(cfg=cfg)
        actions = set()

        # when
        for _ in range(1000):
            act = choose_action(population, epsilon=1.0)
            actions.add(act)

        # then
        assert len(actions) == 8  # every possible action was chosen at least once
Example #2
    def test_should_return_all_possible_actions(self, cfg):
        # given
        all_actions = cfg.number_of_possible_actions
        population = ClassifiersList()
        actions = set()

        # when
        for _ in range(1000):
            act = choose_action(population,
                                all_actions=all_actions,
                                epsilon=1.0)
            actions.add(act)

        # then
        assert len(actions) == all_actions
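Both tests call choose_action with epsilon=1.0, i.e. pure exploration. For context, a minimal epsilon-greedy sketch of what such a function could look like, assuming classifiers expose fitness and action attributes (the signature and names are illustrative, not the library's actual implementation):

import random

def choose_action_sketch(match_set, all_actions, epsilon):
    # With probability epsilon pick a uniformly random action (exploration),
    # otherwise return the action of the fittest matching classifier.
    if random.random() < epsilon:
        return random.randint(0, all_actions - 1)
    best = max(match_set, key=lambda cl: cl.fitness, default=None)
    if best is None:
        return random.randint(0, all_actions - 1)
    return best.action

With epsilon=1.0 every call reduces to a uniform random draw, so after 1000 draws each of the all_actions indices should show up in the set, which is exactly what both tests assert.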
Example #3
    def _run_trial_explore(self, env, time, current_trial=None):
        logging.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = parse_state(raw_state, self.cfg.perception_mapper_fcn)
        action = None
        reward = None
        prev_state = None
        action_set = ClassifierList()
        done = False

        while not done:
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                action_set.apply_alp(prev_state, action, state, time + steps,
                                     self.population, match_set, self.cfg)
                action_set.apply_reinforcement_learning(
                    reward, match_set.get_maximum_fitness())
                if self.cfg.do_ga:
                    pass
                    # TODO: implement GA

            action = choose_action(match_set,
                                   self.cfg.number_of_possible_actions,
                                   self.cfg.epsilon)
            internal_action = parse_action(action, self.cfg.action_mapping_fcn)
            logging.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = state
            raw_state, reward, done, _ = env.step(internal_action)
            state = parse_state(raw_state, self.cfg.perception_mapper_fcn)

            if done:
                action_set.apply_alp(prev_state, action, state, time + steps,
                                     self.population, None, self.cfg)
                action_set.apply_reinforcement_learning(reward, 0)
                if self.cfg.do_ga:
                    pass
                    # TODO: implement GA
            steps += 1

        return steps
Example #4
    def _run_trial_exploit(self, env, time=None, current_trial=None) \
            -> TrialMetrics:
        logger.debug("** Running trial exploit **")

        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)

        reward = 0
        action_set = ClassifierList()
        done = False

        while not done:
            match_set = self.population.form_match_set(state)

            if steps > 0:
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, match_set.get_maximum_fitness(),
                    self.cfg.beta, self.cfg.gamma)

            # Execute best action
            action = choose_action(match_set,
                                   self.cfg.number_of_possible_actions,
                                   epsilon=0.0,
                                   biased_exploration_prob=0.0)
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            action_set = match_set.form_action_set(action)

            raw_state, reward, done, _ = env.step(iaction)
            state = self.cfg.environment_adapter.to_genotype(raw_state)

            if done:
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, 0, self.cfg.beta, self.cfg.gamma)

            steps += 1

        return TrialMetrics(steps, reward)
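Examples #4 and #5 pass beta and gamma explicitly to apply_reinforcement_learning. A hedged sketch of an ACS2-style update, assuming each classifier carries a reward prediction r and an immediate-reward prediction ir (the attribute names are assumptions, not necessarily the library's):

def apply_reinforcement_learning_sketch(action_set, reward, max_fitness, beta, gamma):
    # Each classifier's reward prediction moves toward the discounted
    # one-step return; the immediate-reward estimate tracks the raw reward.
    for cl in action_set:
        cl.r += beta * (reward + gamma * max_fitness - cl.r)
        cl.ir += beta * (reward - cl.ir)

At the end of a trial (done == True) the code calls the same update with a maximum fitness of 0, since there is no successor match set to bootstrap from.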
Example #5
    def _run_trial_explore(self, env, time, current_trial=None) \
            -> TrialMetrics:
        """
        Executes explore trial

        Parameters
        ----------
        env
        time

        Returns
        -------
        Tuple[int, int]
            Tuple of total steps taken and final reward
        """
        logger.debug("** Running trial explore ** ")

        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)

        action = env.action_space.sample()
        reward = 0
        prev_state = Perception.empty()
        action_set = ClassifierList()
        done = False

        while not done:
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifierList.apply_alp(self.population, match_set,
                                         action_set, prev_state, action, state,
                                         time + steps, self.cfg.theta_exp,
                                         self.cfg)
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, match_set.get_maximum_fitness(),
                    self.cfg.beta, self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifierList.apply_ga(time + steps, self.population,
                                            match_set, action_set, state,
                                            self.cfg.theta_ga, self.cfg.mu,
                                            self.cfg.chi, self.cfg.theta_as,
                                            self.cfg.do_subsumption,
                                            self.cfg.theta_exp)

            action = choose_action(match_set,
                                   self.cfg.number_of_possible_actions,
                                   self.cfg.epsilon,
                                   self.cfg.biased_exploration)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = state
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            raw_state, reward, done, _ = env.step(iaction)
            state = self.cfg.environment_adapter.to_genotype(raw_state)

            if done:
                ClassifierList.apply_alp(self.population, ClassifierList(),
                                         action_set, prev_state, action, state,
                                         time + steps, self.cfg.theta_exp,
                                         self.cfg)
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, 0, self.cfg.beta, self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifierList.apply_ga(time + steps, self.population,
                                            match_set, action_set, state,
                                            self.cfg.theta_ga, self.cfg.mu,
                                            self.cfg.chi, self.cfg.theta_as,
                                            self.cfg.do_subsumption,
                                            self.cfg.theta_exp)
            steps += 1

        return TrialMetrics(steps, reward)
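Examples #4 and #5 route every observation and action through cfg.environment_adapter. Based purely on how it is called there, a minimal identity-style adapter could look like the sketch below; the class name and the choice of stringified tuples as perceptions are assumptions for illustration, not the library's actual API:

class IdentityEnvironmentAdapter:
    # Hypothetical adapter sketch; the real adapter interface may differ.

    @staticmethod
    def to_genotype(raw_state):
        # Translate the raw environment observation into the agent's
        # perception, here a tuple of stringified attributes.
        return tuple(str(attr) for attr in raw_state)

    @staticmethod
    def to_lcs_action(action):
        # Translate the agent's action index into the value passed to
        # env.step(); in the identity case the two coincide.
        return action

Swapping in such an adapter lets the same trial loop run against differently encoded environments without touching the agent code.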