Example #1
    def _run_trial_exploit(self, env, trials, current_trial) -> TrialMetrics:
        logger.debug("** Running trial exploit **")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        selected_cl = None
        prev_selected_cl = None
        done = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            # Exploit: always act greedily on the best matching classifier
            selected_cl = self._best_cl(match_set)
            action = selected_cl.action
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)

            raw_state, last_reward, done, _ = env.step(iaction)
            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)
            steps += 1

        return TrialMetrics(steps, last_reward)
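The greedy step in Example #1 delegates to self._best_cl, which is not shown in these snippets. A minimal stand-alone sketch of such a helper, assuming each classifier exposes action and fitness attributes (an assumption, not the library's actual code), could look like this:

def best_cl(match_set):
    # Hypothetical stand-in for self._best_cl: return the matching classifier
    # with the highest fitness. The real criterion may differ (e.g. it could
    # combine rule quality and predicted reward).
    return max(match_set, key=lambda cl: cl.fitness)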
Example #2
    def _run_trial_explore(self, env, trials, current_trial) -> TrialMetrics:
        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        selected_cl = None
        prev_selected_cl = None
        done = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Learn from the previous transition: ALP model update and
                # bucket-brigade credit assignment
                alp.apply(prev_state,
                          state,
                          selected_cl,
                          self.population)
                rl.bucket_brigade_update(
                    selected_cl,
                    prev_selected_cl,
                    last_reward)

            prev_selected_cl = selected_cl

            # TODO: you can do it better
            if random.random() < self.cfg.epsilon:
                selected_cl = random.choice(match_set)
            else:
                selected_cl = self._best_cl(match_set)

            action = selected_cl.action
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)

            prev_state = Perception(state)

            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            if done:
                alp.apply(prev_state,
                          state,
                          selected_cl,
                          self.population)
                rl.bucket_brigade_update(
                    selected_cl,
                    prev_selected_cl,
                    last_reward)

            steps += 1

        return TrialMetrics(steps, last_reward)
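Example #2 assigns credit with rl.bucket_brigade_update, whose body is not shown. One common simplification of bucket-brigade credit assignment, sketched here with hypothetical attribute names (r as the reward estimate) and illustrative learning parameters, is:

def bucket_brigade_update(cl, prev_cl, reward, beta=0.05, gamma=0.95):
    # Hypothetical sketch: the classifier that just acted absorbs the
    # immediate reward, and a discounted share is passed back to the
    # classifier that acted before it (the "bucket brigade" chain).
    # beta and gamma are illustrative values, not the library's defaults.
    cl.r += beta * (reward - cl.r)
    if prev_cl is not None:
        prev_cl.r += beta * (gamma * cl.r - prev_cl.r)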
Example #3
    def _run_trial_explore(self, env, time, current_trial=None) \
            -> TrialMetrics:

        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        action_set = ClassifiersList()
        done = False

        while not done:
            if self.cfg.do_action_planning and \
                    self._time_for_action_planning(steps + time):
                # Action Planning for increased model learning
                steps_ap, state, prev_state, action_set, \
                    action, last_reward = \
                    self._run_action_planning(env, steps + time, state,
                                              prev_state, action_set, action,
                                              last_reward)
                steps += steps_ap

            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifiersList.apply_alp(
                    self.population,
                    match_set,
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                ClassifiersList.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    match_set.get_maximum_fitness(),
                    self.cfg.beta,
                    self.cfg.gamma
                )
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        match_set,
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            action = choose_action(
                match_set,
                self.cfg.number_of_possible_actions,
                self.cfg.epsilon,
                self.cfg.biased_exploration
            )
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = Perception(state)
            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            if done:
                ClassifiersList.apply_alp(
                    self.population,
                    ClassifiersList(),
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                ClassifiersList.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    0,
                    self.cfg.beta,
                    self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        ClassifiersList(),
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            steps += 1

        return TrialMetrics(steps, last_reward)
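Example #3 picks actions through choose_action(match_set, number_of_possible_actions, epsilon, biased_exploration), whose implementation is not included here. A simplified, hypothetical epsilon-greedy sketch consistent with that call (the library's own strategy is more elaborate) could be:

import random

def choose_action(match_set, n_actions, epsilon, biased_exploration):
    # Hypothetical sketch matching the call in Example #3. With probability
    # epsilon explore; a biased_exploration fraction of those exploratory
    # choices prefers the least experienced classifier (cl.exp is an assumed
    # experience counter), the rest take a uniformly random action.
    # Otherwise act greedily on the fittest matching classifier.
    if not match_set or random.random() < epsilon:
        if match_set and random.random() < biased_exploration:
            return min(match_set, key=lambda cl: cl.exp).action
        return random.randrange(n_actions)
    return max(match_set, key=lambda cl: cl.fitness).action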
Example #4
    def test_should_create_empty_perception(self):
        # when
        p = Perception.empty()

        # then
        assert p is not None
Example #5
    def _run_trial_explore(self, env, time, current_trial=None) \
        -> TrialMetrics:

        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        action_set = ClassifiersList()
        done = False

        prev_M_best_fitness = 0
        was_greedy = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifiersList.apply_alp(
                    self.population,
                    match_set,
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    match_set.get_maximum_fitness(),
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        match_set,
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            action, was_greedy = self._epsilon_greedy(match_set)
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = Perception(state)
            prev_M_best_fitness = match_set.get_maximum_fitness()

            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            if done:
                ClassifiersList.apply_alp(
                    self.population,
                    ClassifiersList(),
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    0,
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        ClassifiersList(),
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            steps += 1

        return TrialMetrics(steps, last_reward)
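Example #5 obtains both the action and a was_greedy flag from self._epsilon_greedy, so the reinforcement update can distinguish greedy from exploratory steps. A minimal stand-alone sketch of such a helper (with the same hypothetical attribute names as above) might be:

import random

def epsilon_greedy(match_set, epsilon, n_actions):
    # Hypothetical stand-in for self._epsilon_greedy: return (action, was_greedy).
    # Exploratory choices pick a uniformly random action; greedy choices take
    # the action of the fittest matching classifier.
    if not match_set or random.random() < epsilon:
        return random.randrange(n_actions), False
    return max(match_set, key=lambda cl: cl.fitness).action, True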
Example #6
    def _run_trial_explore(self, env, time, current_trial=None) \
            -> TrialMetrics:
        """
        Executes explore trial

        Parameters
        ----------
        env
            environment to interact with
        time
            current time step of the experiment; time + steps is used as the
            timestamp for ALP and GA updates
        current_trial
            optional index of the current trial (not used here)

        Returns
        -------
        TrialMetrics
            Number of steps taken and the final reward
        """
        logger.debug("** Running trial explore ** ")

        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)

        action = env.action_space.sample()
        reward = 0
        prev_state = Perception.empty()
        action_set = ClassifierList()
        done = False

        while not done:
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifierList.apply_alp(self.population, match_set,
                                         action_set, prev_state, action, state,
                                         time + steps, self.cfg.theta_exp,
                                         self.cfg)
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, match_set.get_maximum_fitness(),
                    self.cfg.beta, self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifierList.apply_ga(time + steps, self.population,
                                            match_set, action_set, state,
                                            self.cfg.theta_ga, self.cfg.mu,
                                            self.cfg.chi, self.cfg.theta_as,
                                            self.cfg.do_subsumption,
                                            self.cfg.theta_exp)

            action = choose_action(match_set,
                                   self.cfg.number_of_possible_actions,
                                   self.cfg.epsilon,
                                   self.cfg.biased_exploration)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = state
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            raw_state, reward, done, _ = env.step(iaction)
            state = self.cfg.environment_adapter.to_genotype(raw_state)

            if done:
                ClassifierList.apply_alp(self.population, ClassifierList(),
                                         action_set, prev_state, action, state,
                                         time + steps, self.cfg.theta_exp,
                                         self.cfg)
                ClassifierList.apply_reinforcement_learning(
                    action_set, reward, 0, self.cfg.beta, self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifierList.apply_ga(time + steps, self.population,
                                            match_set, action_set, state,
                                            self.cfg.theta_ga, self.cfg.mu,
                                            self.cfg.chi, self.cfg.theta_as,
                                            self.cfg.do_subsumption,
                                            self.cfg.theta_exp)
            steps += 1

        return TrialMetrics(steps, reward)
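Each of these trial methods returns a TrialMetrics record, so an experiment driver only needs to alternate exploration and exploitation and collect the results. A hypothetical sketch following the signatures used in Examples #1 and #2:

def run_experiment(agent, env, n_trials):
    # Hypothetical driver: alternate explore and exploit trials and collect
    # the TrialMetrics returned by each one.
    metrics = []
    for trial in range(n_trials):
        if trial % 2 == 0:
            metrics.append(agent._run_trial_explore(env, n_trials, trial))
        else:
            metrics.append(agent._run_trial_exploit(env, n_trials, trial))
    return metrics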