Esempio n. 1
0
    def _run_trial_explore(self, env, time, current_trial=None) \
        -> TrialMetrics:

        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        action_set = ClassifiersList()
        done = False

        prev_M_best_fitness = 0
        was_greedy = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Apply learning in the last action set
                ClassifiersList.apply_alp(
                    self.population,
                    match_set,
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    match_set.get_maximum_fitness(),
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        match_set,
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            action, was_greedy = self._epsilon_greedy(match_set)
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)
            action_set = match_set.form_action_set(action)

            prev_state = Perception(state)
            prev_M_best_fitness = match_set.get_maximum_fitness()

            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            if done:
                ClassifiersList.apply_alp(
                    self.population,
                    ClassifiersList(),
                    action_set,
                    prev_state,
                    action,
                    state,
                    time + steps,
                    self.cfg.theta_exp,
                    self.cfg)
                self.apply_reinforcement_learning(
                    action_set,
                    last_reward,
                    prev_M_best_fitness,
                    0,
                    was_greedy)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps,
                        self.population,
                        ClassifiersList(),
                        action_set,
                        state,
                        self.cfg.theta_ga,
                        self.cfg.mu,
                        self.cfg.chi,
                        self.cfg.theta_as,
                        self.cfg.do_subsumption,
                        self.cfg.theta_exp)

            steps += 1

        return TrialMetrics(steps, last_reward)
Esempio n. 2
0
if __name__ == '__main__':

    # Play some games
    for g in range(GAMES):
        action_set = ClassifiersList(cfg=cfg)
        prev_state, action, reward, done = None, None, None, False
        state = board.reset()
        moves = 0

        while not done:
            player = determine_player(moves)  # Determine player

            match_set = ClassifiersList.form_match_set(population, state, cfg)

            if moves > 0:
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, match_set)
                action_set.apply_reinforcement_learning(
                    reward, match_set.get_maximum_fitness())
                if cfg.do_ga:
                    action_set.apply_ga(ALL_MOVES + moves, population,
                                        match_set, state)

            # Determine best action
            action = match_set.choose_action(cfg.epsilon)

            action_set = ClassifiersList.form_action_set(
                match_set, action, cfg)

            prev_state = state
            state, reward, done, debug = board.step(action)