def test_apply_ga(self, cfg):
    # given
    cl_1 = Classifier(condition='#1#1#1#1', numerosity=12, cfg=cfg)
    cl_2 = Classifier(condition='0#0#0#0#', numerosity=9, cfg=cfg)
    action_set = ClassifiersList(*[cl_1, cl_2], cfg=cfg)
    match_set = ClassifiersList(*[cl_1, cl_2], cfg=cfg)
    population = ClassifiersList(*[cl_1, cl_2], cfg=cfg)

    random_sequence = \
        [
            0.1, 0.6,  # parent selection
            0.1, 0.5, 0.5, 0.5,  # mutation of child1
            0.5, 0.1, 0.5, 0.5,  # mutation of child2
            0.1,  # do crossover
        ] + [0.5] * 12 + [0.2] + [0.5] * 8 + \
        [0.2] + [0.5] * 20 + [0.2] + [0.5] * 20

    # when
    action_set.apply_ga(101, population, match_set, None,
                        randomfunc=RandomMock(random_sequence),
                        samplefunc=SampleMock([0, 4]))

    # then
    modified_parent1 = Classifier(condition='#1#1#1#1', numerosity=10,
                                  tga=101, cfg=cfg)
    modified_parent2 = Classifier(condition='0#0#0#0#', numerosity=8,
                                  tga=101, cfg=cfg)
    child1 = Classifier(condition='0####1#1', quality=0.25,
                        talp=101, tga=101, cfg=cfg)
    child2 = Classifier(condition='###10#0#', quality=0.25,
                        talp=101, tga=101, cfg=cfg)

    expected_population = ClassifiersList(
        *[modified_parent1, modified_parent2, child1, child2], cfg=cfg)

    # these assertions may occasionally fail because one random-number
    # generator call is not mocked
    assert expected_population == population
    assert expected_population == match_set
    assert expected_population == action_set
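# A minimal sketch of the sequence-replaying test doubles assumed by the test
# above. The names RandomMock and SampleMock mirror the test; the real helpers
# in the test suite may have different signatures, so treat this only as an
# illustration of the idea (replay canned values instead of sampling).
class RandomMock:
    """Callable that replays a pre-recorded sequence of 'random' numbers."""

    def __init__(self, values):
        self._values = iter(values)

    def __call__(self):
        # Each call consumes the next pre-recorded value.
        return next(self._values)


class SampleMock:
    """Callable that returns pre-chosen items instead of random sampling."""

    def __init__(self, values):
        self._values = list(values)

    def __call__(self, *args, **kwargs):
        # Ignore whatever collection is passed in and return the fixed choice.
        return list(self._values)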
def _run_trial_explore(self, env, time, current_trial=None) \
        -> TrialMetrics:

    logger.debug("** Running trial explore ** ")

    # Initial conditions
    steps = 0
    raw_state = env.reset()
    state = self.cfg.environment_adapter.to_genotype(raw_state)
    action = env.action_space.sample()
    last_reward = 0
    prev_state = Perception.empty()
    action_set = ClassifiersList()
    done = False

    prev_M_best_fitness = 0
    was_greedy = False

    while not done:
        state = Perception(state)
        match_set = self.population.form_match_set(state)

        if steps > 0:
            # Apply learning in the last action set
            ClassifiersList.apply_alp(
                self.population,
                match_set,
                action_set,
                prev_state, action, state,
                time + steps,
                self.cfg.theta_exp,
                self.cfg)
            self.apply_reinforcement_learning(
                action_set,
                last_reward,
                prev_M_best_fitness,
                match_set.get_maximum_fitness(),
                was_greedy)
            if self.cfg.do_ga:
                ClassifiersList.apply_ga(
                    time + steps,
                    self.population,
                    match_set,
                    action_set,
                    state,
                    self.cfg.theta_ga,
                    self.cfg.mu,
                    self.cfg.chi,
                    self.cfg.theta_as,
                    self.cfg.do_subsumption,
                    self.cfg.theta_exp)

        # Select the next action and form the corresponding action set
        action, was_greedy = self._epsilon_greedy(match_set)
        iaction = self.cfg.environment_adapter.to_lcs_action(action)
        logger.debug("\tExecuting action: [%d]", action)
        action_set = match_set.form_action_set(action)

        prev_state = Perception(state)
        prev_M_best_fitness = match_set.get_maximum_fitness()

        raw_state, last_reward, done, _ = env.step(iaction)
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        state = Perception(state)

        if done:
            # Final learning step for the last action set of the trial
            ClassifiersList.apply_alp(
                self.population,
                ClassifiersList(),
                action_set,
                prev_state, action, state,
                time + steps,
                self.cfg.theta_exp,
                self.cfg)
            self.apply_reinforcement_learning(
                action_set,
                last_reward,
                prev_M_best_fitness,
                0,
                was_greedy)
            if self.cfg.do_ga:
                ClassifiersList.apply_ga(
                    time + steps,
                    self.population,
                    ClassifiersList(),
                    action_set,
                    state,
                    self.cfg.theta_ga,
                    self.cfg.mu,
                    self.cfg.chi,
                    self.cfg.theta_as,
                    self.cfg.do_subsumption,
                    self.cfg.theta_exp)

        steps += 1

    return TrialMetrics(steps, last_reward)
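# A minimal sketch of the epsilon-greedy selection used by self._epsilon_greedy
# above. Here action_fitness is a hypothetical {action: fitness} mapping
# standing in for the match set; the actual method operates on a
# ClassifiersList, so only the (action, was_greedy) return shape is taken from
# the trial loop.
import random


def epsilon_greedy(action_fitness, epsilon):
    if random.random() < epsilon:
        # Explore: pick any known action uniformly at random.
        return random.choice(list(action_fitness)), False
    # Exploit: pick the action backed by the highest fitness.
    best_action = max(action_fitness, key=action_fitness.get)
    return best_action, True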
prev_state, action, reward, done = None, None, None, False
state = board.reset()
moves = 0

while not done:
    player = determine_player(moves)  # Determine player

    match_set = ClassifiersList.form_match_set(population, state, cfg)

    if moves > 0:
        action_set.apply_alp(prev_state, action, state,
                             ALL_MOVES + moves, population, match_set)
        action_set.apply_reinforcement_learning(
            reward, match_set.get_maximum_fitness())
        if cfg.do_ga:
            action_set.apply_ga(ALL_MOVES + moves, population,
                                match_set, state)

    # Determine best action
    action = match_set.choose_action(cfg.epsilon)
    action_set = ClassifiersList.form_action_set(match_set, action, cfg)

    prev_state = state
    state, reward, done, debug = board.step(action)

    if done:
        action_set.apply_alp(prev_state, action, state,
                             ALL_MOVES + moves, population, None)
        action_set.apply_reinforcement_learning(reward, 0)

    # Advance the move counter so turns and timestamps progress
    moves += 1
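# A plausible sketch of the determine_player helper referenced above, assuming
# the two players simply alternate on successive moves; the name and return
# values are assumptions and the example project may implement it differently.
def determine_player(moves):
    # Even move counts belong to the first player, odd ones to the second.
    return 'X' if moves % 2 == 0 else 'O'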