def test_should_apply_reinforcement_learning(self, cfg):
    """Verify one reinforcement-learning update on a single classifier.

    A classifier created with ``reward=34.29`` and
    ``immediate_reward=11.29`` is placed in a ``ClassifiersList`` and
    passed to ``ClassifiersList.apply_reinforcement_learning`` together
    with ``0``, ``28.79``, ``cfg.beta`` and ``cfg.gamma``. Afterwards
    ``r`` must lie within 0.1 of 33.94 and ``ir`` within 0.1 of 10.74.

    Args:
        cfg: Configuration object handed to the test (presumably a
            pytest fixture - confirm); must expose ``beta`` and ``gamma``.
    """
    # NOTE(review): a second test with this exact name is visible nearby;
    # if both live in the same class the later definition replaces the
    # earlier one and only one of them runs - confirm.

    # Local names below follow the apparent (reward, maximum fitness)
    # argument order - confirm against the method signature.
    reward = 0
    max_fitness = 28.79
    tolerance = 0.1

    # given: a population holding exactly one classifier
    classifier = Classifier(
        reward=34.29,
        immediate_reward=11.29,
        cfg=cfg,
    )
    population = ClassifiersList(classifier)

    # when: the update is invoked through the class, with the list
    # passed explicitly as the first argument
    ClassifiersList.apply_reinforcement_learning(
        population,
        reward,
        max_fitness,
        cfg.beta,
        cfg.gamma,
    )

    # then: both values have moved to the expected neighbourhood
    assert abs(33.94 - classifier.r) < tolerance
    assert abs(10.74 - classifier.ir) < tolerance
def test_should_apply_reinforcement_learning(self, cfg):
    """Verify one reinforcement-learning update through the list method.

    A default ``Classifier`` gets ``r = 34.29`` and ``ir = 11.29``
    assigned directly, is appended to an empty ``ClassifiersList``, and
    the list's ``apply_reinforcement_learning(0, 28.79)`` is called.
    The first element of the list must then have ``r`` within 0.1 of
    33.94 and ``ir`` within 0.1 of 10.74.

    Args:
        cfg: Configuration object handed to the test (presumably a
            pytest fixture - confirm); forwarded to both constructors.
    """
    # NOTE(review): another test with this exact name is visible nearby;
    # if both live in the same class the later definition replaces the
    # earlier one and only one of them runs - confirm.

    # Local names below follow the apparent (reward, maximum fitness)
    # argument order - confirm against the method signature.
    reward = 0
    max_fitness = 28.79
    tolerance = 0.1

    # given: an initially empty population plus one seeded classifier
    population = ClassifiersList(cfg=cfg)
    seeded = Classifier(cfg=cfg)
    seeded.r, seeded.ir = 34.29, 11.29
    population.append(seeded)

    # when
    population.apply_reinforcement_learning(reward, max_fitness)

    # then: read the classifier back out of the population
    updated = population[0]
    assert abs(33.94 - updated.r) < tolerance
    assert abs(10.74 - updated.ir) < tolerance
# Play some games for g in range(GAMES): action_set = ClassifiersList(cfg=cfg) prev_state, action, reward, done = None, None, None, False state = board.reset() moves = 0 while not done: player = determine_player(moves) # Determine player match_set = ClassifiersList.form_match_set(population, state, cfg) if moves > 0: action_set.apply_alp(prev_state, action, state, ALL_MOVES + moves, population, match_set) action_set.apply_reinforcement_learning( reward, match_set.get_maximum_fitness()) if cfg.do_ga: action_set.apply_ga(ALL_MOVES + moves, population, match_set, state) # Determine best action action = match_set.choose_action(cfg.epsilon) action_set = ClassifiersList.form_action_set( match_set, action, cfg) prev_state = state state, reward, done, debug = board.step(action) if done: action_set.apply_alp(prev_state, action, state,