def cover(p0: Perception,
          action: int,
          p1: Perception,
          time: int,
          cfg: Configuration) -> Classifier:
    """
    Covering - creates a classifier that anticipates a change correctly.

    The reward of the new classifier is set to 0 to prevent
    *reward bubbles* in the environmental model.

    Parameters
    ----------
    p0: Perception
        previous perception
    action: int
        chosen action
    p1: Perception
        current perception
    time: int
        current epoch
    cfg: Configuration
        algorithm configuration class

    Returns
    -------
    Classifier
        new classifier
    """
    # In the paper it is advised to set the experience and reward of a
    # newly generated classifier to 0. However, in the original code these
    # values are initialized with the defaults 1 and 0.5 respectively.
    new_cl = Classifier(action=action, experience=0, reward=0, cfg=cfg)
    new_cl.tga = time
    new_cl.talp = time
    new_cl.specialize(p0, p1)
    return new_cl
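For illustration, a minimal usage sketch of `cover` — a hypothetical snippet assuming pyalcs-style imports and a toy 2-attribute binary environment; the `Configuration` arguments shown are an assumption, not taken from the module above:

from lcs import Perception
from lcs.agents.acs2 import Configuration

# Assumed constructor arguments for a 2-attribute, 4-action environment
cfg = Configuration(classifier_length=2, number_of_possible_actions=4)

p0 = Perception(['0', '1'])  # previous perception
p1 = Perception(['1', '1'])  # current perception: first attribute changed

# Covering builds a classifier that anticipates exactly this change
cl = cover(p0, action=2, p1=p1, time=42, cfg=cfg)

assert cl.tga == cl.talp == 42                # both timestamps set to current epoch
assert cl.experience == 0 and cl.reward == 0  # no reward bubble seeded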
def test_should_return_latest_action(self, cfg):
    # given
    from random import randint

    all_actions = cfg.number_of_possible_actions
    population = ClassifiersList()
    c0 = Classifier(action=0, cfg=cfg)
    c0.talp = 1

    # when
    population.append(c0)

    # With only action 0 covered, the first action with no classifier
    # (action 1) should be returned
    assert choose_latest_action(population, all_actions) == 1

    # Add the rest of the classifiers
    population.append(Classifier(action=3, cfg=cfg))
    population.append(Classifier(action=7, cfg=cfg))
    population.append(Classifier(action=5, cfg=cfg))
    population.append(Classifier(action=1, cfg=cfg))
    population.append(Classifier(action=4, cfg=cfg))
    population.append(Classifier(action=2, cfg=cfg))
    population.append(Classifier(action=6, cfg=cfg))

    # Assign each classifier a random talp from a fixed range
    for cl in population:
        cl.talp = randint(70, 100)

    # But the third classifier (action 7) was executed a long time ago
    population[2].talp = randint(10, 20)

    # then
    assert choose_latest_action(population, all_actions) == 7
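The behaviour exercised above suggests the following shape for `choose_latest_action` — a hypothetical reconstruction inferred from the test, not the library's actual implementation. Each action is scored by the most recent `talp` among its classifiers (0 if the action has no classifier, i.e. never executed), and the action executed longest ago wins, with ties broken toward the lowest action number (hence action 1 in the first assertion):

def choose_latest_action_sketch(population, number_of_actions: int) -> int:
    # Most recent application time per action; 0 means "never executed"
    last_executed = [0] * number_of_actions
    for cl in population:
        last_executed[cl.action] = max(last_executed[cl.action], cl.talp)

    # list.index returns the first occurrence of the minimum, so ties
    # resolve to the lowest-numbered action, matching the first assertion
    return last_executed.index(min(last_executed))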