コード例 #1
0
def test_evaluate_individuals():
    """Check ``evaluate_individuals`` under a tiny and a generous time budget.

    The same four-chain population is evaluated twice:

    * with ``max_lead_time`` of 0.001 minutes the population list is expected
      to hold exactly one individual afterwards;
    * with ``max_lead_time`` of 5 minutes all four individuals are expected
      to be kept.

    In both scenarios every individual left in the list must have a fitness.
    """
    # The project root is joined in exactly once: joining it a second time is
    # a no-op for an absolute root and yields a wrong path for a relative one.
    full_path_train = os.path.join(str(project_root()),
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ration_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)

    def evaluate_fresh_population(max_lead_time):
        """Build a new four-chain population and evaluate it under the given budget."""
        population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
        with CompositionTimer(max_lead_time=max_lead_time) as t:
            evaluate_individuals(individuals_set=population,
                                 objective_function=metric_function_for_nodes,
                                 is_multi_objective=False,
                                 timer=t)
        # The list object itself is returned so that any in-place changes made
        # by ``evaluate_individuals`` are visible to the assertions below.
        return population

    # Scenario 1: the budget is practically zero.
    population = evaluate_fresh_population(datetime.timedelta(minutes=0.001))
    assert len(population) == 1
    assert population[0].fitness is not None

    # Scenario 2: the budget is large enough for the whole population.
    population = evaluate_fresh_population(datetime.timedelta(minutes=5))
    assert len(population) == 4
    assert all(ind.fitness is not None for ind in population)
コード例 #2
0
def test_composition_timer():
    """Check that ``CompositionTimer`` reports its time limit as reached.

    The timer is created with a limit of 0.01 minutes and polled after a
    one-second sleep per generation; the loop must stop on the limit and the
    whole-second part of the elapsed time must equal one.
    """
    total_generations = 100
    limit_hit = False
    started_at = datetime.datetime.now()
    time_budget = datetime.timedelta(minutes=0.01)

    with CompositionTimer(max_lead_time=time_budget) as timer:
        for generation_idx in range(total_generations):
            time.sleep(1)
            if not timer.is_time_limit_reached(generation_num=generation_idx):
                continue
            limit_hit = True
            break

    elapsed_seconds = (datetime.datetime.now() - started_at).seconds
    assert limit_hit
    assert elapsed_seconds == 1
コード例 #3
0
    def optimise(self, objective_function, offspring_rate=0.5):
        """Evolve ``self.population`` and return the best individual with the history.

        :param objective_function: callable applied to an individual to obtain its fitness
        :param offspring_rate: share of ``pop_size`` (rounded up) produced as offspring
            per generation when the genetic scheme is steady-state; otherwise
            ``pop_size - 1`` offspring are produced
        :return: tuple ``(self.best_individual, self.history)``
        """
        is_steady_state = self.parameters.genetic_scheme_type == GeneticSchemeTypesEnum.steady_state
        offspring_count = (math.ceil(self.requirements.pop_size * offspring_rate)
                           if is_steady_state
                           else self.requirements.pop_size - 1)

        with CompositionTimer() as timer:
            self.history = []

            # The initial population is scored before the first generation starts.
            for individual in self.population:
                individual.fitness = objective_function(individual)
            self._add_to_history(self.population)

            for generation_num in range(self.requirements.num_of_generations - 1):
                print(f'Generation num: {generation_num}')

                candidates = regularized_population(
                    reg_type=self.parameters.regularization_type,
                    population=self.population,
                    objective_function=objective_function,
                    chain_class=self.chain_class)

                # Two parents are requested for every offspring to be produced.
                parents = selection(
                    types=self.parameters.selection_types,
                    population=candidates,
                    pop_size=offspring_count * 2)

                offspring = []
                first_parent_positions = range(0, len(parents), 2)
                # zip() stops at whichever runs out first: the offspring quota
                # or the available parent pairs.
                for _, first_pos in zip(range(offspring_count), first_parent_positions):
                    child = self.reproduce(parents[first_pos], parents[first_pos + 1])
                    offspring.append(child)
                    child.fitness = objective_function(child)

                self.population = heredity(
                    self.parameters.genetic_scheme_type,
                    self.parameters.selection_types,
                    self.population,
                    offspring,
                    self.requirements.pop_size - 1)

                # ``best_individual`` is read after the population was replaced.
                self.population.append(self.best_individual)
                self._add_to_history(self.population)

                print('spent time:', timer.minutes_from_start)
                print(f'Best metric is {self.best_individual.fitness}')

                if timer.is_max_time_reached(self.requirements.max_lead_time, generation_num):
                    break

        return self.best_individual, self.history
コード例 #4
0
    def optimise(self,
                 objective_function,
                 offspring_rate: float = 0.5,
                 on_next_iteration_callback=None):
        """Run the evolutionary loop and return the best individual found.

        :param objective_function: callable applied to an individual to obtain its fitness
        :param offspring_rate: forwarded to ``self.offspring_size`` to obtain the number
            of new individuals per generation
        :param on_next_iteration_callback: called with the current population after the
            initial evaluation and after every generation; defaults to
            ``self.default_on_next_iteration_callback``
        :return: ``self.best_individual``, or the best single model when
            ``add_single_model_chains`` is set and its fitness is less than or equal
            to the fitness of ``self.best_individual``
        """
        if on_next_iteration_callback is None:
            on_next_iteration_callback = self.default_on_next_iteration_callback

        if self.population is None:
            self.population = self._make_population(self.requirements.pop_size)

        num_of_new_individuals = self.offspring_size(offspring_rate)
        self.log.info(
            f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}'
        )
        with CompositionTimer() as t:

            if self.requirements.add_single_model_chains:
                best_single_model, self.requirements.primary = \
                    self._best_single_models(objective_function)

            for ind in self.population:
                ind.fitness = objective_function(ind)

            on_next_iteration_callback(self.population)

            self.log.info(f'Best metric is {self.best_individual.fitness}')

            # ``<`` instead of ``!=``: a generation counter that is already at or
            # past the bound (e.g. ``num_of_generations`` below 1) must end the
            # loop instead of running until the time limit.
            while not t.is_time_limit_reached(self.requirements.max_lead_time) \
                    and self.generation_num < self.requirements.num_of_generations - 1:
                self.log.info(f'Generation num: {self.generation_num}')
                self.num_of_gens_without_improvements = self.update_stagnation_counter(
                )
                self.log.info(
                    f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}'
                )
                if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                    self.max_depth_recount()

                self.max_std = self.update_max_std()

                individuals_to_select = regularized_population(
                    reg_type=self.parameters.regularization_type,
                    population=self.population,
                    objective_function=objective_function,
                    chain_class=self.chain_class)

                if num_of_new_individuals == 1 and len(self.population) == 1:
                    # Single-parent reproduction of the only individual.
                    new_population = list(self.reproduce(self.population[0]))
                    new_population[0].fitness = objective_function(
                        new_population[0])
                else:
                    num_of_parents = num_of_parents_in_crossover(
                        num_of_new_individuals)

                    selected_individuals = selection(
                        types=self.parameters.selection_types,
                        population=individuals_to_select,
                        pop_size=num_of_parents)

                    new_population = []

                    for parent_num in range(0, len(selected_individuals), 2):
                        offspring = list(self.reproduce(
                            selected_individuals[parent_num],
                            selected_individuals[parent_num + 1]))
                        new_population.extend(offspring)

                        # Score exactly the individuals this pair produced.
                        # Indexing ``new_population`` by ``parent_num`` is only
                        # correct while every pair yields exactly two offspring.
                        for new_ind in offspring:
                            new_ind.fitness = objective_function(new_ind)

                self.requirements.pop_size = self.next_population_size(
                    new_population)
                num_of_new_individuals = self.offspring_size(offspring_rate)
                self.log.info(
                    f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}'
                )

                # Snapshot of the current best, taken before the population is replaced.
                self.prev_best = deepcopy(self.best_individual)

                self.population = inheritance(
                    self.parameters.genetic_scheme_type,
                    self.parameters.selection_types, self.population,
                    new_population, self.num_of_inds_in_next_pop)

                if self.with_elitism:
                    self.population.append(self.prev_best)

                on_next_iteration_callback(self.population)
                self.log.info(
                    f'spent time: {round(t.minutes_from_start, 1)} min')
                self.log.info(f'Best metric is {self.best_individual.fitness}')

                self.generation_num += 1

            best = self.best_individual
            # ``best_single_model`` is bound only when ``add_single_model_chains``
            # was set above; the short-circuit ``and`` guards the access.
            if self.requirements.add_single_model_chains and \
                    (best_single_model.fitness <= best.fitness):
                best = best_single_model
        return best
コード例 #5
0
    def optimise(self, objective_function, offspring_rate: float = 0.5, on_next_iteration_callback=None):
        """Run the evolutionary loop under a time limit and return the resulting individual.

        :param objective_function: objective passed to ``self._evaluate_individuals``,
            ``self._best_single_operations`` and ``regularized_population``
        :param offspring_rate: forwarded to ``self.offspring_size`` to obtain the number
            of new individuals per generation
        :param on_next_iteration_callback: called with ``(population, archive)`` after the
            initial evaluation and after every generation; defaults to
            ``self.default_on_next_iteration_callback``
        :return: the value of ``self.result_individual()`` after the loop has finished
        """
        if on_next_iteration_callback is None:
            on_next_iteration_callback = self.default_on_next_iteration_callback

        if self.population is None:
            self.population = self._make_population(self.requirements.pop_size)

        num_of_new_individuals = self.offspring_size(offspring_rate)
        self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

        with CompositionTimer(max_lead_time=self.requirements.max_lead_time, log=self.log) as t:

            if self.requirements.allow_single_operations:
                self.best_single_operation, self.requirements.primary = \
                    self._best_single_operations(objective_function, timer=t)

            self._evaluate_individuals(self.population, objective_function, timer=t)

            if self.archive is not None:
                self.archive.update(self.population)

            on_next_iteration_callback(self.population, self.archive)

            self.log_info_about_best()

            # Truthiness test instead of ``is False``: the identity check fails for
            # any falsy value that is not the ``False`` singleton.
            # ``<`` instead of ``!=``: a generation counter that is already at or
            # past the bound must end the loop instead of running until the time limit.
            while not t.is_time_limit_reached(self.generation_num) \
                    and self.generation_num < self.requirements.num_of_generations - 1:

                self.log.info(f'Generation num: {self.generation_num}')

                self.num_of_gens_without_improvements = self.update_stagnation_counter()
                self.log.info(f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}')

                if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                    self.max_depth_recount()

                self.max_std = self.update_max_std()

                individuals_to_select = regularized_population(reg_type=self.parameters.regularization_type,
                                                               population=self.population,
                                                               objective_function=objective_function,
                                                               chain_class=self.chain_class, timer=t)

                if self.parameters.multi_objective:
                    # Archive items that survive the duplicate filter join the selection pool.
                    filtered_archive_items = duplicates_filtration(archive=self.archive,
                                                                   population=individuals_to_select)
                    individuals_to_select = deepcopy(individuals_to_select) + filtered_archive_items

                if num_of_new_individuals == 1 and len(self.population) == 1:
                    # Single-parent reproduction of the only individual.
                    new_population = list(self.reproduce(self.population[0]))
                else:
                    num_of_parents = num_of_parents_in_crossover(num_of_new_individuals)

                    selected_individuals = selection(types=self.parameters.selection_types,
                                                     population=individuals_to_select,
                                                     pop_size=num_of_parents)

                    new_population = []

                    for parent_num in range(0, len(selected_individuals), 2):
                        new_population += self.reproduce(selected_individuals[parent_num],
                                                         selected_individuals[parent_num + 1])

                # Both branches evaluate the offspring in the same way, so it is done once here.
                self._evaluate_individuals(new_population, objective_function, timer=t)

                self.requirements.pop_size = self.next_population_size(new_population)
                num_of_new_individuals = self.offspring_size(offspring_rate)
                self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

                # Snapshot of the current best, taken before the population is replaced.
                self.prev_best = deepcopy(self.best_individual)

                self.population = inheritance(self.parameters.genetic_scheme_type, self.parameters.selection_types,
                                              self.population,
                                              new_population, self.num_of_inds_in_next_pop)

                if not self.parameters.multi_objective and self.with_elitism:
                    self.population.append(self.prev_best)

                if self.archive is not None:
                    self.archive.update(self.population)

                on_next_iteration_callback(self.population, self.archive)
                self.log.info(f'spent time: {round(t.minutes_from_start, 1)} min')
                self.log_info_about_best()

                self.generation_num += 1

            best = self.result_individual()
            self.log.info('Result:')
            self.log_info_about_best()

        return best