def test_evaluate_individuals():
    """Check ``evaluate_individuals`` under a tight and a generous time limit.

    Four chains are evaluated twice with the same objective function:

    * with a 0.001-minute limit, after which exactly one individual with a
      fitness is expected to remain in the population;
    * with a 5-minute limit, after which all four individuals are expected
      to remain, each with a fitness assigned.
    """
    # Join the project root onto the relative data path exactly once.
    full_path_train = os.path.join(str(project_root()),
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(primary=available_model_types,
                                                   secondary=available_model_types)
    builder = GPComposerBuilder(task=task) \
        .with_requirements(composer_requirements) \
        .with_metrics(metric_function)
    composer = builder.build()

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ration_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric, composer.metrics,
                                        train_data, test_data)

    # Scenario 1: the limit is short enough that only one individual is
    # expected to survive the evaluation.
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=0.001)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    # Scenario 2: the limit is generous, so every individual should be kept.
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=5)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    # A generator lets all() stop at the first failure without building a list.
    assert all(ind.fitness is not None for ind in population)
def test_composition_timer():
    """Check that ``CompositionTimer`` reports its limit after the first wait.

    The timer gets a 0.01-minute limit and is polled once per simulated
    generation, each preceded by a one-second sleep. The test passes when the
    limit is reported and the whole loop took one whole second.
    """
    total_generations = 100
    limit_hit = False

    started_at = datetime.datetime.now()
    with CompositionTimer(max_lead_time=datetime.timedelta(minutes=0.01)) as timer:
        for generation_idx in range(total_generations):
            time.sleep(1)
            if not timer.is_time_limit_reached(generation_num=generation_idx):
                continue
            limit_hit = True
            break
    elapsed_seconds = (datetime.datetime.now() - started_at).seconds

    assert limit_hit and elapsed_seconds == 1
def optimise(self, objective_function, offspring_rate=0.5):
    """Run the evolutionary loop and return the best individual with the history.

    :param objective_function: callable applied to an individual; its result
        is stored in the individual's ``fitness`` attribute
    :param offspring_rate: share of ``pop_size`` produced per generation; only
        used for the steady-state genetic scheme
    :return: tuple ``(self.best_individual, self.history)``
    """
    is_steady_state = \
        self.parameters.genetic_scheme_type == GeneticSchemeTypesEnum.steady_state
    offspring_count = (math.ceil(self.requirements.pop_size * offspring_rate)
                       if is_steady_state
                       else self.requirements.pop_size - 1)

    with CompositionTimer() as timer:
        self.history = []

        # Score the starting population before any generation is produced.
        for individual in self.population:
            individual.fitness = objective_function(individual)
        self._add_to_history(self.population)

        for generation_num in range(self.requirements.num_of_generations - 1):
            print(f'Generation num: {generation_num}')

            candidates = regularized_population(
                reg_type=self.parameters.regularization_type,
                population=self.population,
                objective_function=objective_function,
                chain_class=self.chain_class)
            parents = selection(types=self.parameters.selection_types,
                                population=candidates,
                                pop_size=offspring_count * 2)

            # Parents are consumed in consecutive pairs; at most
            # ``offspring_count`` pairs are used even if more were selected.
            offspring = []
            pair_stop = min(len(parents), offspring_count * 2)
            for parent_num in range(0, pair_stop, 2):
                child = self.reproduce(parents[parent_num], parents[parent_num + 1])
                offspring.append(child)
                child.fitness = objective_function(child)

            self.population = heredity(self.parameters.genetic_scheme_type,
                                       self.parameters.selection_types,
                                       self.population,
                                       offspring,
                                       self.requirements.pop_size - 1)
            # The best individual is read after the population was replaced.
            self.population.append(self.best_individual)
            self._add_to_history(self.population)

            print('spent time:', timer.minutes_from_start)
            print(f'Best metric is {self.best_individual.fitness}')

            if timer.is_max_time_reached(self.requirements.max_lead_time, generation_num):
                break

    return self.best_individual, self.history
def optimise(self, objective_function, offspring_rate: float = 0.5, on_next_iteration_callback=None):
    """Evolve the population until the time or generation limit and return the best.

    :param objective_function: callable applied to an individual; its result
        is stored in the individual's ``fitness`` attribute
    :param offspring_rate: passed to ``self.offspring_size`` to get the number
        of new individuals per generation
    :param on_next_iteration_callback: called with the current population after
        the initial evaluation and after every generation; defaults to
        ``self.default_on_next_iteration_callback``
    :return: the best individual found, or the best single model when single
        model chains are enabled and its fitness is not greater
    """
    notify = (self.default_on_next_iteration_callback
              if on_next_iteration_callback is None
              else on_next_iteration_callback)

    if self.population is None:
        self.population = self._make_population(self.requirements.pop_size)

    num_of_new_individuals = self.offspring_size(offspring_rate)
    self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

    with CompositionTimer() as t:
        if self.requirements.add_single_model_chains:
            best_single_model, self.requirements.primary = \
                self._best_single_models(objective_function)

        for individual in self.population:
            individual.fitness = objective_function(individual)

        notify(self.population)
        self.log.info(f'Best metric is {self.best_individual.fitness}')

        while True:
            # Stop on the time limit first, then on the generation limit.
            if t.is_time_limit_reached(self.requirements.max_lead_time):
                break
            if self.generation_num == self.requirements.num_of_generations - 1:
                break

            self.log.info(f'Generation num: {self.generation_num}')

            self.num_of_gens_without_improvements = self.update_stagnation_counter()
            self.log.info(f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}')

            # Depth is never recounted on the very first generation.
            if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                self.max_depth_recount()
            self.max_std = self.update_max_std()

            individuals_to_select = regularized_population(
                reg_type=self.parameters.regularization_type,
                population=self.population,
                objective_function=objective_function,
                chain_class=self.chain_class)

            single_parent_mode = num_of_new_individuals == 1 and len(self.population) == 1
            if single_parent_mode:
                new_population = list(self.reproduce(self.population[0]))
                only_child = new_population[0]
                only_child.fitness = objective_function(only_child)
            else:
                parents_needed = num_of_parents_in_crossover(num_of_new_individuals)
                parents = selection(types=self.parameters.selection_types,
                                    population=individuals_to_select,
                                    pop_size=parents_needed)
                new_population = []
                for parent_num in range(0, len(parents), 2):
                    new_population += self.reproduce(parents[parent_num],
                                                     parents[parent_num + 1])
                    # The children are addressed by position in the growing
                    # list, at the same indices as their parents.
                    for position in (parent_num, parent_num + 1):
                        child = new_population[position]
                        child.fitness = objective_function(child)

            self.requirements.pop_size = self.next_population_size(new_population)
            num_of_new_individuals = self.offspring_size(offspring_rate)
            self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

            # Snapshot the current best before the population is replaced.
            self.prev_best = deepcopy(self.best_individual)
            self.population = inheritance(self.parameters.genetic_scheme_type,
                                          self.parameters.selection_types,
                                          self.population,
                                          new_population,
                                          self.num_of_inds_in_next_pop)
            if self.with_elitism:
                self.population.append(self.prev_best)

            notify(self.population)
            self.log.info(f'spent time: {round(t.minutes_from_start, 1)} min')
            self.log.info(f'Best metric is {self.best_individual.fitness}')

            self.generation_num += 1

        best = self.best_individual
        # ``best_single_model`` is only bound when single model chains are on,
        # so the flag must be checked before it is touched.
        single_model_wins = (self.requirements.add_single_model_chains
                             and best_single_model.fitness <= best.fitness)
        if single_model_wins:
            best = best_single_model
    return best
def optimise(self, objective_function, offspring_rate: float = 0.5, on_next_iteration_callback=None):
    """Evolve the population under a time limit and return the resulting individual.

    :param objective_function: passed to ``self._evaluate_individuals`` and to
        ``regularized_population`` to score individuals
    :param offspring_rate: passed to ``self.offspring_size`` to get the number
        of new individuals per generation
    :param on_next_iteration_callback: called with the current population and
        ``self.archive`` after the initial evaluation and after every
        generation; defaults to ``self.default_on_next_iteration_callback``
    :return: the value of ``self.result_individual()`` after the loop ends
    """
    notify = (self.default_on_next_iteration_callback
              if on_next_iteration_callback is None
              else on_next_iteration_callback)

    if self.population is None:
        self.population = self._make_population(self.requirements.pop_size)

    num_of_new_individuals = self.offspring_size(offspring_rate)
    self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

    with CompositionTimer(max_lead_time=self.requirements.max_lead_time, log=self.log) as t:
        if self.requirements.allow_single_operations:
            best_operation, primary_operations = \
                self._best_single_operations(objective_function, timer=t)
            self.best_single_operation = best_operation
            self.requirements.primary = primary_operations

        self._evaluate_individuals(self.population, objective_function, timer=t)

        if self.archive is not None:
            self.archive.update(self.population)
        notify(self.population, self.archive)
        self.log_info_about_best()

        while True:
            # The loop continues only while the timer answers exactly False.
            if t.is_time_limit_reached(self.generation_num) is not False:
                break
            if self.generation_num == self.requirements.num_of_generations - 1:
                break

            self.log.info(f'Generation num: {self.generation_num}')

            self.num_of_gens_without_improvements = self.update_stagnation_counter()
            self.log.info(f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}')

            # Depth is never recounted on the very first generation.
            if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                self.max_depth_recount()
            self.max_std = self.update_max_std()

            individuals_to_select = regularized_population(
                reg_type=self.parameters.regularization_type,
                population=self.population,
                objective_function=objective_function,
                chain_class=self.chain_class,
                timer=t)

            if self.parameters.multi_objective:
                # Filter against the un-copied candidates, then extend a copy.
                filtered_archive_items = duplicates_filtration(
                    archive=self.archive, population=individuals_to_select)
                individuals_to_select = deepcopy(individuals_to_select) + filtered_archive_items

            if num_of_new_individuals == 1 and len(self.population) == 1:
                new_population = list(self.reproduce(self.population[0]))
            else:
                parents_needed = num_of_parents_in_crossover(num_of_new_individuals)
                parents = selection(types=self.parameters.selection_types,
                                    population=individuals_to_select,
                                    pop_size=parents_needed)
                new_population = []
                for parent_num in range(0, len(parents), 2):
                    new_population.extend(
                        self.reproduce(parents[parent_num], parents[parent_num + 1]))
            # Both branches evaluate the complete offspring list in one call.
            self._evaluate_individuals(new_population, objective_function, timer=t)

            self.requirements.pop_size = self.next_population_size(new_population)
            num_of_new_individuals = self.offspring_size(offspring_rate)
            self.log.info(f'pop size: {self.requirements.pop_size}, num of new inds: {num_of_new_individuals}')

            # Snapshot the current best before the population is replaced.
            self.prev_best = deepcopy(self.best_individual)
            self.population = inheritance(self.parameters.genetic_scheme_type,
                                          self.parameters.selection_types,
                                          self.population,
                                          new_population,
                                          self.num_of_inds_in_next_pop)

            # The previous best is re-added only in single-objective mode.
            if not self.parameters.multi_objective and self.with_elitism:
                self.population.append(self.prev_best)

            if self.archive is not None:
                self.archive.update(self.population)
            notify(self.population, self.archive)

            self.log.info(f'spent time: {round(t.minutes_from_start, 1)} min')
            self.log_info_about_best()

            self.generation_num += 1

        best = self.result_individual()
        self.log.info('Result:')
        self.log_info_about_best()
    return best