def test_filter_duplicates():
    """filter_duplicates keeps only archive members absent from the population.

    Fitness values are assigned manually; the single archive item whose
    fitness (-0.80001, 0.25) matches no population member should survive.
    """
    archive = tools.ParetoFront()
    archive_items = [pipeline_first(), pipeline_second(), pipeline_third()]

    adapter = PipelineAdapter()
    source_pipelines = [pipeline_first(), pipeline_second(),
                        pipeline_third(), pipeline_fourth()]
    population = [Individual(adapter.adapt(p)) for p in source_pipelines]

    archive_items_fitness = ((-0.80001, 0.25), (-0.7, 0.1), (-0.9, 0.7))
    population_fitness = ((-0.8, 0.25), (-0.59, 0.25), (-0.9, 0.7), (-0.7, 0.1))
    # Both objectives are minimized.
    weights = (-1,) * len(population_fitness[0])

    for item, values in zip(archive_items, archive_items_fitness):
        item.fitness = MultiObjFitness(values=values, weights=weights)
    for individual, values in zip(population, population_fitness):
        individual.fitness = MultiObjFitness(values=values, weights=weights)
    archive.update(archive_items)

    filtered_archive = filter_duplicates(archive, population)
    assert len(filtered_archive) == 1
    assert filtered_archive[0].fitness.values[0] == -0.80001
    assert filtered_archive[0].fitness.values[1] == 0.25
def test_evaluate_individuals():
    """evaluate_individuals must respect the optimisation timer.

    With a near-zero timeout only the first individual is evaluated and kept;
    with a generous timeout all four individuals receive a fitness.
    """
    project_root_path = str(fedot_project_root())
    # NOTE(review): the original joined the project root twice; os.path.join
    # discards its first argument when the second is already absolute, so the
    # second join was a fragile no-op. Build the absolute path once.
    full_path_train = os.path.join(project_root_path,
                                   'test/data/simple_classification.csv')
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)
    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)
    composer = builder.build()

    pipelines_to_evaluate = [pipeline_first(), pipeline_second(),
                             pipeline_third(), pipeline_fourth()]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data, test_data)
    adapter = PipelineAdapter()
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())

    # Near-zero timeout: the timer expires almost immediately, so only the
    # first individual survives evaluation.
    timeout = datetime.timedelta(minutes=0.001)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False, timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    # Generous timeout: every individual must be evaluated.
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=5)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False, timer=t)
    assert len(population) == 4
    # Generator form: no intermediate list needed for all().
    assert all(ind.fitness is not None for ind in population)
def build(self) -> Composer:
    """Assemble the composer: pick the optimiser class, configure
    multi-objective settings and default mutation operators, then attach
    the optimiser to the composer and return it.
    """
    params = self.optimiser_parameters
    is_parameter_free = \
        params.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free
    optimiser_cls = GPGraphParameterFreeOptimiser if is_parameter_free \
        else GPGraphOptimiser

    generation_params = GraphGenerationParams(
        adapter=PipelineAdapter(self._composer.log),
        advisor=PipelineChangeAdvisor())

    # Multi-objective mode: keep a Pareto archive, force regularization off.
    archive = None
    if len(self._composer.metrics) > 1:
        archive = tools.ParetoFront()
        # TODO add possibility of using regularization in MO alg
        params.regularization_type = RegularizationTypesEnum.none
        params.multi_objective = True

    # Default mutation set when the caller did not supply one.
    if params.mutation_types is None:
        params.mutation_types = [boosting_mutation, parameter_change_mutation,
                                 single_edge_mutation, single_change_mutation,
                                 single_drop_mutation, single_add_mutation]

    self._composer.optimiser = optimiser_cls(
        initial_graph=self._composer.initial_pipeline,
        requirements=self._composer.composer_requirements,
        graph_generation_params=generation_params,
        parameters=params,
        log=self._composer.log,
        archive_type=archive,
        metrics=self._composer.metrics)
    return self._composer
def test_mutation():
    """Mutation must leave the graph untouched when the mutation type is
    `none` or when the mutation probability is zero."""
    adapter = PipelineAdapter()
    log = default_log(__name__)
    graph_gener_params = GraphGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']

    ind = Individual(adapter.adapt(pipeline_first()))

    # Case 1: MutationTypesEnum.none with probability 1 -> no change.
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types, secondary=secondary_model_types,
        mutation_prob=1)
    new_ind = mutation([MutationTypesEnum.none], graph_gener_params, ind,
                       composer_requirements, log=log, max_depth=3)
    assert new_ind.graph == ind.graph

    # Case 2: growth mutation with probability 0 -> no change.
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types, secondary=secondary_model_types,
        mutation_prob=0)
    new_ind = mutation([MutationTypesEnum.growth], graph_gener_params, ind,
                       composer_requirements, log=log, max_depth=3)
    assert new_ind.graph == ind.graph

    # Case 3: zero probability on a different pipeline -> still no change.
    ind = Individual(adapter.adapt(pipeline_fifth()))
    new_ind = mutation([MutationTypesEnum.growth], graph_gener_params, ind,
                       composer_requirements, log=log, max_depth=3)
    assert new_ind.graph == ind.graph
def test_selection():
    """Tournament selection returns the requested number of individuals,
    each of which comes from the source population."""
    desired_size = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    selected = selection(types=[SelectionTypesEnum.tournament],
                         population=population,
                         pop_size=desired_size,
                         params=graph_params)
    assert len(selected) == desired_size
    assert all(ind in population for ind in selected)
def test_individuals_selection_random_individuals():
    """individuals_selection must return the requested number of distinct
    individuals (no duplicates)."""
    desired_size = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    selected = individuals_selection(types=[SelectionTypesEnum.tournament],
                                     individuals=population,
                                     pop_size=desired_size,
                                     graph_params=graph_params)
    # Distinctness is checked via the individuals' string representations.
    representations = {str(ind) for ind in selected}
    assert len(representations) == len(selected)
    assert len(selected) == desired_size
def test_crossover():
    """Crossover must be a no-op when the crossover type is `none` or the
    crossover probability is zero."""
    adapter = PipelineAdapter()
    first_graph = adapter.adapt(pipeline_first())
    second_graph = adapter.adapt(pipeline_second())
    log = default_log(__name__)

    def run_crossover(types, prob):
        # One crossover attempt between fresh Individual wrappers.
        return crossover(types,
                         Individual(first_graph),
                         Individual(second_graph),
                         max_depth=3, log=log, crossover_prob=prob)

    # `none` type applied with certainty: parents are returned unchanged.
    offspring = run_crossover([CrossoverTypesEnum.none], 1)
    assert offspring[0].graph == first_graph
    assert offspring[1].graph == second_graph

    # Subtree crossover with zero probability: parents unchanged as well.
    offspring = run_crossover([CrossoverTypesEnum.subtree], 0)
    assert offspring[0].graph == first_graph
    assert offspring[1].graph == second_graph
def rand_population_gener_and_eval(pop_size=4):
    """Generate `pop_size` random individuals and assign each a fitness.

    Returns the evaluated population as a list of Individual.
    """
    operations = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=operations,
                                          secondary=operations,
                                          max_depth=1)
    generation_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                              adapter=PipelineAdapter())
    make_random_graph = partial(random_graph,
                                params=generation_params,
                                requirements=requirements)
    population = [Individual(make_random_graph()) for _ in range(pop_size)]
    # Evaluation: every individual gets a fitness from the shared objective.
    for individual in population:
        individual.fitness = obj_function()
    return population
def test_boosting_mutation_for_linear_graph():
    """Tests boosting mutation can add correct boosting cascade.

    The mutation is stochastic, so up to 100 attempts are made; the first
    graph that matches the expected boosting cascade is then restored to a
    pipeline and fitted to confirm it is usable.
    """
    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])
    # Expected result: logit over the model plus a linear correction branch
    # built on class_decompose.
    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node,
                     OptNode('linear',
                             [OptNode('class_decompose',
                                      [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)
    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)

    successful_mutation_boosting = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        successful_mutation_boosting = (
            graph_after_mutation.root_node.descriptive_id ==
            boosting_graph.root_node.descriptive_id)
        if successful_mutation_boosting:
            # BUGFIX: break immediately on success. The original set the flag
            # and only broke on the NEXT iteration, which had already
            # overwritten `graph_after_mutation` with a fresh, unchecked
            # mutation — so the pipeline fitted below was not the graph that
            # was verified against `boosting_graph`.
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None