def generate_history(generations, pop_size):
    """Build a synthetic optimisation history.

    Creates `generations` populations of `pop_size` individuals each; every
    individual wraps an adapted copy of `pipeline_first()` and carries a
    fitness of 1 / (gen * idx + 1), so fitness decays across the history.
    """
    history = OptHistory()
    adapter = GraphGenerationParams().adapter
    for gen_num in range(generations):
        population = []
        for ind_num in range(pop_size):
            individual = Individual(adapter.adapt(pipeline_first()))
            # Fitness shrinks with both generation number and position.
            individual.fitness = 1 / (gen_num * ind_num + 1)
            population.append(individual)
        history.add_to_history(population)
    return history
def build(self) -> Composer:
    """Assemble a GP optimiser from the accumulated settings, attach it to
    the composer under construction, and return that composer.
    """
    # The parameter-free genetic scheme needs its dedicated optimiser class.
    if self.optimiser_parameters.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free:
        optimiser_cls = GPGraphParameterFreeOptimiser
    else:
        optimiser_cls = GPGraphOptimiser

    generation_params = GraphGenerationParams(
        adapter=PipelineAdapter(self._composer.log),
        advisor=PipelineChangeAdvisor())

    # More than one metric means multi-objective mode: keep a Pareto archive
    # and switch off regularization.
    archive = None
    if len(self._composer.metrics) > 1:
        archive = tools.ParetoFront()
        # TODO add possibility of using regularization in MO alg
        self.optimiser_parameters.regularization_type = RegularizationTypesEnum.none
        self.optimiser_parameters.multi_objective = True

    # Fall back to the default mutation set when the caller gave none.
    if self.optimiser_parameters.mutation_types is None:
        self.optimiser_parameters.mutation_types = [
            boosting_mutation, parameter_change_mutation,
            single_edge_mutation, single_change_mutation,
            single_drop_mutation, single_add_mutation
        ]

    self._composer.optimiser = optimiser_cls(
        initial_graph=self._composer.initial_pipeline,
        requirements=self._composer.composer_requirements,
        graph_generation_params=generation_params,
        parameters=self.optimiser_parameters,
        log=self._composer.log,
        archive_type=archive,
        metrics=self._composer.metrics)
    return self._composer
def test_drop_mutation_for_linear_graph():
    """Tests single_drop mutation can remove node."""
    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_one_node = OptGraph(OptNode('logit'))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_drop = False
    # Mutation is stochastic, so retry up to 100 times for the expected graph.
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_drop],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        successful_mutation_drop = \
            graph_after_mutation.root_node.descriptive_id == linear_one_node.root_node.descriptive_id
        if successful_mutation_drop:
            # Break immediately on success (the original ran one extra,
            # wasted mutation before its `else: break` fired).
            break
    assert successful_mutation_drop
def test_edge_mutation_for_graph():
    """Tests edge mutation can add edge between nodes."""
    graph_without_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    # Expected result: 'scaling' feeds both 'one_hot_encoding' and 'logit'.
    primary = OptNode('scaling')
    graph_with_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [primary]), primary]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling', 'one_hot_encoding'],
        secondary=['logit', 'scaling'],
        mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_edge = False
    # Mutation is stochastic, so retry up to 100 times for the expected graph.
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_edge],
                                        params=graph_params,
                                        ind=Individual(graph_without_edge),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=graph_with_edge.depth).graph
        successful_mutation_edge = \
            graph_after_mutation.root_node.descriptive_id == graph_with_edge.root_node.descriptive_id
        if successful_mutation_edge:
            # Break immediately on success (the original ran one extra,
            # wasted mutation before its `else: break` fired).
            break
    assert successful_mutation_edge
def test_intermediate_add_mutation_for_linear_graph():
    """Tests single_add mutation can add node between two existing nodes."""
    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_three_nodes_inner = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling'], secondary=['one_hot_encoding'], mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_inner = False
    # Mutation is stochastic, so retry up to 100 times for the expected graph.
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_add],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=3).graph
        successful_mutation_inner = \
            graph_after_mutation.root_node.descriptive_id == linear_three_nodes_inner.root_node.descriptive_id
        if successful_mutation_inner:
            # Break immediately on success (the original ran one extra,
            # wasted mutation before its `else: break` fired).
            break
    assert successful_mutation_inner
def test_evaluate_individuals():
    """Evaluation respects the timer: an expired timer keeps only the
    evaluated prefix of the population; a generous one evaluates all."""
    # The original joined this (already absolute) path onto the project root
    # a second time; os.path.join simply discarded the first argument, so the
    # extra fedot_project_root() call was redundant.
    full_path_train = os.path.join(str(fedot_project_root()),
                                   'test/data/simple_classification.csv')
    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)
    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)
    composer = builder.build()

    pipelines_to_evaluate = [
        pipeline_first(), pipeline_second(),
        pipeline_third(), pipeline_fourth()
    ]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data, test_data)
    adapter = PipelineAdapter()
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=0.001)
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    # Timer expires almost immediately: only the first individual survives.
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=5)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    # Ample time: every individual is evaluated and keeps a fitness.
    assert len(population) == 4
    assert all(ind.fitness is not None for ind in population)
def test_selection():
    """Tournament selection returns the requested number of individuals,
    all drawn from the source population."""
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    chosen = selection(types=[SelectionTypesEnum.tournament],
                       population=population,
                       pop_size=num_of_inds,
                       params=graph_params)
    assert len(chosen) == num_of_inds
    assert all(ind in population for ind in chosen)
def test_individuals_selection_random_individuals():
    """Individual selection yields exactly `num_of_inds` distinct individuals."""
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    chosen = individuals_selection(types=[SelectionTypesEnum.tournament],
                                   individuals=population,
                                   pop_size=num_of_inds,
                                   graph_params=graph_params)
    # String representations act as identity keys: no duplicates allowed.
    chosen_repr = [str(ind) for ind in chosen]
    assert len(chosen) == num_of_inds
    assert len(set(chosen_repr)) == len(chosen)
def rand_population_gener_and_eval(pop_size=4):
    """Generate `pop_size` random depth-1 pipelines and assign each a fitness
    via obj_function()."""
    models_set = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=models_set,
                                          secondary=models_set,
                                          max_depth=1)
    generation_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                              adapter=PipelineAdapter())
    make_random_graph = partial(random_graph,
                                params=generation_params,
                                requirements=requirements)
    population = [Individual(make_random_graph()) for _ in range(pop_size)]
    # evaluation
    for individual in population:
        individual.fitness = obj_function()
    return population
def test_boosting_mutation_for_linear_graph():
    """Tests boosting mutation can add correct boosting cascade."""
    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    # Expected cascade: the base model plus a decompose/linear correction branch.
    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])
    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node,
                     OptNode('linear',
                             [OptNode('class_decompose',
                                      [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)
    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_boosting = False
    # Mutation is stochastic, so retry up to 100 times for the expected graph.
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        successful_mutation_boosting = \
            graph_after_mutation.root_node.descriptive_id == boosting_graph.root_node.descriptive_id
        if successful_mutation_boosting:
            # Break immediately on success (the original ran one extra,
            # wasted mutation before its `else: break` fired).
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None
def run_custom_example(
        timeout: datetime.timedelta = datetime.timedelta(minutes=0.2)):
    """Optimise a custom 10-node graph on the encoded example dataset and
    display the best network found.

    :param timeout: wall-clock budget for the optimisation run.
    """
    data = pd.read_csv(
        os.path.join(fedot_project_root(), 'examples', 'data',
                     'custom_encoded.csv'))
    nodes_types = ['V1', 'V2', 'V3', 'V4', 'V5',
                   'V6', 'V7', 'V8', 'V9', 'V10']
    rules = [has_no_self_cycled_nodes, has_no_cycle, _has_no_duplicates]

    # Seed graph: every node type present as an isolated node.
    initial = CustomGraphModel(nodes=[
        CustomGraphNode(nodes_from=None, content=node_type)
        for node_type in nodes_types
    ])

    requirements = GPComposerRequirements(primary=nodes_types,
                                          secondary=nodes_types,
                                          max_arity=10,
                                          max_depth=10,
                                          pop_size=5,
                                          num_of_generations=5,
                                          crossover_prob=0.8,
                                          mutation_prob=0.9,
                                          timeout=timeout)
    optimiser_parameters = GPGraphOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
        mutation_types=[custom_mutation],
        crossover_types=[CrossoverTypesEnum.none],
        regularization_type=RegularizationTypesEnum.none)
    graph_generation_params = GraphGenerationParams(
        adapter=DirectAdapter(base_graph_class=CustomGraphModel,
                              base_node_class=CustomGraphNode),
        rules_for_constraint=rules)
    optimizer = GPGraphOptimiser(
        graph_generation_params=graph_generation_params,
        metrics=[],
        parameters=optimiser_parameters,
        requirements=requirements,
        initial_graph=initial,
        log=default_log(logger_name='Bayesian', verbose_level=1))

    optimized_network = optimizer.optimise(partial(custom_metric, data=data))
    optimized_network.show()
def test_mutation():
    """A `none` mutation, or any mutation with zero probability, must leave
    the individual's graph unchanged."""
    adapter = PipelineAdapter()
    ind = Individual(adapter.adapt(pipeline_first()))
    log = default_log(__name__)
    graph_gener_params = GraphGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']

    def _mutate(individual, requirements, types):
        # Shared call with the fixed params used by every case below.
        return mutation(types, graph_gener_params, individual, requirements,
                        log=log, max_depth=3)

    # Case 1: mutation type `none` with probability 1 — graph untouched.
    requirements_always = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=1)
    new_ind = _mutate(ind, requirements_always, [MutationTypesEnum.none])
    assert new_ind.graph == ind.graph

    # Case 2: `growth` mutation with probability 0 — graph untouched.
    requirements_never = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=0)
    new_ind = _mutate(ind, requirements_never, [MutationTypesEnum.growth])
    assert new_ind.graph == ind.graph

    # Case 3: same zero-probability growth on a different pipeline.
    ind = Individual(adapter.adapt(pipeline_fifth()))
    new_ind = _mutate(ind, requirements_never, [MutationTypesEnum.growth])
    assert new_ind.graph == ind.graph
def test_custom_graph_opt():
    """End-to-end GP optimisation over a custom graph domain yields a valid
    CustomModel containing the expected node label."""
    nodes_types = ['A', 'B', 'C', 'D']
    rules = [has_no_self_cycled_nodes]

    requirements = GPComposerRequirements(primary=nodes_types,
                                          secondary=nodes_types,
                                          max_arity=3,
                                          max_depth=3,
                                          pop_size=5,
                                          num_of_generations=5,
                                          crossover_prob=0.8,
                                          mutation_prob=0.9)
    optimiser_parameters = GPGraphOptimiserParameters(
        genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
        mutation_types=[MutationTypesEnum.simple,
                        MutationTypesEnum.reduce,
                        MutationTypesEnum.growth,
                        MutationTypesEnum.local_growth],
        regularization_type=RegularizationTypesEnum.none)
    graph_generation_params = GraphGenerationParams(
        adapter=DirectAdapter(CustomModel, CustomNode),
        rules_for_constraint=rules)
    optimizer = GPGraphOptimiser(
        graph_generation_params=graph_generation_params,
        metrics=[],
        parameters=optimiser_parameters,
        requirements=requirements,
        initial_graph=None)

    optimized_network = optimizer.optimise(custom_metric)

    assert optimized_network is not None
    assert isinstance(optimized_network, CustomModel)
    assert isinstance(optimized_network.nodes[0], CustomNode)
    node_labels = [str(node) for node in optimized_network.nodes]
    assert 'custom_A' in node_labels