コード例 #1
0
def test_filter_duplicates():
    """Check the outcome of ``filter_duplicates`` for an archive and a population.

    Three pipelines are put into a Pareto-front archive and four adapted
    individuals form the population. Two of the archive fitness tuples also
    occur in the population, while the third differs only slightly
    (-0.80001 vs -0.8). The test expects exactly one entry in the result:
    the one with fitness (-0.80001, 0.25).
    """
    pareto_archive = tools.ParetoFront()
    archive_items = [pipeline_first(), pipeline_second(), pipeline_third()]
    adapter = PipelineAdapter()

    source_pipelines = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth(),
    ]
    population = [
        Individual(adapter.adapt(pipeline)) for pipeline in source_pipelines
    ]

    archive_items_fitness = ((-0.80001, 0.25), (-0.7, 0.1), (-0.9, 0.7))
    population_fitness = ((-0.8, 0.25), (-0.59, 0.25), (-0.9, 0.7),
                          (-0.7, 0.1))
    # One weight of -1 per objective.
    weights = (-1,) * len(population_fitness[0])

    for item, values in zip(archive_items, archive_items_fitness):
        item.fitness = MultiObjFitness(values=values, weights=weights)
    for individual, values in zip(population, population_fitness):
        individual.fitness = MultiObjFitness(values=values, weights=weights)

    pareto_archive.update(archive_items)
    filtered_archive = filter_duplicates(pareto_archive, population)

    assert len(filtered_archive) == 1
    remaining_values = filtered_archive[0].fitness.values
    assert remaining_values[0] == -0.80001
    assert remaining_values[1] == 0.25
コード例 #2
0
def test_evaluate_individuals():
    """Check how ``evaluate_individuals`` reacts to the optimisation timeout.

    Four pipelines are adapted into individuals and evaluated twice with the
    composer metric:

    * with a near-zero timeout (0.001 min) the test expects a single
      individual, with a fitness, to remain in the population;
    * with a five-minute timeout all four individuals are expected to remain,
      each with a fitness.
    """
    # Join the project root exactly once. The previous code re-joined the
    # root with an already rooted path, which only worked because
    # os.path.join discards earlier components when a later one is absolute.
    full_path_train = os.path.join(str(fedot_project_root()),
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    pipelines_to_evaluate = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth()
    ]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    adapter = PipelineAdapter()
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())

    def evaluate_fresh_population(timeout_minutes):
        """Adapt the pipelines anew and evaluate them under the given timeout."""
        population = [
            Individual(adapter.adapt(c)) for c in pipelines_to_evaluate
        ]
        timeout = datetime.timedelta(minutes=timeout_minutes)
        with OptimisationTimer(timeout=timeout) as t:
            # The same list object is inspected afterwards, so it is passed
            # as-is rather than copied.
            evaluate_individuals(individuals_set=population,
                                 objective_function=metric_function_for_nodes,
                                 graph_generation_params=params,
                                 is_multi_objective=False,
                                 timer=t)
        return population

    population = evaluate_fresh_population(timeout_minutes=0.001)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = evaluate_fresh_population(timeout_minutes=5)
    assert len(population) == 4
    assert all(ind.fitness is not None for ind in population)
コード例 #3
0
    def build(self) -> Composer:
        """Create the optimiser, attach it to the composer and return the composer.

        The optimiser class depends on the genetic scheme type. When the
        composer has more than one metric, a Pareto-front archive is created,
        regularization is switched off and the multi-objective flag is set on
        the optimiser parameters. If no mutation types were configured, a
        default list of mutations is assigned.

        :return: the composer held by this builder, with ``optimiser`` set
        """
        composer = self._composer
        parameters = self.optimiser_parameters

        if parameters.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free:
            optimiser_type = GPGraphParameterFreeOptimiser
        else:
            optimiser_type = GPGraphOptimiser

        graph_generation_params = GraphGenerationParams(
            adapter=PipelineAdapter(composer.log),
            advisor=PipelineChangeAdvisor())

        is_multi_objective = len(composer.metrics) > 1
        archive_type = tools.ParetoFront() if is_multi_objective else None
        if is_multi_objective:
            # TODO add possibility of using regularization in MO alg
            parameters.regularization_type = RegularizationTypesEnum.none
            parameters.multi_objective = True

        if parameters.mutation_types is None:
            # Default mutation set used when the caller did not choose one.
            parameters.mutation_types = [
                boosting_mutation,
                parameter_change_mutation,
                single_edge_mutation,
                single_change_mutation,
                single_drop_mutation,
                single_add_mutation,
            ]

        composer.optimiser = optimiser_type(
            initial_graph=composer.initial_pipeline,
            requirements=composer.composer_requirements,
            graph_generation_params=graph_generation_params,
            parameters=parameters,
            log=composer.log,
            archive_type=archive_type,
            metrics=composer.metrics)

        return composer
コード例 #4
0
def test_mutation():
    """Check that ``mutation`` returns an unchanged graph in three scenarios.

    * mutation type ``none`` with ``mutation_prob=1``;
    * mutation type ``growth`` with ``mutation_prob=0``;
    * the same ``growth`` / zero-probability setup applied to an individual
      built from ``pipeline_fifth``.
    """
    adapter = PipelineAdapter()
    individual = Individual(adapter.adapt(pipeline_first()))
    log = default_log(__name__)
    graph_gener_params = GraphGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']

    def apply_mutation(types, target, requirements):
        """Run ``mutation`` with the settings shared by all scenarios."""
        return mutation(types,
                        graph_gener_params,
                        target,
                        requirements,
                        log=log,
                        max_depth=3)

    # Scenario 1: the "none" mutation type with probability 1.
    always_mutate = GPComposerRequirements(primary=primary_model_types,
                                           secondary=secondary_model_types,
                                           mutation_prob=1)
    mutated = apply_mutation([MutationTypesEnum.none], individual,
                             always_mutate)
    assert mutated.graph == individual.graph

    # Scenario 2: the "growth" mutation type with probability 0.
    growth_types = [MutationTypesEnum.growth]
    never_mutate = GPComposerRequirements(primary=primary_model_types,
                                          secondary=secondary_model_types,
                                          mutation_prob=0)
    mutated = apply_mutation(growth_types, individual, never_mutate)
    assert mutated.graph == individual.graph

    # Scenario 3: same as scenario 2 but for a different source pipeline.
    individual = Individual(adapter.adapt(pipeline_fifth()))
    mutated = apply_mutation(growth_types, individual, never_mutate)
    assert mutated.graph == individual.graph
コード例 #5
0
def test_selection():
    """Check tournament ``selection`` on a random population of four.

    Every selected individual must come from the population, and exactly
    ``num_of_inds`` individuals must be returned.
    """
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())

    chosen = selection(types=[SelectionTypesEnum.tournament],
                       population=population,
                       pop_size=num_of_inds,
                       params=graph_params)

    assert all(individual in population for individual in chosen)
    assert len(chosen) == num_of_inds
コード例 #6
0
def test_individuals_selection_random_individuals():
    """Check ``individuals_selection`` with tournament selection.

    The selected individuals must have pairwise distinct string
    representations, and exactly ``num_of_inds`` must be returned.
    """
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())

    chosen = individuals_selection(types=[SelectionTypesEnum.tournament],
                                   individuals=population,
                                   pop_size=num_of_inds,
                                   graph_params=graph_params)

    # Uniqueness is judged by the string form of each individual.
    unique_refs = {str(individual) for individual in chosen}
    assert len(unique_refs) == len(chosen)
    assert len(chosen) == num_of_inds
コード例 #7
0
def test_crossover():
    """Check that ``crossover`` returns the parent graphs unchanged.

    Two scenarios are covered: crossover type ``none`` with probability 1,
    and crossover type ``subtree`` with probability 0.
    """
    adapter = PipelineAdapter()
    first_graph = adapter.adapt(pipeline_first())
    second_graph = adapter.adapt(pipeline_second())
    log = default_log(__name__)

    # (crossover type, crossover probability) for each scenario.
    scenarios = (
        (CrossoverTypesEnum.none, 1),
        (CrossoverTypesEnum.subtree, 0),
    )
    for crossover_type, probability in scenarios:
        offspring = crossover([crossover_type],
                              Individual(first_graph),
                              Individual(second_graph),
                              max_depth=3,
                              log=log,
                              crossover_prob=probability)
        assert offspring[0].graph == first_graph
        assert offspring[1].graph == second_graph
コード例 #8
0
def rand_population_gener_and_eval(pop_size=4):
    """Build a population of random graphs and assign a fitness to each.

    :param pop_size: number of individuals to generate
    :return: list of individuals whose ``fitness`` is set from ``obj_function``
    """
    operations = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=operations,
                                          secondary=operations,
                                          max_depth=1)
    generation_params = GraphGenerationParams(
        advisor=PipelineChangeAdvisor(), adapter=PipelineAdapter())

    population = []
    for _ in range(pop_size):
        graph = random_graph(params=generation_params,
                             requirements=requirements)
        population.append(Individual(graph))

    # Evaluation runs as a separate pass, after all graphs are generated.
    for individual in population:
        individual.fitness = obj_function()
    return population
コード例 #9
0
def test_boosting_mutation_for_linear_graph():
    """
    Tests boosting mutation can add correct boosting cascade

    The mutation is applied to a linear ``scaling -> knn`` graph up to 100
    times. The test passes once the root node's ``descriptive_id`` of a
    mutated graph equals that of the expected boosting graph. That same
    matching graph is then restored to a pipeline, fitted and used for
    prediction.
    """

    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])

    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node, OptNode('linear',
                                         [OptNode('class_decompose',
                                                  [model_node, init_node])])]))
    # Loop-invariant: the id every mutated graph is compared against.
    expected_descriptive_id = boosting_graph.root_node.descriptive_id

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_boosting = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if graph_after_mutation.root_node.descriptive_id == expected_descriptive_id:
            # Stop right away so that graph_after_mutation stays bound to the
            # graph that matched; a further iteration would overwrite it.
            successful_mutation_boosting = True
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None