Example #1
def test_is_eq_archive():
    """Archives built over items with identical fitness values compare equal
    via is_equal_archive; changing one fitness value breaks the equality."""
    archive_chain_first = chain_third()
    archive_chain_second = chain_first()
    archive_chain_third = chain_third()
    archive_chain_fourth = chain_third()
    population_first = [
        archive_chain_first, archive_chain_second, archive_chain_third
    ]
    population_second = deepcopy(population_first)
    population_first.append(archive_chain_fourth)
    eval_fitness = [(-0.9821, 0.8), (-0.8215, 0.6), (-0.21111, 0.4),
                    (-0.92, 0.9)]
    for population in (population_first, population_second):
        for chain_num, chain in enumerate(population):
            fitness = MultiObjFitness(
                values=eval_fitness[chain_num],
                weights=tuple(
                    [-1 for _ in range(len(eval_fitness[chain_num]))]))
            chain.fitness = fitness
    archive_first = tools.ParetoFront()
    archive_first.update(population_first)
    assert len(archive_first.items) == 3
    archive_second = tools.ParetoFront()
    archive_second.update(population_second)
    assert is_equal_archive(archive_first, archive_second)
    new_fitness = (-0.9821, 0.80001)
    population_second[0].fitness.values = new_fitness
    archive_third = tools.ParetoFront()
    archive_third.update(population_second)
    assert not is_equal_archive(archive_first, archive_third)
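For context, a minimal sketch of the comparison this test relies on, assuming
is_equal_archive simply checks that both archives hold items with the same
fitness values (is_equal_archive_sketch below is a hypothetical stand-in, not
FEDOT's implementation):

def is_equal_archive_sketch(first_archive_values, second_archive_values):
    # Order-insensitive comparison of the fitness values stored in two archives
    return sorted(first_archive_values) == sorted(second_archive_values)

assert is_equal_archive_sketch(
    [(-0.9821, 0.8), (-0.8215, 0.6), (-0.21111, 0.4)],
    [(-0.21111, 0.4), (-0.9821, 0.8), (-0.8215, 0.6)])
assert not is_equal_archive_sketch(
    [(-0.9821, 0.8), (-0.8215, 0.6), (-0.21111, 0.4)],
    [(-0.9821, 0.80001), (-0.8215, 0.6), (-0.21111, 0.4)])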
Example #2
def test_evaluate_individuals():
    """With an almost expired timer only the evaluated part of the population
    is kept; with enough time every individual receives a fitness."""
    project_root_path = str(project_root())
    full_path_train = os.path.join(project_root_path,
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = (GPComposerBuilder(task=task)
               .with_requirements(composer_requirements)
               .with_metrics(metric_function))

    composer = builder.build()

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ration_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=0.001)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    max_lead_time = datetime.timedelta(minutes=5)
    with CompositionTimer(max_lead_time=max_lead_time) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
Example #3
def test_filter_duplicates():
    """Only archive items whose fitness values do not occur in the population
    survive filter_duplicates."""
    archive = tools.ParetoFront()
    archive_items = [chain_first(), chain_second(), chain_third()]
    population = [chain_first(), chain_second(), chain_third(), chain_fourth()]
    archive_items_fitness = ((-0.80001, 0.25), (-0.7, 0.1), (-0.9, 0.7))
    population_fitness = ((-0.8, 0.25), (-0.59, 0.25), (-0.9, 0.7),
                          (-0.7, 0.1))
    weights = tuple([-1 for _ in range(len(population_fitness[0]))])
    for ind_num in range(len(archive_items)):
        archive_items[ind_num].fitness = MultiObjFitness(
            values=archive_items_fitness[ind_num], weights=weights)
    for ind_num in range(len(population)):
        population[ind_num].fitness = MultiObjFitness(
            values=population_fitness[ind_num], weights=weights)
    archive.update(archive_items)
    filtered_archive = filter_duplicates(archive, population)
    assert len(filtered_archive) == 1
    assert filtered_archive[0].fitness.values[0] == -0.80001
    assert filtered_archive[0].fitness.values[1] == 0.25
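For context, a minimal sketch of the filtering this test exercises, assuming
filter_duplicates keeps only those archive items whose fitness values are
absent from the population (filter_duplicates_sketch is a hypothetical
stand-in, not FEDOT's implementation):

def filter_duplicates_sketch(archive_values, population_values):
    # Keep archive entries whose fitness values do not occur in the population
    seen = set(population_values)
    return [values for values in archive_values if values not in seen]

archive_values = [(-0.80001, 0.25), (-0.7, 0.1), (-0.9, 0.7)]
population_values = [(-0.8, 0.25), (-0.59, 0.25), (-0.9, 0.7), (-0.7, 0.1)]
assert filter_duplicates_sketch(archive_values, population_values) == [(-0.80001, 0.25)]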
Example #4
def test_mutation():
    """Mutation returns a chain equal to the original for the 'none' mutation
    type, for a zero mutation probability, and for an invalid chain."""
    chain = chain_first()
    mutation_types = [MutationTypesEnum.none]
    log = default_log(__name__)
    chain_gener_params = ChainGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=1)
    new_chain = mutation(mutation_types,
                         chain_gener_params,
                         chain,
                         composer_requirements,
                         log=log,
                         max_depth=3)
    assert new_chain == chain
    mutation_types = [MutationTypesEnum.growth]
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=0)
    new_chain = mutation(mutation_types,
                         chain_gener_params,
                         chain,
                         composer_requirements,
                         log=log,
                         max_depth=3)
    assert new_chain == chain
    chain = chain_fifth()
    assert not constraint_function(chain)
    new_chain = mutation(mutation_types,
                         chain_gener_params,
                         chain,
                         composer_requirements,
                         log=log,
                         max_depth=3)
    assert new_chain == chain
Example #5
def test_pareto_front():
    """A chain duplicating another one's fitness values is not added to the
    front, so three chains yield a Pareto front of length two."""
    archive_chain_first = chain_third()
    archive_chain_second = chain_first()
    archive_chain_third = chain_third()
    population = [
        archive_chain_first, archive_chain_second, archive_chain_third
    ]

    eval_fitness = [(-0.9821, 0.8), (-0.8215, 0.6), (-0.9821, 0.8)]

    for chain_num, chain in enumerate(population):
        fitness = MultiObjFitness(
            values=eval_fitness[chain_num],
            weights=tuple([-1 for _ in range(len(eval_fitness[chain_num]))]))
        chain.fitness = fitness

    front = tools.ParetoFront()
    # Switch the instance's class to FEDOT's ParetoFront implementation
    front.__class__ = FedotParetoFront
    front.update(population)

    assert len(front) == 2
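For context, a minimal sketch of Pareto dominance under all-minimizing
weights, showing why the two distinct fitness vectors above do not dominate
each other and therefore both stay in the front (dominates_sketch is a
hypothetical helper, not DEAP's or FEDOT's implementation):

def dominates_sketch(first, second):
    # first dominates second if it is no worse in every objective (minimization)
    # and strictly better in at least one of them
    no_worse = all(f <= s for f, s in zip(first, second))
    strictly_better = any(f < s for f, s in zip(first, second))
    return no_worse and strictly_better

assert not dominates_sketch((-0.9821, 0.8), (-0.8215, 0.6))
assert not dominates_sketch((-0.8215, 0.6), (-0.9821, 0.8))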
Example #6
def test_crossover():
    """Crossover returns the parent chains unchanged for the 'none' crossover
    type and for a zero crossover probability."""
    chain_example_first = chain_first()
    chain_example_second = chain_second()
    log = default_log(__name__)
    crossover_types = [CrossoverTypesEnum.none]
    new_chains = crossover(crossover_types,
                           chain_example_first,
                           chain_example_second,
                           max_depth=3,
                           log=log,
                           crossover_prob=1)
    assert new_chains[0] == chain_example_first
    assert new_chains[1] == chain_example_second
    crossover_types = [CrossoverTypesEnum.subtree]
    new_chains = crossover(crossover_types,
                           chain_example_first,
                           chain_example_second,
                           max_depth=3,
                           log=log,
                           crossover_prob=0)
    assert new_chains[0] == chain_example_first
    assert new_chains[1] == chain_example_second