Ejemplo n.º 1
0
 def make_secondary_node_as_primary(node_child):
     """Swap ``node_child`` for a new primary node in every node that consumes it.

     A ``PrimaryNode`` is created from the operation type of ``node_child`` and
     placed at the position ``node_child`` occupied in ``nodes_from`` of each
     node returned by ``self.node_children(node_child)``.

     NOTE(review): ``self`` is not a parameter of this function; it is
     presumably captured from an enclosing method -- confirm in the caller.
     """
     replacement = PrimaryNode(node_child.operation.operation_type)
     for consumer in self.node_children(node_child):
         parents = consumer.nodes_from
         # Overwrite the slot in place so the order of the parents is preserved.
         parents[parents.index(node_child)] = replacement
Ejemplo n.º 2
0
def chain_with_only_data_operations():
    """Return a linear chain: one_hot_encoding -> scaling -> ransac_lin_reg."""
    encoding_node = PrimaryNode(operation_type='one_hot_encoding')
    scaling_node = SecondaryNode(operation_type='scaling',
                                 nodes_from=[encoding_node])
    root_node = SecondaryNode(operation_type='ransac_lin_reg',
                              nodes_from=[scaling_node])

    return Chain(root_node)
Ejemplo n.º 3
0
def test_only_ts_specific_operations_are_primary():
    r"""Check that the validation rule raises for the chain drawn below.

    Incorrect chain
    lagged \
             linear -> final forecast
     ridge /
    """
    lagged_primary = PrimaryNode('lagged')
    ridge_primary = PrimaryNode('ridge')
    linear_root = SecondaryNode('linear',
                                nodes_from=[lagged_primary, ridge_primary])
    incorrect_chain = Chain(linear_root)

    with pytest.raises(Exception) as exc:
        assert only_ts_specific_operations_are_primary(incorrect_chain)

    # The raised error must carry exactly this message.
    expected_message = f'{ERROR_PREFIX} Chain for forecasting has not ts_specific preprocessing in primary nodes'
    assert str(exc.value) == expected_message
Ejemplo n.º 4
0
def create_four_depth_chain():
    """Return a chain rooted at a 'knn' node with two branches.

    Branch one: logit (primary) -> xgboost -> qda.
    Branch two: [knn, lda] (primary) -> logit, joined with a primary
    xgboost node into a 'knn' node.
    """
    # primary level
    primary_knn = PrimaryNode('knn')
    primary_lda = PrimaryNode('lda')
    primary_xgb = PrimaryNode('xgboost')
    primary_logit = PrimaryNode('logit')

    # second level
    second_logit = SecondaryNode('logit', nodes_from=[primary_knn, primary_lda])
    second_xgb = SecondaryNode('xgboost', nodes_from=[primary_logit])

    # third level
    third_qda = SecondaryNode('qda', nodes_from=[second_xgb])
    third_knn = SecondaryNode('knn', nodes_from=[second_logit, primary_xgb])

    # root
    root_knn = SecondaryNode('knn', nodes_from=[third_qda, third_knn])

    return Chain(root_knn)
Ejemplo n.º 5
0
def test_chain_str():
    """``str(chain)`` must match the expected depth/length/nodes description."""
    # given: three primary nodes joined by one xgboost node
    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_node = PrimaryNode(operation_type='knn')
    xgboost_root = SecondaryNode(operation_type='xgboost',
                                 nodes_from=[logit_node, lda_node, knn_node])
    chain = Chain()
    chain.add_node(xgboost_root)

    expected = "{'depth': 2, 'length': 4, 'nodes': [xgboost, logit, lda, knn]}"

    # when
    actual = str(chain)

    # then
    assert actual == expected
Ejemplo n.º 6
0
def get_complex_regr_chain():
    """Return a chain where one 'scaling' node feeds 'ridge' and 'linear', both joined by 'xgbreg'."""
    scaling = PrimaryNode(operation_type='scaling')
    ridge_branch = SecondaryNode('ridge', nodes_from=[scaling])
    linear_branch = SecondaryNode('linear', nodes_from=[scaling])
    xgbreg_root = SecondaryNode('xgbreg',
                                nodes_from=[ridge_branch, linear_branch])

    return Chain(xgbreg_root)
Ejemplo n.º 7
0
def get_multiscale_chain(model_trend='lstm', model_residual='ridge'):
    """Return a two-branch chain joined by a 'linear' node.

    One branch is ``trend_data_model -> model_trend``; the other is
    ``residual_data_model -> model_residual``.

    :param model_trend: operation name for the node placed after 'trend_data_model'
    :param model_residual: operation name for the node placed after 'residual_data_model'
    :return: Chain whose final node takes the residual branch first, then the trend branch
    """
    trend_source = PrimaryNode('trend_data_model')
    trend_model_node = SecondaryNode(model_trend, nodes_from=[trend_source])

    if model_trend == 'lstm':
        # a single epoch is enough here; it reduces the work needed to fit
        trend_model_node.model.params = {'epochs': 1}

    residual_source = PrimaryNode('residual_data_model')
    residual_model_node = SecondaryNode(model_residual,
                                        nodes_from=[residual_source])

    joined_inputs = [residual_model_node, trend_model_node]
    linear_root = SecondaryNode('linear', nodes_from=joined_inputs)

    return Chain(linear_root)
Ejemplo n.º 8
0
def default_valid_chain():
    """Return a diamond of four 'logit' nodes: one primary, two middle nodes, one final node."""
    source = PrimaryNode(model_type='logit')
    middle_a = SecondaryNode(model_type='logit', nodes_from=[source])
    middle_b = SecondaryNode(model_type='logit', nodes_from=[source])
    root = SecondaryNode(model_type='logit', nodes_from=[middle_a, middle_b])

    return Chain(root)
Ejemplo n.º 9
0
def get_ar_chain():
    """Return a chain that consists of a single primary 'ar' node."""
    return Chain(PrimaryNode('ar'))
Ejemplo n.º 10
0
def chain_with_pca() -> Chain:
    """Return a chain where 'scaling' feeds 'pca' and 'lda', both joined by an 'rf' node."""
    scaling = PrimaryNode('scaling')
    pca_branch = SecondaryNode('pca', nodes_from=[scaling])
    lda_branch = SecondaryNode('lda', nodes_from=[scaling])
    rf_root = SecondaryNode('rf', nodes_from=[pca_branch, lda_branch])

    return Chain(rf_root)
Ejemplo n.º 11
0
def test_node_factory_log_reg_correct(data_setup):
    """A 'logit' PrimaryNode must hold a model of the same class as ``Model('logit')``."""
    logit_type = 'logit'
    logit_node = PrimaryNode(model_type=logit_type)

    reference_class = Model(model_type=logit_type).__class__
    node_model_class = logit_node.model.__class__

    assert logit_node.__class__ == PrimaryNode
    assert reference_class == node_model_class
Ejemplo n.º 12
0
def chain_fourth():
    """Return ``chain_first()`` with two subtrees under the root's first parent replaced.

    The order of the two replacements matters: the second lookup is done on
    the chain as it is after the first ``update_subtree`` call.
    """
    #          XG
    #      |         \
    #     XG          KNN
    #   |    \        |  \
    # QDA     KNN     LR  LDA
    # |  \    |    \
    # RF  RF  KNN KNN
    chain = chain_first()

    # 'qda' node fed by two 'rf' primaries replaces the second parent
    qda_subtree = SecondaryNode('qda')
    for primary_type in ('rf', 'rf'):
        qda_subtree.nodes_from.append(PrimaryNode(primary_type))
    first_branch = chain.root_node.nodes_from[0]
    chain.update_subtree(first_branch.nodes_from[1], qda_subtree)

    # 'knn' node fed by two 'knn' primaries replaces the first parent
    knn_subtree = SecondaryNode('knn')
    for primary_type in ('knn', 'knn'):
        knn_subtree.nodes_from.append(PrimaryNode(primary_type))
    first_branch = chain.root_node.nodes_from[0]
    chain.update_subtree(first_branch.nodes_from[0], knn_subtree)

    return chain
Ejemplo n.º 13
0
def chain_simple() -> Chain:
    """Return a chain where 'scaling' feeds 'svc' and 'lda', both joined by an 'rf' node."""
    scaling = PrimaryNode('scaling')
    svc_branch = SecondaryNode('svc', nodes_from=[scaling])
    lda_branch = SecondaryNode('lda', nodes_from=[scaling])
    rf_root = SecondaryNode('rf', nodes_from=[svc_branch, lda_branch])

    return Chain(rf_root)
Ejemplo n.º 14
0
def get_simple_chain():
    """Return a two-node chain with the structure
    lagged -> linear
    """
    lagged = PrimaryNode('lagged')
    linear_root = SecondaryNode('linear', nodes_from=[lagged])

    return Chain(linear_root)
Ejemplo n.º 15
0
def test_chain_fine_tune_all_nodes_correct(classification_dataset):
    """Tune every node of a small chain and predict with the result.

    The test only checks that tuning and prediction complete without raising.
    """
    data = classification_dataset

    scaling_node = PrimaryNode(operation_type='scaling')
    knn_node = PrimaryNode(operation_type='knn')
    dt_root = SecondaryNode(operation_type='dt',
                            nodes_from=[scaling_node, knn_node])
    chain = Chain(dt_root)

    iterations_total = 5
    time_limit_minutes = 1
    tuned_chain = chain.fine_tune_all_nodes(
        loss_function=roc,
        input_data=data,
        iterations=iterations_total,
        max_lead_time=time_limit_minutes,
    )
    tuned_chain.predict(input_data=data)

    # Reaching this line means nothing above raised.
    is_tuning_finished = True
    assert is_tuning_finished
Ejemplo n.º 16
0
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """Fit a scikit-learn pipeline and a chain on the same data and print both ROC AUC values.

    :param train_file_path: path to the csv file read as training data
    :param test_file_path: path to the csv file read as test data
    :return: ROC AUC of the chain prediction on the test data
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    x_train = train_data.features
    x_test = test_data.features
    y_train = train_data.target
    y_test = test_data.target

    # --- scikit-learn pipeline ---
    # Average CV score on the training set was: 0.93755
    stacking_step = StackingEstimator(estimator=BernoulliNB())
    exported_pipeline = make_pipeline(stacking_step, RandomForestClassifier())
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(x_train, y_train)
    pipeline_scores = exported_pipeline.predict_proba(x_test)[:, 1]

    pipeline_roc_auc = roc_auc(y_true=y_test, y_score=pipeline_scores)
    print(pipeline_roc_auc)

    # --- chain: two primary nodes joined by an 'rf' node ---
    chain = Chain()
    direct_node = PrimaryNode('direct_data_model')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')

    for parent in (direct_node, bernb_node):
        rf_node.nodes_from.append(parent)

    chain.add_node(rf_node)

    chain.fit(train_data)
    chain_output = chain.predict(test_data)

    chain_roc_auc = roc_auc(y_true=y_test, y_score=chain_output.predict)
    print(chain_roc_auc)

    return chain_roc_auc
Ejemplo n.º 17
0
def chain_with_cycle():
    """Return a chain of three 'logit' nodes in which two nodes list each other as parents."""
    source = PrimaryNode(operation_type='logit')
    middle = SecondaryNode(operation_type='logit', nodes_from=[source])
    last = SecondaryNode(operation_type='logit', nodes_from=[middle, source])
    # close the loop: 'middle' now also depends on 'last'
    middle.nodes_from.append(last)

    chain = Chain()
    chain.add_node(source)
    chain.add_node(middle)
    chain.add_node(last)

    return chain
Ejemplo n.º 18
0
def test_node_deletion_sample_method():
    """``NodeDeletionAnalyze.sample`` must return None for the chain built here."""
    # given: a central node consumed by two nodes that both feed the root
    _, train_data, test_data, node_index, result_dir = given_data()
    knn_a = PrimaryNode('knn')
    knn_b = PrimaryNode('knn')
    xgboost_center = SecondaryNode('xgboost', nodes_from=[knn_a, knn_b])
    lda_a = SecondaryNode('lda', nodes_from=[xgboost_center])
    lda_b = SecondaryNode('lda', nodes_from=[xgboost_center])
    logit_root = SecondaryNode('logit', nodes_from=[lda_a, lda_b])
    chain_with_multiple_children = Chain(nodes=logit_root)

    # when
    analyzer = NodeDeletionAnalyze(chain=chain_with_multiple_children,
                                   train_data=train_data,
                                   test_data=test_data,
                                   path_to_save=result_dir)
    result = analyzer.sample(node_index)

    # then
    assert result is None
Ejemplo n.º 19
0
def chain_with_multiple_roots():
    """Return a chain where two secondary 'logit' nodes share one primary node and nothing consumes either of them."""
    source = PrimaryNode(operation_type='logit')
    root_a = SecondaryNode(operation_type='logit', nodes_from=[source])
    root_b = SecondaryNode(operation_type='logit', nodes_from=[source])

    chain = Chain()
    chain.add_node(source)
    chain.add_node(root_a)
    chain.add_node(root_b)

    return chain
Ejemplo n.º 20
0
def baseline_chain():
    """Return a chain with primary 'knn' and 'logit' nodes feeding an 'xgboost' node."""
    chain = Chain()
    xgboost_root = SecondaryNode(model_type='xgboost', nodes_from=[])

    for primary_type in ('knn', 'logit'):
        primary = PrimaryNode(primary_type)
        chain.add_node(primary)
        xgboost_root.nodes_from.append(primary)

    # the joining node is registered after both of its parents
    chain.add_node(xgboost_root)

    return chain
Ejemplo n.º 21
0
def chain_with_self_cycle():
    """Return a two-node 'logit' chain whose secondary node lists itself among its parents."""
    source = PrimaryNode(operation_type='logit')
    looped = SecondaryNode(operation_type='logit', nodes_from=[source])
    # the node becomes its own parent
    looped.nodes_from.append(looped)

    chain = Chain()
    for node in (source, looped):
        chain.add_node(node)

    return chain
Ejemplo n.º 22
0
def test_fixed_structure_composer(data_fixture, request):
    """Compose with a fixed-structure composer and compare the result to the initial chain.

    The composed chain must keep the depth and length of the reference chain
    and reach a ROC AUC above 0.6 on the same data it was composed on.
    """
    # fixed seeds for both random generators used in this test
    random.seed(1)
    np.random.seed(1)

    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_operation_types = ['logit', 'lda', 'knn']
    metric_function = ClassificationMetricsEnum.ROCAUC

    requirements = GPComposerRequirements(
        primary=available_operation_types,
        secondary=available_operation_types,
        pop_size=2,
        num_of_generations=1,
        crossover_prob=0.4,
        mutation_prob=0.5,
        allow_single_operations=False,
    )

    # Initial chain handed to the builder
    xgboost_node = PrimaryNode(operation_type='xgboost')
    scaling_node = PrimaryNode(operation_type='scaling')
    logit_root = SecondaryNode(operation_type='logit',
                               nodes_from=[xgboost_node, scaling_node])
    reference_chain = Chain(logit_root)

    builder = FixedStructureComposerBuilder(task=Task(TaskTypesEnum.classification))
    builder = builder.with_initial_chain(reference_chain)
    builder = builder.with_metrics(metric_function)
    builder = builder.with_requirements(requirements)
    composer = builder.build()

    chain_composed = composer.compose_chain(data=dataset_to_compose)
    chain_composed.fit_from_scratch(input_data=dataset_to_compose)

    prediction = chain_composed.predict(dataset_to_validate)
    roc_on_valid = roc_auc(y_true=dataset_to_validate.target,
                           y_score=prediction.predict)

    assert roc_on_valid > 0.6
    assert chain_composed.depth == reference_chain.depth
    assert chain_composed.length == reference_chain.length
Ejemplo n.º 23
0
def test_delete_primary_node():
    """After deleting the only parent of the 'knn' node, that node must be a PrimaryNode."""
    # given
    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_node = SecondaryNode(operation_type='knn', nodes_from=[logit_node])
    xgboost_root = SecondaryNode(operation_type='xgboost',
                                 nodes_from=[lda_node, knn_node])
    chain = Chain()
    chain.add_node(xgboost_root)

    # when
    chain.delete_node(logit_node)

    knn_candidates = [
        candidate for candidate in chain.nodes
        if candidate.operation.operation_type == 'knn'
    ]
    new_primary_node = knn_candidates[0]

    # then
    assert len(chain.nodes) == 3
    assert isinstance(new_primary_node, PrimaryNode)
Ejemplo n.º 24
0
def get_ensemble_chain():
    """Return a chain of seven primary regression nodes joined by a single 'linear' node."""
    chain = Chain()
    primary_nodes = []
    for operation_name in ('linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr'):
        primary = PrimaryNode(operation_name)
        chain.add_node(primary)
        primary_nodes.append(primary)

    # the joining node takes every primary node as a parent
    linear_root = SecondaryNode('linear', nodes_from=primary_nodes)
    chain.add_node(linear_root)

    return chain
Ejemplo n.º 25
0
def chain_with_isolated_nodes():
    """Return a three-node linear 'logit' chain plus one secondary node with no parents and no consumers."""
    source = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[source])
    last = SecondaryNode(model_type='logit', nodes_from=[middle])
    # not connected to any of the nodes above
    isolated = SecondaryNode(model_type='logit', nodes_from=[])

    chain = Chain()
    for node in (source, middle, last, isolated):
        chain.add_node(node)

    return chain
Ejemplo n.º 26
0
def test_chain_with_datamodel_fit_correct(data_setup):
    """Fit a three-node chain and check the predicted labels have the shape of the test target."""
    train_data, test_data = train_test_data_setup(data_setup)

    chain = Chain()

    logit_node = PrimaryNode('logit')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    rf_node.nodes_from = [bernb_node, logit_node]

    for node in (logit_node, bernb_node, rf_node):
        chain.add_node(node)

    chain.fit(train_data)
    prediction = chain.predict(test_data)
    labels = np.asarray(probs_to_labels(prediction.predict))

    assert labels.shape == test_data.target.shape
Ejemplo n.º 27
0
def valid_chain():
    """Return a linear chain of four 'logit' nodes, each fed by the previous one."""
    source = PrimaryNode(operation_type='logit')
    step_two = SecondaryNode(operation_type='logit', nodes_from=[source])
    step_three = SecondaryNode(operation_type='logit', nodes_from=[step_two])
    step_four = SecondaryNode(operation_type='logit', nodes_from=[step_three])

    chain = Chain()
    chain.add_node(source)
    chain.add_node(step_two)
    chain.add_node(step_three)
    chain.add_node(step_four)

    return chain
Ejemplo n.º 28
0
def chain_with_isolated_components():
    """Return a chain made of two 'logit' node pairs that are not connected to each other."""
    # pair one: primary -> secondary
    pair_one_source = PrimaryNode(operation_type='logit')
    pair_one_sink = SecondaryNode(operation_type='logit',
                                  nodes_from=[pair_one_source])
    # pair two: secondary without parents -> secondary
    pair_two_source = SecondaryNode(operation_type='logit', nodes_from=[])
    pair_two_sink = SecondaryNode(operation_type='logit',
                                  nodes_from=[pair_two_source])

    chain = Chain()
    for node in (pair_one_source, pair_one_sink, pair_two_source, pair_two_sink):
        chain.add_node(node)

    return chain
Ejemplo n.º 29
0
def test_node_repr():
    """``repr`` of a PrimaryNode must equal the operation type it was created with."""
    # given
    operation_type = 'logit'
    logit_node = PrimaryNode(operation_type=operation_type)
    expected = operation_type

    # when
    actual = repr(logit_node)

    # then
    assert actual == expected
Ejemplo n.º 30
0
def chain_third():
    """Return a chain of one 'qda' node fed by two primary 'rf' nodes.

    The 'qda' node is added to the chain first, then each of its parents.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # add_node is called only for its side effect, so a plain loop is used
    # instead of a list comprehension that would build a throwaway list.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain