コード例 #1
0
def test_pipeline_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped pipeline of four 'logit' nodes and verify it.

    One primary node feeds two secondary nodes, and both of those feed the
    final node. After fitting, the test checks the root node's descriptive
    id, the pipeline length and depth, the number of predicted rows, and
    that the final node holds a fitted operation.
    """
    train, _ = train_test_data_setup(data_setup)

    source = PrimaryNode(operation_type='logit')
    left_branch = SecondaryNode(operation_type='logit', nodes_from=[source])
    right_branch = SecondaryNode(operation_type='logit', nodes_from=[source])
    final = SecondaryNode(operation_type='logit',
                          nodes_from=[left_branch, right_branch])

    pipeline = Pipeline()
    pipeline.add_node(source)
    pipeline.add_node(left_branch)
    pipeline.add_node(right_branch)
    pipeline.add_node(final)

    # Drop any fitted state before training.
    pipeline.unfit()
    train_predicted = pipeline.fit(input_data=train)

    expected_root_id = (
        '((/n_logit_default_params;)/'
        'n_logit_default_params;;(/'
        'n_logit_default_params;)/'
        'n_logit_default_params;)/'
        'n_logit_default_params')
    assert pipeline.root_node.descriptive_id == expected_root_id

    assert pipeline.length == 4
    assert pipeline.depth == 3
    # One prediction row per training target row.
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.fitted_operation is not None
コード例 #2
0
def create_pipeline_with_several_nested_atomized_model() -> Pipeline:
    """Build a pipeline mixing nested atomized models with two 'knn' nodes.

    Layout (parents listed after the arrow):
      * primary atomized node (no parents)
      * secondary atomized node <- primary atomized
      * knn (n_neighbors=9)     <- primary atomized
      * knn (n_neighbors=5)     <- primary atomized, secondary atomized, knn(9)
      * root atomized node      <- knn(5)

    Only the root is passed to ``Pipeline.add_node``.
    """
    pipeline = Pipeline()

    primary_atomized = PrimaryNode(
        operation_type=create_atomized_model_with_several_atomized_models())

    secondary_atomized = SecondaryNode(operation_type=create_atomized_model())
    secondary_atomized.nodes_from = [primary_atomized]

    knn_nine = SecondaryNode('knn')
    knn_nine.custom_params = {'n_neighbors': 9}
    knn_nine.nodes_from = [primary_atomized]

    knn_five = SecondaryNode('knn')
    knn_five.custom_params = {'n_neighbors': 5}
    knn_five.nodes_from = [primary_atomized, secondary_atomized, knn_nine]

    root = SecondaryNode(
        operation_type=create_atomized_model_with_several_atomized_models())
    root.nodes_from = [knn_five]

    pipeline.add_node(root)
    return pipeline
コード例 #3
0
def pipeline_with_secondary_nodes_only():
    """Return a two-node pipeline built only from ``SecondaryNode`` objects.

    The first node is created with an empty ``nodes_from`` list; the second
    takes the first as its only parent.
    """
    parentless = SecondaryNode(operation_type='logit', nodes_from=[])
    child = SecondaryNode(operation_type='logit', nodes_from=[parentless])

    pipeline = Pipeline()
    for node in (parentless, child):
        pipeline.add_node(node)
    return pipeline
コード例 #4
0
def pipeline_with_self_cycle():
    """Return a pipeline in which a secondary node lists itself as a parent."""
    source = PrimaryNode(operation_type='logit')
    looped = SecondaryNode(operation_type='logit', nodes_from=[source])
    # Create the self-reference: the node becomes one of its own parents.
    looped.nodes_from.append(looped)

    pipeline = Pipeline()
    for node in (source, looped):
        pipeline.add_node(node)
    return pipeline
コード例 #5
0
def pipeline_with_multiple_roots():
    """Return a pipeline where two secondary nodes share one primary parent.

    Neither secondary node is a parent of the other, so both sit at the top
    of the graph.
    """
    shared_parent = PrimaryNode(operation_type='logit')
    root_a = SecondaryNode(operation_type='logit', nodes_from=[shared_parent])
    root_b = SecondaryNode(operation_type='logit', nodes_from=[shared_parent])

    pipeline = Pipeline()
    pipeline.add_node(shared_parent)
    pipeline.add_node(root_a)
    pipeline.add_node(root_b)
    return pipeline
コード例 #6
0
def pipeline_with_cycle():
    """Return a pipeline whose two secondary nodes are each other's parents."""
    source = PrimaryNode(operation_type='logit')
    middle = SecondaryNode(operation_type='logit', nodes_from=[source])
    top = SecondaryNode(operation_type='logit', nodes_from=[middle, source])
    # Close the loop: ``top`` depends on ``middle`` and now vice versa.
    middle.nodes_from.append(top)

    pipeline = Pipeline()
    pipeline.add_node(source)
    pipeline.add_node(middle)
    pipeline.add_node(top)
    return pipeline
コード例 #7
0
def baseline_pipeline():
    """Return a pipeline with an 'xgboost' node fed by 'knn' and 'logit' nodes.

    The two primary nodes are added to the pipeline first; the secondary
    'xgboost' node is added last.
    """
    pipeline = Pipeline()
    root = SecondaryNode(operation_type='xgboost', nodes_from=[])

    for model_name in ('knn', 'logit'):
        leaf = PrimaryNode(model_name)
        pipeline.add_node(leaf)
        root.nodes_from.append(leaf)

    pipeline.add_node(root)
    return pipeline
コード例 #8
0
def pipeline_with_isolated_components():
    """Return a pipeline made of two chains that share no nodes.

    Chain one: primary 'logit' -> secondary 'logit'.
    Chain two: parentless secondary 'logit' -> secondary 'logit'.
    """
    chain_a_start = PrimaryNode(operation_type='logit')
    chain_a_end = SecondaryNode(operation_type='logit',
                                nodes_from=[chain_a_start])
    chain_b_start = SecondaryNode(operation_type='logit', nodes_from=[])
    chain_b_end = SecondaryNode(operation_type='logit',
                                nodes_from=[chain_b_start])

    pipeline = Pipeline()
    pipeline.add_node(chain_a_start)
    pipeline.add_node(chain_a_end)
    pipeline.add_node(chain_b_start)
    pipeline.add_node(chain_b_end)
    return pipeline
コード例 #9
0
def valid_pipeline():
    """Return a linear chain of four 'logit' nodes (one primary, three secondary)."""
    head = PrimaryNode(operation_type='logit')
    link_one = SecondaryNode(operation_type='logit', nodes_from=[head])
    link_two = SecondaryNode(operation_type='logit', nodes_from=[link_one])
    tail = SecondaryNode(operation_type='logit', nodes_from=[link_two])

    pipeline = Pipeline()
    pipeline.add_node(head)
    pipeline.add_node(link_one)
    pipeline.add_node(link_two)
    pipeline.add_node(tail)
    return pipeline
コード例 #10
0
def pipeline_third():
    """Return a pipeline with a 'qda' node fed by two 'rf' primary nodes.

    The 'qda' node is added to the pipeline first, followed by each of its
    parents.
    """
    #    QDA
    #  |     \
    # RF     RF
    pipeline = Pipeline()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    pipeline.add_node(new_node)
    # A plain loop: add_node is called for its side effect only, so there is
    # no reason to collect its return values in a throwaway list.
    for node_from in new_node.nodes_from:
        pipeline.add_node(node_from)
    return pipeline
コード例 #11
0
def test_pipeline_repr():
    """Check ``repr`` of a pipeline with an 'xgboost' node over three primaries."""
    parents = [
        PrimaryNode(operation_type='logit'),
        PrimaryNode(operation_type='lda'),
        PrimaryNode(operation_type='knn'),
    ]
    root = SecondaryNode(operation_type='xgboost', nodes_from=parents)

    pipeline = Pipeline()
    # Only the root node is added explicitly.
    pipeline.add_node(root)

    actual_description = repr(pipeline)

    assert actual_description == "{'depth': 2, 'length': 4, 'nodes': [xgboost, logit, lda, knn]}"
コード例 #12
0
def test_delete_node_with_redirection():
    """Delete the middle node and check its parent is re-attached to the root.

    Graph before deletion: ('logit', 'lda') -> 'knn' -> 'xgboost'.
    After deleting 'knn' the pipeline must hold three nodes and the 'logit'
    node must be among the root's parents.
    """
    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    middle = SecondaryNode(operation_type='knn',
                           nodes_from=[logit_node, lda_node])
    root = SecondaryNode(operation_type='xgboost', nodes_from=[middle])

    pipeline = Pipeline()
    pipeline.add_node(root)

    pipeline.delete_node(middle)

    remaining = len(pipeline.nodes)
    assert remaining == 3
    assert logit_node in pipeline.root_node.nodes_from
コード例 #13
0
def create_pipeline() -> Pipeline:
    """Build a pipeline with an 'xgboost' node fed by 'logit' and 'lda' nodes.

    Both the 'lda' and the 'xgboost' node receive the custom parameter
    ``n_components = 1``. Only the 'xgboost' node is passed to
    ``Pipeline.add_node``.
    """
    logit_node = PrimaryNode('logit')

    lda_node = PrimaryNode('lda')
    lda_node.custom_params = {'n_components': 1}

    root = SecondaryNode('xgboost')
    # NOTE(review): 'n_components' on the xgboost node mirrors the lda node
    # above; confirm it is intentional rather than a copy-paste.
    root.custom_params = {'n_components': 1}
    root.nodes_from = [logit_node, lda_node]

    pipeline = Pipeline()
    pipeline.add_node(root)
    return pipeline
コード例 #14
0
def pipeline_third():
    """Return a pipeline with an 'xgboost' root over 'knn', 'lda', 'knn' nodes.

    The three primary nodes are added to the pipeline before the root.
    """
    #      XG
    #   /  |  \
    #  KNN LDA KNN
    root = SecondaryNode('xgboost')
    for leaf_model in ('knn', 'lda', 'knn'):
        leaf = PrimaryNode(leaf_model)
        root.nodes_from.append(leaf)

    pipeline = Pipeline()
    for parent in root.nodes_from:
        pipeline.add_node(parent)
    pipeline.add_node(root)

    return pipeline
コード例 #15
0
def test_update_node_in_pipeline_correct():
    """Replace a primary node with a small subtree via ``update_node``.

    The original 'logit' primary is swapped for a 'logit' secondary that has
    an 'svc' primary parent. Afterwards both new nodes must be in the
    pipeline and the old node must be gone.
    """
    old_primary = PrimaryNode(operation_type='logit')
    root = SecondaryNode(operation_type='xgboost', nodes_from=[old_primary])

    pipeline = Pipeline()
    pipeline.add_node(root)

    svc_parent = PrimaryNode('svc')
    replacement = SecondaryNode('logit', nodes_from=[svc_parent])

    pipeline.update_node(old_node=old_primary, new_node=replacement)

    assert replacement in pipeline.nodes
    assert svc_parent in pipeline.nodes
    assert old_primary not in pipeline.nodes
コード例 #16
0
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that the order of a secondary node's parents does not change results.

    Two pipelines with the same 'xgboost' node over 'logit', 'lda' and 'knn'
    parents are built, differing only in parent order and in the order nodes
    are added. Their descriptive ids, train predictions and test predictions
    must match. Finally the parents of the first pipeline's node at index 3
    are reordered in place, the node is unfitted, the pipeline is refitted,
    and the test predictions must still match.
    """
    train, test = train_test_data_setup(data_setup)

    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_node = PrimaryNode(operation_type='knn')
    root = SecondaryNode(operation_type='xgboost',
                         nodes_from=[logit_node, lda_node, knn_node])

    pipeline = Pipeline()
    for node in (logit_node, lda_node, knn_node, root):
        pipeline.add_node(node)

    # Independent copies so that the second pipeline shares no node objects
    # with the first one.
    logit_copy = deepcopy(logit_node)
    lda_copy = deepcopy(lda_node)
    knn_copy = deepcopy(knn_node)

    root_shuffled = SecondaryNode(operation_type='xgboost',
                                  nodes_from=[knn_copy, logit_copy, lda_copy])

    pipeline_shuffled = Pipeline()
    # Nodes are added in a different order as well.
    for node in (root_shuffled, knn_copy, logit_copy, lda_copy):
        pipeline_shuffled.add_node(node)

    fit_output = pipeline.fit(input_data=train)
    fit_output_shuffled = pipeline_shuffled.fit(input_data=train)

    # Train results should be invariant.
    assert pipeline.root_node.descriptive_id == pipeline_shuffled.root_node.descriptive_id
    assert np.equal(fit_output.predict, fit_output_shuffled.predict).all()

    predicted = pipeline.predict(input_data=test)
    predicted_shuffled = pipeline_shuffled.predict(input_data=test)

    # Predict results should be invariant.
    assert np.equal(predicted.predict, predicted_shuffled.predict).all()

    # Reorder the parents of the already fitted pipeline's node at index 3
    # (the 'xgboost' node was added fourth above).
    current_parents = pipeline.nodes[3].nodes_from
    reordered_parents = [current_parents[2], current_parents[0], current_parents[1]]
    pipeline.nodes[3].nodes_from = reordered_parents
    pipeline.nodes[3].unfit()
    pipeline.fit(train)
    predicted_after_reorder = pipeline.predict(input_data=test)

    # Predict results should be invariant.
    assert np.equal(predicted.predict, predicted_after_reorder.predict).all()
コード例 #17
0
def create_atomized_model_with_several_atomized_models() -> AtomizedModel:
    """Wrap a diamond-shaped pipeline of atomized models into an ``AtomizedModel``.

    Every node's operation comes from ``create_atomized_model()``. One
    primary node feeds two secondary nodes, which both feed a final
    secondary node; only that final node is passed to ``Pipeline.add_node``.
    """
    pipeline = Pipeline()

    source = PrimaryNode(operation_type=create_atomized_model())
    left_branch = SecondaryNode(operation_type=create_atomized_model())
    right_branch = SecondaryNode(operation_type=create_atomized_model())
    root = SecondaryNode(operation_type=create_atomized_model())

    left_branch.nodes_from = [source]
    right_branch.nodes_from = [source]
    root.nodes_from = [left_branch, right_branch]

    pipeline.add_node(root)

    return AtomizedModel(pipeline)
コード例 #18
0
def test_pipeline_with_custom_params_for_model(data_setup):
    """Check that custom parameters on the root node change the predictions.

    Two copies of the same pipeline ('knn' over 'logit' and 'lda') are
    fitted on the same data; one has custom parameters set on its root node.
    Their predictions must differ.
    """
    knn_params = {'n_neighbors': 1, 'weights': 'uniform', 'p': 1}

    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    root = SecondaryNode(operation_type='knn',
                         nodes_from=[logit_node, lda_node])

    pipeline = Pipeline()
    pipeline.add_node(root)
    # Copy before the custom params are set so the copy keeps its defaults.
    pipeline_default_params = deepcopy(pipeline)

    pipeline.root_node.custom_params = knn_params

    pipeline_default_params.fit(data_setup)
    pipeline.fit(data_setup)

    custom_output = pipeline.predict(data_setup)
    custom_params_prediction = custom_output.predict
    default_output = pipeline_default_params.predict(data_setup)
    default_params_prediction = default_output.predict

    assert not np.array_equal(custom_params_prediction, default_params_prediction)
コード例 #19
0
    def compose_pipeline(self, data: InputData,
                         initial_pipeline: Optional[Pipeline],
                         composer_requirements: ComposerRequirements,
                         metrics: Optional[Callable]) -> Pipeline:
        """Compose a pipeline from the best node set found by random search.

        :param data: data handed to the metric function as both the train
            and the test set
        :param initial_pipeline: not used by this implementation
        :param composer_requirements: provides the ``primary`` and
            ``secondary`` values passed to the optimiser
        :param metrics: metric callable forwarded to ``metric_for_nodes``
        :return: a new pipeline containing every node of the best node set
        """
        # TODO: the same data is used for training and for evaluation;
        # fix this later?
        train_data = data
        test_data = data
        metric_function_for_nodes = partial(metric_for_nodes,
                                            metric_function=metrics,
                                            train_data=train_data,
                                            test_data=test_data)

        optimiser = RandomSearchOptimiser(self.__iter_num, PrimaryNode,
                                          SecondaryNode)
        # The second returned value (search history) is not needed here.
        best_nodes_set, _history = optimiser.optimise(
            metric_function_for_nodes, composer_requirements.primary,
            composer_requirements.secondary)

        best_pipeline = Pipeline()
        # Plain loop: add_node is called for its side effect only.
        for node in best_nodes_set:
            best_pipeline.add_node(node)

        return best_pipeline
コード例 #20
0
def test_pipeline_with_datamodel_fit_correct(data_setup):
    """Fit an 'rf' node over 'bernb' and 'logit' parents and check label shape.

    The predictions on the test split are converted with ``probs_to_labels``
    and must have the same shape as the test target.
    """
    train_data, test_data = train_test_data_setup(data_setup)

    pipeline = Pipeline()

    logit_node = PrimaryNode('logit')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    rf_node.nodes_from = [bernb_node, logit_node]

    for node in (logit_node, bernb_node, rf_node):
        pipeline.add_node(node)

    pipeline.fit(train_data)

    test_output = pipeline.predict(test_data)
    labels = probs_to_labels(test_output.predict)
    results = np.asarray(labels)

    assert results.shape == test_data.target.shape
コード例 #21
0
def pipeline_first():
    """Return a three-level pipeline with an 'xgboost' root.

    The root has an 'xgboost' and a 'knn' child; each child has its own
    'logit' and 'lda' primary parents. Nodes are added bottom-up: each
    child's primaries, then the child, and the root last.
    """
    #    XG
    #  |     \
    # XG     KNN
    # |  \    |  \
    # LR LDA LR  LDA
    pipeline = Pipeline()

    root = SecondaryNode('xgboost')
    xgboost_child = SecondaryNode('xgboost')
    knn_child = SecondaryNode('knn')

    for child in (xgboost_child, knn_child):
        for leaf_model in ('logit', 'lda'):
            leaf = PrimaryNode(leaf_model)
            child.nodes_from.append(leaf)
            pipeline.add_node(leaf)
        pipeline.add_node(child)
        root.nodes_from.append(child)

    pipeline.add_node(root)
    return pipeline
コード例 #22
0
def nodes_to_pipeline(nodes: List[Node]) -> Pipeline:
    """Create a new pipeline and add every node from ``nodes`` to it, in order.

    :param nodes: nodes to add; an empty list yields an empty pipeline
    :return: the newly created pipeline
    """
    pipeline = Pipeline()
    # Plain loop with a distinct variable name: add_node is called for its
    # side effect, and the loop variable no longer shadows the parameter.
    for node in nodes:
        pipeline.add_node(node)
    return pipeline