Beispiel #1
0
def create_pipeline() -> Pipeline:
    """Build a multi-level pipeline out of logit, lda, xgboost and knn nodes.

    Four primary nodes feed two intermediate secondary nodes; those, together
    with some of the primary nodes, feed two further secondary nodes, which
    are finally merged by a 'knn' node that is passed to the Pipeline
    constructor.

    :return: Pipeline created from the final 'knn' node
    """
    # Primary nodes (no parents).
    logit_primary = PrimaryNode('logit')

    lda_primary = PrimaryNode('lda')
    lda_primary.custom_params = {'n_components': 1}

    xgboost_primary = PrimaryNode('xgboost')

    knn_primary = PrimaryNode('knn')
    knn_primary.custom_params = {'n_neighbors': 9}

    # Secondary nodes fed by primary nodes only.
    knn_inner = SecondaryNode('knn')
    knn_inner.custom_params = {'n_neighbors': 5}
    knn_inner.nodes_from = [lda_primary, knn_primary]

    logit_inner = SecondaryNode('logit')
    logit_inner.nodes_from = [xgboost_primary, lda_primary]

    # Secondary nodes mixing primary nodes with the nodes built above.
    lda_outer = SecondaryNode('lda')
    lda_outer.custom_params = {'n_components': 1}
    lda_outer.nodes_from = [logit_inner, knn_inner, logit_primary]

    xgboost_outer = SecondaryNode('xgboost')
    xgboost_outer.nodes_from = [logit_primary, logit_inner, knn_primary]

    # Final node merging both outer branches.
    knn_root = SecondaryNode('knn')
    knn_root.custom_params = {'n_neighbors': 8}
    knn_root.nodes_from = [lda_outer, xgboost_outer]

    return Pipeline(knn_root)
Beispiel #2
0
def create_pipeline_with_several_nested_atomized_model() -> Pipeline:
    """Build a pipeline that mixes atomized-model nodes with 'knn' nodes.

    A primary atomized node feeds a secondary atomized node and a 'knn' node;
    all three feed a second 'knn' node, whose output goes to a final
    atomized node. Only that final node is added to the pipeline explicitly.

    :return: Pipeline to which the final atomized node was added
    """
    pipeline = Pipeline()

    # Entry point of the graph: an atomized model made of atomized models.
    nested_atomized = create_atomized_model_with_several_atomized_models()
    atomized_primary = PrimaryNode(operation_type=nested_atomized)

    atomized_inner = SecondaryNode(operation_type=create_atomized_model())
    atomized_inner.nodes_from = [atomized_primary]

    knn_inner = SecondaryNode('knn')
    knn_inner.custom_params = {'n_neighbors': 9}
    knn_inner.nodes_from = [atomized_primary]

    # Collects the primary node and both nodes derived from it.
    knn_merger = SecondaryNode('knn')
    knn_merger.custom_params = {'n_neighbors': 5}
    knn_merger.nodes_from = [atomized_primary, atomized_inner, knn_inner]

    atomized_root = SecondaryNode(
        operation_type=create_atomized_model_with_several_atomized_models())
    atomized_root.nodes_from = [knn_merger]

    pipeline.add_node(atomized_root)

    return pipeline
Beispiel #3
0
    def roll_pipeline_structure(
            self,
            operation_object: ['OperationTemplate', 'AtomizedModelTemplate'],
            visited_nodes: dict,
            path: str = None):
        """
        Recursively rebuild the node for ``operation_object`` and its parents.

        A node is created for the given template, an optional fitted
        operation is loaded from disk, and the parent templates listed in
        ``operation_object.nodes_from`` are processed by the same method.
        Nodes that were already built are reused through ``visited_nodes``.

        :param operation_object: template of the operation to convert
        :param visited_nodes: mapping from operation_id to the node already
            built for it; updated in place
        :param path: directory that ``fitted_operation_path`` is joined to;
            when None no fitted operation is loaded
        :return: node built for ``operation_object``
        :raises FileNotFoundError: if the fitted operation file is missing
        """
        operation_id = operation_object.operation_id
        if operation_id in visited_nodes:
            return visited_nodes[operation_id]

        is_atomized = operation_object.operation_type == atomized_model_type()
        # A template with parents becomes a secondary node, otherwise a primary one.
        node_class = SecondaryNode if operation_object.nodes_from else PrimaryNode

        if is_atomized:
            node = node_class(
                operation_type=operation_object.next_pipeline_template)
        else:
            node = node_class(operation_object.operation_type)
            # Params and rating are restored only for non-atomized operations.
            node.operation.params = operation_object.params
            node.rating = operation_object.rating

        # Not every template object carries 'fitted_operation_path'.
        fitted_relative_path = getattr(
            operation_object, 'fitted_operation_path', None)
        if fitted_relative_path and path is not None:
            path_to_operation = os.path.join(path, fitted_relative_path)
            if not os.path.isfile(path_to_operation):
                message = f"Fitted operation on the path: {path_to_operation} does not exist."
                self.log.error(message)
                raise FileNotFoundError(message)

            # NOTE(review): joblib.load unpickles the file, so the path
            # should only ever point to trusted data.
            loaded_operation = joblib.load(path_to_operation)
            operation_object.fitted_operation = loaded_operation
            node.fitted_operation = loaded_operation

        # Parents are taken in the order of self.operation_templates.
        parent_ids = operation_object.nodes_from
        parent_nodes = []
        for template in self.operation_templates:
            if template.operation_id in parent_ids:
                parent_nodes.append(
                    self.roll_pipeline_structure(template, visited_nodes, path))
        node.nodes_from = parent_nodes

        visited_nodes[operation_id] = node
        return node
Beispiel #4
0
def create_atomized_model_with_several_atomized_models() -> AtomizedModel:
    """Wrap a diamond-shaped pipeline of atomized models into an AtomizedModel.

    One primary atomized node feeds two secondary atomized nodes, and both of
    them feed a third secondary atomized node, which is added to the pipeline.

    :return: AtomizedModel built from that pipeline
    """
    pipeline = Pipeline()

    # Every node gets its own freshly created atomized model.
    source_node = PrimaryNode(operation_type=create_atomized_model())
    first_branch = SecondaryNode(operation_type=create_atomized_model())
    second_branch = SecondaryNode(operation_type=create_atomized_model())
    merging_node = SecondaryNode(operation_type=create_atomized_model())

    first_branch.nodes_from = [source_node]
    second_branch.nodes_from = [source_node]
    merging_node.nodes_from = [first_branch, second_branch]

    pipeline.add_node(merging_node)

    return AtomizedModel(pipeline)
Beispiel #5
0
def create_pipeline() -> Pipeline:
    """Build a small pipeline: 'logit' and 'lda' feeding an 'xgboost' node.

    :return: Pipeline to which the 'xgboost' node was added
    """
    pipeline = Pipeline()

    logit_parent = PrimaryNode('logit')

    lda_parent = PrimaryNode('lda')
    lda_parent.custom_params = {'n_components': 1}

    xgboost_root = SecondaryNode('xgboost')
    # NOTE(review): 'n_components' is the same parameter given to the 'lda'
    # node above; confirm it is intentional for 'xgboost'.
    xgboost_root.custom_params = {'n_components': 1}
    xgboost_root.nodes_from = [logit_parent, lda_parent]

    pipeline.add_node(xgboost_root)

    return pipeline
Beispiel #6
0
def test_pipeline_with_datamodel_fit_correct(data_setup):
    """Fit a logit + bernb -> rf pipeline and check the prediction shape.

    The pipeline is fitted on the train split, and the labels derived from
    its prediction on the test split must have the same shape as the test
    target.
    """
    train_data, test_data = train_test_data_setup(data_setup)

    pipeline = Pipeline()

    logit_node = PrimaryNode('logit')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    rf_node.nodes_from = [bernb_node, logit_node]

    # Nodes are registered one by one, parents before the node using them.
    for node in (logit_node, bernb_node, rf_node):
        pipeline.add_node(node)

    pipeline.fit(train_data)
    prediction = pipeline.predict(test_data)
    predicted_labels = np.asarray(probs_to_labels(prediction.predict))

    assert predicted_labels.shape == test_data.target.shape