Beispiel #1
0
def generate_pipeline() -> Pipeline:
    """
    Build a diamond-shaped pipeline.

    A single 'scaling' node feeds two separate 'kmeans' nodes whose outputs
    are merged by a 'logit' root node.

    :return: pipeline rooted at the 'logit' node
    """
    scaling = PrimaryNode('scaling')
    kmeans_left = SecondaryNode('kmeans', nodes_from=[scaling])
    kmeans_right = SecondaryNode('kmeans', nodes_from=[scaling])
    logit_root = SecondaryNode('logit', nodes_from=[kmeans_left, kmeans_right])
    return Pipeline(logit_root)
Beispiel #2
0
def generate_pipeline() -> Pipeline:
    """
    Build a pipeline where 'scaling' feeds both 'lasso' and 'ridge' nodes,
    which are then combined by a 'linear' root node.

    :return: pipeline rooted at the 'linear' node
    """
    scaling = PrimaryNode('scaling')
    lasso = SecondaryNode('lasso', nodes_from=[scaling])
    ridge = SecondaryNode('ridge', nodes_from=[scaling])
    linear_root = SecondaryNode('linear', nodes_from=[lasso, ridge])
    return Pipeline(linear_root)
Beispiel #3
0
def get_knn_class_pipeline(k_neighbors):
    """
    Build a two-node pipeline: 'scaling' followed by a 'knn' root node.

    :param k_neighbors: value stored under 'n_neighbors' in the custom params of the 'knn' node

    :return: pipeline rooted at the 'knn' node
    """
    scaling = PrimaryNode('scaling')
    knn_root = SecondaryNode('knn', nodes_from=[scaling])
    knn_root.custom_params = {'n_neighbors': k_neighbors}
    return Pipeline(knn_root)
Beispiel #4
0
def run_pipeline_from_automl(train_file_path: str,
                             test_file_path: str,
                             max_run_time: timedelta = timedelta(minutes=10)):
    """ Function run pipeline with Auto ML models in nodes

    The pipeline built here is: 'scaling' -> 'lda', and 'tpot' together with
    'lda' feed the 'rf' root node.

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: maximum running time for customization of the "tpot" model;
        the whole duration (including any days component) is converted to seconds

    :return roc_auc_value: ROC AUC metric for pipeline
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    node_scaling = PrimaryNode('scaling')
    node_tpot = PrimaryNode('tpot')

    # timedelta.seconds only holds the 0..86399 remainder and silently drops
    # whole days (timedelta(days=1).seconds == 0); total_seconds() covers the
    # complete duration. int() keeps the parameter an integer as before.
    node_tpot.operation.params = {
        'max_run_time_sec': int(max_run_time.total_seconds())
    }

    node_lda = SecondaryNode('lda', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_tpot, node_lda])
    OperationTypesRepository.assign_repo('model', 'automl_repository.json')
    pipeline = Pipeline(node_rf)

    pipeline.fit(train_data)
    results = pipeline.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Beispiel #5
0
def get_pipeline():
    """
    Build a linear three-node pipeline: 'scaling' -> 'ransac_lin_reg' -> 'lasso'.

    :return: pipeline rooted at the 'lasso' node
    """
    scaling = PrimaryNode('scaling')
    ransac = SecondaryNode('ransac_lin_reg', nodes_from=[scaling])
    lasso_root = SecondaryNode('lasso', nodes_from=[ransac])
    return Pipeline(lasso_root)
Beispiel #6
0
def boosting_mutation(pipeline: Pipeline, requirements, params,
                      **kwargs) -> Any:
    """
    Mutation that appends an additional 'boosting' cascade to the given pipeline.

    Three nodes are created: a decompose node fed by the current root and a
    data source node, a 'linear' node on top of the decompose node, and a
    final node that combines the 'linear' node with the current root.

    :param pipeline: pipeline to mutate; its ``nodes`` list is extended in place
    :param requirements: object whose ``secondary`` attribute is the collection
        the operation of the new final node is randomly chosen from
    :param params: object providing ``advisor.task.task_type``
    :param kwargs: accepted but not used in this function

    :return: the same pipeline object that was passed in
    """
    task_type = params.advisor.task.task_type
    repository = OperationTypesRepository('data_operation')
    suitable_operations, _ = repository.suitable_operation(
        task_type=task_type, tags=['decompose'])
    # The first suitable operation tagged 'decompose' is the one used
    decompose_name = suitable_operations[0]

    # A single-node pipeline reuses its only node as the data source,
    # otherwise a fresh 'scaling' node is created for that role
    data_source = (pipeline.nodes[0] if len(pipeline.nodes) == 1
                   else PrimaryNode('scaling'))

    current_root = pipeline.root_node

    decompose_node = SecondaryNode(decompose_name,
                                   nodes_from=[current_root, data_source])
    boost_node = SecondaryNode('linear', nodes_from=[decompose_node])
    final_node = SecondaryNode(choice(requirements.secondary),
                               nodes_from=[boost_node, current_root])

    pipeline.nodes.extend([decompose_node, final_node, boost_node])
    return pipeline
Beispiel #7
0
def generate_straight_pipeline():
    """ Simple linear pipeline: 'scaling' -> 'ridge' -> 'linear' """
    scaling = PrimaryNode('scaling')
    ridge = SecondaryNode('ridge', nodes_from=[scaling])
    linear_root = SecondaryNode('linear', nodes_from=[ridge])
    return Pipeline(linear_root)
Beispiel #8
0
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """
    Fit two models on the same data and print the ROC AUC of each.

    The first is an estimator pipeline built with ``make_pipeline`` (reported
    as "TPOT"), the second is a node-based ``Pipeline`` (reported as "FEDOT").

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation

    :return: ROC AUC of the node-based pipeline (the second value printed)
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)
    testing_target = test_data.target

    # Average CV score on the training set was: 0.93755
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()), RandomForestClassifier())
    # Same random state for every step of the exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(train_data.features, train_data.target)
    # Column 1 of predict_proba is used as the score
    tpot_scores = exported_pipeline.predict_proba(test_data.features)[:, 1]
    tpot_roc_auc = roc_auc(y_true=testing_target, y_score=tpot_scores)
    print(f'ROC AUC for TPOT: {tpot_roc_auc}')

    # Node-based counterpart: 'scaling' -> 'bernb', both feeding the 'rf' root
    scaling = PrimaryNode('scaling')
    bernb = SecondaryNode('bernb', nodes_from=[scaling])
    rf_root = SecondaryNode('rf', nodes_from=[bernb, scaling])
    pipeline = Pipeline(rf_root)

    pipeline.fit(train_data)
    fedot_output = pipeline.predict(test_data)
    fedot_roc_auc = roc_auc(y_true=testing_target,
                            y_score=fedot_output.predict)
    print(f'ROC AUC for FEDOT: {fedot_roc_auc}')

    return fedot_roc_auc
Beispiel #9
0
def test_pipeline_hierarchy_fit_correct(data_setup):
    """
    Fit a four-node diamond of 'logit' nodes and check its structure and output.

    Verifies the root's descriptive id, pipeline length and depth, the number
    of predicted rows and that the final node holds a fitted operation.
    """
    train, _ = train_test_data_setup(data_setup)

    first = PrimaryNode(operation_type='logit')
    second = SecondaryNode(operation_type='logit', nodes_from=[first])
    third = SecondaryNode(operation_type='logit', nodes_from=[first])
    final = SecondaryNode(operation_type='logit', nodes_from=[second, third])

    pipeline = Pipeline()
    for node in (first, second, third, final):
        pipeline.add_node(node)

    pipeline.unfit()
    train_predicted = pipeline.fit(input_data=train)

    expected_descriptive_id = ('((/n_logit_default_params;)/'
                               'n_logit_default_params;;(/'
                               'n_logit_default_params;)/'
                               'n_logit_default_params;)/'
                               'n_logit_default_params')
    assert pipeline.root_node.descriptive_id == expected_descriptive_id

    assert pipeline.length == 4
    assert pipeline.depth == 3
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.fitted_operation is not None
Beispiel #10
0
def ts_pipeline_with_incorrect_data_flow():
    r"""
    Build a pipeline that is deliberately wired with incorrect connections.

    Connection lagged -> lagged is incorrect
    Connection ridge -> ar is incorrect also
       lagged - lagged - ridge \
                                ar -> final forecast
                lagged - ridge /

    :return: pipeline rooted at the 'ar' node
    """
    # Upper branch: 'lagged' -> 'lagged' -> 'ridge'
    lagged_source = PrimaryNode('lagged')
    lagged_chained = SecondaryNode('lagged', nodes_from=[lagged_source])

    # Lower branch starts from its own primary 'lagged' node
    lagged_separate = PrimaryNode('lagged')

    ridge_upper = SecondaryNode('ridge', nodes_from=[lagged_chained])
    ridge_lower = SecondaryNode('ridge', nodes_from=[lagged_separate])

    # Root node merges both 'ridge' branches
    ar_root = SecondaryNode('ar', nodes_from=[ridge_upper, ridge_lower])
    return Pipeline(ar_root)
Beispiel #11
0
def get_composite_pipeline(composite_flag: bool = True) -> Pipeline:
    """
    Build a pipeline of one or two 'cnn' nodes feeding an 'rf' root node.

    :param composite_flag: when True (default) both 'cnn' nodes are parents of
        the root; when False only the first 'cnn' node is used

    :return: pipeline rooted at the 'rf' node
    """
    node_first = PrimaryNode('cnn')
    # The original used the key 'architecture' here while the sibling node
    # below uses 'architecture_type'; the keys are made consistent so the
    # 'deep' setting is passed under the same parameter name as 'simplified'.
    node_first.custom_params = {
        'image_shape': (28, 28, 1),
        'architecture_type': 'deep',
        'num_classes': 10,
        'epochs': 15,
        'batch_size': 128
    }
    node_second = PrimaryNode('cnn')
    node_second.custom_params = {
        'image_shape': (28, 28, 1),
        'architecture_type': 'simplified',
        'num_classes': 10,
        'epochs': 10,
        'batch_size': 128
    }

    # Build the root once instead of creating and discarding a two-parent root
    parents = [node_first, node_second] if composite_flag else [node_first]
    node_final = SecondaryNode('rf', nodes_from=parents)

    pipeline = Pipeline(node_final)

    return pipeline
Beispiel #12
0
def get_complex_pipeline():
    r"""
    Pipeline looking like this
    smoothing - lagged - ridge \
                                \
                                 ridge -> final forecast
                                /
                lagged - ridge /

    :return: pipeline rooted at the final 'ridge' node
    """
    # Upper branch: 'smoothing' -> 'lagged' -> 'ridge'
    smoothing = PrimaryNode('smoothing')
    lagged_upper = SecondaryNode('lagged', nodes_from=[smoothing])

    # Lower branch: primary 'lagged' -> 'ridge'
    lagged_lower = PrimaryNode('lagged')

    ridge_upper = SecondaryNode('ridge', nodes_from=[lagged_upper])
    ridge_lower = SecondaryNode('ridge', nodes_from=[lagged_lower])

    # Root node merges both branches
    ridge_root = SecondaryNode('ridge', nodes_from=[ridge_upper, ridge_lower])
    return Pipeline(ridge_root)
Beispiel #13
0
def get_nodes():
    """
    Create four connected nodes: two 'knn' primaries -> 'lda' -> 'logit' root.

    :return: list ordered as [root, 'lda' node, first 'knn' node, second 'knn' node]
    """
    knn_first = PrimaryNode('knn')
    knn_second = PrimaryNode('knn')
    lda = SecondaryNode('lda', nodes_from=[knn_first, knn_second])
    logit_root = SecondaryNode('logit', nodes_from=[lda])

    return [logit_root, lda, knn_first, knn_second]
Beispiel #14
0
def pipeline_with_secondary_nodes_only():
    """
    Build a pipeline made of two chained 'logit' SecondaryNode objects only.

    The first node is a SecondaryNode created with an empty parent list;
    no PrimaryNode is added.

    :return: pipeline containing both nodes
    """
    parentless = SecondaryNode(operation_type='logit', nodes_from=[])
    child = SecondaryNode(operation_type='logit', nodes_from=[parentless])

    pipeline = Pipeline()
    for node in (parentless, child):
        pipeline.add_node(node)

    return pipeline
Beispiel #15
0
def pipeline_with_only_data_operations():
    """
    Build a linear pipeline: 'one_hot_encoding' -> 'scaling' -> 'ransac_lin_reg'.

    :return: pipeline rooted at the 'ransac_lin_reg' node
    """
    encoding = PrimaryNode(operation_type='one_hot_encoding')
    scaling = SecondaryNode(operation_type='scaling', nodes_from=[encoding])
    ransac_root = SecondaryNode(operation_type='ransac_lin_reg',
                                nodes_from=[scaling])
    return Pipeline(ransac_root)
Beispiel #16
0
def get_complex_regr_pipeline():
    """
    Build a pipeline where 'scaling' feeds 'ridge' and 'linear' nodes,
    merged by an 'xgbreg' root node.

    :return: pipeline rooted at the 'xgbreg' node
    """
    scaling = PrimaryNode(operation_type='scaling')
    ridge = SecondaryNode('ridge', nodes_from=[scaling])
    linear = SecondaryNode('linear', nodes_from=[scaling])
    xgbreg_root = SecondaryNode('xgbreg', nodes_from=[ridge, linear])
    return Pipeline(xgbreg_root)
Beispiel #17
0
def get_non_refinement_pipeline():
    """
    Create 3-level pipeline without class_decompose node:
    'scaling' -> ('rf', 'logit') -> 'xgboost'
    """
    scaling = PrimaryNode('scaling')
    rf = SecondaryNode('rf', nodes_from=[scaling])
    logit = SecondaryNode('logit', nodes_from=[scaling])
    # Parent order of the root is logit first, then rf
    xgboost_root = SecondaryNode('xgboost', nodes_from=[logit, rf])
    return Pipeline(xgboost_root)
Beispiel #18
0
def test_distance_to_primary_level():
    """ Root of a knn/knn -> lda -> logit graph must report distance 2 to the primary level """
    knn_first = PrimaryNode('knn')
    knn_second = PrimaryNode('knn')
    lda = SecondaryNode('lda', nodes_from=[knn_first, knn_second])
    logit_root = SecondaryNode('logit', nodes_from=[lda])

    assert logit_root.distance_to_primary_level == 2
Beispiel #19
0
def pipeline_with_pca() -> Pipeline:
    """
    Build a pipeline where 'scaling' feeds 'pca' and 'lda' nodes,
    merged by an 'rf' root node.

    :return: pipeline rooted at the 'rf' node
    """
    scaling = PrimaryNode('scaling')
    pca = SecondaryNode('pca', nodes_from=[scaling])
    lda = SecondaryNode('lda', nodes_from=[scaling])
    rf_root = SecondaryNode('rf', nodes_from=[pca, lda])
    return Pipeline(rf_root)
Beispiel #20
0
def pipeline_simple() -> Pipeline:
    """
    Build a pipeline where 'scaling' feeds 'svc' and 'lda' nodes,
    merged by an 'rf' root node.

    :return: pipeline rooted at the 'rf' node
    """
    scaling = PrimaryNode('scaling')
    svc = SecondaryNode('svc', nodes_from=[scaling])
    lda = SecondaryNode('lda', nodes_from=[scaling])
    rf_root = SecondaryNode('rf', nodes_from=[svc, lda])
    return Pipeline(rf_root)
Beispiel #21
0
def default_valid_pipeline():
    """
    Build a diamond of four 'logit' nodes: one primary node, two secondary
    nodes fed by it, and a root node merging the two.

    :return: pipeline rooted at the merging node
    """
    source = PrimaryNode(operation_type='logit')
    left = SecondaryNode(operation_type='logit', nodes_from=[source])
    right = SecondaryNode(operation_type='logit', nodes_from=[source])
    root = SecondaryNode(operation_type='logit', nodes_from=[left, right])
    return Pipeline(root)
Beispiel #22
0
def test_ordered_subnodes_hierarchy():
    """ ordered_subnodes_hierarchy() on the root of a four-node graph must return four nodes """
    knn_first = PrimaryNode('knn')
    knn_second = PrimaryNode('knn')
    lda = SecondaryNode('lda', nodes_from=[knn_first, knn_second])
    logit_root = SecondaryNode('logit', nodes_from=[lda])

    hierarchy = logit_root.ordered_subnodes_hierarchy()

    assert len(hierarchy) == 4
Beispiel #23
0
def pipeline_with_cycle():
    """
    Build a three-node 'logit' pipeline that contains a cycle.

    The third node depends on the second one, and the third node is then
    appended to the second node's parents, closing the loop.

    :return: pipeline containing all three nodes
    """
    source = PrimaryNode(operation_type='logit')
    middle = SecondaryNode(operation_type='logit', nodes_from=[source])
    closing = SecondaryNode(operation_type='logit',
                            nodes_from=[middle, source])
    # middle -> closing -> middle: this back-reference creates the cycle
    middle.nodes_from.append(closing)

    pipeline = Pipeline()
    for node in (source, middle, closing):
        pipeline.add_node(node)

    return pipeline
Beispiel #24
0
def pipeline_with_multiple_roots():
    """
    Build a 'logit' pipeline in which two secondary nodes share one primary
    parent and nothing merges them, so neither has a successor.

    :return: pipeline containing the primary node and both secondary nodes
    """
    source = PrimaryNode(operation_type='logit')
    root_a = SecondaryNode(operation_type='logit', nodes_from=[source])
    root_b = SecondaryNode(operation_type='logit', nodes_from=[source])

    pipeline = Pipeline()
    for node in (source, root_a, root_b):
        pipeline.add_node(node)

    return pipeline
Beispiel #25
0
def valid_pipeline():
    """
    Build a straight chain of four 'logit' nodes, each fed by the previous one.

    :return: pipeline with the nodes added from the primary node to the last one
    """
    head = PrimaryNode(operation_type='logit')
    second = SecondaryNode(operation_type='logit', nodes_from=[head])
    third = SecondaryNode(operation_type='logit', nodes_from=[second])
    tail = SecondaryNode(operation_type='logit', nodes_from=[third])

    pipeline = Pipeline()
    for node in (head, second, third, tail):
        pipeline.add_node(node)

    return pipeline
Beispiel #26
0
def pipeline_with_isolated_components():
    """
    Build a 'logit' pipeline made of two chains that are not connected to each other.

    The first chain is primary -> secondary; the second chain starts from a
    SecondaryNode created with an empty parent list.

    :return: pipeline containing all four nodes
    """
    chain_a_head = PrimaryNode(operation_type='logit')
    chain_a_tail = SecondaryNode(operation_type='logit',
                                 nodes_from=[chain_a_head])
    chain_b_head = SecondaryNode(operation_type='logit', nodes_from=[])
    chain_b_tail = SecondaryNode(operation_type='logit',
                                 nodes_from=[chain_b_head])

    pipeline = Pipeline()
    for node in (chain_a_head, chain_a_tail, chain_b_head, chain_b_tail):
        pipeline.add_node(node)

    return pipeline
Beispiel #27
0
def get_pipeline():
    """
    Build a two-branch pipeline on 'lagged' nodes with different window sizes.

    Branch one: 'lagged' (window_size 120) -> 'ridge';
    branch two: 'lagged' (window_size 10) -> 'dtreg';
    both are merged by a 'ridge' root node.

    :return: pipeline rooted at the merging 'ridge' node
    """
    lagged_wide = PrimaryNode('lagged')
    lagged_wide.custom_params = {'window_size': 120}
    lagged_narrow = PrimaryNode('lagged')
    lagged_narrow.custom_params = {'window_size': 10}

    ridge_branch = SecondaryNode('ridge', nodes_from=[lagged_wide])
    dtreg_branch = SecondaryNode('dtreg', nodes_from=[lagged_narrow])
    ridge_root = SecondaryNode('ridge',
                               nodes_from=[ridge_branch, dtreg_branch])

    return Pipeline(ridge_root)
Beispiel #28
0
def create_classification_pipeline_with_preprocessing():
    """
    Build a two-branch pipeline merged by a 'knn' root node.

    Branch one: 'scaling' -> 'xgboost'; branch two: 'rfe_lin_class' -> 'logit'.

    :return: pipeline rooted at the 'knn' node
    """
    scaling = PrimaryNode('scaling')
    rfe = PrimaryNode('rfe_lin_class')

    xgboost_branch = SecondaryNode('xgboost', nodes_from=[scaling])
    logit_branch = SecondaryNode('logit', nodes_from=[rfe])

    knn_root = SecondaryNode('knn', nodes_from=[xgboost_branch, logit_branch])
    return Pipeline(knn_root)
Beispiel #29
0
def get_non_refinement_pipeline(lagged):
    """
    Create pipeline without decompose operation:
    'lagged' -> ('lasso', 'dtreg') -> 'ridge'

    :param lagged: value stored under 'window_size' in the 'lagged' node's custom params

    :return: pipeline rooted at the 'ridge' node
    """
    lagged_node = PrimaryNode('lagged')
    lagged_node.custom_params = {'window_size': lagged}

    lasso = SecondaryNode('lasso', nodes_from=[lagged_node])
    dtreg = SecondaryNode('dtreg', nodes_from=[lagged_node])
    dtreg.custom_params = {'max_depth': 3}

    ridge_root = SecondaryNode('ridge', nodes_from=[lasso, dtreg])
    return Pipeline(ridge_root)
Beispiel #30
0
    def roll_pipeline_structure(
            self,
            operation_object: ['OperationTemplate', 'AtomizedModelTemplate'],
            visited_nodes: dict,
            path: str = None):
        """
        Recursively turn an operation template and all of its parent
        templates into connected pipeline nodes.

        :param operation_object: OperationTemplate or AtomizedModelTemplate to convert
        :param visited_nodes: dict mapping operation_id to the node already
            built for it; it is consulted first and updated in place
        :param path: directory that ``fitted_operation_path`` is joined to;
            when None no fitted operation is loaded
        :return: node built (or previously cached) for ``operation_object``
        :raises FileNotFoundError: if the joined fitted-operation path is not a file
        """
        operation_id = operation_object.operation_id
        if operation_id in visited_nodes:
            # Already converted: reuse the node so shared parents stay shared
            return visited_nodes[operation_id]

        is_atomized = operation_object.operation_type == atomized_model_type()
        # A template with parents becomes a SecondaryNode, otherwise a PrimaryNode
        node_class = SecondaryNode if operation_object.nodes_from else PrimaryNode

        if is_atomized:
            node = node_class(
                operation_type=operation_object.next_pipeline_template)
        else:
            node = node_class(operation_object.operation_type)
            # Params and rating are copied only for non-atomized operations
            node.operation.params = operation_object.params
            node.rating = operation_object.rating

        fitted_relative_path = getattr(operation_object,
                                       'fitted_operation_path', None)
        if fitted_relative_path and path is not None:
            path_to_operation = os.path.join(path, fitted_relative_path)
            if not os.path.isfile(path_to_operation):
                message = f"Fitted operation on the path: {path_to_operation} does not exist."
                self.log.error(message)
                raise FileNotFoundError(message)

            # NOTE(review): joblib.load unpickles the file, so it should only
            # be pointed at trusted locations
            fitted_operation = joblib.load(path_to_operation)
            operation_object.fitted_operation = fitted_operation
            node.fitted_operation = fitted_operation

        # Parent templates are collected in the order of self.operation_templates
        parent_ids = operation_object.nodes_from
        parent_templates = [
            template for template in self.operation_templates
            if template.operation_id in parent_ids
        ]
        node.nodes_from = [
            self.roll_pipeline_structure(parent, visited_nodes, path)
            for parent in parent_templates
        ]

        visited_nodes[operation_id] = node
        return node