Exemple #1
0
def create_chain_with_several_nested_atomized_model() -> Chain:
    """Build a chain that mixes nested atomized models with 'knn' nodes.

    Only the last node is added to the chain explicitly; every other node
    is reachable from it through ``nodes_from`` links.

    :return: the assembled chain
    """
    nested_chain = Chain()
    primary_atomized = PrimaryNode(
        model_type=create_atomized_model_with_several_atomized_models())

    secondary_atomized = SecondaryNode(model_type=create_atomized_model())
    secondary_atomized.nodes_from = [primary_atomized]

    # First knn node: fed by the primary atomized model only.
    knn_nine = SecondaryNode('knn')
    knn_nine.custom_params = {'n_neighbors': 9}
    knn_nine.nodes_from = [primary_atomized]

    # Second knn node: fed by all three nodes created above.
    knn_five = SecondaryNode('knn')
    knn_five.custom_params = {'n_neighbors': 5}
    knn_five.nodes_from = [primary_atomized, secondary_atomized, knn_nine]

    root_atomized = SecondaryNode(
        model_type=create_atomized_model_with_several_atomized_models())
    root_atomized.nodes_from = [knn_five]

    nested_chain.add_node(root_atomized)
    return nested_chain
Exemple #2
0
def run_chain_from_automl(train_file_path: str,
                          test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """ Function run chain with Auto ML models in nodes

    The chain has two branches, ``scaling -> lda`` and ``tpot``, both
    feeding a final ``rf`` node.

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: maximum running time for customization of the "tpot" model

    :return roc_auc_value: ROC AUC metric for chain
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_scaling = PrimaryNode('scaling')
    node_tpot = PrimaryNode('tpot')

    # timedelta.seconds is only the seconds *component* (whole days are
    # dropped), so the full budget has to come from total_seconds().
    node_tpot.operation.params = {
        'max_run_time_sec': int(max_run_time.total_seconds())
    }

    node_lda = SecondaryNode('lda', nodes_from=[node_scaling])
    node_rf = SecondaryNode('rf', nodes_from=[node_tpot, node_lda])
    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Exemple #3
0
def run_chain_from_automl(train_file_path: str, test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """Fit and score a chain where 'tpot' and 'lda' nodes feed an 'rf' node.

    :param train_file_path: path to the csv file with data for train
    :param test_file_path: path to the csv file with data for validation
    :param max_run_time: time budget passed to the 'tpot' node

    :return: ROC AUC value computed on the test data
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_tpot = PrimaryNode('tpot')

    # timedelta.seconds is only the seconds *component* (whole days are
    # dropped), so the full budget has to come from total_seconds().
    node_tpot.model.params = {
        'max_run_time_sec': int(max_run_time.total_seconds())
    }

    node_lda = PrimaryNode('lda')
    node_rf = SecondaryNode('rf')

    node_rf.nodes_from = [node_tpot, node_lda]

    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target,
                            y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Exemple #4
0
    def fine_tune_certain_node(self, model_id, input_data: InputData, iterations: int = 30,
                               max_lead_time: timedelta = timedelta(minutes=5)):
        """
        Tune hyperparameters of the node identified by ``model_id``.

        The subtree rooted at that node is extracted from
        ``self.chain_template``, fitted without cache and tuned; the tuned
        root is passed to ``self._update_template`` and the template is then
        converted into a new chain.

        :param int model_id: number of the certain model in the chain.
        Look for it in exported json file of your model.
        :param input_data: data used for tuning
        :param iterations: max number of iterations
        :param max_lead_time: max time available for tuning process
        :return: updated chain object
        """
        node_subchain = Chain()
        subtree_root = extract_subtree_root(
            root_model_id=model_id,
            chain_template=self.chain_template,
        )
        node_subchain.add_node(subtree_root)
        node_subchain.fit(input_data=input_data, use_cache=False)

        tuner = Tune(node_subchain)
        tuned_subchain = tuner.fine_tune_root_node(
            input_data=input_data,
            iterations=iterations,
            max_lead_time=max_lead_time,
        )

        self._update_template(
            model_id=model_id,
            updated_node=tuned_subchain.root_node,
        )

        result_chain = Chain()
        self.chain_template.convert_to_chain(chain=result_chain)
        return result_chain
Exemple #5
0
def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped chain of four 'logit' nodes and check its shape.

    One primary node feeds two secondary nodes, and both of them feed the
    final node.
    """
    train, _ = train_test_data_setup(data_setup)

    source = PrimaryNode(operation_type='logit')
    left = SecondaryNode(operation_type='logit', nodes_from=[source])
    right = SecondaryNode(operation_type='logit', nodes_from=[source])
    final = SecondaryNode(operation_type='logit', nodes_from=[left, right])

    chain = Chain()
    for chain_node in (source, left, right, final):
        chain.add_node(chain_node)

    chain.unfit()
    train_predicted = chain.fit(input_data=train)

    expected_root_id = ('((/n_logit_default_params;)/'
                        'n_logit_default_params;;(/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params')
    assert chain.root_node.descriptive_id == expected_root_id

    assert chain.length == 4
    assert chain.depth == 3
    # One prediction row per training target row.
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.fitted_operation is not None
Exemple #6
0
def chain_with_secondary_nodes_only():
    """Return a two-node chain built from SecondaryNode objects only.

    The first node has an empty ``nodes_from`` list and feeds the second.
    """
    parentless = SecondaryNode(operation_type='logit', nodes_from=[])
    child = SecondaryNode(operation_type='logit', nodes_from=[parentless])

    chain = Chain()
    for chain_node in (parentless, child):
        chain.add_node(chain_node)
    return chain
Exemple #7
0
def chain_with_self_cycle():
    """Return a chain whose secondary node lists itself among its parents."""
    primary = PrimaryNode(operation_type='logit')
    looped = SecondaryNode(operation_type='logit', nodes_from=[primary])
    # Create the self-reference after construction.
    looped.nodes_from.append(looped)

    chain = Chain()
    for chain_node in (primary, looped):
        chain.add_node(chain_node)
    return chain
Exemple #8
0
def chain_with_multiple_roots():
    """Return a chain where one primary node feeds two separate secondary nodes."""
    shared_parent = PrimaryNode(operation_type='logit')
    root_a = SecondaryNode(operation_type='logit', nodes_from=[shared_parent])
    root_b = SecondaryNode(operation_type='logit', nodes_from=[shared_parent])

    chain = Chain()
    chain.add_node(shared_parent)
    chain.add_node(root_a)
    chain.add_node(root_b)
    return chain
Exemple #9
0
def chain_with_cycle():
    """Return a three-node chain in which two secondary nodes feed each other."""
    primary = PrimaryNode(operation_type='logit')
    middle = SecondaryNode(operation_type='logit', nodes_from=[primary])
    last = SecondaryNode(operation_type='logit', nodes_from=[middle, primary])
    # Close the loop: ``last`` already depends on ``middle``.
    middle.nodes_from.append(last)

    chain = Chain()
    chain.add_node(primary)
    chain.add_node(middle)
    chain.add_node(last)
    return chain
Exemple #10
0
def baseline_chain():
    """Return a chain with 'knn' and 'logit' primary nodes feeding an 'xgboost' node."""
    chain = Chain()
    root = SecondaryNode(model_type='xgboost', nodes_from=[])
    for primary_model in ('knn', 'logit'):
        primary = PrimaryNode(primary_model)
        chain.add_node(primary)
        root.nodes_from.append(primary)
    # The root is added last, after both of its parents.
    chain.add_node(root)
    return chain
Exemple #11
0
def chain_with_isolated_components():
    """Return a chain made of two node pairs with no links between the pairs."""
    # Pair one: primary -> secondary.
    pair_one_parent = PrimaryNode(operation_type='logit')
    pair_one_child = SecondaryNode(operation_type='logit',
                                   nodes_from=[pair_one_parent])
    # Pair two: a parentless secondary node -> secondary.
    pair_two_parent = SecondaryNode(operation_type='logit', nodes_from=[])
    pair_two_child = SecondaryNode(operation_type='logit',
                                   nodes_from=[pair_two_parent])

    chain = Chain()
    chain.add_node(pair_one_parent)
    chain.add_node(pair_one_child)
    chain.add_node(pair_two_parent)
    chain.add_node(pair_two_child)
    return chain
Exemple #12
0
def valid_chain():
    """Return a linear chain of four 'logit' nodes, each feeding the next."""
    head = PrimaryNode(operation_type='logit')
    step_two = SecondaryNode(operation_type='logit', nodes_from=[head])
    step_three = SecondaryNode(operation_type='logit', nodes_from=[step_two])
    tail = SecondaryNode(operation_type='logit', nodes_from=[step_three])

    chain = Chain()
    chain.add_node(head)
    chain.add_node(step_two)
    chain.add_node(step_three)
    chain.add_node(tail)
    return chain
Exemple #13
0
def chain_with_isolated_nodes():
    """Return a three-node linear chain plus one unconnected secondary node."""
    head = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[head])
    tail = SecondaryNode(model_type='logit', nodes_from=[middle])
    # This node has no parents and nothing refers to it.
    loner = SecondaryNode(model_type='logit', nodes_from=[])

    chain = Chain()
    chain.add_node(head)
    chain.add_node(middle)
    chain.add_node(tail)
    chain.add_node(loner)
    return chain
Exemple #14
0
def two_level_chain():
    """Return a chain with 'logit' and 'knn' primary nodes feeding an 'xgboost' node."""
    logit_node = PrimaryNode(model_type='logit')
    knn_node = PrimaryNode(model_type='knn')
    root = SecondaryNode(model_type='xgboost', nodes_from=[logit_node, knn_node])

    chain = Chain()
    chain.add_node(logit_node)
    chain.add_node(knn_node)
    chain.add_node(root)
    return chain
Exemple #15
0
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that the order of a secondary node's parents does not change results.

    Two chains with the same nodes but a different parent order are fitted
    and compared. The parents of the already fitted chain are then reordered
    in place, and the chain is refitted and compared again.
    """
    train, test = train_test_data_setup(data_setup)

    first = PrimaryNode(operation_type='logit')
    second = PrimaryNode(operation_type='lda')
    third = PrimaryNode(operation_type='knn')
    final = SecondaryNode(operation_type='xgboost',
                          nodes_from=[first, second, third])

    chain = Chain()
    for chain_node in (first, second, third, final):
        chain.add_node(chain_node)

    # Independent copies of the primary nodes for the second chain.
    first = deepcopy(first)
    second = deepcopy(second)
    third = deepcopy(third)

    final_shuffled = SecondaryNode(operation_type='xgboost',
                                   nodes_from=[third, first, second])

    chain_shuffled = Chain()
    # Nodes are added in a different order as well.
    for chain_node in (final_shuffled, third, first, second):
        chain_shuffled.add_node(chain_node)

    train_predicted = chain.fit(input_data=train)
    train_predicted_shuffled = chain_shuffled.fit(input_data=train)

    # Train results should be invariant.
    original_id = chain.root_node.descriptive_id
    shuffled_id = chain_shuffled.root_node.descriptive_id
    assert original_id == shuffled_id
    assert np.equal(train_predicted.predict,
                    train_predicted_shuffled.predict).all()

    test_predicted = chain.predict(input_data=test)
    test_predicted_shuffled = chain_shuffled.predict(input_data=test)

    # Predict results should be invariant.
    assert np.equal(test_predicted.predict,
                    test_predicted_shuffled.predict).all()

    # Reorder the parents of the node at index 3 in the fitted chain.
    current_parents = chain.nodes[3].nodes_from
    chain.nodes[3].nodes_from = [current_parents[idx] for idx in (2, 0, 1)]
    chain.nodes[3].unfit()
    chain.fit(train)
    test_predicted_re_shuffled = chain.predict(input_data=test)

    # Predict results should be invariant.
    assert np.equal(test_predicted.predict,
                    test_predicted_re_shuffled.predict).all()
Exemple #16
0
def chain_third():
    """Build a three-node chain: a 'qda' root fed by two 'rf' primary nodes.

    The root is added to the chain first, followed by each of its parents.

    :return: the assembled chain
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # add_node is called only for its side effect, so use a plain loop
    # instead of a comprehension that builds a throwaway list.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
Exemple #17
0
def get_ensemble_chain():
    """Return a chain where seven primary nodes feed a final 'linear' node."""
    base_models = ('linear', 'ridge', 'lasso', 'rfr', 'dtreg', 'knnreg', 'svr')

    chain = Chain()
    base_nodes = []
    for model_name in base_models:
        base_node = PrimaryNode(model_name)
        chain.add_node(base_node)
        base_nodes.append(base_node)

    # The combining node takes every base node as a parent.
    chain.add_node(SecondaryNode('linear', nodes_from=base_nodes))
    return chain
Exemple #18
0
def get_composite_multiscale_chain():
    """Return a chain with a trend branch and a residual branch.

    Each branch is a data-model primary node followed by a 'ridge' node;
    a final 'linear' node combines both branches. Only the final node is
    added to the chain explicitly.
    """
    chain = Chain()

    trend_source = PrimaryNode('trend_data_model')
    trend_ridge = SecondaryNode('ridge', nodes_from=[trend_source])

    residual_source = PrimaryNode('residual_data_model')
    residual_ridge = SecondaryNode('ridge', nodes_from=[residual_source])

    combiner = SecondaryNode('linear',
                             nodes_from=[residual_ridge, trend_ridge])
    chain.add_node(combiner)
    return chain
Exemple #19
0
def test_delete_node_with_redirection():
    """Delete the intermediate node and check what remains.

    Afterwards the chain must hold three nodes and ``first`` must be among
    the root node's parents.
    """
    first = PrimaryNode(operation_type='logit')
    second = PrimaryNode(operation_type='lda')
    intermediate = SecondaryNode(operation_type='knn',
                                 nodes_from=[first, second])
    root = SecondaryNode(operation_type='xgboost', nodes_from=[intermediate])

    chain = Chain()
    chain.add_node(root)

    chain.delete_node(intermediate)

    remaining_count = len(chain.nodes)
    assert remaining_count == 3
    assert first in chain.root_node.nodes_from
Exemple #20
0
def test_update_node_in_chain_raise_exception():
    """Replacing a PrimaryNode with a SecondaryNode must raise ValueError."""
    primary = PrimaryNode(operation_type='logit')
    root = SecondaryNode(operation_type='xgboost', nodes_from=[primary])

    chain = Chain()
    chain.add_node(root)
    replacement = SecondaryNode('logit')

    with pytest.raises(ValueError) as raised:
        chain.update_node(old_node=primary, new_node=replacement)

    actual_message = str(raised.value)
    assert actual_message == "Can't update PrimaryNode with SecondaryNode"
Exemple #21
0
def test_chain_repr():
    """Check ``repr`` of a chain with three primary nodes and an 'xgboost' root."""
    parents = [
        PrimaryNode(operation_type='logit'),
        PrimaryNode(operation_type='lda'),
        PrimaryNode(operation_type='knn'),
    ]
    root = SecondaryNode(operation_type='xgboost', nodes_from=parents)

    chain = Chain()
    chain.add_node(root)

    expected = "{'depth': 2, 'length': 4, 'nodes': [xgboost, logit, lda, knn]}"
    assert repr(chain) == expected
Exemple #22
0
def chain_third():
    """Build a chain with an 'xgboost' root fed by 'knn', 'lda' and 'knn' nodes.

    The parents are added to the chain first and the root last.
    """
    #      XG
    #   |  |  \
    #  KNN LDA KNN
    root = SecondaryNode('xgboost')
    parents = [PrimaryNode(parent_type) for parent_type in ('knn', 'lda', 'knn')]
    root.nodes_from.extend(parents)

    chain = Chain()
    for parent in root.nodes_from:
        chain.add_node(parent)
    chain.add_node(root)
    return chain
Exemple #23
0
def create_chain() -> Chain:
    """Build a chain where 'logit' and 'lda' primary nodes feed an 'xgboost' node.

    Custom parameters are set on the 'lda' and 'xgboost' nodes. Only the
    'xgboost' node is added to the chain explicitly.

    :return: the assembled chain
    """
    chain = Chain()
    logit_node = PrimaryNode('logit')

    lda_node = PrimaryNode('lda')
    lda_node.custom_params = {'n_components': 1}

    root = SecondaryNode('xgboost')
    root.custom_params = {'n_components': 1}
    root.nodes_from = [logit_node, lda_node]

    chain.add_node(root)
    return chain
Exemple #24
0
def create_atomized_model_with_several_atomized_models() -> AtomizedModel:
    """Wrap a diamond-shaped chain of four atomized models into an AtomizedModel.

    One primary node feeds two secondary nodes, and both of them feed the
    last node. Only the last node is added to the chain explicitly.

    :return: AtomizedModel built from the chain
    """
    inner_chain = Chain()
    source = PrimaryNode(operation_type=create_atomized_model())
    left = SecondaryNode(operation_type=create_atomized_model())
    right = SecondaryNode(operation_type=create_atomized_model())
    sink = SecondaryNode(operation_type=create_atomized_model())

    left.nodes_from = [source]
    right.nodes_from = [source]
    sink.nodes_from = [left, right]

    inner_chain.add_node(sink)
    return AtomizedModel(inner_chain)
Exemple #25
0
def get_composite_chain():
    """Return a chain with two lagged branches merged by a final 'ridge' node.

    Structure::

        lagged (window_size=110) -> ridge --+
                                            +--> ridge
        lagged (window_size=20)  -> treg  --+

    Only the final node is added to the chain explicitly.
    """
    chain = Chain()

    lagged_wide = PrimaryNode('lagged')
    lagged_wide.custom_params = {'window_size': 110}
    ridge_branch = SecondaryNode('ridge', nodes_from=[lagged_wide])

    lagged_narrow = PrimaryNode('lagged')
    lagged_narrow.custom_params = {'window_size': 20}
    treg_branch = SecondaryNode('treg', nodes_from=[lagged_narrow])

    merger = SecondaryNode('ridge', nodes_from=[treg_branch, ridge_branch])
    chain.add_node(merger)
    return chain
Exemple #26
0
def test_delete_primary_node():
    """Delete a primary node and check that its 'knn' child is a PrimaryNode afterwards."""
    # given
    first = PrimaryNode(operation_type='logit')
    second = PrimaryNode(operation_type='lda')
    knn_child = SecondaryNode(operation_type='knn', nodes_from=[first])
    root = SecondaryNode(operation_type='xgboost',
                         nodes_from=[second, knn_child])
    chain = Chain()
    chain.add_node(root)

    # when
    chain.delete_node(first)

    knn_nodes = [
        candidate for candidate in chain.nodes
        if candidate.operation.operation_type == 'knn'
    ]
    new_primary_node = knn_nodes[0]

    # then
    assert len(chain.nodes) == 3
    assert isinstance(new_primary_node, PrimaryNode)
Exemple #27
0
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """Score an exported sklearn pipeline and a chain on the same data.

    The pipeline's ROC AUC is printed first. A chain of 'direct_data_model'
    and 'bernb' nodes feeding an 'rf' node is then fitted, and its ROC AUC
    is printed and returned.

    :param train_file_path: path to the csv file with training data
    :param test_file_path: path to the csv file with test data
    :return: ROC AUC of the chain on the test data
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    x_train, x_test = train_data.features, test_data.features
    y_train, y_test = train_data.target, test_data.target

    # Average CV score on the training set was: 0.93755
    tpot_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()),
        RandomForestClassifier()
    )
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(tpot_pipeline.steps, 'random_state', 1)

    tpot_pipeline.fit(x_train, y_train)
    # Keep the second column of predict_proba as the score.
    pipeline_scores = tpot_pipeline.predict_proba(x_test)[:, 1]

    pipeline_roc_auc = roc_auc(y_true=y_test, y_score=pipeline_scores)
    print(pipeline_roc_auc)

    chain = Chain()
    direct_node = PrimaryNode('direct_data_model')
    bernb_node = PrimaryNode('bernb')
    rf_root = SecondaryNode('rf')

    for parent in (direct_node, bernb_node):
        rf_root.nodes_from.append(parent)

    chain.add_node(rf_root)

    chain.fit(train_data)
    chain_output = chain.predict(test_data)

    chain_roc_auc = roc_auc(y_true=y_test, y_score=chain_output.predict)
    print(chain_roc_auc)

    return chain_roc_auc
Exemple #28
0
def test_chain_with_custom_params_for_model(data_setup):
    """Check that custom params on the root node change the chain's predictions."""
    knn_params = {'n_neighbors': 1, 'weights': 'uniform', 'p': 1}

    logit_node = PrimaryNode(operation_type='logit')
    lda_node = PrimaryNode(operation_type='lda')
    knn_root = SecondaryNode(operation_type='knn',
                             nodes_from=[logit_node, lda_node])

    chain = Chain()
    chain.add_node(knn_root)
    # Copy the chain before the custom params are applied.
    chain_default_params = deepcopy(chain)

    chain.root_node.custom_params = knn_params

    chain_default_params.fit(data_setup)
    chain.fit(data_setup)

    custom_params_prediction = chain.predict(data_setup).predict
    default_params_prediction = chain_default_params.predict(data_setup).predict

    predictions_match = np.array_equal(custom_params_prediction,
                                       default_params_prediction)
    assert not predictions_match
Exemple #29
0
def test_chain_sequential_fit_correct(data_setup):
    """Fit a linear chain of four 'logit' nodes without cache and check its shape."""
    train, _ = train_test_data_setup(data_setup)

    head = PrimaryNode(model_type='logit')
    step_two = SecondaryNode(model_type='logit', nodes_from=[head])
    step_three = SecondaryNode(model_type='logit', nodes_from=[step_two])
    final = SecondaryNode(model_type='logit', nodes_from=[step_three])

    chain = Chain()
    for chain_node in (head, step_two, step_three, final):
        chain.add_node(chain_node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_root_id = ('(((/n_logit_default_params;)/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params;)/'
                        'n_logit_default_params')
    assert chain.root_node.descriptive_id == expected_root_id

    assert chain.length == 4
    assert chain.depth == 4
    # One prediction row per training target row.
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.cache.actual_cached_state is not None
Exemple #30
0
def real_chain(chain_template, with_cache=True):
    """Build a chain of real nodes from a level-indexed template.

    Each template yields one node: a PrimaryNode when it has no parents,
    otherwise a SecondaryNode whose parents come from ``real_parents``.
    The template's model instance is attached to the node. When
    ``with_cache`` is true, the node also gets a cache holding the
    template's preprocessor and fitted model.

    :param chain_template: container indexed by level number; each level
        is an iterable of node templates
    :param with_cache: whether to attach a pre-filled cache to every node
    :return: chain containing all created nodes in creation order
    """
    node_template_pairs = []
    for level_index in range(len(chain_template)):
        for template in chain_template[level_index]:
            if len(template.parents) > 0:
                parent_nodes = real_parents(node_template_pairs, template)
                node = SecondaryNode(nodes_from=parent_nodes,
                                     model_type=template.model_type)
            else:
                node = PrimaryNode(model_type=template.model_type)
            node.model = template.model_instance

            if with_cache:
                node_cache = FittedModelCache(related_node=node)
                cached_state = CachedState(preprocessor=template.preprocessor,
                                           model=template.fitted_model)
                node_cache.append(cached_state)
                node.cache = node_cache

            # Keep the pair so later templates can look up their parents.
            node_template_pairs.append((node, template))

    chain = Chain()
    for created_node, _ in node_template_pairs:
        chain.add_node(created_node)

    return chain