Esempio n. 1
0
def two_level_chain():
    """Build a chain with two primary nodes feeding one secondary node.

    The 'logit' and 'knn' primary nodes are the parents of an 'xgboost'
    secondary node. All three nodes are added to the chain explicitly,
    parents first.
    """
    logit_node = PrimaryNode(model_type='logit')
    knn_node = PrimaryNode(model_type='knn')
    xgboost_node = SecondaryNode(model_type='xgboost',
                                 nodes_from=[logit_node, knn_node])

    chain = Chain()
    chain.add_node(logit_node)
    chain.add_node(knn_node)
    chain.add_node(xgboost_node)
    return chain
Esempio n. 2
0
def chain_with_self_cycle():
    """Build a two-node chain whose secondary node lists itself as a parent."""
    parent = PrimaryNode(model_type='logit')
    cyclic = SecondaryNode(model_type='logit', nodes_from=[parent])
    # Make the node its own parent so that the chain contains a self-cycle.
    cyclic.nodes_from.append(cyclic)

    chain = Chain()
    for node in (parent, cyclic):
        chain.add_node(node)
    return chain
Esempio n. 3
0
def get_class_chain():
    """Compose a chain: 'xgboost' and 'knn' primary nodes under a 'logit' node.

    Only the final (secondary) node is passed to ``Chain.add_node``.
    """
    parents = [PrimaryNode(model_type=name) for name in ('xgboost', 'knn')]
    final = SecondaryNode(model_type='logit', nodes_from=parents)

    chain = Chain()
    chain.add_node(final)
    return chain
Esempio n. 4
0
def get_regr_chain():
    """Compose a chain: 'xgbreg' and 'knnreg' primary nodes under a 'linear' node.

    Only the final (secondary) node is passed to ``Chain.add_node``.
    """
    parents = [PrimaryNode(model_type=name) for name in ('xgbreg', 'knnreg')]
    final = SecondaryNode(model_type='linear', nodes_from=parents)

    chain = Chain()
    chain.add_node(final)
    return chain
Esempio n. 5
0
def chain_third():
    """Build a chain with a 'qda' secondary node over two 'rf' primary nodes.

    The secondary node is added to the chain first, then each of its
    parents is added in the order it appears in ``nodes_from``.

    :return: the populated Chain
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # add_node is called only for its side effect, so use a plain loop
    # instead of building a throwaway list with a comprehension.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
Esempio n. 6
0
def chain_second():
    """Return ``chain_first()`` with the root's first parent replaced.

    The replacement is a 'dt' secondary node that has two 'knn' primary
    nodes as parents; it is swapped in via ``replace_node_with_parents``.
    """
    #    XG
    #  |     \
    # DT      KNN
    # |  \    |  \
    # KNN KNN LR  LDA
    chain = chain_first()

    dt_node = SecondaryNode('dt')
    for _ in range(2):
        dt_node.nodes_from.append(PrimaryNode('knn'))

    replaced = chain.root_node.nodes_from[0]
    chain.replace_node_with_parents(replaced, dt_node)
    return chain
Esempio n. 7
0
def compose_chain() -> Chain:
    """Build a chain with an 'rf' secondary node fed by 'svc' and 'lda' nodes.

    Only the secondary node is passed to ``Chain.add_node``.
    """
    chain = Chain()
    svc_node = PrimaryNode('svc')
    lda_node = PrimaryNode('lda')
    rf_node = SecondaryNode('rf')

    for parent in (svc_node, lda_node):
        rf_node.nodes_from.append(parent)

    chain.add_node(rf_node)
    return chain
Esempio n. 8
0
def chain_fifth():
    """Return ``chain_first()`` with the root and two leaves swapped for 'knn'.

    The root is updated to a 'knn' secondary node, then both parents of the
    root's second parent are updated to a 'knn' primary node.
    """
    #    KNN
    #  |     \
    # XG      KNN
    # |  \    |  \
    # LR LDA KNN  KNN
    chain = chain_first()

    knn_root = SecondaryNode('knn')
    chain.update_node(chain.root_node, knn_root)

    # The same PrimaryNode instance is passed to both update_node calls.
    knn_leaf = PrimaryNode('knn')
    for leaf_index in (0, 1):
        chain.update_node(chain.root_node.nodes_from[1].nodes_from[leaf_index],
                          knn_leaf)

    return chain
Esempio n. 9
0
def test_chain_sequential_fit_correct(data_setup):
    """Fit a strictly sequential chain of four 'logit' nodes and check it.

    Verifies the root's descriptive id, the chain length and depth, the
    number of predicted rows, and that the final node has a cached state.
    """
    train, _ = train_test_data_setup(data_setup)

    # One primary node followed by three secondary nodes, each of which
    # takes the previous node as its only parent.
    nodes = [PrimaryNode(model_type='logit')]
    for _ in range(3):
        nodes.append(SecondaryNode(model_type='logit', nodes_from=[nodes[-1]]))
    final = nodes[-1]

    chain = Chain()
    for node in nodes:
        chain.add_node(node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_id = ('(((/n_logit_default_params;)/'
                   'n_logit_default_params;)/'
                   'n_logit_default_params;)/'
                   'n_logit_default_params')
    assert chain.root_node.descriptive_id == expected_id

    assert chain.length == 4
    assert chain.depth == 4
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.cache.actual_cached_state is not None
Esempio n. 10
0
def test_cache_actuality_after_model_change(data_setup):
    """Unaffected nodes keep an actual cache after a node's model is changed.

    The root's first parent is replaced after fitting; that node and the
    root are expected to have no actual cached state, while every other
    node in the chain is expected to still have one.
    """
    chain = chain_first()
    train, _ = data_setup
    chain.fit(input_data=train)

    replacement = SecondaryNode(model_type='logit')
    chain.update_node(old_node=chain.root_node.nodes_from[0],
                      new_node=replacement)

    updated_parent = chain.root_node.nodes_from[0]
    affected = [chain.root_node, updated_parent]
    unaffected = [node for node in chain.nodes if node not in affected]

    # Nodes that were not touched by the update still have an actual cache.
    unaffected_states = [node.cache.actual_cached_state for node in unaffected]
    assert all(unaffected_states)

    # The updated node and the root no longer have an actual cache.
    affected_states = [node.cache.actual_cached_state for node in affected]
    assert not any(affected_states)
Esempio n. 11
0
def chain_first():
    """Build a three-level chain of seven nodes.

    An 'xgboost' root has 'xgboost' and 'knn' secondary parents, and each
    of those has 'logit' and 'lda' primary parents. Nodes are added to the
    chain leaves first, then their secondary node, and the root last.
    """
    #    XG
    #  |     \
    # XG      KNN
    # |  \    |  \
    # LR LDA LR  LDA
    chain = Chain()

    root_of_tree = SecondaryNode('xgboost')
    root_children = [SecondaryNode('xgboost'), SecondaryNode('knn')]

    for child in root_children:
        for leaf_model in ('logit', 'lda'):
            leaf = PrimaryNode(leaf_model)
            child.nodes_from.append(leaf)
            chain.add_node(leaf)
        chain.add_node(child)
        root_of_tree.nodes_from.append(child)

    chain.add_node(root_of_tree)
    return chain
Esempio n. 12
0
def test_chain_with_custom_params_for_model(data_setup):
    """Custom params on the root node must change the chain's predictions.

    Two copies of the same chain are fitted on the same data, one with
    ``custom_params`` set on its 'knn' root and one without; their
    predictions are expected to differ.
    """
    knn_params = {'n_neighbors': 1, 'weights': 'uniform', 'p': 1}

    parents = [PrimaryNode(model_type='logit'), PrimaryNode(model_type='lda')]
    final = SecondaryNode(model_type='knn', nodes_from=parents)

    chain = Chain()
    chain.add_node(final)
    # Copy before customising so the copy keeps the default parameters.
    chain_default_params = deepcopy(chain)

    chain.root_node.custom_params = knn_params

    chain_default_params.fit(data_setup)
    chain.fit(data_setup)

    custom_params_prediction = chain.predict(data_setup).predict
    default_params_prediction = chain_default_params.predict(data_setup).predict

    predictions_match = np.array_equal(custom_params_prediction,
                                       default_params_prediction)
    assert not predictions_match
Esempio n. 13
0
def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
    """Fit an exported pipeline and a three-node chain, printing both ROC AUCs.

    Both models are fitted on the CSV at ``train_file_path`` and scored on
    the CSV at ``test_file_path``. Each ROC AUC value is printed.

    :param train_file_path: path to the training CSV
    :param test_file_path: path to the testing CSV
    :return: the ROC AUC value computed for the chain's predictions
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    x_train, y_train = train_data.features, train_data.target
    x_test, y_test = test_data.features, test_data.target

    # Average CV score on the training set was: 0.93755
    exported_pipeline = make_pipeline(
        StackingEstimator(estimator=BernoulliNB()),
        RandomForestClassifier(),
    )
    # Fix random state for all the steps in exported pipeline
    set_param_recursive(exported_pipeline.steps, 'random_state', 1)

    exported_pipeline.fit(x_train, y_train)
    # Keep only the second column of the predicted probabilities.
    pipeline_scores = exported_pipeline.predict_proba(x_test)[:, 1]
    pipeline_roc_auc = roc_auc(y_true=y_test, y_score=pipeline_scores)
    print(pipeline_roc_auc)

    chain = Chain()
    direct_node = PrimaryNode('direct_data_model')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    for parent in (direct_node, bernb_node):
        rf_node.nodes_from.append(parent)
    chain.add_node(rf_node)

    chain.fit(train_data)
    chain_output = chain.predict(test_data)

    chain_roc_auc = roc_auc(y_true=y_test, y_score=chain_output.predict)
    print(chain_roc_auc)

    return chain_roc_auc
Esempio n. 14
0
def real_chain(chain_template, with_cache=True):
    """Build a Chain from a level-by-level collection of node templates.

    :param chain_template: iterable of levels; each level is an iterable of
        templates exposing ``parents``, ``model_type`` and ``model_instance``
        (plus ``preprocessor`` and ``fitted_model`` when ``with_cache`` is
        true)
    :param with_cache: when true, every node gets a ``FittedModelCache``
        holding one ``CachedState`` built from the template's preprocessor
        and fitted model
    :return: a Chain containing one node per template, added in the order
        the templates were visited
    """
    nodes_by_templates = []
    # Iterate the levels directly instead of indexing through range(len(...)).
    for level_templates in chain_template:
        for template in level_templates:
            if not template.parents:
                # A template without parents becomes a primary node.
                node = PrimaryNode(model_type=template.model_type)
            else:
                # Parents are resolved against the nodes built so far.
                node = SecondaryNode(
                    nodes_from=real_parents(nodes_by_templates, template),
                    model_type=template.model_type)
            node.model = template.model_instance
            if with_cache:
                cache = FittedModelCache(related_node=node)
                cache.append(
                    CachedState(preprocessor=template.preprocessor,
                                model=template.fitted_model))
                node.cache = cache
            nodes_by_templates.append((node, template))

    chain = Chain()
    for node, _ in nodes_by_templates:
        chain.add_node(node)

    return chain