Esempio n. 1
0
def run_chain_from_automl(train_file_path: str,
                          test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """Fit a chain containing a 'tpot' node on CSV data and report ROC AUC.

    The chain is a single 'rf' secondary node fed by two primary nodes,
    'tpot' and 'lda'.

    :param train_file_path: path of the CSV file the chain is fitted on
    :param test_file_path: path of the CSV file used for prediction and scoring
    :param max_run_time: time budget handed to the 'tpot' node, in whole seconds
    :return: ROC AUC computed from the test target and the chain prediction
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_tpot = PrimaryNode('tpot')
    # total_seconds() covers the whole duration; the `.seconds` attribute
    # drops the days component (timedelta(days=1).seconds == 0).
    node_tpot.model.params = {
        'max_run_time_sec': int(max_run_time.total_seconds())
    }

    node_lda = PrimaryNode('lda')
    node_rf = SecondaryNode('rf')

    node_rf.nodes_from = [node_tpot, node_lda]

    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
Esempio n. 2
0
    def compose_chain(self, data: InputData,
                      initial_chain: Optional[Chain],
                      composer_requirements: ComposerRequirements,
                      metrics: Optional[Callable],
                      optimiser_parameters=None,
                      is_visualise: bool = False) -> Chain:
        """Build a fixed-structure chain from the models named in the requirements.

        The shape is selected by ``self.dummy_chain_type``:

        * hierarchical - every model in ``composer_requirements.primary``
          becomes a primary node feeding one secondary node built from
          ``composer_requirements.secondary[0]``;
        * flat - a primary node built from ``composer_requirements.primary[0]``
          followed by one secondary node per entry of
          ``composer_requirements.secondary``, each fed by the previous node.

        ``data``, ``initial_chain``, ``metrics``, ``optimiser_parameters`` and
        ``is_visualise`` are not used by this implementation.

        :return: the assembled chain
        :raises NotImplementedError: for any other chain type
        """
        chain = Chain()
        chain_type = self.dummy_chain_type

        if chain_type == DummyChainTypeEnum.hierarchical:
            # (y1, y2) -> y
            root = SecondaryNode(composer_requirements.secondary[0])
            for model_name in composer_requirements.primary:
                leaf = PrimaryNode(model_name)
                chain.add_node(leaf)
                root.nodes_from.append(leaf)
            chain.add_node(root)
            return chain

        if chain_type == DummyChainTypeEnum.flat:
            # (y1) -> (y2) -> y
            tail = PrimaryNode(composer_requirements.primary[0])
            chain.add_node(tail)
            for model_name in composer_requirements.secondary:
                successor = SecondaryNode(model_name)
                successor.nodes_from = [tail]
                chain.add_node(successor)
                tail = successor
            return chain

        raise NotImplementedError()
Esempio n. 3
0
def get_simple_chain():
    """Return a chain whose 'logit' root is fed by 'xgboost' and 'knn' primary nodes."""
    xgboost_node = PrimaryNode(model_type='xgboost')
    knn_node = PrimaryNode(model_type='knn')
    root = SecondaryNode(model_type='logit',
                         nodes_from=[xgboost_node, knn_node])
    return Chain(root)
Esempio n. 4
0
def chain_with_incorrect_decomposition_structure():
    """Return a chain whose 'linear' root is fed by trend and residual data-model nodes.

    NOTE(review): the name suggests this layout is meant to be rejected by a
    validation step; that check is not part of this function.
    """
    trend_node = PrimaryNode(model_type='trend_data_model')
    residual_node = PrimaryNode(model_type='residual_data_model')
    root = SecondaryNode(model_type='linear',
                         nodes_from=[trend_node, residual_node])
    return Chain(root)
Esempio n. 5
0
def chain_with_incorrect_task_type():
    """Return a (chain, task) pair: two 'linear' nodes feeding 'kmeans', plus a classification task."""
    left_linear = PrimaryNode(model_type='linear')
    right_linear = PrimaryNode(model_type='linear')
    root = SecondaryNode(model_type='kmeans',
                         nodes_from=[left_linear, right_linear])

    built_chain = Chain(root)
    classification_task = Task(TaskTypesEnum.classification)
    return built_chain, classification_task
Esempio n. 6
0
def chain_with_pca() -> Chain:
    """Return a chain whose 'rf' root is fed by 'pca_data_model' and 'lda' primary nodes."""
    pca_node = PrimaryNode('pca_data_model')
    lda_node = PrimaryNode('lda')
    root = SecondaryNode('rf', nodes_from=[pca_node, lda_node])
    return Chain(root)
Esempio n. 7
0
def chain_simple() -> Chain:
    """Return a chain whose 'rf' root is fed by 'svc' and 'lda' primary nodes."""
    svc_node = PrimaryNode('svc')
    lda_node = PrimaryNode('lda')
    root = SecondaryNode('rf', nodes_from=[svc_node, lda_node])
    return Chain(root)
Esempio n. 8
0
def chain_with_primary_composition_model():
    """Return a chain that uses 'additive_data_model' both as a primary node and as the root.

    The root is fed by that primary node and by a 'residual_data_model'
    primary node.
    """
    additive_leaf = PrimaryNode(model_type='additive_data_model')
    residual_leaf = PrimaryNode(model_type='residual_data_model')
    root = SecondaryNode(model_type='additive_data_model',
                         nodes_from=[additive_leaf, residual_leaf])
    return Chain(root)
def get_comp_chain():
    """Return a chain whose 'linear' root is fed by 'lstm' and 'rfr' primary nodes.

    The root node is created with ``manual_preprocessing_func=EmptyStrategy``.
    """
    lstm_node = PrimaryNode('lstm')
    rfr_node = PrimaryNode('rfr')

    root = SecondaryNode('linear',
                         nodes_from=[lstm_node, rfr_node],
                         manual_preprocessing_func=EmptyStrategy)
    return Chain(root)
Esempio n. 10
0
def chain_with_incorrect_root_model():
    """Return a chain whose root is 'direct_data_model', fed by two 'logit' primary nodes.

    NOTE(review): the name suggests this root model is expected to fail a
    validation step; that check is not part of this function.
    """
    left_logit = PrimaryNode(model_type='logit')
    right_logit = PrimaryNode(model_type='logit')
    root = SecondaryNode(model_type='direct_data_model',
                         nodes_from=[left_logit, right_logit])
    return Chain(root)
Esempio n. 11
0
def get_class_chain():
    """Return a chain whose 'logit' root is fed by 'xgboost' and 'knn' primary nodes."""
    # Chain composition
    xgboost_node = PrimaryNode(model_type='xgboost')
    knn_node = PrimaryNode(model_type='knn')
    root = SecondaryNode(model_type='logit',
                         nodes_from=[xgboost_node, knn_node])

    result = Chain()
    result.add_node(root)
    return result
Esempio n. 12
0
def two_level_chain():
    """Return a chain with 'logit' and 'knn' primary nodes feeding an 'xgboost' node.

    All three nodes are added to the chain explicitly, parents first.
    """
    logit_node = PrimaryNode(model_type='logit')
    knn_node = PrimaryNode(model_type='knn')
    xgboost_node = SecondaryNode(model_type='xgboost',
                                 nodes_from=[logit_node, knn_node])

    result = Chain()
    for member in (logit_node, knn_node, xgboost_node):
        result.add_node(member)
    return result
Esempio n. 13
0
def get_regr_chain():
    """Return a chain whose 'linear' root is fed by 'xgbreg' and 'knnreg' primary nodes."""
    # Chain composition
    xgbreg_node = PrimaryNode(model_type='xgbreg')
    knnreg_node = PrimaryNode(model_type='knnreg')
    root = SecondaryNode(model_type='linear',
                         nodes_from=[xgbreg_node, knnreg_node])

    result = Chain()
    result.add_node(root)
    return result
Esempio n. 14
0
def compose_chain() -> Chain:
    """Return a chain whose 'rf' node takes 'svc' and 'lda' primary nodes as parents.

    Only the 'rf' node is passed to ``add_node``.
    """
    result = Chain()
    svc_node = PrimaryNode('svc')
    lda_node = PrimaryNode('lda')
    rf_node = SecondaryNode('rf')

    # Attach the parents one by one, in the order svc, lda.
    for parent in (svc_node, lda_node):
        rf_node.nodes_from.append(parent)

    result.add_node(rf_node)
    return result
Esempio n. 15
0
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that a secondary node gives the same output for any order of its parents.

    Two chains with the same models are built: one lists the parents of the
    'xgboost' node as (logit, lda, knn), the other uses deep copies of those
    nodes listed as (knn, logit, lda). Fit and predict outputs of both chains
    are compared, then the parents of the already fitted first chain are
    reordered, the chain is refitted and its prediction is compared again.
    """
    data = data_setup
    train, test = train_test_data_setup(data)
    first = PrimaryNode(model_type='logit')
    second = PrimaryNode(model_type='lda')
    third = PrimaryNode(model_type='knn')
    final = SecondaryNode(model_type='xgboost',
                          nodes_from=[first, second, third])

    chain = Chain()
    # 'final' is added last, so it sits at index 3 of chain.nodes (used below)
    for node in [first, second, third, final]:
        chain.add_node(node)

    # copies are taken before any fit, so the second chain gets its own nodes
    first = deepcopy(first)
    second = deepcopy(second)
    third = deepcopy(third)
    final_shuffled = SecondaryNode(model_type='xgboost',
                                   nodes_from=[third, first, second])

    chain_shuffled = Chain()
    # change order of nodes in list
    for node in [final_shuffled, third, first, second]:
        chain_shuffled.add_node(node)

    train_predicted = chain.fit(input_data=train)

    train_predicted_shuffled = chain_shuffled.fit(input_data=train)

    # train results should be invariant
    assert chain.root_node.descriptive_id == chain_shuffled.root_node.descriptive_id
    assert np.equal(train_predicted.predict,
                    train_predicted_shuffled.predict).all()

    test_predicted = chain.predict(input_data=test)
    test_predicted_shuffled = chain_shuffled.predict(input_data=test)

    # predict results should be invariant
    assert np.equal(test_predicted.predict,
                    test_predicted_shuffled.predict).all()

    # change parents order for the nodes fitted chain
    nodes_for_change = chain.nodes[3].nodes_from
    chain.nodes[3].nodes_from = [
        nodes_for_change[2], nodes_for_change[0], nodes_for_change[1]
    ]
    # clear the cache of the reordered node before refitting
    chain.nodes[3].cache.clear()
    chain.fit(train)
    test_predicted_re_shuffled = chain.predict(input_data=test)

    # predict results should be invariant
    assert np.equal(test_predicted.predict,
                    test_predicted_re_shuffled.predict).all()
Esempio n. 16
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=64,
                                     is_visualise=False):
    """Fit three forecasting chains on the same data and print their validation metrics.

    The chains are the composite chain from ``get_composite_lstm_chain()``,
    a single 'lstm' node and a single 'ridge' node. Both file paths are
    joined onto ``project_root()``.

    :param train_file_path: path of the training CSV, relative to the project root
    :param test_file_path: path of the validation CSV, relative to the project root
    :param forecast_length: forwarded to ``TsForecastingParams``
    :param max_window_size: forwarded to ``TsForecastingParams``
    :param is_visualise: forwarded to ``calculate_validation_metric``
    :return: the validation metric of the single-'ridge' chain
    """
    # specify the task to solve
    forecasting_params = TsForecastingParams(forecast_length=forecast_length,
                                             max_window_size=max_window_size)
    forecasting_task = Task(TaskTypesEnum.ts_forecasting, forecasting_params)

    train_path = os.path.join(str(project_root()), train_file_path)
    train_set = InputData.from_csv(train_path,
                                   task=forecasting_task,
                                   data_type=DataTypesEnum.ts)

    # a dataset for a final validation of the composed model
    valid_path = os.path.join(str(project_root()), test_file_path)
    valid_set = InputData.from_csv(valid_path,
                                   task=forecasting_task,
                                   data_type=DataTypesEnum.ts)

    composite_chain = get_composite_lstm_chain()

    ridge_chain = Chain()
    ridge_chain.add_node(PrimaryNode('ridge'))

    lstm_chain = Chain()
    lstm_chain.add_node(PrimaryNode('lstm'))

    composite_chain.fit(input_data=train_set, verbose=False)
    composite_rmse = calculate_validation_metric(
        composite_chain.predict(valid_set), valid_set,
        f'full-composite_{forecast_length}', is_visualise)

    lstm_chain.fit(input_data=train_set, verbose=False)
    lstm_rmse = calculate_validation_metric(
        lstm_chain.predict(valid_set), valid_set,
        f'full-lstm-only_{forecast_length}', is_visualise)

    ridge_chain.fit(input_data=train_set, verbose=False)
    ridge_rmse = calculate_validation_metric(
        ridge_chain.predict(valid_set), valid_set,
        f'full-simple_{forecast_length}', is_visualise)

    print(f'RMSE composite: {composite_rmse}')
    print(f'RMSE simple: {ridge_rmse}')
    print(f'RMSE LSTM only: {lstm_rmse}')

    return ridge_rmse
Esempio n. 17
0
def get_composite_lstm_chain():
    """Return a two-branch chain merged by an 'additive_data_model' root.

    Branches: 'trend_data_model' -> 'lasso' and
    'residual_data_model' -> 'ridge'. Only the root is passed to ``add_node``.
    """
    result = Chain()

    trend_source = PrimaryNode('trend_data_model')
    trend_branch = SecondaryNode('lasso', nodes_from=[trend_source])

    residual_source = PrimaryNode('residual_data_model')
    residual_branch = SecondaryNode('ridge', nodes_from=[residual_source])

    # The residual branch is listed before the trend branch.
    root = SecondaryNode('additive_data_model',
                         nodes_from=[residual_branch, trend_branch])
    result.add_node(root)
    return result
Esempio n. 18
0
def test_eval_strategy_logreg(data_setup):
    """Check that a 'logit' node predicts as many values as a reference LogisticRegression.

    Only the lengths of the two prediction arrays are compared.
    """
    train, test = train_test_data_setup(data=data_setup)

    reference_model = LogisticRegression(C=10., random_state=1,
                                         solver='liblinear',
                                         max_iter=10000, verbose=0)
    reference_model.fit(train.features, train.target)
    reference_labels = reference_model.predict(test.features)

    logit_node = PrimaryNode(model_type='logit')
    logit_node.fit(input_data=train)
    node_output = logit_node.predict(input_data=test)

    assert len(node_output.predict) == len(reference_labels)
Esempio n. 19
0
def test_regression_chain_with_datamodel_fit_correct():
    """Check that a regression chain with a 'direct_data_model' parent predicts the target's shape.

    The 'lasso' root takes a 'ridge' node and a 'direct_data_model' node
    as parents.
    """
    train_part, test_part = train_test_data_setup(
        get_synthetic_regression_data())

    passthrough_node = PrimaryNode('direct_data_model')
    ridge_node = PrimaryNode('ridge')
    lasso_root = SecondaryNode('lasso')
    lasso_root.nodes_from = [ridge_node, passthrough_node]

    regression_chain = Chain(lasso_root)
    regression_chain.fit(train_part)
    prediction = regression_chain.predict(test_part)

    assert prediction.predict.shape == test_part.target.shape
Esempio n. 20
0
def get_composite_lstm_chain():
    """Return a two-branch chain merged by an 'additive_data_model' root, with "fixed" labels.

    Branches: 'trend_data_model' -> 'linear' and
    'residual_data_model' -> 'linear'. The trend data-model node and the
    root get ``labels = ["fixed"]``. Only the root is passed to ``add_node``.
    """
    result = Chain()

    trend_source = PrimaryNode('trend_data_model')
    trend_source.labels = ["fixed"]
    trend_branch = SecondaryNode('linear', nodes_from=[trend_source])
    # NOTE(review): this repeats the assignment made two lines above on the
    # same node; possibly another node was meant - confirm with the author.
    trend_source.labels = ["fixed"]

    residual_source = PrimaryNode('residual_data_model')
    residual_branch = SecondaryNode('linear', nodes_from=[residual_source])

    # The residual branch is listed before the trend branch.
    root = SecondaryNode('additive_data_model',
                         nodes_from=[residual_branch, trend_branch])
    root.labels = ["fixed"]
    result.add_node(root)
    return result
Esempio n. 21
0
def get_decomposed_chain(model_trend='lstm', model_residual='ridge'):
    """Return a trend/residual chain merged by an 'additive_data_model' root.

    Branches: 'trend_data_model' -> ``model_trend`` and
    'residual_data_model' -> ``model_residual``. Only the root is passed to
    ``add_node``.

    :param model_trend: model type of the node fitted on the trend branch
    :param model_residual: model type of the node fitted on the residual branch
    :return: the assembled chain
    """
    chain = Chain()
    node_trend = PrimaryNode('trend_data_model')
    # Honour the requested trend model; previously 'lstm' was hard-coded here
    # and the `model_trend` argument only influenced the params below.
    node_first_trend = SecondaryNode(model_trend, nodes_from=[node_trend])

    if model_trend == 'lstm':
        # decrease the number of epochs to fit
        node_first_trend.model.params = {'epochs': 1}

    node_residual = PrimaryNode('residual_data_model')
    node_model_residual = SecondaryNode(model_residual,
                                        nodes_from=[node_residual])

    node_final = SecondaryNode(
        'additive_data_model',
        nodes_from=[node_model_residual, node_first_trend])
    chain.add_node(node_final)
    return chain
Esempio n. 22
0
def test_node_factory_log_reg_correct(data_setup):
    """Check that PrimaryNode('logit') is a PrimaryNode whose model has the same class as Model('logit')."""
    requested_type = 'logit'
    logit_node = PrimaryNode(model_type=requested_type)

    reference_class = Model(model_type=requested_type).__class__
    node_model_class = logit_node.model.__class__

    assert logit_node.__class__ == PrimaryNode
    assert reference_class == node_model_class
Esempio n. 23
0
def test_chain_with_datamodel_fit_correct(data_setup):
    """Check that a chain with a 'direct_data_model' parent yields labels shaped like the target.

    The 'rf' node takes a 'bernb' node and a 'direct_data_model' node as
    parents; the chain prediction is passed through ``probs_to_labels``
    before the shape comparison.
    """
    train_part, test_part = train_test_data_setup(data_setup)

    result_chain = Chain()
    passthrough_node = PrimaryNode('direct_data_model')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    rf_node.nodes_from = [bernb_node, passthrough_node]

    for member in (passthrough_node, bernb_node, rf_node):
        result_chain.add_node(member)

    result_chain.fit(train_part)
    raw_prediction = result_chain.predict(test_part).predict
    labels = np.asarray(probs_to_labels(raw_prediction))

    assert labels.shape == test_part.target.shape
Esempio n. 24
0
def chain_fourth():
    """Return ``chain_first()`` with two nodes under the root's first parent replaced.

    The intended result, as drawn by the original author:

             XG
         |         \\
        XG          KNN
      |    \\        |  \\
    QDA     KNN     LR  LDA
    |  \\    |    \\
    RF  RF  KNN KNN
    """
    result = chain_first()

    # 'qda' node with two 'rf' parents replaces the second grandparent.
    qda_node = SecondaryNode('qda')
    for leaf_type in ('rf', 'rf'):
        qda_node.nodes_from.append(PrimaryNode(leaf_type))
    target = result.root_node.nodes_from[0].nodes_from[1]
    result.replace_node_with_parents(target, qda_node)

    # 'knn' node with two 'knn' parents replaces the first grandparent.
    knn_node = SecondaryNode('knn')
    for leaf_type in ('knn', 'knn'):
        knn_node.nodes_from.append(PrimaryNode(leaf_type))
    target = result.root_node.nodes_from[0].nodes_from[0]
    result.replace_node_with_parents(target, knn_node)

    return result
Esempio n. 25
0
def default_valid_chain():
    """Return a diamond-shaped chain of 'logit' nodes.

    One primary node feeds two secondary nodes, and both of them feed the
    root.
    """
    source = PrimaryNode(model_type='logit')
    left_branch = SecondaryNode(model_type='logit', nodes_from=[source])
    right_branch = SecondaryNode(model_type='logit', nodes_from=[source])
    root = SecondaryNode(model_type='logit',
                         nodes_from=[left_branch, right_branch])
    return Chain(root)
Esempio n. 26
0
def baseline_chain():
    """Return a chain whose 'xgboost' node is fed by 'knn' and 'logit' primary nodes.

    Each primary node is added to the chain as it is created; the 'xgboost'
    node is added last.
    """
    result = Chain()
    root = SecondaryNode(model_type='xgboost', nodes_from=[])
    for model_name in ('knn', 'logit'):
        leaf = PrimaryNode(model_name)
        result.add_node(leaf)
        root.nodes_from.append(leaf)
    result.add_node(root)
    return result
Esempio n. 27
0
def chain_with_cycle():
    """Return a chain of 'logit' nodes in which two secondary nodes are each other's parent."""
    source = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[source])
    top = SecondaryNode(model_type='logit', nodes_from=[middle, source])
    # Close the loop: 'top' depends on 'middle' and now 'middle' on 'top'.
    middle.nodes_from.append(top)

    result = Chain()
    for member in (source, middle, top):
        result.add_node(member)
    return result
Esempio n. 28
0
def chain_with_multiple_roots():
    """Return a chain in which one 'logit' primary node feeds two separate 'logit' secondary nodes.

    Neither secondary node is a parent of another node.
    """
    source = PrimaryNode(model_type='logit')
    left_root = SecondaryNode(model_type='logit', nodes_from=[source])
    right_root = SecondaryNode(model_type='logit', nodes_from=[source])

    result = Chain()
    for member in (source, left_root, right_root):
        result.add_node(member)
    return result
Esempio n. 29
0
def chain_with_self_cycle():
    """Return a chain whose 'logit' secondary node lists itself among its parents."""
    source = PrimaryNode(model_type='logit')
    looped = SecondaryNode(model_type='logit', nodes_from=[source])
    # The node becomes its own parent.
    looped.nodes_from.append(looped)

    result = Chain()
    for member in (source, looped):
        result.add_node(member)
    return result
Esempio n. 30
0
def chain_third():
    """Return a chain whose 'qda' node is fed by two 'rf' primary nodes.

    The 'qda' node is added to the chain first, followed by each of its
    parents.
    """
    #    QDA
    #  |     \
    # RF     RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # Plain loop: add_node is called for its side effect only, so there is
    # no reason to build a throwaway list of its return values.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain