Exemple #1
0
def run_svm_using_test_train_split(data, data_with_features, split, task):
    """
    Fit an SVM on a train/test split, then report and save its performance.

    @param data: object exposing ``split_train_test(split, stratify=...,
        task=...)``; that call must return
        ``(x_train, y_train), (x_test, y_test)``
    @param data_with_features: feature table indexed through ``.loc`` with the
        ``x_train`` / ``x_test`` values produced by the split
    @param split: forwarded unchanged to ``data.split_train_test``
    @param task: forwarded unchanged to ``data.split_train_test``
    @return: None; the reports are handed to ``report_performance`` (called
        with ``plot=True, verbose=True``) and then to ``save2file``
    """

    # split training set and test set
    (x_train, y_train), (x_test, y_test) = data.split_train_test(
        split, stratify=True, task=task)
    x_train_with_features = data_with_features.loc[x_train]
    x_test_with_features = data_with_features.loc[x_test]

    # train model
    # NOTE(review): probability=True enables predict_proba, which this
    # function never calls; it is kept so the fitted model is unchanged.
    clf = svm.SVC(gamma='scale',
                  decision_function_shape='ovr',
                  probability=True)
    clf.fit(x_train_with_features, y_train)

    def _score_and_predict(features):
        # Evaluate the decision function once and reuse it for both outputs.
        scores = clf.decision_function(features)
        if scores.ndim == 1:
            # Two-class problem: scikit-learn returns a single signed score
            # per sample, and a positive score selects clf.classes_[1].
            labels = clf.classes_[(scores > 0).astype(int)]
        else:
            # Column j scores clf.classes_[j]; translate the winning column
            # back into the label space of y so it is comparable to y_true.
            labels = clf.classes_[scores.argmax(axis=1)]
        return labels, scores

    # The "*_pred_proba" values are decision-function scores, not
    # probabilities; the names are kept to match the sibling runners.
    y_train_pred, y_train_pred_proba = _score_and_predict(
        x_train_with_features)
    y_test_pred, y_test_pred_proba = _score_and_predict(x_test_with_features)

    # report performance of model
    print('=======training set=======')
    train_report_np, train_columns, train_index = report_performance(
        y_train,
        y_train_pred,
        y_train_pred_proba,
        np.unique(y_train),
        plot=True,
        verbose=True,
        return_value_for_cv=True)
    print('=======test set=======')
    test_report_np, test_columns, test_index = report_performance(
        y_test,
        y_test_pred,
        y_test_pred_proba,
        np.unique(y_test),
        plot=True,
        verbose=True,
        return_value_for_cv=True)

    #========= save to file=========
    save2file(train_report_np, train_columns, train_index, test_report_np,
              test_columns, test_index)
Exemple #2
0
def run_rf_using_test_train_split(data, data_with_features, split, task):
    """
    Fit a random forest on a train/test split, then report and save results.

    @param data: object exposing ``split_train_test(split, stratify=...,
        task=...)``; that call must return
        ``(x_train, y_train), (x_test, y_test)``
    @param data_with_features: feature table indexed through ``.loc`` with the
        ``x_train`` / ``x_test`` values produced by the split
    @param split: forwarded unchanged to ``data.split_train_test``
    @param task: forwarded unchanged to ``data.split_train_test``
    @return: None; the reports are handed to ``report_performance`` (called
        with ``plot=True, verbose=True``) and then to ``save2file``
    """

    # split training set and test set
    (x_train, y_train), (x_test, y_test) = data.split_train_test(
        split, stratify=True, task=task)
    x_train_with_features = data_with_features.loc[x_train]
    x_test_with_features = data_with_features.loc[x_test]

    # train model
    clf = RandomForestClassifier(max_depth=2, random_state=0)
    clf.fit(x_train_with_features, y_train)

    def _proba_and_predict(features):
        # Compute the class probabilities once and derive the labels from
        # them instead of running the forest a second time.
        proba = clf.predict_proba(features)
        # Column j of predict_proba belongs to clf.classes_[j]; map the
        # winning column back to the actual class label so the prediction
        # lives in the same label space as y_true.
        return clf.classes_[proba.argmax(axis=1)], proba

    ## train set
    y_train_pred, y_train_pred_proba = _proba_and_predict(
        x_train_with_features)

    ## test set
    y_test_pred, y_test_pred_proba = _proba_and_predict(x_test_with_features)

    # report performance of model
    print('=======training set=======')
    train_report_np, train_columns, train_index = report_performance(
        y_train,
        y_train_pred,
        y_train_pred_proba,
        np.unique(y_train),
        plot=True,
        verbose=True,
        return_value_for_cv=True)
    print('=======test set=======')
    test_report_np, test_columns, test_index = report_performance(
        y_test,
        y_test_pred,
        y_test_pred_proba,
        np.unique(y_test),
        plot=True,
        verbose=True,
        return_value_for_cv=True)

    #========= save to file=========
    save2file(train_report_np, train_columns, train_index, test_report_np,
              test_columns, test_index)
Exemple #3
0
def run_neural_network_using_train_test_split(data,
                                              x_with_features,
                                              task,
                                              splitted_edges_dir,
                                              split=None,
                                              split_by_node=None):
    """
    Train the neural network on an edge train/test split and save the reports.

    @param data: object exposing ``split_train_test``; the call must return
        two 2-D arrays ``(train_set_np, test_set_np)`` whose first two columns
        are the two endpoint ids of an edge and whose last column is the label
    @param x_with_features: pandas table of features looked up by endpoint id
        (its index is compared as strings); it is not modified
    @param task: forwarded unchanged to ``data.split_train_test``
    @param splitted_edges_dir: forwarded unchanged to
        ``data.split_train_test``
    @param split: forwarded unchanged to ``data.split_train_test``
    @param split_by_node: forwarded unchanged to ``data.split_train_test``
    @return: None; the reports are handed to ``report_performance`` and then
        to ``save2file``
    @raise ValueError: if any edge endpoint has missing (NaN) features
    """
    # Imported at function scope so both the helper below and the
    # np.unique calls further down resolve the same name.
    import numpy as np

    # TODO make sure that x_with_features have row = edges and columns = features dimension
    # NOTE(review): the lookups below index rows by a single endpoint id,
    # not by edge - confirm the intended layout.

    train_set_np, test_set_np = data.split_train_test(
        split,
        stratify=True,
        task=task,
        splitted_edges_dir=splitted_edges_dir,
        split_by_node=split_by_node)

    # Endpoint ids are used as row keys into the feature table.
    x_train_ind = train_set_np[:, :2]
    y_train = train_set_np[:, -1].astype(float)
    x_test_ind = test_set_np[:, :2]
    y_test = test_set_np[:, -1].astype(float)

    # Work on a shallow copy so that stringifying the index does not
    # overwrite the index of the caller's DataFrame.
    features = x_with_features.copy(deep=False)
    features.index = features.index.map(str)

    def _edge_features(edge_ind, subset):
        # Build one row per edge: features of endpoint 0 followed by the
        # features of endpoint 1.
        left = features.reindex(edge_ind[:, 0])
        right = features.reindex(edge_ind[:, 1])
        # Dropping NaN rows independently per endpoint would shift rows and
        # break the alignment with the labels, so fail loudly instead.
        incomplete = (left.isna().any(axis=1).to_numpy()
                      | right.isna().any(axis=1).to_numpy())
        if incomplete.any():
            raise ValueError(
                '%d of %d %s edges have an endpoint with missing features' %
                (int(incomplete.sum()), edge_ind.shape[0], subset))
        return np.concatenate([left.to_numpy(), right.to_numpy()], axis=1)

    x_train = _edge_features(x_train_ind, 'train')
    x_test = _edge_features(x_test_ind, 'test')

    # TODO pass the correct value in
    (y_train_pred, y_train_pred_proba, y_test_pred,
     y_test_pred_proba) = run_neural_network_for_each_fold(
         x_train, x_test, y_train, y_test)

    print("================training set==================")

    # Train and test report metadata are kept in separate names so the train
    # columns/indices are not overwritten before they are saved.
    train_report_np, train_columns, train_index = report_performance(
        y_train,
        y_train_pred,
        y_train_pred_proba,
        np.unique(y_train),
        verbose=True,
        plot=False,
        return_value_for_cv=True)

    print("================test set==================")

    test_report_np, test_columns, test_index = report_performance(
        y_test,
        y_test_pred,
        y_test_pred_proba,
        np.unique(y_test),
        verbose=True,
        plot=False,
        return_value_for_cv=True)

    save2file(train_report_np, train_columns, train_index, test_report_np,
              test_columns, test_index)
Exemple #4
0
def run_lr_using_test_train_split_with_link_prediction(data, x_with_features,
                                                       task,
                                                       splitted_edges_dir,
                                                       split, split_by_node):
    """
    Fit logistic regression on an edge train/test split and save the reports.

    @param data: object exposing ``split_train_test``; the call must return
        two 2-D arrays ``(train_set_np, test_set_np)`` whose first two columns
        are the two endpoint ids of an edge and whose last column is the label
    @param x_with_features: pandas table of features looked up by endpoint id
        (its index is compared as strings); it is not modified
    @param task: forwarded unchanged to ``data.split_train_test``
    @param splitted_edges_dir: forwarded unchanged to
        ``data.split_train_test``
    @param split: forwarded unchanged to ``data.split_train_test``
    @param split_by_node: forwarded unchanged to ``data.split_train_test``
    @return: None; the reports are handed to ``report_performance`` (called
        with ``plot=True, verbose=True``) and then to ``save2file``
    @raise ValueError: if any edge endpoint has missing (NaN) features
    """
    import numpy as np

    train_set_np, test_set_np = data.split_train_test(
        split,
        stratify=True,
        task=task,
        splitted_edges_dir=splitted_edges_dir,
        split_by_node=split_by_node)

    # Endpoint ids are used as row keys into the feature table.
    x_train_ind = train_set_np[:, :2]
    y_train = train_set_np[:, -1].astype(float)
    x_test_ind = test_set_np[:, :2]
    y_test = test_set_np[:, -1].astype(float)

    # Work on a shallow copy so that stringifying the index does not
    # overwrite the index of the caller's DataFrame.
    features = x_with_features.copy(deep=False)
    features.index = features.index.map(str)

    def _edge_features(edge_ind, subset):
        # Build one row per edge: features of endpoint 0 followed by the
        # features of endpoint 1.
        left = features.reindex(edge_ind[:, 0])
        right = features.reindex(edge_ind[:, 1])
        # Dropping NaN rows independently per endpoint would shift rows and
        # break the alignment with the labels, so fail loudly instead.
        incomplete = (left.isna().any(axis=1).to_numpy()
                      | right.isna().any(axis=1).to_numpy())
        if incomplete.any():
            raise ValueError(
                '%d of %d %s edges have an endpoint with missing features' %
                (int(incomplete.sum()), edge_ind.shape[0], subset))
        return np.concatenate([left.to_numpy(), right.to_numpy()], axis=1)

    x_train = _edge_features(x_train_ind, 'train')
    x_test = _edge_features(x_test_ind, 'test')

    # train model
    clf = LogisticRegression(random_state=0)
    clf.fit(x_train, y_train)

    def _proba_and_predict(samples):
        # Compute the class probabilities once and derive the labels from
        # them. Column j belongs to clf.classes_[j], so the winning column
        # is mapped back to the real label (labels were cast to float above).
        proba = clf.predict_proba(samples)
        return clf.classes_[proba.argmax(axis=1)], proba

    ## train set
    y_train_pred, y_train_pred_proba = _proba_and_predict(x_train)

    ## test set
    y_test_pred, y_test_pred_proba = _proba_and_predict(x_test)

    # report performance of model
    print('=======training set=======')
    train_report_np, train_columns, train_index = report_performance(
        y_train,
        y_train_pred,
        y_train_pred_proba,
        np.unique(y_train),
        plot=True,
        verbose=True,
        return_value_for_cv=True)
    print('=======test set=======')
    test_report_np, test_columns, test_index = report_performance(
        y_test,
        y_test_pred,
        y_test_pred_proba,
        np.unique(y_test),
        plot=True,
        verbose=True,
        return_value_for_cv=True)

    #========= save to file=========
    save2file(train_report_np, train_columns, train_index, test_report_np,
              test_columns, test_index)