Example #1
0
def test_deep_pr_classification(graph, delimiter):
    """Evaluate ``convolutional_model`` on an embedding of ``graph``.

    The embedding comes from ``hinmine_embedding_pr(..., from_mat=True)``
    when ``graph`` contains ``".mat"``; otherwise it is produced by
    ``decompose_test(graph, "---")``. The features are standardised, then
    three shuffle splits (50% held out) are scored: predictions are
    binarised at 0.5 and compared against the targets with micro and macro
    F1. All results are printed; nothing is returned.

    Args:
        graph: Path of the input network.
        delimiter: Delimiter passed to ``load_hinmine_object`` for ``.mat``
            inputs only; other inputs always use ``"---"``.
    """
    if ".mat" in graph:
        example_net = load_hinmine_object(graph,
                                          delimiter)  ## add support for weight
        embedding = hinmine_embedding_pr(example_net,
                                         parallel=True,
                                         verbose=True,
                                         use_decomposition=False,
                                         from_mat=True)
    else:
        embedding = decompose_test(graph, "---")
    print("Trainset dimension {}, testset dimension {}.".format(
        embedding['data'].shape, embedding['targets'].shape))

    ##### learn #####

    rs = ShuffleSplit(3, test_size=0.5, random_state=42)
    threshold = 0.5
    models_results = []

    ## NOTE(review): the scaler is fitted on every row before the split, so
    ## held-out rows influence the scaling -- confirm this is intended.
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    embedding['data'] = scaler.fit_transform(embedding['data'])

    for batch, (train_index, test_index) in enumerate(
            rs.split(embedding['targets']), start=1):

        print("Fold: {}".format(batch))

        train_X = embedding['data'][train_index]
        train_Y = embedding['targets'][train_index]
        test_X = embedding['data'][test_index]
        test_Y = embedding['targets'][test_index]

        ## for m in models...

        preds = convolutional_model(train_X, train_Y, test_X, vtag=0)
        #        preds = baseline_dense_model(train_X, train_Y,test_X,vtag=0)

        ## Binarise in place. The positive class must be written as 1 (it was
        ## 13 before), otherwise the predicted labels never match the
        ## targets. The value 1 is >= threshold, so the second assignment
        ## cannot undo the first.
        preds[preds >= threshold] = 1
        preds[preds < threshold] = 0
        sc_micro = f1_score(test_Y, preds, average='micro')
        sc_macro = f1_score(test_Y, preds, average='macro')
        print("This fold's scores; micro: {}, macro: {}".format(
            sc_micro, sc_macro))
        models_results.append((sc_micro, sc_macro))

    ## Only folds with a positive macro score contribute to the summary.
    kept = [(micro, macro) for micro, macro in models_results if macro > 0]
    micros = [micro for micro, _ in kept]
    macros = [macro for _, macro in kept]

    ## np.mean([]) emits a RuntimeWarning; report nan directly when every
    ## fold was filtered out.
    mean_micro = np.mean(micros) if micros else float("nan")
    mean_macro = np.mean(macros) if macros else float("nan")

    print("Model: {} micro: {} macro: {}".format("base", mean_micro,
                                                 mean_macro))

    print("Finished test - deep learning..")
Example #2
0
def test_label_propagation():
    """Decompose the IMDB GML network and print its label-propagation result.

    Loads ``../data/imdb_gml.gml`` with the ``"---"`` delimiter, decomposes
    it with the ``"idf"`` heuristic, runs ``run_label_propagation`` with
    ``weights="balanced"`` and prints whatever that call returns.
    """
    ## add support for weight
    network = load_hinmine_object("../data/imdb_gml.gml", "---")

    ## split and re-weight
    print("Beginning decomposition..")
    idf_decomposition = hinmine_decompose(network,
                                          heuristic="idf",
                                          cycle=None)

    print("Beginning label propagation..")
    propagated = run_label_propagation(idf_decomposition, weights="balanced")
    print(propagated)
Example #3
0
def test_weighted_embedding(graph, delimiter):
    """Embed ``graph`` using the edge attribute ``"weight"``.

    The network is loaded with ``weight_tag="weight"`` and handed straight
    to ``hinmine_embedding_pr`` without decomposition. The embedding is not
    returned.

    Args:
        graph: Path of the input network.
        delimiter: Currently unused; the loader is always called with
            ``"---"``.
    """
    print("Weighted embedding test - weighted")
    weighted_net = load_hinmine_object(graph, "---", weight_tag="weight")

    print("embedding in progress..")
    pr_options = {
        "parallel": True,
        "verbose": True,
        "use_decomposition": False,
        "from_mat": False,
    }
    hinmine_embedding_pr(weighted_net, **pr_options)
Example #4
0
def decompose_test(fname, out, name):
    """Decompose a network with each heuristic and save every result as .mat.

    For each heuristic, the decomposition and the label matrix are written
    to ``out + name + "_" + <heuristic> + ".mat"`` under the keys
    ``'network'`` and ``'group'``.

    Args:
        fname: Path of the input network (loaded with the ``"---"``
            delimiter).
        out: Prefix of the output path; concatenated as-is, so include a
            trailing separator if it is a directory.
        name: Base name placed between ``out`` and the heuristic suffix.
    """
    ## add support for weight
    network = load_hinmine_object(fname, "---")

    ## split and re-weight
    print("Beginning decomposition..")
    # c2 = ["movie_____features_____person_____acts_in_____movie"]
    for heuristic in ("idf", "tf", "chi", "ig", "gr", "delta", "rf", "okapi"):
        result = hinmine_decompose(network,
                                   heuristic=heuristic,
                                   cycle=None,
                                   parallel=True)
        decomposition = result.decomposed['decomposition']
        label_matrix = result.label_matrix
        target_path = "".join([out, name, "_", heuristic, ".mat"])
        payload = {'group': label_matrix, 'network': decomposition}
        sio.savemat(target_path, payload)
Example #5
0
def decompose_test(fname, delim):
    """Load a network, decompose it with ``"idf"`` and return its embedding.

    Args:
        fname: Path of the input network.
        delim: Delimiter passed to ``load_hinmine_object``.

    Returns:
        The object returned by ``hinmine_embedding_pr``; it is indexed here
        with the keys ``'data'`` and ``'targets'``.
    """
    ## add support for weight
    network = load_hinmine_object(fname, delim)

    ## split and re-weight
    print("Beginning decomposition..")
    # c2 = ["movie_____features_____person_____acts_in_____movie"]
    idf_decomposition = hinmine_decompose(network,
                                          heuristic="idf",
                                          cycle=None,
                                          parallel=True)

    ## embedding
    print("Starting embedding..")
    embedding = hinmine_embedding_pr(idf_decomposition,
                                     parallel=True,
                                     verbose=True)
    data_shape = embedding['data'].shape
    targets_shape = embedding['targets'].shape
    print(data_shape, targets_shape)

    return embedding
Example #6
0
def test_classification(graph, delimiter):
    """Score a one-vs-rest logistic regression on an embedding of ``graph``.

    ``.mat`` inputs are embedded directly (``from_mat=True``); any other
    input is first decomposed with the ``"idf"`` heuristic. Ten shuffle
    splits (50% held out) are evaluated: predicted probabilities are
    binarised at 0.5 and scored with micro and macro F1. Per-fold and mean
    scores are printed; nothing is returned.

    Args:
        graph: Path of the input network.
        delimiter: Delimiter passed to ``load_hinmine_object`` for ``.mat``
            inputs only; other inputs always use ``"---"``.
    """
    ## options shared by both embedding paths
    shared_options = {
        "parallel": True,
        "verbose": True,
        "feature_permutator_first": "0001",
        "deep_embedding": True,
    }

    if ".mat" not in graph:
        ## add support for weight
        network = load_hinmine_object(graph, "---")

        ## split and re-weight
        print("Beginning decomposition..")
        decomposition = hinmine_decompose(network,
                                          heuristic="idf",
                                          cycle=None,
                                          parallel=True)

        ## embedding
        print("Starting embedding..")
        embedding = hinmine_embedding_pr(decomposition,
                                         use_decomposition=True,
                                         from_mat=False,
                                         **shared_options)
    else:
        ## direct decomposition
        network = load_hinmine_object(graph, delimiter)
        embedding = hinmine_embedding_pr(network,
                                         use_decomposition=False,
                                         from_mat=True,
                                         **shared_options)

    print("Trainset dimension {}, testset dimension {}.".format(
        embedding['data'].shape, embedding['targets'].shape))

    ## 10 splits 50% train
    splitter = ShuffleSplit(10, test_size=0.5, random_state=42)
    classifier = OneVsRestClassifier(LogisticRegression(penalty="l2"))
    threshold = 0.5
    fold_scores = []

    for fold, (train_index, test_index) in enumerate(
            splitter.split(embedding['targets']), start=1):
        print("Fold: {}".format(fold))

        train_X = embedding['data'][train_index]
        train_Y = embedding['targets'][train_index]
        test_X = embedding['data'][test_index]
        test_Y = embedding['targets'][test_index]

        fitted = classifier.fit(train_X, train_Y)
        probabilities = fitted.predict_proba(test_X)

        ## binarise in place; 1 is not <= threshold, so the order is safe
        probabilities[probabilities > threshold] = 1
        probabilities[probabilities <= threshold] = 0

        fold_micro = f1_score(test_Y, probabilities, average='micro')
        fold_macro = f1_score(test_Y, probabilities, average='macro')
        print(fold_micro, fold_macro)
        fold_scores.append((fold_micro, fold_macro))

    summary = (
        "LR, t:{}".format(str(threshold)),
        np.mean([micro for micro, _ in fold_scores]),
        np.mean([macro for _, macro in fold_scores]),
    )

    ## single entry today; kept sorted by macro F1 as before
    results = sorted([summary], key=lambda entry: entry[2])

    for cls, score_mi, score_ma in results:
        print(
            "Classifier: {} performed with micro F1 score {} and macro F1 score {}"
            .format(cls, score_mi, score_ma))

    print("Finished test - classification basic")
Example #7
0
def test_laplacian_embedding(graph, delimiter):
    """Evaluate ``convolutional_model`` on a ``hinmine_laplacian`` embedding.

    ``.mat`` inputs are embedded directly (``from_mat=True``); any other
    input is first decomposed with the ``"idf"`` heuristic. Ten shuffle
    splits (50% held out) are evaluated: predictions are binarised at 0.5
    and scored with micro and macro F1. Per-fold and mean scores are
    printed; nothing is returned.

    Args:
        graph: Path of the input network.
        delimiter: Currently unused; both branches load the network with
            ``"---"``.
    """
    if ".mat" in graph:
        example_net = load_hinmine_object(graph,
                                          "---")  ## add support for weight
        embedding = hinmine_laplacian(example_net,
                                      verbose=True,
                                      use_decomposition=False,
                                      from_mat=True)
    else:
        example_net = load_hinmine_object(graph,
                                          "---")  ## add support for weight
        ## split and re-weight
        print("Beginning decomposition..")

        decomposed = hinmine_decompose(example_net,
                                       heuristic="idf",
                                       cycle=None,
                                       parallel=True)
        ## embedding
        print("Starting embedding..")
        embedding = hinmine_laplacian(decomposed, verbose=True)

    print("Trainset dimension {}, testset dimension {}.".format(
        embedding['data'].shape, embedding['targets'].shape))

    rs = ShuffleSplit(10, test_size=0.5, random_state=42)
    results = []
    scores_micro = []
    scores_macro = []
    threshold = 0.5

    ## The unused OneVsRest/LogisticRegression instance was removed: the
    ## predictions below come from convolutional_model only.
    for batch, (train_index, test_index) in enumerate(
            rs.split(embedding['targets']), start=1):

        print("Fold: {}".format(batch))
        train_X = embedding['data'][train_index]
        train_Y = embedding['targets'][train_index]
        test_X = embedding['data'][test_index]
        test_Y = embedding['targets'][test_index]
        model_preds = convolutional_model(train_X, train_Y, test_X, vtag=0)

        ## binarise in place; 1 is not <= threshold, so the order is safe
        model_preds[model_preds > threshold] = 1
        model_preds[model_preds <= threshold] = 0
        sc_micro = f1_score(test_Y, model_preds, average='micro')
        sc_macro = f1_score(test_Y, model_preds, average='macro')
        print(sc_micro, sc_macro)
        scores_micro.append(sc_micro)
        scores_macro.append(sc_macro)

    ## Label the row with the model that actually ran (it was reported as
    ## "LR" before, although no logistic regression was fitted).
    results.append(("convolutional_model, t:{}".format(str(threshold)),
                    np.mean(scores_micro), np.mean(scores_macro)))

    results = sorted(results, key=lambda tup: tup[2])

    for cls, score_mi, score_ma in results:
        print(
            "Classifier: {} performed with micro F1 score {} and macro F1 score {}"
            .format(cls, score_mi, score_ma))

    print("Finished test - laplacian classification basic")