def test_deep_pr_classification(graph, delimiter):
    """Evaluate ``convolutional_model`` on a ``hinmine_embedding_pr`` embedding.

    The embedding is computed directly with ``hinmine_embedding_pr`` when
    ``graph`` contains ``".mat"`` and through ``decompose_test`` otherwise.
    The features are standardised, three 50/50 shuffle splits are evaluated,
    and the micro/macro F1 score of every fold plus their means are printed.

    Args:
        graph: Path of the network file. A name containing ``".mat"`` selects
            the direct embedding path (``use_decomposition=False``).
        delimiter: Delimiter passed to ``load_hinmine_object`` for ``.mat``
            input. Any other input is loaded with the fixed ``"---"``
            delimiter.

    Returns:
        None. All results are printed.
    """
    if ".mat" in graph:
        # TODO: add support for weight
        example_net = load_hinmine_object(graph, delimiter)
        embedding = hinmine_embedding_pr(example_net,
                                         parallel=True,
                                         verbose=True,
                                         use_decomposition=False,
                                         from_mat=True)
    else:
        embedding = decompose_test(graph, "---")

    print("Trainset dimension {}, testset dimension {}.".format(
        embedding['data'].shape, embedding['targets'].shape))

    # ---- learn ----
    rs = ShuffleSplit(3, test_size=0.5, random_state=42)
    batch = 0
    threshold = 0.5
    models_results = []

    # Imported locally, as in the original, so the file's top-level imports
    # do not have to change.
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    # NOTE(review): the scaler is fitted on all rows before splitting, so the
    # test rows of every fold influence the scaling statistics.
    embedding['data'] = scaler.fit_transform(embedding['data'])

    for train_index, test_index in rs.split(embedding['targets']):
        batch += 1
        print("Fold: {}".format(batch))

        train_X = embedding['data'][train_index]
        train_Y = embedding['targets'][train_index]
        test_X = embedding['data'][test_index]
        test_Y = embedding['targets'][test_index]

        preds = convolutional_model(train_X, train_Y, test_X, vtag=0)
        # preds = baseline_dense_model(train_X, train_Y, test_X, vtag=0)

        # Binarise the scores into 0/1 labels. The positive class must be 1
        # (the original wrote 13 here), otherwise the prediction matrix is
        # not a valid label matrix for f1_score.
        preds[preds >= threshold] = 1
        preds[preds < threshold] = 0

        sc_micro = f1_score(test_Y, preds, average='micro')
        sc_macro = f1_score(test_Y, preds, average='macro')
        print("This fold's scores; micro: {}, macro: {}".format(
            sc_micro, sc_macro))
        models_results.append((sc_micro, sc_macro))

    # Folds whose macro F1 is not positive are left out of the summary means.
    kept = [(micro, macro) for micro, macro in models_results if macro > 0]
    micros = [micro for micro, _ in kept]
    macros = [macro for _, macro in kept]

    print("Model: {} micro: {} macro: {}".format("base", np.mean(micros),
                                                 np.mean(macros)))
    print("Finished test - deep learning..")
def test_label_propagation():
    """Decompose the network in ``../data/imdb_gml.gml`` and propagate labels.

    The network is loaded with the ``"---"`` delimiter, decomposed with the
    ``"idf"`` heuristic, and the output of ``run_label_propagation`` (called
    with ``weights="balanced"``) is printed.

    Returns:
        None. The propagation result is only printed.
    """
    # TODO: add support for weight
    network = load_hinmine_object("../data/imdb_gml.gml", "---")

    # Split and re-weight.
    print("Beginning decomposition..")
    decomposition = hinmine_decompose(network, heuristic="idf", cycle=None)

    print("Beginning label propagation..")
    propagated = run_label_propagation(decomposition, weights="balanced")
    print(propagated)
def test_weighted_embedding(graph, delimiter):
    """Compute a ``hinmine_embedding_pr`` embedding of a weighted network.

    Args:
        graph: Path of the network file handed to ``load_hinmine_object``.
        delimiter: Currently unused; the network is always loaded with the
            ``"---"`` delimiter.
            NOTE(review): confirm whether this argument should be forwarded.

    Returns:
        None. The computed embedding is discarded.
    """
    print("Weighted embedding test - weighted")

    # Edge weights are read from the "weight" tag.
    weighted_net = load_hinmine_object(graph, "---", weight_tag="weight")

    print("embedding in progress..")
    embedding_options = {
        "parallel": True,
        "verbose": True,
        "use_decomposition": False,
        "from_mat": False,
    }
    embedding = hinmine_embedding_pr(weighted_net, **embedding_options)
def decompose_test(fname, out, name):
    """Decompose a network with every heuristic and save each result as .mat.

    For each heuristic the label matrix and the decomposed network are written
    with ``sio.savemat`` to ``out + name + "_" + <heuristic> + ".mat"`` under
    the keys ``'group'`` and ``'network'``.

    NOTE(review): a two-argument ``decompose_test`` defined later in this file
    appears to rebind this name; confirm that this version is still reachable.

    Args:
        fname: Path of the network file, loaded with the ``"---"`` delimiter.
        out: Prefix of the output path (concatenated as-is, no separator added).
        name: Base name placed between ``out`` and the heuristic suffix.

    Returns:
        None. Results are written to disk.
    """
    # TODO: add support for weight
    network = load_hinmine_object(fname, "---")

    # Split and re-weight.
    print("Beginning decomposition..")

    # Example cycle left by the original author (not passed; cycle=None):
    # ["movie_____features_____person_____acts_in_____movie"]
    for heuristic in ("idf", "tf", "chi", "ig", "gr", "delta", "rf", "okapi"):
        outcome = hinmine_decompose(network,
                                    heuristic=heuristic,
                                    cycle=None,
                                    parallel=True)
        decomposed_network = outcome.decomposed['decomposition']
        label_matrix = outcome.label_matrix
        payload = {'group': label_matrix, 'network': decomposed_network}
        sio.savemat(out + name + "_" + heuristic + ".mat", payload)
def decompose_test(fname, delim):
    """Load a network, decompose it with ``"idf"`` and embed the result.

    Args:
        fname: Path of the network file.
        delim: Delimiter passed to ``load_hinmine_object``.

    Returns:
        The object returned by ``hinmine_embedding_pr``; its ``'data'`` and
        ``'targets'`` entries are indexed here and their shapes printed.
    """
    # TODO: add support for weight
    network = load_hinmine_object(fname, delim)

    # Split and re-weight.
    print("Beginning decomposition..")
    # Example cycle left by the original author (not passed; cycle=None):
    # ["movie_____features_____person_____acts_in_____movie"]
    decomposition = hinmine_decompose(network,
                                      heuristic="idf",
                                      cycle=None,
                                      parallel=True)

    # Embedding.
    print("Starting embedding..")
    result = hinmine_embedding_pr(decomposition, parallel=True, verbose=True)
    print(result['data'].shape, result['targets'].shape)
    return result
def test_classification(graph, delimiter):
    """Score one-vs-rest logistic regression on a ``hinmine_embedding_pr`` embedding.

    ``.mat`` input is embedded directly; any other input is first decomposed
    with the ``"idf"`` heuristic. Ten 50/50 shuffle splits are evaluated, the
    predicted probabilities are thresholded at 0.5, and the per-fold and mean
    micro/macro F1 scores are printed.

    Args:
        graph: Path of the network file. A name containing ``".mat"`` selects
            the direct embedding path.
        delimiter: Delimiter passed to ``load_hinmine_object`` for ``.mat``
            input. Other inputs are loaded with the fixed ``"---"`` delimiter.

    Returns:
        None. All results are printed.
    """
    # Options shared by both embedding paths.
    shared_options = {
        "parallel": True,
        "verbose": True,
        "feature_permutator_first": "0001",
        "deep_embedding": True,
    }

    if ".mat" in graph:
        # Direct embedding, no decomposition. TODO: add support for weight
        example_net = load_hinmine_object(graph, delimiter)
        embedding = hinmine_embedding_pr(example_net,
                                         use_decomposition=False,
                                         from_mat=True,
                                         **shared_options)
    else:
        # TODO: add support for weight
        example_net = load_hinmine_object(graph, "---")

        # Split and re-weight.
        print("Beginning decomposition..")
        decomposed = hinmine_decompose(example_net,
                                       heuristic="idf",
                                       cycle=None,
                                       parallel=True)

        print("Starting embedding..")
        embedding = hinmine_embedding_pr(decomposed,
                                         use_decomposition=True,
                                         from_mat=False,
                                         **shared_options)

    features = embedding['data']
    targets = embedding['targets']
    print("Trainset dimension {}, testset dimension {}.".format(
        features.shape, targets.shape))

    # 10 splits, 50% train.
    splitter = ShuffleSplit(10, test_size=0.5, random_state=42)
    classifier = OneVsRestClassifier(LogisticRegression(penalty="l2"))
    threshold = 0.5
    scores_micro = []
    scores_macro = []
    # Feature scaling is currently disabled.

    for fold, (train_index, test_index) in enumerate(splitter.split(targets),
                                                     start=1):
        print("Fold: {}".format(fold))

        classifier.fit(features[train_index], targets[train_index])
        model_preds = classifier.predict_proba(features[test_index])

        # Binarise probabilities: strictly above the threshold counts as 1.
        model_preds[model_preds > threshold] = 1
        model_preds[model_preds <= threshold] = 0

        expected = targets[test_index]
        sc_micro = f1_score(expected, model_preds, average='micro')
        sc_macro = f1_score(expected, model_preds, average='macro')
        print(sc_micro, sc_macro)
        scores_micro.append(sc_micro)
        scores_macro.append(sc_macro)

    results = [("LR, t:{}".format(str(threshold)), np.mean(scores_micro),
                np.mean(scores_macro))]
    # Ascending by mean macro F1.
    results.sort(key=lambda tup: tup[2])

    for cls, score_mi, score_ma in results:
        print(
            "Classifier: {} performed with micro F1 score {} and macro F1 score {}"
            .format(cls, score_mi, score_ma))

    print("Finished test - classification basic")
def test_laplacian_embedding(graph, delimiter):
    """Score ``convolutional_model`` on a ``hinmine_laplacian`` embedding.

    ``.mat`` input is embedded directly; any other input is first decomposed
    with the ``"idf"`` heuristic. Ten 50/50 shuffle splits are evaluated, the
    model outputs are thresholded at 0.5, and the per-fold and mean
    micro/macro F1 scores are printed.

    Args:
        graph: Path of the network file. A name containing ``".mat"`` selects
            the direct embedding path.
        delimiter: Currently unused; the network is always loaded with the
            ``"---"`` delimiter.
            NOTE(review): confirm whether this argument should be forwarded.

    Returns:
        None. All results are printed.
    """
    # Both input kinds are loaded the same way. TODO: add support for weight
    example_net = load_hinmine_object(graph, "---")

    if ".mat" in graph:
        embedding = hinmine_laplacian(example_net,
                                      verbose=True,
                                      use_decomposition=False,
                                      from_mat=True)
    else:
        # Split and re-weight.
        print("Beginning decomposition..")
        decomposed = hinmine_decompose(example_net,
                                       heuristic="idf",
                                       cycle=None,
                                       parallel=True)

        print("Starting embedding..")
        embedding = hinmine_laplacian(decomposed, verbose=True)

    features = embedding['data']
    targets = embedding['targets']
    print("Trainset dimension {}, testset dimension {}.".format(
        features.shape, targets.shape))

    splitter = ShuffleSplit(10, test_size=0.5, random_state=42)

    # NOTE(review): this classifier is built but never used below; the
    # predictions come from convolutional_model. Confirm which was intended.
    v = OneVsRestClassifier(LogisticRegression(penalty="l2"))

    threshold = 0.5
    scores_micro = []
    scores_macro = []

    for fold, (train_index, test_index) in enumerate(splitter.split(targets),
                                                     start=1):
        print("Fold: {}".format(fold))

        model_preds = convolutional_model(features[train_index],
                                          targets[train_index],
                                          features[test_index],
                                          vtag=0)

        # Binarise outputs: strictly above the threshold counts as 1.
        model_preds[model_preds > threshold] = 1
        model_preds[model_preds <= threshold] = 0

        expected = targets[test_index]
        sc_micro = f1_score(expected, model_preds, average='micro')
        sc_macro = f1_score(expected, model_preds, average='macro')
        print(sc_micro, sc_macro)
        scores_micro.append(sc_micro)
        scores_macro.append(sc_macro)

    # NOTE(review): the label says "LR" although convolutional_model produced
    # the predictions; kept unchanged.
    results = [("LR, t:{}".format(str(threshold)), np.mean(scores_micro),
                np.mean(scores_macro))]
    # Ascending by mean macro F1.
    results.sort(key=lambda tup: tup[2])

    for cls, score_mi, score_ma in results:
        print(
            "Classifier: {} performed with micro F1 score {} and macro F1 score {}"
            .format(cls, score_mi, score_ma))

    print("Finished test - laplacian classification basic")