def main():
    # Initialize some parameters
    inpath = list()
    nw_names = ['network', 'blogCatalog']   # Stores the names of the networks evaluated
    inpath.append("../evalne/tests/data/network.edgelist")
    # inpath.append("../../data/BlogCatalog/blog.edgelist")
    outpath = "./output/"
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    directed = False            # Indicates whether the graphs are directed or undirected
    delimiters = (',', '\t')    # Indicates the delimiter in each original graph
    repeats = 2                 # Number of times the experiment will be repeated

    # Create a scoresheet to store the results
    scoresheet = Scoresheet(tr_te='test')

    for i in range(len(inpath)):
        # Create folders for the evaluation results (one per input network)
        if not os.path.exists(outpath):
            os.makedirs(outpath)

        # Load and preprocess the graph
        G = preprocess(inpath[i], outpath, delimiters[i], directed)

        # For each repeat of the experiment generate new data splits
        for repeat in range(repeats):
            print('Repetition {} of experiment'.format(repeat))

            # Generate one train/test split with default parameters
            traintest_split = EvalSplit()
            traintest_split.compute_splits(G, nw_name=nw_names[i], train_frac=0.8,
                                           split_id=repeat)
            trainvalid_split = EvalSplit()
            trainvalid_split.compute_splits(traintest_split.TG, nw_name=nw_names[i],
                                            train_frac=0.9, split_id=repeat)

            # Create an evaluator
            nee = LPEvaluator(traintest_split, trainvalid_split)

            # Evaluate baselines
            eval_baselines(nee, directed, scoresheet)

            # Evaluate other NE methods
            eval_other(nee, scoresheet)

    # Write results averaged over exp repeats to a single file
    scoresheet.write_tabular(filename=os.path.join(outpath, 'eval_output.txt'),
                             metric='auroc')

    print("End of evaluation")
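# The functions `eval_baselines` and `eval_other` called by main() are not shown
# in this excerpt. Below is a minimal sketch of what `eval_baselines` could look
# like, assuming only EvalNE's documented `evaluate_baseline` and `log_results`
# calls; the list of heuristics is illustrative, not the original one.
def eval_baselines(nee, directed, scoresheet):
    # Baseline link prediction heuristics to evaluate (illustrative selection)
    methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient',
               'adamic_adar_index', 'preferential_attachment']
    for method in methods:
        # Evaluate each heuristic on the train/test split held by the evaluator
        result = nee.evaluate_baseline(method=method)
        scoresheet.log_results(result)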
def main(args=None):
    cpu_number = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(description='Path of networks')
    parser.add_argument('-n', type=str, help='Multiplex 1')
    parser.add_argument('-m', type=str, help='Multiplex 2')
    parser.add_argument('-b', type=str, help='Bipartite')
    args = parser.parse_args(args)
    print(args)

    ########################################################################
    # Parameters multiverse and train/test
    ########################################################################
    EMBED_DIMENSION = 128
    CLOSEST_NODES = np.int64(300)
    NUM_SAMPLED = np.int64(10)
    LEARNING_RATE = np.float64(0.01)
    KL = False
    NB_CHUNK = np.int64(1)
    CHUNK_SIZE = np.int64(100)
    NUM_STEPS_1 = np.int64(100 * 10**6 / CHUNK_SIZE)

    # If toy example
    # EMBED_DIMENSION = 128
    # CLOSEST_NODES = np.int64(2)
    # NUM_SAMPLED = np.int64(10)
    # LEARNING_RATE = np.float64(0.01)
    # KL = False
    # NB_CHUNK = np.int64(1)
    # CHUNK_SIZE = np.int64(2)
    # NUM_STEPS_1 = np.int64(100*10**6/CHUNK_SIZE)

    train_frac = 0.7
    solver = 'lbfgs'
    max_iter = 1000
    split_alg = 'random'
    lp_model = RandomForestClassifier(n_estimators=400, criterion='gini', max_depth=None,
                                      min_samples_split=2, min_samples_leaf=1,
                                      min_weight_fraction_leaf=0.0, max_features='auto',
                                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                                      min_impurity_split=None, bootstrap=True,
                                      oob_score=True, n_jobs=cpu_number, random_state=777,
                                      verbose=0, warm_start=False)
    graph_name = 'Test_Eval'

    ##################################################################################
    # !! Careful !!
    # Check that the nodes of the bipartite network also appear in the multiplex
    # networks. If they do not, remove from the multiplexes the nodes that are not
    # included in the bipartite network.
    ##################################################################################

    ###################################################################################
    # EvalNE link prediction processing
    ###################################################################################

    data_bipartite = pd.read_csv(args.b, delimiter=' ', header=None)
    data_bipartite = data_bipartite.drop(columns=[0, 3])
    data_bipartite.to_csv('bipartite_2colformat.csv', header=None, index=None, sep=' ')

    G_hetereogeneous = f.preprocess('bipartite_2colformat.csv', '.', ' ', False, False, True)
    print('Preprocessing done')

    G_hetereogeneous_traintest_split = EvalSplit()
    G_hetereogeneous_traintest_split.compute_splits(G_hetereogeneous, split_alg=split_alg,
                                                    train_frac=train_frac, owa=False)
    nee = LPEvaluator(G_hetereogeneous_traintest_split, dim=EMBED_DIMENSION,
                      lp_model=lp_model)
    G_heterogeneous_split = G_hetereogeneous_traintest_split.TG
    os.replace('bipartite_2colformat.csv', './Generated_graphs/' + 'bipartite_2colformat.csv')
    print('Splitting done')

    # Write the bipartite training graph for multiverse in extended edgelist
    # format 'layer n1 n2 weight'
    file_multi = open('bipartite_training_graph_' + '_' + graph_name, 'w+')
    tmp_array_het = np.asarray(G_heterogeneous_split.edges)

    for i in range(len(tmp_array_het[:, 0])):
        # Swap node order so that the bipartite node always comes second
        if tmp_array_het[i, 0] in list(data_bipartite[2]):
            tmp = tmp_array_het[i, 0]
            tmp_array_het[i, 0] = tmp_array_het[i, 1]
            tmp_array_het[i, 1] = tmp

    tmp_array_het = np.hstack((tmp_array_het, np.ones((len(tmp_array_het), 1))))
    tmp_array_het = np.hstack((np.ones((len(tmp_array_het), 1)), tmp_array_het))
    tmp_array_het = np.vstack(tmp_array_het)
    tmp_array_het = np.int_(tmp_array_het)
    np.savetxt(file_multi, tmp_array_het, fmt='%s', delimiter=' ', newline=os.linesep)
    file_multi.close()
    os.replace('bipartite_training_graph_' + '_' + graph_name,
               './Generated_graphs/' + 'bipartite_training_graph_' + '_' + graph_name + '.txt')

    ###################################################################################
    # MULTIVERSE
    ###################################################################################
    r_readRDS = robjects.r['readRDS']

    print('RWR-MH')
    proc = subprocess.Popen(['Rscript', './RWR/GenerateSimMatrix_MH.R',
                             '-n', '.' + args.n,
                             '-m', '.' + args.m,
                             '-b', '../Generated_graphs/' + 'bipartite_training_graph_'
                                   + '_' + graph_name + '.txt',
                             '-o', '../ResultsRWR/MatrixSimilarityMultiplexHet' + graph_name,
                             '-c', str(cpu_number)])
    proc.wait()
    proc.kill()
    print('RWR done')

    r_DistancematrixPPI = r_readRDS('./ResultsRWR/MatrixSimilarityMultiplexHet'
                                    + graph_name + '.rds')
    import gc
    gc.collect()

    ########################################################################
    # Processing of the network
    ########################################################################
    reverse_data_DistancematrixPPI, list_neighbours, nodes, data_DistancematrixPPI, neighborhood, nodesstr \
        = f.netpreprocess_hetero(r_DistancematrixPPI, CLOSEST_NODES)

    ########################################################################
    # Initialization
    ########################################################################
    embeddings = np.random.normal(0, 1, [np.size(nodes), EMBED_DIMENSION])

    ########################################################################
    # Training and saving best embeddings
    ########################################################################
    # Train and test during training
    neighborhood = np.asarray(neighborhood)
    nodes = np.asarray(nodes)
    embeddings = f.train(neighborhood, nodes, list_neighbours, NUM_STEPS_1, NUM_SAMPLED,
                         LEARNING_RATE, CLOSEST_NODES, CHUNK_SIZE, NB_CHUNK, embeddings,
                         reverse_data_DistancematrixPPI)
    X = dict(zip(range(embeddings.shape[0]), embeddings))
    X = {str(int(nodesstr[key]) + 1): X[key] for key in X}
    np.save('embeddings_MH', X)
    date = datetime.datetime.now()
    os.replace('embeddings_MH.npy', './ResultsMultiVERSE/' + 'embeddings_MH.npy')

    ########################################################################
    # Link prediction for evaluation of MH
    ########################################################################
    edge_emb = ['hadamard', 'weighted_l1', 'weighted_l2', 'average', 'cosine']
    results_embeddings_methods = dict()

    for i in range(len(edge_emb)):
        tmp_result_multiverse = nee.evaluate_ne(data_split=nee.traintest_split, X=X,
                                                method="Multiverse",
                                                edge_embed_method=edge_emb[i],
                                                label_binarizer=lp_model)
        results_embeddings_methods[tmp_result_multiverse.method + '_' + str(edge_emb[i])] = \
            tmp_result_multiverse.get_all()[1][4]

    ########################################################################
    # Analysis and saving of the results
    ########################################################################
    Result_file = 'Result_LinkpredMultiplexHet_' + graph_name + '_' + str(date) + '.txt'
    with open(Result_file, "w+") as overall_result:
        print("%s: \n\
EMBED_DIMENSION: %s \n\
CLOSEST_NODES: %s \n\
NUM_STEPS_1: %s \n\
NUM_SAMPLED: %s \n\
LEARNING_RATE: %s \n\
CHUNK_SIZE: %s \n\
NB_CHUNK: %s \n\
train_frac: %s \n\
solver: %s \n\
max_iter: %s \n\
split_alg: %s \n\
" % (str(date), EMBED_DIMENSION, CLOSEST_NODES, NUM_STEPS_1, NUM_SAMPLED,
             LEARNING_RATE, CHUNK_SIZE, NB_CHUNK, train_frac, solver, max_iter,
             split_alg), file=overall_result)
        print('Overall MULTIVERSE AUC hadamard:',
              results_embeddings_methods['Multiverse_hadamard'], file=overall_result)
        print('Overall MULTIVERSE AUC weighted_l1:',
              results_embeddings_methods['Multiverse_weighted_l1'], file=overall_result)
        print('Overall MULTIVERSE AUC weighted_l2:',
              results_embeddings_methods['Multiverse_weighted_l2'], file=overall_result)
        print('Overall MULTIVERSE AUC average:',
              results_embeddings_methods['Multiverse_average'], file=overall_result)
        print('Overall MULTIVERSE AUC cosine:',
              results_embeddings_methods['Multiverse_cosine'], file=overall_result)

    os.replace(Result_file, './ResultsMultiVERSE/' + Result_file)
    print('End')
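# For reference, a minimal sketch of what the edge embedding operators named in
# `edge_emb` above compute for a node pair (u, v) with embeddings x_u and x_v.
# These formulas mirror the standard definitions from the node2vec paper
# (Grover & Leskovec, 2016); EvalNE applies its own implementations internally,
# so this function is illustrative only.
import numpy as np

def edge_embedding(x_u, x_v, method='hadamard'):
    # Combine two node embedding vectors into one edge feature vector
    if method == 'hadamard':
        return x_u * x_v                      # element-wise product
    if method == 'average':
        return (x_u + x_v) / 2.0              # element-wise mean
    if method == 'weighted_l1':
        return np.abs(x_u - x_v)              # element-wise absolute difference
    if method == 'weighted_l2':
        return (x_u - x_v) ** 2               # element-wise squared difference
    if method == 'cosine':
        # Scalar cosine similarity, returned as a 1-d feature
        sim = x_u.dot(x_v) / (np.linalg.norm(x_u) * np.linalg.norm(x_v))
        return np.array([sim])
    raise ValueError('Unknown edge embedding method: {}'.format(method))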
class LinkPredictionTuning(Tuning):
    r"""
    General class for training and testing graph embeddings on the link
    prediction task.

    Parameters
    ----------
    G: NetworkX graph
        Training graph.
    G_test: NetworkX graph
        Test graph.
    root: str
        Directory where the results will be stored.
    """

    def __init__(self, G, G_test, root="results/lp/"):
        super(LinkPredictionTuning, self).__init__(G, root=root)
        self.task = "lp"

        train_E = G.edges
        train_E_false = self.GetNegativeEdges(G, len(train_E))
        test_E = G_test.edges
        test_E_false = self.GetNegativeEdges(G_test, len(test_E))

        self.split = EvalSplit()
        self.split.set_splits(train_E, train_E_false=train_E_false, test_E=test_E,
                              test_E_false=test_E_false, TG=G)

        self.training_graph = create_self_defined_dataset(root_dir="", name_dict={},
                                                          name="training " + self.tipo,
                                                          weighted=True, directed=False,
                                                          attributed=True)()
        self.training_graph.set_g(G)
        self.evaluator = LPEvaluator(self.split)

    def GetNegativeEdges(self, G, n):
        r"""
        Auxiliary method that samples negative edges.

        Parameters
        ----------
        G: NetworkX graph
            Bipartite graph.
        n: int
            Number of edges to sample.
        """
        prop_nodes = [u for u, d in G.nodes(data=True) if d['bipartite'] == 0]
        user_nodes = [u for u, d in G.nodes(data=True) if d['bipartite'] == 1]
        non_edges = []
        while len(non_edges) <= n:
            random_prop = random.choice(prop_nodes)
            random_user = random.choice(user_nodes)
            edge = (random_prop, random_user)
            if G.has_edge(*edge):
                continue
            non_edges.append(edge)
        return non_edges

    def TestModel(self, emb, time=-1, method_name="method_name"):
        r"""
        Evaluates an embedding and logs the result in the scoresheet.

        Parameters
        ----------
        emb: dict
            Embedding dictionary; keys are the nodes and values are lists
            holding each node's embedding.
        time: float
            Execution time of the method, stored in the scoresheet.
        method_name: str
            Name under which the results are logged.
        """
        df = pd.DataFrame(emb).T
        X = df.T.to_dict("list")
        # Values must be arrays because sums are performed on them
        X = {str(k): np.array(v) for k, v in X.items()}
        self.evaluator.dim = df.shape[1]
        reslp = []
        # TODO: make the set of edge embedding methods configurable instead of
        # always evaluating all four of them
        for edge_method in ["weighted_l1", "weighted_l2", "hadamard", "average"]:
            res = self.evaluator.evaluate_ne(self.split, X=X, method=method_name,
                                             edge_embed_method=edge_method,
                                             params={"nw_name": "GPI"})
            res.params.update({'eval_time': time})
            reslp.append(res)
        self.scoresheet.log_results(reslp)
        return reslp
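# A minimal usage sketch for the class above. It assumes bipartite NetworkX
# graphs whose nodes carry the 'bipartite' attribute expected by
# GetNegativeEdges, and that the parent Tuning class provides the remaining
# attributes (e.g. self.tipo and self.scoresheet). The embedding dict `emb` is
# a hypothetical placeholder for the output of any node embedding method.
import networkx as nx
import numpy as np

G_train = nx.complete_bipartite_graph(5, 7)   # toy training graph
G_test = nx.complete_bipartite_graph(5, 7)    # toy test graph

tuner = LinkPredictionTuning(G_train, G_test, root="results/lp/")
emb = {node: np.random.rand(16).tolist() for node in G_train.nodes}  # hypothetical embeddings
results = tuner.TestModel(emb, time=0.0, method_name="random_embedding")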
def main(args=None):
    cpu_number = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(description='Path of networks')
    parser.add_argument('-m', type=str, help='Multiplex')
    args = parser.parse_args(args)
    graph_path = args.m

    ########################################################################
    # Parameters multiverse and train/test
    ########################################################################
    EMBED_DIMENSION = 128
    CLOSEST_NODES = np.int64(20)
    NUM_SAMPLED = np.int64(3)
    LEARNING_RATE = np.float64(0.01)
    NB_CHUNK = np.int64(1)
    CHUNK_SIZE = np.int64(10)
    NUM_STEPS_1 = np.int64(100 * 10**6 / CHUNK_SIZE)
    graph_name = os.path.basename(graph_path)

    train_frac = 0.7
    solver = 'lbfgs'
    max_iter = 2000
    split_alg = 'spanning_tree'
    lp_model = LogisticRegressionCV(Cs=10, cv=5, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1.0,
                                    max_iter=max_iter, multi_class='ovr',
                                    n_jobs=cpu_number, random_state=None, refit=True,
                                    scoring='roc_auc', solver=solver, tol=0.0001,
                                    verbose=0)

    edge_data_by_type, _, all_nodes = f.load_network_data(graph_path)
    nb_layers = len(edge_data_by_type.keys())

    # Divide the multiplex graph into one file per layer in edgelist format
    for layer in range(nb_layers - 1):
        file = open('multiplex_graph_layer_' + str(layer + 1) + '_' + graph_name, 'w+')
        tmp_array = np.asarray(edge_data_by_type[str(layer + 1)])
        np.savetxt(file, tmp_array, fmt='%s')
        file.close()
        os.replace('multiplex_graph_layer_' + str(layer + 1) + '_' + graph_name,
                   'Generated_graphs/' + 'multiplex_graph_layer_' + str(layer + 1)
                   + '_' + graph_name)

    # Load each graph with EvalNE, preprocess and split train/test edges
    nee = list()
    G_original = list()
    Gsplit = list()
    traintestsplit = list()
    for layer in range(nb_layers - 1):
        G_original.append(f.preprocess('./Generated_graphs/' + 'multiplex_graph_layer_'
                                       + str(layer + 1) + '_' + graph_name,
                                       '.', ' ', directed=False, relabel=False,
                                       del_self_loops=True))
        G_original_traintest_split = EvalSplit()
        G_original_traintest_split.compute_splits(G_original[layer], split_alg=split_alg,
                                                  train_frac=train_frac, owa=False)
        traintestsplit.append(G_original_traintest_split)
        nee.append(LPEvaluator(G_original_traintest_split, dim=EMBED_DIMENSION,
                               lp_model=lp_model))
        Gsplit.append(G_original_traintest_split.TG)

    # Write the multiplex training graph for multiverse in extended edgelist
    # format 'layer n1 n2 weight'
    file_multi = open('multiverse_graph_' + 'training' + '_' + graph_name, 'w+')
    matrix_train_edges = []
    sorted_matrix_train_edges = []
    tmp_array_multi = []

    for layer in range(nb_layers - 1):
        tmp_array = np.asarray(Gsplit[layer].edges)
        tmp_array = np.hstack((tmp_array, np.ones((len(tmp_array), 1))))
        tmp_array = np.hstack(((layer + 1) * np.ones((len(tmp_array), 1)), tmp_array))
        tmp_array = np.vstack(tmp_array)
        tmp_array_multi.append(tmp_array)

        tmp_array_mat_train_edges = np.asarray(Gsplit[layer].edges)
        tmp_array_mat_train_edges = np.hstack((tmp_array_mat_train_edges,
                                               np.ones((len(tmp_array_mat_train_edges), 1))))
        tmp_array_mat_train_edges = np.hstack(((layer) * np.ones((len(tmp_array), 1)),
                                               tmp_array_mat_train_edges))
        matrix_train_edges.append(tmp_array_mat_train_edges)
        matrix_train_edges = sorted(tmp_array_mat_train_edges, key=itemgetter(1))
        sorted_matrix_train_edges.extend(matrix_train_edges)
        matrix_train_edges = []

    tmp_array_multi = np.vstack(tmp_array_multi)
    tmp_array_multi = np.int_(tmp_array_multi)
    np.savetxt(file_multi, tmp_array_multi, fmt='%s', delimiter=' ', newline=os.linesep)
    file_multi.close()
    os.replace('multiverse_graph_' + 'training' + '_' + graph_name,
               './Generated_graphs/' + 'multiverse_graph_' + 'training' + '_'
               + graph_name + '.txt')

    ###################################################################################
    # MULTIVERSE
    ###################################################################################
    r_readRDS = robjects.r['readRDS']

    proc = subprocess.Popen(['Rscript', './RWR/GenerateSimMatrix.R',
                             '-n', '../Generated_graphs/' + 'multiverse_graph_'
                                   + 'training' + '_' + graph_name + '.txt',
                             '-o', '../ResultsRWR/MatrixSimilarityMultiplex' + graph_name,
                             '-c', str(cpu_number)])
    proc.wait()
    proc.kill()
    print('RWR done')

    r_DistancematrixPPI = r_readRDS('./ResultsRWR/MatrixSimilarityMultiplex'
                                    + graph_name + '.rds')

    ########################################################################
    # Processing of the network
    ########################################################################
    reverse_data_DistancematrixPPI, list_neighbours, nodes, data_DistancematrixPPI, nodes_incomponent, neighborhood, nodesstr \
        = f.netpreprocess(r_DistancematrixPPI, graph_path, CLOSEST_NODES)

    ########################################################################
    # Initialization
    ########################################################################
    embeddings = np.random.normal(0, 1, [np.size(nodes), EMBED_DIMENSION])

    ########################################################################
    # Training and saving best embeddings
    ########################################################################
    nodes = np.asarray(nodes)
    embeddings = f.train(neighborhood, nodes, list_neighbours, NUM_STEPS_1, NUM_SAMPLED,
                         LEARNING_RATE, CLOSEST_NODES, CHUNK_SIZE, NB_CHUNK, embeddings,
                         reverse_data_DistancematrixPPI)
    X = dict(zip(range(embeddings.shape[0]), embeddings))
    X = {str(int(nodesstr[key]) + 1): X[key] for key in X}
    np.save('embeddings_M', X)
    date = datetime.datetime.now()
    os.replace('embeddings_M.npy', './ResultsMultiVERSE/' + 'embeddings_M.npy')
    print('Embedding done')

    ########################################################################
    # Evaluation on link prediction
    ########################################################################
    edge_emb = ['hadamard', 'weighted_l1', 'weighted_l2', 'average', 'cosine']
    results_embeddings_methods = dict()
    date = datetime.datetime.now()

    for layer in range(nb_layers - 1):
        for i in range(len(edge_emb)):
            tmp_result_multiverse = nee[layer].evaluate_ne(
                data_split=nee[layer].traintest_split, X=X, method="Multiverse",
                edge_embed_method=edge_emb[i], label_binarizer=lp_model)
            results_embeddings_methods[tmp_result_multiverse.method + '_' + str(layer)
                                       + str(edge_emb[i])] = \
                tmp_result_multiverse.get_all()[1][4]
    print('Evaluation done')

    ########################################################################
    # Analysis and saving of the results
    ########################################################################
    tmp_Multiverse_Result_hada = 0
    tmp_Multiverse_Result_wl1 = 0
    tmp_Multiverse_Result_wL2 = 0
    tmp_Multiverse_Result_avg = 0
    tmp_Multiverse_Result_cos = 0
    for layer in range(nb_layers - 1):
        tmp_Multiverse_Result_hada += results_embeddings_methods[
            'Multiverse' + '_' + str(layer) + str(edge_emb[0])]
        tmp_Multiverse_Result_wl1 += results_embeddings_methods[
            'Multiverse' + '_' + str(layer) + str(edge_emb[1])]
        tmp_Multiverse_Result_wL2 += results_embeddings_methods[
            'Multiverse' + '_' + str(layer) + str(edge_emb[2])]
        tmp_Multiverse_Result_avg += results_embeddings_methods[
            'Multiverse' + '_' + str(layer) + str(edge_emb[3])]
        tmp_Multiverse_Result_cos += results_embeddings_methods[
            'Multiverse' + '_' + str(layer) + str(edge_emb[4])]

    results_embeddings_methods['Multiverse_av_hadamard'] = \
        tmp_Multiverse_Result_hada / (nb_layers - 1)
    results_embeddings_methods['Multiverse_av_weighted_l1'] = \
        tmp_Multiverse_Result_wl1 / (nb_layers - 1)
    results_embeddings_methods['Multiverse_av_weighted_l2'] = \
        tmp_Multiverse_Result_wL2 / (nb_layers - 1)
    results_embeddings_methods['Multiverse_av_average'] = \
        tmp_Multiverse_Result_avg / (nb_layers - 1)
    results_embeddings_methods['Multiverse_av_cosine'] = \
        tmp_Multiverse_Result_cos / (nb_layers - 1)

    # Save results
    Result_file = 'Result_Linkpred_Multiplex_' + graph_name + '_' + str(date) + '.txt'
    with open(Result_file, "w+") as overall_result:
        print("%s: \n\
EMBED_DIMENSION: %s \n\
CLOSEST_NODES: %s \n\
NUM_STEPS_1: %s \n\
NUM_SAMPLED: %s \n\
LEARNING_RATE: %s \n\
CHUNK_SIZE: %s \n\
NB_CHUNK: %s \n\
train_frac: %s \n\
solver: %s \n\
max_iter: %s \n\
split_alg: %s \n\
" % (str(date), EMBED_DIMENSION, CLOSEST_NODES, NUM_STEPS_1, NUM_SAMPLED,
             LEARNING_RATE, CHUNK_SIZE, NB_CHUNK, train_frac, solver, max_iter,
             split_alg), file=overall_result)
        print('Overall MULTIVERSE AUC hadamard:',
              results_embeddings_methods['Multiverse_av_hadamard'], file=overall_result)
        print('Overall MULTIVERSE AUC weighted_l1:',
              results_embeddings_methods['Multiverse_av_weighted_l1'], file=overall_result)
        print('Overall MULTIVERSE AUC weighted_l2:',
              results_embeddings_methods['Multiverse_av_weighted_l2'], file=overall_result)
        print('Overall MULTIVERSE AUC average:',
              results_embeddings_methods['Multiverse_av_average'], file=overall_result)
        print('Overall MULTIVERSE AUC cosine:',
              results_embeddings_methods['Multiverse_av_cosine'], file=overall_result)

    os.replace(Result_file, './ResultsMultiVERSE/' + Result_file)
    print('End')
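# For reference, the extended edgelist written by the script above has one line
# per edge in the form 'layer n1 n2 weight'. Illustrative values only:
#
#   1 12 37 1
#   1 12 88 1
#   2 37 88 1
#
# where `layer` indexes the multiplex layer, `n1`/`n2` are node ids and the
# weight is always 1 here (the np.ones column appended before saving).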
def evaluate(setup):
    # Set the random seed
    random.seed(setup.seed)
    np.random.seed(setup.seed)

    # Get input and output paths
    inpaths = setup.inpaths
    filename = '{}_eval_{}'.format(setup.task, datetime.now().strftime("%m%d_%H%M"))
    outpath = os.path.join(os.getcwd(), filename)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    # Logging configuration (file opened in append mode)
    logging.basicConfig(filename=os.path.join(outpath, 'eval.log'),
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        datefmt='%d-%m-%y %H:%M:%S', level=logging.INFO)
    logging.info('Evaluation start')
    if setup.task != 'nc':
        logging.info('Running evaluation using classifier: {}'.format(setup.lp_model))

    # Create a Scoresheet object to store all results
    if setup.task == 'nr':
        scoresheet = Scoresheet(tr_te='train', precatk_vals=setup.precatk_vals)
    else:
        scoresheet = Scoresheet(tr_te='test', precatk_vals=setup.precatk_vals)

    # Initialize some variables
    edge_split_time = list()
    lp_coef = dict()
    repeats = setup.lp_num_edge_splits if setup.task == 'lp' else 1
    t = tqdm(total=len(inpaths) * repeats)
    t.set_description(desc='Progress on {} task'.format(setup.task))

    # Loop over all input networks
    for i in range(len(inpaths)):
        logging.info('====== Evaluating {} network ======'.format(setup.names[i]))
        print('\nEvaluating {} network...'.format(setup.names[i]))
        print('=====================================')

        # Create path to store info per network if needed
        nw_outpath = os.path.join(outpath, setup.names[i])
        if setup.save_prep_nw or setup.curves != '':
            if not os.path.exists(nw_outpath):
                os.makedirs(nw_outpath)

        # Load and preprocess the graph
        G, ids = preprocess(setup, nw_outpath, i)
        if setup.task == 'nc':
            try:
                labels = pp.read_labels(setup.labelpaths[i], idx_mapping=ids)
            except (ValueError, IOError):
                logging.exception('Exception occurred while reading labels of `{}` network. '
                                  'Skipping network eval...'.format(setup.names[i]))
                break

        # For each repeat of the experiment generate new edge splits
        for repeat in range(repeats):
            logging.info('------ Repetition {} of experiment ------'.format(repeat))
            print('\nRepetition {} of experiment...'.format(repeat))
            print('-------------------------------------')

            # Create train and validation edge splits
            traintest_split = EvalSplit()
            trainvalid_split = EvalSplit()
            split_time = time.time()

            if setup.task == 'lp':
                # For LP compute train/test and train/valid splits
                traintest_split.compute_splits(G, nw_name=setup.names[i],
                                               train_frac=setup.traintest_frac,
                                               split_alg=setup.split_alg, owa=setup.owa,
                                               fe_ratio=setup.fe_ratio, split_id=repeat,
                                               verbose=setup.verbose)
                trainvalid_split.compute_splits(traintest_split.TG, nw_name=setup.names[i],
                                                train_frac=setup.trainvalid_frac,
                                                split_alg=setup.split_alg, owa=setup.owa,
                                                fe_ratio=setup.fe_ratio, split_id=repeat,
                                                verbose=setup.verbose)
                # traintest_split.save_tr_graph(nw_outpath + '/TG_rep_{}'.format(repeat), ',', True, False, False)

                # Create an LP evaluator
                nee = LPEvaluator(traintest_split, trainvalid_split, setup.embed_dim,
                                  setup.lp_model)

            elif setup.task == 'nr':
                # For NR set TG = G (no train/valid split needed) and get a random
                # subset of true and false edges for prediction
                pos_e, neg_e = stt.random_edge_sample(nx.adj_matrix(G),
                                                      setup.nr_edge_samp_frac,
                                                      nx.is_directed(G))
                if len(pos_e) == 0:
                    logging.error('Sampling fraction {} on {} network returned 0 positive '
                                  'edges. Skipping evaluation...'
                                  .format(setup.nr_edge_samp_frac, setup.names[i]))
                    break
                traintest_split.set_splits(train_E=pos_e, train_E_false=neg_e,
                                           test_E=None, test_E_false=None,
                                           directed=nx.is_directed(G),
                                           nw_name=setup.names[i], TG=G)

                # Create an NR evaluator
                nee = NREvaluator(traintest_split, setup.embed_dim, setup.lp_model)

            else:
                # Create an NC evaluator (train/valid fraction hardcoded to 0.2)
                nee = NCEvaluator(G, labels, setup.names[i], setup.nc_num_node_splits,
                                  setup.nc_node_fracs, 0.2, setup.embed_dim)

            edge_split_time.append(time.time() - split_time)

            # Evaluate baselines
            if setup.lp_baselines is not None and setup.task != 'nc':
                eval_baselines(setup, nee, i, scoresheet, repeat, nw_outpath)

            # Evaluate other NE methods
            if setup.methods_opne is not None or setup.methods_other is not None:
                lp_coef = eval_other(setup, nee, i, scoresheet, repeat, nw_outpath)

            # Update progress bar
            t.update(1)

            # Store in a pickle file the results up to this point in the evaluation
            scoresheet.write_pickle(os.path.join(outpath, 'eval.pkl'))

    # Store the results
    if setup.scores is not None:
        if setup.scores == 'all':
            scoresheet.write_all(filename=os.path.join(outpath, 'eval_output.txt'))
        else:
            scoresheet.write_tabular(filename=os.path.join(outpath, 'eval_output.txt'),
                                     metric=setup.scores)
            scoresheet.write_tabular(filename=os.path.join(outpath, 'eval_output.txt'),
                                     metric='eval_time')
        scoresheet.write_pickle(os.path.join(outpath, 'eval.pkl'))

    # Close progress bar
    t.close()

    print('Average edge split times per dataset:')
    print(setup.names)
    print(np.array(edge_split_time).reshape(-1, repeats).mean(axis=1))
    # if setup.task != 'nc':
    #     print('Coefficients of LP model ({}) for each NE method:'.format(setup.lp_model))
    #     print(lp_coef)
    logging.info('Evaluation end\n\n')
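# A minimal sketch of how evaluate() might be driven. `parse_config` is a
# hypothetical helper name that builds the `setup` object (seed, task, inpaths,
# names, split and method options) from an EvalNE-style .ini file; it is not
# part of the code above.
if __name__ == '__main__':
    setup = parse_config('conf.ini')  # hypothetical: returns the setup object
    evaluate(setup)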
from evalne.evaluation.evaluator import LPEvaluator
from evalne.evaluation.split import EvalSplit
from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp

# Load and preprocess the network
# G = pp.load_graph('evalne/tests/data/network.edgelist')
G = pp.load_graph('../Graph_Conv_Neural_Nets/generic_datasets/Zachary-Karate/Zachary-Karate.edgelist')
G, _ = pp.prep_graph(G)

# Create an evaluator and generate train/test edge split
# Bhevencious: EvalSplit() contains methods used to read/set a variety of
# properties/variables. Use the dot and parenthesis helpers to access parameters.
traintest_split = EvalSplit()
traintest_split.compute_splits(G, nw_name='Zachary-Karate.edgelist', train_frac=0.8)
nee = LPEvaluator(traintest_split)

# Create a Scoresheet to store the results
scoresheet = Scoresheet()

# Set the baselines
methods = ['adamic_adar_index', 'common_neighbours', 'jaccard_coefficient', 'katz',
           'preferential_attachment', 'resource_allocation_index', 'random_prediction']

# Evaluate baselines
for method in methods:
    result = nee.evaluate_baseline(method=method)
    scoresheet.log_results(result)
# Contact: [email protected]
# Date: 18/12/2018

# This simple example is the one presented in the README.md file.

from evalne.evaluation.evaluator import LPEvaluator
from evalne.evaluation.split import EvalSplit
from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp

# Load and preprocess the network
G = pp.load_graph('../evalne/tests/data/network.edgelist')
G, _ = pp.prep_graph(G)

# Create an evaluator and generate train/test edge split
traintest_split = EvalSplit()
traintest_split.compute_splits(G)
nee = LPEvaluator(traintest_split)

# Create a Scoresheet to store the results
scoresheet = Scoresheet()

# Set the baselines
methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient']

# Evaluate baselines
for method in methods:
    result = nee.evaluate_baseline(method=method)
    scoresheet.log_results(result)

try:
    # Check if OpenNE is installed and, if so, evaluate OpenNE methods as well
    import openne
except ImportError:
    print('The OpenNE library is not installed. Reporting results only for the baselines...')
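# Once the baselines (and, if available, the OpenNE methods) have been
# evaluated, the results can be persisted with the same Scoresheet API used in
# the larger examples above, e.g.:
scoresheet.write_tabular(filename='eval_output.txt', metric='auroc')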