def calc_map_heu_s(node_l, train_digraph, train_digraph1, test_digraph1, trp, trn):
    """Compute MAP of a heuristic-score classifier before and after growing the train graph.

    A classifier is trained on standardized features produced by
    ``create_score_dataset`` and scored twice with ``getscore6``: once on
    ``train_digraph`` as passed in and once after the edges of
    ``train_digraph1`` have been added to it.

    Args:
        node_l: Indexable of node ids; positions in the score matrix are
            mapped back to graph nodes through it.
        train_digraph (Object): networkx graph used for feature extraction.
            It is modified in place: every edge of ``train_digraph1`` is added.
        train_digraph1 (Object): networkx graph whose edges are merged into
            ``train_digraph``.
        test_digraph1 (Object): Graph passed to ``scores.computeMAP`` as ground truth.
        trp: First sample set forwarded to ``create_score_dataset``
            (presumably positive pairs; verify against caller).
        trn: Second sample set forwarded to ``create_score_dataset``
            (presumably negative pairs; verify against caller).

    Returns:
        tuple: ``(MAP1, MAP2)``; MAP from the scores computed before the
        merge and MAP from the scores computed after it.
    """
    features, labels = create_score_dataset(trp, trn, allh, train_digraph)
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    model = train_classifier((features - mean) / std, labels)

    # First scoring pass uses the training graph exactly as received.
    adj_before = getscore6(train_digraph, node_l, model, allh, mean, std)
    ranked_before = evaluation_util.getEdgeListFromAdjMtx(
        adj_before, is_undirected=True)

    # Merge the second training graph in; both filters below therefore see
    # the merged edge set.
    for src, dst in train_digraph1.edges():
        train_digraph.add_edge(src, dst)

    unseen_before = []
    for edge in ranked_before:
        if not train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            unseen_before.append(edge)
    MAP1 = scores.computeMAP(unseen_before, test_digraph1)

    # Second scoring pass uses the merged graph.
    adj_after = getscore6(train_digraph, node_l, model, allh, mean, std)
    ranked_after = evaluation_util.getEdgeListFromAdjMtx(
        adj_after, is_undirected=True)
    unseen_after = []
    for edge in ranked_after:
        if not train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            unseen_after.append(edge)
    MAP2 = scores.computeMAP(unseen_after, test_digraph1)

    print(MAP1, MAP2)
    return MAP1, MAP2
def expLPT(digraph, graph_embedding, res_pre, m_summ, K=100000, is_undirected=True):
    """Learn an embedding on the whole graph and save its top-K predicted edges.

    The embedding is learned on ``digraph``, the adjacency matrix is
    reconstructed from it, and the resulting edge list is sorted by
    descending score (element 2 of each entry). The first ``K`` entries are
    pickled to ``'<res_pre>_<m_summ>_predEdgeList.pickle'``.

    Args:
        digraph (Object): networkx graph object the embedding is learned on.
        graph_embedding (object): Embedding object providing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        res_pre (Str): Prefix to be used to save the result.
        m_summ (Str): String to denote the name of the summary file.
        K (Int): Maximum number of top-scoring edges written to disk.
        is_undirected (bool): Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.

    Returns:
        None. The result is written to disk.
    """
    print('\tLink Prediction Temporal')
    t1 = time()
    # learn graph embedding on whole graph
    X, _ = graph_embedding.learn_embedding(graph=digraph)
    t2 = time()
    print('\t\tTime taken to learn the embedding: %f sec' % (t2 - t1))

    estimated_adj = graph_embedding.get_reconstructed_adj(X)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected
    )
    # NOTE(review): an earlier revision also built a list with existing edges
    # (and same-partition pairs) removed, but never used it; the file has
    # always contained the unfiltered ranking. That dead computation was
    # dropped. Confirm whether the saved list was meant to be filtered.
    sorted_edges = sorted(predicted_edge_list, key=lambda x: x[2], reverse=True)
    print('\t\tPredicted edge list computed in %f sec. Saving edge list.' % (time() - t2))

    # Context manager guarantees the handle is flushed and closed even if
    # pickling fails part-way.
    out_path = '%s_%s_predEdgeList.pickle' % (res_pre, m_summ)
    with open(out_path, 'wb') as out_file:
        pickle.dump(sorted_edges[:K], out_file)
    print('\t\tSaved edge list.')
def evaluate_unsupervised_embedding(di_graph, graph_embedding, is_undirected=True):
    """Evaluate an embedding for link prediction with AP, ROC and MAP.

    The graph is split 80/20 into train and test parts, the embedding is
    learned on the train part, AP/ROC are computed on sampled edges and MAP
    on a node sample of the test graph.

    Args:
        di_graph (Object): networkx graph to evaluate on.
        graph_embedding (object): Embedding object providing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        is_undirected (bool): Accepted for interface compatibility; the body
            always passes ``is_undirected=True`` to its helpers.

    Returns:
        tuple: ``(AP, ROC, MAP)``.
    """
    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    X, _ = graph_embedding.learn_embedding(graph=train_digraph, no_python=False)

    # AP / ROC on sampled edges.
    sample_edges = sample_edge_new(train_digraph, test_digraph, 0.5)
    scored_samples = getscore4(train_digraph, graph_embedding, sample_edges)
    AP, ROC = scores.computeAP_ROC(scored_samples, test_digraph)

    # MAP on a sample of the test graph (1024 is the size argument given to
    # graph_util.sample_graph).
    test_digraph1, node_l = graph_util.sample_graph(test_digraph, 1024)
    X = X[node_l]
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    unseen_edges = []
    for edge in ranked_edges:
        # Edge endpoints index into node_l; drop pairs already in training.
        if train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            continue
        unseen_edges.append(edge)
    MAP = scores.computeMAP(unseen_edges, test_digraph1)

    print(AP, ROC, MAP)
    return AP, ROC, MAP
def evaluate_unsupervised_all(di_graph, is_undirected=True):
    """Evaluate the heuristics ``cn``, ``jc``, ``pa`` and ``aa`` for link prediction.

    The graph is split 80/20; for each heuristic, MAP is computed on a node
    sample of the test graph and AP/ROC on sampled edges.

    Args:
        di_graph (Object): networkx graph to evaluate on.
        is_undirected (bool): Accepted for interface compatibility; the body
            always passes ``is_undirected=True`` to its helpers.

    Returns:
        tuple: ``(AP, ROC, MAP)``, three lists with one entry per heuristic
        in the order ``cn, jc, pa, aa``.
    """
    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    sample_edges = sample_edge_new(train_digraph, test_digraph)
    test_digraph1, node_l = graph_util.sample_graph(test_digraph, 1024)

    AP, ROC, MAP = [], [], []
    for heuristic in (cn, jc, pa, aa):
        # MAP on the sampled node set.
        estimated_adj = getscore1(train_digraph, node_l, heuristic)
        ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
            estimated_adj, is_undirected=True)
        unseen_edges = []
        for edge in ranked_edges:
            if not train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
                unseen_edges.append(edge)
        MAP1 = scores.computeMAP(unseen_edges, test_digraph1)
        MAP.append(MAP1)

        # AP / ROC on the sampled edges.
        scored_samples = getscore3(train_digraph, sample_edges, heuristic)
        AP1, ROC1 = scores.computeAP_ROC(scored_samples, test_digraph)
        AP.append(AP1)
        ROC.append(ROC1)
        print(AP1, ROC1, MAP1)
    return AP, ROC, MAP
def evaluateStaticGraphReconstruction(digraph, graph_embedding, X_stat,
                                      node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """Score how well an embedding reconstructs ``digraph``.

    Args:
        digraph (Object): networkx graph used as ground truth.
        graph_embedding (object): Embedding object providing
            ``get_reconstructed_adj``.
        X_stat: Node embedding passed to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        file_suffix (Str): When not None it is passed as an extra positional
            argument to ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        is_undirected (bool): Forwarded to the edge-list helpers.
        is_weighted (bool): When True, also compute the reconstruction error
            restricted to entries that are non-zero in the graph adjacency.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``; the last two are
        None unless ``is_weighted`` is set.
    """
    node_num = digraph.number_of_nodes()

    # evaluation
    eval_edge_pairs = (
        evaluation_util.getRandomEdgePairs(node_num, sample_ratio_e, is_undirected)
        if sample_ratio_e else None
    )

    if file_suffix is not None:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat, file_suffix, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs)
    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    err = None
    err_baseline = None
    if is_weighted:
        # Compare weights only where the graph has an entry: reconstructed
        # values at zero positions are blanked (in place) first.
        digraph_adj = nx.to_numpy_matrix(digraph)
        estimated_adj[digraph_adj == 0] = 0
        err = np.linalg.norm(digraph_adj - estimated_adj)
        err_baseline = np.linalg.norm(digraph_adj)
    return (MAP, prec_curv, err, err_baseline)
def evaluateStaticGraphReconstruction(digraph, graph_embedding, X_stat,
                                      node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True):
    """Score how well an embedding reconstructs ``digraph``.

    Args:
        digraph (Object): networkx graph used as ground truth.
        graph_embedding (object): Embedding object providing
            ``get_reconstructed_adj``.
        X_stat: Node embedding passed to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        file_suffix (Str): When not None it is passed as an extra positional
            argument to ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        is_undirected (bool): Forwarded to the edge-list helpers.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    node_num = digraph.number_of_nodes()

    # evaluation
    eval_edge_pairs = None
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)

    if file_suffix is not None:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat, file_suffix, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)

    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs)
    MAP = metrics.computeMAP(ranked_edges, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(ranked_edges, digraph)
    return (MAP, prec_curv)
def calc_map_s(embedding, X1, X2, train_digraph, train_digraph1, node_l,
               test_digraph1, trp, trn, had):
    """Compute MAP of a supervised edge classifier for two embeddings.

    A classifier is trained on standardized edge features built from ``X1``
    with the operator selected by ``had``. It then scores node pairs with
    ``X1`` (MAP1) and with ``X2`` (MAP2). Before either ranking is filtered,
    the edges of ``train_digraph1`` are added to ``train_digraph``.

    Args:
        embedding: Unused; kept for interface compatibility.
        X1: Embedding matrix used for training and for the first scoring.
        X2: Embedding matrix used for the second scoring.
        train_digraph (Object): networkx graph; modified in place (edges of
            ``train_digraph1`` are added).
        train_digraph1 (Object): networkx graph whose edges are merged in.
        node_l: Indexable of node ids, also used to index rows of ``X1``
            and ``X2``.
        test_digraph1 (Object): Graph passed to ``scores.computeMAP``.
        trp: First sample set forwarded to ``create_vector_dataset``
            (presumably positive pairs; verify against caller).
        trn: Second sample set forwarded to ``create_vector_dataset``
            (presumably negative pairs; verify against caller).
        had (Int): Operator selector: 1 -> ``hadamard1``,
            0 -> ``hadamard2``, -1 -> ``hadamard3``.

    Returns:
        tuple: ``(MAP1, MAP2)``.

    Raises:
        ValueError: If ``had`` is not 1, 0 or -1.
    """
    if had == 1:
        func = hadamard1
    elif had == 0:
        func = hadamard2
    elif had == -1:
        func = hadamard3
    else:
        # Previously fell through and failed later with an unbound-local
        # error on ``func``; fail early with a clear message instead.
        raise ValueError('had must be 1, 0 or -1, got %r' % (had,))

    trd, trl = create_vector_dataset(trp, trn, func, X1)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std
    clasifier = train_classifier(trd, trl)

    # Scores for the first embedding are computed on the graph as received.
    X1 = X1[node_l]
    estimated_adj = getscore2(train_digraph, node_l, clasifier, func, X1, mean, std)

    # Merge the second training graph; both filters below see merged edges.
    for (st, ed) in train_digraph1.edges():
        train_digraph.add_edge(st, ed)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]
    MAP1 = scores.computeMAP(filtered_edge_list, test_digraph1)

    X2 = X2[node_l]
    estimated_adj = getscore2(train_digraph, node_l, clasifier, func, X2, mean, std)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]
    MAP2 = scores.computeMAP(filtered_edge_list, test_digraph1)

    print(MAP1, MAP2)
    return MAP1, MAP2
def evaluateStaticGraphReconstruction(digraph, graph_embedding, X_stat,
                                      node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """Evaluate the graph reconstruction accuracy of an embedding.

    Args:
        digraph (Object): networkx graph object used as ground truth.
        graph_embedding (object): Object of the embedding algorithm class;
            must provide ``get_reconstructed_adj``.
        X_stat (Vector): Embedding of the nodes of the graph.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        file_suffix (Str): When not None it is passed as an extra positional
            argument to ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        is_undirected (bool): Forwarded to the edge-list helpers.
        is_weighted (bool): When True, also compute the reconstruction error
            on entries that are non-zero in the graph adjacency.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``; ``err`` and
        ``err_baseline`` are None unless ``is_weighted`` is set.
    """
    node_num = digraph.number_of_nodes()

    # evaluation
    eval_edge_pairs = None
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)

    if file_suffix is not None:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat, file_suffix, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs)

    # When node 0 carries a 'partition' attribute, keep only pairs whose
    # endpoints lie in different partitions.
    if 'partition' in digraph.node[0]:
        cross_partition = []
        for edge in predicted_edge_list:
            src_part = digraph.node[edge[0]]['partition']
            dst_part = digraph.node[edge[1]]['partition']
            if src_part != dst_part:
                cross_partition.append(edge)
        predicted_edge_list = cross_partition

    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    err = None
    err_baseline = None
    if is_weighted:
        # Compare weights only where the graph has an entry: reconstructed
        # values at zero positions are blanked (in place) first.
        digraph_adj = nx.to_numpy_matrix(digraph)
        estimated_adj[digraph_adj == 0] = 0
        err = np.linalg.norm(digraph_adj - estimated_adj)
        err_baseline = np.linalg.norm(digraph_adj)
    return (MAP, prec_curv, err, err_baseline)
def evaluateStaticLinkPrediction(train_digraph, test_digraph,
                                 graph_embedding, X,
                                 node_l=None,
                                 sample_ratio_e=None,
                                 is_undirected=True,
                                 store_predictions=1):
    """Evaluate the static link prediction accuracy of an embedding.

    Predicted edges and the test graph are also pickled to
    ``gem/nodeListMap/preds.pickle`` and
    ``gem/nodeListMap/test_graph.pickle``.

    Args:
        train_digraph (Object): networkx graph object used for training.
        test_digraph (Object): networkx graph object used as ground truth.
        graph_embedding (object): Object of the embedding algorithm class;
            must provide ``learn_embedding`` and ``get_reconstructed_adj``.
        X (Vector): Embedding of the nodes of the graph. When None,
            ``graph_embedding.learn_embedding(train_digraph)`` is called
            first and None is still passed to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        is_undirected (bool): Forwarded to the edge-list helpers.
        store_predictions (Int): Currently not consulted; predictions are
            always written. NOTE(review): confirm whether this flag was
            meant to gate the pickle dumps.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    node_num = train_digraph.number_of_nodes()

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None

    if X is None:
        # If not an embedding approach, store the new subgraph
        graph_embedding.learn_embedding(train_digraph)

    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    if 'partition' in train_digraph.node[0]:
        # NOTE(review): as before, the partition filter starts from the
        # full prediction list, so edges already in the training graph are
        # NOT removed in this case. Confirm this is intended.
        filtered_edge_list = [
            e for e in predicted_edge_list
            if train_digraph.node[e[0]]['partition'] != train_digraph.node[e[1]]['partition']
        ]
    else:
        filtered_edge_list = [
            e for e in predicted_edge_list
            if not train_digraph.has_edge(e[0], e[1])
        ]

    # Context managers make sure both files are flushed and closed.
    with open('gem/nodeListMap/preds.pickle', 'wb') as preds_file:
        pickle.dump(filtered_edge_list, preds_file)
    with open('gem/nodeListMap/test_graph.pickle', 'wb') as graph_file:
        pickle.dump(test_digraph, graph_file)

    t1 = time()
    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    t2 = time()
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    t3 = time()
    print('MAP computation time: %f sec, prec: %f sec' % (t2 - t1, t3 - t2))
    return (MAP, prec_curv)
def calc_map_us(embedding, X, node_l, train_digraph, test_digraph1):
    """Compute and print the MAP of an embedding on a sampled node set.

    Args:
        embedding (object): Embedding object providing
            ``get_reconstructed_adj``.
        X: Embedding passed to ``get_reconstructed_adj``.
        node_l: Indexable of node ids; positions in the reconstructed
            adjacency are mapped back to graph nodes through it.
        train_digraph (Object): networkx graph whose edges are excluded from
            the ranking.
        test_digraph1 (Object): Graph passed to ``scores.computeMAP``.

    Returns:
        The MAP value returned by ``scores.computeMAP``.
    """
    reconstructed = embedding.get_reconstructed_adj(X, node_l)
    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        reconstructed, is_undirected=True)

    unseen_edges = []
    for edge in ranked_edges:
        # Skip pairs that are already connected in the training graph.
        if train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            continue
        unseen_edges.append(edge)

    MAP = scores.computeMAP(unseen_edges, test_digraph1)
    print(MAP)
    return MAP
def evaluateStaticLinkPrediction(digraph, graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split a graph, learn an embedding on the train part and score link prediction.

    If the train graph is not connected, only its largest weakly connected
    component is kept; train and test graphs are then relabelled to
    consecutive integers.

    Args:
        digraph (Object): networkx graph to split and evaluate on.
        graph_embedding (object): Embedding object providing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        train_ratio (Float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes (Int): When truthy, the test graph is sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled rows.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        no_python (bool): Forwarded to ``learn_embedding``.
        is_undirected (bool): Forwarded to the split and edge-list helpers.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    node_num = digraph.number_of_nodes()

    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph, train_ratio=train_ratio, is_undirected=is_undirected)
    if not nx.is_connected(train_digraph.to_undirected()):
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph), key=len)
        # Snapshot the labels: the graph is relabelled in place below and a
        # live node view would no longer hold the original ids.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)

    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(graph=train_digraph,
                                           no_python=no_python)
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(test_digraph,
                                                       n_sample_nodes)
        X = X[node_l]

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs)

    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))
    # Edge endpoints are positions in node_l: index it (it was previously
    # *called*, which raised TypeError on the first predicted edge).
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(filtered_edge_list,
                                                 test_digraph)
    return (MAP, prec_curv)
def calc_map_heu(node_l, train_digraph, test_digraph1):
    """Compute and print the MAP of the heuristics ``cn``, ``jc``, ``pa`` and ``aa``.

    Args:
        node_l: Indexable of node ids; positions in the score matrix are
            mapped back to graph nodes through it.
        train_digraph (Object): networkx graph the heuristics are computed
            on; its edges are excluded from the ranking.
        test_digraph1 (Object): Graph passed to ``scores.computeMAP``.

    Returns:
        list: One MAP value per heuristic, in the order ``cn, jc, pa, aa``.
    """
    def _map_for(heuristic):
        # Score all pairs, drop those already linked in training, rank.
        estimated_adj = getscore1(train_digraph, node_l, heuristic)
        ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
            estimated_adj, is_undirected=True)
        unseen_edges = [
            edge for edge in ranked_edges
            if not train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]])
        ]
        return scores.computeMAP(unseen_edges, test_digraph1)

    MAP = [_map_for(heuristic) for heuristic in (cn, jc, pa, aa)]
    print(MAP)
    return MAP
def evaluate_supervised(di_graph, graph_embedding, is_undirected=True):
    """Evaluate an embedding with a supervised edge classifier (AP, ROC, MAP).

    The graph is split 60/40; the 40% part is split again in half into a
    second training graph and the final test graph. A classifier is trained
    on ``hadamard2`` edge features from an embedding of the first training
    graph, then the embedding is re-learned on the merged training graph
    and used for scoring.

    Args:
        di_graph (Object): networkx graph to evaluate on.
        graph_embedding (object): Embedding object providing
            ``learn_embedding``.
        is_undirected (bool): Forwarded to the second split only; the first
            split always uses ``is_undirected=True``.

    Returns:
        tuple: ``(AP, ROC, MAP)``.
    """
    train_digraph, held_out = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.6, is_undirected=True)
    train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        held_out, train_ratio=0.5, is_undirected=is_undirected)

    # Train the classifier on features from the first training graph.
    X, _ = graph_embedding.learn_embedding(graph=train_digraph, no_python=False)
    trp, trn = create_edge_dataset(train_digraph, train_digraph1)
    features, labels = create_vector_dataset(trp, trn, hadamard2, X)
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    model = train_classifier((features - mean) / std, labels)

    # Merge the second training graph and re-learn the embedding on it.
    for src, dst in train_digraph1.edges():
        train_digraph.add_edge(src, dst)
    sample_edges = sample_edge_new(train_digraph, test_digraph, 0.5)
    X, _ = graph_embedding.learn_embedding(graph=train_digraph, no_python=False)

    # AP / ROC on sampled edges.
    scored_samples = getscore5(train_digraph, sample_edges, model,
                               hadamard2, X, mean, std)
    AP, ROC = scores.computeAP_ROC(scored_samples, test_digraph)

    # MAP on a node sample of the test graph.
    test_digraph, node_l = graph_util.sample_graph(test_digraph, 1024)
    X = X[node_l]
    estimated_adj = getscore2(train_digraph, node_l, model, hadamard2,
                              X, mean, std)
    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    unseen_edges = []
    for edge in ranked_edges:
        if not train_digraph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            unseen_edges.append(edge)
    MAP = scores.computeMAP(unseen_edges, test_digraph)

    print(MAP)
    return AP, ROC, MAP
def evaluateStaticLinkPrediction(digraph, graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split a graph, learn an embedding on the train part and score link prediction.

    In addition to the metrics, a dictionary mapping original node labels to
    their embedding rows is returned.

    If the train graph is not connected, only its largest weakly connected
    component is kept; train and test graphs are then relabelled to
    consecutive integers.

    Args:
        digraph (Object): networkx graph to split and evaluate on.
        graph_embedding (object): Embedding object providing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        train_ratio (Float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes (Int): When truthy, the test graph is sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled rows.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        no_python (bool): Forwarded to ``learn_embedding``.
        is_undirected (bool): Forwarded to the split and edge-list helpers.

    Returns:
        tuple: ``(MAP, prec_curv, node2vec_dict)``.
    """
    node_num = digraph.number_of_nodes()
    print('eslp graph')
    # list(...) so slicing also works when edges() returns a view.
    print(list(digraph.edges())[:3])

    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )
    print('eslp training graph')
    print(list(train_digraph.edges())[:3])

    if not nx.is_connected(train_digraph.to_undirected()):
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph),
            key=len
        )
        # Snapshot the labels: the graph is relabelled in place below.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        reversedNodeListMap = dict(zip(range(len(tdl_nodes)), tdl_nodes))
        print(nodeListMap)
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)
    else:
        # No relabelling happened, so the reverse map is the identity.
        # (tdl_nodes used to be undefined on this path -> NameError.)
        tdl_nodes = list(train_digraph.nodes())
        reversedNodeListMap = dict(zip(tdl_nodes, tdl_nodes))
    print('elsp training graph after largest cc')
    print(list(train_digraph.edges())[:3])

    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(
            test_digraph,
            n_sample_nodes
        )
        X = X[node_l]

    # Row i of X belongs to the node at position rows[i] of the train graph:
    # position i itself without sampling, node_l[i] after ``X = X[node_l]``.
    # (Sampled rows used to be paired with the first len(X) nodes.)
    train_nodes = list(train_digraph.nodes())
    rows = node_l if node_l is not None else range(len(X))
    node2vec_dict = {}
    print('GUESS embedding node2vc train result')
    for row, vector in zip(rows, X):
        node2vec_dict[reversedNodeListMap[train_nodes[row]]] = vector

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))
    # Drop predictions for pairs already connected in the training graph.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    return (MAP, prec_curv, node2vec_dict)
def evaluateStaticLinkPrediction(digraph, train_digraph, test_digraph,
                                 graph_embedding,
                                 is_undirected=True,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False):
    """Score link prediction for a caller-supplied train/test split.

    Every node of ``digraph`` missing from the train or test graph is added
    to it (as ``int(node)``), so both supplied graphs are modified in place.
    If the train graph is not connected, only its largest weakly connected
    component is kept and both graphs are relabelled to consecutive integers.

    Args:
        digraph (Object): Full networkx graph; supplies the node set.
        train_digraph (Object): networkx graph the embedding is learned on.
        test_digraph (Object): networkx graph used as ground truth.
        graph_embedding (object): Embedding object providing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        is_undirected (bool): Forwarded to the edge-list helpers.
        n_sample_nodes (Int): When truthy, the test graph is sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled rows.
        sample_ratio_e (Float): When truthy, evaluation is restricted to
            random edge pairs drawn with this ratio.
        no_python (bool): Forwarded to ``learn_embedding``.

    Returns:
        The MAP value returned by ``metrics.computeMAP``. The precision
        curve is not computed by this variant.
    """
    node_num = digraph.number_of_nodes()

    # Give both graphs the full node set. has_node is a constant-time lookup,
    # unlike a membership test against a node list.
    for node in digraph.nodes():
        if not train_digraph.has_node(node):
            train_digraph.add_node(int(node))
        if not test_digraph.has_node(node):
            test_digraph.add_node(int(node))

    # If the training graph is not connected, keep its largest component.
    if not nx.is_connected(train_digraph.to_undirected()):
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph), key=len)
        # Nodes of the connected training graph, snapshotted because the
        # graph is relabelled in place below.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        # Replace the original ids with consecutive node numbers.
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        # The test graph keeps only nodes that are also in tdl_nodes.
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)

    # Learn the graph embedding.
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph, no_python=no_python)
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(test_digraph,
                                                       n_sample_nodes)
        X = X[node_l]

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num, sample_ratio_e, is_undirected)
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs)

    # After sampling, edge endpoints are positions in node_l, so translate
    # them back to train-graph ids before the lookup (they used to be
    # passed to has_edge untranslated).
    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    # Mean Average Precision of the remaining predictions.
    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    return MAP