def expLP(digraph, graph_embedding,
          n_sample_nodes, rounds,
          res_pre, m_summ, train_ratio=0.8,
          no_python=False, is_undirected=True):
    """Run link prediction several times and store a summary and raw results.

    NOTE(review): this module defines ``expLP`` a second time further down
    with a different signature; if both are at module level, the later
    definition replaces this one at import time.

    Args:
        digraph (Object): Graph handed unchanged to
            ``evaluateStaticLinkPrediction``.
        graph_embedding (object): Embedding object handed unchanged to
            ``evaluateStaticLinkPrediction``.
        n_sample_nodes (Int): Currently unused; the evaluator is always
            called with ``n_sample_nodes=1024``.
        rounds (Int): Number of evaluation rounds.
        res_pre (Str): Prefix of the output file names.
        m_summ (Str): Second component of the output file names.
        train_ratio (Float): Forwarded to the evaluator.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.

    Returns:
        None. Writes ``<res_pre>_<m_summ>.lpsumm`` (text summary) and
        ``<res_pre>_<m_summ>.lp`` (pickled ``[MAP, prec_curv]``).
    """
    print('\tLink Prediction')
    MAP = [None] * rounds
    prec_curv = [None] * rounds

    # The context manager closes the summary even if a round raises.
    with open('%s_%s.lpsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for round_id in range(rounds):
            # NOTE(review): the sample size is hard-coded to 1024 and the
            # n_sample_nodes argument is ignored; kept as-is so results do
            # not change -- confirm whether the argument should be used.
            MAP[round_id], prec_curv[round_id] = \
                evaluateStaticLinkPrediction(digraph, graph_embedding,
                                             train_ratio=train_ratio,
                                             n_sample_nodes=1024,
                                             no_python=no_python,
                                             is_undirected=is_undirected)
        # Only the first round's precision curve is reported in the summary.
        summ_file.write(
            '\t%f/%f\t%s\n' % (
                np.mean(MAP),
                np.std(MAP),
                metrics.getPrecisionReport(prec_curv[0], len(prec_curv[0]))
            )
        )

    with open('%s_%s.lp' % (res_pre, m_summ), 'wb') as res_file:
        pickle.dump([MAP, prec_curv], res_file)
def expGR(digraph, graph_embedding,
          X, n_sampled_nodes, rounds,
          res_pre, m_summ,
          is_undirected=True):
    """Run graph reconstruction on sampled subgraphs and store the results.

    NOTE(review): this module defines ``expGR`` a second time further down
    with a different signature; if both are at module level, the later
    definition replaces this one at import time.

    Args:
        digraph (Object): Graph to sample from; must expose ``.nodes``.
        graph_embedding (object): Embedding object handed to
            ``evaluateStaticGraphReconstruction``.
        X: Node embeddings, indexed with the sampled node list
            (``X[node_l]``).
        n_sampled_nodes (Int): Number of nodes requested per sample.
        rounds (Int): Number of sampling/evaluation rounds; forced to 1
            when the graph has at most ``n_sampled_nodes`` nodes.
        res_pre (Str): Prefix of the output file names.
        m_summ (Str): Second component of the output file names.
        is_undirected (bool): Forwarded to the evaluator.

    Returns:
        None. Writes ``<res_pre>_<m_summ>.grsumm`` (text summary) and
        ``<res_pre>_<m_summ>.gr`` (pickled
        ``[n_nodes, n_edges, MAP, prec_curv, err, err_b]``).
    """
    print('\tGraph Reconstruction')

    # The context manager closes the summary even if a round raises.
    with open('%s_%s.grsumm' % (res_pre, m_summ), 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

        # A single round is enough when the graph is no larger than the
        # requested sample size.
        if len(digraph.nodes) <= n_sampled_nodes:
            rounds = 1

        MAP = [None] * rounds
        prec_curv = [None] * rounds
        err = [None] * rounds
        err_b = [None] * rounds
        n_nodes = [None] * rounds
        n_edges = [None] * rounds

        for round_id in range(rounds):
            sampled_digraph, node_l = graph_util.sample_graph(
                digraph,
                n_sampled_nodes=n_sampled_nodes
            )
            n_nodes[round_id] = len(sampled_digraph.nodes)
            n_edges[round_id] = len(sampled_digraph.edges)
            print('\t\tRound: %d, n_nodes: %d, n_edges:%d\n'
                  % (round_id, n_nodes[round_id], n_edges[round_id]))
            sampled_X = X[node_l]
            MAP[round_id], prec_curv[round_id], err[round_id], err_b[round_id] = \
                evaluateStaticGraphReconstruction(sampled_digraph,
                                                  graph_embedding,
                                                  sampled_X, node_l,
                                                  is_undirected=is_undirected)

        # Best-effort: the error lines are skipped on TypeError (presumably
        # when the evaluator returns None for the errors -- confirm).
        try:
            summ_file.write('Err: %f/%f\n' % (np.mean(err), np.std(err)))
            summ_file.write('Err_b: %f/%f\n'
                            % (np.mean(err_b), np.std(err_b)))
        except TypeError:
            pass

        # Only the first round's precision curve is reported in the summary.
        summ_file.write(
            '%f/%f\t%s\n' % (
                np.mean(MAP),
                np.std(MAP),
                metrics.getPrecisionReport(prec_curv[0], n_edges[0])
            )
        )

    with open('%s_%s.gr' % (res_pre, m_summ), 'wb') as res_file:
        pickle.dump([n_nodes, n_edges, MAP, prec_curv, err, err_b], res_file)
def expGR(digraph, graph_embedding,
          X, n_sampled_nodes_l, rounds,
          res_pre, m_summ,
          K=10000,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Run graph-reconstruction experiments for several sample sizes.

    Args:
        digraph (Object): Directed networkx graph object.
        graph_embedding (object): Object of the embedding algorithm class,
            handed to ``evaluateStaticGraphReconstruction``.
        X (Vector): Embedding of the nodes of the graph; indexed with the
            sampled node list (``X[node_l]``).
        n_sampled_nodes_l (List): Sample sizes to evaluate. Each entry is
            converted to ``int`` and capped at the number of nodes in
            ``digraph``; an empty list means "use the whole graph size".
        rounds (Int): Number of sampling/evaluation rounds per sample size.
        res_pre (Str): Prefix used for the output file names.
        m_summ (Str): Second component of the output file names.
        K (Int): Maximum length kept for each precision curve.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (Str): ``"u_rand"`` uses
            ``graph_util.sample_graph``; any other value uses
            ``graph_util.sample_graph_rw``. Also part of the file names.

    Returns:
        list: The per-round MAP values of the first sample size evaluated.
    """
    print('\tGraph Reconstruction')

    # The original body read a misspelled local (n_sample_nodes_l) before
    # assigning it and used node_num without defining it; both are fixed by
    # consistently using the real parameter name.
    node_num = digraph.number_of_nodes()
    n_sampled_nodes_l = [min(int(n), node_num) for n in n_sampled_nodes_l]
    if not n_sampled_nodes_l:
        n_sampled_nodes_l = [node_num]

    MAP = {}
    prec_curv = {}
    err = {}
    err_b = {}
    n_nodes = {}
    n_edges = {}

    summ_path = '%s_%s_%s.grsumm' % (res_pre, m_summ, sampling_scheme)
    # The context manager closes the summary even if a round raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in n_sampled_nodes_l:
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            err[n_s] = [None] * rounds
            err_b[n_s] = [None] * rounds
            n_nodes[n_s] = [None] * rounds
            n_edges[n_s] = [None] * rounds

            for rid in range(rounds):
                if sampling_scheme == "u_rand":
                    sampled_digraph, node_l = graph_util.sample_graph(
                        digraph, n_sampled_nodes=n_s)
                else:
                    sampled_digraph, node_l = graph_util.sample_graph_rw(
                        digraph, n_sampled_nodes=n_s)
                n_nodes[n_s][rid] = sampled_digraph.number_of_nodes()
                n_edges[n_s][rid] = sampled_digraph.number_of_edges()
                print('\t\tRound: %d/%d, n_nodes: %d, n_edges:%d\n'
                      % (rid, rounds, n_nodes[n_s][rid], n_edges[n_s][rid]))
                sampled_X = X[node_l]
                (MAP[n_s][rid], prec_curv[n_s][rid],
                 err[n_s][rid], err_b[n_s][rid]) = \
                    evaluateStaticGraphReconstruction(
                        sampled_digraph, graph_embedding,
                        sampled_X, node_l,
                        is_undirected=is_undirected)
                # Keep at most K points of the precision curve.
                prec_curv[n_s][rid] = prec_curv[n_s][rid][:K]

            summ_file.write('n_s:%d' % n_s)
            # Best-effort: the error lines are skipped on TypeError
            # (presumably when the evaluator returns None for the errors
            # -- confirm).
            try:
                summ_file.write('\tErr: %f/%f\n'
                                % (np.mean(err[n_s]), np.std(err[n_s])))
                summ_file.write('\tErr_b: %f/%f\n'
                                % (np.mean(err_b[n_s]), np.std(err_b[n_s])))
            except TypeError:
                pass
            # Only the first round's precision curve is reported.
            summ_file.write(
                '\t%f/%f\t%s\n' % (
                    np.mean(MAP[n_s]),
                    np.std(MAP[n_s]),
                    metrics.getPrecisionReport(prec_curv[n_s][0],
                                               n_edges[n_s][0])
                )
            )

    res_path = '%s_%s_%s.gr' % (res_pre, m_summ, sampling_scheme)
    with open(res_path, 'wb') as res_file:
        pickle.dump(
            [n_nodes, n_edges, MAP, prec_curv, err, err_b, n_sampled_nodes_l],
            res_file
        )
    return next(iter(MAP.values()))
def expLP(digraph, graph_embedding,
          n_sample_nodes_l, rounds,
          res_pre, m_summ, train_ratio=0.8,
          no_python=True, K=32768,
          is_undirected=True, sampling_scheme="u_rand"):
    """Run link-prediction experiments for several sample sizes.

    The graph is split into train and test edges, the embedding is learned
    once on the train graph, and the evaluation is then repeated on sampled
    subgraphs.

    Args:
        digraph (Object): Networkx graph to evaluate on.
        graph_embedding (object): Embedding object; must provide
            ``learn_embedding(graph=..., no_python=...)``.
        n_sample_nodes_l (List): Sample sizes to evaluate. Each entry is
            converted to ``int`` and capped at the number of nodes; an
            empty list means "use the whole graph size".
        rounds (Int): Number of sampling/evaluation rounds per sample size.
        res_pre (Str): Prefix used for the output file names.
        m_summ (Str): Second component of the output file names.
        train_ratio (Float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``; also part of the
            result file name.
        no_python (bool): Forwarded to ``learn_embedding``.
        K (Int): Maximum length kept for each precision curve.
        is_undirected (bool): Forwarded to the splitter and the evaluator.
        sampling_scheme (Str): ``"u_rand"`` uses
            ``graph_util.sample_graph``; any other value uses
            ``graph_util.sample_graph_rw``. Also part of the file names.

    Returns:
        list: The per-round MAP values of the first sample size evaluated.

    Raises:
        ValueError: If the learned embedding does not have one row per
            node of the train graph.

    Besides the summary (``.lpsumm``) and result (``.lp``) files, node
    relabelling maps are pickled under ``gem/nodeListMap/`` relative to the
    working directory; that directory must already exist.
    """
    print('\tLink Prediction')
    MAP = {}
    prec_curv = {}
    node_num = digraph.number_of_nodes()
    n_sample_nodes_l = [min(int(n), node_num) for n in n_sample_nodes_l]

    # Randomly hide (1-train_ratio)*100% of links
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )

    # Ensure the resulting train subgraph is connected
    if not nx.is_connected(train_digraph.to_undirected()):
        # weakly_connected_component_subgraphs was removed in NetworkX 2.4;
        # taking the largest component's node set and copying the induced
        # subgraph is the documented equivalent.
        largest_wcc = max(nx.weakly_connected_components(train_digraph),
                          key=len)
        train_digraph = train_digraph.subgraph(largest_wcc).copy()
        tdl_nodes = list(train_digraph.nodes())
        node_map = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        train_digraph = nx.relabel_nodes(train_digraph, node_map, copy=True)
        # Rebuild the test graph so it is no longer a frozen subgraph view.
        # NOTE(review): nx.Graph() also drops edge direction -- confirm this
        # is intended when is_undirected is False.
        test_digraph = nx.Graph(test_digraph.subgraph(tdl_nodes))
        test_digraph = nx.relabel_nodes(test_digraph, node_map, copy=True)
        with open('gem/nodeListMap/lp_lcc.pickle', 'wb') as map_file:
            pickle.dump(node_map, map_file)

    t1 = time()
    # learn graph embedding on train subgraph
    print(
        'Link Prediction train graph n_nodes: %d, n_edges: %d' % (
            train_digraph.number_of_nodes(),
            train_digraph.number_of_edges())
    )
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    # Fail loudly instead of dropping into the debugger, which blocks
    # non-interactive runs.
    if X is not None and X.shape[0] != train_digraph.number_of_nodes():
        raise ValueError(
            'Embedding has %d rows but the train graph has %d nodes'
            % (X.shape[0], train_digraph.number_of_nodes())
        )
    print('Time taken to learn the embedding: %f sec' % (time() - t1))

    # sample test graph for evaluation and store results
    if not n_sample_nodes_l:
        n_sample_nodes_l = [node_num]

    summ_path = '%s_%s_%s.lpsumm' % (res_pre, m_summ, sampling_scheme)
    # The context manager closes the summary even if a round raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in n_sample_nodes_l:
            # The train graph may have shrunk to its largest component.
            n_s = min(int(n_s), train_digraph.number_of_nodes())
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            for round_id in range(rounds):
                if sampling_scheme == "u_rand":
                    train_digraph_s, node_l = graph_util.sample_graph(
                        train_digraph,
                        n_s
                    )
                else:
                    train_digraph_s, node_l = graph_util.sample_graph_rw(
                        train_digraph,
                        n_s
                    )
                X_sub = X[node_l] if X is not None else None

                # Restrict the test graph to the sampled nodes and relabel
                # them to 0..len(node_l)-1 in sample order.
                test_digraph_s = test_digraph.subgraph(node_l)
                sample_map = dict(zip(node_l, range(len(node_l))))
                # Overwritten on every round; only the last map survives.
                with open('gem/nodeListMap/lp_lcc_samp.pickle',
                          'wb') as map_file:
                    pickle.dump(sample_map, map_file)
                test_digraph_s = nx.relabel_nodes(test_digraph_s, sample_map,
                                                  copy=True)

                MAP[n_s][round_id], prec_curv[n_s][round_id] = \
                    evaluateStaticLinkPrediction(train_digraph_s,
                                                 test_digraph_s,
                                                 graph_embedding, X_sub,
                                                 node_l=node_l,
                                                 is_undirected=is_undirected)
                # Keep at most K points of the precision curve.
                prec_curv[n_s][round_id] = prec_curv[n_s][round_id][:K]

            # Only the first round's precision curve is reported.
            summ_file.write('\tn_s:%d, %f/%f\t%s\n' % (
                n_s,
                np.mean(MAP[n_s]),
                np.std(MAP[n_s]),
                metrics.getPrecisionReport(
                    prec_curv[n_s][0],
                    len(prec_curv[n_s][0])
                )
            ))

    res_path = '%s_%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme,
                                   str(train_ratio))
    with open(res_path, 'wb') as res_file:
        pickle.dump([MAP, prec_curv, n_sample_nodes_l], res_file)
    print('Link prediction evaluation complete. Time: %f sec'
          % (time() - t1))
    return next(iter(MAP.values()))