def load_models(G, edge_f, models):
    isDirected = True

    # Load graph
    G = G.to_directed()

    # models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
    #                        con_size=10, ret_p=1, inout_p=1))
    models.append(
        SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
             n_units=[50, 15], rho=0.3, n_iter=50, xeta=0.01, n_batch=100,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))
    # return models, edge_f

    # For each model, learn the embedding and evaluate on graph reconstruction
    # and visualization
    for embedding in models:
        print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                                G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list
        Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                         is_weighted=True, no_python=True)
        # Y, t = embedding.learn_embedding(graph=None, edge_f=edge_f,
        #                                  is_weighted=True, no_python=True)
        print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        # ---------------------------------------------------------------------
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))
        # ---------------------------------------------------------------------
        # Visualize
        viz.plot_embedding2D(embedding.get_embedding(), di_graph=G,
                             node_colors=None)
        # plt.show()
    return models
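# --- Usage sketch (added for illustration; not part of the original code) ---
# Shows one way load_models() above might be driven. It assumes the names used
# inside the function are already imported: SDNE from gem.embedding.sdne,
# evaluate_graph_reconstruction as gr and visualize_embedding as viz from
# gem.evaluation, time from the time module, and matplotlib for plotting.
# The Erdos-Renyi demo graph is an assumption, chosen only so SDNE's
# n_batch=100 stays below the node count.
def _demo_load_models():
    import networkx as nx
    G_demo = nx.erdos_renyi_graph(200, 0.05, directed=True)
    trained = load_models(G_demo, edge_f=None, models=[])
    for m in trained:
        print(m._method_name, m.get_embedding().shape)

# _demo_load_models()  # uncomment to run; SDNE training requires Keras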
def GraphReconstruction(self, G, verbose=True, visualize=True, directed=False):
    # convert to directed form for base library gem, if needed
    if not directed:
        G = G.to_directed()
    # important that nodes are contiguously numbered
    G = nx.convert_node_labels_to_integers(
        G, first_label=0, ordering='default', label_attribute="original_label")
    # now find the best-performing embedding
    maxMAP = 0
    bestEmbedding = None
    for embedding in self.models:
        if verbose:
            print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                                    G.number_of_edges()))
        # Learn embedding - accepts a networkx graph or file with edge list
        t1 = time()
        Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                         is_weighted=True, no_python=True)
        if verbose:
            print(embedding._method_name +
                  ':\n\tTraining time: %f' % (time() - t1))
        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        # ---------------------------------------------------------------------
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))
        # ---------------------------------------------------------------------
        # Visualize
        if visualize:
            viz.plot_embedding2D(embedding.get_embedding(), di_graph=G,
                                 node_colors=None)
            plt.show()  # displays when using the 'TkAgg' matplotlib backend
            plt.savefig("embedding_" + embedding._method_name)  # saves with the 'Agg' matplotlib backend
        # keep track of the best embedding so far
        if maxMAP < MAP:
            bestEmbedding = embedding
            maxMAP = MAP
    return bestEmbedding
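# --- Usage sketch (added for illustration; not in the original source) ---
# GraphReconstruction is written as a method that reads self.models, so a
# minimal host class only needs a `models` attribute holding GEM embedding
# objects. The class name and the HOPE / Laplacian Eigenmaps choices below are
# assumptions for demonstration.
class _DemoEmbeddingSelector(object):
    def __init__(self, models):
        self.models = models

    GraphReconstruction = GraphReconstruction  # reuse the function above as a method

# Example (requires gem.embedding.hope.HOPE and gem.embedding.lap.LaplacianEigenmaps):
# selector = _DemoEmbeddingSelector([HOPE(d=4, beta=0.01), LaplacianEigenmaps(d=2)])
# best = selector.GraphReconstruction(nx.karate_club_graph(), visualize=False)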
def evaluate_embedding(graph, embedding):
    class _Dummy(object):
        def __init__(self, embedding):
            self.embedding = embedding

        def get_reconstructed_adj(self, X=None, node_l=None):
            node_num = self.embedding.shape[0]
            adj_mtx_r = np.zeros((node_num, node_num))
            for v_i in range(node_num):
                for v_j in range(node_num):
                    if v_i == v_j:
                        continue
                    adj_mtx_r[v_i, v_j] = self.get_edge_weight(v_i, v_j)
            return adj_mtx_r

        def get_edge_weight(self, i, j):
            return np.dot(self.embedding[i, :], self.embedding[j, :])

    dummy_model = _Dummy(embedding)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        graph, dummy_model, embedding, None)
    return (MAP, prec_curv)
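# --- Usage sketch (added for illustration; not in the original source) ---
# evaluate_embedding() only needs a graph and a node-by-dimension embedding
# matrix; the _Dummy wrapper supplies the reconstruction interface that
# gr.evaluateStaticGraphReconstruction expects. The random embedding below is
# a placeholder assumption, useful only as a baseline sanity check.
def _demo_evaluate_embedding():
    import networkx as nx
    import numpy as np
    G_demo = nx.karate_club_graph().to_directed()
    random_embedding = np.random.rand(G_demo.number_of_nodes(), 2)
    map_score, precision_curve = evaluate_embedding(G_demo, random_embedding)
    print('MAP of a random embedding:', map_score)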
def generate_metrics(G, model, embedding, labels, predicted_labels, S=None, cv=5):
    """Generates a series of benchmarks for unsupervised learning (MAP),
    semi-supervised learning, and supervised learning (cross-validation
    accuracy with random forest classifiers) for the provided input dataset.

    # Arguments:
        G (nx.DiGraph): The input graph.
        model: A fitted embedding model exposing GEM's reconstruction interface.
        embedding (np.ndarray): The learned node embedding matrix.
        labels: Ground-truth node labels, or None.
        predicted_labels: Cluster labels predicted from the embedding.
        S (np.ndarray): Optional. Additional per-node (e.g. anatomical) features.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: A result dictionary with all computed metrics.
    """
    out_metrics = {}
    clf = RandomForestClassifier(n_estimators=2000)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, embedding, is_undirected=False, is_weighted=True)
    out_metrics['MAP'] = MAP
    if labels is not None:
        scores = cross_val_score(clf, embedding, labels, cv=cv)
        print(scores)
        out_metrics['CV'] = scores.mean()
        if S is not None:
            scores = cross_val_score(clf, np.hstack((embedding, S)), labels, cv=cv)
            print(scores)
            out_metrics['CVAnatomy+Graph'] = scores.mean()
            scores = cross_val_score(clf, S, labels, cv=cv)
            print(scores)
            out_metrics['CVAnatomyOnly'] = scores.mean()
        out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(
            labels, predicted_labels)
        out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(
            labels, predicted_labels)
    return out_metrics
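# --- Usage sketch (added for illustration; not in the original source) ---
# generate_metrics() needs a model implementing GEM's reconstruction interface
# together with the embedding it produced. HOPE and the label arrays passed in
# below are assumptions chosen only for demonstration.
def _demo_generate_metrics(G, labels, predicted_labels):
    from gem.embedding.hope import HOPE
    hope = HOPE(d=4, beta=0.01)
    emb, _ = hope.learn_embedding(graph=G, edge_f=None, is_weighted=True,
                                  no_python=True)
    return generate_metrics(G, hope, emb, labels, predicted_labels, S=None, cv=5)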
    xeta: SGD step size parameter
    n_batch: minibatch size for SGD
    modelfile: files containing previous encoder and decoder models
    weightfile: files containing previous encoder and decoder weights
'''
models.append(
    SDNE(d=args.d, beta=args.beta, alpha=args.alpha, nu1=args.nu1, nu2=args.nu2,
         K=args.k, n_units=args.nunits, n_iter=args.niter, xeta=args.xeta,
         n_batch=args.nbatch,
         modelfile=['enc_model.json', 'dec_model.json'],
         weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                            G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(
        graph=G, edge_f=None, is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, embedding, Y, None)
    # -------------------------------------------------------------------------
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
        MAP, prec_curv[:5]))
    # -------------------------------------------------------------------------
    # Visualize
    # viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)
    # plt.show()
from gem.embedding.gf import GraphFactorization as gf
from gem.evaluation import evaluate_graph_reconstruction as gr
from gem.utils.graph_util import *

# Instantiate the embedding method with hyperparameters
em = gf(2, 100000, 1 * 10**-4, 1.0)

# Load graph
graph = loadGraphFromEdgeListTxt('data/karate.edgelist')

# Learn embedding - accepts a networkx graph or file with edge list
Y, t = em.learn_embedding(graph, edge_f=None, is_weighted=True, no_python=True)

# Evaluate on graph reconstruction
MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
    graph, em, Y, None)
print(MAP, prec_curv)
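# --- Variation sketch (added for illustration; not in the original snippet) ---
# The same load / learn / evaluate pipeline works with other GEM methods, e.g.
# HOPE or Laplacian Eigenmaps; the hyperparameter values below are assumptions.
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps

for em_alt in [HOPE(d=4, beta=0.01), LaplacianEigenmaps(d=2)]:
    Y_alt, t_alt = em_alt.learn_embedding(graph, edge_f=None,
                                          is_weighted=True, no_python=True)
    MAP_alt, prec_alt, _, _ = gr.evaluateStaticGraphReconstruction(
        graph, em_alt, Y_alt, None)
    print(em_alt._method_name, MAP_alt)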
         modelfile=[
             './intermediate/enc_model.json',
             './intermediate/dec_model.json'
         ],
         weightfile=[
             './intermediate/enc_weights.hdf5',
             './intermediate/dec_weights.hdf5'
         ]))

for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                            G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, embedding, Y, None)
    # -------------------------------------------------------------------------
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
        MAP, prec_curv))
    # -------------------------------------------------------------------------
    # Visualize
    viz.plot_embedding2D(embedding.get_embedding(), di_graph=G,
                         node_colors=None)
    plt.show()
def benchmark(x, cv=5):
    """This function automatically runs through a series of benchmarks for
    unsupervised learning (MAP), semi-supervised learning, and supervised
    learning (cross-validation accuracy with random forest classifiers) for
    the provided input dataset.

    # Arguments:
        x (NEGraph): A NeuroEmbed graph.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: A result dictionary with all models and results.
    """
    all_results = {}
    G, X, y, S, names = x.G, x.X, x.y, x.S, x.names
    out_metrics = {}
    model = ASEEmbedding()
    model.fit(X)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, model.H, is_undirected=False, is_weighted=True)
    out_metrics['MAP'] = MAP
    d = model.H.shape[1] // 2
    out_metrics = generate_metrics(G, model, model.H, y, model.y, S, cv=cv)
    all_results['ASE'] = out_metrics
    raw_model = RawEmbedding()
    raw_model.fit(X, n_components=d)
    out_metrics = generate_metrics(G, raw_model, raw_model.H, y, raw_model.y,
                                   S, cv=cv)
    all_results['Raw'] = out_metrics
    G = nx.from_numpy_matrix(X, create_using=nx.DiGraph)
    Gd = nx.from_numpy_matrix(X + 1e-9, create_using=nx.DiGraph)
    models = {}
    if N2VC_available:
        models['node2vec'] = node2vec(d=d, max_iter=10, walk_len=80,
                                      num_walks=10, con_size=10, ret_p=1,
                                      inout_p=1)
    models['HOPE'] = HOPE(d=d, beta=0.01)
    models['Laplacian Eigenmaps'] = LaplacianEigenmaps(d=d)
    for model_name, embedding in models.items():
        if model_name == 'node2vec':
            # node2vec is run on Gd, built from X + 1e-9, i.e. the adjacency
            # matrix with a tiny constant added to every entry
            Xh, t = embedding.learn_embedding(graph=Gd, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                Gd, embedding, Xh, is_undirected=False, is_weighted=False)
        else:
            Xh, t = embedding.learn_embedding(graph=G, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                G, embedding, Xh, is_undirected=False, is_weighted=False)
        Xh = np.real(Xh)
        if y is not None:
            clf = RandomForestClassifier(n_estimators=200)
            # note: the random forest is immediately replaced below, so the MLP
            # is what cross_val_score actually evaluates
            clf = MLPClassifier(alpha=1, max_iter=100000)
            clusterer = GaussianMixture(n_components=Xh.shape[1])
            clusterer.fit(Xh)
            predict_labels = clusterer.predict(Xh)
            scores = cross_val_score(clf, Xh, y, cv=cv)
            out_metrics['CV'] = scores.mean()
            if S is not None:
                scores = cross_val_score(clf, np.hstack((Xh, S)), y, cv=cv)
                out_metrics['CVAnatomy+Graph'] = scores.mean()
                scores = cross_val_score(clf, S, y, cv=cv)
                out_metrics['CVAnatomyOnly'] = scores.mean()
            out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(
                y, predict_labels)
            out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(
                y, predict_labels)
        out_metrics['MAP'] = MAP
        print(model_name, out_metrics)
        all_results[model_name] = out_metrics
    return all_results
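# --- Usage sketch (added for illustration; not in the original source) ---
# benchmark() only reads the attributes G, X, y, S and names from its input, so
# besides the expected NEGraph container a simple namespace can be used for
# quick experiments. The SimpleNamespace wrapper and field contents below are
# assumptions.
def _demo_benchmark(adjacency, labels, anatomy_features):
    from types import SimpleNamespace
    data = SimpleNamespace(
        G=nx.from_numpy_matrix(adjacency, create_using=nx.DiGraph),
        X=adjacency,
        y=labels,
        S=anatomy_features,
        names=None)
    return benchmark(data, cv=5)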
print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                        G.number_of_edges()))
t1 = time()
# Learn embedding - accepts a networkx graph or file with edge list
Y, t = embedding.learn_embedding(graph=G_train, valgraph=G_val, edge_f=None,
                                 is_weighted=True, no_python=True)
print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

# Evaluate on graph reconstruction: train
MANE, avgrecpred, avgrectrue, err, err_baseline = gr.evaluateStaticGraphReconstruction(
    G_train, embedding, Y, None, is_weighted=True, is_undirected=False)
print("MANE train is ", MANE)
print("avgrec 10 pred train is ", avgrecpred)
print("avgrec 10 true is ", avgrectrue)
print("MSE train is ", pow(err, 2) / G_train.number_of_edges())
# print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(MAP, prec_curv[:5]))
# viz.plot_embedding2D(embedding.get_embedding(), di_graph=G_train, node_colors=None)
# plt.show()
# plt.clf()

# Evaluate on graph reconstruction: val
MANE, avgrecpred, avgrectrue, err, err_baseline = gr.evaluateStaticGraphReconstruction(
    G_val, embedding, Y, None, is_weighted=True, is_undirected=False)
print("MANE val is ", MANE)
print("avgrec 10 pred val is ", avgrecpred)
print("avgrec 10 true val is ", avgrectrue)