from time import time

from gem.utils import graph_util
from gem.embedding.lap import LaplacianEigenmaps


def main(opts):
    dataset = opts.dataset
    embed_dim = int(opts.dimension)

    # File that contains the edges. Format: source target
    # Optionally, you can add weights as a third column: source target weight
    edge_f = 'Data/%s.edgelist' % dataset

    # Specify whether the edges are directed
    # isDirected = True

    print("Loading Dataset")
    # Load graph
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
    # G = G.to_directed()

    embedding = LaplacianEigenmaps(d=embed_dim)
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))

    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    print("Starting Embedding")
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    # np_save and writable are project-specific helpers, not part of GEM
    np_save(writable("Embedding_Results", "jac_" + dataset + str(embed_dim)), Y)
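# The helpers np_save and writable above are not defined in this snippet. A
# minimal sketch of what they plausibly do (names and behaviour are assumptions,
# not the project's actual implementation): writable builds a path inside an
# output directory, creating it if needed, and np_save wraps numpy.save.
import os
import numpy as np

def writable(directory, filename):
    # Hypothetical helper: ensure the output directory exists, return the full path.
    os.makedirs(directory, exist_ok=True)
    return os.path.join(directory, filename)

def np_save(path, array):
    # Hypothetical helper: persist the embedding matrix as a .npy file.
    np.save(path, array)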
import pandas

from gem.utils import graph_util
from gem.embedding.lap import LaplacianEigenmaps


def main(data_set_name):
    dimensions = 4
    input_file = './graph/' + data_set_name + '.tsv'
    output_file = './emb/' + data_set_name + '.emb'

    # Instantiate the embedding method with hyperparameters
    laplacian_eigenmaps = LaplacianEigenmaps(dimensions)

    # Load graph
    graph = graph_util.loadGraphFromEdgeListTxt(input_file)

    # Learn embedding - accepts a networkx graph or file with edge list
    embeddings_array, t = laplacian_eigenmaps.learn_embedding(
        graph, edge_f=None, is_weighted=True, no_python=True)

    embeddings = pandas.DataFrame(embeddings_array)
    embeddings.to_csv(output_file, sep=' ', na_rep=0.1)
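# A quick check of the output format written above (a sketch; 'example' is a
# hypothetical data_set_name): each row is a node, the first column is the node
# index, and the remaining space-separated columns are the embedding dimensions.
import pandas

embeddings = pandas.read_csv('./emb/example.emb', sep=' ', index_col=0)
print(embeddings.shape)  # (num_nodes, dimensions)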
import ast

import numpy as np

from gem.utils import graph_util
from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding
from gem.embedding.sdne import SDNE


def main(args):
    # Load edgelist
    G = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = G.to_directed()

    # Preprocess the graph
    # G, _ = prep_graph(G)

    if args.method == 'gf':
        # GF takes embedding dimension (d), maximum iterations (max_iter),
        # learning rate (eta) and regularization coefficient (regu) as inputs
        model = GraphFactorization(d=args.dimension, max_iter=args.max_iter,
                                   eta=args.eta, regu=args.regu)
    elif args.method == 'hope':
        # HOPE takes embedding dimension (d) and decay factor (beta) as inputs
        model = HOPE(d=args.dimension, beta=args.beta)
    elif args.method == 'lap':
        # LE takes embedding dimension (d) as input
        model = LaplacianEigenmaps(d=args.dimension)
    elif args.method == 'lle':
        # LLE takes embedding dimension (d) as input
        model = LocallyLinearEmbedding(d=args.dimension)
    elif args.method == 'sdne':
        encoder_layer_list = ast.literal_eval(args.encoder_list)
        # SDNE takes embedding dimension (d), seen edge reconstruction weight (beta),
        # first-order proximity weight (alpha), lasso regularization coefficient (nu1),
        # ridge regression coefficient (nu2), number of hidden layers (K), size of
        # each layer (n_units), number of iterations (n_iter), learning rate (xeta),
        # batch size (n_batch), and the locations to save the model and weights
        # (modelfile and weightfile) as inputs
        model = SDNE(d=args.dimension, beta=args.beta, alpha=args.alpha,
                     nu1=args.nu1, nu2=args.nu2, K=len(encoder_layer_list),
                     n_units=encoder_layer_list, n_iter=args.max_iter,
                     xeta=args.learning_rate, n_batch=args.bs)
        # , modelfile=['enc_model.json', 'dec_model.json'],
        #   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])
    else:
        raise ValueError('The requested method does not exist!')

    # Learn the node embeddings
    Y, t = model.learn_embedding(graph=G, edge_f=None, is_weighted=args.weighted,
                                 no_python=True)
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
def __init__(self, dim=4, models=None):
    # Initialize the set of possible models.
    # See "Graph Embedding Techniques, Applications, and Performance: A Survey"
    # by Goyal and Ferrara (2017) for a taxonomy of graph embedding methods.
    if models is None:  # avoid a mutable default argument
        models = []
    if not models:  # if no models are specified, create some default ones
        # Presently all default methods are factorization-based methods.
        # The first method is very expensive unless the C++ version is installed:
        # models.append(GraphFactorization(d=2, max_iter=100000, eta=1 * 10**-4, regu=1.0))
        models.append(HOPE(d=dim, beta=0.01))
        models.append(LaplacianEigenmaps(d=dim))
        models.append(LocallyLinearEmbedding(d=dim))
        # The following random-walk-based and deep-learning-based methods
        # will be enabled in the future:
        # models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1))
        # models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15,], rho=0.3,
        #                    n_iter=50, xeta=0.01, n_batch=500,
        #                    modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
        #                    weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))
    self.models = models
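# A minimal, hedged usage sketch of the default model list above (assumes GEM is
# installed and uses Zachary's karate club as a toy graph; this is not the
# project's own driver code):
import networkx as nx
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding

G = nx.karate_club_graph().to_directed()
for model in [HOPE(d=4, beta=0.01), LaplacianEigenmaps(d=4), LocallyLinearEmbedding(d=4)]:
    # learn_embedding returns (embedding matrix, training time)
    Y, t = model.learn_embedding(graph=G, edge_f=None, is_weighted=False, no_python=True)
    print(model._method_name, Y.shape)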
def _get_embeddings(self, embedding_space):
    # You can comment out the methods you don't want to run
    models = list()
    for embed_method in self.embeddings:
        # if embed_method == EMEDDINGS.GRAPH_FACTORIZATIONE_MBEDDINGS:
        #     models.append(GraphFactorization(embedding_space, 100000, 1 * 10**-4, 1.0))
        if embed_method == EMEDDINGS.LAPLACIAN_EIGENMAPS_EMBEDDINGS:
            models.append(LaplacianEigenmaps(embedding_space))
        elif embed_method == EMEDDINGS.LOCALLY_LINEAR_EMBEDDING:
            models.append(LocallyLinearEmbedding(embedding_space))
        elif embed_method == EMEDDINGS.HOPE_EMBEDDING:
            # HOPE seems to convert k to k + 1 internally, hence the 2 + 1 here
            models.append(HOPE(2 + 1, 0.01))
        elif embed_method == EMEDDINGS.NODE2VEC_EMBEDDING_EMBEDDINGS:
            models.append(node2vec(2, 1, 80, 10, 10, 1, 1))
        # Embedding I was unable to get working yet:
        # elif embed_method == EMEDDINGS.SDNE_EMBEDDING_EMBEDDINGS:
        #     models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15,], rho=0.3,
        #                        n_iter=50, xeta=0.01, n_batch=500,
        #                        modelfile=[base_path + '/intermediate/enc_model.json', base_path + '/intermediate/dec_model.json'],
        #                        weightfile=[base_path + '/intermediate/enc_weights.hdf5', base_path + '/intermediate/dec_weights.hdf5']))
    return models
import networkx as nx
from timeit import Timer

from gem.embedding.lap import LaplacianEigenmaps

sizes = [200, 500, 1000, 2000, 3000, 5000, 10000, 15000, 20000, 35000, 50000]
_DENS = 1e-3

for s in sizes:
    G = nx.gnp_random_graph(s, _DENS, directed=True)
    l_eig = LaplacianEigenmaps(d=128)
    t = Timer('l_eig.learn_embedding(G)', setup='from __main__ import l_eig, G')
    n_runs = 3 if s <= 5000 else 1
    exec_times = t.repeat(n_runs, 1)
    print(f'{s}: {exec_times}')
    with open('l_eig_times.txt', 'a') as f:
        f.write(f'{s}: {exec_times}\n')
G = graph_util.loadGraphFromEdgeListTxt(list_graphs[grp], directed=list_directed[grp])
G = G.to_directed()

if not os.path.exists('SAVER_SUP/' + fig_name[grp] + str(x + 1)):
    os.makedirs('SAVER_SUP/' + fig_name[grp] + str(x + 1))

# Split the graph in a 60-20-20 ratio: 60% for calculating the edge features,
# 20% for training the classifier, 20% for evaluating the model.
train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
    G, train_ratio=0.6, is_undirected=True)
train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
    test_digraph, train_ratio=0.5, is_undirected=True)

# Embeddings without relearning
print("saving for LE")
for dim in dimensions:
    embedding = LaplacianEigenmaps(d=dim)
    X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
    file_name = 'SAVER_SUP/' + fig_name[grp] + str(x + 1) + '/LE1_' + str(dim)
    with open(file_name, 'wb') as parameter_file:
        pickle.dump(X, parameter_file)

# node2vec with ret_p = inout_p = 1 is equivalent to DeepWalk
print("saving for DEEPWALK")
for dim in dimensions:
    embedding = node2vec(d=dim, max_iter=1, walk_len=80, num_walks=10,
                         con_size=10, ret_p=1, inout_p=1)
    X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
    file_name = 'SAVER_SUP/' + fig_name[grp] + str(x + 1) + '/DEEPWALK1_' + str(dim)
    with open(file_name, 'wb') as parameter_file:
        pickle.dump(X, parameter_file)
# Before running the embedding, a check is done to see whether the file is already completed
completed_file_path = scratch_folder + "/" + use_model_type + "_" + uni_name + ".csv"

# Load path of the university graph
load_path = file_folder + "/" + uni_name + ".graphml"

# Save path of the embedded data
save_path = project_folder + "/" + use_model_type + "_" + uni_name + ".csv"

# Create an empty list of models
models = []

# Using an if/elif statement, load the model for this task.
# The end result is a list of length 1.
if use_model_type == "HOPE":
    models.append(HOPE(d=dims * 2, beta=0.01))
elif use_model_type == "LapEig":
    models.append(LaplacianEigenmaps(d=dims))
elif use_model_type == "LLE":
    models.append(LocallyLinearEmbedding(d=dims))
elif use_model_type == "node2vec":
    models.append(
        node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10,
                 ret_p=1, inout_p=1))
else:
    # This logically has to be SDNE as there are no other options
    models.append(
        SDNE(d=dims * 2,
             # ... (the remaining SDNE hyperparameters are truncated in the source)
def get_embeddings(graph, embedding_algorithm_enum, dimension_count, hyperparameter,
                   lower=None, higher=None):
    """Generate embeddings."""
    if embedding_algorithm_enum is EmbeddingType.LocallyLinearEmbedding:
        embedding_alg = LocallyLinearEmbedding(d=dimension_count)
    elif embedding_algorithm_enum is EmbeddingType.Hope:
        embedding_alg = HOPE(d=dimension_count, beta=0.01)
    elif embedding_algorithm_enum is EmbeddingType.GF:
        embedding_alg = GraphFactorization(d=dimension_count, max_iter=100000,
                                           eta=1 * 10**-4, regu=1.0)
    elif embedding_algorithm_enum is EmbeddingType.LaplacianEigenmaps:
        embedding_alg = LaplacianEigenmaps(d=dimension_count)
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistributionWithout:
        # Histogram of neighbour degrees per node, z-scored column-wise
        A = np.array([
            np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                         bins=dimension_count, density=True,
                         range=(lower, higher))[0]
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistribution:
        # Own (normalized) degree followed by the neighbour-degree histogram
        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / (higher * dimension_count)]),
                np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                             bins=dimension_count - 1, density=True,
                             range=(lower, higher))[0]
            ], axis=0)
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigNeigDistribution:
        # Own degree, neighbour-degree histogram and neighbour-of-neighbour
        # degree histogram, each histogram of bin_length bins
        bin_length = int(dimension_count / 2)
        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / higher]),
                np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                             bins=bin_length, density=True,
                             range=(lower, higher))[0],
                np.histogram([graph.degree(neigneig)
                              for neig in graph.neighbors(i)
                              for neigneig in graph.neighbors(neig)],
                             bins=bin_length, density=True,
                             range=(lower, higher))[0]
            ], axis=0)
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        # Only the neighbour-of-neighbour block is reweighted by the
        # hyperparameter; the first two blocks are left unscaled (the original
        # contained no-op self-assignments for them, removed here).
        A[:, 2 + bin_length:] = A[:, 2 + bin_length:] * hyperparameter
        A = np.nan_to_num(A)
        return A
    else:
        raise NotImplementedError
    A, t = embedding_alg.learn_embedding(graph=graph, no_python=True)
    # Fix the sign ambiguity of eigenvector-based embeddings, then z-score
    A = np.dot(A, np.diag(np.sign(np.mean(A, axis=0))))
    A = (A - A.mean(axis=0)) / A.std(axis=0)
    return A
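# A minimal runnable sketch of the DegreeNeigDistributionWithout branch above on
# a toy graph (networkx only; the graph, bin count, and epsilon guard against
# constant histogram columns are illustrative assumptions, not from the source):
import networkx as nx
import numpy as np

g = nx.karate_club_graph()
degs = [d for _, d in g.degree()]
lower, higher, bins = min(degs), max(degs), 8

# One row per node: normalized histogram of its neighbours' degrees
A = np.array([
    np.histogram([g.degree(n) for n in g.neighbors(i)],
                 bins=bins, density=True, range=(lower, higher))[0]
    for i in g.nodes()
])
A = (A - A.mean(axis=0)) / (A.std(axis=0) + 1e-12)  # column-wise z-score
print(A.shape)  # (34, 8)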
ROC1_n2vA[it2][it1] = ROC1
ROC2_n2vA[it2][it1] = ROC2

print("evaluating for LE")
for it2 in range(len(dimensions)):
    train_digraph_temp = train_digraph.copy()
    print(it1, it2)
    dim = dimensions[it2]
    file_name = 'SAVER_SUP/' + fig_name[fig] + str(it1 + 1) + '/LE1_' + str(dim)
    with open(file_name, 'rb') as parameter_file:
        X1 = pickle.load(parameter_file)
    file_name = 'SAVER_SUP/' + fig_name[fig] + str(it1 + 1) + '/LE2_' + str(dim)
    with open(file_name, 'rb') as parameter_file:
        X2 = pickle.load(parameter_file)
    embedding = LaplacianEigenmaps(d=dim)
    AP1, AP2, ROC1, ROC2 = evaluation_measures.calc_aproc_s(
        embedding, X1, X2, train_digraph_temp, train_digraph1, test_digraph,
        sample_edges, trp, trn, 1)
    AP1_LE[it2][it1] = AP1
    AP2_LE[it2][it1] = AP2
    ROC1_LE[it2][it1] = ROC1
    ROC2_LE[it2][it1] = ROC2

print("evaluating for VERSE")
for it2 in range(len(dimensions)):
    train_digraph_temp = train_digraph.copy()
    print(it1, it2)
    dim = dimensions[it2]
    file_name = 'SAVER_SUP/' + fig_name[fig] + str(it1 + 1) + '/VERSE1_' + str(dim)
    with open(file_name, 'rb') as parameter_file:
        X1 = pickle.load(parameter_file)
    file_name = 'SAVER_SUP/' + fig_name[fig] + str(it1 + 1) + '/VERSE2_' + str(dim)
    parameter_file = open(file_name, 'rb')
def __init__(self, d=2):
    self.model = LaplacianEigenmaps(d=d)
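# The snippet above only shows the constructor of a thin wrapper around GEM's
# LaplacianEigenmaps. A hedged sketch of how such a wrapper might be completed
# (the class and method names below are illustrative, not from the source):
from gem.embedding.lap import LaplacianEigenmaps

class LapEigWrapper:
    def __init__(self, d=2):
        self.model = LaplacianEigenmaps(d=d)

    def fit(self, graph):
        # Delegate to GEM; learn_embedding returns (embedding, training_time)
        self.embedding_, _ = self.model.learn_embedding(
            graph=graph, edge_f=None, is_weighted=False, no_python=True)
        return self

    def transform(self):
        # Return the learned node-embedding matrix
        return self.embedding_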
def benchmark(x, cv=5):
    """Automatically run a series of benchmarks for unsupervised learning (MAP),
    semi-supervised learning, and supervised learning (cross-validation accuracy
    with random forest classifiers) on the provided input dataset.

    # Arguments:
        x (NEGraph): A NeuroEmbed graph.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: A result dictionary with all models and results.
    """
    all_results = {}
    G, X, y, S, names = x.G, x.X, x.y, x.S, x.names

    model = ASEEmbedding()
    model.fit(X)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, model.H, is_undirected=False, is_weighted=True)
    d = model.H.shape[1] // 2
    out_metrics = generate_metrics(G, model, model.H, y, model.y, S, cv=cv)
    out_metrics['MAP'] = MAP  # set after generate_metrics so it is not overwritten
    all_results['ASE'] = out_metrics

    raw_model = RawEmbedding()
    raw_model.fit(X, n_components=d)
    out_metrics = generate_metrics(G, raw_model, raw_model.H, y, raw_model.y, S, cv=cv)
    all_results['Raw'] = out_metrics

    # Note: nx.from_numpy_matrix was removed in networkx 3.0; use from_numpy_array there
    G = nx.from_numpy_matrix(X, create_using=nx.DiGraph)
    Gd = nx.from_numpy_matrix(X + 1e-9, create_using=nx.DiGraph)

    models = {}
    if N2VC_available:
        models['node2vec'] = node2vec(d=d, max_iter=10, walk_len=80, num_walks=10,
                                      con_size=10, ret_p=1, inout_p=1)
    models['HOPE'] = HOPE(d=d, beta=0.01)
    models['Laplacian Eigenmaps'] = LaplacianEigenmaps(d=d)

    for model_name, embedding in models.items():
        out_metrics = {}  # fresh dict per model so results are not aliased across entries
        if model_name == 'node2vec':
            Xh, t = embedding.learn_embedding(graph=Gd, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                Gd, embedding, Xh, is_undirected=False, is_weighted=False)
        else:
            Xh, t = embedding.learn_embedding(graph=G, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                G, embedding, Xh, is_undirected=False, is_weighted=False)
        Xh = np.real(Xh)
        if y is not None:
            # The docstring promises random forests; the original also created an
            # MLPClassifier here that silently replaced the forest.
            clf = RandomForestClassifier(n_estimators=200)
            # clf = MLPClassifier(alpha=1, max_iter=100000)
            clusterer = GaussianMixture(n_components=Xh.shape[1])
            clusterer.fit(Xh)
            predict_labels = clusterer.predict(Xh)
            scores = cross_val_score(clf, Xh, y, cv=cv)
            out_metrics['CV'] = scores.mean()
            if S is not None:
                scores = cross_val_score(clf, np.hstack((Xh, S)), y, cv=cv)
                out_metrics['CVAnatomy+Graph'] = scores.mean()
                scores = cross_val_score(clf, S, y, cv=cv)
                out_metrics['CVAnatomyOnly'] = scores.mean()
            out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(y, predict_labels)
            out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(y, predict_labels)
        out_metrics['MAP'] = MAP
        print(model_name, out_metrics)
        all_results[model_name] = out_metrics
    return all_results
def laplacianEigenmaps(netData, **kwargs):
    d = kwargs.get('d', 2)
    from gem.embedding.lap import LaplacianEigenmaps
    emb = LaplacianEigenmaps(d=d)
    return attMethods.GEMexport(netData, emb)
neighbour_train_features = add_neighbour_data(g, train_data, nodes)
neighbour_test_features = add_neighbour_data(g, test_data, nodes)

print('Score without neighbours data')
create_and_evaluate_classifier(no_neighbour_train_features, train_targets,
                               no_neighbour_test_features, test_targets)

print('Score with neighbours data')
# We can reuse the same '[]_targets' arrays because both feature sets (with and
# without neighbours) are extracted from the same 'train_data' and 'test_data'
# and follow the same node order.
create_and_evaluate_classifier(neighbour_train_features, train_targets,
                               neighbour_test_features, test_targets)

###############################################################

if not os.path.exists(LAPLACIAN_EMB_FILE_PATH):
    laplacian = LaplacianEigenmaps(d=50)
    embs = laplacian.learn_embedding(g, edge_f=None, is_weighted=False, no_python=True)
    save_embeddings(LAPLACIAN_EMB_FILE_PATH, embs[0], list(g.nodes))

laplacian_embs = read_embeddings(LAPLACIAN_EMB_FILE_PATH)
node2vec_embs = read_embeddings(NODE2VEC_EMB_FILE_PATH)  # we already have the embeddings
sdne_embs = read_embeddings(SDNE_EMB_FILE_PATH)  # we already have the embeddings

print('LAPLACIAN')
create_and_evaluate_classifier(
    *extract_data_from_embs(laplacian_embs, train_data, test_data))
    train_digraph, test_digraph, sample_edges)

for it2 in range(4):
    print(it1, it2)
    AP_us[it2][it1] = AP[it2]
    ROC_us[it2][it1] = ROC[it2]

print("evaluating for LE")
for it2 in range(len(dimensions)):
    print(it1, it2)
    dim = dimensions[it2]
    file_name = 'SAVER/' + fig_name[fig] + str(it1 + 1) + '/LE_' + str(dim)
    with open(file_name, 'rb') as parameter_file:
        X = pickle.load(parameter_file)
    embedding = LaplacianEigenmaps(d=dim)
    embedding._X = X
    AP, ROC = evaluation_measures.calc_aproc_us(
        embedding, X, train_digraph, test_digraph, sample_edges)
    AP_LE[it2][it1] = AP
    ROC_LE[it2][it1] = ROC

print("evaluating for DEEPWALK")
for it2 in range(len(dimensions)):
    print(it1, it2)
    dim = dimensions[it2]
    file_name = 'SAVER/' + fig_name[fig] + str(it1 + 1) + '/DEEPWALK_' + str(dim)
    parameter_file = open(file_name, 'rb')
    X = pickle.load(parameter_file)
    parameter_file.close()
# File that contains the edges. Format: source target
# Optionally, you can add weights as a third column: source target weight
edge_f = 'data/karate.edgelist'

# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
models.append(GraphFactorization(d=2, max_iter=100000, eta=1 * 10**-4, regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
# models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1))
# models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15,], rho=0.3,
#                    n_iter=50, xeta=0.01, n_batch=500,
#                    modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
#                    weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
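    # In GEM's own README this loop continues by timing, evaluating graph
    # reconstruction, and visualizing each embedding. A hedged sketch of that
    # continuation (assumes the README's imports: gr is gem.evaluation's
    # evaluate_graph_reconstruction, viz is gem.utils.visualize_embedding,
    # and plt is matplotlib.pyplot):
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(MAP, prec_curv[:5]))
    # Visualize
    viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)
    plt.show()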
def plot_embed_graph(subreddit_title, edges, positions=None, node_labels=None,
                     node_colors=None, edge_colors=None, with_labels=True):
    plot_HOPE = 1
    plot_LE = 0
    plot_LLE = 0

    # Construct Graph or DiGraph from the data collected.
    G = nx.DiGraph()
    print("Adding edges...")
    for edge in edges:
        G.add_edge(edge[0], edge[1])

    models = []
    if plot_HOPE:
        # HOPE takes embedding dimension (d) and decay factor (beta) as inputs
        models.append(HOPE(d=4, beta=0.01))
    if plot_LE:
        # LE takes embedding dimension (d) as input
        models.append(LaplacianEigenmaps(d=2))
    if plot_LLE:
        # LLE takes embedding dimension (d) as input
        models.append(LocallyLinearEmbedding(d=2))

    model_count = 0
    graph_out = None
    for embedding in models:
        model_count = model_count + 1
        plt.figure(model_count)
        print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))

        skip_training = 0
        if not skip_training:
            t1 = time()
            # Learn embedding - accepts a networkx graph or file with edge list
            print("Now we train...")
            try:
                Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                                 is_weighted=True, no_python=True)
            except ValueError:
                regular_plot(subreddit_title, edges, positions=None,
                             node_labels=node_labels, node_colors=node_colors,
                             edge_colors=edge_colors, with_labels=with_labels)
                return
            print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

            # Evaluate on graph reconstruction
            # MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
            # print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(MAP, prec_curv[:5]))

            # Visualize
            print("Training finished... Let's visualize...")
            graph_out = regular_plot(subreddit_title, edges,
                                     positions=embedding.get_embedding(),
                                     node_labels=node_labels, node_colors=node_colors,
                                     edge_colors=edge_colors, with_labels=with_labels)
            # viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=sub_colors, labels=graph_labels)
            # plt.title("Scraping Reddit starting from " + subreddit_title)
            # plt.show()
    return graph_out