def binary_community_graph(N, k, maxk, mu):
    """Generate a community benchmark graph with the external ``benchm`` tool.

    Runs ``gem/c_exe/benchm`` (``benchm.exe`` when ``sys.platform`` starts
    with ``"w"``) through the shell, then loads ``network.dat`` and
    ``community.dat``, first from ``gem/c_exe/`` and, failing that, from the
    current directory.

    Args:
        N: value for the ``-N`` option (formatted with ``%d``).
        k: value for the ``-k`` option (formatted with ``%d``).
        maxk: value for the ``-maxk`` option (formatted with ``%d``).
        mu: value for the ``-mu`` option (formatted with ``%f``).

    Returns:
        ``(graph, labels)``: the graph returned by
        ``graph_util.loadGraphFromEdgeListTxt`` and the ``OneHotEncoder``
        transform of the last column of ``community.dat``.
    """
    # The Windows build of the generator carries an .exe suffix.
    if sys.platform[0] == "w":
        fcall = "gem/c_exe/benchm.exe"
    else:
        fcall = "gem/c_exe/benchm"
    args = [fcall, "-N %d" % N, "-k %d" % k, "-maxk %d" % maxk, "-mu %f" % mu]
    t1 = time()
    print(args)
    # os.system() does not raise when the program is missing or fails; the
    # only failure signal is a non-zero return value, so test that instead
    # of wrapping the call in try/except.
    status = os.system("%s -N %d -k %d -maxk %d -mu %f" % (fcall, N, k, maxk, mu))
    if status != 0:
        print('ERROR: %s exited with status %d' % (fcall, status))
        print('gem/c_exe/benchm not found. Please compile gf, place benchm in the path and grant executable permission')
    t2 = time()
    print('\tTime taken to generate random graph: %f sec' % (t2 - t1))
    try:
        graph = graph_util.loadGraphFromEdgeListTxt('gem/c_exe/network.dat')
        node_labels = np.loadtxt('gem/c_exe/community.dat')
    except Exception:
        # Best-effort fallback: look for the output files in the current
        # working directory (KeyboardInterrupt/SystemExit now propagate).
        graph = graph_util.loadGraphFromEdgeListTxt('network.dat')
        node_labels = np.loadtxt('community.dat')
    # Only the last column is used as the label; reshape to a column vector
    # because the encoder is fitted on a 2-D array.
    node_labels = node_labels[:, -1].reshape(-1, 1)
    enc = OneHotEncoder()
    return graph, enc.fit_transform(node_labels)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Learn a graph-factorization embedding and return it with the run time.

    With ``no_python`` set, the compiled ``c_ext.graphFac_ext`` module is
    tried first; if it cannot be imported the pure-Python gradient loop below
    is used instead.

    Args:
        graph: graph object; may be omitted when ``edge_f`` is given.
        edge_f: path of an edge-list file.
        is_weighted: ignored on both paths (the compiled path always passes
            ``True``).
        no_python: request the compiled implementation.

    Returns:
        ``(self._X, seconds)`` where ``seconds`` is the difference of two
        ``time()`` readings taken around the training step.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')

    compiled = None
    if no_python:
        try:
            from c_ext import graphFac_ext as compiled
        except ImportError:
            print(
                'Could not import C++ module for Graph Factorization. Reverting to python implementation. Please recompile graphFac_ext from graphFac.cpp using bjam'
            )

    if compiled is not None:
        # The extension reads its input from disk, so the graph is always
        # written to a temporary edge list first.
        if edge_f:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph_util.saveGraphToEdgeListTxt(graph, 'tempGraph.graph')
        started = time()
        compiled.learn_embedding('tempGraph.graph', "tempGraphGF.emb", True,
                                 True, self._d, self._eta, self._regu,
                                 self._max_iter)
        self._X = graph_util.loadEmbedding('tempGraphGF.emb')
        finished = time()
        return self._X, (finished - started)

    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    started = time()
    self._node_num = graph.number_of_nodes()
    self._X = 0.01 * np.random.randn(self._node_num, self._d)
    for iter_id in range(self._max_iter):
        if iter_id % self._print_step == 0:
            f1, f2, f = self._get_f_value(graph)
            print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' % (iter_id, f, f1, f2))
        for i, j, w in graph.edges(data='weight', default=1):
            # Only edges with i < j trigger an update of row i.
            if i < j:
                residual = w - np.dot(self._X[i, :], self._X[j, :])
                gradient = -residual * self._X[j, :] + self._regu * self._X[i, :]
                self._X[i, :] -= self._eta * gradient
    finished = time()
    return self._X, (finished - started)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Learn an embedding by running the external VERSE programs.

    The graph is written to ``tempGraph_verse.graph``, converted with
    ``convert.py`` to ``outgraph_verse.bcsr`` and embedded by the ``verse``
    binary; the result is read back from ``tempGraph_verse.emb``.

    Args:
        graph: graph object; replaced by the graph loaded from ``edge_f``
            when ``edge_f`` is given.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``; ``self._X`` has shape
        ``(graph.number_of_nodes(), self._d)`` and ``seconds`` is measured
        around the ``verse`` run and the read-back.

    Raises:
        Exception: if neither input is given, or if ``verse`` exits with a
            non-zero status.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if edge_f:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph_util.write_edgelist(graph, 'tempGraph_verse.graph')

    # os.system() reports failure through its return value, never by
    # raising, so the status has to be inspected explicitly.
    status = os.system(
        "python gem/verse-master/python/convert.py tempGraph_verse.graph outgraph_verse.bcsr"
    )
    if status != 0:
        # Kept best-effort, as before: report and carry on.
        print('convert.py exited with status %d' % status)

    args = "gem/verse-master/src/verse -input outgraph_verse.bcsr -output tempGraph_verse.emb -dim " + str(
        self._d) + " -alpha " + str(self._alpha) + " -threads " + str(
            self._threads) + " -nsamples " + str(self._nsamples)
    t1 = time()
    status = os.system(args)
    if status != 0:
        # Without this check a failed run would fall through to np.fromfile
        # and either crash there or silently read a stale embedding file.
        print('verse exited with status %d' % status)
        raise Exception(
            './verse not found. Please compile, place verse in the path and grant executable permission'
        )
    self._X = np.fromfile('tempGraph_verse.emb',
                          np.float32).reshape(graph.number_of_nodes(), self._d)
    t2 = time()
    return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph with eigenvectors of its normalized Laplacian.

    Computes the ``self._d + 1`` eigenpairs selected by ``which='SM'``,
    sorts them by eigenvalue and drops the first eigenvector.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(embedding, seconds)``: the real part of the retained eigenvectors
        and the time spent on the eigen-decomposition.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    L_sym = nx.normalized_laplacian_matrix(graph)
    k = self._d + 1
    # We hit this error message https://stackoverflow.com/questions/18436667/python-scipy-sparse-matrix-svd-with-error-arpack-error-3-no-shifts-could-be-app
    # so ncv is increased accordingly; it is capped at the matrix size
    # because ARPACK rejects ncv > n on small graphs.
    ncv = min(4 * k, L_sym.shape[0])
    w, v = lg.eigs(L_sym, k=k, which='SM', ncv=ncv)
    # eigs does not promise any ordering; sort so that column 0 belongs to
    # the smallest eigenvalue before it is dropped.
    v = v[:, np.argsort(w)]
    t2 = time()
    self._X = v[:, 1:]
    # The dense n-by-n reconstruction that used to follow was never used
    # (its print was commented out), so it is no longer computed.
    return self._X.real, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Learn an embedding by invoking the external ``node2vec`` executable.

    The graph is saved to ``embs/n2v_tempGraph.graph``, the binary at
    ``gem/c_exe/node2vec`` is run on it, and the vectors are read back from
    ``embs/n2v_tempGraph.emb``.

    Args:
        graph: graph object; replaced by the graph loaded from ``edge_f``
            when ``edge_f`` is given.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)`` where ``seconds`` covers only the external
        call.

    Raises:
        Exception: if neither input is given, or if launching the executable
            raises.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if edge_f:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph_util.saveGraphToEdgeListTxtn2v(graph, 'embs/n2v_tempGraph.graph')

    command = [
        "gem/c_exe/node2vec",
        "-i:embs/n2v_tempGraph.graph",
        "-o:embs/n2v_tempGraph.emb",
        "-d:%d" % self._d,
        "-l:%d" % self._walkLength,
        "-r:%d" % self._numWalks,
        "-k:%d" % self._contextSize,
        "-e:%d" % self._max_iter,
        "-p:%f" % self._return_p,
        "-q:%f" % self._inout_p,
        "-v",
        "-dr",
        "-w",
    ]
    started = time()
    try:
        call(command)
    except Exception as err:
        print(str(err))
        raise Exception(
            './node2vec not found. Please compile snap, place node2vec in the path and grant executable permission'
        )
    finished = time()
    self._X = graph_util.loadEmbedding('embs/n2v_tempGraph.emb')
    return self._X, (finished - started)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph from a truncated SVD of ``inv(I - beta*A) . (beta*A)``.

    ``A`` is the dense adjacency matrix. The left and right singular vectors
    (``self._d // 2`` each), scaled by the square roots of the singular
    values, are concatenated column-wise.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``; the timing stops before the reconstruction
        error is computed and printed.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    started = time()
    adjacency = nx.to_numpy_matrix(graph)
    local_term = self._beta * adjacency
    global_term = np.eye(graph.number_of_nodes()) - local_term
    proximity = np.dot(np.linalg.inv(global_term), local_term)

    left, sigma, right_t = lg.svds(proximity, k=self._d // 2)
    root_sigma = np.diag(np.sqrt(sigma))
    source_part = np.dot(left, root_sigma)
    target_part = np.dot(right_t.T, root_sigma)
    finished = time()

    self._X = np.concatenate((source_part, target_part), axis=1)
    # Report how far the rank-limited product is from the full matrix.
    approx = np.dot(left, np.dot(np.diag(sigma), right_t))
    print('SVD error (low rank): %f' % np.linalg.norm(approx - proximity))
    return self._X, (finished - started)
def main(opts):
    """Embed ``Data/<dataset>.edgelist`` with Laplacian eigenmaps and save it.

    Args:
        opts: object providing ``dataset`` (name used to build the file
            paths) and ``dimension`` (converted with ``int``).
    """
    dataset = opts.dataset
    embed_dim = int(opts.dimension)
    # File that contains the edges. Format: source target
    # Optionally, you can add weights as third column: source target weight
    edge_f = 'Data/%s.edgelist' % dataset

    # print() calls with a single argument behave the same on Python 2 and
    # 3; the former print statements were a SyntaxError on Python 3.
    print("Loading Dataset")
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
    embedding = LaplacianEigenmaps(d=embed_dim)
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))

    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    print("Starting Embedding")
    Y, _ = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    np_save(writable("Embedding_Results", "jac_" + dataset + str(embed_dim)), Y)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph from the smallest singular vectors of ``I - A``.

    ``A`` is the sparse adjacency matrix with each row scaled to unit L1
    norm. ``self._d + 1`` singular triplets are requested with
    ``which='SM'`` and the first right singular vector is dropped.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(embedding, seconds)``: the real part of the retained vectors and
        the time spent up to the end of the SVD.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    A = nx.to_scipy_sparse_matrix(graph)
    # Use the return value: copy=False only normalizes in place when no
    # conversion is needed (e.g. an integer adjacency matrix is converted to
    # float, which yields a new object and leaves A untouched).
    A = normalize(A, norm='l1', axis=1, copy=False)
    I_n = sp.eye(len(graph.nodes))
    I_min_A = I_n - A
    # In scipy.sparse.linalg maxiter defaults to num_nodes * 10, but in
    # testing this sometimes isn't enough, which crashes instead of settling
    # on whatever eigenvectors are at after maxiter iterations.
    u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM',
                       maxiter=(graph.number_of_nodes() * 1000))
    t2 = time()
    self._X = vt.T[:, 1:]
    return self._X.real, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph with eigenvectors of its normalized Laplacian.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``: the ``self._d`` eigenvectors kept after
        dropping the one with the smallest eigenvalue (as returned by
        ``eigs``, i.e. possibly complex-typed), and the time spent on the
        eigen-decomposition.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    L_sym = nx.normalized_laplacian_matrix(graph)
    w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
    # eigs does not promise any ordering of the eigenpairs; sort them so the
    # column dropped below really is the one with the smallest eigenvalue.
    v = v[:, np.argsort(w)]
    t2 = time()
    self._X = v[:, 1:]
    return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph via SVD of ``inv(I - beta*A) . (beta*A)``.

    The embedding matrix is stored in ``self._X``; the value handed back to
    the caller is a dictionary keyed by node.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(models, seconds)`` where ``models`` pairs each entry of
        ``graph.nodes`` with the matching row of ``self._X`` as a list.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    tic = time()
    A = nx.to_numpy_matrix(graph)
    beta_A = self._beta * A
    S = np.dot(np.linalg.inv(np.eye(graph.number_of_nodes()) - beta_A), beta_A)
    u, s, vt = lg.svds(S, k=self._d // 2)
    scale = np.diag(np.sqrt(s))
    halves = (np.dot(u, scale), np.dot(vt.T, scale))
    toc = time()

    self._X = np.concatenate(halves, axis=1)
    low_rank = np.dot(u, np.dot(np.diag(s), vt))
    print('SVD error (low rank): %f' % np.linalg.norm(low_rank - S))

    # One entry per node, in the iteration order of graph.nodes.
    models = {node: row for node, row in zip(graph.nodes, self._X.tolist())}
    return models, (toc - tic)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph with Laplacian eigenvectors, or randomly on failure.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``. If the decomposition or the error report
        raises, ``self._X`` is a standard-normal random matrix of shape
        ``(n, self._d)`` instead.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    L_sym = nx.normalized_laplacian_matrix(graph)
    try:
        w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
        # eigs does not promise any ordering; sort so that the column
        # dropped below belongs to the smallest eigenvalue.
        idx = np.argsort(w)
        w = w[idx]
        v = v[:, idx]
        t2 = time()
        self._X = v[:, 1:]
        p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
        eig_err = np.linalg.norm(p_d_p_t - L_sym)
        print('Laplacian matrix recon. error (low rank): %f' % eig_err)
        return self._X, (t2 - t1)
    except Exception:
        # "except Exception" keeps the best-effort fallback for numerical
        # failures while letting KeyboardInterrupt/SystemExit propagate.
        print('SVD did not converge. Assigning random emebdding')
        self._X = np.random.randn(L_sym.shape[0], self._d)
        t2 = time()
        return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph via SVD of a proximity matrix chosen by ``self._sim_fn``.

    The proximity is ``inv(M_g) . M_l`` with ``M_g``/``M_l`` built per
    ``self._sim_fn``: ``"katz"``, ``"pagerank"``, ``"cn"`` or ``"aa"``; any
    other value is treated like ``"katz"``.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy. Nodes are
            addressed as ``range(number_of_nodes)``.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``. If the inversion or SVD raises, ``self._X``
        is a random matrix with ``2 * (self._d // 2)`` columns.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    t1 = time()
    n = graph.number_of_nodes()
    A = nx.to_numpy_matrix(graph, nodelist=range(n))
    if self._sim_fn == "katz":
        M_g = np.eye(n) - self._beta * A
        M_l = self._beta * A
    elif self._sim_fn == "pagerank":
        # Plain ndarray needed for the row-wise broadcasting below.
        A = np.array(A)
        # The epsilon guards against division by zero for empty rows.
        row_sums = A.sum(axis=1) + 1e-8
        P = A / row_sums[:, np.newaxis]
        M_g = np.eye(n) - self._beta * P
        M_l = (1 - self._beta) * np.eye(n)
    elif self._sim_fn == "cn":
        M_g = np.eye(n)
        M_l = np.dot(A, A)
    elif self._sim_fn == "aa":
        # Work on a plain ndarray: with np.matrix the row and column sums
        # broadcast to n-by-n and np.diag then returns a 1-D vector, which
        # made M_l the wrong shape and always triggered the random fallback.
        dense = np.asarray(A)
        degree = (dense.sum(axis=1) + dense.sum(axis=0)).astype('float')
        # Nodes with zero total degree have all-zero rows/columns in A, so
        # their weight is irrelevant; 0 avoids inf/NaN in the products.
        inv_degree = np.zeros_like(degree)
        connected = degree != 0
        inv_degree[connected] = 1.0 / degree[connected]
        D = np.diag(inv_degree)
        M_g = np.eye(n)
        M_l = np.dot(np.dot(A, D), A)
    else:
        M_g = np.eye(n) - self._beta * A
        M_l = self._beta * A
    try:
        S = np.dot(np.linalg.inv(M_g), M_l)
        u, s, vt = lg.svds(S, k=self._d // 2)
        X1 = np.dot(u, np.diag(np.sqrt(s)))
        X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
        t2 = time()
        self._X = np.concatenate((X1, X2), axis=1)
        p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
        eig_err = np.linalg.norm(p_d_p_t - S)
        print('SVD error (low rank): %f' % eig_err)
        return self._X, (t2 - t1)
    except Exception:
        # Best-effort fallback kept; KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        print(
            'Singularity Matrix or SVD did not converge. Assigning random emebdding'
        )
        X1 = np.random.randn(A.shape[0], self._d // 2)
        X2 = np.random.randn(A.shape[0], self._d // 2)
        t2 = time()
        self._X = np.concatenate((X1, X2), axis=1)
        return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph by factorizing powers of its row-normalized adjacency.

    For each of the ``self._K`` powers ``A, A^2, ...`` a one-slab tensor is
    decomposed with ``parafac``; the resulting source and target columns are
    written to columns ``i`` and ``i + self._K`` of the embedding.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)`` with ``self._X`` of shape
        ``(n_nodes, self._d)``.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        AssertionError: if ``self._d`` is set and differs from
            ``2 * self._K``.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    t1 = time()
    nNodes = graph.number_of_nodes()
    nEdges = graph.number_of_edges()
    print('num nodes: ', nNodes)
    print('num edges: ', nEdges)
    S = nx.to_numpy_matrix(graph, nodelist=sorted(graph.nodes()))
    A = Normalizer(norm='l1').fit_transform(S)

    # Identity test for None instead of "== None".
    if self._d is None:
        self._d = 2 * self._K
    else:
        assert self._d == 2 * self._K

    # Tensorization: slab i holds the (i + 1)-th power of A.
    md_array = np.zeros((nNodes, nNodes, self._K))
    int_res = A
    for i in range(self._K):
        md_array[:, :, i] = int_res
        int_res = int_res.dot(A)

    emb = np.zeros((nNodes, self._d))
    for i in range(self._K):
        print('Slab id: ', i)
        slab = np.reshape(md_array[:, :, i], (nNodes, nNodes, 1))
        XX = tl.tensor(slab)
        print('Tensor shape: ', XX.shape)
        factors = parafac(XX, rank=self._R, n_iter_max=self._n_iter,
                          init='random')  # random_state=123,
        # NOTE(review): assumes parafac returns the three factor matrices as
        # an indexable sequence - confirm against the installed tensorly.
        source_emb = factors[0]
        target_emb = factors[1]
        proximity_emb = factors[2]
        print('Source emb shape: ', source_emb.shape)
        print('Target emb shape: ', target_emb.shape)
        print('Proximity emb shape: ', proximity_emb.shape)
        source_proximity_emb = np.dot(source_emb, proximity_emb.T)
        target_proximity_emb = np.dot(target_emb, proximity_emb.T)
        emb[:, [i, i + self._K]] = np.concatenate(
            (source_proximity_emb, target_proximity_emb), axis=1)
    self._X = emb
    print("Embedding shape: ", self._X.shape)
    t2 = time()
    return self._X, (t2 - t1)
def tsv2edgelist(file, isDirected=True):
    """Load an edge list from a tsv file for use in the GEM package.

    Args:
        file: path of the tsv edge list. A falsy value falls back to the
            legacy default ``'../data/Nedgelist.tsv'`` (previously this path
            was always used and ``file`` was ignored).
        isDirected: forwarded as ``directed``; when true the result of
            ``to_directed()`` on the loaded graph is returned.

    Returns:
        The loaded graph.
    """
    path = file if file else '../data/Nedgelist.tsv'
    G = graph_util.loadGraphFromEdgeListTxt(path, directed=isDirected)
    if isDirected:
        return G.to_directed()
    return G
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Laplacian-eigenmaps embedding, expanded so rows are indexed by node ID.

    Args:
        graph: graph object with integer node IDs; loaded from ``edge_f``
            when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``. ``self._X`` has ``max(node ID) + 1`` rows;
        row ``i`` holds the vector of node ``i`` and rows for IDs that do
        not occur in the graph are zero.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    L_sym = nx.normalized_laplacian_matrix(graph)
    w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
    # eigs does not promise any ordering; sort so that the column dropped
    # below belongs to the smallest eigenvalue.
    v = v[:, np.argsort(w)]
    t2 = time()
    compact = v[:, 1:]

    # Executed because the dataset's node IDs may not be compact.
    # Row r of the Laplacian belongs to the r-th node in graph.nodes()
    # order, which is not necessarily sorted, so scatter by that order
    # rather than by the sorted ID list.
    node_ids = np.asarray(list(graph.nodes()), dtype=int)
    expanded = np.zeros((node_ids.max() + 1, compact.shape[1]))
    # The target array is real; take the real part explicitly instead of
    # relying on an implicit complex-to-float cast.
    expanded[node_ids] = compact.real
    self._X = expanded
    return self._X, (t2 - t1)
def executeLinkPrediction(dataset, isDirected, method):
    """Run static link prediction for ``method`` on one dataset and print MAP.

    Args:
        dataset: sub-directory name under ``gem/datasets/``.
        isDirected: forwarded as ``directed`` when loading; its negation is
            passed as ``is_undirected`` to the evaluation.
        method: embedding object; must provide ``get_method_summary()``.
    """
    base = 'gem/datasets/' + dataset + '/u1LIKECompact'
    # Training graph.
    G_train = graph_util.loadGraphFromEdgeListTxt(base + '.base',
                                                  directed=isDirected)
    # Test graph.
    G_test = graph_util.loadGraphFromEdgeListTxt(base + '.test',
                                                 directed=isDirected)
    # Full graph (train + test).
    G_total = graph_util.loadGraphFromEdgeListTxt(base + '.edgelist',
                                                  directed=isDirected)
    MAP = lp.evaluateStaticLinkPrediction(G_total, G_train, G_test, method,
                                          is_undirected=(not isDirected))
    # Single-argument print() calls work on Python 2 and 3 alike; the former
    # print statements were a SyntaxError on Python 3.
    print(method.get_method_summary())
    print("MAP: %s" % (MAP,))
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """SVD-based embedding of ``inv(I - beta*A) . (beta*A)``, indexed by node ID.

    Args:
        graph: graph object with integer node IDs; loaded from ``edge_f``
            when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``. ``self._X`` has ``max(node ID) + 1`` rows;
        row ``i`` holds the vector of node ``i`` and rows for IDs that do
        not occur in the graph are zero.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    t1 = time()
    # Dense adjacency matrix of the graph.
    A = nx.to_numpy_matrix(graph)
    M_g = np.eye(graph.number_of_nodes()) - self._beta * A
    M_l = self._beta * A
    S = np.dot(np.linalg.inv(M_g), M_l)
    u, s, vt = lg.svds(S, k=self._d // 2)
    X1 = np.dot(u, np.diag(np.sqrt(s)))
    X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
    t2 = time()
    compact = np.concatenate((X1, X2), axis=1)
    p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
    eig_err = np.linalg.norm(p_d_p_t - S)
    print('SVD error (low rank): %f' % eig_err)

    # Executed because the dataset's node IDs may not be compact.
    # Row r of A belongs to the r-th node in graph.nodes() order, which is
    # not necessarily sorted, so scatter by that order rather than by the
    # sorted ID list.
    node_ids = np.asarray(list(graph.nodes()), dtype=int)
    expanded = np.zeros((node_ids.max() + 1, compact.shape[1]))
    expanded[node_ids] = compact
    self._X = expanded
    return self._X, (t2 - t1)
def run_sdne(edges_file, modified_filename, vertices_filename):
    """Train an ``sdne`` model on an edge list and export the embedding.

    Args:
        edges_file: path of the edge-list file to load.
        modified_filename: forwarded to ``create_converted_file``.
        vertices_filename: forwarded to ``create_converted_file``.
    """
    # Hyperparameters of the embedding method.
    hyperparameters = dict(
        d=100,
        beta=5,
        alpha=1e-6,
        nu1=1e-3,
        nu2=1e-3,
        K=3,
        n_units=[500, 300],
        rho=0.3,
        n_iter=1,
        xeta=1e-4,
        n_batch=500,
        modelfile=['./intermediate/enc_model.json',
                   './intermediate/dec_model.json'],
        weightfile=['./intermediate/enc_weights.hdf5',
                    './intermediate/dec_weights.hdf5'],
    )
    em = sdne(**hyperparameters)

    graph = graph_util.loadGraphFromEdgeListTxt(edges_file)
    # The returned pair is not needed here; the embedding is fetched from
    # the model afterwards.
    _embedding, _elapsed = em.learn_embedding(graph, edge_f=None,
                                              is_weighted=True, no_python=True)
    create_converted_file(em.get_embedding(None), modified_filename,
                          vertices_filename)
def main(args):
    """Embed the graph in ``args.input`` with ``args.method`` and save as CSV.

    Args:
        args: namespace providing ``input``, ``directed``, ``method``,
            ``weighted``, ``output`` and the hyperparameters read by the
            selected method.

    Raises:
        ValueError: if ``args.method`` is not one of ``gf``, ``hope``,
            ``lap``, ``lle`` or ``sdne``.
    """

    def build_model():
        """Instantiate the embedding method named by ``args.method``."""
        method = args.method
        if method == 'gf':
            # Embedding dimension, maximum iterations, learning rate and
            # regularization coefficient.
            return GraphFactorization(d=args.dimension, max_iter=args.max_iter,
                                      eta=args.eta, regu=args.regu)
        if method == 'hope':
            # Embedding dimension and decay factor.
            return HOPE(d=args.dimension, beta=args.beta)
        if method == 'lap':
            return LaplacianEigenmaps(d=args.dimension)
        if method == 'lle':
            return LocallyLinearEmbedding(d=args.dimension)
        if method == 'sdne':
            # The layer sizes arrive as a Python literal in string form;
            # their count is passed as K.
            layers = ast.literal_eval(args.encoder_list)
            return SDNE(d=args.dimension, beta=args.beta, alpha=args.alpha,
                        nu1=args.nu1, nu2=args.nu2, K=len(layers),
                        n_units=layers, n_iter=args.max_iter,
                        xeta=args.learning_rate, n_batch=args.bs)
        raise ValueError('The requested method does not exist!')

    # Load the edge list.
    G = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = G.to_directed()

    model = build_model()

    # Learn the node embeddings.
    Y, _elapsed = model.learn_embedding(graph=G, edge_f=None,
                                        is_weighted=args.weighted,
                                        no_python=True)
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file.
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
def prep_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Load the graph, symmetrize its adjacency matrix and record its size.

    Prints the shape of the symmetrized matrix and the node count, and
    stores the latter in ``self._node_num``. Nothing is returned.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    adjacency = nx.to_scipy_sparse_matrix(graph)
    t1 = time()  # start timestamp; not read within this block
    # Average the matrix with its transpose.
    symmetric = (adjacency + adjacency.T) / 2
    print(symmetric.shape)
    self._node_num = graph.number_of_nodes()
    print(self._node_num)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Assign every node a single standard-normal random coordinate.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)`` with ``self._X`` of shape ``(n_nodes, 1)``.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    source = graph if graph else graph_util.loadGraphFromEdgeListTxt(edge_f)
    undirected = source.to_undirected()
    started = time()
    self._X = np.random.randn(undirected.number_of_nodes(), 1)
    return self._X, (time() - started)
def main(data_set_name):
    """Embed ``./graph/<name>.tsv`` in 4 dimensions and write ``./emb/<name>.emb``.

    Args:
        data_set_name: base name (string) used to build both file paths.
    """
    source_path = './graph/' + data_set_name + '.tsv'
    target_path = './emb/' + data_set_name + '.emb'

    # Embedding method with its only hyperparameter, the dimension.
    model = LaplacianEigenmaps(4)
    network = graph_util.loadGraphFromEdgeListTxt(source_path)

    # learn_embedding accepts a networkx graph or a file with an edge list.
    vectors, _elapsed = model.learn_embedding(network, edge_f=None,
                                              is_weighted=True, no_python=True)
    # Space-separated output; missing values are written as 0.1.
    pandas.DataFrame(vectors).to_csv(target_path, sep=' ', na_rep=0.1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Laplacian-eigenmaps embedding together with its reconstruction error.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, eig_err)``: the eigenvectors kept after dropping the one
        with the smallest eigenvalue, and the norm of the difference between
        ``V diag(w) V^T`` and the normalized Laplacian.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    L_sym = nx.normalized_laplacian_matrix(graph)
    w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
    # eigs does not promise any ordering; sort so that the column dropped
    # below belongs to the smallest eigenvalue.
    idx = np.argsort(w)
    w = w[idx]
    v = v[:, idx]
    self._X = v[:, 1:]
    p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
    eig_err = np.linalg.norm(p_d_p_t - L_sym)
    return self._X, eig_err
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph from the smallest singular vectors of ``I - A``.

    ``A`` is the sparse adjacency matrix with each row scaled to unit L1
    norm. ``self._d + 1`` singular triplets are requested with
    ``which='SM'`` and the first right singular vector is dropped.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    A = nx.to_scipy_sparse_matrix(graph)
    # Use the return value: copy=False only normalizes in place when no
    # conversion is needed (e.g. an integer adjacency matrix is converted to
    # float, which yields a new object and leaves A untouched).
    A = normalize(A, norm='l1', axis=1, copy=False)
    I = sp.eye(graph.number_of_nodes())
    I_min_A = I - A
    u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
    t2 = time()
    self._X = vt.T[:, 1:]
    return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Learn an embedding with the external ``node2vec`` program (verbose).

    The graph is saved to ``tempGraph.graph``, ``node2vec`` is run on it and
    the vectors are read back from ``tempGraph.emb``. Several diagnostic
    prints list sample edges, counts and every node/vector pair.

    Args:
        graph: graph object; loaded (directed) from ``edge_f`` when None.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``; the timing covers the external call and the
        read-back.

    Raises:
        Exception: if neither input is given, or if launching the executable
            raises.
    """
    args = ["node2vec"]
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if graph is None:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=True)
    print("node2vec graph")
    # Materialize before slicing: edge views are not sliceable, while a list
    # works whether edges() returns a list or a view.
    print(list(graph.edges())[:3])
    graph_util.saveGraphToEdgeListTxtn2v(graph, 'tempGraph.graph')
    args.append("-i:tempGraph.graph")
    args.append("-o:tempGraph.emb")
    args.append("-d:%d" % self._d)
    args.append("-l:%d" % self._walk_len)
    args.append("-r:%d" % self._num_walks)
    args.append("-k:%d" % self._con_size)
    args.append("-e:%d" % self._max_iter)
    args.append("-p:%f" % self._ret_p)
    args.append("-q:%f" % self._inout_p)
    args.append("-v")
    args.append("-dr")
    args.append("-w")
    t1 = time()
    try:
        call(args)
    except Exception as e:
        print(str(e))
        raise Exception(
            './node2vec not found. Please compile snap, place node2vec in the system path and grant executable permission'
        )
    self._X = graph_util.loadEmbedding('tempGraph.emb')
    t2 = time()
    print('len graph edges')
    print(len(graph.nodes()))
    print('embedding vectors number')
    print(len(self._X))
    print('GUESS embedding node2vc')
    # Pair vectors with nodes by position; indexing a node view with an
    # integer looks the node up by key instead of by position.
    for node, vector in zip(graph.nodes(), self._X):
        print(str(node) + " " + str(vector))
    return self._X, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Learn an embedding with ``node2vec``, reusing a saved graph file.

    The edge list is written to ``gem/intermediate/<data_set>_n2v.graph``
    only if that file cannot be opened for reading; the embedding file is
    removed with ``rm`` after it has been loaded.

    Args:
        graph: graph object; replaced by the graph loaded from ``edge_f``
            when ``edge_f`` is given.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``; the timing covers the external call and the
        read-back.

    Raises:
        Exception: if neither input is given, or if launching the executable
            raises.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if edge_f:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    graph_path = 'gem/intermediate/%s_n2v.graph' % self._data_set
    emb_path = 'gem/intermediate/%s_%d_n2v.emb' % (self._data_set, self._d)

    # Probe for an existing graph file; write one only when it is missing.
    try:
        with open(graph_path, 'r'):
            pass
    except IOError:
        graph_util.saveGraphToEdgeListTxtn2v(graph, graph_path)

    command = [
        "gem/c_exe/node2vec",
        "-i:%s" % graph_path,
        "-o:%s" % emb_path,
        "-d:%d" % self._d,
        "-l:%d" % self._walk_len,
        "-r:%d" % self._num_walks,
        "-k:%d" % self._con_size,
        "-e:%d" % self._max_iter,
        "-p:%f" % self._ret_p,
        "-q:%f" % self._inout_p,
        "-v",
        "-dr",
        "-w",
    ]
    started = time()
    try:
        call(command)
    except Exception as err:
        print(str(err))
        raise Exception(
            './node2vec not found. Please compile snap, place node2vec in the path and grant executable permission'
        )
    self._X = graph_util.loadEmbedding(emb_path)
    finished = time()
    call(["rm", emb_path])
    return self._X, (finished - started)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph via SVD of ``inv(I - beta*A) . (beta*A)``.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``; ``self._X`` concatenates the scaled left and
        right singular vectors column-wise.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    begin = time()
    # A is the adjacency matrix as a numpy matrix.
    A = nx.to_numpy_matrix(graph)
    identity = np.eye(graph.number_of_nodes())
    M_g = identity - self._beta * A  # identity - beta * A
    M_l = self._beta * A  # beta * A
    # Product of the inverse of M_g with M_l.
    S = np.dot(np.linalg.inv(M_g), M_l)
    # Truncated SVD: singular vectors and singular values.
    u, s, vt = lg.svds(S, k=self._d // 2)
    sqrt_sigma = np.diag(np.sqrt(s))
    X_left = np.dot(u, sqrt_sigma)
    X_right = np.dot(vt.T, sqrt_sigma)
    end = time()

    self._X = np.concatenate((X_left, X_right), axis=1)
    rebuilt = np.dot(u, np.dot(np.diag(s), vt))
    print('SVD error (low rank): %f' % np.linalg.norm(rebuilt - S))
    return self._X, (end - begin)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embed the graph with sorted eigenvectors of its normalized Laplacian.

    The ``self._d + 1`` eigenpairs are sorted by eigenvalue, the first
    eigenvector is dropped, and the reconstruction error is printed.

    Args:
        graph: graph object; loaded from ``edge_f`` when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(embedding, seconds)``: the real part of ``self._X`` and the time
        spent on the decomposition and sorting.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not (graph or edge_f):
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    undirected = graph.to_undirected()

    started = time()
    laplacian = nx.normalized_laplacian_matrix(undirected)
    eigvals, eigvecs = lg.eigs(laplacian, k=self._d + 1, which='SM')
    # Ascending eigenvalue order.
    order = np.argsort(eigvals)
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]
    finished = time()

    self._X = eigvecs[:, 1:]
    rebuilt = np.dot(eigvecs, np.dot(np.diag(eigvals), eigvecs.T))
    print('Laplacian matrix recon. error (low rank): %f'
          % np.linalg.norm(rebuilt - laplacian))
    return self._X.real, (finished - started)
def learn_embedding(self, graph=None, edge_f=None, amatrix=None, nodelist=None, weight='weight'):
    """Embed a graph or adjacency matrix via SVD of a proximity matrix.

    The proximity is ``inv(M_g) . M_l`` where, for ``self._proximity``:

    * ``'katz'``: ``M_g = I - beta*A`` and ``M_l = beta*A``;
    * ``'common-neighbors'``: ``M_g = I`` and ``M_l = A . A``.

    Args:
        graph: graph object; used when ``amatrix`` is None.
        edge_f: path of an edge-list file; used when both ``amatrix`` and
            ``graph`` are None.
        amatrix: adjacency matrix (anything ``np.asmatrix`` accepts); takes
            precedence over ``graph``/``edge_f``.
        nodelist: row/column order for the adjacency matrix; defaults to
            ``sorted(graph.nodes)``.
        weight: edge attribute name forwarded to ``nx.to_numpy_matrix``.

    Returns:
        ``(self._X, eig_err)``: the concatenated scaled left and right
        singular vectors, and the norm of the low-rank reconstruction error.

    Raises:
        Exception: if none of ``graph``, ``edge_f``, ``amatrix`` is given.
        ValueError: if ``self._proximity`` is not a supported name.
    """
    if graph is None and edge_f is None and amatrix is None:
        raise Exception('graph/edge_f/amatrix needed')
    if amatrix is None:
        if graph is None:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        if nodelist is None:
            nodelist = sorted(graph.nodes)
        A = nx.to_numpy_matrix(graph, nodelist=nodelist, weight=weight)
    else:
        # np.asmatrix is what the removed alias np.mat resolved to.
        A = np.asmatrix(amatrix)
    # Plain ndarray from here on, so products are spelled with np.dot.
    A = np.asarray(A)
    # Size taken from the matrix itself: with a custom nodelist it can
    # differ from graph.number_of_nodes().
    n = A.shape[0]

    if self._proximity == 'katz':
        M_g = np.eye(n) - self._beta * A
        M_l = self._beta * A
    elif self._proximity == 'common-neighbors':
        M_g = np.eye(n)
        M_l = np.dot(A, A)
    else:
        # Previously this fell through and failed later with an
        # unbound-variable error.
        raise ValueError('unknown proximity: %r' % (self._proximity,))

    S = np.dot(np.linalg.inv(M_g), M_l)
    u, s, vt = lg.svds(S, k=self._d // 2)
    X1 = np.dot(u, np.diag(np.sqrt(s)))
    X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
    self._X = np.concatenate((X1, X2), axis=1)
    p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
    eig_err = np.linalg.norm(p_d_p_t - S)
    return self._X, eig_err
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Embedding from singular vectors of ``I - A``, indexed by node ID.

    ``A`` is the sparse adjacency matrix with each row scaled to unit L1
    norm. ``self._d + 1`` singular triplets are requested with
    ``which='SM'`` and the first right singular vector is dropped.

    Args:
        graph: graph object with integer node IDs; loaded from ``edge_f``
            when falsy.
        edge_f: path of an edge-list file.
        is_weighted: unused.
        no_python: unused.

    Returns:
        ``(self._X, seconds)``. ``self._X`` has ``max(node ID) + 1`` rows;
        row ``i`` holds the vector of node ``i`` and rows for IDs that do
        not occur in the graph are zero.

    Raises:
        Exception: if neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    A = nx.to_scipy_sparse_matrix(graph)
    # Use the return value: copy=False only normalizes in place when no
    # conversion is needed (e.g. an integer adjacency matrix is converted to
    # float, which yields a new object and leaves A untouched).
    A = normalize(A, norm='l1', axis=1, copy=False)
    I = sp.eye(graph.number_of_nodes())
    I_min_A = I - A
    u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
    t2 = time()
    compact = vt.T[:, 1:]

    # Executed because the dataset's node IDs may not be compact.
    # Row r of A belongs to the r-th node in graph.nodes() order, which is
    # not necessarily sorted, so scatter by that order rather than by the
    # sorted ID list.
    node_ids = np.asarray(list(graph.nodes()), dtype=int)
    expanded = np.zeros((node_ids.max() + 1, compact.shape[1]))
    expanded[node_ids] = compact
    self._X = expanded
    return self._X, (t2 - t1)