def main(data_set_name):
    """Embed one data set with SDNE and write the result to disk.

    The edge list is read from ``./graph/<data_set_name>.tsv`` and the learned
    embedding is written to ``./emb/<data_set_name>.emb`` as a space-separated
    table (``na_rep=0.1`` is passed to ``DataFrame.to_csv``).

    Args:
        data_set_name: Base name (no directory, no extension) of the data set.
    """
    embedding_size = 4
    edge_list_path = './graph/' + data_set_name + '.tsv'
    embedding_path = './emb/' + data_set_name + '.emb'

    # Instantiate the embedding method with its hyperparameters.
    model = SDNE(embedding_size)

    # Load the graph from the edge list file.
    network = graph_util.loadGraphFromEdgeListTxt(edge_list_path)

    # learn_embedding returns a pair; only the first element (the embedding
    # array) is used here.
    vectors, _second = model.learn_embedding(
        network,
        edge_f=None,
        is_weighted=True,
        no_python=True,
    )

    pandas.DataFrame(vectors).to_csv(embedding_path, sep=' ', na_rep=0.1)
def _build_model(args):
    """Instantiate the embedding model selected by ``args.method``.

    Args:
        args: Parsed command-line namespace; the attributes read depend on
            the chosen method.

    Returns:
        The configured embedding model instance.

    Raises:
        ValueError: If ``args.method`` is not one of 'gf', 'hope', 'lap',
            'lle' or 'sdne'.
    """
    method = args.method

    if method == 'gf':
        # GF takes embedding dimension (d), maximum iterations (max_iter),
        # learning rate (eta) and regularization coefficient (regu).
        return GraphFactorization(
            d=args.dimension,
            max_iter=args.max_iter,
            eta=args.eta,
            regu=args.regu,
        )

    if method == 'hope':
        # HOPE takes embedding dimension (d) and decay factor (beta).
        return HOPE(d=args.dimension, beta=args.beta)

    if method == 'lap':
        # Laplacian Eigenmaps takes embedding dimension (d).
        return LaplacianEigenmaps(d=args.dimension)

    if method == 'lle':
        # LLE takes embedding dimension (d).
        return LocallyLinearEmbedding(d=args.dimension)

    if method == 'sdne':
        # The encoder layer sizes arrive as a Python-literal string.
        layer_sizes = ast.literal_eval(args.encoder_list)
        # SDNE takes embedding dimension (d), seen edge reconstruction weight
        # (beta), first order proximity weight (alpha), lasso regularization
        # coefficient (nu1), ridge regression coefficient (nu2), number of
        # hidden layers (K), size of each layer (n_units), number of
        # iterations (n_iter), learning rate (xeta) and batch size (n_batch).
        return SDNE(
            d=args.dimension,
            beta=args.beta,
            alpha=args.alpha,
            nu1=args.nu1,
            nu2=args.nu2,
            K=len(layer_sizes),
            n_units=layer_sizes,
            n_iter=args.max_iter,
            xeta=args.learning_rate,
            n_batch=args.bs,
        )

    raise ValueError('The requested method does not exist!')


def main(args):
    """Learn node embeddings for an edge-list graph and save them as CSV text.

    The graph is loaded from ``args.input``, converted to a directed graph,
    embedded with the model named by ``args.method``, and the embedding is
    written to ``args.output`` with ``np.savetxt`` (comma-delimited, ``%f``).

    Args:
        args: Parsed command-line namespace.

    Raises:
        ValueError: If ``args.method`` names an unknown embedding method.
    """
    # Load the edge list first, so an unknown method is only reported after
    # the graph has been read.
    loaded = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = loaded.to_directed()

    model = _build_model(args)

    # Learn the node embeddings.
    Y, _second = model.learn_embedding(
        graph=G,
        edge_f=None,
        is_weighted=args.weighted,
        no_python=True,
    )

    # Return a real array when the imaginary parts are within tolerance.
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file.
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
def load_models(G, edge_f, models):
    """Append an SDNE model to ``models``, then train and evaluate every model.

    For each model in ``models`` (including any already present when the
    function is called) the embedding is learned on the directed version of
    ``G``, graph reconstruction is evaluated, the MAP and the first five
    precision-curve entries are printed, and the embedding is plotted.

    Args:
        G: Input graph; ``G.to_directed()`` is used for all work.
        edge_f: Unused. Kept so existing callers keep working.
        models: List of embedding models. It is mutated in place: an SDNE
            model with ``d=2`` is appended.

    Returns:
        The same ``models`` list that was passed in.
    """
    # Work on the directed version of the graph.
    G = G.to_directed()

    models.append(
        SDNE(
            d=2,
            beta=5,
            alpha=1e-5,
            nu1=1e-6,
            nu2=1e-6,
            K=3,
            n_units=[50, 15],
            rho=0.3,
            n_iter=50,
            xeta=0.01,
            n_batch=100,
            modelfile=['enc_model.json', 'dec_model.json'],
            weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'],
        )
    )

    # For each model, learn the embedding and evaluate on graph
    # reconstruction and visualization.
    for embedding in models:
        print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
        t1 = time()

        # Learn embedding - accepts a networkx graph or file with edge list.
        Y, _t = embedding.learn_embedding(
            graph=G, edge_f=None, is_weighted=True, no_python=True)
        print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

        # Evaluate on graph reconstruction; only MAP and the precision curve
        # are reported.
        MAP, prec_curv, _err, _err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))

        # Visualize.
        viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None)

    return models
def __init__(self, dims, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[64, 32], rho=0.3, n_iter=50, xeta=0.01, n_batch=500):
    """Store the SDNE hyperparameters and build the wrapped SDNE model.

    Args:
        dims: Embedding dimension, passed to SDNE as ``d``.
        beta: Passed through to SDNE as ``beta``.
        alpha: Passed through to SDNE as ``alpha``.
        nu1: Passed through to SDNE as ``nu1``.
        nu2: Passed through to SDNE as ``nu2``.
        K: Passed through to SDNE as ``K``.
        n_units: Sequence of hidden-layer sizes, passed to SDNE as
            ``n_units``. A copy is stored so the shared default list is
            never aliased between instances.
        rho: Passed through to SDNE as ``rho``.
        n_iter: Passed through to SDNE as ``n_iter``.
        xeta: Passed through to SDNE as ``xeta``.
        n_batch: Passed through to SDNE as ``n_batch``.
    """
    super().__init__()

    self.dims = dims
    self.beta = beta
    self.alpha = alpha
    self.nu1 = nu1
    self.nu2 = nu2
    self.K = K
    # Copy: the signature's default is a mutable list shared by every call.
    self.n_units = list(n_units)
    self.rho = rho
    self.n_iter = n_iter
    self.xeta = xeta
    self.n_batch = n_batch

    # Locations of the intermediate encoder/decoder model and weight files.
    # Defined once and passed to SDNE below. Previously these locals were
    # dead (and ``modelfile`` was accidentally a 1-tuple because of a stray
    # trailing comma) while the same lists were repeated in the call.
    modelfile = [
        './intermediate/enc_model.json',
        './intermediate/dec_model.json',
    ]
    weightfile = [
        './intermediate/enc_weights.hdf5',
        './intermediate/dec_weights.hdf5',
    ]

    self.embedding_model = SDNE(
        d=dims,
        beta=beta,
        alpha=alpha,
        nu1=nu1,
        nu2=nu2,
        K=K,
        n_units=self.n_units,
        rho=rho,
        n_iter=n_iter,
        xeta=xeta,
        n_batch=n_batch,
        modelfile=modelfile,
        weightfile=weightfile,
    )
def SDNE(netData, **kwargs):
    """Build a GEM SDNE embedder and export ``netData`` through it.

    Only the keyword arguments listed below are read; any other keyword is
    ignored. Missing keywords fall back to the listed defaults.

    Keyword Args:
        d (default 2), beta (5), alpha (1e-5), nu1 (1e-6), nu2 (1e-6),
        K (3), n_units ([50, 15]), rho (0.3), n_iter (50), xeta (0.01),
        n_batch (500),
        modelfile (['./intermediate/enc_model.json',
                    './intermediate/dec_model.json']),
        weightfile (['./intermediate/enc_weights.hdf5',
                     './intermediate/dec_weights.hdf5']).

    Args:
        netData: Network data handed to ``attMethods.GEMexport``.

    Returns:
        Whatever ``attMethods.GEMexport(netData, emb)`` returns.
    """
    # Fresh defaults are built on every call, so the list values are never
    # shared between invocations.
    fallbacks = {
        'd': 2,
        'beta': 5,
        'alpha': 1e-5,
        'nu1': 1e-6,
        'nu2': 1e-6,
        'K': 3,
        'n_units': [50, 15],
        'rho': 0.3,
        'n_iter': 50,
        'xeta': 0.01,
        'n_batch': 500,
        'modelfile': [
            './intermediate/enc_model.json',
            './intermediate/dec_model.json',
        ],
        'weightfile': [
            './intermediate/enc_weights.hdf5',
            './intermediate/dec_weights.hdf5',
        ],
    }
    settings = {
        name: kwargs.get(name, fallback)
        for name, fallback in fallbacks.items()
    }

    # Imported locally under an alias because this function shares the
    # class's name.
    from gem.embedding.sdne import SDNE as _GemSDNE

    embedder = _GemSDNE(**settings)
    return attMethods.GEMexport(netData, embedder)
nu1: L1-reg hyperparameter nu2: L2-reg hyperparameter K: number of hidden layers in encoder/decoder n_units: vector of length K-1 containing #units in hidden layers of encoder/decoder, not including the units in the embedding layer rho: bounding ratio for number of units in consecutive layers (< 1) n_iter: number of sgd iterations for first embedding (const) xeta: sgd step size parameter n_batch: minibatch size for SGD modelfile: Files containing previous encoder and decoder models weightfile: Files containing previous encoder and decoder weights ''' models.append( SDNE(d=args.d, beta=args.beta, alpha=args.alpha, nu1=args.nu1, nu2=args.nu2, K=args.k, n_units=args.nunits, n_iter=args.niter, xeta=args.xeta, n_batch=args.nbatch, modelfile=['enc_model.json', 'dec_model.json'], weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])) for embedding in models: print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges())) t1 = time() # Learn embedding - accepts a networkx graph or file with edge list Y, t = embedding.learn_embedding( graph=G, edge_f=None, is_weighted=True, no_python=True) print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1)) # Evaluate on graph reconstruction MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction( G, embedding, Y, None) # ---------------------------------------------------------------------------------
walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1)) else: # This logically has to be SDNE as there are no other options models.append( SDNE(d=dims * 2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[ 50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=100, modelfile=['enc_model.json', 'dec_model.json'], weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])) #check to see if file has already been embedded if os.path.isfile(completed_file_path): print("Embeddings already created. Terminating task") else: # read the graph ml file G = nx.read_graphml(load_path)
# Load the per-node label matrix. The file is opened in a context manager so
# the handle is closed as soon as unpickling finishes.
# NOTE: pickle executes arbitrary code on load; only use trusted label files.
with open('data/sbm_node_labels.pickle', 'rb') as label_file:
    node_colors = pickle.load(label_file, encoding='latin1')

# For each row, take the column index of the first entry equal to 1.
node_colors_arr = [
    np.where(node_colors[idx, :].toarray() == 1)[1][0]
    for idx in range(node_colors.shape[0])
]

models = []
# Load the models you want to run.
models.append(GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4, regu=1.0, data_set='sbm'))
models.append(HOPE(d=256, beta=0.01))
models.append(LaplacianEigenmaps(d=128))
models.append(LocallyLinearEmbedding(d=128))
# NOTE(review): d=182 looks like a transposition of 128 - confirm intended.
models.append(node2vec(d=182, max_iter=1, walk_len=80, num_walks=10, con_size=10,
                       ret_p=1, inout_p=1, data_set='sbm'))
models.append(SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                   n_units=[500, 300], rho=0.3, n_iter=30, xeta=0.001, n_batch=500,
                   modelfile=['enc_model.json', 'dec_model.json'],
                   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

# For each model, learn the embedding and evaluate on graph reconstruction
# and visualization.
for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list.
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction.
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
    print(("\tMAP: {} \t preccision curve: {}\n\n\n\n" + '-' * 100).format(MAP, prec_curv[:5]))
    # Visualize, colouring each node by its label index.
    viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=node_colors_arr)
num_walks=10, con_size=10, ret_p=1, inout_p=1)) models.append( SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[ 50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=500, modelfile=[ './intermediate/enc_model.json', './intermediate/dec_model.json' ], weightfile=[ './intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5' ])) for embedding in models: print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges())) t1 = time()
from timeit import Timer

import networkx as nx
from gem.embedding.sdne import SDNE

# Benchmark SDNE.learn_embedding on random directed G(n, p) graphs of
# increasing size, appending the measured times to 'sdne_times.txt'.

sizes = [200, 500, 1000, 2000, 3000, 5000, 10000, 15000, 20000, 35000, 50000]
_DENS = 1e-3

for num_nodes in sizes:
    # G and sdne_ must remain module-level names: the Timer setup string
    # imports them from __main__.
    G = nx.fast_gnp_random_graph(num_nodes, _DENS, directed=True)
    sdne_ = SDNE(
        d=128,
        beta=5,
        alpha=1e-5,
        nu1=1e-6,
        nu2=1e-6,
        K=3,
        n_units=[50, 15],
        rho=0.3,
        n_iter=10,
        xeta=0.01,
        n_batch=500,
    )
    stopwatch = Timer(
        stmt='sdne_.learn_embedding(G)',
        setup='from __main__ import sdne_, G',
    )

    # Graphs up to 5000 nodes are timed three times; larger ones once.
    if num_nodes <= 5000:
        repetitions = 3
    else:
        repetitions = 1
    timings = stopwatch.repeat(repeat=repetitions, number=1)

    report = f'{num_nodes}: {timings}'
    print(report)
    with open('sdne_times.txt', 'a') as log_file:
        log_file.write(report + '\n')
embedding = lap(4) # d Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True) print 'Laplacian Eigenmaps:\n\tTraining time: %f' % t # sdne embedding = SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[ 50, 15, ], rho=0.3, n_iter=1, xeta=0.01, n_batch=500, savefilesuffix='karate') Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True) print 'SDNE:\n\tTraining time: %f' % t # node2vec args = input_args(file_path='data/karate.edgelist',
# Run the model.
print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
t1 = time()

# Hyperparameters tuned according to grid search.
sdne_settings = {
    'd': 2,
    'beta': 5,
    'alpha': 1e-5,
    'nu1': 1e-6,
    'nu2': 1e-6,
    'K': 3,
    'n_units': [50, 15],
    'rho': 0.3,
    'n_iter': 50,
    'xeta': 0.01,
    'n_batch': 500,
    'modelfile': [
        './outdata/intermediate/enc_model.json',
        './outdata/intermediate/dec_model.json',
    ],
    'weightfile': [
        './outdata/intermediate/enc_weights.hdf5',
        './outdata/intermediate/dec_weights.hdf5',
    ],
}
embedding = SDNE(**sdne_settings)

# Learn embedding - accepts a networkx graph or file with edge list.
Y, t = embedding.learn_embedding(graph=G, is_weighted=True)
# ev.evaluateNodeClassification()