Ejemplo n.º 1
0
def main(data_set_name):
    """Embed one data set with SDNE and store the result as text.

    The edge list is read from ``./graph/<data_set_name>.tsv`` and the
    learned embedding is written to ``./emb/<data_set_name>.emb`` as a
    space-separated table, with missing values rendered as ``0.1``.
    """
    dimensions = 4
    input_file = './graph/' + data_set_name + '.tsv'
    output_file = './emb/' + data_set_name + '.emb'

    # Build the embedding method first, then read the graph it will train on.
    sdne = SDNE(dimensions)
    graph = graph_util.loadGraphFromEdgeListTxt(input_file)

    # learn_embedding returns a pair; only the first element (the embedding
    # array) is used here.
    embeddings_array, _ = sdne.learn_embedding(
        graph, edge_f=None, is_weighted=True, no_python=True)

    pandas.DataFrame(embeddings_array).to_csv(output_file,
                                              sep=' ',
                                              na_rep=0.1)
Ejemplo n.º 2
0
def main(args):
    """Train the embedding method named by ``args.method`` and save the result.

    Attributes read from ``args``: ``input``, ``directed``, ``method``,
    ``dimension``, ``weighted`` and ``output``, plus the hyperparameters
    consumed by the branch matching the chosen method.

    Raises:
        ValueError: if ``args.method`` is not one of 'gf', 'hope', 'lap',
            'lle' or 'sdne'.
    """
    # Read the edge list and switch to the directed form of the graph.
    G = graph_util.loadGraphFromEdgeListTxt(
        args.input, directed=args.directed).to_directed()

    method = args.method
    if method == 'gf':
        # Graph Factorization: embedding dimension, maximum iterations,
        # learning rate and regularization coefficient.
        model = GraphFactorization(d=args.dimension, max_iter=args.max_iter,
                                   eta=args.eta, regu=args.regu)
    elif method == 'hope':
        # HOPE: embedding dimension and decay factor.
        model = HOPE(d=args.dimension, beta=args.beta)
    elif method == 'lap':
        # Laplacian Eigenmaps: embedding dimension only.
        model = LaplacianEigenmaps(d=args.dimension)
    elif method == 'lle':
        # Locally Linear Embedding: embedding dimension only.
        model = LocallyLinearEmbedding(d=args.dimension)
    elif method == 'sdne':
        # The encoder layer sizes are parsed with ast.literal_eval; the
        # number of hidden layers (K) is taken from the parsed list length.
        layer_sizes = ast.literal_eval(args.encoder_list)
        # SDNE: embedding dimension (d), seen-edge reconstruction weight
        # (beta), first-order proximity weight (alpha), lasso (nu1) and ridge
        # (nu2) regularization coefficients, layer sizes (n_units), number of
        # iterations (n_iter), learning rate (xeta) and batch size (n_batch).
        model = SDNE(d=args.dimension, beta=args.beta, alpha=args.alpha,
                     nu1=args.nu1, nu2=args.nu2,
                     K=len(layer_sizes), n_units=layer_sizes,
                     n_iter=args.max_iter, xeta=args.learning_rate,
                     n_batch=args.bs)
    else:
        raise ValueError('The requested method does not exist!')

    # Learn the node embeddings; the second element of the returned pair is
    # not needed here.
    learned, _ = model.learn_embedding(graph=G, edge_f=None,
                                       is_weighted=args.weighted,
                                       no_python=True)

    # Drop imaginary parts that are close to zero before writing the CSV.
    real_valued = np.real_if_close(learned, tol=1000)
    np.savetxt(args.output, real_valued, delimiter=',', fmt='%f')
Ejemplo n.º 3
0
def load_models(G, edge_f, models):
    """Add an SDNE model to ``models``, then train and evaluate every entry.

    For each model the function prints the graph size, learns an embedding
    on the directed form of ``G``, prints the training time, evaluates static
    graph reconstruction (printing MAP and the first five precision-curve
    values) and plots the embedding in 2D.

    Args:
        G: graph providing ``to_directed``, ``number_of_nodes`` and
            ``number_of_edges``.
        edge_f: not used by this function; kept so existing callers that
            pass it continue to work.
        models: list of embedding models; extended in place with one SDNE
            instance before the loop runs.

    Returns:
        The same ``models`` list, once all models have been processed.
    """
    G = G.to_directed()

    models.append(
        SDNE(d=2,
             beta=5,
             alpha=1e-5,
             nu1=1e-6,
             nu2=1e-6,
             K=3,
             n_units=[50, 15],
             rho=0.3,
             n_iter=50,
             xeta=0.01,
             n_batch=100,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

    # For each model, learn the embedding and evaluate on graph
    # reconstruction and visualization.
    for embedding in models:
        print('Num nodes: %d, num edges: %d' %
              (G.number_of_nodes(), G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list
        Y, t = embedding.learn_embedding(graph=G,
                                         edge_f=None,
                                         is_weighted=True,
                                         no_python=True)

        print(embedding._method_name + ':\n\tTraining time: %f' %
              (time() - t1))

        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
            MAP, prec_curv[:5]))

        # Visualize
        viz.plot_embedding2D(embedding.get_embedding(),
                             di_graph=G,
                             node_colors=None)

    # Return only after the loop: returning from inside it would stop after
    # the first model and leave the remaining ones untrained.
    return models
Ejemplo n.º 4
0
    def __init__(self,
                 dims,
                 beta=5,
                 alpha=1e-5,
                 nu1=1e-6,
                 nu2=1e-6,
                 K=3,
                 n_units=[64, 32],
                 rho=0.3,
                 n_iter=50,
                 xeta=0.01,
                 n_batch=500):
        """Store the hyperparameters and build the wrapped SDNE model.

        Args:
            dims: embedding dimension, forwarded to SDNE as ``d``.
            beta: forwarded to SDNE as ``beta``.
            alpha: forwarded to SDNE as ``alpha``.
            nu1: forwarded to SDNE as ``nu1``.
            nu2: forwarded to SDNE as ``nu2``.
            K: forwarded to SDNE as ``K``.
            n_units: forwarded to SDNE as ``n_units``.
            rho: forwarded to SDNE as ``rho``.
            n_iter: forwarded to SDNE as ``n_iter``.
            xeta: forwarded to SDNE as ``xeta``.
            n_batch: forwarded to SDNE as ``n_batch``.
        """
        super().__init__()
        self.dims = dims
        self.beta = beta
        self.alpha = alpha
        self.nu1 = nu1
        self.nu2 = nu2
        self.K = K
        # NOTE(review): the default list object is shared between calls; it
        # is stored and forwarded as-is and never mutated in this method.
        self.n_units = n_units
        self.rho = rho
        self.n_iter = n_iter
        self.xeta = xeta
        self.n_batch = n_batch

        # File locations handed to SDNE for the encoder/decoder model and
        # weight files. Both must be plain lists: a trailing comma after the
        # closing bracket would silently wrap the list in a tuple.
        modelfile = [
            './intermediate/enc_model.json',
            './intermediate/dec_model.json',
        ]
        weightfile = [
            './intermediate/enc_weights.hdf5',
            './intermediate/dec_weights.hdf5',
        ]

        self.embedding_model = SDNE(d=dims,
                                    beta=beta,
                                    alpha=alpha,
                                    nu1=nu1,
                                    nu2=nu2,
                                    K=K,
                                    n_units=n_units,
                                    rho=rho,
                                    n_iter=n_iter,
                                    xeta=xeta,
                                    n_batch=n_batch,
                                    modelfile=modelfile,
                                    weightfile=weightfile)
 def SDNE(netData, **kwargs):
     """Build a GEM SDNE model from keyword overrides and export ``netData``.

     Recognised keywords and their fallbacks are listed in ``fallbacks``
     below; any other keyword is ignored. The configured model is passed to
     ``attMethods.GEMexport`` together with ``netData`` and that call's
     result is returned.
     """
     fallbacks = {
         'd': 2,
         'beta': 5,
         'alpha': 1e-5,
         'nu1': 1e-6,
         'nu2': 1e-6,
         'K': 3,
         'n_units': [50, 15],
         'rho': 0.3,
         'n_iter': 50,
         'xeta': 0.01,
         'n_batch': 500,
         'modelfile': [
             './intermediate/enc_model.json',
             './intermediate/dec_model.json',
         ],
         'weightfile': [
             './intermediate/enc_weights.hdf5',
             './intermediate/dec_weights.hdf5',
         ],
     }
     # Caller-supplied values win; everything else falls back to the table.
     settings = {
         key: kwargs.get(key, fallback)
         for key, fallback in fallbacks.items()
     }

     # Imported under an alias because this wrapper shares the class's name.
     from gem.embedding.sdne import SDNE as gem_sdne
     return attMethods.GEMexport(netData, gem_sdne(**settings))
Ejemplo n.º 6
0
            nu1: L1-reg hyperparameter
            nu2: L2-reg hyperparameter
            K: number of hidden layers in encoder/decoder
            n_units: vector of length K-1 containing #units in hidden layers
                     of encoder/decoder, not including the units in the
                     embedding layer
            rho: bounding ratio for number of units in consecutive layers (< 1)
            n_iter: number of sgd iterations for first embedding (const)
            xeta: sgd step size parameter
            n_batch: minibatch size for SGD
            modelfile: Files containing previous encoder and decoder models
            weightfile: Files containing previous encoder and decoder weights
    '''
    models.append(
        SDNE(d=args.d, beta=args.beta, alpha=args.alpha, nu1=args.nu1, nu2=args.nu2,
             K=args.k, n_units=args.nunits, n_iter=args.niter, xeta=args.xeta, n_batch=args.nbatch,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

    for embedding in models:
        print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                                G.number_of_edges()))
        t1 = time()
        # Learn embedding - accepts a networkx graph or file with edge list
        Y, t = embedding.learn_embedding(
            graph=G, edge_f=None, is_weighted=True, no_python=True)
        print(embedding._method_name +
              ':\n\tTraining time: %f' % (time() - t1))
        # Evaluate on graph reconstruction
        MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
            G, embedding, Y, None)
        # ---------------------------------------------------------------------------------
Ejemplo n.º 7
0
                 walk_len=80,
                 num_walks=10,
                 con_size=10,
                 ret_p=1,
                 inout_p=1))
else:
    # This logically has to be SDNE as there are no other options
    models.append(
        SDNE(d=dims * 2,
             beta=5,
             alpha=1e-5,
             nu1=1e-6,
             nu2=1e-6,
             K=3,
             n_units=[
                 50,
                 15,
             ],
             rho=0.3,
             n_iter=50,
             xeta=0.01,
             n_batch=100,
             modelfile=['enc_model.json', 'dec_model.json'],
             weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

#check to see if file has already been embedded
if os.path.isfile(completed_file_path):
    print("Embeddings already created. Terminating task")

else:
    # read the graph ml file
    G = nx.read_graphml(load_path)
Ejemplo n.º 8
0
    node_colors = pickle.load(
        open('data/sbm_node_labels.pickle', 'rb'), encoding='latin1'
    )
# For every row of node_colors, record the column index of the first entry
# equal to 1.
node_colors_arr = []
for idx in range(node_colors.shape[0]):
    label_row = node_colors[idx, :].toarray()
    node_colors_arr.append(np.where(label_row == 1)[1][0])

# The models to run, in evaluation order.
models = [
    GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4, regu=1.0,
                       data_set='sbm'),
    HOPE(d=256, beta=0.01),
    LaplacianEigenmaps(d=128),
    LocallyLinearEmbedding(d=128),
    node2vec(d=182, max_iter=1, walk_len=80, num_walks=10, con_size=10,
             ret_p=1, inout_p=1, data_set='sbm'),
    SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
         n_units=[500, 300], rho=0.3, n_iter=30, xeta=0.001, n_batch=500,
         modelfile=['enc_model.json', 'dec_model.json'],
         weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']),
]

# Train each model, report its timing and reconstruction quality, then plot.
for embedding in models:
    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()

    # learn_embedding accepts a networkx graph or a file with an edge list.
    Y, t = embedding.learn_embedding(
        graph=G,
        edge_f=None,
        is_weighted=True,
        no_python=True,
    )
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    # Graph-reconstruction evaluation; only MAP and the head of the
    # precision curve are reported.
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, embedding, Y, None)
    summary = ("\tMAP: {} \t preccision curve: {}\n\n\n\n" + '-' * 100).format(
        MAP, prec_curv[:5])
    print(summary)

    # 2D plot coloured by the per-node labels computed above.
    viz.plot_embedding2D(embedding.get_embedding(),
                         di_graph=G,
                         node_colors=node_colors_arr)
Ejemplo n.º 9
0
             num_walks=10,
             con_size=10,
             ret_p=1,
             inout_p=1))
# Register an SDNE model with a 2-dimensional embedding (d=2). The model and
# weight file paths all live under ./intermediate/.
# NOTE(review): presumably these are the encoder/decoder model and weight
# files SDNE saves or reloads - confirm against the SDNE class.
models.append(
    SDNE(d=2,
         beta=5,
         alpha=1e-5,
         nu1=1e-6,
         nu2=1e-6,
         K=3,
         n_units=[
             50,
             15,
         ],
         rho=0.3,
         n_iter=50,
         xeta=0.01,
         n_batch=500,
         modelfile=[
             './intermediate/enc_model.json', './intermediate/dec_model.json'
         ],
         weightfile=[
             './intermediate/enc_weights.hdf5',
             './intermediate/dec_weights.hdf5'
         ]))

for embedding in models:
    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
Ejemplo n.º 10
0
from gem.embedding.sdne import SDNE
import networkx as nx
from timeit import Timer

# Node counts of the random graphs to time, and the value passed as the
# second argument of nx.fast_gnp_random_graph.
sizes = [200, 500, 1000, 2000, 3000, 5000, 10000, 15000, 20000, 35000, 50000]
_DENS = 1e-3

for s in sizes:
    # A fresh random directed graph and a new SDNE model for every size.
    G = nx.fast_gnp_random_graph(s, _DENS, directed=True)
    sdne_ = SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                 n_units=[50, 15], rho=0.3, n_iter=10, xeta=0.01,
                 n_batch=500)

    # The timed statement is a string, so the setup code imports the current
    # model and graph from __main__ to make them visible to it.
    t = Timer(stmt='sdne_.learn_embedding(G)',
              setup='from __main__ import sdne_, G')

    # Three repetitions up to 5000 nodes, a single one above that.
    if s <= 5000:
        n_runs = 3
    else:
        n_runs = 1
    exec_times = t.repeat(repeat=n_runs, number=1)

    # Same line goes to stdout and is appended to the results file.
    report = f'{s}: {exec_times}'
    print(report)

    with open('sdne_times.txt', 'a') as f:
        f.write(report + '\n')
Ejemplo n.º 11
0
# Laplacian Eigenmaps
embedding = lap(4)  # d
Y, t = embedding.learn_embedding(graph=G,
                                 edge_f=None,
                                 is_weighted=True,
                                 no_python=True)
# print() with one pre-formatted argument produces the same output on
# Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
print('Laplacian Eigenmaps:\n\tTraining time: %f' % t)

# sdne
embedding = SDNE(d=2,
                 beta=5,
                 alpha=1e-5,
                 nu1=1e-6,
                 nu2=1e-6,
                 K=3,
                 n_units=[
                     50,
                     15,
                 ],
                 rho=0.3,
                 n_iter=1,
                 xeta=0.01,
                 n_batch=500,
                 savefilesuffix='karate')
Y, t = embedding.learn_embedding(graph=G,
                                 edge_f=None,
                                 is_weighted=True,
                                 no_python=True)
print('SDNE:\n\tTraining time: %f' % t)

# node2vec
args = input_args(file_path='data/karate.edgelist',
Ejemplo n.º 12
0
# Run the model
print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                        G.number_of_edges()))
t1 = time()

# Hyperparameters tuned according to grid_search
embedding = SDNE(
    d=2,
    beta=5,
    alpha=1e-5,
    nu1=1e-6,
    nu2=1e-6,
    K=3,
    n_units=[50, 15],
    rho=0.3,
    n_iter=50,
    xeta=0.01,
    n_batch=500,
    modelfile=['./outdata/intermediate/enc_model.json',
               './outdata/intermediate/dec_model.json'],
    weightfile=['./outdata/intermediate/enc_weights.hdf5',
                './outdata/intermediate/dec_weights.hdf5'],
)

# Learn embedding - accepts a networkx graph or file with edge list
Y, t = embedding.learn_embedding(graph=G, is_weighted=True)

# ev.evaluateNodeClassification()