Beispiel #1
0
def binary_community_graph(N, k, maxk, mu):
    """Generate a benchmark graph with the external ``benchm`` program.

    Runs ``gem/c_exe/benchm`` (``benchm.exe`` on Windows) and then loads the
    ``network.dat`` / ``community.dat`` files, first from ``gem/c_exe/`` and,
    if that fails, from the current working directory.

    Args:
        N: integer forwarded to the generator's ``-N`` option.
        k: integer forwarded to ``-k``.
        maxk: integer forwarded to ``-maxk``.
        mu: float forwarded to ``-mu``.
        (Presumably node count, average degree, maximum degree and mixing
        parameter of an LFR benchmark -- TODO confirm against benchm.)

    Returns:
        ``(graph, labels)`` where ``graph`` comes from
        ``graph_util.loadGraphFromEdgeListTxt`` and ``labels`` is the one-hot
        encoding of the last column of ``community.dat``.
    """
    # Local import: the file-level imports are not visible from this block.
    import subprocess

    if sys.platform.startswith("win"):
        fcall = "gem/c_exe/benchm.exe"
    else:
        fcall = "gem/c_exe/benchm"
    # One argv token per flag and per value, so no shell is involved.
    args = [
        fcall,
        "-N", "%d" % N,
        "-k", "%d" % k,
        "-maxk", "%d" % maxk,
        "-mu", "%f" % mu,
    ]
    t1 = time()
    print(args)
    try:
        # Unlike os.system, this raises OSError when the binary is missing or
        # not executable, so the hint below is actually reachable.
        status = subprocess.call(args)
    except OSError as e:
        print('ERROR: %s' % str(e))
        print('gem/c_exe/benchm not found. Please compile gf, place benchm in the path and grant executable permission')
    else:
        if status != 0:
            print('ERROR: %s exited with status %d' % (fcall, status))
    t2 = time()
    print('\tTime taken to generate random graph: %f sec' % (t2 - t1))
    try:
        graph = graph_util.loadGraphFromEdgeListTxt('gem/c_exe/network.dat')
        node_labels = np.loadtxt('gem/c_exe/community.dat')
    except Exception:
        # Best-effort fallback kept from the original, but no longer swallows
        # KeyboardInterrupt / SystemExit.
        graph = graph_util.loadGraphFromEdgeListTxt('network.dat')
        node_labels = np.loadtxt('community.dat')
    # Only the last column is used as the label of each row.
    node_labels = node_labels[:, -1].reshape(-1, 1)
    enc = OneHotEncoder()
    return graph, enc.fit_transform(node_labels)
Beispiel #2
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Learn a graph-factorization embedding and return it with the run time.

     Args:
         graph: graph object exposing ``number_of_nodes()`` and
             ``edges(data=..., default=...)`` (presumably networkx).
         edge_f: path of an edge-list file, used when ``graph`` is missing
             (and preferred over ``graph`` on the C++ path).
         is_weighted: accepted for interface compatibility; the C++ path
             always passes ``True``.
         no_python: when true, try the compiled ``graphFac_ext`` module and
             fall back to the Python loop if it cannot be imported.

     Returns:
         ``(X, seconds)``: the embedding matrix and the elapsed time.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied.
     """
     if not (graph or edge_f):
         raise Exception('graph/edge_f needed')

     use_c_ext = no_python
     if no_python:
         try:
             from c_ext import graphFac_ext
         except ImportError:
             print(
                 'Could not import C++ module for Graph Factorization. Reverting to python implementation. Please recompile graphFac_ext from graphFac.cpp using bjam'
             )
             use_c_ext = False

     if use_c_ext:
         if edge_f:
             graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
         # The extension reads its input from a temporary edge-list file.
         graph_util.saveGraphToEdgeListTxt(graph, 'tempGraph.graph')
         started = time()
         graphFac_ext.learn_embedding('tempGraph.graph', "tempGraphGF.emb",
                                      True, True, self._d, self._eta,
                                      self._regu, self._max_iter)
         self._X = graph_util.loadEmbedding('tempGraphGF.emb')
         return self._X, (time() - started)

     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     started = time()
     self._node_num = graph.number_of_nodes()
     self._X = 0.01 * np.random.randn(self._node_num, self._d)
     for step in range(self._max_iter):
         if step % self._print_step == 0:
             f1, f2, f = self._get_f_value(graph)
             print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' %
                   (step, f, f1, f2))
         for src, dst, weight in graph.edges(data='weight', default=1):
             # Only edges with src < dst contribute an update.
             if dst <= src:
                 continue
             residual = weight - np.dot(self._X[src, :], self._X[dst, :])
             gradient = (-residual * self._X[dst, :] +
                         self._regu * self._X[src, :])
             self._X[src, :] -= self._eta * gradient
     finished = time()
     return self._X, (finished - started)
Beispiel #3
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Learn an embedding by shelling out to the external ``verse`` binary.

     The graph is written to ``tempGraph_verse.graph``, converted with
     ``convert.py`` to ``outgraph_verse.bcsr`` and embedded by
     ``gem/verse-master/src/verse``; the result is read back from
     ``tempGraph_verse.emb`` as float32.

     Args:
         graph: graph to embed; replaced by the graph loaded from ``edge_f``
             whenever ``edge_f`` is given.
         edge_f: path of an edge-list file.
         is_weighted: unused here, kept for interface compatibility.
         no_python: unused here, kept for interface compatibility.

     Returns:
         ``(X, seconds)`` with ``X`` of shape ``(number_of_nodes, self._d)``.

     Raises:
         Exception: if neither input is given, or if the ``verse`` command
             exits with a non-zero status.
     """
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if edge_f:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graph_util.write_edgelist(graph, 'tempGraph_verse.graph')
     # os.system reports failure through its return value, not by raising,
     # so the status has to be inspected explicitly.
     convert_status = os.system(
         "python gem/verse-master/python/convert.py tempGraph_verse.graph outgraph_verse.bcsr"
     )
     if convert_status != 0:
         # Best effort, as before: report and let the next step decide.
         print('convert.py exited with status %s' % convert_status)
     args = "gem/verse-master/src/verse -input outgraph_verse.bcsr -output tempGraph_verse.emb -dim " + str(
         self._d) + " -alpha " + str(self._alpha) + " -threads " + str(
             self._threads) + " -nsamples " + str(self._nsamples)
     t1 = time()
     if os.system(args) != 0:
         # Without this check a stale tempGraph_verse.emb from an earlier run
         # could be read back silently.
         raise Exception(
             './verse not found. Please compile, place verse in the path and grant executable permission'
         )
     self._X = np.fromfile('tempGraph_verse.emb',
                           np.float32).reshape(graph.number_of_nodes(),
                                               self._d)
     t2 = time()
     return self._X, (t2 - t1)
Beispiel #4
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph with eigenvectors of its normalized Laplacian.

        Args:
            graph: graph to embed (converted with ``to_undirected()``).
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``: the real part of the eigenvectors belonging to
            the ``self._d`` smallest eigenvalues after the first one, and the
            time spent on the decomposition.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        t1 = time()
        L_sym = nx.normalized_laplacian_matrix(graph)

        k = self._d + 1
        # We hit this error message https://stackoverflow.com/questions/18436667/python-scipy-sparse-matrix-svd-with-error-arpack-error-3-no-shifts-could-be-app
        # so we are increasing ncv accordingly
        w, v = lg.eigs(L_sym, k=k, which='SM', ncv=4 * k)
        # Order the eigenvectors by ascending eigenvalue so that column 0 is
        # the one that gets dropped.
        v = v[:, np.argsort(w)]
        t2 = time()
        self._X = v[:, 1:]
        # The dense n x n reconstruction that used to follow was only consumed
        # by a commented-out print, so it has been removed.
        return self._X.real, (t2 - t1)
Beispiel #5
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Learn an embedding by running the external ``node2vec`` binary.

        The graph is saved to ``embs/n2v_tempGraph.graph``, the executable
        ``gem/c_exe/node2vec`` is invoked on it, and the embedding is loaded
        from ``embs/n2v_tempGraph.emb``.

        Args:
            graph: graph to embed; replaced by the graph loaded from
                ``edge_f`` whenever ``edge_f`` is given.
            edge_f: path of an edge-list file.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``; the time covers only the external call.

        Raises:
            Exception: if no input is given or the external call raises.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if edge_f:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph_util.saveGraphToEdgeListTxtn2v(graph, 'embs/n2v_tempGraph.graph')

        command = [
            "gem/c_exe/node2vec",
            "-i:embs/n2v_tempGraph.graph",
            "-o:embs/n2v_tempGraph.emb",
            "-d:%d" % self._d,
            "-l:%d" % self._walkLength,
            "-r:%d" % self._numWalks,
            "-k:%d" % self._contextSize,
            "-e:%d" % self._max_iter,
            "-p:%f" % self._return_p,
            "-q:%f" % self._inout_p,
            "-v",
            "-dr",
            "-w",
        ]

        started = time()
        try:
            call(command)
        except Exception as err:
            print(str(err))
            raise Exception(
                './node2vec not found. Please compile snap, place node2vec in the path and grant executable permission'
            )
        elapsed = time() - started

        self._X = graph_util.loadEmbedding('embs/n2v_tempGraph.emb')
        return self._X, elapsed
Beispiel #6
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph through a truncated SVD of ``inv(I - beta*A) * beta*A``.

        Args:
            graph: graph to embed.
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)`` where ``X`` concatenates the left and right
            singular vectors, each scaled by the square root of the singular
            values (``self._d // 2`` columns each).

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        started = time()
        adjacency = nx.to_numpy_matrix(graph)
        scaled = self._beta * adjacency
        global_part = np.eye(graph.number_of_nodes()) - scaled
        proximity = np.dot(np.linalg.inv(global_part), scaled)

        left, sigma, right_t = lg.svds(proximity, k=self._d // 2)
        root_sigma = np.diag(np.sqrt(sigma))
        source_half = np.dot(left, root_sigma)
        target_half = np.dot(right_t.T, root_sigma)
        finished = time()
        self._X = np.concatenate((source_half, target_half), axis=1)

        # Report how far the rank-limited product is from the full matrix.
        low_rank = np.dot(left, np.dot(np.diag(sigma), right_t))
        print('SVD error (low rank): %f' % np.linalg.norm(low_rank - proximity))

        return self._X, (finished - started)
Beispiel #7
0
def main(opts):
    """Embed ``Data/<dataset>.edgelist`` with Laplacian Eigenmaps and save it.

    Args:
        opts: object providing ``dataset`` (name used in the input and output
            file names) and ``dimension`` (embedding size, converted with
            ``int``).
    """
    dataset = opts.dataset
    embed_dim = int(opts.dimension)
    # File that contains the edges. Format: source target
    # Optionally, you can add weights as third column: source target weight
    edge_f = 'Data/%s.edgelist' % dataset

    # print() with a single argument behaves the same on Python 2 and 3; the
    # former print statements were a SyntaxError on Python 3.
    print("Loading Dataset")
    # Load graph
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)

    embedding = LaplacianEigenmaps(d=embed_dim)

    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    print("Starting Embedding")
    Y, _ = embedding.learn_embedding(graph=G,
                                     edge_f=None,
                                     is_weighted=True,
                                     no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    np_save(writable("Embedding_Results", "jac_" + dataset + str(embed_dim)),
            Y)
Beispiel #8
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Embed a graph from the singular vectors of ``I - A_rownormalized``.

     Args:
         graph: graph to embed (converted with ``to_undirected()``).
         edge_f: edge-list file used when ``graph`` is missing.
         is_weighted: unused here, kept for interface compatibility.
         no_python: unused here, kept for interface compatibility.

     Returns:
         ``(X, seconds)``: the real part of the right singular vectors
         returned by ``svds`` with the first column dropped, and the elapsed
         time.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied.
     """
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graph = graph.to_undirected()
     t1 = time()
     A = nx.to_scipy_sparse_matrix(graph)
     # Use the returned matrix: copy=False is only a hint and the input is
     # left untouched when a conversion is needed (e.g. integer weights).
     # NOTE(review): assumes `normalize` is sklearn.preprocessing.normalize.
     A = normalize(A, norm='l1', axis=1, copy=False)
     I_n = sp.eye(len(graph.nodes))
     I_min_A = I_n - A
     # In scipy.sparse.linalg maxiter defaults to num_nodes * 10, but in testing this sometimes isn't enough which
     # crashes instead of settling on whatever eigenvectors are at after maxiters
     u, s, vt = lg.svds(I_min_A,
                        k=self._d + 1,
                        which='SM',
                        maxiter=(graph.number_of_nodes() * 1000))
     t2 = time()
     self._X = vt.T
     self._X = self._X[:, 1:]
     return self._X.real, (t2 - t1)
Beispiel #9
0
	def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
		"""Embed a graph with eigenvectors of its normalized Laplacian.

		Args:
			graph: graph to embed (converted with ``to_undirected()``).
			edge_f: edge-list file used when ``graph`` is missing.
			is_weighted: unused here, kept for interface compatibility.
			no_python: unused here, kept for interface compatibility.

		Returns:
			``(X, seconds)``: eigenvectors for the ``self._d`` smallest
			eigenvalues after the first one (as returned by ``eigs``, so
			possibly complex), and the decomposition time.

		Raises:
			Exception: if neither ``graph`` nor ``edge_f`` is supplied.
		"""
		if not graph and not edge_f:
			raise Exception('graph/edge_f needed')
		if not graph:
			graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
		graph = graph.to_undirected()
		t1 = time()
		L_sym = nx.normalized_laplacian_matrix(graph)

		w, v = lg.eigs(L_sym, k=self._d+1, which='SM')
		# eigs does not promise any ordering of the eigenpairs; sort them so
		# that the column dropped below really is the smallest eigenvalue's.
		v = v[:, np.argsort(w)]
		t2 = time()
		self._X = v[:, 1:]
		return self._X, (t2-t1)
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph via truncated SVD and return a node -> vector mapping.

        Args:
            graph: graph to embed.
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(models, seconds)`` where ``models`` maps each node of
            ``graph.nodes`` to its embedding row as a plain list.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        started = time()
        adjacency = nx.to_numpy_matrix(graph)
        scaled = self._beta * adjacency
        global_part = np.eye(graph.number_of_nodes()) - scaled
        proximity = np.dot(np.linalg.inv(global_part), scaled)

        left, sigma, right_t = lg.svds(proximity, k=self._d // 2)
        root_sigma = np.diag(np.sqrt(sigma))
        source_half = np.dot(left, root_sigma)
        target_half = np.dot(right_t.T, root_sigma)
        finished = time()
        self._X = np.concatenate((source_half, target_half), axis=1)

        low_rank = np.dot(left, np.dot(np.diag(sigma), right_t))
        print('SVD error (low rank): %f' % np.linalg.norm(low_rank - proximity))

        # Pair every node with its embedding row, in graph.nodes order.
        models = dict(zip(graph.nodes, self._X.tolist()))
        return models, (finished - started)
Beispiel #11
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph with Laplacian eigenvectors, falling back to noise.

        Args:
            graph: graph to embed (converted with ``to_undirected()``).
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``. If the decomposition (or the error report that
            follows it) raises, ``X`` is a random normal matrix of shape
            ``(n_nodes, self._d)`` instead.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        t1 = time()
        L_sym = nx.normalized_laplacian_matrix(graph)

        try:
            w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
            # eigs gives no ordering guarantee; sort so that the dropped
            # column belongs to the smallest eigenvalue.
            order = np.argsort(w)
            w = w[order]
            v = v[:, order]
            t2 = time()
            self._X = v[:, 1:]

            p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
            eig_err = np.linalg.norm(p_d_p_t - L_sym)
            print('Laplacian matrix recon. error (low rank): %f' % eig_err)
            return self._X, (t2 - t1)
        except Exception:
            # Same best-effort fallback as before, but Ctrl-C / SystemExit
            # are no longer swallowed.
            print('SVD did not converge. Assigning random emebdding')
            self._X = np.random.randn(L_sym.shape[0], self._d)
            t2 = time()
            return self._X, (t2 - t1)
Beispiel #12
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph via truncated SVD of a proximity ``inv(M_g) * M_l``.

        ``self._sim_fn`` selects how ``M_g`` and ``M_l`` are built:
        ``"pagerank"``, ``"cn"``, ``"aa"``; ``"katz"`` and any other value use
        ``M_g = I - beta*A`` and ``M_l = beta*A``.

        Args:
            graph: graph to embed; its nodes must be ``0 .. n-1`` because the
                adjacency matrix is built with ``nodelist=range(n)``.
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``. If inversion or SVD raises, ``X`` is a random
            normal matrix with ``2 * (self._d // 2)`` columns.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        t1 = time()
        n_nodes = graph.number_of_nodes()
        A = nx.to_numpy_matrix(graph, nodelist=range(n_nodes))
        if self._sim_fn == "pagerank":
            # Work on a plain ndarray so the row-wise broadcast division
            # below behaves element-wise.
            A = np.array(A)
            # in case the sum is 0
            row_sums = A.sum(axis=1) + 1e-8
            P = A / row_sums[:, np.newaxis]
            M_g = np.eye(n_nodes) - self._beta * P
            M_l = (1 - self._beta) * np.eye(n_nodes)
        elif self._sim_fn == "cn":
            M_g = np.eye(n_nodes)
            M_l = np.dot(A, A)
        elif self._sim_fn == "aa":
            # Flatten both sums first: adding the (n, 1) row sums to the
            # (1, n) column sums of an np.matrix broadcasts to n x n, and
            # np.diag of that returns a vector, so M_l came out with the
            # wrong shape and the except below always produced noise.
            degree = (np.asarray(A.sum(axis=1)).ravel() +
                      np.asarray(A.sum(axis=0)).ravel())
            D = np.diag(np.reciprocal(degree.astype('float')))
            M_g = np.eye(n_nodes)
            M_l = np.dot(np.dot(A, D), A)
        else:
            # "katz" and every unrecognised value share this definition.
            M_g = np.eye(n_nodes) - self._beta * A
            M_l = self._beta * A
        try:
            S = np.dot(np.linalg.inv(M_g), M_l)
            u, s, vt = lg.svds(S, k=self._d // 2)
            X1 = np.dot(u, np.diag(np.sqrt(s)))
            X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
            t2 = time()
            self._X = np.concatenate((X1, X2), axis=1)
            p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
            eig_err = np.linalg.norm(p_d_p_t - S)
            print('SVD error (low rank): %f' % eig_err)
            return self._X, (t2 - t1)
        except Exception:
            # Best-effort fallback kept; KeyboardInterrupt / SystemExit now
            # propagate instead of being swallowed.
            print(
                'Singularity Matrix or SVD did not converge. Assigning random emebdding'
            )
            X1 = np.random.randn(A.shape[0], self._d // 2)
            X2 = np.random.randn(A.shape[0], self._d // 2)
            t2 = time()
            self._X = np.concatenate((X1, X2), axis=1)
            return self._X, (t2 - t1)
Beispiel #13
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed a graph by factorising powers of its row-normalised adjacency.

        For each ``i`` in ``range(self._K)`` the matrix ``A ** (i + 1)`` is
        reshaped to ``(n, n, 1)`` and decomposed with ``parafac``; the
        resulting source and target columns are written to columns ``i`` and
        ``i + self._K`` of the embedding.

        Args:
            graph: graph to embed; rows follow ``sorted(graph.nodes())``.
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)`` with ``X`` of shape ``(n_nodes, self._d)``.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
            AssertionError: if ``self._d`` is set and differs from
                ``2 * self._K``.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        t1 = time()
        nNodes = graph.number_of_nodes()
        nEdges = graph.number_of_edges()
        print('num nodes: ', nNodes)
        print('num edges: ', nEdges)
        S = nx.to_numpy_matrix(graph, nodelist=sorted(graph.nodes()))
        A = Normalizer(norm='l1').fit_transform(S)

        if self._d is None:
            self._d = 2 * self._K
        else:
            assert self._d == 2 * self._K

        emb = np.zeros((nNodes, self._d))
        # Keep only the current power of A instead of materialising an
        # n x n x K tensor; each slab is consumed once, in order.
        power = A
        for i in range(self._K):
            print('Slab id: ', i)
            slab = np.reshape(np.asarray(power, dtype=float),
                              (nNodes, nNodes, 1))
            XX = tl.tensor(slab)
            print('Tensor shape: ', XX.shape)
            # NOTE(review): indexing factors[0..2] assumes parafac returns
            # the three factor matrices directly -- confirm for the installed
            # tensorly version.
            factors = parafac(XX,
                              rank=self._R,
                              n_iter_max=self._n_iter,
                              init='random')  # random_state=123,
            source_emb = factors[0]
            target_emb = factors[1]
            proximity_emb = factors[2]
            print('Source emb shape: ', source_emb.shape)
            print('Target emb shape: ', target_emb.shape)
            print('Proximity emb shape: ', proximity_emb.shape)
            source_proximity_emb = np.dot(source_emb, proximity_emb.T)
            target_proximity_emb = np.dot(target_emb, proximity_emb.T)
            emb[:, [i, i + self._K]] = np.concatenate(
                (source_proximity_emb, target_proximity_emb), axis=1)
            if i + 1 < self._K:
                power = power.dot(A)

        self._X = emb
        print("Embedding shape: ", self._X.shape)

        t2 = time()
        return self._X, (t2 - t1)
Beispiel #14
0
def tsv2edgelist(file, isDirected=True):
    """Load an edge list from a tsv file for use in the GEM package.

    Args:
        file: path of the tsv edge-list file to load. (Previously ignored in
            favour of the hard-coded ``'../data/Nedgelist.tsv'``.)
        isDirected: forwarded as ``directed`` to the loader; when true the
            result is additionally passed through ``to_directed()``.

    Returns:
        The loaded graph.
    """
    G = graph_util.loadGraphFromEdgeListTxt(file, directed=isDirected)
    if isDirected:
        return G.to_directed()
    return G
Beispiel #15
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Laplacian-eigenvector embedding indexed directly by node ID.

        The returned matrix has ``max(node_id) + 1`` rows so that row ``i``
        belongs to node ``i``; rows of IDs missing from the graph stay zero.
        Node IDs must therefore be non-negative integers.

        Args:
            graph: graph to embed (converted with ``to_undirected()``).
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``: real-valued array of shape
            ``(max_id + 1, self._d)`` and the decomposition time.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()

        # Sorted, de-duplicated node IDs. The Laplacian is built in this same
        # order so that embedding row i really belongs to node_ids[i]; the
        # default graph.nodes() order is not guaranteed to be sorted.
        node_ids = sorted(set(graph.nodes()))

        t1 = time()

        L_sym = nx.normalized_laplacian_matrix(graph, nodelist=node_ids)
        w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
        # eigs gives no ordering guarantee; sort before dropping column 0.
        v = v[:, np.argsort(w)]

        t2 = time()
        compact = v[:, 1:]

        # Needed when the dataset's IDs are not compact: scatter the rows to
        # their node-ID positions, leaving unused IDs as zero rows.
        nA = np.asarray(node_ids, dtype=int)
        XX = np.zeros((nA.max() + 1, self._d))
        # The target array is real, so only the real part is stored.
        XX[nA] = np.real(compact)
        self._X = XX

        return self._X, (t2 - t1)
Beispiel #16
0
def executeLinkPrediction(dataset, isDirected, method):
    """Run static link prediction for ``method`` on a dataset and print MAP.

    Loads ``u1LIKECompact.base`` (train), ``u1LIKECompact.test`` (test) and
    ``u1LIKECompact.edgelist`` (train + test) from ``gem/datasets/<dataset>/``.

    Args:
        dataset: name of the sub-directory of ``gem/datasets``.
        isDirected: whether the edge lists are loaded as directed graphs.
        method: embedding method passed to the evaluation; must provide
            ``get_method_summary()``.
    """
    G_train = graph_util.loadGraphFromEdgeListTxt(
        'gem/datasets/' + dataset + '/u1LIKECompact.base',
        directed=isDirected)  # training graph
    G_test = graph_util.loadGraphFromEdgeListTxt(
        'gem/datasets/' + dataset + '/u1LIKECompact.test',
        directed=isDirected)  # test graph
    G_total = graph_util.loadGraphFromEdgeListTxt(
        'gem/datasets/' + dataset + '/u1LIKECompact.edgelist',
        directed=isDirected)  # full graph (train + test)

    MAP = lp.evaluateStaticLinkPrediction(G_total,
                                          G_train,
                                          G_test,
                                          method,
                                          is_undirected=(not isDirected))

    # Single-argument print() calls produce the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(method.get_method_summary())
    print("MAP: %s" % (MAP,))
Beispiel #17
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Truncated-SVD embedding indexed directly by node ID.

        Factorises ``inv(I - beta*A) * beta*A`` and returns a matrix with
        ``max(node_id) + 1`` rows so that row ``i`` belongs to node ``i``;
        rows of IDs missing from the graph stay zero. Node IDs must
        therefore be non-negative integers.

        Args:
            graph: graph to embed.
            edge_f: edge-list file used when ``graph`` is missing.
            is_weighted: unused here, kept for interface compatibility.
            no_python: unused here, kept for interface compatibility.

        Returns:
            ``(X, seconds)``: array of shape ``(max_id + 1, self._d)`` and the
            time spent on the factorisation.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        # Sorted, de-duplicated node IDs. The adjacency matrix is built in
        # this same order so that embedding row i really belongs to
        # node_ids[i]; the default graph.nodes() order need not be sorted.
        node_ids = sorted(set(graph.nodes()))

        t1 = time()
        # Adjacency matrix of the graph, rows/columns in node_ids order.
        A = nx.to_numpy_matrix(graph, nodelist=node_ids)

        M_g = np.eye(graph.number_of_nodes()) - self._beta * A
        M_l = self._beta * A
        S = np.dot(np.linalg.inv(M_g), M_l)

        u, s, vt = lg.svds(S, k=self._d // 2)
        X1 = np.dot(u, np.diag(np.sqrt(s)))
        X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
        t2 = time()
        compact = np.concatenate((X1, X2), axis=1)

        p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
        eig_err = np.linalg.norm(p_d_p_t - S)
        print('SVD error (low rank): %f' % eig_err)

        # Needed when the dataset's IDs are not compact: scatter the rows to
        # their node-ID positions, leaving unused IDs as zero rows.
        nA = np.asarray(node_ids, dtype=int)
        XX = np.zeros((nA.max() + 1, self._d))
        XX[nA] = compact
        self._X = XX

        return self._X, (t2 - t1)
Beispiel #18
0
def run_sdne(edges_file, modified_filename, vertices_filename):
    """Train an ``sdne`` embedding on an edge list and export the result.

    Args:
        edges_file: path of the edge-list file to load.
        modified_filename: forwarded to ``create_converted_file``.
        vertices_filename: forwarded to ``create_converted_file``.
    """
    # Fixed hyperparameters of the embedding method.
    hyperparameters = dict(
        d=100,
        beta=5,
        alpha=1e-6,
        nu1=1e-3,
        nu2=1e-3,
        K=3,
        n_units=[500, 300],
        rho=0.3,
        n_iter=1,
        xeta=1e-4,
        n_batch=500,
        modelfile=['./intermediate/enc_model.json',
                   './intermediate/dec_model.json'],
        weightfile=['./intermediate/enc_weights.hdf5',
                    './intermediate/dec_weights.hdf5'],
    )
    em = sdne(**hyperparameters)

    loaded_graph = graph_util.loadGraphFromEdgeListTxt(edges_file)

    # The returned (embedding, time) pair is not needed; the embedding is
    # fetched from the model right below.
    em.learn_embedding(loaded_graph,
                       edge_f=None,
                       is_weighted=True,
                       no_python=True)

    embedding = em.get_embedding(None)
    create_converted_file(embedding, modified_filename, vertices_filename)
Beispiel #19
0
def main(args):
    """Embed the graph in ``args.input`` and write it to ``args.output``.

    Args:
        args: options object. ``input``, ``directed``, ``method``,
            ``weighted`` and ``output`` are always read; the remaining
            attributes are read only by the selected method
            (``'gf'``, ``'hope'``, ``'lap'``, ``'lle'`` or ``'sdne'``).

    Raises:
        ValueError: if ``args.method`` is not one of the supported names.
    """
    # Load the edge list and work on a directed copy of it.
    G = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = G.to_directed()

    def _build_gf():
        # Dimension, maximum iterations, learning rate, regularisation.
        return GraphFactorization(d=args.dimension,
                                  max_iter=args.max_iter,
                                  eta=args.eta,
                                  regu=args.regu)

    def _build_hope():
        # Dimension and decay factor.
        return HOPE(d=args.dimension, beta=args.beta)

    def _build_lap():
        return LaplacianEigenmaps(d=args.dimension)

    def _build_lle():
        return LocallyLinearEmbedding(d=args.dimension)

    def _build_sdne():
        # The encoder layer sizes arrive as the text of a Python literal.
        layer_sizes = ast.literal_eval(args.encoder_list)
        return SDNE(d=args.dimension,
                    beta=args.beta,
                    alpha=args.alpha,
                    nu1=args.nu1,
                    nu2=args.nu2,
                    K=len(layer_sizes),
                    n_units=layer_sizes,
                    n_iter=args.max_iter,
                    xeta=args.learning_rate,
                    n_batch=args.bs)

    # Builders are only invoked for the chosen method, so options of the
    # other methods are never touched.
    builders = {
        'gf': _build_gf,
        'hope': _build_hope,
        'lap': _build_lap,
        'lle': _build_lle,
        'sdne': _build_sdne,
    }
    build = builders.get(args.method)
    if build is None:
        raise ValueError('The requested method does not exist!')
    model = build()

    # Learn the node embeddings
    Y, t = model.learn_embedding(graph=G,
                                 edge_f=None,
                                 is_weighted=args.weighted,
                                 no_python=True)
    # Drop imaginary parts that are numerically negligible.
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
Beispiel #20
0
 def prep_embedding(self, graph=None, edge_f=None,
                     is_weighted=False, no_python=False):
     """Symmetrise the adjacency matrix and record the node count.

     Prints the shape of the symmetrised matrix and the number of nodes, and
     stores the latter in ``self._node_num``. Nothing is returned.

     Args:
         graph: graph to prepare.
         edge_f: edge-list file used when ``graph`` is missing.
         is_weighted: unused here, kept for interface compatibility.
         no_python: unused here, kept for interface compatibility.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied.
     """
     if not (graph or edge_f):
         raise Exception('graph/edge_f needed')
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     adjacency = nx.to_scipy_sparse_matrix(graph)
     # Start timestamp; not used further in this method.
     t1 = time()
     # Average the matrix with its transpose to make it symmetric.
     symmetric = (adjacency + adjacency.T) / 2
     print(symmetric.shape)
     self._node_num = graph.number_of_nodes()
     print(self._node_num)
Beispiel #21
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Return a random one-dimensional embedding for every node.

     Args:
         graph: graph whose node count determines the number of rows.
         edge_f: edge-list file used when ``graph`` is missing.
         is_weighted: unused here, kept for interface compatibility.
         no_python: unused here, kept for interface compatibility.

     Returns:
         ``(X, seconds)`` where ``X`` is a standard-normal array of shape
         ``(n_nodes, 1)`` that is also stored in ``self._X``.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied.
     """
     if not (graph or edge_f):
         raise Exception('graph/edge_f needed')
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     undirected = graph.to_undirected()
     started = time()
     self._X = np.random.randn(undirected.number_of_nodes(), 1)
     finished = time()
     return self._X, (finished - started)
Beispiel #22
0
def main(data_set_name):
    """Embed one data set with Laplacian Eigenmaps and write the result.

    Reads ``./graph/<data_set_name>.tsv`` as an edge list, learns a
    4-dimensional embedding and writes it, space separated, to
    ``./emb/<data_set_name>.emb``.

    Args:
        data_set_name: base name (no directory, no extension) of the data set.
    """
    dimensions = 4
    edge_list_path = ''.join(('./graph/', data_set_name, '.tsv'))
    embedding_path = ''.join(('./emb/', data_set_name, '.emb'))

    # Instantiate the embedding method with its hyperparameters
    embedder = LaplacianEigenmaps(dimensions)

    # Load graph
    graph = graph_util.loadGraphFromEdgeListTxt(edge_list_path)

    # Learn embedding - accepts a networkx graph or file with edge list.
    # The second element of the returned pair is not used here.
    embeddings_array, _unused = embedder.learn_embedding(
        graph, edge_f=None, is_weighted=True, no_python=True)

    # Missing values, if any, are written out as 0.1.
    frame = pandas.DataFrame(embeddings_array)
    frame.to_csv(embedding_path, sep=' ', na_rep=0.1)
Beispiel #23
0
    def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        """Embed the graph with eigenvectors of its normalized Laplacian.

        Args:
            graph: graph object; when falsy, the graph is read from ``edge_f``.
            edge_f: path of an edge-list file used when ``graph`` is not given.
            is_weighted: accepted for interface compatibility; not read here.
            no_python: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(X, eig_err)``: ``X`` holds the computed eigenvectors
            except the one with the smallest eigenvalue (one row per node);
            ``eig_err`` is the norm of the difference between the low-rank
            reconstruction and the Laplacian.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        L_sym = nx.normalized_laplacian_matrix(graph)

        w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
        # The solver does not promise any ordering of the eigenpairs, so sort
        # them by eigenvalue before discarding the first column; otherwise
        # the column that is dropped may not be the smallest-eigenvalue one.
        order = np.argsort(w.real)
        w = w[order]
        v = v[:, order]
        self._X = v[:, 1:]

        # Rebuild the matrix from the retained eigenpairs to measure how much
        # of the Laplacian they capture.
        p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
        eig_err = np.linalg.norm(p_d_p_t - L_sym)
        return self._X, eig_err
Beispiel #24
0
	def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
		"""Embed the graph from the SVD of ``I - A`` with row-normalized ``A``.

		Args:
			graph: graph object; when falsy, the graph is read from ``edge_f``.
			edge_f: path of an edge-list file used when ``graph`` is not given.
			is_weighted: accepted for interface compatibility; not read here.
			no_python: accepted for interface compatibility; not read here.

		Returns:
			Tuple ``(X, seconds)``: ``X`` has one row per node and is built
			from the right singular vectors with the first column removed;
			``seconds`` is the wall-clock time of the factorization.

		Raises:
			Exception: if neither ``graph`` nor ``edge_f`` is supplied.
		"""
		if not graph and not edge_f:
			raise Exception('graph/edge_f needed')
		if not graph:
			graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
		graph = graph.to_undirected()
		t1 = time()
		A = nx.to_scipy_sparse_matrix(graph)
		# normalize() is not guaranteed to work in place even with copy=False
		# (e.g. an integer-typed matrix is converted and a new object is
		# returned), so always keep the returned matrix.
		A = normalize(A, norm='l1', axis=1, copy=False)
		I = sp.eye(graph.number_of_nodes())
		I_min_A = I - A
		_, _, vt = lg.svds(I_min_A, k=self._d+1, which='SM')
		t2 = time()
		# NOTE(review): dropping column 0 presumes the first singular vector
		# returned is the one to discard - confirm svds ordering.
		self._X = vt.T[:, 1:]
		return self._X, (t2-t1)
Beispiel #25
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Learn an embedding by running the external ``node2vec`` executable.

     The graph is written to ``tempGraph.graph``, the executable is invoked
     with this object's hyperparameters, and the result is read back from
     ``tempGraph.emb``. Diagnostic information is printed along the way.

     Args:
         graph: graph object; when ``None``, the graph is read from ``edge_f``.
         edge_f: path of an edge-list file used when ``graph`` is not given.
         is_weighted: accepted for interface compatibility; not read here.
         no_python: accepted for interface compatibility; not read here.

     Returns:
         Tuple ``(X, seconds)``: the loaded embedding and the wall-clock time
         spent running the executable and loading its output.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied, or if
             launching the executable fails.
     """
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if graph is None:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=True)
     # Materialise nodes and edges as lists: depending on the networkx
     # version they may be views that cannot be sliced or indexed by position.
     node_list = list(graph.nodes())
     print("node2vec graph")
     print(list(graph.edges())[:3])
     graph_util.saveGraphToEdgeListTxtn2v(graph, 'tempGraph.graph')
     args = [
         "node2vec",
         "-i:tempGraph.graph",
         "-o:tempGraph.emb",
         "-d:%d" % self._d,
         "-l:%d" % self._walk_len,
         "-r:%d" % self._num_walks,
         "-k:%d" % self._con_size,
         "-e:%d" % self._max_iter,
         "-p:%f" % self._ret_p,
         "-q:%f" % self._inout_p,
         "-v",
         "-dr",
         "-w",
     ]
     t1 = time()
     try:
         call(args)
     except Exception as e:
         print(str(e))
         raise Exception(
             './node2vec not found. Please compile snap, place node2vec in the system path and grant executable permission'
         )
     self._X = graph_util.loadEmbedding('tempGraph.emb')
     t2 = time()
     # NOTE: the label below says "edges" but the value printed is the
     # number of nodes; the text is left untouched to keep the output stable.
     print('len graph edges')
     print(len(node_list))
     print('embedding vectors number')
     print(len(self._X))
     print('GUESS embedding node2vc')
     # Pair nodes with embedding rows positionally; zip stops at the shorter
     # sequence, so a length mismatch cannot raise an IndexError.
     for node, vector in zip(node_list, self._X):
         print(str(node) + " " + str(vector))
     return self._X, (t2 - t1)
Beispiel #26
0
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Learn an embedding by running ``gem/c_exe/node2vec``.

     The graph is exported to ``gem/intermediate/<data_set>_n2v.graph``
     unless that file can already be opened, the executable is run with this
     object's hyperparameters, the embedding file is loaded and then deleted.

     Args:
         graph: graph object to embed.
         edge_f: path of an edge-list file; when given, it replaces ``graph``.
         is_weighted: accepted for interface compatibility; not read here.
         no_python: accepted for interface compatibility; not read here.

     Returns:
         Tuple ``(X, seconds)``: the loaded embedding and the wall-clock time
         spent running the executable and loading its output.

     Raises:
         Exception: if neither ``graph`` nor ``edge_f`` is supplied, or if
             launching the executable fails.
     """
     # Imported locally so the block does not rely on a module-level import.
     import os

     args = ["gem/c_exe/node2vec"]
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if edge_f:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graphFileName = 'gem/intermediate/%s_n2v.graph' % self._data_set
     embFileName = 'gem/intermediate/%s_%d_n2v.emb' % (self._data_set,
                                                       self._d)
     # Reuse an existing exported edge list; only write one when the file
     # cannot be opened for reading.
     try:
         with open(graphFileName, 'r'):
             pass
     except IOError:
         graph_util.saveGraphToEdgeListTxtn2v(graph, graphFileName)
     args.append("-i:%s" % graphFileName)
     args.append("-o:%s" % embFileName)
     args.append("-d:%d" % self._d)
     args.append("-l:%d" % self._walk_len)
     args.append("-r:%d" % self._num_walks)
     args.append("-k:%d" % self._con_size)
     args.append("-e:%d" % self._max_iter)
     args.append("-p:%f" % self._ret_p)
     args.append("-q:%f" % self._inout_p)
     args.append("-v")
     args.append("-dr")
     args.append("-w")
     t1 = time()
     try:
         call(args)
     except Exception as e:
         print(str(e))
         raise Exception(
             './node2vec not found. Please compile snap, place node2vec in the path and grant executable permission'
         )
     self._X = graph_util.loadEmbedding(embFileName)
     t2 = time()
     # Delete the temporary embedding file without spawning `rm`, which does
     # not exist on every platform. Cleanup stays best-effort, as before
     # (the exit status of `rm` was never checked).
     try:
         os.remove(embFileName)
     except OSError:
         pass
     return self._X, (t2 - t1)
Beispiel #27
0
    def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        """Embed the graph from a truncated SVD of a proximity matrix.

        The proximity matrix is ``inv(I - beta * A) * (beta * A)`` where ``A``
        is the dense adjacency matrix and ``beta`` is ``self._beta``.

        Args:
            graph: graph object; when falsy, the graph is read from ``edge_f``.
            edge_f: path of an edge-list file used when ``graph`` is not given.
            is_weighted: accepted for interface compatibility; not read here.
            no_python: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(X, seconds)``: ``X`` concatenates the scaled left and
            right singular vectors column-wise; ``seconds`` is the wall-clock
            time of the computation.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        started = time()
        # A is the adjacency matrix as a numpy matrix.
        adjacency = nx.to_numpy_matrix(graph)
        # Local term: beta * A.
        local_term = self._beta * adjacency
        # Global term: identity - beta * A.
        global_term = np.eye(graph.number_of_nodes()) - local_term
        # Product of the inverse of the global term with the local term.
        proximity = np.dot(np.linalg.inv(global_term), local_term)

        # Singular values and singular vectors of the proximity matrix.
        left, sigma, right_t = lg.svds(proximity, k=self._d // 2)
        root_sigma = np.diag(np.sqrt(sigma))
        source_part = np.dot(left, root_sigma)
        target_part = np.dot(right_t.T, root_sigma)
        finished = time()
        self._X = np.concatenate((source_part, target_part), axis=1)

        # Report how far the low-rank reconstruction is from the full matrix.
        reconstruction = np.dot(left, np.dot(np.diag(sigma), right_t))
        eig_err = np.linalg.norm(reconstruction - proximity)
        print('SVD error (low rank): %f' % eig_err)
        return self._X, (finished - started)
Beispiel #28
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed the graph with sorted eigenvectors of its normalized Laplacian.

        Args:
            graph: graph object; when falsy, the graph is read from ``edge_f``.
            edge_f: path of an edge-list file used when ``graph`` is not given.
            is_weighted: accepted for interface compatibility; not read here.
            no_python: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(X, seconds)``: ``X`` is the real part of the computed
            eigenvectors, sorted by eigenvalue, without the first one;
            ``seconds`` is the wall-clock time of the eigendecomposition.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        started = time()
        laplacian = nx.normalized_laplacian_matrix(graph)

        eigvals, eigvecs = lg.eigs(laplacian, k=self._d + 1, which='SM')
        # Put the eigenpairs in ascending eigenvalue order.
        order = np.argsort(eigvals)
        eigvals, eigvecs = eigvals[order], eigvecs[:, order]
        finished = time()
        # Everything but the first (smallest-eigenvalue) eigenvector.
        self._X = eigvecs[:, 1:]

        # Report how far the low-rank reconstruction is from the Laplacian.
        spectrum = np.diag(eigvals)
        reconstruction = np.dot(eigvecs, np.dot(spectrum, eigvecs.T))
        eig_err = np.linalg.norm(reconstruction - laplacian)
        print('Laplacian matrix recon. error (low rank): %f' % eig_err)
        return self._X.real, (finished - started)
Beispiel #29
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        amatrix=None,
                        nodelist=None,
                        weight='weight'):
        """Embed a graph from a truncated SVD of a proximity matrix.

        The proximity matrix is ``inv(M_g) * M_l``. For ``'katz'`` proximity
        ``M_g = I - beta * A`` and ``M_l = beta * A``; for
        ``'common-neighbors'`` ``M_g = I`` and ``M_l = A * A``.

        Args:
            graph: graph object; used when ``amatrix`` is not given.
            edge_f: path of an edge-list file, read when neither ``amatrix``
                nor ``graph`` is given.
            amatrix: adjacency matrix; when given it takes precedence over
                ``graph`` and ``edge_f``.
            nodelist: row/column order for the adjacency matrix built from
                ``graph``; defaults to the sorted node list.
            weight: edge attribute used as the weight when building the
                adjacency matrix from ``graph``.

        Returns:
            Tuple ``(X, eig_err)``: ``X`` concatenates the scaled left and
            right singular vectors column-wise; ``eig_err`` is the norm of the
            difference between the low-rank reconstruction and the proximity
            matrix.

        Raises:
            Exception: if none of ``graph``, ``edge_f``, ``amatrix`` is given.
            ValueError: if ``self._proximity`` is not a supported measure.
        """
        if graph is None and edge_f is None and amatrix is None:
            raise Exception('graph/edge_f/amatrix needed')

        if amatrix is None:
            if graph is None:
                graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
            if nodelist is None:
                nodelist = sorted(graph.nodes)
            # to_numpy_matrix no longer exists in recent networkx releases;
            # wrapping to_numpy_array yields the same numpy matrix.
            A = np.asmatrix(
                nx.to_numpy_array(graph, nodelist=nodelist, weight=weight))
        else:
            # np.mat was an alias of np.asmatrix and is gone in NumPy 2.
            A = np.asmatrix(amatrix)
        # Size everything from the matrix itself so a nodelist that covers
        # only part of the graph still gives consistent shapes.
        n = A.shape[0]

        if self._proximity == 'katz':
            M_g = np.eye(n) - self._beta * A
            M_l = self._beta * A
        elif self._proximity == 'common-neighbors':
            M_g = np.eye(n)
            # A is a numpy matrix, so `*` is a matrix product here.
            M_l = A * A
        else:
            raise ValueError(
                'unsupported proximity measure: %r' % (self._proximity,))
        S = np.dot(np.linalg.inv(M_g), M_l)

        u, s, vt = lg.svds(S, k=self._d // 2)
        X1 = np.dot(u, np.diag(np.sqrt(s)))
        X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
        self._X = np.concatenate((X1, X2), axis=1)

        # Distance between the low-rank reconstruction and the full matrix.
        p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
        eig_err = np.linalg.norm(p_d_p_t - S)
        return self._X, eig_err
Beispiel #30
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        """Embed the graph from the SVD of ``I - A`` and index rows by node id.

        ``A`` is the row-normalized adjacency matrix. Because node ids need
        not be contiguous, the result is expanded so that row ``i`` holds the
        embedding of the node whose id is ``i``; rows for ids that do not
        occur in the graph stay zero.

        Args:
            graph: graph object; when falsy, the graph is read from ``edge_f``.
            edge_f: path of an edge-list file used when ``graph`` is not given.
            is_weighted: accepted for interface compatibility; not read here.
            no_python: accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(X, seconds)``: ``X`` has ``max(node id) + 1`` rows and
            ``self._d`` columns; ``seconds`` is the wall-clock time of the
            factorization.

        Raises:
            Exception: if neither ``graph`` nor ``edge_f`` is supplied.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        graph = graph.to_undirected()
        t1 = time()
        # Fix the row order explicitly: without a nodelist the matrix rows
        # follow the graph's own node iteration order, which need not be
        # sorted, and the scatter step below relies on sorted order.
        ordered_nodes = sorted(graph.nodes())
        A = nx.to_scipy_sparse_matrix(graph, nodelist=ordered_nodes)
        # normalize() is not guaranteed to work in place even with copy=False
        # (e.g. an integer-typed matrix is converted and a new object is
        # returned), so always keep the returned matrix.
        A = normalize(A, norm='l1', axis=1, copy=False)
        I = sp.eye(graph.number_of_nodes())
        I_min_A = I - A
        _, _, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
        t2 = time()
        # NOTE(review): dropping column 0 presumes the first singular vector
        # returned is the one to discard - confirm svds ordering.
        compact = vt.T[:, 1:]

        # The dataset's node ids may not be compact: scatter each embedding
        # row to the position given by its node id.
        node_ids = np.asarray(ordered_nodes, dtype=int)
        expanded = np.zeros((node_ids.max() + 1, self._d))
        expanded[node_ids] = compact
        self._X = expanded

        return self._X, (t2 - t1)