Example #1
def binary_community_graph(N, k, maxk, mu):
    """Retruns a binary community graph. """
    if sys.platform[0] == "w":
        args = ["gemben/c_exe/benchm.exe"]
        fcall = "gemben/c_exe/benchm.exe"
    else:
        args = ["gemben/c_exe/benchm"]
        fcall = "gemben/c_exe/benchm"
    args.append("-N %d" % N)
    args.append("-k %d" % k)
    args.append("-maxk %d" % maxk)
    args.append("-mu %f" % mu)
    t1 = time()
    print(args)
    try:
        os.system("%s -N %d -k %d -maxk %d -mu %f" % (fcall, N, k, maxk, mu))
        # call(args)
    except Exception as e:
        print('ERROR: %s' % str(e))
        print('gemben/c_exe/benchm not found. Please compile benchm, place it in the path and grant it executable permission')
    t2 = time()
    print('\tTime taken to generate random graph: %f sec' % (t2 - t1))
    try:
        graph = graph_util.loadGraphFromEdgeListTxt('gemben/c_exe/network.dat')
        node_labels = np.loadtxt('gemben/c_exe/community.dat')
    except (IOError, OSError):
        graph = graph_util.loadGraphFromEdgeListTxt('network.dat')
        node_labels = np.loadtxt('community.dat')
    node_labels = node_labels[:, -1].reshape(-1, 1)
    enc = OneHotEncoder()
    return graph, enc.fit_transform(node_labels)
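A minimal usage sketch (the parameter values below are hypothetical; it assumes the benchmark binary has been compiled and placed under gemben/c_exe/ as the function expects):

# N: nodes, k: average degree, maxk: maximum degree, mu: mixing parameter
graph, one_hot_labels = binary_community_graph(N=1000, k=15, maxk=50, mu=0.1)
print(graph.number_of_nodes(), one_hot_labels.shape)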
Example #2
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        t1 = time()
        # A = nx.to_scipy_sparse_matrix(graph)
        # I = sp.eye(graph.number_of_nodes())
        # M_g = I - self._beta*A
        # M_l = self._beta*A
        A = nx.to_numpy_matrix(graph)
        if self._sim_fn == "katz":
            # Katz: S = (I - beta*A)^{-1} (beta*A)
            M_g = np.eye(graph.number_of_nodes()) - self._beta * A
            M_l = self._beta * A
        elif self._sim_fn == "pagerank":
            # np.matrix does not broadcast the row division below; use ndarray
            A = np.array(A)
            # add a small epsilon in case a row sum is 0
            row_sums = A.sum(axis=1) + 1e-8
            P = A / row_sums[:, np.newaxis]
            M_g = np.eye(graph.number_of_nodes()) - self._beta * P
            M_l = (1 - self._beta) * np.eye(graph.number_of_nodes())
        elif self._sim_fn == "cn":
            M_g = np.eye(graph.number_of_nodes())
            M_l = np.dot(A, A)
        elif self._sim_fn == "aa":
            D = A.sum(axis=1) + A.sum(axis=0)
            D = np.diag(np.reciprocal(D.astype('float')))
            M_g = np.eye(graph.number_of_nodes())
            M_l = np.dot(np.dot(A, D), A)
        else:
            M_g = np.eye(graph.number_of_nodes()) - self._beta * A
            M_l = self._beta * A
        try:
            S = np.dot(np.linalg.inv(M_g), M_l)
            u, s, vt = lg.svds(S, k=self._d // 2)
            X1 = np.dot(u, np.diag(np.sqrt(s)))
            X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
            t2 = time()
            self._X = np.concatenate((X1, X2), axis=1)
            p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
            eig_err = np.linalg.norm(p_d_p_t - S)
            print('SVD error (low rank): %f' % eig_err)
            return self._X, (t2 - t1)
        except Exception:
            print(
                'Singular matrix or SVD did not converge. Assigning random embedding'
            )
            X1 = np.random.randn(A.shape[0], self._d // 2)
            X2 = np.random.randn(A.shape[0], self._d // 2)
            t2 = time()
            self._X = np.concatenate((X1, X2), axis=1)
            return self._X, (t2 - t1)
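For reference, the "katz" branch factorizes S = (I - beta*A)^{-1} (beta*A), the Katz similarity. A self-contained NumPy sketch (a toy path graph; not part of the original code) checks this closed form against the truncated power series:

import numpy as np

# Katz similarity: S = sum_{t>=1} beta^t A^t = (I - beta*A)^{-1} (beta*A)
A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])  # 3-node path graph
beta = 0.1                    # must satisfy beta < 1 / lambda_max(A)
S = np.linalg.inv(np.eye(3) - beta * A) @ (beta * A)

S_series = sum(beta ** t * np.linalg.matrix_power(A, t) for t in range(1, 50))
assert np.allclose(S, S_series, atol=1e-10)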
Example #3
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graph = graph.to_undirected()
     t1 = time()
     self._X = np.random.randn(graph.number_of_nodes(), 1)
     t2 = time()
     return self._X, (t2 - t1)
Example #4
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     args = ["gem/c_exe/node2vec"]
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if edge_f:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graphFileName = 'gem/intermediate/%s_n2v.graph' % self._data_set
     embFileName = 'gem/intermediate/%s_%d_n2v.emb' % (self._data_set,
                                                       self._d)
     try:
         f = open(graphFileName, 'r')
         f.close()
     except IOError:
         graph_util.saveGraphToEdgeListTxtn2v(graph, graphFileName)
     args.append("-i:%s" % graphFileName)
     args.append("-o:%s" % embFileName)
     args.append("-d:%d" % self._d)
     args.append("-l:%d" % self._walk_len)
     args.append("-r:%d" % self._num_walks)
     args.append("-k:%d" % self._con_size)
     args.append("-e:%d" % self._max_iter)
     args.append("-p:%f" % self._ret_p)
     args.append("-q:%f" % self._inout_p)
     args.append("-v")
     args.append("-dr")
     args.append("-w")
     t1 = time()
     try:
         call(args)
     except Exception as e:
         print(str(e))
          raise Exception(
              './node2vec not found. Please compile SNAP, place node2vec in the path and grant it executable permission'
          )
     self._X = graph_util.loadEmbedding(embFileName)
     t2 = time()
     call(["rm", embFileName])
     return self._X, (t2 - t1)
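A usage sketch, assuming a constructor that stores the hyper-parameters used above (the signature is borrowed from the GEM README and the data_set keyword is an assumption; adjust to the installed version):

from gem.embedding.node2vec import node2vec

em = node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
              con_size=10, ret_p=1, inout_p=1, data_set='karate')
X, t = em.learn_embedding(graph=G)  # G: a previously loaded networkx graph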
Example #5
 def learn_embedding(self, graph=None, edge_f=None,
                     is_weighted=False, no_python=False):
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     graph = graph.to_undirected()
     t1 = time()
      A = nx.to_scipy_sparse_matrix(graph)
      # keep the return value: normalize() may copy when it cannot work in place
      A = normalize(A, norm='l1', axis=1, copy=False)
     I_n = sp.eye(graph.number_of_nodes())
     I_min_A = I_n - A
      try:
          u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
      except Exception:
          # fall back to random factors if the sparse SVD fails
          u = np.random.randn(A.shape[0], self._d + 1)
          s = np.random.randn(self._d + 1)
          vt = np.random.randn(self._d + 1, A.shape[0])
     t2 = time()
     self._X = vt.T
     self._X = self._X[:, 1:]
     return self._X, (t2 - t1)
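A dense NumPy illustration of the same decomposition (an aside, not a drop-in replacement): row-normalize A into a random-walk transition matrix P, then keep the right singular vectors of I - P for the smallest singular values, dropping the trivial first one as the code above does:

import numpy as np
import networkx as nx

G = nx.karate_club_graph()
A = nx.to_numpy_array(G)
P = A / A.sum(axis=1, keepdims=True)   # random-walk transition matrix
u, s, vt = np.linalg.svd(np.eye(len(G)) - P)
X = vt[np.argsort(s)[1:3]].T           # 2-d embedding, trivial vector dropped
print(X.shape)                         # (34, 2)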
Example #6
    def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        t1 = time()
        L_sym = nx.normalized_laplacian_matrix(graph)
        
        try:
            w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
            t2 = time()
            self._X = v[:, 1:]

            p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
            eig_err = np.linalg.norm(p_d_p_t - L_sym)
            print('Laplacian matrix recon. error (low rank): %f' % eig_err)
            return self._X, (t2 - t1)
        except Exception:
            print('Eigendecomposition did not converge. Assigning random embedding')
            self._X = np.random.randn(L_sym.shape[0], self._d)
            t2 = time()
            return self._X, (t2 - t1)
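Since the normalized Laplacian is symmetric, scipy's eigsh is the more natural solver here and returns real eigenpairs (eigs yields complex arrays even when the imaginary parts vanish). A standalone sketch of the same decomposition, offered as an aside:

import networkx as nx
from scipy.sparse.linalg import eigsh

G = nx.karate_club_graph()
L_sym = nx.normalized_laplacian_matrix(G)
d = 2
w, v = eigsh(L_sym, k=d + 1, which='SM')  # eigenvalues in ascending order
X = v[:, 1:]                              # drop the trivial eigenvector
print(w[0], X.shape)                      # first eigenvalue ~ 0, (34, 2)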
Example #7
    def get_reconstructed_adj(self, X=None, node_l=None):
        if X is not None:
            node_num = X.shape[0]
            self._X = X
        else:
            node_num = self._node_num
        adj_mtx_r = np.zeros((node_num, node_num))
        for v_i in range(node_num):
            for v_j in range(node_num):
                if v_i == v_j:
                    continue
                adj_mtx_r[v_i, v_j] = self.get_edge_weight(v_i, v_j)
        return adj_mtx_r


if __name__ == '__main__':
    # load Zachary's Karate graph
    edge_f = 'data/karate.edgelist'
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
    G = G.to_directed()
    res_pre = 'results/testKarate'
    graph_util.print_graph_stats(G)
    t1 = time()
    embedding = HOPE(4, 0.01)  # d=4, beta=0.01
    embedding.learn_embedding(graph=G,
                              edge_f=None,
                              is_weighted=True,
                              no_python=True)
    print('HOPE:\n\tTraining time: %f' % (time() - t1))

    viz.plot_embedding2D(embedding.get_embedding()[:, :2],
                         di_graph=G,
                         node_colors=None)
    plt.show()
Example #8
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        t1 = time()
        S = (S + S.T) / 2  # symmetrize the adjacency matrix
        self._node_num = graph.number_of_nodes()

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        # If cannot use previous step information, initialize new models
        self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                    self._nu1, self._nu2, self._actfn)
        self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                    self._nu1, self._nu2, self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(2 * self._node_num, ), name='x_in')
        x1 = Lambda(lambda x: x[:, 0:self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        # Process inputs
        [x_hat1, y1] = self._autoencoder(x1)
        [x_hat2, y2] = self._autoencoder(x2)
        # Outputs
        x_diff1 = merge([x_hat1, x1],
                        mode=lambda ab: ab[0] - ab[1],
                        output_shape=lambda L: L[1])
        x_diff2 = merge([x_hat2, x2],
                        mode=lambda ab: ab[0] - ab[1],
                        output_shape=lambda L: L[1])
        y_diff = merge([y2, y1],
                       mode=lambda ab: ab[0] - ab[1],
                       output_shape=lambda L: L[1])

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains [b, deg]
            '''
            return KBack.sum(KBack.square(
                y_pred * y_true[:, 0:self._node_num]),
                             axis=-1) / y_true[:, self._node_num]

        def weighted_mse_y(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
            '''
            min_batch_size = KBack.shape(y_true)[0]
            return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                                 [min_batch_size, 1]) * y_true

        # Model
        self._model = Model(input=x_in, output=[x_diff1, x_diff2, y_diff])
        sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(
            optimizer=sgd,
            loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
            loss_weights=[1, 1, self._alpha])

        history = self._model.fit_generator(
            generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
            verbose=1,
            callbacks=[callbacks.TerminateOnNaN()])
        loss = history.history['loss']
        # Get embedding for all points
        if loss[-1] == np.inf or np.isnan(loss[-1]):
            print('Model diverged. Assigning random embeddings')
            self._Y = np.random.randn(self._node_num, self._d)
        else:
            self._Y = model_batch_predictor(self._autoencoder, S,
                                            self._n_batch)
        t2 = time()
        # Save the autoencoder and its weights
        if (self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if (self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if (self._savefilesuffix is not None):
            saveweights(self._encoder,
                        'encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
        return self._Y, (t2 - t1)
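A usage sketch for this SDNE variant (constructor arguments as listed in the GEM README; treat the exact signature as an assumption and adjust to the installed version):

from gem.embedding.sdne import SDNE

em = SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
          n_units=[50, 15], rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
          modelfile=['enc_model.json', 'dec_model.json'],
          weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])
Y, t = em.learn_embedding(graph=G)  # G: a previously loaded networkx graph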
Example #9
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        self._node_num = graph.number_of_nodes()
        t1 = time()

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                    self._nu1, self._nu2, self._actfn)
        self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                    self._nu1, self._nu2, self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(self._node_num, ), name='x_in')
        # Process inputs
        [x_hat, y] = self._autoencoder(x_in)
        # Outputs
        x_diff = Subtract()([x_hat, x_in])

        # x_diff = merge([x_hat, x_in],
        #                mode=lambda (a, b): a - b,
        #                output_shape=lambda L: L[1])

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains b
            '''
            return KBack.sum(KBack.square(y_true * y_pred), axis=-1)

        # Model
        self._model = Model(input=x_in, output=x_diff)
        # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(optimizer=adam, loss=weighted_mse_x)

        history = self._model.fit_generator(
            generator=batch_generator_ae(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.shape[0] // self._n_batch,
            verbose=1,
            callbacks=[callbacks.TerminateOnNaN()])
        loss = history.history['loss']
        # Get embedding for all points
        if loss[0] == np.inf or np.isnan(loss[0]):
            print('Model diverged. Assigning random embeddings')
            self._Y = np.random.randn(self._node_num, self._d)
        else:
            self._Y = model_batch_predictor(self._autoencoder, S,
                                            self._n_batch)
        t2 = time()
        # Save the autoencoder and its weights
        if (self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if (self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if (self._savefilesuffix is not None):
            saveweights(self._encoder,
                        'encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
        return self._Y, (t2 - t1)
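A plain-NumPy view of the weighted_mse_x hack (illustrative only): the Keras loss signature only admits (y_true, y_pred), so the per-entry penalty b is smuggled in through y_true while y_pred already holds the residual x_hat - x:

import numpy as np

def weighted_mse_x_np(b, residual):
    # sum of squared residuals, with entries corresponding to observed
    # edges up-weighted by beta (the second-order SDNE penalty)
    return np.sum(np.square(b * residual), axis=-1)

x     = np.array([[1., 0., 1., 0.]])      # one adjacency row
x_hat = np.array([[0.9, 0.1, 0.7, 0.0]])  # its reconstruction
beta  = 5.0
b     = np.where(x > 0, beta, 1.0)
print(weighted_mse_x_np(b, x_hat - x))    # missed edges dominate the cost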
Example #10
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=True):
     c_flag = True
     if not graph and not edge_f:
         raise Exception('graph/edge_f needed')
     if no_python:
         if sys.platform[0] == "w":
             args = ["gem/c_exe/gf.exe"]
         else:
             args = ["gem/c_exe/gf"]
         if not graph and not edge_f:
             raise Exception('graph/edge_f needed')
         if edge_f:
             graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
         graphFileName = 'gem/intermediate/%s_gf.graph' % self._data_set
         embFileName = 'gem/intermediate/%s_%d_gf.emb' % (self._data_set,
                                                          self._d)
         # try:
         # f = open(graphFileName, 'r')
         # f.close()
         # except IOError:
         graph_util.saveGraphToEdgeListTxt(graph, graphFileName)
         args.append(graphFileName)
         args.append(embFileName)
         args.append("1")  # Verbose
         args.append("1")  # Weighted
         args.append("%d" % self._d)
         args.append("%f" % self._eta)
         args.append("%f" % self._regu)
         args.append("%d" % self._max_iter)
         args.append("%d" % self._print_step)
         t1 = time()
         try:
             call(args)
         except Exception as e:
             print(str(e))
             c_flag = False
              print(
                  './gf not found. Reverting to Python implementation. Please compile gf, place it in the path and grant it executable permission'
              )
         if c_flag:
             try:
                 self._X = graph_util.loadEmbedding(embFileName)
             except FileNotFoundError:
                 self._X = np.random.randn(graph.number_of_nodes(), self._d)
             t2 = time()
             try:
                 call(["rm", embFileName])
              except OSError:
                  pass
             return self._X, (t2 - t1)
     if not graph:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     t1 = time()
     self._node_num = graph.number_of_nodes()
     self._X = 0.01 * np.random.randn(self._node_num, self._d)
     for iter_id in range(self._max_iter):
         if not iter_id % self._print_step:
             [f1, f2, f] = self._get_f_value(graph)
             print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' %
                   (iter_id, f, f1, f2))
         for i, j, w in graph.edges(data='weight', default=1):
             if j <= i:
                 continue
             term1 = -(w -
                       np.dot(self._X[i, :], self._X[j, :])) * self._X[j, :]
             term2 = self._regu * self._X[i, :]
             delPhi = term1 + term2
             self._X[i, :] -= self._eta * delPhi
     t2 = time()
     return self._X, (t2 - t1)
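The _get_f_value helper is not shown in this example; below is a hypothetical implementation consistent with the gradient step above (term1 + term2 is the derivative of f1 + f2 with respect to X[i, :]; the real gemben helper may differ by constant factors):

import numpy as np

def _get_f_value(self, graph):
    # f1: squared reconstruction error over edges, f2: L2 regularizer
    f1 = 0.0
    for i, j, w in graph.edges(data='weight', default=1):
        f1 += 0.5 * (w - np.dot(self._X[i, :], self._X[j, :])) ** 2
    f2 = 0.5 * self._regu * np.linalg.norm(self._X) ** 2
    return [f1, f2, f1 + f2]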
Example #11
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        self._node_num = graph.number_of_nodes()
        t1 = time()

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        self._encoder = get_variational_encoder(self._node_num, self._d,
                                                self._n_units, self._nu1,
                                                self._nu2, self._actfn)
        self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                    self._nu1, self._nu2, self._actfn)
        self._autoencoder = get_variational_autoencoder(
            self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(self._node_num, ), name='x_in')
        # Process inputs
        # [x_hat, y] = self._autoencoder(x_in)
        [x_hat, y_mean, y_std, y2] = self._autoencoder(x_in)
        # Outputs
        x_diff = Subtract()([x_hat, x_in])
        # x_diff = merge([x_hat, x_in],
        #                mode=lambda (a, b): a - b,
        #                output_shape=lambda L: L[1])
        y_log_var = KBack.log(KBack.square(y_std))
        vae_loss = merge(
            [y_mean, y_std],
            mode=lambda x: -0.5 * KBack.sum(
                1 + KBack.log(KBack.square(x[1]))
                - KBack.square(x[0]) - KBack.square(x[1]),
                axis=-1),
            output_shape=lambda L: (L[1][0], 1))

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains b
            '''
            return KBack.sum(KBack.square(y_pred *
                                          y_true[:, 0:self._node_num]),
                             axis=-1)

        def weighted_mse_vae(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains KL-divergence
                y_true: Contains np.zeros(mini_batch)
            '''
            min_batch_size = KBack.shape(y_true)[0]
            return KBack.mean(
                # KBack.abs(y_pred),
                KBack.abs(KBack.reshape(y_pred, [min_batch_size, 1])),
                axis=-1)

        # Model
        self._model = Model(input=x_in, output=[x_diff, vae_loss])
        # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(optimizer=adam,
                            loss=[weighted_mse_x, weighted_mse_vae],
                            loss_weights=[1, self._beta_vae])

        history = self._model.fit_generator(
            generator=batch_generator_vae(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.shape[0] // self._n_batch,
            verbose=1,
            callbacks=[callbacks.TerminateOnNaN()])
        loss = history.history['loss']
        # Get embedding for all points
        if loss[0] == np.inf or np.isnan(loss[0]):
            print('Model diverged. Assigning random embeddings')
            self._Y = np.random.randn(self._node_num, self._d)
        else:
            self._Y = model_batch_predictor(self._autoencoder,
                                            S,
                                            self._n_batch,
                                            meth='vae')

        submodel_gen = batch_generator_vae(S, self._beta, self._n_batch, True)
        x = np.concatenate([next(submodel_gen)[0] for _ in range(100)], axis=0)
        vae_submodel = Model(x_in, self._autoencoder(x_in))
        _, _, log_std, _ = vae_submodel.predict(x)
        mean = np.mean(log_std)
        std = np.std(log_std)
        print('log std mean: %f, std: %f' % (mean, std))

        t2 = time()
        # Save the autoencoder and its weights
        if (self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if (self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if (self._savefilesuffix is not None):
            saveweights(self._encoder,
                        'encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
        return self._Y, (t2 - t1)
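For reference, the vae_loss merge above is the closed-form KL divergence KL(N(mu, sigma^2) || N(0, 1)) summed over latent dimensions; a small NumPy check (illustrative only):

import numpy as np

def kl_to_standard_normal(mu, sigma):
    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), as in vae_loss
    return -0.5 * np.sum(1 + np.log(sigma ** 2) - mu ** 2 - sigma ** 2, axis=-1)

print(kl_to_standard_normal(np.array([[0., 0.]]), np.array([[1., 1.]])))  # [0.]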