Code Example #1
File: utils.py Project: mouchetm/CommunityDetection
def get_laplacian(A, normalization_mode = None):
    """
    Compute the different laplacian of a graphs given a
    Code inspired by networkx python library
    """

    A = scipy.sparse.csr_matrix(A)
    diags = A.sum(axis=1).flatten()#Degree
    n,m = A.shape
    D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - A

    if normalization_mode not in ['sym', 'rw', None]:
        raise Exception('Normalisation mode {} unknown'.format(normalization_mode))

    elif normalization_mode == None:
        return L

    elif normalization_mode == 'sym':
        with scipy.errstate(divide='ignore'):
            diags_sqrt = 1.0/scipy.sqrt(diags)
        diags_sqrt[scipy.isinf(diags_sqrt)] = 0
        DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
        return DH.dot(L.dot(DH))

    elif normalization_mode == 'rw':
        with scipy.errstate(divide='ignore'):
            diags_inverse = 1.0/diags
        diags_inverse[scipy.isinf(diags_inverse)] = 0
        DH = scipy.sparse.spdiags(diags_inverse, [0], m, n, format='csr')
        return DH.dot(L)
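A minimal usage sketch of the 'sym' normalization above, not taken from the quoted project: it rebuilds the symmetric D^-1/2 L D^-1/2 form with NumPy only, since the scipy.sqrt and scipy.isinf aliases used in the function were removed from recent SciPy releases; the toy 3-node adjacency matrix is an assumption.

# Hypothetical sketch of the same 'sym' normalization using NumPy directly.
import numpy as np

A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])              # toy path graph
deg = A.sum(axis=1)                        # node degrees
L = np.diag(deg) - A                       # unnormalized Laplacian
with np.errstate(divide='ignore'):         # 1/sqrt(0) -> inf for isolated nodes
    d_inv_sqrt = 1.0 / np.sqrt(deg)
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
L_sym = np.diag(d_inv_sqrt) @ L @ np.diag(d_inv_sqrt)
print(L_sym.round(3))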
Code Example #2
File: commonutil.py Project: cjlin1/liblinear
def evaluations_scipy(ty, pv):
	"""
	evaluations_scipy(ty, pv) -> (ACC, MSE, SCC)
	ty, pv: ndarray

	Calculate accuracy, mean squared error and squared correlation coefficient
	using the true values (ty) and predicted values (pv).
	"""
	if not (scipy != None and isinstance(ty, scipy.ndarray) and isinstance(pv, scipy.ndarray)):
		raise TypeError("type of ty and pv must be ndarray")
	if len(ty) != len(pv):
		raise ValueError("len(ty) must be equal to len(pv)")
	ACC = 100.0*(ty == pv).mean()
	MSE = ((ty - pv)**2).mean()
	l = len(ty)
	sumv = pv.sum()
	sumy = ty.sum()
	sumvy = (pv*ty).sum()
	sumvv = (pv*pv).sum()
	sumyy = (ty*ty).sum()
	with scipy.errstate(all = 'raise'):
		try:
			SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
		except:
			SCC = float('nan')
	return (float(ACC), float(MSE), float(SCC))
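In older SciPy releases scipy.ndarray and scipy.errstate were aliases for the NumPy equivalents, so the isinstance check above no longer works on current SciPy. Below is a hypothetical NumPy-only sketch of the same three metrics; the toy arrays are assumptions.

# Hypothetical sketch: accuracy, MSE and squared correlation with NumPy only.
import numpy as np

ty = np.array([1.0, 2.0, 3.0, 4.0])        # true values
pv = np.array([1.0, 2.5, 2.5, 4.0])        # predicted values
ACC = 100.0 * (ty == pv).mean()
MSE = ((ty - pv) ** 2).mean()
with np.errstate(all='raise'):             # constant inputs raise instead of warn
    try:
        cov = ((ty - ty.mean()) * (pv - pv.mean())).mean()
        SCC = cov ** 2 / (ty.var() * pv.var())   # squared correlation coefficient
    except FloatingPointError:
        SCC = float('nan')
print(ACC, MSE, SCC)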
Code Example #3
    def generate(cls, trueY, forecastY, missing=True):
        nz_mask = trueY != 0
        diff = forecastY - trueY
        abs_true = sp.absolute(trueY)
        abs_diff = sp.absolute(diff)

        def my_mean(x):
            tmp = x[sp.isfinite(x)]
            assert len(tmp) != 0
            return tmp.mean()

        with sp.errstate(divide='ignore'):
            nrmse = sp.sqrt((diff**2).mean()) / abs_true.mean()
            m_nrmse = my_mean(
                sp.sqrt((diff**2).mean(axis=0)) / abs_true.mean(axis=0))

            nd = abs_diff.sum() / abs_true.sum()
            m_nd = my_mean(abs_diff.sum(axis=0) / abs_true.sum(axis=0))

            abs_baseline = sp.absolute(trueY[1:, :] - trueY[:-1, :])
            mase = abs_diff.mean() / abs_baseline.mean()
            m_mase = my_mean(abs_diff.mean(axis=0) / abs_baseline.mean(axis=0))

            mape = my_mean(sp.divide(abs_diff, abs_true, where=nz_mask))

        return cls(nd=nd,
                   mase=mase,
                   nrmse=nrmse,
                   m_nd=m_nd,
                   m_mase=m_mase,
                   m_nrmse=m_nrmse,
                   mape=mape)
Code Example #4
    def proba_matrix(self):
        n, m = self.A.shape
        diags = self.A.sum(axis=1).flatten()
        with scipy.errstate(divide='ignore'):
            diags_inv = 1.0 / diags
        D_inv = scipy.sparse.spdiags(diags_inv, [0], m, n)
        return D_inv.dot(self.A)
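The method above builds the random-walk transition matrix P = D^-1 A. A hypothetical dense sketch of the same step follows; the toy matrix and the explicit zeroing of infinities for isolated nodes are assumptions (the quoted method leaves them as inf).

# Hypothetical sketch: row-normalize an adjacency matrix into P = D^{-1} A.
import numpy as np

A = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [0., 0., 0.]])               # last node is isolated
deg = A.sum(axis=1)
with np.errstate(divide='ignore'):          # 1/0 -> inf, silenced
    deg_inv = 1.0 / deg
deg_inv[np.isinf(deg_inv)] = 0.0            # isolated rows stay all-zero
P = np.diag(deg_inv) @ A
print(P.sum(axis=1))                        # 1.0 for connected nodes, 0.0 otherwise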
Code Example #5
def evaluations_scipy(ty, pv):
    """
	evaluations_scipy(ty, pv) -> (ACC, MSE, SCC)
	ty, pv: ndarray

	Calculate accuracy, mean squared error and squared correlation coefficient
	using the true values (ty) and predicted values (pv).
	"""
    if not (scipy != None and isinstance(ty, scipy.ndarray)
            and isinstance(pv, scipy.ndarray)):
        raise TypeError("type of ty and pv must be ndarray")
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    ACC = 100.0 * (ty == pv).mean()
    MSE = ((ty - pv)**2).mean()
    l = len(ty)
    sumv = pv.sum()
    sumy = ty.sum()
    sumvy = (pv * ty).sum()
    sumvv = (pv * pv).sum()
    sumyy = (ty * ty).sum()
    with scipy.errstate(all='raise'):
        try:
            SCC = ((l * sumvy - sumv * sumy) *
                   (l * sumvy - sumv * sumy)) / ((l * sumvv - sumv * sumv) *
                                                 (l * sumyy - sumy * sumy))
        except:
            SCC = float('nan')
    return (float(ACC), float(MSE), float(SCC))
Code Example #6
File: hdda.py Project: artificyan/HDDA
    def e_step(self, X):
        """Compute the e-step of the algorithm

        Parameters
        ----------
        X : array-like, shape (n_samples, n_dimensions)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        Returns
        -------

        """
        # Get some parameters
        n = X.shape[0]

        # Compute the membership function
        K = self.score_samples(X)

        # Compute the Loglikelhood
        K *= (0.5)
        Km = K.max(axis=1)
        Km.shape = (n, 1)

        # logsumexp trick
        LL = (sp.log(sp.exp(K - Km).sum(axis=1))[:, sp.newaxis] + Km).sum()

        # Compute the posterior
        with sp.errstate(over='ignore'):
            for c in xrange(self.C):
                self.T[:,
                       c] = 1 / sp.exp(K - K[:, c][:, sp.newaxis]).sum(axis=1)

        return LL
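The e-step above computes the total log-likelihood with a hand-rolled log-sum-exp and the per-class posteriors as 1 / sum_j exp(K_j - K_c). A small hypothetical sketch of the same trick on toy scores, using scipy.special.logsumexp for the likelihood part:

# Hypothetical sketch of the log-sum-exp / softmax-from-log-scores trick.
import numpy as np
from scipy.special import logsumexp

K = np.array([[-1.0, -2.0, -0.5],
              [-10.0, -0.1, -3.0]])         # per-sample, per-class log scores
LL = logsumexp(K, axis=1).sum()             # numerically stable log-likelihood
with np.errstate(over='ignore'):            # exp can overflow for distant classes
    T = 1.0 / np.exp(K[:, None, :] - K[:, :, None]).sum(axis=2)
# T[i, c] = exp(K[i, c]) / sum_j exp(K[i, j]); each row of T sums to 1
print(LL, T.sum(axis=1))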
Code Example #7
def laplacian_layout(G, norm=False, dim=2, bad=False):
    A = nx.to_scipy_sparse_matrix(G, format='csr')
    A = np.array(A.todense())
    n, m = A.shape

    diags = A.sum(axis=1).flatten()
    D = np.diag(diags)
    L = D - A
    B = np.eye(n)
    if norm == False:
        layout = eig_layout(G, L, B)
    else:
        if bad:
            with scipy.errstate(divide='ignore'):
                #diags_sqrt = 1.0 / scipy.power(diags, 1)
                diags_sqrt = 1.0 / scipy.sqrt(diags)
            diags_sqrt[scipy.isinf(diags_sqrt)] = 0
            DH = np.diag(diags_sqrt)
            L = np.dot(DH, np.dot(L, DH))
            eigenvalues, eigenvectors = scipy.linalg.eigh(L)
            index = np.argsort(eigenvalues)[1:dim + 1]
            pos = np.real(eigenvectors[:, index])

            pos = np.dot(DH, pos)
            pos = dict(zip(G, pos))
            layout = pos
        else:
            B = D
            layout = eig_layout(G, L, B)

    #print(L)
    #print(B)

    return layout
Code Example #8
def generate_stats(trueY, forecastY, missing=True):
    """ From TRMF code """
    nz_mask = trueY != 0
    diff = forecastY - trueY
    abs_true = sp.absolute(trueY)
    abs_diff = sp.absolute(diff)

    def my_mean(x):
        tmp = x[sp.isfinite(x)]
        assert len(tmp) != 0
        return tmp.mean()

    with sp.errstate(divide='ignore'):
        # rmse
        rmse = sp.sqrt((diff**2).mean())
        # normalized root mean squared error
        nrmse = sp.sqrt((diff**2).mean()) / abs_true.mean()

        # baseline
        abs_baseline = sp.absolute(trueY[1:, :] - trueY[:-1, :])
        mase = abs_diff.mean() / abs_baseline.mean()
        m_mase = my_mean(abs_diff.mean(axis=0) / abs_baseline.mean(axis=0))

        mape = my_mean(sp.divide(abs_diff, abs_true, where=nz_mask))

        return mape, mase, rmse
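Note that np.divide(..., where=mask) leaves the masked-out entries of the output uninitialized unless an out array is given; the my_mean helper above then filters them with isfinite. A hypothetical sketch of the MAPE step that makes this explicit (the toy arrays are assumptions):

# Hypothetical sketch: MAPE over nonzero targets, masked entries kept as NaN.
import numpy as np

trueY = np.array([[2.0, 0.0], [4.0, 1.0]])
forecastY = np.array([[1.0, 0.5], [5.0, 1.0]])
nz_mask = trueY != 0
ratio = np.full_like(trueY, np.nan)          # entries skipped by `where` stay NaN
np.divide(np.abs(forecastY - trueY), np.abs(trueY), out=ratio, where=nz_mask)
mape = ratio[np.isfinite(ratio)].mean()
print(mape)                                  # 0.25 for this toy example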
Code Example #9
File: hdda.py Project: mfauvel/HDDA
    def predict_proba(self, X):
        """
        Predict the membership probabilities for the data samples
        in X using trained model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        proba : array, shape (n_samples, n_clusters)
        """
        X = check_array(X, copy=False, order='C', dtype=sp.float64)
        K = self.score_samples(X)
        T = sp.empty_like(K)

        # Compute the Loglikelhood
        K *= (0.5)

        # Compute the posterior
        with sp.errstate(over='ignore'):
            for c in xrange(self.C):
                T[:, c] = 1 / sp.exp(K-K[:, c][:, sp.newaxis]).sum(axis=1)

        return T
Code Example #10
File: hdda.py Project: mfauvel/HDDA
    def e_step(self, X):
        """Compute the e-step of the algorithm

        Parameters
        ----------
        X : array-like, shape (n_samples, n_dimensions)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        Returns
        -------

        """
        # Get some parameters
        n = X.shape[0]

        # Compute the membership function
        K = self.score_samples(X)

        # Compute the Loglikelhood
        K *= (0.5)
        Km = K.max(axis=1)
        Km.shape = (n, 1)

        # logsumexp trick
        LL = (sp.log(sp.exp(K-Km).sum(axis=1))[:, sp.newaxis]+Km).sum()

        # Compute the posterior
        with sp.errstate(over='ignore'):
            for c in xrange(self.C):
                self.T[:, c] = 1 / sp.exp(K-K[:, c][:, sp.newaxis]).sum(axis=1)

        return LL
Code Example #11
File: hdda.py Project: artificyan/HDDA
    def predict_proba(self, X):
        """
        Predict the membership probabilities for the data samples
        in X using trained model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        proba : array, shape (n_samples, n_clusters)
        """
        X = check_array(X, copy=False, order='C', dtype=sp.float64)
        K = self.score_samples(X)
        T = sp.empty_like(K)

        # Compute the Loglikelhood
        K *= (0.5)

        # Compute the posterior
        with sp.errstate(over='ignore'):
            for c in xrange(self.C):
                T[:, c] = 1 / sp.exp(K - K[:, c][:, sp.newaxis]).sum(axis=1)

        return T
Code Example #12
File: RSC.py Project: GiulioRossetti/cdlib
def __regularized_laplacian_matrix(adj_matrix, tau):
    """
    Using ARPACK solver, compute the first K eigen vector.
    The laplacian is computed using the regularised formula from [2]
    [2]Kamalika Chaudhuri, Fan Chung, and Alexander Tsiatas 2018.
        Spectral clustering of graphs with general degrees in the extended planted partition model.
    L = I - D^-1/2 * A * D ^-1/2
    :param adj_matrix: adjacency matrix representation of graph where [m][n] >0 if there is edge and [m][n] = weight
    :param tau: the regularisation constant
    :return: the first K eigenvector
    """
    import scipy.sparse

    # Code inspired from nx.normalized_laplacian_matrix, with changes to allow regularisation
    n, m = adj_matrix.shape
    I = np.eye(n, m)
    diags = adj_matrix.sum(axis=1).flatten()
    # add tau to the diags to produce a regularised diags
    if tau != 0:
        diags = np.add(diags, tau)

    # diags will be zero at points where there is no edge and/or the node you are at
    #  ignore the error and make it zero later
    with scipy.errstate(divide="ignore"):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    D = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format="csr")

    L = I - (D.dot(adj_matrix.dot(D)))
    return L
Code Example #13
File: hdda.py Project: lopesm/STH_measures
    def loglike(self, x, T=None):
        """
        Compute the log likelyhood given a set of samples.
        :param x: The sample matrix, is of size x \times d where n is the number of samples and d is the number of variables
        """
        flag = False
        ## Get some parameters
        n = x.shape[0]

        ## Compute the membership function
        K = self.predict(x, out='ki')

        ## Compute the Loglikelhood
        K *= (-0.5)
        Km = K.max(axis=1).reshape(n, 1)
        LL = (sp.log(sp.exp(K - Km).sum(axis=1)).reshape(n, 1) +
              Km).sum()  # logsumexp trick

        ## Compute the posterior
        if T is None:
            flag = True
            T = sp.empty_like(K)

        with sp.errstate(over='ignore'):
            for i in xrange(K.shape[1]):
                T[:, i] = 1 / sp.exp(K - K[:, i][:, sp.newaxis]).sum(axis=1)

        if flag:
            return LL, T
        else:
            return LL
Code Example #14
        def clean_p_values(counts, lambdas):
            with scipy.errstate(divide='ignore'):
                p_values = poisson.logsf(counts, lambdas)

                p_values /= -baseEtoTen
                p_values[counts == 0] = 0
                p_values[np.isinf(p_values)] = 1000
                return p_values
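baseEtoTen in the snippet above is defined elsewhere in that project (presumably ln 10, converting natural-log survival probabilities to a -log10 scale). A self-contained hypothetical sketch of the same idea, with toy counts and rates as assumptions:

# Hypothetical sketch: -log10 Poisson p-values, zero counts forced to 0.
import numpy as np
from scipy.stats import poisson

counts = np.array([0, 3, 50])
lambdas = np.array([1.0, 1.0, 1.0])
with np.errstate(divide='ignore'):           # logsf may hit log(0) underflow
    p_values = poisson.logsf(counts, lambdas)
p_values /= -np.log(10)                       # natural log -> -log10 scale
p_values[counts == 0] = 0
p_values[np.isinf(p_values)] = 1000           # cap fully underflowed values
print(p_values)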
Code Example #15
def get_DH(A):
    # D^{-1/2}
    diags = A.sum(axis=1).flatten()
    C, R = A.shape
    with scipy.errstate(divide='ignore'):
        diag_s = 1.0 / scipy.sqrt(diags)
    diag_s[scipy.isinf(diag_s)] = 0
    DH = scipy.sparse.spdiags(diag_s, [0], C, R, format='csr')
    return DH
Code Example #16
File: ClassifierUtils.py Project: mraouf1/stokastik
def normalized_laplacian(W):
    n, m = W.shape
    diags = W.sum(axis=0)                      # degree vector
    D = np.diag(diags)
    L = D - W                                  # unnormalized Laplacian
    with sc.errstate(divide='ignore'):
        diags_sqrt = 1.0 / sc.sqrt(diags)
    diags_sqrt[sc.isinf(diags_sqrt)] = 0
    DH = sp.spdiags(diags_sqrt, [0], m, n, format='csr')
    DH = DH.toarray()
    L = DH.dot(L.dot(DH))

    return L
Code Example #17
    def make_matrices(self, G):
        self.adj_matrix = nx.to_scipy_sparse_matrix(
            G, nodelist=self.pool_of_nodes)
        n, m = self.adj_matrix.shape
        D = self.adj_matrix.sum(axis=1).flatten()
        D = scipy.sparse.spdiags(D, [0], n, m, format='csr')
        self.laplacian = csr_matrix(D - self.adj_matrix)
        with scipy.errstate(divide='ignore', invalid='ignore'):
            DI = spdiags(1.0 / scipy.array(self.adj_matrix.sum(axis=1).flat),
                         [0], n, m)
        self.normed_adj_matrix = DI * self.adj_matrix
Code Example #18
File: utilities.py Project: leiloong/HMGCN
def _normalize_diffusion_matrix(A):
    n, m = A.shape
    A_with_selfloop = A
    diags = A_with_selfloop.sum(axis=1).flatten()

    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = sp.spdiags(diags_sqrt, [0], m, n, format='csc')
    d = DH.dot(A_with_selfloop.dot(DH))
    return d
Code Example #19
File: TempPlot.py Project: cpsmv/ecu
    def calibrateTempCurve(self):
        self.resistances = numpy.array([2.46, 0.318])
        self.temperatures = numpy.array([20, 80])
        with scipy.errstate(divide='ignore'):
            slope, intercept, r_value, p_value, std_err = \
                stats.linregress(self.temperatures, self.resistances)

        self.t = Symbol('t')
        self.a = Symbol('a')

        self.calibrationcurve = solve((intercept + slope*self.t) /
            (intercept + slope*self.t + 2.49) * 1024 - self.a, self.t)
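With only two calibration points the linear fit is exact, so downstream quantities inside linregress (such as the standard error with zero degrees of freedom) involve a division by zero, which is presumably why the warning is silenced above. A hypothetical stand-alone sketch of just that fit, with the sympy part omitted:

# Hypothetical sketch: two-point calibration fit with scipy.stats.linregress.
import numpy as np
from scipy import stats

resistances = np.array([2.46, 0.318])
temperatures = np.array([20.0, 80.0])
with np.errstate(divide='ignore'):           # exact two-point fit -> internal divide-by-zero
    slope, intercept, r_value, p_value, std_err = \
        stats.linregress(temperatures, resistances)
print(slope, intercept, r_value)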
Code Example #20
def cocitation_modularity(partition, adjacency_matrix, resolution=1.0):
    """
    Compute the modularity of a node partition of a cocitation graph.
    Parameters
    ----------
    partition: dict
       The partition of the nodes.
       The keys of the dictionary correspond to the nodes and the values to the communities.
    adjacency_matrix: scipy.csr_matrix or np.ndarray
        The adjacency matrix of the graph (sparse or dense).
    resolution: double, optional
        The resolution parameter in the modularity function (default=1.).

    Returns
    -------
    modularity : float
       The modularity.
    """

    if type(adjacency_matrix) == sparse.csr_matrix:
        adj_matrix = adjacency_matrix
    elif type(adjacency_matrix) == np.ndarray:
        adj_matrix = sparse.csr_matrix(adjacency_matrix)
    else:
        raise TypeError(
            "The argument should be a NumPy array or a SciPy Compressed Sparse Row matrix."
        )

    n_nodes = adj_matrix.shape[0]
    out_degree = np.array(adj_matrix.sum(axis=1).flatten())
    in_degree = adj_matrix.sum(axis=0).flatten()
    total_weight = out_degree.sum()

    with errstate(divide='ignore'):
        in_degree_sqrt = 1.0 / sqrt(in_degree)
    in_degree_sqrt[isinf(in_degree_sqrt)] = 0
    in_degree_sqrt = sparse.spdiags(in_degree_sqrt, [0],
                                    adj_matrix.shape[1],
                                    adj_matrix.shape[1],
                                    format='csr')
    normalized_adjacency = (adj_matrix.dot(in_degree_sqrt)).T

    communities = lab2com(partition)
    mod = 0.

    for community in communities:
        indicator_vector = np.zeros(n_nodes)
        indicator_vector[list(community)] = 1
        mod += np.linalg.norm(normalized_adjacency.dot(indicator_vector))**2
        mod -= (resolution / total_weight) * (np.dot(out_degree,
                                                     indicator_vector))**2

    return float(mod / total_weight)
Code Example #21
    def learn_embeddings(self):
        n, m = self.adj_matrix.shape
        diags = self.adj_matrix.sum(axis=1).flatten()
        D = sparse.spdiags(diags, [0], m, n, format='csr')
        L = D - self.adj_matrix
        with scipy.errstate(divide='ignore'):
            diags_sqrt = 1.0 / scipy.sqrt(diags)
        diags_sqrt[scipy.isinf(diags_sqrt)] = 0
        DH = sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
        laplacian = DH.dot(L.dot(DH))

        _, v = sparse.linalg.eigs(laplacian, k=self.dim + 1, which='SM')
        embeddings = v[:, 1:].real
        return embeddings
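Since the normalized Laplacian built above is symmetric, an equivalent computation can use scipy.sparse.linalg.eigsh instead of eigs, which returns real eigenpairs directly. A hypothetical sketch on a toy path graph; dim and the graph are assumptions.

# Hypothetical sketch: spectral embedding from the symmetric normalized Laplacian.
import numpy as np
import networkx as nx
from scipy import sparse
from scipy.sparse.linalg import eigsh

dim = 2
G = nx.path_graph(10)
A = sparse.csr_matrix(nx.adjacency_matrix(G), dtype=float)
deg = np.asarray(A.sum(axis=1)).ravel()
L = sparse.diags(deg) - A
with np.errstate(divide='ignore'):
    d_inv_sqrt = 1.0 / np.sqrt(deg)
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
DH = sparse.diags(d_inv_sqrt)
L_sym = DH @ L @ DH
vals, vecs = eigsh(L_sym, k=dim + 1, which='SM')   # smallest eigenvalues
order = np.argsort(vals)
embeddings = vecs[:, order[1:]]                     # drop the trivial eigenvector
print(embeddings.shape)                             # (10, 2)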
Code Example #22
File: FIGRL.py Project: HendrikTytgat/KU_LEUVEN
    def train(self, train_graph, S=None):
        """
        
        This function trains a figrl model.
        It returns the trained figrl model and a pandas dataframe containing the embeddings generated for the train nodes.

        Parameters
        ----------
        train_graph : NetworkX Object
            The graph on which the training step is done.
        S : ndarray, optional
            Random Gaussian matrix with one row per node in train_graph.
        """
        A = nx.adjacency_matrix(train_graph)
        n, m = A.shape
        diags = A.sum(axis=1).flatten()
        D = scipy.sparse.spdiags(diags, [0], n, n, format='csr')

        with scipy.errstate(divide='ignore'):
            diags_sqrt = 1.0 / np.lib.scimath.sqrt(diags)
        diags_sqrt[np.isinf(diags_sqrt)] = 0
        DH = scipy.sparse.spdiags(diags_sqrt, [0], n, n, format='csr')

        Normalized_random_walk = DH.dot(A.dot(DH))
        if S is None:
            S = np.random.randn(n, self.intermediate_dimension) / np.sqrt(
                self.intermediate_dimension)
            np.savetxt("S_train_matrix.csv", S, delimiter=",")
        #S = np.array(pd.read_csv('S_train_matrix.csv', header=None))

        C = Normalized_random_walk.dot(S)

        from scipy import sparse
        sC = sparse.csr_matrix(C)

        U, self.sigma, self.V = scipy.sparse.linalg.svds(sC,
                                                         k=self.embedding_size,
                                                         tol=0,
                                                         which='LM')
        self.V = self.V.transpose()
        self.sigma = np.diag(self.sigma)

        figrl_train_emb = pd.DataFrame(U)
        figrl_train_emb = figrl_train_emb.set_index(figrl_train_emb.index)

        self.sigma = np.array(self.sigma)
        self.V = np.array(self.V)
        self.St = np.array(S)
        return figrl_train_emb
Code Example #23
    def fit(self, adjacency_matrix):
        """Fits the model from data in adjacency_matrix

        Parameters
        ----------
        adjacency_matrix : Scipy csr matrix or numpy ndarray
              Adjacency matrix of the graph
        node_weights : {'uniform', 'degree', array of length n_nodes with positive entries}
              Node weights
        """

        if type(adjacency_matrix) == sparse.csr_matrix:
            adj_matrix = adjacency_matrix
        elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray:
            adj_matrix = sparse.csr_matrix(adjacency_matrix)
        else:
            raise TypeError(
                "The argument must be a NumPy array or a SciPy Sparse matrix.")
        n_nodes, m_nodes = adj_matrix.shape
        if n_nodes != m_nodes:
            raise ValueError("The adjacency matrix must be a square matrix.")
        #if csgraph.connected_components(adj_matrix, directed=False)[0] > 1:
            #raise ValueError("The graph must be connected.")
        if (adj_matrix != adj_matrix.maximum(adj_matrix.T)).nnz != 0:
            raise ValueError("The adjacency matrix is not symmetric.")

        # builds standard laplacian
        degrees = adj_matrix.dot(np.ones(n_nodes))
        degree_matrix = sparse.diags(degrees, format='csr')
        laplacian = degree_matrix - adj_matrix

        # applies normalization by node weights 
        with errstate(divide='ignore'):
            degrees_inv_sqrt = 1.0 / sqrt(degrees)
        degrees_inv_sqrt[isinf(degrees_inv_sqrt)] = 0
        weight_matrix = sparse.diags(degrees_inv_sqrt, format='csr')
            
        laplacian = weight_matrix.dot(laplacian.dot(weight_matrix))

        # spectral decomposition
        eigenvalues, eigenvectors = eigsh(laplacian, min(self.embedding_dimension + 1, n_nodes - 1), which='SM')
        self.eigenvalues_ = eigenvalues[1:]
        self.embedding_ = np.array(weight_matrix.dot(eigenvectors[:, 1:]))
        
        return self
Code Example #24
    def fit(self, train_graph, S=None):
        """This function trains a figrl model.
        It returns the trained figrl model and a pandas dataframe containing the embeddings generated for the train nodes.

        Parameters
        ----------
        train_graph : NetworkX Object
            The graph on which the training step is done, containing only the seen training nodes.
        S : ndarray, shape (number of training nodes, intermediate dimension)
            A random matrix used to create the normalized random walk matrix; if None, the fit method creates a new one.
        Returns
        -------
        figrl_train_emb : pandas Dataframe
            The embeddings created during the training step for the training nodes.
        """
        
        A = nx.adjacency_matrix(train_graph)
        n,m = A.shape
        diags = A.sum(axis=1).flatten()

        with scipy.errstate(divide='ignore'):
           diags_sqrt = 1.0/np.lib.scimath.sqrt(diags)
        diags_sqrt[np.isinf(diags_sqrt)] = 0
        DH = scipy.sparse.spdiags(diags_sqrt, [0], n, n, format='csr')

        Normalized_random_walk = DH.dot(A.dot(DH))
        if S is None:
            S = np.random.randn(n, self.intermediate_dimension) / np.sqrt(self.intermediate_dimension)
            #np.savetxt("S_train_matrix.csv", S, delimiter=",")

        C = Normalized_random_walk.dot(S)

        from scipy import sparse
        sC = sparse.csr_matrix(C)

        U, self.sigma, self.V = scipy.sparse.linalg.svds(sC, k=self.embedding_size, tol=0,which='LM')
        self.V = self.V.transpose()
        self.sigma = np.diag(self.sigma)
        
        figrl_train_emb = pd.DataFrame(U)
        figrl_train_emb = figrl_train_emb.set_index(figrl_train_emb.index)
        
        self.sigma = np.array(self.sigma)
        self.V = np.array(self.V)
        self.St = np.array(S)
        
        return figrl_train_emb
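Both FIGRL variants above follow the same pattern: multiply the normalized adjacency by a random Gaussian sketch S and take a truncated SVD of the much smaller result. A compact hypothetical sketch of that step; the sizes and the random toy graph are assumptions.

# Hypothetical toy sketch of the "random projection then truncated SVD" step.
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import svds

n, inter_dim, emb_dim = 50, 8, 4
rng = np.random.default_rng(0)
A = sparse.random(n, n, density=0.1, random_state=0)
A = A + A.T                                   # symmetric toy adjacency
deg = np.asarray(A.sum(axis=1)).ravel()
with np.errstate(divide='ignore'):
    d_inv_sqrt = 1.0 / np.sqrt(deg)
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
DH = sparse.diags(d_inv_sqrt)
S = rng.standard_normal((n, inter_dim)) / np.sqrt(inter_dim)
C = DH @ (A @ (DH @ S))                        # n x inter_dim sketch
U, sigma, Vt = svds(sparse.csr_matrix(C), k=emb_dim, which='LM')
print(U.shape, sigma.shape)                    # (50, 4) (4,)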
Code Example #25
    def __init__(self, layer_multiplexes, bipartite_files=None):
        #TODO check and verify inputs
        self.multiplexes = layer_multiplexes
        print("\nGenerating bipartite matrix...")
        if (len(layer_multiplexes.keys()) == 1 or bipartite_files is None):
            k = list(layer_multiplexes.keys())
            self.total_nodes = layer_multiplexes[k[0]].num_nodes
            self.num_layers = 1
            self.pool_of_nodes = list(layer_multiplexes[k[0]].pool_of_nodes)
            self.supra_adjacency_matrix = layer_multiplexes[
                k[0]].layers[0].adj_matrix
            self.normed_supra_adjacency_matrix = layer_multiplexes[
                k[0]].layers[0].normed_adj_matrix
            self.supra_transition_matrix = layer_multiplexes[
                k[0]].layers[0].normed_adj_matrix

        else:
            self.bipartite_matrix = dict.fromkeys(bipartite_files, None)
            self.bipartite_G = dict.fromkeys(bipartite_files, None)
            for key, value in bipartite_files.items():
                if (len(value.columns)) == 2:
                    value.columns = ["source", "target"]
                    value['weight'] = [1.0] * value.shape[0]
                elif (len(value.columns)) == 3:
                    value.columns = ["source", "target", "weight"]
                bipartite_rel = value
                self.bipartite_G[key], self.bipartite_matrix[
                    key] = self.get_bipartite_graph(
                        layer_multiplexes[key.split("-")[0]],
                        layer_multiplexes[key.split("-")[1]], bipartite_rel)
            print("Expanding bipartite matrix to fit the multiplex network...")
            self.supra_adjacency_matrix = self.compute_adjacency_matrix(
                self.multiplexes, self.bipartite_matrix)
            with scipy.errstate(divide='ignore', invalid='ignore'):
                DI = spdiags(
                    1.0 /
                    scipy.array(self.supra_adjacency_matrix.sum(axis=1).flat),
                    [0], self.total_nodes, self.total_nodes)
            self.normed_supra_adjacency_matrix = DI * self.supra_adjacency_matrix
            self.supra_transition_matrix = self.compute_transition_matrix()
Code Example #26
def normalized_laplacian_matrix(G, nodelist=None, weight='weight'):
    r"""Return the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : NumPy matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_matrix for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A,1)), where A is
    the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import scipy
    import scipy.sparse
    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight=weight,
                                  format='csr')
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - A
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    return DH.dot(L.dot(DH))
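A hypothetical usage sketch on a tiny graph. Note that nx.to_scipy_sparse_matrix and the scipy.sqrt / scipy.isinf aliases above belong to older networkx / SciPy releases; on current versions the equivalent built-in helper can be called directly:

# Hypothetical usage sketch with the equivalent built-in networkx helper.
import networkx as nx

G = nx.path_graph(4)
N = nx.normalized_laplacian_matrix(G)   # returns a SciPy sparse matrix/array
print(N.toarray().round(3))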
Code Example #27
import networkx as nx
import numpy as np
import scipy
import scipy.linalg
import scipy.sparse

G = nx.read_edgelist('/home/rafael/googledrive/DOC/data/figrl/edgelist')

A = nx.adjacency_matrix(G)

A[1, 1]

dim = 10

n, m = A.shape
diags = A.sum(axis=1).flatten()
D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
#L = D - A
with scipy.errstate(divide='ignore'):
    diags_sqrt = 1.0 / scipy.sqrt(diags)
diags_sqrt[scipy.isinf(diags_sqrt)] = 0
DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')

Normalized_random_walk = DH.dot(A.dot(DH))

S = np.random.randn(n, dim) / np.sqrt(dim)

C = Normalized_random_walk.dot(S)

C.shape

np.linalg.svd(C)

scipy.linalg.svd(C, lapack_driver='gesvd')
Code Example #28
    def compute_transition_matrix(self):
        total_edges = 0
        if compute_weights:
            for key in self.multiplexes.keys():
                total_edges += self.multiplexes[key].layers[0].edge_df.shape[0]
            for key in self.multiplexes.keys():
                delta[key] = round(
                    self.multiplexes[key].layers[0].edge_df.shape[0] /
                    total_edges, 4)

        L = len(self.multiplexes.keys())
        self.supra_transition_matrix = self.supra_adjacency_matrix
        sort_keys = sorted(self.multiplexes.keys())
        if (len(sort_keys)) == 1:
            print("No bipartite relation possible!!")
            exit(-2)
        elif (len(sort_keys)) == 2:
            k1 = sort_keys[0]
            n1 = self.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = self.node_num_dict[k2]
            self.supra_transition_matrix[
                0:n1,
                0:n1] = delta[k1] * self.supra_transition_matrix[0:n1, 0:n1]
            if k1 + "-" + k2 in self.bipartite_matrix:
                if self.bipartite_matrix[k1 + "-" + k2] is not None:
                    self.supra_transition_matrix[
                        0:n1, n1:n1 + n2] = (1.0 - delta[k1]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[0:n1,
                                                                        n1:n1 +
                                                                        n2]
            if k2 + "-" + k1 in self.bipartite_matrix:
                if self.bipartite_matrix[k2 + "-" + k1] is not None:
                    self.supra_transition_matrix[n1:n1 + n2, 0:n1] = (
                        1.0 - delta[k2]
                    ) / (L - 1) * 1.0 * self.supra_transition_matrix[n1:n1 +
                                                                     n2, 0:n1]
            self.supra_transition_matrix[
                n1:n1 + n2, n1:n1 +
                n2] = delta[k2] * self.supra_transition_matrix[n1:n1 + n2,
                                                               n1:n1 + n2]

        elif (len(sort_keys)) == 3:
            k1 = sort_keys[0]
            n1 = self.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = self.node_num_dict[k2]
            k3 = sort_keys[2]
            n3 = self.node_num_dict[k3]
            self.supra_transition_matrix[
                0:n1,
                0:n1] = delta[k1] * self.supra_transition_matrix[0:n1, 0:n1]
            if k1 + "-" + k2 in self.bipartite_matrix:
                if self.bipartite_matrix[k1 + "-" + k2] is not None:
                    self.supra_transition_matrix[
                        0:n1, n1:n1 + n2] = (1.0 - delta[k1]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[0:n1,
                                                                        n1:n1 +
                                                                        n2]
            if k1 + "-" + k3 in self.bipartite_matrix:
                if self.bipartite_matrix[k1 + "-" + k3] is not None:
                    self.supra_transition_matrix[
                        0:n1, n1 + n2:n1 + n2 + n3] = (1.0 - delta[k1]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[
                                0:n1, n1 + n2:n1 + n2 + n3]

            if k2 + "-" + k1 in self.bipartite_matrix:
                if self.bipartite_matrix[k2 + "-" + k1] is not None:
                    self.supra_transition_matrix[n1:n1 + n2, 0:n1] = (
                        1.0 - delta[k2]
                    ) / (L - 1) * 1.0 * self.supra_transition_matrix[n1:n1 +
                                                                     n2, 0:n1]
            self.supra_transition_matrix[
                n1:n1 + n2, n1:n1 +
                n2] = delta[k2] * self.supra_transition_matrix[n1:n1 + n2,
                                                               n1:n1 + n2]
            if k2 + "-" + k3 in self.bipartite_matrix:
                if self.bipartite_matrix[k2 + "-" + k3] is not None:
                    self.supra_transition_matrix[
                        n1:n1 + n2,
                        n1 + n2:n1 + n2 + n3] = (1.0 - delta[k2]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[
                                n1:n1 + n2, n1 + n2:n1 + n2 + n3]

            if k3 + "-" + k1 in self.bipartite_matrix:
                if self.bipartite_matrix[k3 + "-" + k1] is not None:
                    self.supra_transition_matrix[
                        n1 + n2:n1 + n2 + n3, 0:n1] = (1.0 - delta[k3]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[
                                n1 + n2:n1 + n2 + n3, 0:n1]
            if k3 + "-" + k2 in self.bipartite_matrix:
                if self.bipartite_matrix[k3 + "-" + k2] is not None:
                    self.supra_transition_matrix[
                        n1 + n2:n1 + n2 + n3,
                        n1:n1 + n2] = (1.0 - delta[k3]) / (
                            L - 1) * 1.0 * self.supra_transition_matrix[
                                n1 + n2:n1 + n2 + n3, n1:n1 + n2]
            self.supra_transition_matrix[
                n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 +
                n3] = delta[k3] * self.supra_transition_matrix[n1 + n2:n1 +
                                                               n2 + n3, n1 +
                                                               n2:n1 + n2 + n3]

        else:
            print("Invalid number of layers!!")
            exit(-2)
        with scipy.errstate(divide='ignore', invalid='ignore'):
            DI = spdiags(
                1.0 /
                scipy.array(self.supra_transition_matrix.sum(axis=1).flat),
                [0], self.total_nodes, self.total_nodes)
        self.supra_transition_matrix = DI * self.supra_transition_matrix

        return (self.supra_transition_matrix)
Code Example #29
File: gcnmain.py Project: zhuhm1996/geographconv
def preprocess_data(data_home, **kwargs):
    bucket_size = kwargs.get('bucket', 300)
    encoding = kwargs.get('encoding', 'utf-8')
    celebrity_threshold = kwargs.get('celebrity', 10)
    mindf = kwargs.get('mindf', 10)
    dtype = kwargs.get('dtype', 'float32')
    one_hot_label = kwargs.get('onehot', False)
    vocab_file = os.path.join(data_home, 'vocab.pkl')
    dump_file = os.path.join(data_home, 'dump.pkl')
    if os.path.exists(dump_file) and not model_args.builddata:
        logging.info('loading data from dumped file...')
        data = load_obj(dump_file)
        logging.info('loading data finished!')
        return data

    dl = DataLoader(data_home=data_home,
                    bucket_size=bucket_size,
                    encoding=encoding,
                    celebrity_threshold=celebrity_threshold,
                    one_hot_labels=one_hot_label,
                    mindf=mindf,
                    token_pattern=r'(?u)(?<![@])#?\b\w\w+\b')
    dl.load_data()
    dl.assignClasses()
    dl.tfidf()
    vocab = dl.vectorizer.vocabulary_
    logging.info('saving vocab in {}'.format(vocab_file))
    dump_obj(vocab, vocab_file)
    logging.info('vocab dumped successfully!')
    U_test = dl.df_test.index.tolist()
    U_dev = dl.df_dev.index.tolist()
    U_train = dl.df_train.index.tolist()

    dl.get_graph()
    logging.info('creating adjacency matrix...')
    adj = nx.adjacency_matrix(dl.graph,
                              nodelist=xrange(len(U_train + U_dev + U_test)),
                              weight='w')

    adj.setdiag(0)
    #selfloop_value = np.asarray(adj.sum(axis=1)).reshape(-1,)
    selfloop_value = 1
    adj.setdiag(selfloop_value)
    n, m = adj.shape
    diags = adj.sum(axis=1).flatten()
    with sp.errstate(divide='ignore'):
        diags_sqrt = 1.0 / sp.sqrt(diags)
    diags_sqrt[sp.isinf(diags_sqrt)] = 0
    D_pow_neghalf = sp.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    A = D_pow_neghalf * adj * D_pow_neghalf
    A = A.astype(dtype)
    logging.info('adjacency matrix created.')

    X_train = dl.X_train
    X_dev = dl.X_dev
    X_test = dl.X_test
    Y_test = dl.test_classes
    Y_train = dl.train_classes
    Y_dev = dl.dev_classes
    classLatMedian = {
        str(c): dl.cluster_median[c][0]
        for c in dl.cluster_median
    }
    classLonMedian = {
        str(c): dl.cluster_median[c][1]
        for c in dl.cluster_median
    }

    P_test = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_test[['lat', 'lon']].values.tolist()
    ]
    P_train = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_train[['lat', 'lon']].values.tolist()
    ]
    P_dev = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_dev[['lat', 'lon']].values.tolist()
    ]
    userLocation = {}
    for i, u in enumerate(U_train):
        userLocation[u] = P_train[i]
    for i, u in enumerate(U_test):
        userLocation[u] = P_test[i]
    for i, u in enumerate(U_dev):
        userLocation[u] = P_dev[i]

    data = (A, X_train, Y_train, X_dev, Y_dev, X_test, Y_test, U_train, U_dev,
            U_test, classLatMedian, classLonMedian, userLocation)
    if not model_args.builddata:
        logging.info('dumping data in {} ...'.format(str(dump_file)))
        dump_obj(data, dump_file)
        logging.info('data dump finished!')

    return data
Code Example #30
File: main.py Project: afcarl/graphconvgeo
def main2(data_home, **kwargs):
    bucket_size = kwargs.get('bucket', 300)
    batch_size = kwargs.get('batch', 500)
    hidden_size = kwargs.get('hidden', 500)
    encoding = kwargs.get('encoding', 'utf-8')
    regul = kwargs.get('regularization', 1e-6)
    celebrity_threshold = kwargs.get('celebrity', 10)
    convolution = kwargs.get('conv', False)

    dl = DataLoader(data_home=data_home,
                    bucket_size=bucket_size,
                    encoding=encoding,
                    celebrity_threshold=celebrity_threshold)
    dl.load_data()
    dl.get_graph()
    dl.assignClasses()
    dl.tfidf()

    U_test = dl.df_test.index.tolist()
    U_dev = dl.df_dev.index.tolist()
    U_train = dl.df_train.index.tolist()
    if convolution:
        logging.info('creating adjacency matrix...')
        adj = nx.adjacency_matrix(dl.graph,
                                  nodelist=xrange(len(U_train + U_dev +
                                                      U_test)),
                                  weight='w')
        #adj[adj > 0] = 1
        adj.setdiag(1)
        n, m = adj.shape
        diags = adj.sum(axis=1).flatten()
        with sp.errstate(divide='ignore'):
            diags_sqrt = 1.0 / sp.sqrt(diags)
        diags_sqrt[sp.isinf(diags_sqrt)] = 0
        D_pow_neghalf = sp.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
        H = D_pow_neghalf * adj * D_pow_neghalf

        #logging.info('normalizing adjacency matrix...')
        #normalize(adj, axis=1, norm='l1', copy=False)
        #adj = adj.astype('float32')
        logging.info('vstacking...')
        X = sp.sparse.vstack([dl.X_train, dl.X_dev, dl.X_test])
        logging.info('convolution...')
        X_conv = H * X
        X_conv = X_conv.tocsr().astype('float32')
        X_train = X_conv[0:dl.X_train.shape[0], :]
        X_dev = X_conv[dl.X_train.shape[0]:dl.X_train.shape[0] +
                       dl.X_dev.shape[0], :]
        X_test = X_conv[dl.X_train.shape[0] + dl.X_dev.shape[0]:, :]
    else:
        X_train = dl.X_train
        X_dev = dl.X_dev
        X_test = dl.X_test

    Y_test = dl.test_classes
    Y_train = dl.train_classes
    Y_dev = dl.dev_classes
    classLatMedian = {
        str(c): dl.cluster_median[c][0]
        for c in dl.cluster_median
    }
    classLonMedian = {
        str(c): dl.cluster_median[c][1]
        for c in dl.cluster_median
    }

    P_test = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_test[['lat', 'lon']].values.tolist()
    ]
    P_train = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_train[['lat', 'lon']].values.tolist()
    ]
    P_dev = [
        str(a[0]) + ',' + str(a[1])
        for a in dl.df_dev[['lat', 'lon']].values.tolist()
    ]
    userLocation = {}
    for i, u in enumerate(U_train):
        userLocation[u] = P_train[i]
    for i, u in enumerate(U_test):
        userLocation[u] = P_test[i]
    for i, u in enumerate(U_dev):
        userLocation[u] = P_dev[i]
    clf = MLP(n_epochs=200,
              batch_size=batch_size,
              init_parameters=None,
              complete_prob=False,
              add_hidden=True,
              regul_coefs=[regul, regul],
              save_results=False,
              hidden_layer_size=hidden_size,
              drop_out=True,
              drop_out_coefs=[0.5, 0.5],
              early_stopping_max_down=10,
              loss_name='log',
              nonlinearity='rectify')
    clf.fit(X_train, Y_train, X_dev, Y_dev)
    print('Test classification accuracy is %f' % clf.accuracy(X_test, Y_test))
    y_pred = clf.predict(X_test)
    geo_eval(Y_test, y_pred, U_test, classLatMedian, classLonMedian,
             userLocation)
    print('Dev classification accuracy is %f' % clf.accuracy(X_dev, Y_dev))
    y_pred = clf.predict(X_dev)
    mean, median, acc161 = geo_eval(Y_dev, y_pred, U_dev, classLatMedian,
                                    classLonMedian, userLocation)
    return mean, median, acc161
Code Example #31
    def _updateInternals(self):
        """Update internal attributes related to likelihood.

        Should be called any time branch lengths or model parameters
        are changed.
        """
        rootnode = self.nnodes - 1
        if self._distributionmodel:
            catweights = self.model.catweights
        else:
            catweights = scipy.ones(1, dtype='float')
        # When there are multiple categories, it is acceptable
        # for some (but not all) of them to have underflow at
        # any given site. Note that we still include a check for
        # Underflow by ensuring that none of the site likelihoods is
        # zero.
        undererrstate = 'ignore' if len(catweights) > 1 else 'raise'
        with scipy.errstate(over='raise',
                            under=undererrstate,
                            divide='raise',
                            invalid='raise'):
            self.underflowlogscale.fill(0.0)
            self._computePartialLikelihoods()
            sitelik = scipy.zeros(self.nsites, dtype='float')
            assert (self.L[rootnode] >= 0).all(), str(self.L[rootnode])
            for k in self._catindices:
                sitelik += scipy.sum(
                    self._stationarystate(k) * self.L[rootnode][k],
                    axis=1) * catweights[k]
            assert (sitelik > 0).all(), "Underflow:\n{0}\n{1}".format(
                sitelik, self.underflowlogscale)
            self.siteloglik = scipy.log(sitelik) + self.underflowlogscale
            self.loglik = scipy.sum(self.siteloglik) + self.model.logprior
            if self.dparamscurrent:
                self._dloglik = {}
                for param in self.model.freeparams:
                    if self._distributionmodel and (
                            param in self.model.distributionparams):
                        name = self.model.distributedparam
                        weighted_dk = (self.model.d_distributionparams[param] *
                                       catweights)
                    else:
                        name = param
                        weighted_dk = catweights
                    dsiteloglik = 0
                    for k in self._catindices:
                        dsiteloglik += (scipy.sum(
                            self._dstationarystate(k, name) *
                            self.L[rootnode][k] + self.dL[name][rootnode][k] *
                            self._stationarystate(k),
                            axis=-1) * weighted_dk[k])
                    dsiteloglik /= sitelik
                    self._dloglik[param] = (scipy.sum(dsiteloglik, axis=-1) +
                                            self.model.dlogprior(param))
            if self.dtcurrent:
                self._dloglik_dt = 0
                dLnroot_dt = scipy.array([
                    self.dL_dt[n2][rootnode]
                    for n2 in sorted(self.dL_dt.keys())
                ])
                for k in self._catindices:
                    if isinstance(k, int):
                        dLnrootk_dt = dLnroot_dt.swapaxes(0, 1)[k]
                    else:
                        assert k == slice(None)
                        dLnrootk_dt = dLnroot_dt
                    self._dloglik_dt += catweights[k] * scipy.sum(
                        self._stationarystate(k) * dLnrootk_dt, axis=-1)
                self._dloglik_dt /= sitelik
                self._dloglik_dt = scipy.sum(self._dloglik_dt, axis=-1)
                assert self._dloglik_dt.shape == self.t.shape
Code Example #32
File: laplacianmatrix.py Project: networkx/networkx
def normalized_laplacian_matrix(G, nodelist=None, weight='weight'):
    r"""Returns the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : NumPy matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_matrix for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A,1)), where A is
    the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix
    normalized_laplacian_spectrum

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import scipy
    import scipy.sparse
    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  format='csr')
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - A
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    return DH.dot(L.dot(DH))
Code Example #33
    def fit(self, adjacency_matrix, tol=1e-6, n_iter='auto',
            power_iteration_normalizer='auto', random_state=None):
        """Fits the model from data in adjacency_matrix.

        Parameters
        ----------
        adjacency_matrix: array-like, shape = (n, m)
            Adjacency matrix, where n = m = |V| for a standard graph,
            n = |V1|, m = |V2| for a bipartite graph.

        tol: float, optional
            Tolerance for pseudo-inverse of singular values (default=1e-6).

        n_iter: int or 'auto' (default is 'auto')
            Number of power iterations. It can be used to deal with very noisy
            problems. When 'auto', it is set to 4, unless `n_components` is small
            (< .1 * min(X.shape)) `n_iter` in which case is set to 7.
            This improves precision with few components.

        power_iteration_normalizer: 'auto' (default), 'QR', 'LU', 'none'
            Whether the power iterations are normalized with step-by-step
            QR factorization (the slowest but most accurate), 'none'
            (the fastest but numerically unstable when `n_iter` is large, e.g.
            typically 5 or larger), or 'LU' factorization (numerically stable
            but can lose slightly in accuracy). The 'auto' mode applies no
            normalization if `n_iter`<=2 and switches to LU otherwise.

        random_state: int, RandomState instance or None, optional (default=None)
            The seed of the pseudo random number generator to use when shuffling
            the data.  If int, random_state is the seed used by the random number
            generator; If RandomState instance, random_state is the random number
            generator; If None, the random number generator is the RandomState
            instance used by `np.random`.

        Returns
        -------
        self

        """
        if type(adjacency_matrix) == sparse.csr_matrix:
            adj_matrix = adjacency_matrix
        elif type(adjacency_matrix) == np.ndarray:
            adj_matrix = sparse.csr_matrix(adjacency_matrix)
        else:
            raise TypeError(
                "The argument should be a NumPy array or a SciPy Compressed Sparse Row matrix.")
        n_nodes, m_nodes = adj_matrix.shape

        # out-degree vector
        dou = adj_matrix.sum(axis=1).flatten()
        # in-degree vector
        din = adj_matrix.sum(axis=0).flatten()

        with errstate(divide='ignore'):
            dou_sqrt = 1.0 / sqrt(dou)
            din_sqrt = 1.0 / sqrt(din)
        dou_sqrt[isinf(dou_sqrt)] = 0
        din_sqrt[isinf(din_sqrt)] = 0
        # pseudo inverse square-root out-degree matrix
        dhou = sparse.spdiags(dou_sqrt, [0], n_nodes, n_nodes, format='csr')
        # pseudo inverse square-root in-degree matrix
        dhin = sparse.spdiags(din_sqrt, [0], m_nodes, m_nodes, format='csr')

        laplacian = dhou.dot(adj_matrix.dot(dhin))
        u, sigma, vt = randomized_svd(laplacian, self.n_components, n_iter=n_iter,
                                      power_iteration_normalizer=power_iteration_normalizer, random_state=random_state)

        self.singular_values_ = sigma

        gamma = 1 - sigma ** 2
        gamma_sqrt = np.diag(np.piecewise(gamma, [gamma > tol, gamma <= tol], [lambda x: 1 / np.sqrt(x), 0]))
        self.embedding_ = dhou.dot(u).dot(gamma_sqrt)
        self.backward_embedding_ = dhin.dot(vt.T).dot(gamma_sqrt)

        return self
Code Example #34
def MultiRank_Nodes_Layers(H, alpha, gamma, s, a):
    v_quadratic_error = 0.001

    z = np.ones(H.num_layers, )
    g = H.supra_adjacency_matrix
    #g = lil_matrix((H.total_nodes, H.total_nodes), dtype=np.float) # Without bipartite connection
    sort_keys = sorted(H.multiplexes.keys())
    if (len(sort_keys)) == 2:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
        g[n1:n1 + n2, n1:n1 +
          n2] = H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
    elif (len(sort_keys)) == 3:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        k3 = sort_keys[2]
        n3 = H.node_num_dict[k3]
        g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
        g[n1:n1 + n2, n1:n1 +
          n2] = H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
        g[n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 +
          n3] = H.supra_adjacency_matrix[n1 + n2:n1 + n2 + n3,
                                         n1 + n2:n1 + n2 + n3].T * z[2]

    B_in = lil_matrix((H.num_layers, H.total_nodes), dtype=float)
    W = np.zeros(H.num_layers)

    if (len(sort_keys)) == 2:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        W[0] = H.multiplexes[k1].layers[0].adj_matrix.sum()
        W[1] = H.multiplexes[k2].layers[0].adj_matrix.sum()
        if k1 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k2].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = np.array(tmp / W[0])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                    B_in[0:1, 0:n1] = lil_matrix(n_tmp)
        if k2 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k1].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = np.array(tmp / W[1])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                    B_in[1:, n1:] = lil_matrix(n_tmp)
    elif (len(sort_keys)) == 3:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        k3 = sort_keys[2]
        n3 = H.node_num_dict[k3]
        W[0] = H.multiplexes[k1].layers[0].adj_matrix.sum()
        W[1] = H.multiplexes[k2].layers[0].adj_matrix.sum()
        W[2] = H.multiplexes[k3].layers[0].adj_matrix.sum()
        if k1 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k2].T.sum(axis=0).ravel()
                B_in[0:1, 0:n1] = tmp
        if k1 + "-" + k3 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k3] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k3].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[0:1, 0:n1] + tmp
                    n_tmp = np.array(n_tmp / W[0])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                    B_in[0:1, 0:n1] = lil_matrix(n_tmp)

        if k2 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k1].T.sum(axis=0).ravel()
                B_in[1:2, n1:n1 + n2] = tmp
        if k2 + "-" + k3 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k3] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k3].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[1:2, n1:n1 + n2] + tmp
                    n_tmp = np.array(n_tmp / W[1])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                    B_in[1:2, n1:n1 + n2] = lil_matrix(n_tmp)

        if k3 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k3 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k3 + "-" + k1].T.sum(axis=0).ravel()
                B_in[2:, n1 + n2:] = tmp
        if k3 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k3 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k3 + "-" + k2].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[2:, n1 + n2:] + tmp
                    n_tmp = np.array(n_tmp / W[2])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                    B_in[2:, n1 + n2:] = lil_matrix(n_tmp)

    D = g.sum(axis=1)
    D[D < 1.0] = 1.0
    with scipy.errstate(divide='ignore', invalid='ignore'):
        D = spdiags(1.0 / scipy.array(D.flat), [0], H.total_nodes,
                    H.total_nodes)

    x0 = g.sum(axis=0) + g.sum(axis=1).T
    x0 = scipy.array(x0)
    with scipy.errstate(divide='ignore', invalid='ignore'):
        x0 = x0.T / np.count_nonzero(x0)
        x0[x0 == np.inf] = 0
        x0[np.where(np.isnan(x0))] = 0
        x0 = scipy.array(x0)

    l = scipy.array(g.sum(axis=0))
    jump = scipy.array(alpha * l.T)
    jump = np.divide(jump, jump.sum())
    x = x0
    x = g.dot(D).dot(np.multiply(
        x, jump)) + np.multiply(x, 1 - jump).sum(axis=0) * x0
    x = np.divide(x, x.sum())

    z1 = np.power(B_in.sum(axis=1), a)
    with np.errstate(divide='ignore'):
        z2 = B_in.dot(np.power(x, (s * gamma)))
        z2[z2 == np.inf] = 0
        z2[np.where(np.isnan(z2))] = 0
    with scipy.errstate(divide='ignore', invalid='ignore'):
        n_tmp = z2
        n_tmp = np.array(n_tmp / B_in.sum(axis=1))
        n_tmp[n_tmp == np.inf] = 0
        n_tmp[np.where(np.isnan(n_tmp))] = 0
        z2 = n_tmp
    z = np.multiply(z1, (np.power(z2, s)))
    z = np.divide(z, z.sum())
    #normalized = (x - x.min()) / (x.max() - x.min())

    count = 0
    last_x = np.ones(H.total_nodes, ) * np.inf
    while (True):
        last_x = x
        g = lil_matrix((H.total_nodes, H.total_nodes), dtype=float)
        sort_keys = sorted(H.multiplexes.keys())
        n_z = list()
        for item in z.tolist():
            n_z.append(item)
        z = np.array(n_z)
        z = z.reshape(H.num_layers, )

        if (len(sort_keys)) == 2:
            k1 = sort_keys[0]
            n1 = H.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = H.node_num_dict[k2]
            g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
            g[n1:n1 + n2, n1:n1 +
              n2] = H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
        elif (len(sort_keys)) == 3:
            k1 = sort_keys[0]
            n1 = H.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = H.node_num_dict[k2]
            k3 = sort_keys[2]
            n3 = H.node_num_dict[k3]
            g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
            g[n1:n1 + n2, n1:n1 +
              n2] = H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
            g[n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 +
              n3] = H.supra_adjacency_matrix[n1 + n2:n1 + n2 + n3,
                                             n1 + n2:n1 + n2 + n3].T * z[2]

        D = g.sum(axis=1)
        D[D < 1.0] = 1.0
        with scipy.errstate(divide='ignore', invalid='ignore'):
            D = spdiags(1.0 / scipy.array(D.flat), [0], H.total_nodes,
                        H.total_nodes)

        x0 = g.sum(axis=0) + g.sum(axis=1).T
        with scipy.errstate(divide='ignore', invalid='ignore'):
            x0 = x0.T / np.count_nonzero(x0)
            x0[x0 == np.inf] = 0
            x0[np.where(np.isnan(x0))] = 0
        l = scipy.array(g.sum(axis=0))
        jump = scipy.array(alpha * l.T)
        jump = np.divide(jump, jump.sum())
        x = g.dot(D).dot(np.multiply(x, jump)) + np.multiply(
            np.multiply(x, 1 - jump).sum(axis=0), x0)
        x = np.divide(x, x.sum())

        z1 = np.power(B_in.sum(axis=1), a)
        with np.errstate(divide='ignore'):
            z2 = B_in.dot(np.power(x, (s * gamma)))
            z2[z2 == np.inf] = 0
            z2[np.where(np.isnan(z2))] = 0
        with scipy.errstate(divide='ignore', invalid='ignore'):
            n_tmp = z2
            n_tmp = np.array(n_tmp / B_in.sum(axis=1))
            n_tmp[n_tmp == np.inf] = 0
            n_tmp[np.where(np.isnan(n_tmp))] = 0
            z2 = n_tmp
        with np.errstate(divide='ignore'):
            z = np.multiply(z1, (np.power(z2, s)))
            z[z == np.inf] = 0
            z[np.where(np.isnan(z))] = 0
        z = np.divide(z, z.sum())
        try:
            normed = norm(x - last_x)
        except:
            print(count)
            break

        if normed < v_quadratic_error:
            break
        elif (count > 100):
            break
        count = count + 1

    return x, z
Code Example #35
def normalized_laplacian_matrix(G, nodelist=None, weight="weight"):
    r"""Returns the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : Scipy sparse matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_array for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A,1)), where A is
    the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix
    normalized_laplacian_spectrum

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 format="csr")
    n, m = A.shape
    diags = A.sum(axis=1)
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(sp.sparse.spdiags(diags, 0, m, n, format="csr"))
    L = D - A
    with sp.errstate(divide="ignore"):
        diags_sqrt = 1.0 / np.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    DH = sp.sparse.csr_array(
        sp.sparse.spdiags(diags_sqrt, 0, m, n, format="csr"))
    import warnings

    warnings.warn(
        "normalized_laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm csr_matrix wrapper for NX 3.0
    return sp.sparse.csr_matrix(DH @ (L @ DH))