Example #1
def test_meld():
    # MELD operator
    # Numerical accuracy
    np.random.seed(42)

    def norm(x):
        x = x.copy()
        x = x - np.min(x)
        x = x / np.max(x)
        return x

    D = np.random.normal(0, 2, (1000, 2))
    RES = np.random.binomial(1, norm(D[:, 0]), 1000)
    G = gt.Graph(D, knn=20, decay=10, use_pygsp=True)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(G, RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(gt.Graph(
        D, knn=20, decay=10, use_pygsp=False), RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    # lap type TypeError
    lap_type = 'hello world'
    assert_raise_message(
        TypeError,
        "lap_type must be 'combinatorial'"
        " or 'normalized'. Got: '{}'".format(lap_type),
        meld.MELD(lap_type=lap_type).fit,
        G=G)

    # RES wrong shape
    RES = np.ones([2, G.N + 100])
    assert_raise_message(
        ValueError,
        "Input data ({}) and input graph ({}) "
        "are not of the same size".format(RES.shape, G.N),
        meld_op.fit_transform,
        RES=RES,
        G=G)

    # lap reconversion warning
    assert_warns_message(
        RuntimeWarning,
        "Changing lap_type may require recomputing the Laplacian",
        meld_op.fit,
        G=gt.Graph(D, knn=20, decay=10, use_pygsp=True, lap_type='normalized'))
Example #2
    def fit(self, X):
        if X.ndim != 3:
            raise ValueError("Expected X to be a tensor with three dimensions."
                             " Got shape {}".format(X.shape))

        if self.normalize:
            X = utils.normalize(X)

        tasklogger.log_start("multislice kernel")
        K = kernel.multislice_kernel(X,
                                     intraslice_knn=self.intraslice_knn,
                                     interslice_knn=self.interslice_knn,
                                     decay=self.decay,
                                     n_pca=self.n_pca,
                                     distance=self.knn_dist,
                                     n_jobs=self.n_jobs)
        tasklogger.log_complete("multislice kernel")
        tasklogger.log_start("graph and diffusion operator")
        n_landmark = self.n_landmark if self.n_landmark < K.shape[0] else None
        self.graph = graphtools.Graph(K,
                                      precomputed="affinity",
                                      n_landmark=n_landmark,
                                      n_svd=self.n_svd,
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose,
                                      random_state=self.random_state,
                                      **(self.kwargs))
        # landmark op doesn't build unless forced
        self.diff_op
        tasklogger.log_complete("graph and diffusion operator")
        result = super().fit(self.graph)
        return result
Example #3
    def fit_kNN(self, data, **kwargs):
        self.graph_knn = gt.Graph(data,
                                  n_pca=100,
                                  kernel_symm=None,
                                  use_pygsp=True,
                                  random_state=self.seed,
                                  **kwargs)
Example #4
def run_meld(X_red_dim, sample_labels, conditions, k=15):
    '''
    Run MELD
    - X_red_dim: cells x dims matrix of the dimensionality reduction to use for graph construction
    - sample_labels: assignment of cells to samples
    - conditions: vector of condition names
    '''
    ## Make graph
    graph = gt.Graph(X_red_dim, knn=int(k))
    ## Make MELD object
    meld_op = meld.MELD()
    meld_op.graph = graph
    ## Compute density
    meld_fit = meld_op.transform(sample_labels=np.array(sample_labels))

    ## Mean density per replicates
    mean_density = pd.DataFrame(
        np.zeros(shape=(meld_fit.shape[0], len(conditions))),
        index=meld_fit.index,
        columns=conditions,
    )

    for c in conditions:
        c_mean = meld_fit.loc[:, [c in x for x in meld_fit.columns]].mean(1)
        mean_density[c] = c_mean

    ## From density to likelihood per condition
    likelihoods = meld.utils.normalize_densities(mean_density)
    likelihoods.columns = [col.split("_")[0] for col in likelihoods.columns]
    return likelihoods
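# A usage sketch for run_meld (illustrative, not from the source project).
# It assumes the module-level imports used above (numpy as np, pandas as pd,
# graphtools as gt, meld) and that sample labels embed the condition name.
X = np.random.normal(size=(300, 20))                 # cells x dims
samples = np.repeat(['ctrl_r1', 'ctrl_r2', 'stim_r1', 'stim_r2'], 75)
likelihoods = run_meld(X, samples, conditions=['ctrl', 'stim'], k=15)
print(likelihoods.head())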
Example #5
def diffusionCoordinates(X, decay, knn, n_pca, random_state=None):
    # diffusion maps with normalized Laplacian
    # n_pca = 0 corresponds to NO pca
    G = graphtools.Graph(
        X,
        knn=knn,
        decay=decay,
        n_pca=n_pca,
        use_pygsp=True,
        thresh=0,
        random_state=random_state,
    )
    n_samples = X.shape[0]
    W = G.W.tocoo()
    # W / (DD^T)
    W.data = W.data / (G.dw[W.row] * G.dw[W.col])
    # this is the anisotropic kernel
    nsqrtD = sparse.dia_matrix((np.array(np.sum(W, 0)) ** (-0.5), [0]), W.shape)
    L = sparse.eye(n_samples) - nsqrtD.dot(W).dot(nsqrtD)
    U, S, _ = randSVD(L, random_state=random_state)
    # smallest to largest
    S_idx = np.argsort(S)
    U, S = U[:, S_idx], S[S_idx]
    # drop the trivial first eigenvector
    U, S = U[:, 1:], S[1:]
    return U, S
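# A minimal alternative sketch: graphtools can apply the same anisotropic
# normalization and normalized Laplacian itself (as Example #28 below does),
# so the hand-rolled kernel math above reduces to an eigendecomposition of
# G.L. Parameters here are illustrative; results should agree with
# diffusionCoordinates up to sign and numerical error.
import numpy as np
import graphtools
from scipy.sparse.linalg import eigsh

X = np.random.normal(0, 1, (200, 20))
G = graphtools.Graph(X, knn=5, decay=40, n_pca=None, use_pygsp=True,
                     thresh=0, anisotropy=1, lap_type='normalized')
# smallest eigenpairs of the normalized Laplacian; drop the trivial first
# eigenvector, as diffusionCoordinates does above
vals, vecs = eigsh(G.L, k=6, which='SM')
U, S = vecs[:, 1:], vals[1:]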
Example #6
def test_simple():
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    phate_operator = phate.PHATE(knn=15, t=100, verbose=False)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (tree_data.shape[0], 2)
    clusters = phate.cluster.kmeans(phate_operator, n_clusters="auto")
    assert np.issubdtype(clusters.dtype, np.signedinteger)
    assert len(np.unique(clusters)) >= 2
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    clusters = phate.cluster.kmeans(phate_operator, n_clusters=3)
    assert np.issubdtype(clusters.dtype, np.signedinteger)
    assert len(np.unique(clusters)) == 3
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    phate_operator.fit(phate_operator.graph)
    G = graphtools.Graph(
        phate_operator.graph.kernel,
        precomputed="affinity",
        use_pygsp=True,
        verbose=False,
    )
    phate_operator.fit(G)
    G = pygsp.graphs.Graph(G.W)
    phate_operator.fit(G)
    phate_operator.fit(anndata.AnnData(tree_data))
    with assert_raises_message(TypeError,
                               "Expected phate_op to be of type PHATE. Got 1"):
        phate.cluster.kmeans(1)
Example #7
File: dm.py Project: wilsonjr/DEMaP
def DM(data, t=1, knn=5, decay=40):
    # symmetric affinity matrix
    K = graphtools.Graph(data, n_jobs=-1, knn=knn, decay=decay).kernel
    # degrees
    diff_deg = np.array(np.sum(K, axis=1)).flatten()
    # negative sqrt
    diff_deg = np.power(diff_deg, -1 / 2)
    # put into square matrix
    diff_deg = sparse.spdiags([diff_deg], diags=0, m=K.shape[0], n=K.shape[0])
    # conjugate
    K = sparse.csr_matrix(K)
    diff_aff = diff_deg.dot(K).dot(diff_deg)
    # symmetrize to remove numerical error
    diff_aff = (diff_aff + diff_aff.T) / 2
    # svd
    U, S, _ = sparse.linalg.svds(diff_aff, k=3)
    # sort by largest singular value
    s_idx = np.argsort(S)[::-1]
    U, S = U[:, s_idx], S[s_idx]
    # get first eigenvector
    u1 = U[:, 0][:, None]
    # ensure non-zero
    zero_idx = np.abs(u1) <= np.finfo(float).eps
    u1[zero_idx] = (np.sign(u1[zero_idx]) * np.finfo(float).eps).reshape(-1)
    # normalize by first eigenvector
    U = U / u1
    # drop first eigenvector
    U, S = U[:, 1:], S[1:]
    # power eigenvalues
    S = np.power(S, t)
    # weight U by eigenvalues
    dm = U.dot(np.diagflat(S))
    return dm
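# Quick smoke test for DM on random data (a sketch; assumes numpy as np,
# scipy.sparse as sparse and graphtools are imported as in the snippet).
data = np.random.normal(0, 1, (100, 10))
embedding = DM(data, t=1)
# svds(k=3) minus the dropped top component leaves 2 diffusion coordinates
print(embedding.shape)  # (100, 2)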
Example #8
def phate_similarity_exampler(data, data_exampler, n_neigh=5, t=5,
                              use_potential=True, n_pca=100, n_exampler=None,
                              method="exact", **kwargs):
    """\
    Description:
    ------------
        Calculate diffusion distance using Phate/Diffusion Map method
    
    Parameters:
    ------------
        data: 
            Feature matrix of dimension (n_samples, n_features)
        n_neigh:
            The number of neighbor in knn for graph construction
        t:
            The transition timestep t
        use_potential:
            Using potential distance or not, if use, the same as Phate; if not, the same as diffusion map

    Returns:
    -----------    
        dist:
            Similarity matrix
    """
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform

    start = time.time()

    # build the diffusion operator on the exemplar points
    G_exampler = gt.Graph(data_exampler, n_pca=n_pca, knn=n_neigh, **kwargs)
    T_exampler = G_exampler.diff_op
    if scipy.sparse.issparse(T_exampler):
        T_exampler = T_exampler.toarray()

    T_exampler_t = np.linalg.matrix_power(T_exampler, t)
    if use_potential:
        U_exampler_t = - np.log(T_exampler_t + 1e-7)
    else:
        U_exampler_t = T_exampler_t
    
    dist_exampler = squareform(pdist(U_exampler_t))
    # Distances between query points and exemplars: choice 1 would use the
    # raw euclidean distances directly; choice 2 (used here) turns them into
    # a kernel and maps the queries into diffusion space first.
    dist = pairwise_distances(X=data, Y=data_exampler)
    knn_index = np.argpartition(dist, kth=n_neigh - 1, axis=1)[:, n_neigh - 1]
    kth_dist = np.take_along_axis(dist, knn_index[:, None], axis=1)
    K = dist / kth_dist
    K = (dist <= kth_dist) * np.exp(-K)
    K = K / np.sum(K, axis=1)[:, None]
    U_query_t = np.matmul(K, U_exampler_t)
    # Embed queries as diffusion features; we cannot use the distance matrix
    # directly because distances between query points are unknown.
    dist = pairwise_distances(X=U_query_t, Y=U_exampler_t)
    end = time.time()
    
    print("running time(sec):", end-start)
    return dist, dist_exampler
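# Usage sketch (illustrative): take a random subset of points as exemplars.
# Assumes module-level imports of numpy as np, scipy, time and
# sklearn.metrics.pairwise_distances, which the function body relies on.
data = np.random.normal(size=(500, 200))
idx = np.random.choice(data.shape[0], 100, replace=False)
dist, dist_exampler = phate_similarity_exampler(
    data, data[idx], n_neigh=5, t=5, n_pca=50)
print(dist.shape, dist_exampler.shape)  # (500, 100) (100, 100)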
Example #9
def test_graph_input():
    X = np.random.normal(0, 1, (10, 2))
    E = Estimator(verbose=0)
    G = graphtools.Graph(X)
    E.fit(G)
    assert E.graph == G
    G = graphtools.Graph(X, knn=2, decay=5, distance="cosine", thresh=0)
    E.fit(G)
    assert E.graph == G
    assert E.knn == G.knn
    assert E.decay == G.decay
    assert E.distance == G.distance
    assert E.thresh == G.thresh
    W = G.K - np.eye(X.shape[0])
    G = pygsp.graphs.Graph(W)
    E.fit(G, use_pygsp=True)
    assert np.all(E.graph.W.toarray() == W)
Example #10
    @classmethod
    def setUpClass(cls):
        # VertexFrequencyCluster
        # Custom window sizes
        cls.window_sizes = np.array([2, 4, 8, 24])
        data, cls.labels = make_batches(n_pts_per_cluster=100)
        cls.G = gt.Graph(data, sample_idx=cls.labels, use_pygsp=True)
        meld_op = meld.MELD()
        cls.EES = meld_op.fit_transform(G=cls.G, RES=cls.labels)
Example #11
    def fit(self, X):
        """Computes the diffusion operator

        Parameters
        ----------
        X : array, shape=[n_samples, n_features]
            input data with `n_samples` samples and `n_features`
            dimensions. Accepted data types: `numpy.ndarray`,
            `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
            `knn_dist` is 'precomputed', `data` should be an n_samples x
            n_samples distance or affinity matrix

        Returns
        -------
        phate_operator : PHATE
            The estimator object
        """
        X, n_pca, precomputed, update_graph = self._parse_input(X)

        if precomputed is None:
            tasklogger.log_info(
                "Running PHATE on {} cells and {} genes.".format(
                    X.shape[0], X.shape[1]))
        else:
            tasklogger.log_info(
                "Running PHATE on precomputed {} matrix with {} cells.".format(
                    precomputed, X.shape[0]))

        if self.n_landmark is None or X.shape[0] <= self.n_landmark:
            n_landmark = None
        else:
            n_landmark = self.n_landmark

        if self.graph is not None and update_graph:
            self._update_graph(X, precomputed, n_pca, n_landmark)

        self.X = X

        if self.graph is None:
            tasklogger.log_start("graph and diffusion operator")
            self.graph = graphtools.Graph(
                X,
                n_pca=n_pca,
                n_landmark=n_landmark,
                distance=self.knn_dist,
                precomputed=precomputed,
                knn=self.knn,
                decay=self.decay,
                thresh=1e-4,
                n_jobs=self.n_jobs,
                verbose=self.verbose,
                random_state=self.random_state,
                **(self.kwargs))
            tasklogger.log_complete("graph and diffusion operator")

        # landmark op doesn't build unless forced
        self.diff_op
        return self
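# A short usage sketch for fit (illustrative; synthetic data, with the
# precomputed option described in the docstring above exercised via a
# distance matrix).
import numpy as np
import phate
from scipy.spatial.distance import pdist, squareform

X = np.random.normal(size=(200, 50))
op = phate.PHATE(knn=5, verbose=False)
op.fit(X)                                # builds graph and diffusion operator
op_pre = phate.PHATE(knn_dist='precomputed', verbose=False)
op_pre.fit(squareform(pdist(X)))         # n_samples x n_samples distances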
Example #12
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    EES = meld.MELD().fit_transform(G, labels)

    clusters = meld.VertexFrequencyCluster().fit_predict(G=G,
                                                         RES=labels,
                                                         EES=EES)
    meld.utils.sort_clusters_by_meld_score(clusters, EES)
Example #13
def Spring(X, *args, is_graph=False, **kwargs):
    if not is_graph:
        G = graphtools.Graph(X, knn=3, decay=None, use_pygsp=True)
    else:
        G = pygsp.graphs.Graph(X)
    G = networkx.from_numpy_matrix(G.W.toarray())
    X = networkx.spring_layout(G, *args, **kwargs)
    X = np.vstack(list(X.values()))
    return X
Example #14
    def build(self, size=500):
        self.name = 'Tree'
        # noisy simulation used as the input data
        params = {
            'method': 'paths',
            'batch_cells': size,
            'path_length': 500,
            'path_from': [0, 1, 1, 2, 0, 0, 2],
            'de_fac_loc': 1,
            'path_skew': [0.45, 0.7, 0.7, 0.45, 0.65, 0.5, 0.5],
            'group_prob': [0.1, 0.1, .1, .2, .2, .2, .1],
            'dropout_type': 'binomial',
            'dropout_prob': 0.5,
            'seed': self.seed,
            'verbose': False
        }
        sim = scprep.run.SplatSimulate(**params)
        data = sim['counts']
        data_ln = scprep.normalize.library_size_normalize(data)
        data_sqrt = scprep.transform.sqrt(data_ln)
        self.X = data_sqrt
        self.c = sim['group']
        self.X = self.X[np.argsort(self.c)]
        self.c = np.sort(self.c)
        expand = 4
        # denser, dropout-free simulation used as the ground-truth embedding
        params = {
            'method': 'paths',
            'batch_cells': size * expand,
            'out_prob': 0,
            'path_length': 500,
            'path_from': [0, 1, 1, 2, 0, 0],
            'de_fac_loc': 1,
            'path_nonlinear_prob': 0.5,
            'group_prob': [0.15, 0.05, .1, .25, .2, .25],
            'dropout_type': 'binomial',
            'dropout_prob': 0,
            'path_skew': [0.5, 0.55, 0.4, 0.5, 0.45, 0.6],
            'seed': self.seed,
            'verbose': False
        }
        sim = scprep.run.SplatSimulate(**params)
        data = sim['counts']
        data = data[np.argsort(sim['group'])]
        data_ln = scprep.normalize.library_size_normalize(data)
        data_sqrt = scprep.transform.sqrt(data_ln)
        G = graphtools.Graph(data_sqrt, n_pca=100, anisotropy=1)
        self.X_true = embed.PHATE(G, gamma=0)[::expand]
Example #15
def test_from_igraph():
    n = 100
    m = 500
    K = np.zeros((n, n))
    for _ in range(m):
        e = np.random.choice(n, 2, replace=False)
        K[e[0], e[1]] = K[e[1], e[0]] = 1
    g = igraph.Graph.Adjacency(K.tolist())
    G = graphtools.from_igraph(g, attribute=None)
    G2 = graphtools.Graph(K, precomputed="adjacency")
    assert np.all(G.K == G2.K)
Example #16
def test_from_igraph_weighted():
    n = 100
    m = 500
    K = np.zeros((n, n))
    for _ in range(m):
        e = np.random.choice(n, 2, replace=False)
        K[e[0], e[1]] = K[e[1], e[0]] = np.random.uniform(0, 1)
    g = igraph.Graph.Weighted_Adjacency(K.tolist())
    G = graphtools.from_igraph(g)
    G2 = graphtools.Graph(K, precomputed="adjacency")
    assert np.all(G.K == G2.K)
Example #17
def test_dm():
    # create fake data
    n_time_steps = 50
    n_points = 20
    n_dim = 10
    np.random.seed(42)
    data = np.cumsum(np.random.normal(
        0, 1, (n_time_steps, n_points, n_dim)), axis=0)
    kernel = m_phate.kernel.multislice_kernel(m_phate.utils.normalize(data))
    dm = m_phate.kernel.DM(graphtools.Graph(kernel, precomputed='affinity'))
    assert dm.shape == (n_time_steps * n_points, 2)
Example #18
def test_check_pygsp_graph():
    # _check_pygsp_graph
    D = np.random.normal(0, 2, (10, 2))
    G = gt.Graph(D, use_pygsp=False)
    assert isinstance(meld.utils._check_pygsp_graph(G), pygsp.graphs.Graph)
    assert_raise_message(
        TypeError,
        "Input graph should be of type graphtools.base.BaseGraph. "
        "With graphtools, use the `use_pygsp=True` flag.",
        meld.utils._check_pygsp_graph,
        G='hello world')
Example #19
def Spring(X, *args, is_graph=False, seed=None, **kwargs):
    np.random.seed(seed)
    if not is_graph:
        G = graphtools.Graph(X, knn=3, decay=None, use_pygsp=True)
    else:
        G = pygsp.graphs.Graph(X)
    G = networkx.from_numpy_matrix(G.W.toarray())
    tasklogger.log_start("Spring")
    X = networkx.spring_layout(G, *args, **kwargs)
    tasklogger.log_complete("Spring")
    X = np.vstack(list(X.values()))
    return X
Example #20
def adata_phate(adata):
    # compute PHATE
    G = gt.Graph(data=adata.obsp['connectivities'] + sparse.diags([1] * adata.shape[0], format='csr'),
                 precomputed='adjacency',
                 use_pygsp=True)
    G.knn_max = None

    phate_op = phate.PHATE(knn_dist='precomputed',
                           gamma=0,
                           n_jobs=-1,
                           random_state=42)
    adata.obsm['X_phate'] = phate_op.fit_transform(G.K)

    return adata
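# Usage sketch (illustrative): adata_phate only needs
# adata.obsp['connectivities']; here scanpy (an assumption, not used by the
# function itself) provides it.
import anndata
import numpy as np
import scanpy as sc

adata = anndata.AnnData(np.random.normal(size=(200, 100)))
sc.pp.pca(adata, n_comps=20)
sc.pp.neighbors(adata, n_neighbors=15)   # fills adata.obsp['connectivities']
adata = adata_phate(adata)
print(adata.obsm['X_phate'].shape)       # (200, 2)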
Example #21
    def __init__(self, size=5, *args, **kwargs):
        self.size = size
        super().__init__(*args, **kwargs)

        # create a linear topology
        g = graphtools.Graph(directed=False)
        self.graph = g
        g.create_graph('lattice', 1, self.size)

        # save the positions of vertices as Vector object
        for v in range(1, self.size + 1):
            x, y = ((v - 0.5) / self.size * self.width, 0.5 * self.height)
            g.set_vertex_attribute(v, 'xy', V(x, y))

        self.compute_edge_lengths()
Example #22
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # create a grid topology
        g = graphtools.Graph(directed=False)
        self.graph = g
        g.create_graph('lattice', 2, self.size)

        # save the positions of vertices as Vector object
        for j in range(1, self.size + 1):
            for i in range(1, self.size + 1):
                v = g._lattice_vertex(2, self.size, i, j)
                x, y = ((0.5 + i - 1) / self.size * self.width,
                        (0.5 + j - 1) / self.size * self.height)
                g.set_vertex_attribute(v, 'xy', V(x, y))
        self.compute_edge_lengths()
Example #23
    def __init__(self, npoints=100, *args, **kwargs):
        self.npoints = npoints
        super().__init__(*args, **kwargs)

        # create a Voronoi topology
        g = graphtools.Graph(directed=False)
        self.graph = g
        g.create_graph('voronoi', self.npoints, self.width, self.height)

        # save the positions of vertices as Vector object
        for v in g.vertices():
            x, y = g.get_vertex_attribute(v, 'pos').split(',')
            x, y = float(x), float(y)
            g.set_vertex_attribute(v, 'xy', V(x, y))

        self.compute_edge_lengths()
Example #24
def phate_similarity(data, n_neigh=5, t=5, use_potential=True, **kwargs):
    """\
    Description:
    ------------
        Calculate diffusion distance using Phate/Diffusion Map method
    
    Parameters:
    ------------
        data: 
            Feature matrix of dimension (n_samples, n_features)
        n_neigh:
            The number of neighbor in knn for graph construction
        t:
            The transition timestep t
        use_potential:
            Using potential distance or not, if use, the same as Phate; if not, the same as diffusion map

    Returns:
    -----------    
        dist:
            Similarity matrix
    """
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform

    # Pairwise-distance graph: a decaying kernel with a threshold behaves
    # like a kNN graph, while a decaying kernel without a threshold gives
    # the exact (dense) graph. The default threshold is 1e-4.
    # Internally this computes radius neighbors, applies the kernel
    # function, and then thresholds the resulting affinities.
    G = gt.Graph(data, n_pca=100, knn=n_neigh, **kwargs)
    # obtain transition matrix
    T = G.diff_op

    if scipy.sparse.issparse(T):
        T = T.toarray()

    # T to the power of t
    T_t = np.linalg.matrix_power(T, t)
    # calculate potential distance used as feature vector for each cell
    if use_potential:
        U_t = -np.log(T_t + 1e-7)
    else:
        U_t = T_t
    # calculate pairwise feature vector distance
    dist = squareform(pdist(U_t))

    return U_t, dist
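# Usage sketch (illustrative; assumes numpy as np and scipy are imported at
# module level, since the function body calls scipy.sparse.issparse).
data = np.random.normal(size=(300, 200))
U_t, dist = phate_similarity(data, n_neigh=10, t=5)
print(U_t.shape, dist.shape)  # (300, 300) (300, 300)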
Example #25
def DPT_similarity(data, n_neigh=5, use_potential=False):
    '''\
    Description:
    -----------
        Calculates diffusion pseudotime (DPT) between all points in the data
        and directly outputs the similarity matrix (the diffusion pseudotime
        matrix), which is somewhat more robust than a plain diffusion map

    Parameters:
    -----------
        data:
            Feature matrix, numpy.array of the size [n_samples, n_features]
        n_neigh:
            Number of neighbors for graph construction; larger values
            correspond to slower decay
        use_potential:
            Whether to apply the log-potential transform, which expands
            short distances and compresses large ones

    Returns:
    -----------
        DPT:
            Similarity matrix calculated from diffusion pseudo-time
    '''
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform
    # Calculate from raw data would be too noisy, dimension reduction is necessary, construct graph adjacency matrix with n_pca 100
    G = gt.Graph(data, n_pca=100, knn=n_neigh, use_pygsp=True)

    # Calculate the top eigenvector of the diffusion operator
    # (G.diff_op is the row-normalized diffusion/transition matrix)
    W, V = scipy.sparse.linalg.eigs(G.diff_op, k=1)

    # Remove the first eigenspace (outer product of the top eigenvector)
    T_tilde = G.diff_op.toarray() - np.real(np.outer(V[:, 0], V[:, 0]))

    # Calculate M
    I = np.eye(T_tilde.shape[1])
    M = np.linalg.inv(I - T_tilde) - I
    M = np.real(M)

    # log-potential
    if use_potential:
        M = M - np.min(M, axis=1)[:, None]
        M = M / np.sum(M, axis=1)[:, None]
        M = -np.log(M + 1e-7)

    DPT = squareform(pdist(M))

    return DPT
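# Usage sketch (illustrative; assumes numpy as np and scipy are imported at
# module level).
data = np.random.normal(size=(200, 150))
D = DPT_similarity(data, n_neigh=10)
print(D.shape)  # (200, 200)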
Example #26
def get_laplacian_extrema(data, n_extrema, knn=10):
    '''
    Finds the 'Laplacian extrema' of a dataset. The first extremum is chosen
    as the point that minimizes the first non-trivial eigenvalue of the graph
    Laplacian on the data. Subsequent extrema are chosen by first finding the
    unique non-trivial non-negative vector that is zero on all previous
    extrema while minimizing the Laplacian quadratic form, then taking the
    argmax of this vector.
    '''

    G = gt.Graph(data, use_pygsp=True, decay=None, knn=knn)

    # We need to convert G into a NetworkX graph to use the Tracemin PCG algorithm
    G_nx = nx.convert_matrix.from_scipy_sparse_matrix(G.W)
    fiedler = nx.linalg.algebraicconnectivity.fiedler_vector(
        G_nx, method='tracemin_pcg')

    # Combinatorial Laplacian gives better results than the normalized Laplacian
    L = nx.laplacian_matrix(G_nx)
    first_extrema = np.argmax(fiedler)
    extrema = [first_extrema]
    extrema_ordered = [first_extrema]

    init_lanczos = fiedler
    init_lanczos = np.delete(init_lanczos, first_extrema)
    for i in range(n_extrema - 1):
        # Generate the Laplacian submatrix by removing rows/cols for previous extrema
        indices = range(data.shape[0])
        indices = np.delete(indices, extrema)
        ixgrid = np.ix_(indices, indices)
        L_sub = L[ixgrid]

        # Find the smallest eigenvector of our Laplacian submatrix
        eigvals, eigvecs = scipy.sparse.linalg.eigsh(L_sub,
                                                     k=1,
                                                     which='SM',
                                                     v0=init_lanczos)

        # Add it to the sorted and unsorted lists of extrema
        new_extrema = np.argmax(np.abs(eigvecs[:, 0]))
        init_lanczos = eigvecs[:, 0]
        init_lanczos = np.delete(init_lanczos, new_extrema)
        shift = np.searchsorted(extrema_ordered, new_extrema)
        extrema_ordered.insert(shift, new_extrema + shift)
        extrema.append(new_extrema + shift)

    return extrema
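# Usage sketch (illustrative; assumes numpy as np, scipy, networkx as nx and
# graphtools as gt are imported as in the function body, with a networkx
# version that still provides from_scipy_sparse_matrix).
data = np.random.normal(size=(300, 10))
extrema = get_laplacian_extrema(data, n_extrema=4)
print(extrema)  # indices of the 4 Laplacian extrema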
Example #27
File: test.py Project: semir2/PHATE
def test_simple():
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    phate_operator = phate.PHATE(k=15, t=100)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (tree_data.shape[0], 2)
    clusters = phate.cluster.kmeans(phate_operator, k=3)
    assert np.issubdtype(clusters.dtype, int)
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    phate_operator.fit(phate_operator.graph)
    G = graphtools.Graph(phate_operator.graph.kernel,
                         precomputed='affinity',
                         use_pygsp=True)
    phate_operator.fit(G)
    G = pygsp.graphs.Graph(G.W)
    phate_operator.fit(G)
    phate_operator.fit(anndata.AnnData(tree_data))
Example #28
def diffusionCoordinates(
    X, decay, knn, n_pca, n_eigenvectors=None, n_jobs=1, verbose=0, random_state=None
):
    # diffusion maps with normalized Laplacian
    G = graphtools.Graph(
        X,
        knn=knn,
        decay=decay,
        n_pca=n_pca,
        use_pygsp=True,
        thresh=1e-4,
        anisotropy=1,
        lap_type="normalized",
        n_jobs=n_jobs,
        verbose=verbose,
        random_state=random_state,
    )
    return graphDiffusionCoordinates(G, n_eigenvectors=n_eigenvectors)
Example #29
    def fit(self, X, y=None):

        if isinstance(X, list):
            X = np.array(X)

        if X.ndim < 2:
            raise ValueError("Cannot fit 1D array.")

        if X.shape[0] == 1:
            raise ValueError("Input contains only 1 sample.")

        self.n_features_in_ = X.shape[1]

        graph = graphtools.Graph(
            X,
            n_pca=self.n_pca,
            n_landmark=self.n_landmark,
            distance=self.knn_dist,
            knn=self.knn,
            knn_max=self.knn_max,
            decay=self.decay,
            thresh=1e-4,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
        )

        self.affinity_matrix_ = graph.diff_op.toarray()

        affinity_igraph = Graph().Weighted_Adjacency(
            matrix=self.affinity_matrix_.tolist(), mode="undirected")

        partition = leidenalg.find_partition(
            affinity_igraph,
            partition_type=leidenalg.RBConfigurationVertexPartition,
            weights=affinity_igraph.es["weight"],
            n_iterations=-1,
            seed=self.random_state,
            resolution_parameter=self.resolution_parameter,
        )

        self.labels_ = np.array(partition.membership)
        self.q_ = partition.q
        return self
Example #30
    def fit_graph(self, data, n_pca=100, **kwargs):
        """Fits a graphtools.Graph to input data

        Parameters
        ----------
        data : array, shape=[n_samples, n_features]
            Input data
        n_pca : int, optional (default: 100)
            Number of principal components used to build the graph
        **kwargs : dict
            Keyword arguments passed to gt.Graph()

        Returns
        -------
        graph : graphtools.Graph
            Graph fit to data

        """
        self.graph = gt.Graph(data,
                              n_pca=n_pca,
                              use_pygsp=True,
                              random_state=self.seed,
                              **kwargs)
        return self.graph
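# Usage sketch: fit_graph only relies on a `seed` attribute on its owner, so
# a minimal hypothetical holder class (GraphFitter below is illustrative,
# not from the source project) is enough to exercise it.
import numpy as np
import graphtools as gt

class GraphFitter:
    def __init__(self, seed=42):
        self.seed = seed

    def fit_graph(self, data, n_pca=100, **kwargs):
        self.graph = gt.Graph(data,
                              n_pca=n_pca,
                              use_pygsp=True,
                              random_state=self.seed,
                              **kwargs)
        return self.graph

op = GraphFitter(seed=0)
G = op.fit_graph(np.random.normal(size=(200, 500)), n_pca=20, knn=10)
print(G.N)  # 200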