Ejemplo n.º 1
0
def calc_fitsne(X, n_jobs, n_components, perplexity, early_exaggeration,
                learning_rate, random_state):
    """Run FIt-SNE on a copy of ``X`` and return whatever ``FItSNE`` returns.

    The scikit-learn style argument names are translated to the FIt-SNE
    keyword names: ``n_jobs`` -> ``nthreads``, ``n_components`` ->
    ``no_dims``, ``early_exaggeration`` -> ``early_exag_coeff`` and
    ``random_state`` -> ``rand_seed``.  A ``random_state`` of ``None`` is
    forwarded as ``-1``.
    """
    seed = -1 if random_state is None else random_state
    options = {
        "nthreads": n_jobs,
        "no_dims": n_components,
        "perplexity": perplexity,
        "early_exag_coeff": early_exaggeration,
        "learning_rate": learning_rate,
        "rand_seed": seed,
    }
    # FItSNE changes the content of its input, so it only ever sees a copy.
    return FItSNE(X.copy(), **options)
Ejemplo n.º 2
0
def tsne(X, method='umap'):
    """
    Embed ``X`` in two dimensions and return the two coordinates as lists.

    Parameters:
    ---------------

    X: numpy.array()
        log2(TPM + 1)
    method: str()
        'sklearn', 'FIt-SNE', 'TSNEApprox' or 'umap' (default)

    Returns:
    ---------------

    tsne_1: list()
        column 0 of the embedding
    tsne_2: list()
        column 1 of the embedding

    Raises:
    ---------------

    ValueError
        if ``method`` is not one of the names listed above
    """

    if method == 'sklearn':
        X_embedded = TSNE(n_components=2, metric='cosine',
                          init='pca').fit_transform(X)

    # fit-sne seems to have the problem 'np.array() is not C - contiguous'
    elif method == 'FIt-SNE':
        # reduce to 50 principal components; the first two seed the embedding
        pca = PCA(n_components=50)
        X_pca = pca.fit_transform(X)
        init = X_pca[:, :2]
        # make both arrays C - contiguous (the column slice above is not)
        X_pca = np.ascontiguousarray(X_pca)
        init = np.ascontiguousarray(init)
        # embedding
        X_embedded = FItSNE(X_pca, initialization=init)

    elif method == 'TSNEApprox':  # from scanorama, the result seems to be very weird, do not suggest using.
        from scanorama.t_sne_approx import TSNEApprox
        # TSNEApprox builds its own initialisation (init='pca'), so only the
        # 50-component PCA projection is needed here.
        pca = PCA(n_components=50)
        X_pca = pca.fit_transform(X)
        # embedding
        X_embedded = TSNEApprox(init='pca',
                                metric='cosine').fit_transform(X_pca)

    elif method == 'umap':
        import umap
        reducer = umap.UMAP()
        X_embedded = reducer.fit_transform(X)

    else:
        # Without this branch an unknown name only surfaced later as an
        # UnboundLocalError on X_embedded.
        raise ValueError(
            "Unknown method {!r}; expected 'sklearn', 'FIt-SNE', "
            "'TSNEApprox' or 'umap'.".format(method))

    tsne_1 = X_embedded[:, 0].tolist()
    tsne_2 = X_embedded[:, 1].tolist()

    return tsne_1, tsne_2
def calc_tsne(
    X,
    nthreads,
    no_dims,
    perplexity,
    early_exag_coeff,
    learning_rate,
    rand_seed,
    initialization=None,
    max_iter=750,
    stop_early_exag_iter=250,
    mom_switch_iter=250,
):
    """
    Calculate t-SNE embeddings using the FIt-SNE package.

    ``X`` is converted to a float64 copy before it is handed to ``FItSNE``,
    so the caller's array is left untouched.  All remaining arguments are
    forwarded to ``FItSNE`` as keyword arguments of the same name.

    Returns the value returned by ``FItSNE``.

    Raises ``Exception`` if the ``fftw3`` library cannot be located.  If the
    ``fitsne`` package is not installed, an error is logged and the process
    exits with status -1.
    """
    # Check if fftw3 is installed.
    import ctypes.util

    fftw3_loc = ctypes.util.find_library("fftw3")
    if fftw3_loc is None:
        raise Exception(
            "Please install 'fftw3' first to use the FIt-SNE feature!")

    try:
        from fitsne import FItSNE
    except ModuleNotFoundError:
        import sys
        logger.error(
            "Need FItSNE!  Try 'pip install fitsne' or 'conda install -c conda-forge pyfit-sne'."
        )
        sys.exit(-1)

    # FItSNE will change X content, so it must not receive the caller's
    # array directly; astype() always returns a new float64 array.
    return FItSNE(
        X.astype("float64"),
        nthreads=nthreads,
        no_dims=no_dims,
        perplexity=perplexity,
        early_exag_coeff=early_exag_coeff,
        learning_rate=learning_rate,
        rand_seed=rand_seed,
        initialization=initialization,
        max_iter=max_iter,
        stop_early_exag_iter=stop_early_exag_iter,
        mom_switch_iter=mom_switch_iter,
    )
Ejemplo n.º 4
0
def calc_fitsne(
    X,
    nthreads,
    no_dims,
    perplexity,
    early_exag_coeff,
    learning_rate,
    rand_seed,
    initialization=None,
    max_iter=1000,
    stop_early_exag_iter=250,
    mom_switch_iter=250,
):
    """
    Run FIt-SNE on a float64 copy of ``X`` and return the result of ``FItSNE``.

    Every argument other than ``X`` is forwarded to ``FItSNE`` as a keyword
    argument of the same name.  Raises ``Exception`` when the ``fftw3``
    library cannot be located; ``fitsne`` itself is imported only after that
    check has passed.
    """
    from ctypes import util as ctypes_util

    # FIt-SNE needs the fftw3 shared library; fail early when it is missing.
    if ctypes_util.find_library("fftw3") is None:
        raise Exception(
            "Please install 'fftw3' first to use the FIt-SNE feature!")

    from fitsne import FItSNE

    fitsne_options = {
        "nthreads": nthreads,
        "no_dims": no_dims,
        "perplexity": perplexity,
        "early_exag_coeff": early_exag_coeff,
        "learning_rate": learning_rate,
        "rand_seed": rand_seed,
        "initialization": initialization,
        "max_iter": max_iter,
        "stop_early_exag_iter": stop_early_exag_iter,
        "mom_switch_iter": mom_switch_iter,
    }
    # FItSNE changes the content of its input; astype() hands it a new array.
    return FItSNE(X.astype("float64"), **fitsne_options)
Ejemplo n.º 5
0
    def fit_transform(self, X):
        """Embed ``X`` with the t-SNE backend named by ``self.variant``.

        Every backend is given ``self.perplexity``; the "multicore" backend
        additionally runs with ``n_jobs=4``.  An unrecognised variant yields
        ``None``.
        """
        variant = self.variant

        if variant == "bhtsne":
            embedding = bhtsne.tsne(X, perplexity=self.perplexity)
        elif variant == "multicore":
            model = MulticoreTSNE(n_jobs=4, perplexity=self.perplexity)
            embedding = model.fit_transform(X)
        elif variant == "sklearn":
            model = skTSNE(perplexity=self.perplexity)
            embedding = model.fit_transform(X)
        elif variant == "optsne":
            model = OptSNE(perplexity=self.perplexity)
            embedding = model.fit_transform(X)
        elif variant == "fitsne":
            embedding = FItSNE(X, perplexity=self.perplexity)
        elif variant == "cuda":
            model = CudaTSNE(perplexity=self.perplexity)
            embedding = model.fit_transform(X)
        else:
            embedding = None

        return embedding
def run_reduce_dim(
    adata,
    X_data,
    n_components,
    n_pca_components,
    reduction_method,
    embedding_key,
    n_neighbors,
    neighbor_key,
    cores,
    kwargs,
):
    """Embed ``X_data`` with ``reduction_method`` and store the result in ``adata``.

    Supported methods and what they write:

    * ``"trimap"`` -- embedding in ``adata.obsm[embedding_key]`` and the
      parameters in ``adata.uns[neighbor_key]``.
    * ``"tsne"`` (case-insensitive) -- same keys, computed with FIt-SNE using
      ``cores`` threads.
    * ``"umap"`` -- same keys plus the kNN indices in
      ``adata.uns[neighbor_key]``, the connectivity and distance matrices in
      ``adata.obsp`` and the fitted mapper in ``adata.uns["umap_fit"]``.
      ``kwargs`` overrides the default UMAP settings through ``update_dict``.
    * ``"psl"`` -- embedding in ``adata.obsm[embedding_key]`` and the
      adjacency matrix in ``adata.uns[neighbor_key]``.
    * ``"diffusion_map"`` -- accepted but not implemented; ``adata`` is
      returned unchanged.

    ``n_components`` is used by the umap and psl methods only.

    Returns ``adata``.

    Raises ``ImportError`` if the tSNE method is requested and ``fitsne`` is
    not installed, and ``Exception`` for any other method name.
    """
    # Only one branch runs per call, so this dict is never shared.
    neighbor_params = {"n_neighbors": n_neighbors, "method": reduction_method}

    if reduction_method == "trimap":
        import trimap

        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance="euclidean",  # cosine
            weight_adj=1000.0,
            apply_pca=False,
        )
        X_dim = triplemap.fit_transform(X_data)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {"params": neighbor_params}
    elif reduction_method == "diffusion_map":
        # Placeholder: nothing is computed or stored for this method.
        pass
    elif reduction_method.lower() == "tsne":
        try:
            from fitsne import FItSNE
        except ImportError as err:
            # Printing and carrying on would only fail one line later with
            # an unbound FItSNE; surface the real cause instead.
            raise ImportError(
                "Please first install fitsne to perform accelerated tSNE method. Install instruction is "
                "provided here: https://pypi.org/project/fitsne/") from err

        X_dim = FItSNE(X_data, nthreads=cores)  # use FitSNE

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {"params": neighbor_params}
    elif reduction_method == "umap":
        _umap_kwargs = {
            "n_components": n_components,
            "metric": "euclidean",
            "min_dist": 0.5,
            "spread": 1.0,
            "n_epochs": 0,
            "alpha": 1.0,
            "gamma": 1.0,
            "negative_sample_rate": 5,
            "init_pos": "spectral",
            "random_state": 0,
            "densmap": False,
            "dens_lambda": 2.0,
            "dens_frac": 0.3,
            "dens_var_shift": 0.1,
            "output_dens": False,
            "verbose": False,
        }
        umap_kwargs = update_dict(_umap_kwargs, kwargs)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            (
                mapper,
                graph,
                knn_indices,
                knn_dists,
                X_dim,
            ) = umap_conn_indices_dist_embedding(X_data, n_neighbors,
                                                 **umap_kwargs)  # X

        adata.obsm[embedding_key] = X_dim
        knn_dists = knn_to_adj(knn_indices, knn_dists)
        adata.uns[neighbor_key] = {
            "params": neighbor_params,
            "indices": knn_indices,
        }

        # A neighbor_key such as "<layer>_neighbors" selects layer-prefixed
        # keys in adata.obsp; a key without "_" uses the plain names.
        layer = neighbor_key.split("_")[0] if "_" in neighbor_key else None
        conn_key = "connectivities" if layer is None else layer + "_connectivities"
        dist_key = "distances" if layer is None else layer + "_distances"

        adata.obsp[conn_key], adata.obsp[dist_key] = graph, knn_dists

        adata.uns["umap_fit"] = {
            "fit": mapper,
            "n_pca_components": n_pca_components
        }
    elif reduction_method == "psl":
        adj_mat, X_dim = psl(X_data, d=n_components,
                             K=n_neighbors)  # this need to be updated
        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = adj_mat

    else:
        raise Exception(
            "reduction_method {} is not supported.".format(reduction_method))

    return adata
Ejemplo n.º 7
0
def run(perplexity=30, learning_rate=100, n_jobs=4, method='tsne'):
    """Time one t-SNE implementation on the mouse 60k data and plot the result.

    Parameters
    ----------
    perplexity, learning_rate, n_jobs
        Forwarded to the selected implementation where it accepts them.
    method : str
        Which implementation to benchmark:

        * ``'tsne'`` (default) -- the ``TSNE`` class with FFT gradients;
          this is the only benchmark that ran before ``method`` existed.
        * ``'fitsne'`` -- ``fitsne.FItSNE`` on a C-contiguous float64 copy.
        * ``'mctsne'`` -- ``MulticoreTSNE`` initialised from a 2-component PCA.
        * ``'sklearn'`` -- ``SKLTSNE`` with PCA initialisation.

        The last three used to sit behind bare ``return`` statements and
        were unreachable; they are now selectable one at a time.

    Raises
    ------
    ValueError
        If ``method`` is not one of the names above.  The check happens
        before any data is loaded.
    """
    supported = ('tsne', 'fitsne', 'mctsne', 'sklearn')
    if method not in supported:
        raise ValueError(
            'method must be one of {}, got {!r}'.format(supported, method))

    x, y = get_mouse_60k()
    # x, y = get_fashion_mnist()

    angle = 0.5
    ee = 12
    metric = 'euclidean'

    print(x.shape)

    if method == 'tsne':
        start = time.time()
        tsne = TSNE(
            perplexity=perplexity,
            learning_rate=learning_rate,
            early_exaggeration=ee,
            n_jobs=n_jobs,
            theta=angle,
            initialization='random',
            metric=metric,
            n_components=2,
            n_iter=750,
            early_exaggeration_iter=250,
            neighbors='approx',
            negative_gradient_method='fft',
            min_num_intervals=10,
            ints_in_interval=1,
            late_exaggeration_iter=0,
            late_exaggeration=2.,
            callbacks=ErrorLogger(),
        )
        # x = PCA(n_components=50).fit_transform(x)
        embedding = tsne.fit(x)
        print('-' * 80)
        print('tsne', time.time() - start)
        plt.title('tsne')
        plot(embedding, y)
        return

    if method == 'fitsne':
        # The conversion and the import are kept outside the timed region.
        x = np.ascontiguousarray(x.astype(np.float64))
        from fitsne import FItSNE
        start = time.time()
        embedding = FItSNE(
            x,
            2,
            perplexity=perplexity,
            stop_lying_iter=250,
            ann_not_vptree=True,
            early_exag_coeff=ee,
            nthreads=n_jobs,
            theta=angle,
        )
        print('-' * 80)
        print('fft interp %.4f' % (time.time() - start))
        plt.title('fft interp')
        plot(embedding, y)
        plt.show()
        return

    if method == 'mctsne':
        # The PCA initialisation is computed before the timer starts.
        init = PCA(n_components=2).fit_transform(x)
        start = time.time()
        embedding = MulticoreTSNE(early_exaggeration=ee,
                                  learning_rate=learning_rate,
                                  perplexity=perplexity,
                                  n_jobs=n_jobs,
                                  cheat_metric=False,
                                  angle=angle,
                                  init=init,
                                  metric=metric,
                                  verbose=True).fit_transform(x)
        print('-' * 80)
        print('mctsne', time.time() - start)
        plt.title('mctsne')
        plot(embedding, y)
        plt.show()
        return

    # method == 'sklearn'
    start = time.time()
    embedding = SKLTSNE(
        early_exaggeration=ee,
        learning_rate=learning_rate,
        angle=angle,
        perplexity=perplexity,
        init='pca',
        metric=metric,
    ).fit_transform(x)
    print('-' * 80)
    print('sklearn', time.time() - start)
    plt.title('sklearn')
    plot(embedding, y)
    plt.show()
# Nothing to do when the result file is already on disk.
if os.path.isfile(FILENAME_TSNE_RESULT):
    print('File exists already, skipping this step..')
    # exit() is a helper installed by the site module for interactive use;
    # raising SystemExit ends the script the same way without relying on it.
    raise SystemExit

# _____ Load data ______________________________________________________________ Load data
# Load the data
sample_tsne_no_noise = np.load(
    dirs.data + 'sample_to_tsne_range_{}.npy'.format(SELECTED_RANGES))

# Add zero-mean Gaussian noise with standard deviation 1/SNR to every entry
sample_tsne = sample_tsne_no_noise + np.random.normal(
    0, 1 / float(SNR), sample_tsne_no_noise.shape)

print("Added noise to the t-SNE sample with a SNR of {}".format(int(SNR)))

# The noise-free array is no longer needed; drop the reference before t-SNE.
del sample_tsne_no_noise

# _____ t-SNE __________________________________________________________________ t-SNE
print('='*40)
print('--> Starting t-SNE analysis for perplexity {}'.format(PERPLEXITY))
# Run t-SNE once, for the single perplexity given by PERPLEXITY
tSNEd_fluxes = FItSNE(X=sample_tsne, perplexity=PERPLEXITY)
# Save the data
np.save(dirs.data + 'tSNE_results_range_{}_perplexity_{}_SNRof{}_fftw'.format(
    SELECTED_RANGES, PERPLEXITY, int(SNR)), tSNEd_fluxes)

print('='*40)
Ejemplo n.º 9
0
def reduceDimension(adata,
                    n_pca_components=25,
                    n_components=2,
                    n_neighbors=10,
                    reduction_method='trimap',
                    velocity_key='velocity_S',
                    cores=1):
    """Compute a low dimension reduction projection of an annodata object first with PCA, followed by non-linear dimension reduction methods

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        an Annodata object
    n_pca_components: 'int' (optional, default 25)
        Number of PCA components.
    n_components: 'int' (optional, default 2)
        The dimension of the space to embed into. Used only by the psl reduction_method.
    n_neighbors: 'int' (optional, default 10)
        Number of nearest neighbors when constructing adjacency matrix. Passed to psl and recorded in
        adata.uns['neighbors'] for the other methods.
    reduction_method: 'str' (optional, default trimap)
        Non-linear dimension reduction method to further reduce dimension based on the top n_pca_components PCA components. Currently,
        'trimap', 'tSNE' (fitsne instead of traditional tSNE used), 'umap' or 'psl' (probablistic structure learning) are supported.
    velocity_key: 'str' or None (optional, default velocity_S)
        The key in adata.layers that corresponds to the estimated velocity values. With None no velocity projection is computed.
    cores: `int` (optional, default `1`)
        Number of cores. Used only when the tSNE reduction_method is used.

    Returns
    -------
    Returns an updated `adata`: adata.obsm['X_pca'] (and adata.obsm['_velocity_pca'] when velocity_key is given and it is not
    present yet), the embedding in adata.obsm['X_trimap' / 'X_tSNE' / 'X_umap' / 'X_psl'], and either adata.uns['neighbors']
    or, for psl, adata.uns['PSL_adj_mat'].

    Raises
    ------
    ImportError
        If reduction_method is 'tSNE' and the fitsne package is not installed.
    Exception
        If reduction_method is not one of the supported names.
    """

    if 'use_for_dynamo' in adata.var.keys():
        X = adata.X[:, adata.var.use_for_dynamo.values]
        if velocity_key is not None:
            X_t = adata.X[:, adata.var.use_for_dynamo.values] + adata.layers[
                velocity_key][:, adata.var.use_for_dynamo.values]
    else:
        X = adata.X
        if velocity_key is not None:
            X_t = adata.X + adata.layers[velocity_key]

    # Method names are compared with ==; identity (`is`) only matched when
    # the caller's string happened to be the same interned object.
    if ('X_pca' not in adata.obsm.keys()
            or 'pca_fit' not in adata.uns.keys()) or reduction_method == "pca":
        # One extra component is fitted and the first one is discarded.
        transformer = TruncatedSVD(n_components=n_pca_components + 1,
                                   random_state=0)
        X_fit = transformer.fit(X)
        X_pca = X_fit.transform(X)[:, 1:]
        adata.obsm['X_pca'] = X_pca
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys(
        ):
            X_t_pca = X_fit.transform(X_t)[:, 1:]
            adata.obsm['_velocity_pca'] = X_t_pca - X_pca
    else:
        X_pca = adata.obsm['X_pca'][:, :n_pca_components]
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys(
        ):
            # NOTE(review): this refits the stored model on X_t instead of
            # only transforming with it -- confirm that is intended.
            X_t_pca = adata.uns['pca_fit'].fit_transform(X_t)
            adata.obsm['_velocity_pca'] = X_t_pca[:, 1:(n_pca_components +
                                                        1)] - X_pca
        adata.obsm['X_pca'] = X_pca

    # NOTE(review): "pca" forces the PCA above to be recomputed but has no
    # branch below, so it still ends in the "not supported" exception.
    if reduction_method == "trimap":
        import trimap
        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance='angular',  # cosine
            weight_adj=1000.0,
            apply_pca=False)
        X_dim = triplemap.fit_transform(X_pca)

        adata.obsm['X_trimap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method}, 'connectivities': None, \
                                  'distances': None, 'indices': None}
    elif reduction_method == 'tSNE':
        try:
            from fitsne import FItSNE
        except ImportError as err:
            # Printing and carrying on would only fail one line later with
            # an unbound FItSNE; surface the real cause instead.
            raise ImportError(
                'Please first install fitsne to perform accelerated tSNE method. Install instruction is provided here: https://pypi.org/project/fitsne/'
            ) from err

        X_dim = FItSNE(X_pca, nthreads=cores)  # use FitSNE

        adata.obsm['X_tSNE'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method}, 'connectivities': None, \
                                  'distances': None, 'indices': None}
    elif reduction_method == 'umap':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            graph, knn_indices, knn_dists, X_dim = umap_conn_indices_dist_embedding(
                X_pca)  # X_pca
        adata.obsm['X_umap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method}, 'connectivities': graph, \
                                  'distances': knn_dists, 'indices': knn_indices}
    elif reduction_method == 'psl':
        adj_mat, X_dim = psl_py(X_pca, d=n_components,
                                K=n_neighbors)  # this need to be updated
        adata.obsm['X_psl'] = X_dim
        adata.uns['PSL_adj_mat'] = adj_mat

    else:
        raise Exception(
            'reduction_method {} is not supported.'.format(reduction_method))

    return adata