Exemple #1
0
def tsne_emb(model_path: str, proc_dir: str, layer_name: str,
             perplexities: List[int], n_iter: int):
    """
    Compute a t-SNE embedding of one weight matrix of a saved model and store it on disk.

    The checkpoint at ``model_path`` is loaded onto the CPU; the entry
    ``layer_name`` of its ``'model'`` mapping is converted with ``.numpy()``
    and embedded using multiscale cosine affinities and a PCA initialization.

    Args:
        model_path: Path of the checkpoint passed to ``torch.load``.
        proc_dir: Directory that receives the two output files.
        layer_name: Key of the weights to embed inside the checkpoint's ``'model'`` entry.
        perplexities: Perplexities handed to ``affinity.Multiscale``.
        n_iter: Number of optimization iterations.

    Side effects:
        Writes ``tsne_emb.csv`` (columns ``x`` and ``y``, no index) and
        ``kl_divergence`` (the embedding's KL divergence with four decimals)
        into ``proc_dir``.
    """
    # NOTE(review): torch.load unpickles the file - only use with trusted checkpoints.
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    weights = checkpoint['model'][layer_name].numpy()

    multiscale_affinities = affinity.Multiscale(weights,
                                                perplexities=perplexities,
                                                metric="cosine",
                                                n_jobs=-1,
                                                random_state=3)
    pca_init = initialization.pca(weights, random_state=42)

    initial_embedding = TSNEEmbedding(pca_init,
                                      multiscale_affinities,
                                      negative_gradient_method="fft",
                                      n_jobs=-1,
                                      random_state=4,
                                      verbose=True)
    # optimize() is not called in-place here, so the optimized embedding is the return value.
    optimized = initial_embedding.optimize(n_iter=n_iter,
                                           exaggeration=None,
                                           momentum=0.8,
                                           learning_rate="auto")

    coordinates_file = path.join(proc_dir, 'tsne_emb.csv')
    pd.DataFrame(optimized, columns=['x', 'y']).to_csv(coordinates_file, index=False)

    kl_file = path.join(proc_dir, 'kl_divergence')
    with open(kl_file, 'w') as f:
        f.write(f'{optimized.kl_divergence:.4f}')
Exemple #2
0
 def test_1(self):
     """
     Embed ``self.x`` using perplexity-based nearest-neighbour affinities
     (perplexity 30) and a PCA initialization, then pass both the optimized
     embedding and the transform of ``self.x`` to ``self.eval_embedding``.
     """
     pca_init = initialization.pca(self.x)
     nn_affinities = affinity.PerplexityBasedNN(self.x, perplexity=30)
     tsne = openTSNE.TSNEEmbedding(pca_init, nn_affinities)

     # (n_iter, exaggeration, momentum) of each in-place optimization phase, in order.
     schedule = ((25, 12, 0.5), (50, 1, 0.8))
     for n_iter, exaggeration, momentum in schedule:
         tsne.optimize(n_iter,
                       exaggeration=exaggeration,
                       momentum=momentum,
                       inplace=True)

     self.eval_embedding(tsne, self.y)
     transformed = tsne.transform(self.x)
     self.eval_embedding(transformed, self.y, "transform")
Exemple #3
0
 def test_2(self):
     """
     Embed ``self.x`` using multiscale-mixture affinities (perplexities 5 and
     30) and a PCA initialization, then pass both the optimized embedding and
     the transform of ``self.x`` to ``self.eval_embedding``.
     """
     pca_init = initialization.pca(self.x)
     mixture_affinities = affinity.MultiscaleMixture(self.x, perplexities=[5, 30])
     tsne = openTSNE.TSNEEmbedding(pca_init, mixture_affinities)

     # (n_iter, exaggeration, momentum) of each in-place optimization phase, in order.
     schedule = ((25, 12, 0.5), (50, 1, 0.8))
     for n_iter, exaggeration, momentum in schedule:
         tsne.optimize(n_iter,
                       exaggeration=exaggeration,
                       momentum=momentum,
                       inplace=True)

     self.eval_embedding(tsne, self.y)
     transformed = tsne.transform(self.x)
     self.eval_embedding(transformed, self.y, "transform")
Exemple #4
0
def art_of_tsne(X: np.ndarray,
                metric: Union[str, Callable] = "euclidean",
                exaggeration: float = -1,
                perplexity: int = 30,
                n_jobs: int = -1) -> TSNEEmbedding:
    """
    Implementation of Dmitry Kobak and Philipp Berens
    "The art of using t-SNE for single-cell transcriptomics" based on openTSNE.
    See https://doi.org/10.1038/s41467-019-13056-x | www.nature.com/naturecommunications

    The strategy depends on the number of samples ``n = X.shape[0]``:

    * ``n <= 3_000``: perplexity-based affinities, PCA initialization and a
      learning rate of ``max(200, n / 12)``.
    * ``3_000 < n <= 100_000``: multiscale affinities with perplexities
      ``[perplexity, n / 100]``, PCA initialization, learning rate ``n / 12``.
    * ``n > 100_000``: a random subset of ``n // 40`` samples is embedded
      first (by calling this function recursively), the remaining samples
      are placed relative to that embedding, and the combined, rescaled
      layout is used as the initialization of the full embedding.

    Every branch optimizes in two stages: 250 iterations with exaggeration 12
    and momentum 0.5, then 750 iterations with ``exaggeration`` and
    momentum 0.8.

    Args:
        X: The data matrix of shape (n_cells, n_genes) i.e. (n_samples, n_features)
        metric: Any metric allowed by PyNNDescent (default: 'euclidean')
        exaggeration: The exaggeration to use for the second optimization
            stage. The default ``-1`` selects it automatically:
            ``1 + n / 333_333`` when ``n > 100_000``, otherwise ``1``.
        perplexity: The perplexity to use for the embedding
        n_jobs: Number of jobs passed on to the openTSNE affinity, embedding
            and optimization calls.

    Returns:
        The embedding as an opentsne.TSNEEmbedding object (which can be cast to an np.ndarray)
    """
    n = X.shape[0]
    if n > 100_000:
        if exaggeration == -1:
            exaggeration = 1 + n / 333_333
        # Subsample, optimize, then add the remaining cells and optimize again.
        n_sample = n // 40
        logging.info(f"Creating subset of {n_sample} elements")
        # Subsample and run a regular art_of_tsne on the subset
        indices = np.random.permutation(n)
        # reverse[j] is the row of the shuffled stack that holds original sample j
        reverse = np.argsort(indices)
        X_sample, X_rest = X[indices[:n_sample]], X[indices[n_sample:]]
        logging.info("Embedding subset")
        # Forward the caller's settings so that the subset (and the placement
        # of the remaining points, which reuses the subset's affinities) is
        # computed with the same metric, perplexity and parallelism as the
        # full embedding. The exaggeration is left at its automatic default.
        Z_sample = art_of_tsne(X_sample,
                               metric=metric,
                               perplexity=perplexity,
                               n_jobs=n_jobs)

        logging.info(
            f"Preparing partial initial embedding of the {n - n_sample} remaining elements"
        )
        if isinstance(Z_sample.affinities, affinity.Multiscale):
            rest_init = Z_sample.prepare_partial(X_rest,
                                                 k=1,
                                                 perplexities=[1 / 3, 1 / 3])
        else:
            rest_init = Z_sample.prepare_partial(X_rest, k=1, perplexity=1 / 3)
        logging.info("Combining the initial embeddings, and standardizing")
        # Undo the permutation so that row j of the initialization belongs to X[j]
        init_full = np.vstack((Z_sample, rest_init))[reverse]
        # Rescale so that the first dimension has a standard deviation of 1e-4
        init_full = init_full / (np.std(init_full[:, 0]) * 10000)

        logging.info("Creating perplexity-based affinities")
        affinities = affinity.PerplexityBasedNN(X,
                                                perplexity=perplexity,
                                                metric=metric,
                                                method="approx",
                                                n_jobs=n_jobs)
        logging.info("Creating TSNE embedding")
        Z = TSNEEmbedding(init_full,
                          affinities,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        logging.info("Optimizing, stage 1")
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        logging.info("Optimizing, stage 2")
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    elif n > 3_000:
        if exaggeration == -1:
            exaggeration = 1
        # Use multiscale perplexity
        affinities_multiscale_mixture = affinity.Multiscale(
            X,
            perplexities=[perplexity, n / 100],
            metric=metric,
            method="approx",
            n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          affinities_multiscale_mixture,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    else:
        if exaggeration == -1:
            exaggeration = 1
        # Just a plain TSNE with high learning rate
        lr = max(200, n / 12)
        aff = affinity.PerplexityBasedNN(X,
                                         perplexity=perplexity,
                                         metric=metric,
                                         method="approx",
                                         n_jobs=n_jobs)
        init = initialization.pca(X)
        # Here the learning rate is configured on the embedding itself rather
        # than per optimize() call.
        Z = TSNEEmbedding(init,
                          aff,
                          learning_rate=lr,
                          n_jobs=n_jobs,
                          negative_gradient_method="fft")
        Z.optimize(250,
                   exaggeration=12,
                   momentum=0.5,
                   inplace=True,
                   n_jobs=n_jobs)
        Z.optimize(750,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   inplace=True,
                   n_jobs=n_jobs)
    return Z
Exemple #5
0
    norm = raw * 256.0 / sum(raw)
    profile[0, ] = norm

## 2. Run t-SNE
# Embeds the rows of `profile` in two dimensions and writes the coordinates
# to `outfile`. `profile`, `cores`, `ctgs` and `outfile` are defined earlier
# in the script, outside this section.

# One fixed seed is shared by the affinity computation and the PCA initialization.
seed = 211
# NOTE(review): `cores` is presumably a string from the command line - confirm against the caller.
threads = int(cores)

# Multiscale affinities combining perplexities 30 and 300, using cosine distance.
affinities_multiscale_mixture = affinity.Multiscale(
    profile,
    perplexities=[30, 300],
    metric="cosine",
    n_jobs=threads,
    random_state=seed,
)

# PCA-based starting layout for the embedding.
init = initialization.pca(profile, random_state=seed)

embedding = TSNEEmbedding(
    init,
    affinities_multiscale_mixture,
    negative_gradient_method="fft",
    n_jobs=threads,
)

# Two optimization phases: 250 iterations with exaggeration 6 and momentum 0.5,
# then 750 iterations with exaggeration 1 and momentum 0.8. Each call's return
# value is used as the input of the next step.
embedding1 = embedding.optimize(n_iter=250, exaggeration=6, momentum=0.5)
embedding2 = embedding1.optimize(n_iter=750, exaggeration=1, momentum=0.8)
# View the result as a plain NumPy array before handing it to pandas.
embedding_multiscale = embedding2.view(np.ndarray)

## 3. Save matrix
# Tab-separated output with three decimals per coordinate; rows are labelled
# by `ctgs` (presumably one identifier per row of `profile` - TODO confirm).
df = pd.DataFrame(embedding_multiscale, columns=['Dim1', 'Dim2'], index=ctgs)
df.to_csv(outfile, sep='\t', float_format='%.3f')