コード例 #1
0
def trainVAE(gene_dataset,
             filename,
             rep,
             nlayers=2,
             n_hidden=128,
             reconstruction_loss: str = 'zinb'):
    """Train a VAE on ``gene_dataset`` (or reload cached weights) and return a posterior.

    The model weights are cached at
    ``../<filename>/vae.<reconstruction_loss>.rep<rep>.pkl``. When that file
    exists, the weights are loaded from it and the model is put in eval mode;
    otherwise the model is trained for 250 epochs and its ``state_dict`` is
    saved to that path.

    :param gene_dataset: dataset exposing ``nb_genes``, ``n_batches``,
        ``n_labels`` and ``len()``
    :param filename: name of the directory (below ``..``) that holds the cache
    :param rep: replicate identifier embedded in the cache file name
    :param nlayers: forwarded to ``VAE`` as ``n_layers``
    :param n_hidden: forwarded to ``VAE`` as ``n_hidden``
    :param reconstruction_loss: forwarded to ``VAE`` and embedded in the cache
        file name
    :return: the posterior created by the trainer over every index of
        ``gene_dataset``
    """
    vae = VAE(gene_dataset.nb_genes,
              n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels,
              n_hidden=n_hidden,
              n_latent=10,
              n_layers=nlayers,
              dispersion='gene',
              reconstruction_loss=reconstruction_loss)
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)

    # Keep the derived path in its own name instead of overwriting the
    # ``filename`` argument, which is really a directory name.
    weights_path = '../{}/vae.{}.rep{}.pkl'.format(filename,
                                                   reconstruction_loss,
                                                   rep)
    if os.path.isfile(weights_path):
        trainer.model.load_state_dict(torch.load(weights_path))
        trainer.model.eval()
    else:
        # Create the cache directory *before* the long training run so that a
        # missing directory cannot make torch.save fail and discard the
        # freshly trained weights.
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)
        trainer.train(n_epochs=250)
        torch.save(trainer.model.state_dict(), weights_path)

    return trainer.create_posterior(trainer.model,
                                    gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
コード例 #2
0
def trainVAE(gene_dataset, rmCellTypes, rep):
    """Train a VAE on ``gene_dataset`` (or reload cached weights) and return its latent space.

    The model weights are cached at ``../NoOverlap/vae.<rmCellTypes><rep>.pkl``.
    When that file exists, the weights are loaded from it and the model is put
    in eval mode; otherwise the model is trained for 150 epochs and its
    ``state_dict`` is saved to that path.

    :param gene_dataset: dataset exposing ``nb_genes``, ``n_batches``,
        ``n_labels`` and ``len()``
    :param rmCellTypes: first identifier embedded in the cache file name
    :param rep: second identifier embedded in the cache file name
    :return: ``(latent, batch_indices, labels, trainer)`` where the first three
        come from ``get_latent()`` on the sequential posterior and
        ``batch_indices`` has been flattened with ``ravel()``
    """
    vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches, n_labels=gene_dataset.n_labels,
              n_hidden=128, n_latent=10, n_layers=2, dispersion='gene')
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)

    # Build the cache path once so the check, load and save cannot drift apart.
    weights_path = '../NoOverlap/vae.%s%s.pkl' % (rmCellTypes, rep)
    if os.path.isfile(weights_path):
        trainer.model.load_state_dict(torch.load(weights_path))
        trainer.model.eval()
    else:
        # Create the cache directory *before* training so that a missing
        # directory cannot make torch.save fail and discard the trained weights.
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)
        trainer.train(n_epochs=150)
        torch.save(trainer.model.state_dict(), weights_path)

    full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset)))
    latent, batch_indices, labels = full.sequential().get_latent()
    batch_indices = batch_indices.ravel()
    return latent, batch_indices, labels, trainer
コード例 #3
0
def compute_scvi_latent(
    adata: sc.AnnData,
    n_latent: int = 5,
    n_epochs: int = 100,
    lr: float = 1e-3,
    use_batches: bool = False,
    use_cuda: bool = True,
) -> Tuple[scvi.inference.Posterior, np.ndarray]:
    """Fit a scVI VAE on ``adata`` and extract its latent representation.

    :param adata: non-normalized ``sc.AnnData`` object; it is wrapped in an
        ``AnnDataset`` for training
    :param n_latent: dimension of the latent space (``n_latent`` of the VAE)
    :param n_epochs: number of training epochs
    :param lr: learning rate passed to ``trainer.train``
    :param use_batches: multiplied into the dataset's batch count, so ``False``
        gives the VAE ``n_batch=0``
    :param use_cuda: forwarded to ``UnsupervisedTrainer``
    :return: ``(posterior, latent)`` -- the sequential posterior over every
        cell of the dataset and the first element returned by its
        ``get_latent()``
    """
    # Wrap the AnnData object in an scVI dataset.
    dataset = AnnDataset(adata)

    # Build and train the model on the whole dataset (train_size=1.0).
    model = VAE(dataset.nb_genes,
                n_batch=dataset.n_batches * use_batches,
                n_latent=n_latent)
    trainer = UnsupervisedTrainer(model, dataset, train_size=1.0, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs, lr=lr)

    # Posterior over every cell, iterated in sequential order.
    every_cell = np.arange(len(dataset))
    full_posterior = trainer.create_posterior(trainer.model, dataset, indices=every_cell)
    posterior = full_posterior.sequential()

    # get_latent() yields three values; only the latent coordinates are kept.
    latent, _, _ = posterior.get_latent()
    return posterior, latent
コード例 #4
0
ファイル: Prop_cor.py プロジェクト: Edouard360/scVI
         nsamples = np.asarray(nsamples)
         sample = sample_by_batch(labels[batch_id == batch], nsamples)
         sample = cellid[batch_id == batch][sample]
         count.append(nsamples)
         cells.append(sample)
     correlation = (np.corrcoef(count[0], count[1])[0, 1])
 # Report the total number of sampled cells recorded in count[0] and count[1],
 # and the correlation computed between those two count vectors.
 print("dataset 1 has %d cells" % (np.sum(count[0])))
 print("dataset 2 has %d cells" % (np.sum(count[1])))
 print(
     "correlation between the cell-type composition of the subsampled dataset is %.3f"
     % correlation)
 # Restrict a deep copy of the dataset to the sampled cells, so that
 # gene_dataset itself is not the object update_cells() is called on.
 sub_dataset = deepcopy(gene_dataset)
 sub_dataset.update_cells(np.concatenate(cells))
 # Build a VAE sized to the subsampled dataset and train it for 250 epochs.
 vae = VAE(sub_dataset.nb_genes,
           n_batch=sub_dataset.n_batches,
           n_labels=sub_dataset.n_labels,
           n_hidden=128,
           dispersion='gene')
 # NOTE(review): use_cuda is not defined in the visible excerpt -- presumably
 # set earlier in the script; confirm.
 infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda)
 infer.train(n_epochs=250)
 # Latent representation, batch indices and labels from the 'sequential' loader.
 latent, batch_indices, labels = infer.get_latent('sequential')
 keys = sub_dataset.cell_types
 batch_entropy = entropy_batch_mixing(latent, batch_indices)
 print("Entropy batch mixing :", batch_entropy)
 # Evaluate kNN purity on a subset chosen by select_indices_evenly(1000, labels)
 # (presumably up to 1000 cells spread evenly across labels -- TODO confirm).
 sample = select_indices_evenly(1000, labels)
 res = knn_purity_avg(latent[sample, :],
                      labels[sample].astype('int'),
                      keys=keys,
                      acc=True)
 print('average classification accuracy per cluster')
 for x in res:
コード例 #5
0
ファイル: Easycase2.compare.py プロジェクト: Edouard360/scVI
from scvi.harmonization.clustering.Combat import COMBAT
from scvi.harmonization.benchmark import knn_purity_avg

from scvi.metrics.clustering import select_indices_evenly,entropy_batch_mixing,clustering_scores

import sys

# The model to run is selected by the first command-line argument.
model_type = str(sys.argv[1])
plotname = 'Macosko_Regev'
# Concatenate the two datasets into one, then subsample genes
# (presumably keeps 5000 genes -- TODO confirm subsample_genes semantics).
dataset1 = MacoskoDataset()
dataset2 = RegevDataset()
gene_dataset = GeneExpressionDataset.concat_datasets(dataset1, dataset2)
gene_dataset.subsample_genes(5000)

if model_type == 'vae':
    # Unsupervised VAE trained for 250 epochs; the latent space is read from
    # the 'sequential' data loader.
    vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches, n_labels=gene_dataset.n_labels,
              n_hidden=128, n_latent=10, n_layers=2, dispersion='gene')
    # NOTE(review): use_cuda is not defined in the visible excerpt -- confirm
    # it is set elsewhere in the file.
    infer = VariationalInference(vae, gene_dataset, use_cuda=use_cuda)
    infer.train(n_epochs=250)
    data_loader = infer.data_loaders['sequential']
    latent, batch_indices, labels = get_latent(vae, data_loader)
    keys = gene_dataset.cell_types
    # get_latent returns batch_indices as a sequence of arrays; join them into one.
    batch_indices = np.concatenate(batch_indices)
    # NOTE(review): duplicate of the `keys` assignment two lines above.
    keys = gene_dataset.cell_types
elif model_type == 'svaec':
    # Semi-supervised SCANVI model trained for 50 epochs; the accuracy on the
    # 'unlabelled' loader is printed.
    svaec = SCANVI(gene_dataset.nb_genes, gene_dataset.n_batches,
                   gene_dataset.n_labels, use_labels_groups=False,
                   n_latent=10, n_layers=2)
    infer = SemiSupervisedVariationalInference(svaec, gene_dataset)
    infer.train(n_epochs=50)
    print('svaec acc =', infer.accuracy('unlabelled'))
    data_loader = infer.data_loaders['unlabelled']
コード例 #6
0
from scvi.dataset import LoomDataset, CsvDataset, Dataset10X

## Correction for batch effects

# NOTE(review): save_path and n_epochs_all are not defined in the visible
# excerpt -- presumably set earlier in the file; confirm.
gene_dataset = RetinaDataset(save_path=save_path)
#tenX_dataset = Dataset10X("neuron_9k", save_path=save_path)
# Fall back to 50 epochs when no global override (n_epochs_all) is given.
n_epochs=50 if n_epochs_all is None else n_epochs_all
lr=1e-3
use_batches=True
use_cuda=True

### Train the model and output model likelihood every 5 epochs
# NOTE(review): despite the heading above, no training call appears in this
# excerpt; the model is only constructed and run once through inference().
from scvi.models.vae import VAE

# Multiplying by the boolean use_batches yields n_batches when True and 0 when False.
vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches)



# Stand-alone hyper-parameter values; within this excerpt only n_batch and
# n_input are used (for the shape of x below) -- the VAE above was already
# constructed without them.
n_batch = gene_dataset.n_batches * use_batches
n_input = gene_dataset.nb_genes
n_hidden = 128
n_latent = 10
n_layers= 1
dropout_rate = 0.1
dispersion = "gene"
log_variational = True
reconstruction_loss: str = "zinb"
# Random input with n_batch rows and n_input columns.
# NOTE(review): n_batch is derived from the number of dataset batches, not a
# mini-batch size -- confirm this row count is intended.
x = torch.rand([n_batch,n_input])

# Single pass through the model's inference(); the ten returned values are unpacked by position.
px_scale, px_r, px_rate, px_dropout, qz_m, qz_v, z, ql_m, ql_v, library = vae.inference(x)
コード例 #7
0
# Wrap the UMI count matrix (as a sparse CSR matrix) and its labels in a
# dataset with gene names 'gene0'..'gene1999' and cell types 'type1'..'type5'.
# NOTE(review): countUMI / labelUMI are defined outside the visible excerpt.
UMI = GeneExpressionDataset(
            *GeneExpressionDataset.get_attributes_from_matrix(
                csr_matrix(countUMI), labels=labelUMI),
            gene_names=['gene'+str(i) for i in range(2000)], cell_types=['type'+str(i+1) for i in range(5)])

# Same construction for the non-UMI count matrix and labels.
nonUMI = GeneExpressionDataset(
            *GeneExpressionDataset.get_attributes_from_matrix(
                csr_matrix(countnonUMI), labels=labelnonUMI),
            gene_names=['gene'+str(i) for i in range(2000)], cell_types=['type'+str(i+1) for i in range(5)])

# For these four model types the two datasets are concatenated into one.
if model_type in ['vae', 'svaec', 'Seurat', 'Combat']:
    gene_dataset = GeneExpressionDataset.concat_datasets(UMI, nonUMI)

    if model_type == 'vae':
        # Unsupervised VAE trained for 250 epochs; the latent space is read
        # from the 'sequential' data loader.
        vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches, n_labels=gene_dataset.n_labels,
                  n_hidden=128, n_latent=10, n_layers=2, dispersion='gene')
        # NOTE(review): use_cuda is not defined in the visible excerpt -- confirm.
        infer_vae = VariationalInference(vae, gene_dataset, use_cuda=use_cuda)
        infer_vae.train(n_epochs=250)
        data_loader = infer_vae.data_loaders['sequential']
        latent, batch_indices, labels = get_latent(vae, data_loader)
        keys = gene_dataset.cell_types
        # get_latent returns batch_indices as a sequence of arrays; join them into one.
        batch_indices = np.concatenate(batch_indices)
        # NOTE(review): duplicate of the `keys` assignment two lines above.
        keys = gene_dataset.cell_types
    elif model_type == 'svaec':
        gene_dataset.subsample_genes(1000)

        # A VAE is trained first for n_epochs_vae epochs; n_epochs_scanvi is
        # not used within this excerpt (presumably by a later SCANVI stage --
        # TODO confirm).
        n_epochs_vae = 100
        n_epochs_scanvi = 50
        vae = VAE(gene_dataset.nb_genes, gene_dataset.n_batches, gene_dataset.n_labels, n_latent=10, n_layers=2)
        trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
        trainer.train(n_epochs=n_epochs_vae)