tuple(be + list(cell_type2))) plotUMAP(latent, plotname, 'vae', gene_dataset.cell_types, rmCellTypes, gene_dataset.batch_indices.ravel()) scanvi = SCANVI(gene_dataset.nb_genes, 2, (gene_dataset.n_labels + 1), n_hidden=128, n_latent=10, n_layers=2, dispersion='gene') scanvi.load_state_dict(trainer.model.state_dict(), strict=False) trainer_scanvi = AlternateSemiSupervisedTrainer( scanvi, gene_dataset, n_epochs_classifier=10, lr_classification=5 * 1e-3) trainer_scanvi.labelled_set = trainer_scanvi.create_posterior( indices=gene_dataset.batch_indices.ravel() == 0) trainer_scanvi.unlabelled_set = trainer_scanvi.create_posterior( indices=gene_dataset.batch_indices.ravel() == 1) trainer_scanvi.train(n_epochs=10) scanvi_full = trainer_scanvi.create_posterior( trainer_scanvi.model, gene_dataset, indices=np.arange(len(gene_dataset))) latent, _, _ = scanvi_full.sequential().get_latent() acc, cell_type = KNNpurity(latent1, latent2, latent, batch_indices.ravel(), labels, keys) f.write('scanvi' + '\t' + rmCellTypes + ("\t%.4f" * 8 + "\t%s" * 8 + "\n") % tuple(list(acc) + list(cell_type))) be, cell_type2 = BEbyType(keys, latent, labels, batch_indices, celltype1)
# Semi-supervised scANVI step.
#
# The model is built with a negative-binomial ('nb') reconstruction loss and
# initialised from the weights of the model held by `trainer`.
# strict=False is passed so the two state dicts do not have to match
# key-for-key (presumably the classifier-specific parameters are absent from
# the source model -- confirm against the model definitions).
scanvi = SCANVI(
    gene_dataset.nb_genes,
    gene_dataset.n_batches,
    gene_dataset.n_labels,
    n_latent=10,
    reconstruction_loss='nb',
)
vae_weights = trainer.model.state_dict()
scanvi.load_state_dict(vae_weights, strict=False)

trainer_scanvi = AlternateSemiSupervisedTrainer(
    scanvi,
    gene_dataset,
    n_epochs_classifier=5,
    lr_classification=5 * 1e-3,
)

# Cells of batch 0 form the labelled set and cells of batch 1 the unlabelled
# set. The indices are used in their original order (no shuffling).
cell_batches = gene_dataset.batch_indices
labelled = np.where(cell_batches == 0)[0]
unlabelled = np.where(cell_batches == 1)[0]
trainer_scanvi.labelled_set = trainer_scanvi.create_posterior(indices=labelled)
trainer_scanvi.unlabelled_set = trainer_scanvi.create_posterior(indices=unlabelled)

# The model is always trained from the warm start above; no checkpoint is
# loaded from or written to disk at this point.
trainer_scanvi.train(n_epochs=5)

# compute_predictions() returns an indexable result; element 1 is kept as the
# scANVI labels.
scanvi_outputs = trainer_scanvi.full_dataset.sequential().compute_predictions()
scanvi_labels = scanvi_outputs[1]
def _label_one_batch(trainer, gene_dataset, labelled_batch, unlabelled_batch):
    """Use one batch as the labelled set and another as the unlabelled set.

    The labelled indices are passed through ``np.random.choice`` without
    replacement over their full length, i.e. they are handed to the trainer
    in shuffled order. The unlabelled set is selected with a boolean mask.
    """
    batch_ids = gene_dataset.batch_indices.ravel()
    labelled = np.where(batch_ids == labelled_batch)[0]
    labelled = np.random.choice(labelled, len(labelled), replace=False)
    trainer.labelled_set = trainer.create_posterior(indices=labelled)
    trainer.unlabelled_set = trainer.create_posterior(
        indices=(batch_ids == unlabelled_batch))


def trainSCANVI(gene_dataset, model_type, filename, rep, nlayers=2,
                reconstruction_loss: str = "zinb", n_epochs: int = 5):
    """Train a SCANVI model, or restore it from its checkpoint, and return the trainer.

    The model is first initialised from the VAE returned by ``trainVAE`` (its
    state dict is loaded with ``strict=False``). If a checkpoint already exists
    at ``../<filename>/<model_type>.<reconstruction_loss>.rep<rep>.pkl`` its
    weights are loaded and the model is put in eval mode; otherwise the model
    is trained and its state dict is saved to that path.

    Args:
        gene_dataset: Dataset exposing ``nb_genes``, ``n_batches``,
            ``n_labels`` and ``batch_indices``.
        model_type: Selects the trainer and the labelled/unlabelled split:
            ``'scanvi1'`` labels batch 0 and leaves batch 1 unlabelled
            (alternate trainer); ``'scanvi2'`` is the mirror image;
            ``'scanvi0'`` labels only cells with a negative batch index
            (presumably none, i.e. an empty labelled set -- confirm) and
            treats every cell with a non-negative batch index as unlabelled;
            any other value uses ``SemiSupervisedTrainer`` with its own
            default split and ``classification_ratio=10``.
        filename: Name of the directory (relative to ``..``) that holds the
            checkpoints; also forwarded to ``trainVAE``.
        rep: Replicate identifier, embedded in the checkpoint file name.
        nlayers: Passed to ``SCANVI`` as ``n_layers``.
        reconstruction_loss: Passed to both ``trainVAE`` and ``SCANVI``.
        n_epochs: Number of training epochs when no checkpoint exists.
            Ignored when a checkpoint is loaded.

    Returns:
        The trainer wrapping the trained (or restored) SCANVI model.

    Note:
        The checkpoint name does not encode ``nlayers`` or ``n_epochs``, so a
        checkpoint written with different values is picked up unchanged.
    """
    vae_posterior = trainVAE(gene_dataset, filename, rep,
                             reconstruction_loss=reconstruction_loss)
    model_path = ('../' + filename + '/' + model_type + '.' +
                  reconstruction_loss + '.rep' + str(rep) + '.pkl')

    scanvi = SCANVI(gene_dataset.nb_genes, gene_dataset.n_batches,
                    gene_dataset.n_labels, n_layers=nlayers,
                    reconstruction_loss=reconstruction_loss)
    scanvi.load_state_dict(vae_posterior.model.state_dict(), strict=False)

    # Classifier settings shared by every model type.
    classifier_kwargs = dict(n_epochs_classifier=100,
                             lr_classification=5 * 1e-3)

    # The trainer is always constructed before any index shuffling so that
    # the order in which the global NumPy RNG is consumed stays the same.
    if model_type in ('scanvi1', 'scanvi2'):
        trainer_scanvi = AlternateSemiSupervisedTrainer(
            scanvi, gene_dataset, classification_ratio=0, **classifier_kwargs)
        labelled_batch = 0 if model_type == 'scanvi1' else 1
        _label_one_batch(trainer_scanvi, gene_dataset,
                         labelled_batch=labelled_batch,
                         unlabelled_batch=1 - labelled_batch)
    elif model_type == 'scanvi0':
        trainer_scanvi = SemiSupervisedTrainer(
            scanvi, gene_dataset, classification_ratio=0, **classifier_kwargs)
        batch_ids = gene_dataset.batch_indices.ravel()
        trainer_scanvi.labelled_set = trainer_scanvi.create_posterior(
            indices=(batch_ids < 0))
        trainer_scanvi.unlabelled_set = trainer_scanvi.create_posterior(
            indices=(batch_ids >= 0))
    else:
        trainer_scanvi = SemiSupervisedTrainer(
            scanvi, gene_dataset, classification_ratio=10, **classifier_kwargs)

    if os.path.isfile(model_path):
        trainer_scanvi.model.load_state_dict(torch.load(model_path))
        trainer_scanvi.model.eval()
    else:
        # Create the checkpoint directory up front: failing at torch.save
        # after training would throw the whole run away.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        trainer_scanvi.train(n_epochs=n_epochs)
        torch.save(trainer_scanvi.model.state_dict(), model_path)
    return trainer_scanvi