def main(argv):
    """Train a SCALE model and save the latent embedding of the dataset.

    Seeds NumPy and PyTorch from ``FLAGS.seed``, loads the dataset located at
    ``FLAGS.input_path``, fits a ``SCALE`` model on the training loader and
    writes the encoded latent features to ``SCALE.csv`` inside
    ``FLAGS.output_path``.

    Args:
        argv: Positional command-line arguments; more than one entry is
            rejected.

    Raises:
        app.UsageError: If ``argv`` contains more than one entry.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Seed both random number generators from the same flag.
    seed = FLAGS.seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Use CUDA when available, pinning the GPU selected by FLAGS.gpu.
    cuda_available = torch.cuda.is_available()
    device = 'cuda' if cuda_available else 'cpu'
    if cuda_available:
        torch.cuda.set_device(FLAGS.gpu)

    adata, trainloader, testloader = load_dataset(
        FLAGS.input_path,
        batch_name='batch',
        min_genes=FLAGS.min_peaks,
        min_cells=FLAGS.min_cells,
        batch_size=FLAGS.batch_size,
        n_top_genes=FLAGS.n_feature,
        log=None,
    )

    # The second axis of the loaded data sets the model's input width.
    n_inputs = adata.shape[1]

    tf.io.gfile.makedirs(FLAGS.output_path)

    layer_dims = [n_inputs, FLAGS.latent, FLAGS.encode_dim, FLAGS.decode_dim]
    model = SCALE(layer_dims, n_centroids=FLAGS.n_centroids)
    print(model)

    print('\n## Training Model ##')
    # init_gmm_params is given the test loader; fit runs on the train loader.
    model.init_gmm_params(testloader)
    fit_options = {
        'lr': FLAGS.lr,
        'weight_decay': FLAGS.weight_decay,
        'verbose': FLAGS.verbose,
        'device': device,
        'max_iter': FLAGS.max_iter,
        'outdir': FLAGS.output_path,
    }
    model.fit(trainloader, **fit_options)

    # Store the embedding on adata, then read it back from there for export.
    adata.obsm['latent'] = model.encodeBatch(
        testloader, device=device, out='z')
    latent_frame = pd.DataFrame(adata.obsm['latent'], index=adata.obs_names)

    csv_path = os.path.join(FLAGS.output_path, 'SCALE.csv')
    with tf.io.gfile.GFile(csv_path, 'w') as csv_file:
        latent_frame.to_csv(csv_file)
device=device, max_iter=args.max_iter, # name=name, outdir=outdir) torch.save(model.state_dict(), os.path.join(outdir, 'model.pt')) # save model else: print('\n## Loading Model: {}\n'.format(args.pretrain)) model.load_model(args.pretrain) model.to(device) ### output ### print('outdir: {}'.format(outdir)) # 1. latent feature adata.obsm['latent'] = model.encodeBatch(testloader, device=device, out='z') # 2. cluster sc.pp.neighbors(adata, n_neighbors=30, use_rep='latent') if args.cluster_method == 'leiden': sc.tl.leiden(adata) elif args.cluster_method == 'kmeans': kmeans = KMeans(n_clusters=k, n_init=20, random_state=0) adata.obs['kmeans'] = kmeans.fit_predict( adata.obsm['latent']).astype(str) # if args.reference in adata.obs: # cluster_report(adata.obs[args.reference].cat.codes, adata.obs[args.cluster_method].astype(int)) sc.settings.figdir = outdir
weight_decay=args.weight_decay, verbose=args.verbose, device=device, max_iter=args.max_iter, name=name, outdir=outdir) # torch.save(model.to('cpu').state_dict(), os.path.join(outdir, 'model.pt')) # save model else: print('\n## Loading Model: {}\n'.format(args.pretrain)) model.load_model(args.pretrain) model.to(device) ### output ### print('outdir: {}'.format(outdir)) # 1. latent feature feature = model.encodeBatch(testloader, device=device, out='z') pd.DataFrame(feature).to_csv(os.path.join(outdir, 'feature.txt'), sep='\t', header=False) # 2. cluster assignments pred = model.predict(testloader, device) pd.Series(pred, index=dataset.barcode).to_csv(os.path.join( outdir, 'cluster_assignments.txt'), sep='\t', header=False) # 3. imputed data if args.impute or args.binary: recon_x = model.encodeBatch(testloader, device,