def main(argv):
    """Train a SCALE model on the input dataset and export latent features.

    Seeds numpy/torch, selects the compute device, loads the dataset via
    ``load_dataset``, fits the model, and writes the per-cell latent
    representation to ``<output_path>/SCALE.csv``.

    Args:
        argv: Command-line arguments left over after absl flag parsing.

    Raises:
        app.UsageError: If positional arguments were supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Make runs reproducible across numpy and torch.
    np.random.seed(FLAGS.seed)
    torch.manual_seed(FLAGS.seed)

    # Use the requested GPU when CUDA is available, otherwise fall back to CPU.
    if torch.cuda.is_available():
        torch.cuda.set_device(FLAGS.gpu)
        device = 'cuda'
    else:
        device = 'cpu'

    adata, trainloader, testloader = load_dataset(
        FLAGS.input_path,
        batch_name='batch',
        min_genes=FLAGS.min_peaks,
        min_cells=FLAGS.min_cells,
        batch_size=FLAGS.batch_size,
        n_top_genes=FLAGS.n_feature,
        log=None,
    )
    n_features = adata.shape[1]

    tf.io.gfile.makedirs(FLAGS.output_path)

    # Layer sizes handed to SCALE; first entry is the input dimensionality.
    layer_dims = [n_features, FLAGS.latent, FLAGS.encode_dim, FLAGS.decode_dim]
    model = SCALE(layer_dims, n_centroids=FLAGS.n_centroids)
    print(model)

    print('\n## Training Model ##')
    # GMM prior is initialised from the (unshuffled) test loader before fitting.
    model.init_gmm_params(testloader)
    model.fit(trainloader,
              lr=FLAGS.lr,
              weight_decay=FLAGS.weight_decay,
              verbose=FLAGS.verbose,
              device=device,
              max_iter=FLAGS.max_iter,
              outdir=FLAGS.output_path)

    # Export the learned latent representation, one row per cell.
    adata.obsm['latent'] = model.encodeBatch(testloader, device=device, out='z')
    latent_df = pd.DataFrame(adata.obsm['latent'], index=adata.obs_names)
    with tf.io.gfile.GFile(os.path.join(FLAGS.output_path, 'SCALE.csv'), 'w') as f:
        latent_df.to_csv(f)
# NOTE(review): this is an interior fragment of a larger script — it relies
# on names bound earlier (cell_num, input_dim, k, epochs, args, config,
# device, data, dataloader, lr) and on `time` being imported; confirm
# against the full file.

# Banner and run-configuration summary.
print(
    "\n**********************************************************************"
)
print(
    " SCALE: Single-Cell ATAC-seq analysis via Latent feature Extraction")
print(
    "**********************************************************************\n"
)
print("======== Parameters ========")
print(
    'Cell number: {}\nInput_dim: {}\nn_centroids: {}\nEpoch: {}\nSeed: {}\nDevice: {}'
    .format(cell_num, input_dim, k, epochs, args.seed, args.device))
print("============================")

# Build the SCALE model with the configured layer sizes, then move both the
# model and the full data tensor onto the selected device.
dims = [input_dim, config.latent, config.encode_dim, config.decode_dim]
model = SCALE(dims, n_centroids=k, device=device)
model.to(device)
data = data.to(device)

if not args.pretrain:
    # Train from scratch: initialise the GMM prior from the data, fit on the
    # dataloader, and report wall-clock training time.
    print('\n## Training Model ##')
    t0 = time.time()
    model.init_gmm_params(data)
    model.fit(dataloader,
              lr=lr,
              weight_decay=config.weight_decay,
              epochs=epochs,
              verbose=args.verbose,
              print_interval=config.print_interval)
    print('\nRunning Time: {:.2f} s'.format(time.time() - t0))
else:
    # A pretrained checkpoint path was supplied; presumably the weights are
    # loaded after this fragment — confirm in the full file.
    print('\n## Loading Model {} ##\n'.format(args.pretrain))
lr = args.lr k = args.n_centroids outdir = args.outdir + '/' if not os.path.exists(outdir): os.makedirs(outdir) print("\n======== Parameters ========") print( 'Cell number: {}\nPeak number: {}\nn_centroids: {}\nmax_iter: {}\nbatch_size: {}\ncell filter by peaks: {}\npeak filter by cells: {}' .format(cell_num, input_dim, k, args.max_iter, batch_size, args.min_peaks, args.min_cells)) print("============================") dims = [input_dim, args.latent, args.encode_dim, args.decode_dim] model = SCALE(dims, n_centroids=k) print(model) if not args.pretrain: print('\n## Training Model ##') model.init_gmm_params(testloader) model.fit( trainloader, lr=lr, weight_decay=args.weight_decay, verbose=args.verbose, device=device, max_iter=args.max_iter, # name=name, outdir=outdir) torch.save(model.state_dict(), os.path.join(outdir,
"\n**********************************************************************" ) print( " SCALE: Single-Cell ATAC-seq Analysis via Latent feature Extraction") print( "**********************************************************************\n" ) print("======== Parameters ========") print( 'Cell number: {}\nPeak number: {}\nn_centroids: {}\nmax_iter: {}\nbatch_size: {}\ncell filter by peaks: {}\nrare peak filter: {}\ncommon peak filter: {}' .format(cell_num, input_dim, k, args.max_iter, batch_size, args.min_peaks, args.low, args.high)) print("============================") dims = [input_dim, args.latent, args.encode_dim, args.decode_dim] model = SCALE(dims, n_centroids=k) # print(model) if not args.pretrain: print('\n## Training Model ##') model.init_gmm_params(testloader) model.fit(trainloader, lr=lr, weight_decay=args.weight_decay, verbose=args.verbose, device=device, max_iter=args.max_iter, name=name, outdir=outdir) # torch.save(model.to('cpu').state_dict(), os.path.join(outdir, 'model.pt')) # save model else:
args = parser.parse_args() if args.version == 2: from scale import SCALE adata = SCALE(args.data_list, batch_categories=args.batch_categories, profile=args.profile, join=args.join, batch_key=args.batch_key, min_features=args.min_features, min_cells=args.min_cells, n_top_features=args.n_top_features, batch_size=args.batch_size, lr=args.lr, max_iteration=args.max_iteration, impute=args.impute, batch_name=args.batch_name, seed=args.seed, gpu=args.gpu, outdir=args.outdir, projection=args.projection, chunk_size=args.chunk_size, ignore_umap=args.ignore_umap, repeat=args.repeat, verbose=True, assess=args.assess) elif args.version == 1: from extensions.scale import SCALE_v1 adata = SCALE_v1(