# Example 1
# 0
def main(argv):
    """Train a SCALE model from absl FLAGS and save the latent embedding.

    Loads the dataset, fits SCALE on the training loader, encodes the test
    loader into the latent space, and writes the result to
    ``<output_path>/SCALE.csv``.

    Args:
        argv: Positional command-line arguments left over after absl flag
            parsing; anything beyond the program name is an error.

    Raises:
        app.UsageError: If extra positional arguments are supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Seed both NumPy and PyTorch for reproducible runs.
    np.random.seed(FLAGS.seed)
    torch.manual_seed(FLAGS.seed)

    # Prefer the requested GPU when CUDA is available; fall back to CPU.
    if not torch.cuda.is_available():
        compute_device = 'cpu'
    else:
        compute_device = 'cuda'
        torch.cuda.set_device(FLAGS.gpu)

    # NOTE(review): `min_genes` is fed from FLAGS.min_peaks — presumably an
    # ATAC-seq peaks-as-genes convention; confirm against load_dataset.
    adata, trainloader, testloader = load_dataset(
        FLAGS.input_path,
        batch_name='batch',
        min_genes=FLAGS.min_peaks,
        min_cells=FLAGS.min_cells,
        batch_size=FLAGS.batch_size,
        n_top_genes=FLAGS.n_feature,
        log=None,
    )

    n_features = adata.shape[1]

    tf.io.gfile.makedirs(FLAGS.output_path)

    # Network layout: [input, latent, encoder dims, decoder dims].
    layer_dims = [n_features, FLAGS.latent, FLAGS.encode_dim, FLAGS.decode_dim]
    model = SCALE(layer_dims, n_centroids=FLAGS.n_centroids)
    print(model)

    print('\n## Training Model ##')
    # Initialize the GMM prior from the test loader before fitting.
    model.init_gmm_params(testloader)
    model.fit(
        trainloader,
        lr=FLAGS.lr,
        weight_decay=FLAGS.weight_decay,
        verbose=FLAGS.verbose,
        device=compute_device,
        max_iter=FLAGS.max_iter,
        outdir=FLAGS.output_path,
    )

    # Encode every cell into the latent space ('z') and stash it on adata.
    adata.obsm['latent'] = model.encodeBatch(testloader,
                                             device=compute_device,
                                             out='z')

    latent_df = pd.DataFrame(adata.obsm['latent'], index=adata.obs_names)

    csv_path = os.path.join(FLAGS.output_path, 'SCALE.csv')
    with tf.io.gfile.GFile(csv_path, 'w') as out_file:
        latent_df.to_csv(out_file)
# Example 2
# 0
    # Banner announcing the SCALE tool on stdout.
    print(
        "\n**********************************************************************"
    )
    print(
        "  SCALE: Single-Cell ATAC-seq analysis via Latent feature Extraction")
    print(
        "**********************************************************************\n"
    )
    # Echo the run configuration so logs capture the exact setup.
    # NOTE(review): `cell_num`, `input_dim`, `k`, `epochs`, `args`, `config`,
    # `data`, `dataloader`, `lr` and `device` are defined earlier, outside
    # this excerpt — verify against the full script.
    print("======== Parameters ========")
    print(
        'Cell number: {}\nInput_dim: {}\nn_centroids: {}\nEpoch: {}\nSeed: {}\nDevice: {}'
        .format(cell_num, input_dim, k, epochs, args.seed, args.device))
    print("============================")

    # Network layout: [input, latent, encoder dims, decoder dims].
    dims = [input_dim, config.latent, config.encode_dim, config.decode_dim]
    model = SCALE(dims, n_centroids=k, device=device)
    model.to(device)
    data = data.to(device)  # move the full dataset to the compute device
    if not args.pretrain:
        # Train from scratch and report wall-clock time.
        print('\n## Training Model ##')
        t0 = time.time()
        # Initialize the GMM mixture parameters from the data before fitting.
        model.init_gmm_params(data)
        model.fit(dataloader,
                  lr=lr,
                  weight_decay=config.weight_decay,
                  epochs=epochs,
                  verbose=args.verbose,
                  print_interval=config.print_interval)
        print('\nRunning Time: {:.2f} s'.format(time.time() - t0))
    else:
        # A pretrained checkpoint was requested; the actual loading happens
        # after this excerpt.
        print('\n## Loading Model {} ##\n'.format(args.pretrain))
# Example 3
# 0
    # Hyperparameters pulled from the parsed CLI namespace.
    lr = args.lr
    k = args.n_centroids  # number of GMM components (expected clusters)

    # Ensure the output directory exists; trailing slash kept for later joins.
    outdir = args.outdir + '/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Echo the run configuration so logs capture the exact setup.
    # NOTE(review): `cell_num`, `input_dim`, `batch_size`, `testloader`,
    # `trainloader` and `device` are defined earlier, outside this excerpt.
    print("\n======== Parameters ========")
    print(
        'Cell number: {}\nPeak number: {}\nn_centroids: {}\nmax_iter: {}\nbatch_size: {}\ncell filter by peaks: {}\npeak filter by cells: {}'
        .format(cell_num, input_dim, k, args.max_iter, batch_size,
                args.min_peaks, args.min_cells))
    print("============================")

    # Network layout: [input, latent, encoder dims, decoder dims].
    dims = [input_dim, args.latent, args.encode_dim, args.decode_dim]
    model = SCALE(dims, n_centroids=k)
    print(model)

    if not args.pretrain:
        # Train from scratch; checkpoints/outputs written under `outdir`.
        print('\n## Training Model ##')
        # Initialize the GMM mixture parameters from the test loader.
        model.init_gmm_params(testloader)
        model.fit(
            trainloader,
            lr=lr,
            weight_decay=args.weight_decay,
            verbose=args.verbose,
            device=device,
            max_iter=args.max_iter,
            #                   name=name,
            outdir=outdir)
        torch.save(model.state_dict(), os.path.join(outdir,
# Example 4
# 0
        "\n**********************************************************************"
    )
    # Banner announcing the SCALE tool on stdout.
    print(
        "  SCALE: Single-Cell ATAC-seq Analysis via Latent feature Extraction")
    print(
        "**********************************************************************\n"
    )
    # Echo the run configuration so logs capture the exact setup.
    # NOTE(review): `cell_num`, `input_dim`, `k`, `batch_size`, `lr`, `name`,
    # `outdir`, `testloader`, `trainloader` and `device` are defined earlier,
    # outside this excerpt.
    print("======== Parameters ========")
    print(
        'Cell number: {}\nPeak number: {}\nn_centroids: {}\nmax_iter: {}\nbatch_size: {}\ncell filter by peaks: {}\nrare peak filter: {}\ncommon peak filter: {}'
        .format(cell_num, input_dim, k, args.max_iter, batch_size,
                args.min_peaks, args.low, args.high))
    print("============================")

    # Network layout: [input, latent, encoder dims, decoder dims].
    dims = [input_dim, args.latent, args.encode_dim, args.decode_dim]
    model = SCALE(dims, n_centroids=k)
    #     print(model)

    if not args.pretrain:
        # Train from scratch; outputs written under `outdir`.
        print('\n## Training Model ##')
        # Initialize the GMM mixture parameters from the test loader.
        model.init_gmm_params(testloader)
        model.fit(trainloader,
                  lr=lr,
                  weight_decay=args.weight_decay,
                  verbose=args.verbose,
                  device=device,
                  max_iter=args.max_iter,
                  name=name,
                  outdir=outdir)
#         torch.save(model.to('cpu').state_dict(), os.path.join(outdir, 'model.pt')) # save model
    else:
# Example 5
# 0
    # Parse CLI flags; `parser` is built earlier, outside this excerpt.
    args = parser.parse_args()

    # Dispatch on the requested SCALE version: v2 exposes a single
    # function-style entry point that runs the whole pipeline and returns
    # the annotated AnnData object.
    if args.version == 2:
        from scale import SCALE
        adata = SCALE(args.data_list,
                      batch_categories=args.batch_categories,
                      profile=args.profile,
                      join=args.join,
                      batch_key=args.batch_key,
                      min_features=args.min_features,
                      min_cells=args.min_cells,
                      n_top_features=args.n_top_features,
                      batch_size=args.batch_size,
                      lr=args.lr,
                      max_iteration=args.max_iteration,
                      impute=args.impute,
                      batch_name=args.batch_name,
                      seed=args.seed,
                      gpu=args.gpu,
                      outdir=args.outdir,
                      projection=args.projection,
                      chunk_size=args.chunk_size,
                      ignore_umap=args.ignore_umap,
                      repeat=args.repeat,
                      verbose=True,  # always verbose, regardless of CLI flags
                      assess=args.assess)

    elif args.version == 1:
        from extensions.scale import SCALE_v1

        adata = SCALE_v1(