Esempio n. 1
0
def main(argv):
    """Train a SCALE model and write its latent embedding to ``SCALE.csv``.

    The run is configured entirely through ``FLAGS``: random seed, GPU index,
    input/output paths, dataset filtering thresholds, network dimensions and
    optimiser settings are all read from there.

    Args:
        argv: Command-line arguments left over after flag parsing. Anything
            beyond a single element is rejected.

    Raises:
        app.UsageError: If ``argv`` holds more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Seed both NumPy and PyTorch from the same flag value.
    seed = FLAGS.seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Use the GPU selected by FLAGS.gpu when CUDA is usable, else the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        torch.cuda.set_device(FLAGS.gpu)

    # Note: the min_genes threshold is fed from the min_peaks flag.
    adata, train_loader, test_loader = load_dataset(
        FLAGS.input_path,
        batch_name='batch',
        min_genes=FLAGS.min_peaks,
        min_cells=FLAGS.min_cells,
        batch_size=FLAGS.batch_size,
        n_top_genes=FLAGS.n_feature,
        log=None,
    )

    # Second axis of the loaded data sets the model's input width.
    n_features = adata.shape[1]

    output_dir = FLAGS.output_path
    tf.io.gfile.makedirs(output_dir)

    layer_dims = [
        n_features,
        FLAGS.latent,
        FLAGS.encode_dim,
        FLAGS.decode_dim,
    ]
    model = SCALE(layer_dims, n_centroids=FLAGS.n_centroids)
    print(model)

    print('\n## Training Model ##')
    # GMM parameters are initialised from the test loader before fitting
    # on the train loader.
    model.init_gmm_params(test_loader)
    model.fit(
        train_loader,
        lr=FLAGS.lr,
        weight_decay=FLAGS.weight_decay,
        verbose=FLAGS.verbose,
        device=device,
        max_iter=FLAGS.max_iter,
        outdir=output_dir,
    )

    # Store the 'z' encoding on adata, then read it back for export so the
    # table reflects exactly what adata holds.
    adata.obsm['latent'] = model.encodeBatch(
        test_loader, device=device, out='z')
    latent_table = pd.DataFrame(adata.obsm['latent'], index=adata.obs_names)

    csv_path = os.path.join(output_dir, 'SCALE.csv')
    with tf.io.gfile.GFile(csv_path, 'w') as csv_file:
        latent_table.to_csv(csv_file)
Esempio n. 2
0
            device=device,
            max_iter=args.max_iter,
            #                   name=name,
            outdir=outdir)
        torch.save(model.state_dict(), os.path.join(outdir,
                                                    'model.pt'))  # save model
    else:
        print('\n## Loading Model: {}\n'.format(args.pretrain))
        model.load_model(args.pretrain)
        model.to(device)

    ### output ###
    print('outdir: {}'.format(outdir))
    # 1. latent feature
    adata.obsm['latent'] = model.encodeBatch(testloader,
                                             device=device,
                                             out='z')

    # 2. cluster
    sc.pp.neighbors(adata, n_neighbors=30, use_rep='latent')
    if args.cluster_method == 'leiden':
        sc.tl.leiden(adata)
    elif args.cluster_method == 'kmeans':
        kmeans = KMeans(n_clusters=k, n_init=20, random_state=0)
        adata.obs['kmeans'] = kmeans.fit_predict(
            adata.obsm['latent']).astype(str)

#     if args.reference in adata.obs:
#         cluster_report(adata.obs[args.reference].cat.codes, adata.obs[args.cluster_method].astype(int))

    sc.settings.figdir = outdir
Esempio n. 3
0
                  weight_decay=args.weight_decay,
                  verbose=args.verbose,
                  device=device,
                  max_iter=args.max_iter,
                  name=name,
                  outdir=outdir)
#         torch.save(model.to('cpu').state_dict(), os.path.join(outdir, 'model.pt')) # save model
    else:
        print('\n## Loading Model: {}\n'.format(args.pretrain))
        model.load_model(args.pretrain)
        model.to(device)

    ### output ###
    print('outdir: {}'.format(outdir))
    # 1. latent feature
    feature = model.encodeBatch(testloader, device=device, out='z')
    pd.DataFrame(feature).to_csv(os.path.join(outdir, 'feature.txt'),
                                 sep='\t',
                                 header=False)

    # 2. cluster assignments
    pred = model.predict(testloader, device)
    pd.Series(pred, index=dataset.barcode).to_csv(os.path.join(
        outdir, 'cluster_assignments.txt'),
                                                  sep='\t',
                                                  header=False)

    # 3. imputed data
    if args.impute or args.binary:
        recon_x = model.encodeBatch(testloader,
                                    device,