Example #1
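These listings are excerpts and omit their imports. A plausible set for Example #1 is sketched below; the `scale` module paths are assumptions about the SCALE package layout, not something shown in the snippet, and the flag definitions the function relies on are sketched after the example.

import os

import numpy as np
import pandas as pd
import tensorflow as tf
import torch
from absl import app, flags

# Assumed module paths for the SCALE package; adjust to the actual layout.
from scale import SCALE
from scale.dataset import load_dataset

FLAGS = flags.FLAGS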
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Seed NumPy and PyTorch for reproducibility
    np.random.seed(FLAGS.seed)
    torch.manual_seed(FLAGS.seed)

    # Use the requested GPU if CUDA is available, otherwise fall back to CPU
    if torch.cuda.is_available():
        device = 'cuda'
        torch.cuda.set_device(FLAGS.gpu)
    else:
        device = 'cpu'

    # Load the dataset and build train/test dataloaders; the peak thresholds
    # are passed through load_dataset's gene-named arguments
    adata, trainloader, testloader = load_dataset(
        FLAGS.input_path,
        batch_name='batch',
        min_genes=FLAGS.min_peaks,
        min_cells=FLAGS.min_cells,
        batch_size=FLAGS.batch_size,
        n_top_genes=FLAGS.n_feature,
        log=None,
    )

    input_dim = adata.shape[1]

    tf.io.gfile.makedirs(FLAGS.output_path)

    # dims = [input_dim, latent_dim, encoder_dims, decoder_dims]
    dims = [input_dim, FLAGS.latent, FLAGS.encode_dim, FLAGS.decode_dim]
    model = SCALE(dims, n_centroids=FLAGS.n_centroids)
    print(model)

    print('\n## Training Model ##')
    model.init_gmm_params(testloader)  # initialize the GMM prior from the data
    model.fit(trainloader,
              lr=FLAGS.lr,
              weight_decay=FLAGS.weight_decay,
              verbose=FLAGS.verbose,
              device=device,
              max_iter=FLAGS.max_iter,
              outdir=FLAGS.output_path)

    # Extract the latent representation z for every cell
    adata.obsm['latent'] = model.encodeBatch(testloader,
                                             device=device,
                                             out='z')

    # Write the latent matrix to SCALE.csv, indexed by cell name
    dr = pd.DataFrame(adata.obsm['latent'], index=adata.obs_names)

    with tf.io.gfile.GFile(os.path.join(FLAGS.output_path, 'SCALE.csv'),
                           'w') as f:
        dr.to_csv(f)
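The function above reads all of its settings from absl flags that are defined elsewhere in the script. A minimal sketch of that wiring is shown here: the flag names mirror the FLAGS.* accesses in main(), while the types and defaults are purely illustrative assumptions.

# Hypothetical flag definitions; types and defaults are illustrative only.
flags.DEFINE_string('input_path', None, 'Input count matrix.')
flags.DEFINE_string('output_path', 'output', 'Directory for results.')
flags.DEFINE_integer('seed', 18, 'Random seed.')
flags.DEFINE_integer('gpu', 0, 'CUDA device index.')
flags.DEFINE_integer('batch_size', 32, 'Mini-batch size.')
flags.DEFINE_integer('min_peaks', 100, 'Minimum peaks per cell.')
flags.DEFINE_integer('min_cells', 3, 'Minimum cells per peak.')
flags.DEFINE_integer('n_feature', 30000, 'Number of top features to keep.')
flags.DEFINE_integer('latent', 10, 'Latent dimension.')
flags.DEFINE_multi_integer('encode_dim', [1024, 128], 'Encoder layer sizes.')
flags.DEFINE_multi_integer('decode_dim', [], 'Decoder layer sizes.')
flags.DEFINE_integer('n_centroids', 30, 'Number of GMM components.')
flags.DEFINE_float('lr', 2e-4, 'Learning rate.')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight decay.')
flags.DEFINE_integer('max_iter', 30000, 'Maximum training iterations.')
flags.DEFINE_boolean('verbose', False, 'Print training progress.')


if __name__ == '__main__':
    app.run(main)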
Example #2
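Example #2 begins partway through its script: args (an argparse namespace), config, data, dataloader, cell_num, input_dim, k, epochs, lr, and device are all set up in the part that is not shown. The imports it depends on are roughly the following; the scale.* paths are again assumptions.

import time

import torch

# Assumed module paths for the SCALE package.
from scale import SCALE
from scale.utils import read_labels, cluster_report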
    print(
        'Cell number: {}\nInput_dim: {}\nn_centroids: {}\nEpoch: {}\nSeed: {}\nDevice: {}'
        .format(cell_num, input_dim, k, epochs, args.seed, args.device))
    print("============================")

    # dims = [input_dim, latent_dim, encoder_dims, decoder_dims]
    dims = [input_dim, config.latent, config.encode_dim, config.decode_dim]
    model = SCALE(dims, n_centroids=k, device=device)
    model.to(device)
    data = data.to(device)  # full matrix, used for GMM init and prediction
    # Train from scratch unless a pretrained checkpoint was supplied
    if not args.pretrain:
        print('\n## Training Model ##')
        t0 = time.time()
        model.init_gmm_params(data)  # initialize the GMM prior from the data
        model.fit(dataloader,
                  lr=lr,
                  weight_decay=config.weight_decay,
                  epochs=epochs,
                  verbose=args.verbose,
                  print_interval=config.print_interval)
        print('\nRunning Time: {:.2f} s'.format(time.time() - t0))
    else:
        print('\n## Loading Model {} ##\n'.format(args.pretrain))
        model.load_model(args.pretrain)

    # Clustering report: compare predicted clusters with the reference labels
    if args.reference:
        ref, classes = read_labels(args.reference)
        pred = model.predict(data)
        cluster_report(ref, pred, classes)

    outdir = args.outdir
    if not args.no_results:
Example #3
        .format(cell_num, input_dim, k, args.max_iter, batch_size,
                args.min_peaks, args.min_cells))
    print("============================")

    dims = [input_dim, args.latent, args.encode_dim, args.decode_dim]
    model = SCALE(dims, n_centroids=k)
    print(model)

    if not args.pretrain:
        print('\n## Training Model ##')
        model.init_gmm_params(testloader)
        model.fit(
            trainloader,
            lr=lr,
            weight_decay=args.weight_decay,
            verbose=args.verbose,
            device=device,
            max_iter=args.max_iter,
            # name=name,
            outdir=outdir)
        # Save the trained model weights
        torch.save(model.state_dict(), os.path.join(outdir, 'model.pt'))
    else:
        print('\n## Loading Model: {}\n'.format(args.pretrain))
        model.load_model(args.pretrain)
        model.to(device)

    ### output ###
    print('outdir: {}'.format(outdir))
    # 1. latent feature
    adata.obsm['latent'] = model.encodeBatch(testloader,