ex_matrix = pd.DataFrame(data=ds[:, :],
                                     index=ds.ra[args.gene_attribute],
                                     columns=ds.ca[args.cell_id_attribute]).T
            gene_names = pd.Series(ds.ra[args.gene_attribute])

    end_time = time.time()
    print(
        f'Loaded expression matrix of {ex_matrix.shape[0]} cells and {ex_matrix.shape[1]} genes in {end_time - start_time} seconds...',
        file=sys.stderr)

    tf_names = load_tf_names(args.tfs_fname.name)
    print(f'Loaded {len(tf_names)} TFs...', file=sys.stderr)

    ex_matrix, gene_names, tf_names = _prepare_input(ex_matrix, gene_names,
                                                     tf_names)
    tf_matrix, tf_matrix_gene_names = to_tf_matrix(ex_matrix, gene_names,
                                                   tf_names)

    print(f'starting {args.method} using {args.num_workers} processes...',
          file=sys.stderr)
    start_time = time.time()

    with Pool(args.num_workers) as p:
        adjs = list(
            tqdm.tqdm(p.imap(run_infer_partial_network,
                             target_gene_indices(gene_names,
                                                 target_genes='all'),
                             chunksize=1),
                      total=len(gene_names)))

    adj = pd.concat(adjs).sort_values(by='importance', ascending=False)
Example #2
0
    # Open the expression matrix file read-only (validation is skipped).
    lf = lp.connect(args.expression_mtx_fname.name, mode='r', validate=False)
    try:
        # The frame is built with the `Gene` row attribute as index and the
        # `CellID` column attribute as columns, then transposed so that we
        # end up with genes in columns:
        ex_matrix = pd.DataFrame(lf[:, :], index=lf.ra.Gene,
                                 columns=lf.ca.CellID).T
    finally:
        # Release the file handle even when reading the matrix fails
        # (e.g. a missing attribute or running out of memory).
        lf.close()
    gene_names = ex_matrix.columns
    print('Loaded expression matrix of {} cells and {} genes...'.format(
        ex_matrix.shape[0], ex_matrix.shape[1]),
          file=sys.stderr)

    tf_names = load_tf_names(args.tfs_fname.name)
    print('Loaded {} TFs...'.format(len(tf_names)), file=sys.stderr)

    # NOTE(review): `tf_matrix` / `tf_matrix_gene_names` are not referenced
    # again in this block; presumably `runInferPartialNet` reads them from an
    # enclosing/global scope -- confirm before renaming or removing them.
    expression_matrix, gene_names, tf_names = _prepare_input(
        ex_matrix, gene_names, tf_names)
    tf_matrix, tf_matrix_gene_names = to_tf_matrix(expression_matrix,
                                                   gene_names, tf_names)

    print('starting GRNBoost2 using {} processes...'.format(args.num_workers),
          file=sys.stderr)
    # The timer covers the inference and the merge/sort of the results only,
    # not the loading steps above.
    start_time = time.time()

    # One task per target gene index, spread over `args.num_workers` workers.
    with Pool(args.num_workers) as p:
        adjs = p.map(runInferPartialNet,
                     target_gene_indices(gene_names, target_genes='all'))
    # Merge the per-target results and put the highest importance first.
    adj = pd.concat(adjs).sort_values(by='importance', ascending=False)

    end_time = time.time()
    print('Done in {} seconds.'.format(end_time - start_time), file=sys.stderr)
    # Tab-separated output without the index column.
    adj.to_csv(args.output, index=False, sep="\t")