コード例 #1
0
    def build_from_mol_counter(molecule_counter, subsample_rate=1.0,
                               subsample_result=None):
        """ Construct a GeneBCMatrices object from a MoleculeCounter.
            Args: subsample_result (dict) - Return some metrics results into this dict. """

        # Reconstruct all barcode sequences in the original matrices
        barcode_whitelist = cr_utils.load_barcode_whitelist(molecule_counter.get_barcode_whitelist())
        barcode_length = molecule_counter.get_barcode_length() or len(barcode_whitelist[0])

        gem_groups = molecule_counter.get_gem_groups()
        barcode_seqs = cr_utils.format_barcode_seqs(barcode_whitelist, gem_groups)

        # Reconstruct Gene tuples from the molecule info ref columns
        gene_ids = molecule_counter.get_ref_column('gene_ids')
        genome_ids = molecule_counter.get_ref_column('genome_ids')
        gene_names = molecule_counter.get_ref_column('gene_names')
        gene_tuples = [cr_constants.Gene(gid, gname, None, None, None) for (gid, gname) in itertools.izip(gene_ids, gene_names)]
        genes = cr_utils.split_genes_by_genomes(gene_tuples, genome_ids)

        matrices = GeneBCMatrices(genome_ids, genes, barcode_seqs)

        # Track results of subsampling
        reads = 0

        for mol in molecule_counter.get_molecule_iter(barcode_length, subsample_rate=subsample_rate):
            matrices.add(mol.genome, mol.gene_id, mol.barcode)
            reads += mol.reads

        if subsample_result is not None:
            subsample_result['mapped_reads'] = reads

        return matrices
コード例 #2
0
def main(args, outs):
    in_bam = tk_bam.create_bam_infile(args.chunk_input)

    chroms = in_bam.references

    barcode_whitelist = cr_utils.load_barcode_whitelist(args.barcode_whitelist)
    barcode_summary = cr_utils.load_barcode_summary(
        args.barcode_summary) if not barcode_whitelist else None

    gene_index = cr_reference.GeneIndex.load_pickle(
        cr_utils.get_reference_genes_index(args.reference_path))
    reporter = cr_report.Reporter(reference_path=args.reference_path,
                                  high_conf_mapq=cr_utils.get_high_conf_mapq(
                                      args.align),
                                  gene_index=gene_index,
                                  chroms=chroms,
                                  barcode_whitelist=barcode_whitelist,
                                  barcode_summary=barcode_summary,
                                  gem_groups=args.gem_groups)

    if barcode_whitelist:
        barcode_seqs = cr_utils.format_barcode_seqs(barcode_whitelist,
                                                    args.gem_groups)
    else:
        barcode_seqs = barcode_summary

    genomes = cr_utils.get_reference_genomes(args.reference_path)
    genes = cr_utils.split_genes_by_genomes(gene_index.get_genes(), genomes)
    matrices = cr_matrix.GeneBCMatrices(genomes, genes, barcode_seqs)

    for read in in_bam:
        is_conf_mapped_deduped, genome, gene_id, bc = reporter.count_genes_bam_cb(
            read, use_umis=cr_chem.has_umis(args.chemistry_def))
        if is_conf_mapped_deduped:
            matrices.add(genome, gene_id, bc)

    in_bam.close()

    matrices.save_h5(outs.matrices_h5)
    reporter.save(outs.chunked_reporter)