def build_from_mol_counter(molecule_counter, subsample_rate=1.0, subsample_result=None):
    """Build a GeneBCMatrices object from the contents of a MoleculeCounter.

    Args:
        molecule_counter: Source object providing the barcode whitelist,
            barcode length, gem groups, reference columns and the
            molecule iterator.
        subsample_rate (float): Forwarded unchanged to
            ``molecule_counter.get_molecule_iter``.
        subsample_result (dict): Optional output dict. When it is not None,
            its 'mapped_reads' entry is set to the sum of ``reads`` over
            every molecule that was iterated.

    Returns:
        The populated GeneBCMatrices.
    """
    # Rebuild the barcode sequences that the original matrices used.
    whitelist = cr_utils.load_barcode_whitelist(
        molecule_counter.get_barcode_whitelist())

    # Fall back to the length of the first whitelist entry when the counter
    # reports no (or a falsy) barcode length.
    bc_length = molecule_counter.get_barcode_length()
    if not bc_length:
        bc_length = len(whitelist[0])

    all_barcode_seqs = cr_utils.format_barcode_seqs(
        whitelist, molecule_counter.get_gem_groups())

    # Rebuild Gene tuples from the reference columns of the molecule info.
    ref_gene_ids = molecule_counter.get_ref_column('gene_ids')
    ref_genome_ids = molecule_counter.get_ref_column('genome_ids')
    ref_gene_names = molecule_counter.get_ref_column('gene_names')

    gene_records = []
    for gene_id, gene_name in itertools.izip(ref_gene_ids, ref_gene_names):
        # Only id and name are known here; the remaining fields stay None.
        gene_records.append(
            cr_constants.Gene(gene_id, gene_name, None, None, None))

    genes_by_genome = cr_utils.split_genes_by_genomes(gene_records, ref_genome_ids)
    result = GeneBCMatrices(ref_genome_ids, genes_by_genome, all_barcode_seqs)

    # Fill the matrices while tallying reads seen under subsampling.
    total_reads = 0
    molecules = molecule_counter.get_molecule_iter(
        bc_length, subsample_rate=subsample_rate)
    for molecule in molecules:
        result.add(molecule.genome, molecule.gene_id, molecule.barcode)
        total_reads += molecule.reads

    if subsample_result is not None:
        subsample_result['mapped_reads'] = total_reads

    return result
def main(args, outs):
    """Count genes for one BAM chunk and write the matrices and reporter.

    Iterates every read in ``args.chunk_input``, asks the reporter to
    classify it, and adds each read flagged as confidently mapped and
    deduplicated to the gene-barcode matrices.

    Args:
        args: Stage arguments. Reads ``chunk_input``, ``barcode_whitelist``,
            ``barcode_summary``, ``reference_path``, ``align``,
            ``gem_groups`` and ``chemistry_def``.
        outs: Stage outputs. Reads the destination paths ``matrices_h5``
            and ``chunked_reporter``.
    """
    in_bam = tk_bam.create_bam_infile(args.chunk_input)
    try:
        chroms = in_bam.references

        barcode_whitelist = cr_utils.load_barcode_whitelist(args.barcode_whitelist)
        # The barcode summary is only loaded when there is no whitelist.
        if barcode_whitelist:
            barcode_summary = None
        else:
            barcode_summary = cr_utils.load_barcode_summary(args.barcode_summary)

        gene_index = cr_reference.GeneIndex.load_pickle(
            cr_utils.get_reference_genes_index(args.reference_path))

        reporter = cr_report.Reporter(
            reference_path=args.reference_path,
            high_conf_mapq=cr_utils.get_high_conf_mapq(args.align),
            gene_index=gene_index,
            chroms=chroms,
            barcode_whitelist=barcode_whitelist,
            barcode_summary=barcode_summary,
            gem_groups=args.gem_groups)

        if barcode_whitelist:
            barcode_seqs = cr_utils.format_barcode_seqs(
                barcode_whitelist, args.gem_groups)
        else:
            barcode_seqs = barcode_summary

        genomes = cr_utils.get_reference_genomes(args.reference_path)
        genes = cr_utils.split_genes_by_genomes(gene_index.get_genes(), genomes)
        matrices = cr_matrix.GeneBCMatrices(genomes, genes, barcode_seqs)

        # args.chemistry_def is not modified in the loop, so evaluate this
        # once instead of once per read.
        # NOTE(review): assumes has_umis is a pure function of its argument.
        use_umis = cr_chem.has_umis(args.chemistry_def)

        for read in in_bam:
            is_conf_mapped_deduped, genome, gene_id, bc = \
                reporter.count_genes_bam_cb(read, use_umis=use_umis)
            if is_conf_mapped_deduped:
                matrices.add(genome, gene_id, bc)
    finally:
        # Release the BAM handle even if setup or counting raised.
        in_bam.close()

    matrices.save_h5(outs.matrices_h5)
    reporter.save(outs.chunked_reporter)