def _create_tables(self, config, temp):
    """Stream reads from the input BAM pipe, writing each non-duplicate,
    mapped read to every interval handle matching its read-group, then
    close all output handles and verify that every child process exited
    cleanly.

    Returns the region names produced by self._get_intervals().
    Raises RuntimeError if any child process exited with a non-zero code.
    """
    # Opening pipe/symlink created in _setup()
    out = sys.stderr
    if not self._print_stats:
        out = open(os.path.join(temp, "pipe_coverage_%i.stdout" % id(self)), "w")

    try:
        with pysam.Samfile(self._pipes["input_file"]) as samfile:
            timer = BAMTimer(samfile, out=out)
            intervals, region_names = self._get_intervals(temp, samfile)
            mapping = self._open_handles(temp, samfile, intervals)
            for read in samfile:
                if read.is_unmapped or read.is_duplicate:
                    continue

                rg = dict(read.tags).get("RG")
                for handle in mapping[rg]:
                    handle.write(read)
                timer.increment(read=read)
            timer.finalize()
    finally:
        # FIX: previously the progress file leaked if an exception escaped
        # the read loop; always close it when we opened it ourselves.
        if not self._print_stats:
            out.close()

    for handle in self._handle.itervalues():
        handle.close()

    for proclst in self._procs.itervalues():
        for proc in proclst:
            # FIX: call wait() exactly once and reuse the return code,
            # instead of waiting a second time just to format the message.
            returncode = proc.wait()
            if returncode != 0:
                raise RuntimeError("Error while running process: %i"
                                   % returncode)

    return region_names
def process_file(handle, args):
    """Accumulate per-region, per-readgroup statistics over a BAM file.

    Iterates every requested region (or all references), dispatching each
    record to its readgroup's table, then prints the collected counts.
    Always returns 0.
    """
    timer = BAMTimer(handle, step=1000000)
    counts = {}
    region_template = build_region_template(args, handle)

    # Invariant across the whole file: with no explicit regions and too
    # many contigs, all statistics are collapsed into one '<Genome>' row.
    collapse_to_genome = (not args.regions
                          and (handle.nreferences > args.max_contigs))

    for region in BAMRegionsIter(handle, args.regions):
        # A nameless region carries the trailing unmapped reads; skip it.
        if region.name is None:
            continue

        table_name = '<Genome>' if collapse_to_genome else region.name
        region_table = get_region_table(counts, table_name, region_template)

        for _, records in region:
            for record in records:
                rg_table = region_table[args.get_readgroup_func(record)]
                process_record(rg_table, record, record.flag, region)
                timer.increment(read=record)

    timer.finalize()
    print_table(args, handle, counts)

    return 0
def _create_tables(self, config, temp):
    """Consume the input BAM pipe created in _setup(), copy every mapped,
    non-duplicate read to the handles registered for its read-group, and
    finally check the exit status of all helper processes.

    Returns the region names from self._get_intervals().
    Raises RuntimeError if a helper process returned a non-zero exit code.
    """
    # Opening pipe/symlink created in _setup()
    out = sys.stderr
    if not self._print_stats:
        out = open(
            os.path.join(temp, "pipe_coverage_%i.stdout" % id(self)), "w")

    try:
        with pysam.Samfile(self._pipes["input_file"]) as samfile:
            timer = BAMTimer(samfile, out=out)
            intervals, region_names = self._get_intervals(temp, samfile)
            mapping = self._open_handles(temp, samfile, intervals)
            for read in samfile:
                if read.is_unmapped or read.is_duplicate:
                    continue

                rg = dict(read.tags).get("RG")
                for handle in mapping[rg]:
                    handle.write(read)
                timer.increment(read=read)
            timer.finalize()
    finally:
        # FIX: ensure the stats file is closed even if reading the pipe
        # raises; previously it leaked on any exception.
        if not self._print_stats:
            out.close()

    for handle in self._handle.itervalues():
        handle.close()

    for proclst in self._procs.itervalues():
        for proc in proclst:
            # FIX: wait() was invoked twice per process (once for the test,
            # once for the error message); capture the code a single time.
            returncode = proc.wait()
            if returncode != 0:
                raise RuntimeError("Error while running process: %i"
                                   % returncode)

    return region_names
def process_file(handle, args):
    """Walk a (position-sorted) BAM file region by region, folding per-base
    counts into a totals structure keyed by sample/library identifiers.

    Returns 0 on success, or 1 (after writing to stderr) if the input
    turns out not to be coordinate-sorted.
    """
    timer = BAMTimer(handle, step=1000000)
    # (tid, position) of the most recently processed column; used below to
    # detect an unsorted input file.
    last_tid = 0
    totals = build_totals_dict(args, handle)
    rg_to_smlbid, smlbid_to_smlb = build_rg_to_smlbid_keys(args, handle)
    # One counter slot per sample/library id; used as a prototype when
    # counting bases for each record.
    template = [0] * len(smlbid_to_smlb)
    for region in BAMRegionsIter(handle, args.regions):
        if region.name is None:
            # Trailing unmapped reads
            continue
        elif not args.regions and (handle.nreferences > args.max_contigs):
            # Too many contigs to report individually; collapse everything
            # into a single '<Genome>' entry.
            region.name = '<Genome>'

        last_pos = 0
        counts = collections.deque()
        mapping = MappingToTotals(totals, region, smlbid_to_smlb)
        for (position, records) in region:
            # Flush the columns strictly before 'position' into the totals
            # before adding the reads that start here.
            mapping.process_counts(counts, last_pos, position)
            for record in records:
                timer.increment(read=record)
                count_bases(args, counts, record, rg_to_smlbid, template)

            # Positions must be non-decreasing within and across regions;
            # a step backwards means the BAM file is not sorted.
            if (region.tid, position) < (last_tid, last_pos):
                sys.stderr.write("ERROR: Input BAM file is unsorted\n")
                return 1

            last_pos = position
            last_tid = region.tid

        # Process columns in region after last read
        mapping.process_counts(counts, last_pos, float("inf"))
        mapping.finalize()
    timer.finalize()

    print_table(handle, args, totals)

    return 0