Ejemplo n.º 1
0
    def _create_tables(self, config, temp):
        """Distribute the reads of the input BAM and return the region names.

        Every record read from ``self._pipes["input_file"]`` that is neither
        unmapped nor flagged as a duplicate is written to each handle that
        ``self._open_handles`` mapped to the record's "RG" tag.  Once the
        input is exhausted, all handles in ``self._handle`` are closed and
        every process in ``self._procs`` is waited for.

        Progress is reported by a BAMTimer that writes to stderr when
        ``self._print_stats`` is true, and otherwise to a
        "pipe_coverage_<id>.stdout" file created inside ``temp``.

        ``config`` is accepted but not used by this method.

        Returns the region names obtained from ``self._get_intervals``.
        Raises RuntimeError if any waited-for process exits with a non-zero
        status.
        """
        out = sys.stderr
        if not self._print_stats:
            out = open(os.path.join(temp, "pipe_coverage_%i.stdout" % id(self)), "w")

        try:
            # Opening pipe/symlink created in _setup()
            with pysam.Samfile(self._pipes["input_file"]) as samfile:
                timer = BAMTimer(samfile, out=out)
                intervals, region_names = self._get_intervals(temp, samfile)
                mapping = self._open_handles(temp, samfile, intervals)
                for read in samfile:
                    if read.is_unmapped or read.is_duplicate:
                        continue

                    # 'rg' is None for reads that carry no RG tag.
                    rg = dict(read.tags).get("RG")
                    for handle in mapping[rg]:
                        handle.write(read)
                    timer.increment(read=read)
                timer.finalize()
        finally:
            # Close the log file even if reading fails part-way through;
            # sys.stderr is not ours to close.
            if out is not sys.stderr:
                out.close()

        for handle in self._handle.itervalues():
            handle.close()

        for proclst in self._procs.itervalues():
            for proc in proclst:
                # Capture the exit status once and reuse it in the message.
                returncode = proc.wait()
                if returncode != 0:
                    raise RuntimeError("Error while running process: %i" % returncode)

        return region_names
Ejemplo n.º 2
0
def process_file(handle, args):
    """Fill per-region, per-readgroup tables from a BAM handle and print them.

    Iterates over the regions produced by ``BAMRegionsIter(handle,
    args.regions)``.  For each named region, the matching table is fetched
    with ``get_region_table`` and every record in the region is handed to
    ``process_record`` together with the table entry selected by
    ``args.get_readgroup_func(record)``.  The collected tables are finally
    passed to ``print_table``.

    Always returns 0.
    """
    progress = BAMTimer(handle, step=1000000)

    tables = {}
    template = build_region_template(args, handle)
    for region in BAMRegionsIter(handle, args.regions):
        label = region.name
        if label is None:
            # Trailing unmapped reads
            continue

        # Without user-supplied regions, a file with more references than
        # args.max_contigs is collected under one shared name.
        if not args.regions and handle.nreferences > args.max_contigs:
            label = '<Genome>'

        table = get_region_table(tables, label, template)
        for (_, reads) in region:
            for read in reads:
                by_readgroup = table[args.get_readgroup_func(read)]
                process_record(by_readgroup, read, read.flag, region)
                progress.increment(read=read)
    progress.finalize()

    print_table(args, handle, tables)

    return 0
Ejemplo n.º 3
0
def process_file(handle, args):
    """Accumulate position-wise counts per region and print the totals.

    For each named region from ``BAMRegionsIter(handle, args.regions)`` a
    fresh deque of pending counts and a ``MappingToTotals`` are created.  At
    every position, the counts between the previous and the current position
    are flushed through ``mapping.process_counts`` before the records at that
    position are added with ``count_bases``.  After the last position of a
    region the remaining counts are flushed and the mapping is finalized.

    Returns 1, after writing an error message to stderr, if a
    (tid, position) pair is found to be smaller than the preceding one;
    otherwise prints the totals with ``print_table`` and returns 0.
    """
    progress = BAMTimer(handle, step=1000000)

    prev_tid = 0
    totals = build_totals_dict(args, handle)
    rg_to_smlbid, smlbid_to_smlb = build_rg_to_smlbid_keys(args, handle)
    zeroes = [0] * len(smlbid_to_smlb)

    for region in BAMRegionsIter(handle, args.regions):
        if region.name is None:
            # Trailing unmapped reads
            continue

        # Without user-supplied regions, a file with more references than
        # args.max_contigs has its regions renamed to one shared name.
        if not args.regions and handle.nreferences > args.max_contigs:
            region.name = '<Genome>'

        prev_pos = 0
        pending = collections.deque()
        mapping = MappingToTotals(totals, region, smlbid_to_smlb)
        for (pos, reads) in region:
            mapping.process_counts(pending, prev_pos, pos)

            for read in reads:
                progress.increment(read=read)
                count_bases(args, pending, read, rg_to_smlbid, zeroes)

            # The ordering check runs only after the position was counted.
            if (region.tid, pos) < (prev_tid, prev_pos):
                sys.stderr.write("ERROR: Input BAM file is unsorted\n")
                return 1

            prev_tid, prev_pos = region.tid, pos

        # Process columns in region after last read
        mapping.process_counts(pending, prev_pos, float("inf"))
        mapping.finalize()
    progress.finalize()

    print_table(handle, args, totals)

    return 0
Ejemplo n.º 4
0
def process_file(handle, args):
    """Build the per-region readgroup tables for a BAM handle and print them.

    Each named region yielded by ``BAMRegionsIter(handle, args.regions)``
    is looked up (or created) in the result dictionary via
    ``get_region_table``; every record of the region is then passed to
    ``process_record`` along with the sub-table chosen by
    ``args.get_readgroup_func``.  ``print_table`` receives the result once
    all regions have been read.

    Always returns 0.
    """
    timer = BAMTimer(handle, step=1000000)

    tables_by_name = {}
    template = build_region_template(args, handle)
    for region in BAMRegionsIter(handle, args.regions):
        if region.name is None:
            # Trailing unmapped reads
            continue

        # With no explicit regions and more references than allowed by
        # args.max_contigs, everything is filed under '<Genome>'.
        merge = not args.regions and (handle.nreferences > args.max_contigs)
        key = '<Genome>' if merge else region.name

        table = get_region_table(tables_by_name, key, template)
        # Lazily flatten the (position, records) pairs into single records.
        records = (record for (_, batch) in region for record in batch)
        for record in records:
            group = args.get_readgroup_func(record)
            process_record(table[group], record, record.flag, region)
            timer.increment(read=record)
    timer.finalize()

    print_table(args, handle, tables_by_name)

    return 0
Ejemplo n.º 5
0
    def _create_tables(self, config, temp):
        """Route each usable read to its output handles; return region names.

        Opens ``self._pipes["input_file"]`` with pysam and, for every record
        that is neither unmapped nor a duplicate, writes the record to all
        handles that ``self._open_handles`` mapped to its "RG" tag.  When the
        input is exhausted, the handles in ``self._handle`` are closed and
        all processes in ``self._procs`` are waited for.

        The BAMTimer progress output goes to stderr if ``self._print_stats``
        is true; otherwise it goes to a "pipe_coverage_<id>.stdout" file in
        ``temp``.

        ``config`` is accepted but not used by this method.

        Returns the region names obtained from ``self._get_intervals``.
        Raises RuntimeError if a waited-for process returns a non-zero
        exit status.
        """
        out = sys.stderr
        if not self._print_stats:
            out = open(
                os.path.join(temp, "pipe_coverage_%i.stdout" % id(self)), "w")

        try:
            # Opening pipe/symlink created in _setup()
            with pysam.Samfile(self._pipes["input_file"]) as samfile:
                timer = BAMTimer(samfile, out=out)
                intervals, region_names = self._get_intervals(temp, samfile)
                mapping = self._open_handles(temp, samfile, intervals)
                for read in samfile:
                    if read.is_unmapped or read.is_duplicate:
                        continue

                    # 'rg' is None for reads that carry no RG tag.
                    rg = dict(read.tags).get("RG")
                    for handle in mapping[rg]:
                        handle.write(read)
                    timer.increment(read=read)
                timer.finalize()
        finally:
            # Guarantee the log file is released on errors as well; stderr
            # is left open since this method did not open it.
            if out is not sys.stderr:
                out.close()

        for handle in self._handle.itervalues():
            handle.close()

        for proclst in self._procs.itervalues():
            for proc in proclst:
                # Capture the exit status once and reuse it in the message.
                returncode = proc.wait()
                if returncode != 0:
                    raise RuntimeError("Error while running process: %i" %
                                       returncode)

        return region_names
Ejemplo n.º 6
0
def process_file(handle, args):
    """Walk a BAM handle region by region, counting bases into the totals.

    A new count deque and ``MappingToTotals`` object are set up for each
    named region.  For each (position, records) pair of the region, counts
    up to that position are first handed to ``mapping.process_counts``, and
    the records are then added through ``count_bases``.  Whatever remains
    after the last position is flushed with an infinite end position before
    ``mapping.finalize`` is called.

    Returns 1 and reports "Input BAM file is unsorted" on stderr when a
    (tid, position) pair precedes the one seen before it; returns 0 after
    printing the totals otherwise.
    """
    timer = BAMTimer(handle, step=1000000)

    seen_tid = 0
    totals = build_totals_dict(args, handle)
    rg_to_smlbid, smlbid_to_smlb = build_rg_to_smlbid_keys(args, handle)
    blank_row = [0] * len(smlbid_to_smlb)

    for region in BAMRegionsIter(handle, args.regions):
        if region.name is None:
            # Trailing unmapped reads
            continue
        elif not args.regions and (handle.nreferences > args.max_contigs):
            # Too many references to report separately: rename in place.
            region.name = '<Genome>'

        seen_pos = 0
        window = collections.deque()
        mapping = MappingToTotals(totals, region, smlbid_to_smlb)
        for (current_pos, batch) in region:
            mapping.process_counts(window, seen_pos, current_pos)

            for record in batch:
                timer.increment(read=record)
                count_bases(args, window, record, rg_to_smlbid, blank_row)

            current = (region.tid, current_pos)
            if current < (seen_tid, seen_pos):
                sys.stderr.write("ERROR: Input BAM file is unsorted\n")
                return 1

            seen_pos = current_pos
            seen_tid = region.tid

        # Process columns in region after last read
        mapping.process_counts(window, seen_pos, float("inf"))
        mapping.finalize()
    timer.finalize()

    print_table(handle, args, totals)

    return 0