Beispiel #1
0
class BedStats(object):
    """Collect summary statistics over the regions yielded by ``bed``.

    All counting happens in the constructor; ``write`` only reports.

    Attributes set by the constructor:
        total: number of regions seen.
        size: sum of ``region.end - region.start`` over all regions.
        lengths: a ``Counts`` instance fed every region length.
        refs: dict mapping ``region.chrom`` to its number of regions.
        names: dict mapping ``region.name`` to its number of regions
            (left empty unless ``names`` is true).
        regiontagger: ``RegionTagger(gtf_file)`` when ``gtf_file`` is
            given, otherwise ``None``.
    """

    def __init__(self, bed, gtf_file=None, names=False):
        """Iterate over ``bed`` once and accumulate the statistics.

        Args:
            bed: iterable of region objects exposing ``chrom``, ``start``,
                ``end``, ``strand`` and (when ``names`` is true) ``name``.
            gtf_file: optional value passed to ``RegionTagger``; when
                falsy, no region tagging is performed.
            names: when true, also count regions per ``region.name``.
        """
        self.regiontagger = None
        if gtf_file:
            self.regiontagger = RegionTagger(gtf_file)

        self.total = 0
        self.size = 0
        self.lengths = Counts()
        self.refs = {}
        self.names = {}
        for region in bed:
            # Compute the length once; it feeds both the running total
            # and the length distribution.
            length = region.end - region.start
            self.total += 1
            self.size += length
            self.lengths.add(length)

            if names:
                self.names[region.name] = self.names.get(region.name, 0) + 1

            self.refs[region.chrom] = self.refs.get(region.chrom, 0) + 1

            if self.regiontagger:
                self.regiontagger.add_region(region.chrom, region.start,
                                             region.end, region.strand)

    def write(self, out=None):
        """Write a tab-delimited text report of the statistics to ``out``.

        Args:
            out: object with a ``write(str)`` method. Defaults to the
                current ``sys.stdout``, looked up at call time so that a
                redirected or replaced ``sys.stdout`` is honoured.
        """
        if out is None:
            out = sys.stdout

        out.write("Regions:\t%s\n" % format_number(self.total))
        out.write("Total coverage:\t%s bases\n" % format_number(self.size))
        out.write("Average size:\t%s bases\n" % self.lengths.mean())
        out.write("\n")
        out.write("Reference distribution\n")
        out.write("ref\tcount\n")
        for refname in ngsutils.support.ngs_utils.natural_sort(
                list(self.refs)):
            out.write("%s\t%s\n" %
                      (refname, format_number(self.refs[refname])))

        # The name section only appears when names were actually counted.
        if self.names:
            out.write("\nName distribution\n")
            out.write("name\tcount\n")
            for name in ngsutils.support.ngs_utils.natural_sort(
                    list(self.names)):
                out.write("%s\t%s\n" % (name, format_number(self.names[name])))

        if self.regiontagger:
            out.write("\n")
            out.write("Mapping regions\n")
            for k in sorted(self.regiontagger.counts):
                out.write("%s\t%s\n" % (k, self.regiontagger.counts[k]))
Beispiel #2
0
class BedStats(object):
    """Summary statistics for a collection of BED regions.

    The constructor consumes the ``bed`` iterable and tallies region
    count, total covered bases, a length distribution (``Counts``),
    per-reference counts and, optionally, per-name counts. When a
    ``gtf_file`` is supplied, each region is also handed to a
    ``RegionTagger`` built from it. ``write`` prints the collected values.
    """

    def __init__(self, bed, gtf_file=None, names=False):
        """Tally statistics for every region in ``bed``.

        Args:
            bed: iterable of regions with ``chrom``, ``start``, ``end``
                and ``strand`` attributes (plus ``name`` if ``names``).
            gtf_file: if truthy, passed to ``RegionTagger`` and every
                region is registered with the resulting tagger.
            names: if true, count how many regions carry each name.
        """
        self.regiontagger = RegionTagger(gtf_file) if gtf_file else None

        self.total = 0
        self.size = 0
        self.lengths = Counts()
        self.refs = {}
        self.names = {}

        for region in bed:
            span = region.end - region.start

            self.total += 1
            self.size += span
            self.lengths.add(span)

            if names:
                name = region.name
                self.names[name] = self.names.get(name, 0) + 1

            chrom = region.chrom
            self.refs[chrom] = self.refs.get(chrom, 0) + 1

            if self.regiontagger:
                self.regiontagger.add_region(
                    chrom, region.start, region.end, region.strand)

    def write(self, out=None):
        """Emit the statistics as tab-separated text.

        Args:
            out: writable text stream (anything with ``write``). When
                omitted, ``sys.stdout`` is resolved at call time rather
                than at definition time, so stream redirection done after
                import still takes effect.
        """
        if out is None:
            out = sys.stdout
        natural_sort = ngsutils.support.ngs_utils.natural_sort

        out.write("Regions:\t%s\n" % format_number(self.total))
        out.write("Total coverage:\t%s bases\n" % format_number(self.size))
        out.write("Average size:\t%s bases\n" % self.lengths.mean())
        out.write("\n")
        out.write("Reference distribution\n")
        out.write("ref\tcount\n")
        for refname in natural_sort(list(self.refs)):
            out.write("%s\t%s\n" % (refname, format_number(self.refs[refname])))

        # Skipped entirely when no names were counted.
        if self.names:
            out.write("\nName distribution\n")
            out.write("name\tcount\n")
            for name in natural_sort(list(self.names)):
                out.write("%s\t%s\n" % (name, format_number(self.names[name])))

        if self.regiontagger:
            out.write("\n")
            out.write("Mapping regions\n")
            tag_counts = self.regiontagger.counts
            for key in sorted(tag_counts):
                out.write("%s\t%s\n" % (key, tag_counts[key]))