Beispiel #1
0
    def calc_insert_hist(self):
        """Build the insert-size histogram from reads in ``self.bam``.

        Iterates ``self.bam.fetch()`` and counts ``template_length`` for
        every read that is forward-strand with a reverse-strand mate, has
        both ends mapped, passes ``self.is_primary``, has a positive
        template length and whose ``RG`` tag is in ``self.readgroups``.
        Sampling stops once ``self.num_samp`` reads have been accepted.

        Values greater than ``median + 10 * upper_mad`` are then dropped
        and the result is stored on the instance:

        * ``self.hist`` -- Counter mapping insert size -> observation count
        * ``self.mean`` -- ``mean(self.hist)``
        * ``self.sd``   -- ``stdev(self.hist)``

        If no read passes the filters, an error message is written to
        stderr and the process exits with status 1 (``SystemExit``).
        """
        # Number of upper MADs above the median beyond which a value is
        # treated as an outlier.
        mads = 10
        counter = 0

        # Each entry in valueCounts is a value, and its count is
        # the number of instances of that value observed in the dataset.
        # So valueCounts[5] is the number of times 5 has been seen in the data.
        valueCounts = Counter()
        for read in self.bam.fetch():
            # NOTE(review): get_tag('RG') presumably raises for reads that
            # carry no RG tag -- confirm that inputs always have one.
            if (read.is_reverse
                    or not read.mate_is_reverse
                    or read.is_unmapped
                    or read.mate_is_unmapped
                    or not self.is_primary(read)
                    or read.template_length <= 0
                    or read.get_tag('RG') not in self.readgroups):
                continue

            valueCounts[read.template_length] += 1
            counter += 1
            # Equality (not >=) is kept deliberately so that the stopping
            # behaviour matches the previous implementation exactly.
            if counter == self.num_samp:
                break

        if not valueCounts:
            sys.stderr.write(
                'Error: failed to build insert size histogram for paired-end reads.\n'
                'Please ensure BAM file (%s) has inward facing, paired-end reads.\n'
                % self.bam.filename)
            # sys.exit rather than the site-provided exit() builtin, which
            # is not guaranteed to exist in every interpreter setup.
            sys.exit(1)

        # remove outliers
        med = median(valueCounts)
        u_mad = upper_mad(valueCounts, med)
        cutoff = med + mads * u_mad
        # Collect the keys first: the Counter cannot be mutated while it
        # is being iterated.
        for outlier in [size for size in valueCounts if size > cutoff]:
            del valueCounts[outlier]

        self.hist = valueCounts
        self.mean = mean(self.hist)
        self.sd = stdev(self.hist)
Beispiel #2
0
    def calc_insert_hist(self):
        """Build the insert-size histogram from reads in ``self.bam``.

        Iterates ``self.bam.fetch()`` and counts ``template_length`` for
        every read that is forward-strand with a reverse-strand mate, has
        both ends mapped, passes ``self.is_primary``, has a positive
        template length and whose ``RG`` tag is in ``self.readgroups``.
        Sampling stops once ``self.num_samp`` reads have been accepted.

        Values greater than ``median + 10 * upper_mad`` are then dropped
        and the result is stored on the instance:

        * ``self.hist`` -- Counter mapping insert size -> observation count
        * ``self.mean`` -- ``mean(self.hist)``
        * ``self.sd``   -- ``stdev(self.hist)``

        If no read passes the filters, an error message is written to
        stderr and the process exits with status 1 (``SystemExit``).
        """
        # Number of upper MADs above the median beyond which a value is
        # treated as an outlier.
        mads = 10
        counter = 0

        # Each entry in valueCounts is a value, and its count is
        # the number of instances of that value observed in the dataset.
        # So valueCounts[5] is the number of times 5 has been seen in the data.
        valueCounts = Counter()
        for read in self.bam.fetch():
            # NOTE(review): get_tag('RG') presumably raises for reads that
            # carry no RG tag -- confirm that inputs always have one.
            if (read.is_reverse
                    or not read.mate_is_reverse
                    or read.is_unmapped
                    or read.mate_is_unmapped
                    or not self.is_primary(read)
                    or read.template_length <= 0
                    or read.get_tag('RG') not in self.readgroups):
                continue

            valueCounts[read.template_length] += 1
            counter += 1
            # Equality (not >=) is kept deliberately so that the stopping
            # behaviour matches the previous implementation exactly.
            if counter == self.num_samp:
                break

        if not valueCounts:
            sys.stderr.write(
                'Error: failed to build insert size histogram for paired-end reads.\n'
                'Please ensure BAM file (%s) has inward facing, paired-end reads.\n'
                % self.bam.filename)
            # sys.exit rather than the site-provided exit() builtin, which
            # is not guaranteed to exist in every interpreter setup.
            sys.exit(1)

        # remove outliers
        med = median(valueCounts)
        u_mad = upper_mad(valueCounts, med)
        cutoff = med + mads * u_mad
        # Collect the keys first: the Counter cannot be mutated while it
        # is being iterated.
        for outlier in [size for size in valueCounts if size > cutoff]:
            del valueCounts[outlier]

        self.hist = valueCounts
        self.mean = mean(self.hist)
        self.sd = stdev(self.hist)