def calc_insert_hist(self):
    """Build the insert-size histogram from the reads in ``self.bam``.

    Iterates over ``self.bam.fetch()`` and tallies ``template_length`` for
    every read that passes all of these filters: the read is not reverse,
    its mate is reverse, neither the read nor its mate is unmapped,
    ``self.is_primary(read)`` is true, ``template_length`` is positive, and
    the read's ``RG`` tag is in ``self.readgroups``. Sampling stops as soon
    as the number of tallied reads equals ``self.num_samp``.

    Template lengths greater than ``median + 10 * upper_mad`` are then
    dropped, and the results are stored on the instance:

    * ``self.hist`` -- Counter mapping template length -> number of reads
    * ``self.mean`` -- ``mean(self.hist)``
    * ``self.sd``   -- ``stdev(self.hist)``

    Raises:
        SystemExit: with status 1, after writing a message to stderr, when
            no read passes the filters.
    """
    # Outlier cutoff multiplier applied to upper_mad() below.
    mads = 10
    sampled = 0

    # Each key is an observed template length; its count is the number of
    # reads seen with that length, so value_counts[5] is how many times an
    # insert size of 5 was observed.
    value_counts = Counter()
    for read in self.bam.fetch():
        if (read.is_reverse
                or not read.mate_is_reverse
                or read.is_unmapped
                or read.mate_is_unmapped
                or not self.is_primary(read)
                or read.template_length <= 0
                or read.get_tag('RG') not in self.readgroups):
            continue

        value_counts[read.template_length] += 1
        sampled += 1
        if sampled == self.num_samp:
            break

    if not value_counts:
        sys.stderr.write(
            'Error: failed to build insert size histogram for paired-end reads.\n'
            'Please ensure BAM file (%s) has inward facing, paired-end reads.\n'
            % self.bam.filename
        )
        # sys.exit rather than the bare ``exit`` builtin: the latter is
        # installed by the ``site`` module and is not always available.
        sys.exit(1)

    # Remove outliers: anything above median + mads * upper_mad.
    # NOTE(review): upper_mad is presumably the median absolute deviation of
    # the values above the median -- confirm against its definition.
    med = median(value_counts)
    u_mad = upper_mad(value_counts, med)
    cutoff = med + mads * u_mad
    # Materialise the keys to delete first so the Counter is not mutated
    # while it is being iterated.
    for length in [v for v in value_counts if v > cutoff]:
        del value_counts[length]

    self.hist = value_counts
    self.mean = mean(self.hist)
    self.sd = stdev(self.hist)