def calc_maxima_forward_reverse_all_peaks(self):
    """Compute forward/reverse maxima and maxima pairs for every peak.

    Resets ``self.peak_maxima`` to an empty nested mapping, then, for each
    ``(chrom, start, end)`` entry of ``self.peaks``:

    * stores the results of ``get_maxima_single`` for the forward and
      reverse depth tracks under
      ``self.peak_maxima[chrom][peak_str]["FORWARD"]`` / ``["REVERSE"]``;
    * stores the result of ``calc_peak_pairs_single`` under
      ``self.peak_pairs[chrom][peak_str]``.
    """
    self.log_info(
        "Calculating the forward and reverse maxima for each of the peak regions..."
    )
    self.peak_maxima = defaultdict(
        lambda: defaultdict(lambda: defaultdict(list)))
    for chrom, start, end in self.peaks:
        peak_str = shared.peak_str(start, end)
        self.log_debug("Calculating maxima for peak %s..." % peak_str)
        # One depth ceiling is shared by both strands for this peak.
        # NOTE(review): `+` concatenates if the depth tracks are lists but
        # adds element-wise if they are numpy arrays -- confirm the intent.
        max_depth = max(
            self.genome.forward_depth[chrom][start:end + 1] +
            self.genome.reverse_depth[chrom][start:end + 1])
        forward = self.get_maxima_single(self.genome.forward_depth, chrom,
                                         start, end, max_depth)
        reverse = self.get_maxima_single(self.genome.reverse_depth, chrom,
                                         start, end, max_depth)
        self.peak_maxima[chrom][peak_str]["FORWARD"] = forward
        self.peak_maxima[chrom][peak_str]["REVERSE"] = reverse
        self.log_debug("Forward Maxima: %s" % shared.pprint_list(forward))
        self.log_debug("Reverse Maxima: %s" % shared.pprint_list(reverse))
        peak_pairs = self.calc_peak_pairs_single(forward, reverse)
        self.log_debug("Peak Pairs: %s" % shared.pprint_list(peak_pairs))
        self.peak_pairs[chrom][peak_str] = peak_pairs
def find_sites(self):
    """Build the genome depth model, run peak QC on every peak, then exit.

    Creates ``self.genome`` (a ``GenomeAlignment``) and ``self.peakQC``
    (a ``PeakQC``), calls ``perform_peak_QC`` for each
    ``(chrom, start, end)`` in ``self.peaks``, prints the QC log and
    finally terminates the process via ``sys.exit()``.
    """
    self.log_info("Building the genome read depth model...")
    self.genome = GenomeAlignment(self.bam_file, self.peaks, self.log)
    self.genome.generateModel()
    self.log_debug("Finished building the genome model...")
    self.log_debug("Creating the PeakQC object...")
    self.peakQC = PeakQC(self.genome, self.peaks, self.log)
    for chrom, start_zeroi, end_zeroi in self.peaks:
        peak_str = shared.peak_str(start_zeroi, end_zeroi)
        if self.log:
            self.log.info("Processing the peak %s..." % peak_str)
        self.peakQC.perform_peak_QC(chrom, start_zeroi, end_zeroi)
        # The per-peak maxima computation and debug prints that used to
        # follow an unconditional `continue` here were unreachable (and
        # referenced the undefined name `lower_peak_cutoff_perc`), so they
        # have been removed.
    # NOTE(review): the loop boundary was reconstructed from a
    # whitespace-collapsed source; confirm these two calls belong after
    # the loop.
    self.peakQC.print_QC_log()
    sys.exit()
def find_troughs(self):
    """Record one trough per maxima pair for every peak.

    For each ``(chrom, start, stop)`` in ``self.peaks``, every pair stored
    in ``self.peak_pairs[chrom][peak_str]`` is passed to ``find_trough``
    and the result is appended to ``self.trough_sites[chrom][peak_str]``.
    """
    for chrom, start, stop in self.peaks:
        key = shared.peak_str(start, stop)
        maxima_pairs = self.peak_pairs[chrom][key]
        for maximum_a, maximum_b in maxima_pairs:
            site = self.find_trough(chrom, maximum_a, maximum_b)
            self.trough_sites[chrom][key].append(site)
def print_QC_log(self):
    """Print a tab-separated QC report, one row per peak, to stdout.

    Columns: chromosome, peak string, the three QC tags, then the trough
    sites, target sites and target sequences held by
    ``self.peak_analyzer``. Positions are printed with 1 added to the
    stored value; a column with no entries is printed as ``None``.
    Separators: ``;`` between maxima pairs, ``|`` between targets of one
    pair, ``-`` between the coordinates of one target.
    """
    header = [
        "CHROM", "PEAK", self.depth_tag, self.exist_maxima_tag,
        self.paired_maxima_tag, 'TroughSites', 'TargetSites', 'TargetSeqs'
    ]
    print('\t'.join(header))

    for chrom, start, end in self.peaks:
        peak_str = shared.peak_str(start, end)
        qc_entry = self.QC_log[chrom][peak_str]
        row = [
            chrom,
            peak_str,
            qc_entry[self.depth_tag],
            qc_entry[self.exist_maxima_tag],
            str(qc_entry[self.paired_maxima_tag]),
        ]

        troughs = self.peak_analyzer.trough_sites[chrom][peak_str]
        if troughs:
            row.append(';'.join(str(pos + 1) for pos in troughs))
        else:
            row.append("None")

        site_groups = self.peak_analyzer.target_sites[chrom][peak_str]
        if site_groups:
            row.append(';'.join(
                '|'.join(
                    '-'.join(str(pos + 1) for pos in site)
                    for site in group)
                for group in site_groups))
        else:
            row.append("None")

        seq_groups = self.peak_analyzer.target_seqs[chrom][peak_str]
        if seq_groups:
            row.append(';'.join('|'.join(group) for group in seq_groups))
        else:
            row.append("None")

        print("\t".join(row))
def perform_depth_QC(self, chrom, start, end):
    """Classify a peak's maximum read depth as LOW, MED or HIGH.

    The maximum is taken over the forward and reverse depth slices
    ``[start:end + 1]`` combined with ``+``. The label is stored in
    ``self.QC_log[chrom][peak_str][self.depth_tag]``:
    ``'LOW'`` below 20, ``'MED'`` below 50, ``'HIGH'`` otherwise.
    """
    peak_str = shared.peak_str(start, end)
    forward_slice = self.genome.forward_depth[chrom][start:end + 1]
    reverse_slice = self.genome.reverse_depth[chrom][start:end + 1]
    deepest = max(forward_slice + reverse_slice)

    if deepest < 20:
        label = 'LOW'
    elif deepest < 50:
        label = 'MED'
    else:
        label = 'HIGH'
    self.QC_log[chrom][peak_str][self.depth_tag] = label
def print_QC_log(self):
    """Print a tab-separated QC summary, one row per peak, to stdout.

    Columns: chromosome, peak string, and the values stored in
    ``self.QC_log`` under ``self.depth_tag``, ``self.exist_maxima_tag``
    and ``self.paired_maxima_tag``.
    """
    # The header previously read `self.paired_maxima_qc`, while every row
    # (and the rest of the QC code) uses `self.paired_maxima_tag`.
    print('\t'.join([
        "CHROM", "PEAK", self.depth_tag, self.exist_maxima_tag,
        self.paired_maxima_tag
    ]))
    for chrom, start, end in self.peaks:
        peak_str = shared.peak_str(start, end)
        qc_entry = self.QC_log[chrom][peak_str]
        out_list = [
            chrom,
            peak_str,
            qc_entry[self.depth_tag],
            qc_entry[self.exist_maxima_tag],
            # The paired-maxima value is a count (an int), which
            # str.join() rejects unless converted first.
            str(qc_entry[self.paired_maxima_tag]),
        ]
        print("\t".join(out_list))
def perform_exist_maxima_QC(self, chrom, start, end):
    """Record whether forward and/or reverse maxima exist for a peak.

    Stores one of ``'FALSE'`` (neither strand has maxima),
    ``'MISSING_FORWARD'``, ``'MISSING_REVERSE'`` or ``'TRUE'`` (both have
    maxima) in ``self.QC_log[chrom][peak_str][self.exist_maxima_tag]``.
    """
    peak_str = shared.peak_str(start, end)
    maxima = self.peak_analyzer.peak_maxima[chrom][peak_str]
    forward_missing = len(maxima['FORWARD']) == 0
    reverse_missing = len(maxima['REVERSE']) == 0

    # Keyed by (forward_missing, reverse_missing).
    outcomes = {
        (True, True): 'FALSE',
        (True, False): 'MISSING_FORWARD',
        (False, True): 'MISSING_REVERSE',
        (False, False): 'TRUE',
    }
    status = outcomes[(forward_missing, reverse_missing)]
    self.QC_log[chrom][peak_str][self.exist_maxima_tag] = status
def get_quality_peaks(self):
    """Return the peaks that pass all three QC checks.

    A peak is kept when its depth tag is not ``'LOW'``, its
    maxima-existence tag is ``"TRUE"`` and its paired-maxima value is
    greater than zero.

    Returns:
        A list of ``[chrom, start, stop]`` lists, in ``self.peaks`` order.
    """
    passing = []
    for chrom, start, stop in self.peaks:
        key = shared.peak_str(start, stop)
        # Short-circuit order matches the original nested checks.
        if (self.QC_log[chrom][key][self.depth_tag] != 'LOW'
                and self.QC_log[chrom][key][self.exist_maxima_tag] == "TRUE"
                and self.QC_log[chrom][key][self.paired_maxima_tag] > 0):
            passing.append([chrom, start, stop])
    return passing
def find_targets(self, peaks):
    """Collect target sites and their sequences for each maxima pair.

    For every ``(chrom, start, stop)`` in ``peaks`` and every pair in
    ``self.peak_pairs[chrom][peak_str]``, calls
    ``find_targets_single_peak``. When it returns a non-empty result, the
    targets are appended to ``self.target_sites[chrom][peak_str]`` and the
    matching slices of ``self.genome.genome_seq[chrom]``
    (``target[0]`` through ``target[1]`` inclusive) are appended to
    ``self.target_seqs[chrom][peak_str]``.
    """
    for chrom, start, stop in peaks:
        key = shared.peak_str(start, stop)
        for maximum_a, maximum_b in self.peak_pairs[chrom][key]:
            sites = self.find_targets_single_peak(chrom, maximum_a,
                                                  maximum_b)
            if len(sites) == 0:
                continue
            chrom_seq = self.genome.genome_seq[chrom]
            seqs = []
            for site in sites:
                seqs.append(chrom_seq[site[0]:site[1] + 1])
            self.target_seqs[chrom][key].append(seqs)
            self.target_sites[chrom][key].append(sites)
def find_sites(self):
    """Build the genome model, QC every peak, find sites, report and exit.

    Creates ``self.genome`` (a ``GenomeAlignment``) and ``self.peakQC``
    (a ``PeakQC``), calls ``perform_peak_QC`` for each peak in
    ``self.peaks``, then ``find_insertion_sites`` and ``print_QC_log``,
    and finally terminates the process via ``sys.exit()``.
    """
    self.log_info("Building the genome read depth model...")
    genome = GenomeAlignment(self.genome_path, self.bam_file, self.peaks,
                             self.log)
    self.genome = genome
    self.genome.generateModel()
    self.log_debug("Finished building the genome model...")

    self.log_debug("Creating the PeakQC object...")
    self.peakQC = PeakQC(self.genome, self.peaks, self.log)

    for peak in self.peaks:
        chrom, start_zeroi, end_zeroi = peak
        label = shared.peak_str(start_zeroi, end_zeroi)
        if self.log:
            self.log.info("Processing the peak %s..." % label)
        self.peakQC.perform_peak_QC(chrom, start_zeroi, end_zeroi)

    self.peakQC.find_insertion_sites()
    self.peakQC.print_QC_log()
    sys.exit()
def perform_paired_maxima_QC(self, chrom, start, end):
    """Record how many maxima pairs were found for a peak.

    Stores ``len(self.peak_analyzer.peak_pairs[chrom][peak_str])`` in
    ``self.QC_log[chrom][peak_str][self.paired_maxima_tag]``.
    """
    peak_str = shared.peak_str(start, end)
    # The body previously ended in a dangling `self.peak_analyzer`
    # expression, so the value read by the QC report and the quality
    # filter was never recorded.
    paired_peaks = len(self.peak_analyzer.peak_pairs[chrom][peak_str])
    self.QC_log[chrom][peak_str][self.paired_maxima_tag] = paired_peaks
def perform_paired_maxima_QC(self, chrom, start, end):
    """Record how many maxima pairs were found for a peak.

    Stores the length of ``self.peak_analyzer.peak_pairs[chrom][peak_str]``
    in ``self.QC_log[chrom][peak_str][self.paired_maxima_tag]``.
    """
    key = shared.peak_str(start, end)
    pairs = self.peak_analyzer.peak_pairs[chrom][key]
    pair_count = len(pairs)
    self.QC_log[chrom][key][self.paired_maxima_tag] = pair_count