def test_handling_quoted_chr(self):
    """Filter the 'ChIP_peaks-ex5.txt' peaks down to chromosome 'chr4'.

    Expects 5 peaks to be loaded from the file and 2 of them to remain
    after filtering on "chr4".

    NOTE(review): the test name suggests the fixture stores quoted
    chromosome names - confirm against the data file.
    """
    all_peaks = PeakSet('ChIP_peaks-ex5.txt')
    n_loaded = len(all_peaks)
    self.assertEqual(n_loaded, 5,
                     "Wrong number of lines read from ChIP-seq file")
    # Narrow the set to a single chromosome and count the survivors
    chr4_peaks = all_peaks.filterByChr("chr4")
    n_kept = len(chr4_peaks)
    self.assertEqual(n_kept, 2,
                     "Wrong number of ChIP-seq records filtered")
def test_filter_on_peak_position(self):
    """Filter the 'ChIP_peaks-ex1.txt' peaks by start position.

    Expects exactly 2 peaks to be returned, each with a start position
    inside the closed interval [12000000, 15000000].
    """
    window_lo, window_hi = 12000000, 15000000
    loaded = PeakSet('ChIP_peaks-ex1.txt')
    # The limits are handed over upper-first; the assertions below still
    # expect the result to lie between the lower and upper bound.
    in_window = loaded.filterByPosition(window_hi, window_lo)
    self.assertEqual(len(in_window), 2, "Wrong number of peaks filtered")
    for candidate in in_window:
        print(str(candidate))
        inside = (candidate.start >= window_lo and
                  candidate.start <= window_hi)
        self.assertTrue(inside, "Peak should have been filtered out")
def test_filter_on_peak_position(self):
    """Filter the 'ChIP_peaks-ex1.txt' peaks by start position.

    Expects exactly 2 peaks to be returned, each with a start position
    inside the closed interval [12000000, 15000000].
    """
    peaks = PeakSet('ChIP_peaks-ex1.txt')
    lower, upper = 12000000, 15000000
    # The limits are passed upper-first; the assertions below still expect
    # the result to lie between the lower and upper bound.
    peaks_pos = peaks.filterByPosition(upper, lower)
    self.assertEqual(len(peaks_pos), 2, "Wrong number of peaks filtered")
    for peak in peaks_pos:
        # Call form of print: valid on Python 3 and prints the same text
        # on Python 2 (the parentheses only group a single value there).
        print(str(peak))
        self.assertTrue((peak.start >= lower and peak.start <= upper),
                        "Peak should have been filtered out")
def test_filter_on_chromosome(self):
    """Filter the 'ChIP_peaks-ex2.txt' peaks down to chromosome 'chr2L'.

    Expects 10 peaks before filtering and 5 afterwards, every one of
    which reports 'chr2L' as its chromosome.
    """
    target = 'chr2L'
    unfiltered = PeakSet('ChIP_peaks-ex2.txt')
    self.assertEqual(len(unfiltered), 10,
                     "Wrong number of lines from ChIP-seq file")
    on_target = unfiltered.filterByChr(target)
    self.assertEqual(len(on_target), 5,
                     "Wrong number of lines from ChIP-seq after chr filter")
    # Every surviving peak must sit on the requested chromosome
    for hit in on_target:
        matches = (hit.chrom == target)
        self.assertTrue(matches, "Wrong chromosome name filtered by chr")
def test_filter_on_chromosome(self):
    """Check chromosome filtering on the 'ChIP_peaks-ex2.txt' peak set.

    The unfiltered set must hold 10 peaks; after filterByChr('chr2L')
    it must hold 5, all with `chrom` equal to 'chr2L'.
    """
    wanted_chrom = 'chr2L'
    peak_set = PeakSet('ChIP_peaks-ex2.txt')
    count_before = len(peak_set)
    self.assertEqual(count_before, 10,
                     "Wrong number of lines from ChIP-seq file")
    # Rebind to the filtered subset, then verify size and content
    peak_set = peak_set.filterByChr(wanted_chrom)
    count_after = len(peak_set)
    self.assertEqual(count_after, 5,
                     "Wrong number of lines from ChIP-seq after chr filter")
    for entry in peak_set:
        self.assertTrue((entry.chrom == wanted_chrom),
                        "Wrong chromosome name filtered by chr")
def test_sort_by_distance_from(self):
    """Sort the 'ChIP_peaks-ex1.txt' peaks by distance from a position.

    After sortByDistanceFrom(12000000), each peak's |start - position|
    must be no smaller than that of the peak immediately before it.
    """
    peaks_sort = PeakSet('ChIP_peaks-ex1.txt')
    position = 12000000
    # Do the sorting
    peaks_sort.sortByDistanceFrom(position)
    # Check that each distance is at least as large as the previous one
    last_peak = None
    for peak in peaks_sort:
        # Explicit None test: do not depend on the truthiness of a Peak
        if last_peak is not None:
            self.assertTrue((abs(peak.start - position) >=
                             abs(last_peak.start - position)),
                            "Sort by distance failed")
        # Advance the reference so that adjacent peaks are compared; without
        # this every peak would only ever be checked against the first one.
        last_peak = peak
def test_write_peaks_summary(self):
    """Write nearest peaks for one feature and check the summary file.

    An AnalysisReportWriter is created in output.MULTI_LINE mode with a
    `summary` file path; after write_nearest_peaks() and close() the
    summary file must contain the header line plus the single
    '1 of 3' peak row given in `expected_output`.
    """
    # Local import: needed only for temp-file clean-up in this test
    import os

    # Set up some test data
    feature = Feature('CG31973', 'chr2L', '25402', '59243', '-')
    peaks = PeakSet(
        peaks_list=(
            Peak('chr2L', '66711', '66911'),
            Peak('chr2L', '249077', '249277'),
            Peak('chr2L', '605850', '606050')))
    # Temp output file: mkstemp returns an open OS-level descriptor that
    # the caller owns; only the path is used here, so close it right away.
    fp, summary = tempfile.mkstemp()
    os.close(fp)
    try:
        # Write peaks to file
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=('feature.id',
                                          'order',
                                          'peak.chr',
                                          'peak.start',
                                          'peak.end',
                                          'dist_closest',
                                          'dist_TSS'),
                                  summary=summary)
        ap.write_nearest_peaks(feature, peaks)
        ap.close()
        # Expected and actual output
        expected_output = \
            "#feature.id\torder\tpeak.chr\tpeak.start\tpeak.end\tdist_closest\tdist_TSS\n" \
            "CG31973\t1 of 3\tchr2L\t66711\t66911\t7468\t7468\n"
        with open(summary, 'r') as handle:
            actual_output = handle.read()
        # Check that output matches
        self.assertEqual(expected_output, actual_output)
    finally:
        # Do not leave the temporary file behind, pass or fail
        os.remove(summary)
def setUp(self):
    """Create the shared fixture: one feature, three peaks, field lists.

    Stores on `self` a Feature, a PeakSet holding three 'chr2L' peaks,
    and three tuples of report field names (single-line, single-line
    with an extra 'cutoff' column, and multi-line).
    """
    # Field specification shared by both single-line layouts
    peak_list_field = ('list(peak.chr,peak.start,peak.end,'
                       'dist_closest,dist_TSS)')
    chr2l_peaks = (
        Peak('chr2L', '66711', '66911'),
        Peak('chr2L', '249077', '249277'),
        Peak('chr2L', '605850', '606050'),
    )
    # Test data
    self.feature = Feature('CG31973', 'chr2L', '25402', '59243', '-')
    self.peaks = PeakSet(peaks_list=chr2l_peaks)
    # Report layouts
    self.single_line_fields = (
        'feature.id',
        'number_of_results',
        peak_list_field,
    )
    self.single_line_fields_extra_data = (
        'feature.id',
        'cutoff',
        'number_of_results',
        peak_list_field,
    )
    self.multi_line_fields = (
        'feature.id',
        'order',
        'peak.chr',
        'peak.start',
        'peak.end',
        'dist_closest',
        'dist_TSS',
    )
def test_is_summit_data(self):
    """Check isSummit() on two peak files.

    'ChIP_peaks-ex1.txt' must be reported as summit data and
    'ChIP_peaks-ex5.txt' must not.
    """
    summit_set = PeakSet('ChIP_peaks-ex1.txt')
    self.assertTrue(summit_set.isSummit(), "ChIP data are summits")
    region_set = PeakSet('ChIP_peaks-ex5.txt')
    self.assertFalse(region_set.isSummit(), "ChIP data are not summits")
def test__eq__(self):
    """Exercise equality and inequality between two PeakSets.

    The two sets are grown in lock-step and compared after each step:
    equal when empty, equal with the same peak(s), unequal when only one
    side has gained a peak or when the last peaks added differ.
    """
    left = PeakSet()
    right = PeakSet()
    # Two empty peak sets compare equal
    self.assertEqual(left, right)
    # Same single peak on both sides
    left.addPeak(Peak('chr2L', '66811', '66812'))
    right.addPeak(Peak('chr2L', '66811', '66812'))
    self.assertEqual(left, right)
    # Second peak on the left only: sets diverge ...
    left.addPeak(Peak('chr2L', '249177', '605951'))
    self.assertNotEqual(left, right)
    # ... and match again once the right side catches up
    right.addPeak(Peak('chr2L', '249177', '605951'))
    self.assertEqual(left, right)
    # Third peaks have different coordinates: sets must differ
    left.addPeak(Peak('chr2L', '605650', '605850'))
    right.addPeak(Peak('chr2L', '605850', '606050'))
    self.assertNotEqual(left, right)
def test_is_summit_data(self):
    """Verify summit detection for the ex1 and ex5 peak files.

    isSummit() is expected to be true for 'ChIP_peaks-ex1.txt' and
    false for 'ChIP_peaks-ex5.txt'.
    """
    # Expected to be summit data
    ex1_is_summit = PeakSet('ChIP_peaks-ex1.txt').isSummit()
    self.assertTrue(ex1_is_summit, "ChIP data are summits")
    # Expected not to be summit data
    ex5_is_summit = PeakSet('ChIP_peaks-ex5.txt').isSummit()
    self.assertFalse(ex5_is_summit, "ChIP data are not summits")