예제 #1
0
 def test_source_file_is_stored(self):
     """Check the source file name is kept on a PeakSet and on each peak."""
     filenames = ('ChIP_peaks-ex1.txt', 'ChIP_peaks-ex2.txt')
     # Load both files up front, then verify each set in turn
     peak_sets = [PeakSet(filename) for filename in filenames]
     for filename, peak_set in zip(filenames, peak_sets):
         self.assertEqual(peak_set.source_file, filename)
         for peak in peak_set:
             self.assertEqual(peak.source_file, filename)
예제 #2
0
 def test_get_slice(self):
     """Check that slicing a PeakSet gives a PeakSet of the sliced peaks."""
     peaks = PeakSet('ChIP_peaks-ex1.txt')
     peaks_slice = peaks[1:3]
     # assertIsInstance reports the actual type when the check fails,
     # unlike assertTrue(isinstance(...)) which only reports "False"
     self.assertIsInstance(peaks_slice, PeakSet)
     self.assertEqual(len(peaks_slice), 2)
     # Slice elements must match the corresponding items of the full set
     self.assertEqual(peaks[1], peaks_slice[0])
     self.assertEqual(peaks[2], peaks_slice[1])
예제 #3
0
 def test_handling_quoted_chr(self):
     """Check record counts before and after filtering 'ex5' on chr4."""
     all_peaks = PeakSet('ChIP_peaks-ex5.txt')
     total = len(all_peaks)
     self.assertEqual(total, 5,
                      "Wrong number of lines read from ChIP-seq file")
     # Only the "chr4" records should survive the filter
     chr4_peaks = all_peaks.filterByChr("chr4")
     matched = len(chr4_peaks)
     self.assertEqual(matched, 2,
                      "Wrong number of ChIP-seq records filtered")
예제 #4
0
 def test_write_peaks_summary(self):
     """Check the summary file written alongside the nearest-peaks report.

     Writes the nearest peaks for a single feature through an
     ``AnalysisReportWriter`` configured with a ``summary`` file, then
     compares that file's content with the expected header and data line.
     """
     import os
     # Set up some test data
     feature = Feature('CG31973','chr2L','25402','59243','-')
     peaks = PeakSet(
         peaks_list=(
             Peak('chr2L','66711','66911'),
             Peak('chr2L','249077','249277'),
             Peak('chr2L','605850','606050')))
     # Temp output file: mkstemp returns an already-open descriptor, so
     # close it straight away (only the path is needed) to avoid a leak
     fd,summary = tempfile.mkstemp()
     os.close(fd)
     try:
         # Write peaks to file
         ap = AnalysisReportWriter(output.MULTI_LINE,
                                   fields=('feature.id',
                                           'order',
                                           'peak.chr',
                                           'peak.start',
                                           'peak.end',
                                           'dist_closest',
                                           'dist_TSS'),
                                   summary=summary)
         ap.write_nearest_peaks(feature,peaks)
         ap.close()
         # Read back the actual output, closing the file afterwards
         with open(summary,'r') as summary_file:
             actual_output = summary_file.read()
     finally:
         # Don't leave the temporary file behind, pass or fail
         os.remove(summary)
     # Expected output
     expected_output = \
         "#feature.id\torder\tpeak.chr\tpeak.start\tpeak.end\tdist_closest\tdist_TSS\n" \
         "CG31973\t1 of 3\tchr2L\t66711\t66911\t7468\t7468\n"
     # Check that output matches
     self.assertEqual(expected_output,actual_output)
예제 #5
0
 def test_populate_from_list_of_peaks(self):
     """Check a PeakSet built from a list of peaks keeps them in order."""
     peak_args = (('chr2L', 66711, 66911),
                  ('chr2L', 605850, 606050),
                  ('chr3L', 2258089, 2258289))
     peaks = PeakSet(peaks_list=tuple(Peak(*args) for args in peak_args))
     # No file was read, so no source file should be recorded
     self.assertEqual(peaks.source_file, None)
     # Each stored peak should equal a freshly built equivalent
     for index, args in enumerate(peak_args):
         self.assertEqual(peaks[index], Peak(*args))
예제 #6
0
 def test_filter_on_peak_position(self):
     """Check filterByPosition keeps only peaks starting within limits."""
     lower = 12000000
     upper = 15000000
     peaks = PeakSet('ChIP_peaks-ex1.txt')
     # The limits are passed in (upper, lower) order
     peaks_pos = peaks.filterByPosition(upper, lower)
     self.assertEqual(len(peaks_pos), 2, "Wrong number of peaks filtered")
     for peak in peaks_pos:
         print(str(peak))
         within_limits = (peak.start >= lower and peak.start <= upper)
         self.assertTrue(within_limits,
                         "Peak should have been filtered out")
예제 #7
0
 def test_reading_in_ChIPseq_data_custom_columns(self):
     """Check peaks read from a multi-column file using columns=(2, 4, 5)."""
     filename = 'ChIP_peaks_multi_columns-ex1.txt'
     peaks = PeakSet(filename, columns=(2, 4, 5))
     self.assertEqual(peaks.source_file, filename)
     self.assertEqual(len(peaks), 5,
                      "Wrong number of lines read from ChIP-seq file")
     # Expected (start, end) of each peak, in file order; all on chr3L
     expected_positions = ((4252919, 4252920),
                           (9502640, 9502641),
                           (12139192, 12139193),
                           (14983597, 14983598),
                           (17004143, 17004144))
     for index, (start, end) in enumerate(expected_positions):
         self.assertEqual(peaks[index], Peak('chr3L', start, end))
예제 #8
0
 def test__eq__(self):
     """Check equality and inequality comparisons between PeakSets."""
     first = PeakSet()
     second = PeakSet()
     # Two empty peak sets compare equal
     self.assertEqual(first, second)
     # The same single peak added to each set
     for peak_set in (first, second):
         peak_set.addPeak(Peak('chr2L', '66811', '66812'))
     self.assertEqual(first, second)
     # A second peak in one set only makes them differ ...
     first.addPeak(Peak('chr2L', '249177', '605951'))
     self.assertNotEqual(first, second)
     # ... until the other set receives the same peak
     second.addPeak(Peak('chr2L', '249177', '605951'))
     self.assertEqual(first, second)
     # Different third peaks leave the sets unequal
     first.addPeak(Peak('chr2L', '605650', '605850'))
     second.addPeak(Peak('chr2L', '605850', '606050'))
     self.assertNotEqual(first, second)
예제 #9
0
 def test_filter_on_chromosome(self):
     """Check filterByChr keeps only peaks on the requested chromosome."""
     chromosome = 'chr2L'
     all_peaks = PeakSet('ChIP_peaks-ex2.txt')
     self.assertEqual(len(all_peaks), 10,
                      "Wrong number of lines from ChIP-seq file")
     # Apply the filter and check both the count and each peak's chromosome
     filtered = all_peaks.filterByChr(chromosome)
     self.assertEqual(
         len(filtered), 5,
         "Wrong number of lines from ChIP-seq after chr filter")
     for peak in filtered:
         on_chromosome = (peak.chrom == chromosome)
         self.assertTrue(on_chromosome,
                         "Wrong chromosome name filtered by chr")
예제 #10
0
 def test_ChIP_peak_inside_region(self):
     """Check insideRegion for the first peak with limits in either order."""
     first_peak = PeakSet('ChIP_peaks-ex1.txt')[0]
     # Region the peak is expected to fall inside
     low, high = 4252000, 4254000
     self.assertTrue(first_peak.insideRegion(high, low),
                     "ChIP peak should be in region")
     self.assertTrue(first_peak.insideRegion(low, high),
                     "ChIP peak should be in region (reversed limits)")
     # Region the peak is expected to fall outside
     high, low = 4252000, 4242000
     self.assertFalse(first_peak.insideRegion(high, low),
                      "ChIP peak should not be inside region")
     self.assertFalse(
         first_peak.insideRegion(low, high),
         "ChIP peak should not be inside region (reversed limits")
예제 #11
0
 def test_closest_transcript_to_peak(self):
     """Check GetNearestTranscriptToPeak picks the expected transcript."""
     transcripts = FeatureSet('Transcripts-ex1.txt')
     candidate_a = transcripts[1]
     candidate_b = transcripts[2]
     first_peak = PeakSet('ChIP_peaks-ex1.txt')[0]
     # With both candidates supplied the first one is expected back.
     # NOTE(review): an earlier comment here said the 2nd feature should
     # be closer, which contradicts this assertion - confirm which is right
     chosen = GetNearestTranscriptToPeak(candidate_a, candidate_b,
                                         first_peak)
     self.assertEqual(chosen, candidate_a,
                      "Wrong transcript selected as nearest")
     # With only one candidate set, that candidate is expected back
     chosen = GetNearestTranscriptToPeak(None, candidate_b, first_peak)
     self.assertEqual(chosen, candidate_b,
                      "Wrong transcript selected as nearest")
예제 #12
0
 def test_sort_by_distance_from(self):
     """Check sortByDistanceFrom orders peaks by distance from a position.

     After sorting, the distance of each peak's start from the reference
     position must be no smaller than that of the peak before it.
     """
     peaks_sort = PeakSet('ChIP_peaks-ex1.txt')
     position = 12000000
     # Do the sorting
     peaks_sort.sortByDistanceFrom(position)
     # Check that each distance is at least as large as the previous one
     last_peak = None
     for peak in peaks_sort:
         # Explicit None test: don't rely on the truthiness of a peak
         if last_peak is not None:
             self.assertTrue((abs(peak.start - position) >=
                              abs(last_peak.start - position)),
                             "Sort by distance failed")
         # Advance the reference so the next peak is compared with its
         # immediate predecessor rather than always with the first peak
         last_peak = peak
예제 #13
0
 def test_reading_in_ChIPseq_with_id_column(self):
     """Check peaks read with columns=(2, 4, 5) and id_column=1."""
     filename = 'ChIP_peaks_multi_columns-ex1.txt'
     peaks = PeakSet(filename, columns=(2, 4, 5), id_column=1)
     self.assertEqual(peaks.source_file, filename)
     self.assertEqual(len(peaks), 5,
                      "Wrong number of lines read from ChIP-seq file")
     # Expected (start, end, id) of each peak, in file order; all on chr3L
     expected = ((4252919, 4252920, "peak1"),
                 (9502640, 9502641, "peak2"),
                 (12139192, 12139193, "peak3"),
                 (14983597, 14983598, "peak4"),
                 (17004143, 17004144, "peak5"))
     for index, (start, end, peak_id) in enumerate(expected):
         self.assertEqual(peaks[index],
                          Peak('chr3L', start, end, id=peak_id))
예제 #14
0
 def setUp(self):
     """Create the feature, peak set and field tuples stored on self."""
     # Test feature and three peaks, all on chr2L
     self.feature = Feature('CG31973','chr2L','25402','59243','-')
     peak_limits = (('66711','66911'),
                    ('249077','249277'),
                    ('605850','606050'))
     self.peaks = PeakSet(
         peaks_list=tuple(Peak('chr2L',start,end)
                          for start,end in peak_limits))
     # Field specifications; the 'list(...)' entry is shared by both
     # single-line variants
     peak_list_field = ('list(peak.chr,peak.start,peak.end,'
                        'dist_closest,dist_TSS)')
     self.single_line_fields = ('feature.id',
                                'number_of_results',
                                peak_list_field)
     self.single_line_fields_extra_data = ('feature.id',
                                           'cutoff',
                                           'number_of_results',
                                           peak_list_field)
     self.multi_line_fields = ('feature.id',
                               'order',
                               'peak.chr',
                               'peak.start',
                               'peak.end',
                               'dist_closest',
                               'dist_TSS')
예제 #15
0
 def test_get_item(self):
     """Check that indexing a PeakSet returns the expected peak."""
     third_peak = PeakSet('ChIP_peaks-ex1.txt')[2]
     expected = Peak('chr3L', '12139192', '12139193')
     self.assertEqual(third_peak, expected)
예제 #16
0
 def test_is_summit_data(self):
     """Check isSummit is true for the 'ex1' data and false for 'ex5'."""
     summit_peaks = PeakSet('ChIP_peaks-ex1.txt')
     self.assertTrue(summit_peaks.isSummit(), "ChIP data are summits")
     region_peaks = PeakSet('ChIP_peaks-ex5.txt')
     self.assertFalse(region_peaks.isSummit(), "ChIP data are not summits")