def test_write_peaks_summary(self):
    """Check the summary file written for the peaks nearest a feature.

    Three peaks are reported against one feature in MULTI_LINE mode;
    the expected summary consists of the header line plus the single
    '1 of 3' line.

    The temporary file is created with ``tempfile.mkstemp``, whose
    descriptor is closed immediately, and the file is removed once
    its contents have been read back.
    """
    import os  # local import: only needed for temp file housekeeping

    # Set up some test data
    feature = Feature('CG31973', 'chr2L', '25402', '59243', '-')
    peaks = PeakSet(
        peaks_list=(
            Peak('chr2L', '66711', '66911'),
            Peak('chr2L', '249077', '249277'),
            Peak('chr2L', '605850', '606050')))
    # Temp output file: mkstemp hands back an open descriptor that
    # nothing else uses, so close it straight away
    fd, summary = tempfile.mkstemp()
    os.close(fd)
    try:
        # Write peaks to file
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=('feature.id',
                                          'order',
                                          'peak.chr',
                                          'peak.start',
                                          'peak.end',
                                          'dist_closest',
                                          'dist_TSS'),
                                  summary=summary)
        ap.write_nearest_peaks(feature, peaks)
        ap.close()
        # Read back what was written, closing the handle afterwards
        with open(summary, 'r') as summary_file:
            actual_output = summary_file.read()
    finally:
        # mkstemp never deletes the file itself
        os.remove(summary)
    # Expected output
    expected_output = \
        "#feature.id\torder\tpeak.chr\tpeak.start\tpeak.end\tdist_closest\tdist_TSS\n" \
        "CG31973\t1 of 3\tchr2L\t66711\t66911\t7468\t7468\n"
    # Check that output matches
    self.assertEqual(expected_output, actual_output)
def test_tss_distances_peak_contains_TSS(self):
    """tss_distances reports zero as first distance when peak spans the TSS."""
    # '+' strand feature starting at 200, inside the peak 100-250
    forward_peak = Peak('chr1', '100', '250')
    forward_feature = Feature('NM3', 'chr1', '200', '400', '+')
    forward_result = tss_distances(forward_peak, forward_feature)
    self.assertEqual(forward_result, (0, 75))
    # '-' strand feature ending at 300, inside the peak 250-350
    reverse_peak = Peak('chr1', '250', '350')
    reverse_feature = Feature('NM4', 'chr1', '200', '300', '-')
    reverse_result = tss_distances(reverse_peak, reverse_feature)
    self.assertEqual(reverse_result, (0, 50))
def test_tss_distances_TSS_before_peak(self):
    """tss_distances for a feature lying wholly before the peak."""
    # Same coordinates on each strand give different expected distances
    plus_strand = tss_distances(
        Peak('chr1', '250', '400'),
        Feature('NM2', 'chr1', '100', '200', '+'))
    self.assertEqual(plus_strand, (150, 300))
    minus_strand = tss_distances(
        Peak('chr1', '250', '400'),
        Feature('NM2', 'chr1', '100', '200', '-'))
    self.assertEqual(minus_strand, (50, 200))
def test_reading_in_ChIPseq_data_custom_columns(self):
    """Load peaks from a multi-column file with columns=(2, 4, 5)."""
    source = 'ChIP_peaks_multi_columns-ex1.txt'
    # Expected (chrom, start, end) of each peak, in order
    expected_coords = (
        ('chr3L', 4252919, 4252920),
        ('chr3L', 9502640, 9502641),
        ('chr3L', 12139192, 12139193),
        ('chr3L', 14983597, 14983598),
        ('chr3L', 17004143, 17004144),
    )
    peaks = PeakSet(source, columns=(2, 4, 5))
    self.assertEqual(peaks.source_file, source)
    self.assertEqual(len(peaks), 5,
                     "Wrong number of lines read from ChIP-seq file")
    for index, (chrom, start, end) in enumerate(expected_coords):
        self.assertEqual(peaks[index], Peak(chrom, start, end))
def test_write_features_summary(self):
    """Check the summary file written for the features nearest a peak.

    Three features are reported against one peak in MULTI_LINE mode;
    the expected summary consists of the header line plus the single
    '1 of 3' line.

    The temporary file is created with ``tempfile.mkstemp``, whose
    descriptor is closed immediately, and the file is removed once
    its contents have been read back.
    """
    import os  # local import: only needed for temp file housekeeping

    # Set up some test data
    peak = Peak('chr2L', 66811, 66812)
    features = FeatureSet(
        features_list=(
            Feature('CG31973', 'chr2L', 25402, 59243, '-'),
            Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),
            Feature('CG2674-RC', 'chr2L', 107926, 114433, '+')))
    # Temp output file: mkstemp hands back an open descriptor that
    # nothing else uses, so close it straight away
    fd, summary = tempfile.mkstemp()
    os.close(fd)
    try:
        # Write features to file
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=('peak.chr',
                                          'peak.start',
                                          'peak.end',
                                          'order',
                                          'feature.id',
                                          'dist_closest',
                                          'dist_TSS'),
                                  summary=summary)
        ap.write_nearest_features(peak, features)
        ap.close()
        # Read back what was written, closing the handle afterwards
        with open(summary, 'r') as summary_file:
            actual_output = summary_file.read()
    finally:
        # mkstemp never deletes the file itself
        os.remove(summary)
    # Expected output
    expected_output = \
        "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
        "chr2L\t66811\t66812\t1 of 3\tCG31973\t7568\t7568\n"
    # Check that output matches
    self.assertEqual(expected_output, actual_output)
def setUp(self):
    """Create one peak, three features and the field lists used by tests."""
    # Reference peak and the candidate features
    self.peak = Peak('chr2L', 66811, 66812)
    candidate_features = (
        Feature('CG31973', 'chr2L', 25402, 59243, '-'),
        Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),
        Feature('CG2674-RC', 'chr2L', 107926, 114433, '+'),
    )
    self.features = FeatureSet(features_list=candidate_features)
    # Building blocks shared by the field specifications
    peak_fields = ('peak.chr', 'peak.start', 'peak.end')
    list_field = 'list(feature.id,dist_closest,dist_TSS)'
    self.single_line_fields = peak_fields + ('number_of_results',
                                             list_field)
    self.single_line_fields_extra_data = peak_fields + ('cutoff',
                                                        'number_of_results',
                                                        list_field)
    self.multi_line_fields = peak_fields + ('order',
                                            'feature.id',
                                            'dist_closest',
                                            'dist_TSS')
def test_peak_with_id(self):
    """A Peak created with an id exposes it and prefixes its string form."""
    named_peak = Peak('chr2L', '66811', '66812', id='Peak001')
    # Coordinates passed as strings are expected back as integers
    self.assertEqual(named_peak.chrom, 'chr2L')
    self.assertEqual(named_peak.start, 66811)
    self.assertEqual(named_peak.end, 66812)
    self.assertEqual(named_peak.id, 'Peak001')
    # The id leads the tab-delimited representation
    self.assertEqual(str(named_peak), "Peak001\tchr2L\t66811\t66812")
    # No source file was supplied
    self.assertEqual(named_peak.source_file, None)
def test_peak_properties(self):
    """A Peak built from chrom/start/end only has no id or source file."""
    plain_peak = Peak('chr2L', '66811', '66812')
    # Coordinates passed as strings are expected back as integers
    self.assertEqual(plain_peak.chrom, 'chr2L')
    self.assertEqual(plain_peak.start, 66811)
    self.assertEqual(plain_peak.end, 66812)
    # Optional attributes are unset
    self.assertEqual(plain_peak.id, None)
    self.assertEqual(plain_peak.source_file, None)
    # Tab-delimited representation has just the three core values
    self.assertEqual(str(plain_peak), "chr2L\t66811\t66812")
def test_peak_with_source_file(self):
    """A Peak created with a source file records it without altering str()."""
    sourced_peak = Peak('chr2L', '66811', '66812',
                        source_file="Peaks1.txt")
    # Coordinates passed as strings are expected back as integers
    self.assertEqual(sourced_peak.chrom, 'chr2L')
    self.assertEqual(sourced_peak.start, 66811)
    self.assertEqual(sourced_peak.end, 66812)
    self.assertEqual(sourced_peak.id, None)
    # The source file does not appear in the string representation
    self.assertEqual(str(sourced_peak), "chr2L\t66811\t66812")
    self.assertEqual(sourced_peak.source_file, "Peaks1.txt")
def test_reading_in_ChIPseq_with_id_column(self):
    """Load peaks with columns=(2, 4, 5) and ids taken from id_column=1."""
    source = 'ChIP_peaks_multi_columns-ex1.txt'
    # Expected (chrom, start, end, id) of each peak, in order
    expected_records = (
        ('chr3L', 4252919, 4252920, "peak1"),
        ('chr3L', 9502640, 9502641, "peak2"),
        ('chr3L', 12139192, 12139193, "peak3"),
        ('chr3L', 14983597, 14983598, "peak4"),
        ('chr3L', 17004143, 17004144, "peak5"),
    )
    peaks = PeakSet(source, columns=(2, 4, 5), id_column=1)
    self.assertEqual(peaks.source_file, source)
    self.assertEqual(len(peaks), 5,
                     "Wrong number of lines read from ChIP-seq file")
    for index, (chrom, start, end, peak_id) in enumerate(expected_records):
        self.assertEqual(peaks[index],
                         Peak(chrom, start, end, id=peak_id))
def test_populate_from_list_of_peaks(self):
    """A PeakSet built from peaks_list keeps the peaks in the given order."""
    coords = (
        ('chr2L', 66711, 66911),
        ('chr2L', 605850, 606050),
        ('chr3L', 2258089, 2258289),
    )
    initial_peaks = tuple(Peak(chrom, start, end)
                          for chrom, start, end in coords)
    peaks = PeakSet(peaks_list=initial_peaks)
    # Nothing was read from disk
    self.assertEqual(peaks.source_file, None)
    # Compare each stored peak against a freshly built equivalent
    for index, (chrom, start, end) in enumerate(coords):
        self.assertEqual(peaks[index], Peak(chrom, start, end))
def setUp(self):
    """Create one feature, three peaks and the field lists used by tests."""
    # Reference feature and the candidate peaks
    self.feature = Feature('CG31973', 'chr2L', '25402', '59243', '-')
    candidate_peaks = (
        Peak('chr2L', '66711', '66911'),
        Peak('chr2L', '249077', '249277'),
        Peak('chr2L', '605850', '606050'),
    )
    self.peaks = PeakSet(peaks_list=candidate_peaks)
    # Building block shared by the single-line field specifications
    list_field = 'list(peak.chr,peak.start,peak.end,dist_closest,dist_TSS)'
    self.single_line_fields = ('feature.id',
                               'number_of_results',
                               list_field)
    self.single_line_fields_extra_data = ('feature.id',
                                          'cutoff',
                                          'number_of_results',
                                          list_field)
    self.multi_line_fields = ('feature.id',
                              'order',
                              'peak.chr',
                              'peak.start',
                              'peak.end',
                              'dist_closest',
                              'dist_TSS')
def test_write_features_append(self):
    """Check that a second writer opened with append=True extends the file.

    Two feature sets are written to the same output file by two
    separate AnalysisReportWriter instances, the second created with
    ``append=True``. The expected file holds one header line followed
    by the lines from both writes.

    The temporary file is created with ``tempfile.mkstemp``, whose
    descriptor is closed immediately, and the file is removed once
    its contents have been read back.
    """
    import os  # local import: only needed for temp file housekeeping

    # Set up some test data
    peak = Peak('chr2L', 66811, 66812)
    features1 = FeatureSet(
        features_list=(
            Feature('CG31973', 'chr2L', 25402, 59243, '-'),
            Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),))
    features2 = FeatureSet(
        features_list=(
            Feature('CG2674-RC', 'chr2L', 107926, 114433, '+'),))
    # Both writers report the same fields
    fields = ('peak.chr',
              'peak.start',
              'peak.end',
              'order',
              'feature.id',
              'dist_closest',
              'dist_TSS')
    # Temp output file: mkstemp hands back an open descriptor that
    # nothing else uses, so close it straight away
    fd, outfile = tempfile.mkstemp()
    os.close(fd)
    try:
        # Write first set of nearest features
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=fields,
                                  outfile=outfile)
        ap.write_nearest_features(peak, features1)
        ap.close()
        # Write second set of nearest features
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=fields,
                                  outfile=outfile,
                                  append=True)
        ap.write_nearest_features(peak, features2)
        ap.close()
        # Read back what was written, closing the handle afterwards
        with open(outfile, 'r') as report_file:
            actual_output = report_file.read()
    finally:
        # mkstemp never deletes the file itself
        os.remove(outfile)
    # Expected output
    expected_output = \
        "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
        "chr2L\t66811\t66812\t1 of 2\tCG31973\t7568\t7568\n" \
        "chr2L\t66811\t66812\t2 of 2\tCG2674-RE\t40091\t40091\n" \
        "chr2L\t66811\t66812\t1 of 1\tCG2674-RC\t41114\t41114\n"
    # Check that output matches
    self.assertEqual(expected_output, actual_output)
def test__eq__(self):
    """PeakSets compare equal only when they hold equal peaks."""
    left = PeakSet()
    right = PeakSet()
    # Two empty peak sets are equal
    self.assertEqual(left, right)
    # One identical peak in each
    left.addPeak(Peak('chr2L', '66811', '66812'))
    right.addPeak(Peak('chr2L', '66811', '66812'))
    self.assertEqual(left, right)
    # A second peak on one side only breaks equality...
    left.addPeak(Peak('chr2L', '249177', '605951'))
    self.assertNotEqual(left, right)
    # ...until the other side receives the same peak
    right.addPeak(Peak('chr2L', '249177', '605951'))
    self.assertEqual(left, right)
    # Third peaks that differ from each other
    left.addPeak(Peak('chr2L', '605650', '605850'))
    right.addPeak(Peak('chr2L', '605850', '606050'))
    self.assertNotEqual(left, right)
def test_feature_upstream_from_peak_neg_strand(self):
    """Peak 100-200 vs '-' strand feature 250-400: expect UPSTREAM."""
    reference_peak = Peak('chr1', '100', '200')
    target_feature = Feature('NM1', 'chr1', '250', '400', '-')
    result = direction(reference_peak, target_feature)
    self.assertEqual(result, UPSTREAM)
def test_feature_upstream_from_peak_partial_overlap(self):
    """Peak 350-550 overlapping end of '+' feature 250-400: expect UPSTREAM."""
    reference_peak = Peak('chr1', '350', '550')
    target_feature = Feature('NM1', 'chr1', '250', '400', '+')
    result = direction(reference_peak, target_feature)
    self.assertEqual(result, UPSTREAM)
def test_feature_downstream_from_peak(self):
    """Peak 100-200 vs '+' strand feature 250-400: expect DOWNSTREAM."""
    reference_peak = Peak('chr1', '100', '200')
    target_feature = Feature('NM1', 'chr1', '250', '400', '+')
    result = direction(reference_peak, target_feature)
    self.assertEqual(result, DOWNSTREAM)
def test_peak_upstream_from_feature_neg_strand_partial_overlap(self):
    """'-' feature 250-400 vs overlapping peak 100-300: expect DOWNSTREAM."""
    target_peak = Peak('chr1', '100', '300')
    reference_feature = Feature('NM1', 'chr1', '250', '400', '-')
    # The feature is the reference here, the peak is the target
    result = direction(reference_feature, target_peak)
    self.assertEqual(result, DOWNSTREAM)
def test_peak_downstream_from_feature_neg_strand(self):
    """'-' strand feature 250-400 vs peak 450-550: expect UPSTREAM."""
    target_peak = Peak('chr1', '450', '550')
    reference_feature = Feature('NM1', 'chr1', '250', '400', '-')
    # The feature is the reference here, the peak is the target
    result = direction(reference_feature, target_peak)
    self.assertEqual(result, UPSTREAM)
def test_peak_full_overlap_feature(self):
    """Peak 200-450 covering '+' feature 250-400 entirely: expect OVERLAP."""
    target_peak = Peak('chr1', '200', '450')
    reference_feature = Feature('NM1', 'chr1', '250', '400', '+')
    # The feature is the reference here, the peak is the target
    result = direction(reference_feature, target_peak)
    self.assertEqual(result, OVERLAP)
def test_peak_downstream_from_feature_partial_overlap(self):
    """'+' feature 250-400 vs overlapping peak 350-550: expect DOWNSTREAM."""
    target_peak = Peak('chr1', '350', '550')
    reference_feature = Feature('NM1', 'chr1', '250', '400', '+')
    # The feature is the reference here, the peak is the target
    result = direction(reference_feature, target_peak)
    self.assertEqual(result, DOWNSTREAM)
def test_peak_upstream_from_feature(self):
    """'+' strand feature 250-400 vs peak 100-200: expect UPSTREAM."""
    target_peak = Peak('chr1', '100', '200')
    reference_feature = Feature('NM1', 'chr1', '250', '400', '+')
    # The feature is the reference here, the peak is the target
    result = direction(reference_feature, target_peak)
    self.assertEqual(result, UPSTREAM)
def test_get_item(self):
    """Indexing a PeakSet read from file returns the peak at that position."""
    loaded = PeakSet('ChIP_peaks-ex1.txt')
    # Third entry (index 2) of the example file
    self.assertEqual(loaded[2], Peak('chr3L', '12139192', '12139193'))
def test_distances_feature_contains_peak(self):
    """edge_distances is (0, 0) for peak 250-350 inside feature 100-400."""
    inner_peak = Peak('chr1', '250', '350')
    outer_feature = Feature('NM6', 'chr1', '100', '400', '+')
    distances = edge_distances(inner_peak, outer_feature)
    self.assertEqual(distances, (0, 0))
def test_distances_feature_overlaps_peak_start(self):
    """edge_distances is (0, 50) for peak 250-350 and feature 300-400."""
    overlapped_peak = Peak('chr1', '250', '350')
    overlapping_feature = Feature('NM4', 'chr1', '300', '400', '+')
    distances = edge_distances(overlapped_peak, overlapping_feature)
    self.assertEqual(distances, (0, 50))
def test_feature_downstream_from_peak_neg_strand_partial_overlap(self):
    """Peak 350-550 overlapping '-' feature 250-400: expect DOWNSTREAM."""
    reference_peak = Peak('chr1', '350', '550')
    target_feature = Feature('NM1', 'chr1', '250', '400', '-')
    result = direction(reference_peak, target_feature)
    self.assertEqual(result, DOWNSTREAM)
def test__eq__(self):
    """Peaks compare equal exactly when chromosome and coordinates match."""
    # Two independently built peaks with the same values
    first = Peak('chr2L', '66811', '66812')
    twin = Peak('chr2L', '66811', '66812')
    self.assertEqual(first, twin)
    # Same chromosome, different coordinates
    base = Peak('chr2L', '66811', '66812')
    other = Peak('chr2L', '249177', '605951')
    self.assertNotEqual(base, other)
def test_feature_full_overlap_peak_neg_strand(self):
    """Peak 200-450 covering '-' feature 250-400 entirely: expect OVERLAP."""
    reference_peak = Peak('chr1', '200', '450')
    target_feature = Feature('NM1', 'chr1', '250', '400', '-')
    result = direction(reference_peak, target_feature)
    self.assertEqual(result, OVERLAP)
def test_distances_feature_before_peak(self):
    """edge_distances is (50, 150) for peak 250-400 and feature 100-200."""
    later_peak = Peak('chr1', '250', '400')
    earlier_feature = Feature('NM2', 'chr1', '100', '200', '+')
    distances = edge_distances(later_peak, earlier_feature)
    self.assertEqual(distances, (50, 150))