def test_filter_on_strand(self):
    """Filter the example transcripts by strand and check the counts.

    The test expects five features on the '+' strand and five on the
    '-' strand of 'Transcripts-ex1.txt'.
    """
    transcripts = FeatureSet('Transcripts-ex1.txt')
    # (strand, failure message) pairs, checked in the same order as before
    strand_checks = (
        ('+', "Wrong number of + strands"),
        ('-', "Wrong number of - strands"),
    )
    for strand, message in strand_checks:
        on_strand = transcripts.filterByStrand(strand)
        self.assertEqual(len(on_strand), 5, message)
def test_filter_on_chromosome(self):
    """Filter the example transcripts by chromosome name.

    Exactly one feature is expected for 'chr3LHet', and every feature
    returned must report that chromosome.
    """
    transcripts = FeatureSet('Transcripts-ex1.txt')
    on_chromosome = transcripts.filterByChr('chr3LHet')
    self.assertEqual(len(on_chromosome), 1,
                     "Wrong number of chromosomes")
    for feature in on_chromosome:
        self.assertEqual(feature.chrom, 'chr3LHet',
                         "Wrong chromosome filtered")
def test_filter_on_TSS(self):
    """Filter the example transcripts on a TSS window.

    Three transcripts are expected with a TSS between 5000000 and
    10000000 (both limits inclusive in the check below).
    """
    lower = 5000000
    upper = 10000000
    transcripts = FeatureSet('Transcripts-ex1.txt')
    # The limits are handed over as (upper, lower), as in the original test.
    # NOTE(review): presumably filterByTSS accepts the limits in either
    # order - confirm against its implementation.
    in_window = transcripts.filterByTSS(upper, lower)
    self.assertEqual(len(in_window), 3,
                     "Wrong number of transcripts filtered on TSS")
    for feature in in_window:
        tss = feature.getTSS()
        self.assertTrue((tss >= lower and tss <= upper),
                        "Transcript outside range")
def test_getTSS(self):
    """Check that getTSS() matches the strand-appropriate coordinate.

    For '+' strand features the TSS must equal 'start'; for '-' strand
    features it must equal 'end'.
    """
    transcripts = FeatureSet('Transcripts-ex1.txt')
    # (strand, attribute expected to hold the TSS, failure message)
    expectations = (
        ('+', 'start', "Incorrect TSS on + strand"),
        ('-', 'end', "Incorrect TSS on - strand"),
    )
    for strand, tss_attr, message in expectations:
        for feature in transcripts.filterByStrand(strand):
            self.assertTrue(
                (feature.strand == strand and
                 getattr(feature, tss_attr) == feature.getTSS()),
                message)
def test_write_features_summary(self):
    """Check the contents of the summary file from AnalysisReportWriter.

    Writes the nearest features for one peak with ``summary`` pointing
    at a temporary file, then compares that file against the expected
    header line plus a single data line.

    The temporary file is closed and removed even if the test fails.
    """
    import os  # local import: only needed for temp file housekeeping

    # Set up some test data
    peak = Peak('chr2L', 66811, 66812)
    features = FeatureSet(
        features_list=(
            Feature('CG31973', 'chr2L', 25402, 59243, '-'),
            Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),
            Feature('CG2674-RC', 'chr2L', 107926, 114433, '+')))
    # Temp output file: mkstemp() returns an open OS-level descriptor
    # as well as the path. The file is only ever accessed by name, so
    # close the descriptor straight away to avoid leaking it.
    fd, summary = tempfile.mkstemp()
    os.close(fd)
    try:
        # Write peaks to file
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=('peak.chr',
                                          'peak.start',
                                          'peak.end',
                                          'order',
                                          'feature.id',
                                          'dist_closest',
                                          'dist_TSS'),
                                  summary=summary)
        ap.write_nearest_features(peak, features)
        ap.close()
        # Expected and actual output
        expected_output = \
            "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
            "chr2L\t66811\t66812\t1 of 3\tCG31973\t7568\t7568\n"
        with open(summary, 'r') as summary_file:
            actual_output = summary_file.read()
    finally:
        # mkstemp() never deletes the file itself
        os.remove(summary)
    # Check that output matches
    self.assertEqual(expected_output, actual_output)
def setUp(self):
    """Create the peak, feature set and field lists shared by the tests."""
    # Field groups that recur in the field lists below
    peak_fields = ('peak.chr', 'peak.start', 'peak.end')
    feature_list_field = 'list(feature.id,' 'dist_closest,dist_TSS)'

    # Test data: one peak plus three features on the same chromosome
    self.peak = Peak('chr2L', 66811, 66812)
    self.features = FeatureSet(
        features_list=(
            Feature('CG31973', 'chr2L', 25402, 59243, '-'),
            Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),
            Feature('CG2674-RC', 'chr2L', 107926, 114433, '+')))

    # Field lists for the single-line and multi-line output variants
    self.single_line_fields = peak_fields + (
        'number_of_results',
        feature_list_field)
    self.single_line_fields_extra_data = peak_fields + (
        'cutoff',
        'number_of_results',
        feature_list_field)
    self.multi_line_fields = peak_fields + (
        'order',
        'feature.id',
        'dist_closest',
        'dist_TSS')
def test_write_features_append(self):
    """Check that a second writer with ``append=True`` extends the file.

    Two AnalysisReportWriter instances write to the same temporary
    output file in turn; the second is created with ``append=True``.
    The expected file holds one header line followed by the data lines
    from both writes.

    The temporary file is closed and removed even if the test fails.
    """
    import os  # local import: only needed for temp file housekeeping

    # Set up some test data
    peak = Peak('chr2L', 66811, 66812)
    features1 = FeatureSet(
        features_list=(
            Feature('CG31973', 'chr2L', 25402, 59243, '-'),
            Feature('CG2674-RE', 'chr2L', 106903, 114433, '+'),))
    features2 = FeatureSet(
        features_list=(
            Feature('CG2674-RC', 'chr2L', 107926, 114433, '+'),))
    # Both writers report the same fields
    fields = ('peak.chr',
              'peak.start',
              'peak.end',
              'order',
              'feature.id',
              'dist_closest',
              'dist_TSS')
    # Temp output file: mkstemp() returns an open OS-level descriptor
    # as well as the path. The file is only ever accessed by name, so
    # close the descriptor straight away to avoid leaking it.
    fd, outfile = tempfile.mkstemp()
    os.close(fd)
    try:
        # Write first set of nearest features
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=fields,
                                  outfile=outfile)
        ap.write_nearest_features(peak, features1)
        ap.close()
        # Write second set of nearest features
        ap = AnalysisReportWriter(output.MULTI_LINE,
                                  fields=fields,
                                  outfile=outfile,
                                  append=True)
        ap.write_nearest_features(peak, features2)
        ap.close()
        # Expected and actual output
        expected_output = \
            "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
            "chr2L\t66811\t66812\t1 of 2\tCG31973\t7568\t7568\n" \
            "chr2L\t66811\t66812\t2 of 2\tCG2674-RE\t40091\t40091\n" \
            "chr2L\t66811\t66812\t1 of 1\tCG2674-RC\t41114\t41114\n"
        with open(outfile, 'r') as report_file:
            actual_output = report_file.read()
    finally:
        # mkstemp() never deletes the file itself
        os.remove(outfile)
    # Check that output matches
    self.assertEqual(expected_output, actual_output)
def test_sort_by_distance(self):
    """Check that sortByDistanceFrom() orders features by TSS distance.

    After sorting, the distance of each feature's TSS from the
    reference position must be no smaller than that of the feature
    immediately before it.
    """
    rna_sort = FeatureSet('Transcripts-ex1.txt')
    position = 4250000
    # Do sort on distance
    # Sort is done in place, so assignment is not required
    # however the sort function should return a reference to
    # the initial object
    result = rna_sort.sortByDistanceFrom(position)
    self.assertEqual(result, rna_sort,
                     "Returned object doesn't match subject")
    # Check that each distance is greater than (or equal to) the
    # previous one. The previous distance is advanced on every
    # iteration so that neighbouring features are compared, not just
    # each feature against the first one.
    last_distance = None
    for rna_data in rna_sort:
        distance = abs(rna_data.getTSS() - position)
        if last_distance is not None:
            self.assertTrue((distance >= last_distance),
                            "Sort by distance failed")
        last_distance = distance
def test_sort_by_closest_TSS_to_edge(self):
    """Check that sortByClosestTSSTo() orders by distance to nearest edge.

    For each feature the relevant distance is the smaller of the
    distances from its TSS to the two edges of the region. After
    sorting, that distance must be no smaller than the one for the
    feature immediately before it.
    """
    rna_sort = FeatureSet('Transcripts-ex1.txt')
    position = (16000000, 17500000)
    # Do sort
    # Sort is done in place, so assignment is not required
    # however the sort function should return a reference to
    # the initial object
    result = rna_sort.sortByClosestTSSTo(*position)
    self.assertEqual(result, rna_sort,
                     "Returned object doesn't match subject")
    # Check that the closest distances are in ascending order. The
    # previous distance is advanced on every iteration so that
    # neighbouring features are compared, not just each feature
    # against the first one.
    last_closest = None
    for rna_data in rna_sort:
        tss = rna_data.getTSS()
        closest = min(abs(tss - position[0]),
                      abs(tss - position[1]))
        if last_closest is not None:
            self.assertTrue((closest >= last_closest),
                            "Sort by closest TSS to edge failed")
        last_closest = closest
def test_closest_transcript_to_peak(self):
    """Check GetNearestTranscriptToPeak() with two and with one candidate.

    Uses the features at indices 1 and 2 of 'Transcripts-ex1.txt' and
    the first peak of 'ChIP_peaks-ex1.txt'.
    """
    transcripts = FeatureSet('Transcripts-ex1.txt')
    first_candidate = transcripts[1]
    second_candidate = transcripts[2]
    chip_peak = PeakSet('ChIP_peaks-ex1.txt')[0]

    # Both candidates supplied: the first one is expected back.
    # NOTE(review): the earlier comment here said the 2nd feature should
    # be closer than the first, which contradicts the assertion - confirm
    # against the fixture data which of the two is intended.
    chosen = GetNearestTranscriptToPeak(first_candidate,
                                        second_candidate,
                                        chip_peak)
    self.assertEqual(chosen, first_candidate,
                     "Wrong transcript selected as nearest")

    # Only one candidate set: it must be the one returned
    chosen = GetNearestTranscriptToPeak(None, second_candidate, chip_peak)
    self.assertEqual(chosen, second_candidate,
                     "Wrong transcript selected as nearest")
def test__eq__(self):
    """Check equality and inequality of FeatureSets as they are populated."""
    left = FeatureSet()
    right = FeatureSet()

    # Two empty sets compare equal
    self.assertEqual(left, right)

    # The same single feature in both
    left.addFeature(Feature('CG1000', 'chr1', '1', '2', '+'))
    right.addFeature(Feature('CG1000', 'chr1', '1', '2', '+'))
    self.assertEqual(left, right)

    # A second feature: unequal until it is present in both sets
    left.addFeature(Feature('CG2000', 'chr1', '1', '2', '+'))
    self.assertNotEqual(left, right)
    right.addFeature(Feature('CG2000', 'chr1', '1', '2', '+'))
    self.assertEqual(left, right)

    # A differing third feature makes the sets unequal
    left.addFeature(Feature('CG2001', 'chr2', 3, 4, '-'))
    right.addFeature(Feature('CG2002', 'chr2', 3, 5, '+'))
    self.assertNotEqual(left, right)
def test_filter_on_flag(self):
    """Filter the example transcripts on flag value 1; four are expected."""
    transcripts = FeatureSet('Transcripts-ex1.txt')
    flagged = transcripts.filterByFlag(1)
    flagged_count = len(flagged)
    self.assertEqual(flagged_count, 4,
                     "Wrong number of flagged data lines")
def test_reading_in_RNAseq_data(self):
    """Load the example transcripts file and check its size and flag state.

    Ten features are expected, and the set must report itself as flagged.
    """
    transcripts = FeatureSet('Transcripts-ex1.txt')
    feature_count = len(transcripts)
    self.assertEqual(feature_count, 10,
                     "Wrong number of lines from RNA-seq file")
    self.assertTrue(transcripts.isFlagged(),
                    "Data should be flagged")