def test_filter_on_strand(self):
     # Filtering the example transcripts by strand is expected to
     # give five features for '+' and five for '-'
     transcripts = FeatureSet('Transcripts-ex1.txt')
     expectations = (('+',"Wrong number of + strands"),
                     ('-',"Wrong number of - strands"))
     for strand,message in expectations:
         on_strand = transcripts.filterByStrand(strand)
         self.assertEqual(len(on_strand),5,message)
 def test_filter_on_chromosome(self):
     # Filtering by chromosome name is expected to keep exactly one
     # feature from the example file, and every feature kept must
     # carry the requested chromosome name
     chrom_name = 'chr3LHet'
     matches = FeatureSet('Transcripts-ex1.txt').filterByChr(chrom_name)
     self.assertEqual(len(matches),1,
                      "Wrong number of chromosomes")
     for feature in matches:
         self.assertEqual(feature.chrom,chrom_name,
                          "Wrong chromosome filtered")
 def test_filter_on_TSS(self):
     # Filtering on a TSS window is expected to keep three features
     # from the example file, each with its TSS inside the window
     # (both limits inclusive)
     lower,upper = 5000000,10000000
     transcripts = FeatureSet('Transcripts-ex1.txt')
     # NOTE(review): the limits are passed as (upper,lower) - confirm
     # that filterByTSS is meant to accept them in this order
     in_window = transcripts.filterByTSS(upper,lower)
     self.assertEqual(len(in_window),3,
                      "Wrong number of transcripts filtered on TSS")
     for feature in in_window:
         tss = feature.getTSS()
         self.assertTrue(lower <= tss <= upper,
                         "Transcript outside range")
 def test_getTSS(self):
     # getTSS() is expected to match 'start' for features on the
     # '+' strand and 'end' for features on the '-' strand
     transcripts = FeatureSet('Transcripts-ex1.txt')
     # Forward strand: TSS is the start position
     for feature in transcripts.filterByStrand('+'):
         tss_is_start = (feature.strand == '+' and
                         feature.start == feature.getTSS())
         self.assertTrue(tss_is_start,
                         "Incorrect TSS on + strand")
     # Reverse strand: TSS is the end position
     for feature in transcripts.filterByStrand('-'):
         tss_is_end = (feature.strand == '-' and
                       feature.end == feature.getTSS())
         self.assertTrue(tss_is_end,
                         "Incorrect TSS on - strand")
Example #5
0
 def test_write_features_summary(self):
     # Write the nearest features for one peak in multi-line mode
     # with the 'summary' keyword set to a temp file, then compare
     # that file's contents with the expected text
     import os
     # Set up some test data
     peak = Peak('chr2L',66811,66812)
     features = FeatureSet(
         features_list=(
             Feature('CG31973','chr2L',25402,59243,'-'),
             Feature('CG2674-RE','chr2L',106903,114433,'+'),
             Feature('CG2674-RC','chr2L',107926,114433,'+')))
     # Expected output (header line plus a single '1 of 3' line)
     expected_output = \
         "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
         "chr2L\t66811\t66812\t1 of 3\tCG31973\t7568\t7568\n"
     # Temp output file: mkstemp hands back an open OS-level file
     # descriptor as well as the path. The writer is only given the
     # path, so the descriptor is closed here to avoid leaking it
     fp,summary = tempfile.mkstemp()
     os.close(fp)
     try:
         # Write peaks to file
         ap = AnalysisReportWriter(output.MULTI_LINE,
                                   fields=('peak.chr',
                                           'peak.start',
                                           'peak.end',
                                           'order',
                                           'feature.id',
                                           'dist_closest',
                                           'dist_TSS'),
                                   summary=summary)
         ap.write_nearest_features(peak,features)
         ap.close()
         # Read back the actual output, closing the file afterwards
         with open(summary,'r') as summary_file:
             actual_output = summary_file.read()
     finally:
         # Remove the temp file whether or not the writing succeeded
         os.remove(summary)
     # Check that output matches
     self.assertEqual(expected_output,actual_output)
Example #6
0
 def setUp(self):
     # Shared fixtures for the tests: one peak, a set of three
     # features, and the field-name tuples for the single-line
     # and multi-line report layouts
     peak_fields = ('peak.chr','peak.start','peak.end')
     feature_list_field = ('list(feature.id,'
                           'dist_closest,dist_TSS)')
     feature_specs = (('CG31973','chr2L',25402,59243,'-'),
                      ('CG2674-RE','chr2L',106903,114433,'+'),
                      ('CG2674-RC','chr2L',107926,114433,'+'))
     # Test data
     self.peak = Peak('chr2L',66811,66812)
     self.features = FeatureSet(
         features_list=tuple(Feature(*spec) for spec in feature_specs))
     # Single-line layouts (without and with the 'cutoff' field)
     self.single_line_fields = peak_fields + ('number_of_results',
                                              feature_list_field)
     self.single_line_fields_extra_data = peak_fields + (
         'cutoff',
         'number_of_results',
         feature_list_field)
     # Multi-line layout
     self.multi_line_fields = peak_fields + ('order',
                                             'feature.id',
                                             'dist_closest',
                                             'dist_TSS')
Example #7
0
 def test_write_features_append(self):
     # Write nearest features to a temp file with one writer, then
     # write more to the same file with a second writer created
     # with append=True, and check the combined file contents
     import os
     # Set up some test data
     peak = Peak('chr2L',66811,66812)
     features1 = FeatureSet(
         features_list=(
             Feature('CG31973','chr2L',25402,59243,'-'),
             Feature('CG2674-RE','chr2L',106903,114433,'+'),))
     features2 = FeatureSet(
         features_list=(
             Feature('CG2674-RC','chr2L',107926,114433,'+'),))
     # Both writers use the same set of fields
     fields = ('peak.chr',
               'peak.start',
               'peak.end',
               'order',
               'feature.id',
               'dist_closest',
               'dist_TSS')
     # Expected output (one header line, then the lines from the
     # first and second writes)
     expected_output = \
         "#peak.chr\tpeak.start\tpeak.end\torder\tfeature.id\tdist_closest\tdist_TSS\n" \
         "chr2L\t66811\t66812\t1 of 2\tCG31973\t7568\t7568\n" \
         "chr2L\t66811\t66812\t2 of 2\tCG2674-RE\t40091\t40091\n" \
         "chr2L\t66811\t66812\t1 of 1\tCG2674-RC\t41114\t41114\n"
     # Temp output file: mkstemp hands back an open OS-level file
     # descriptor as well as the path. The writers are only given
     # the path, so the descriptor is closed here to avoid leaking it
     fp,outfile = tempfile.mkstemp()
     os.close(fp)
     try:
         # Write first set of nearest features
         ap = AnalysisReportWriter(output.MULTI_LINE,
                                   fields=fields,
                                   outfile=outfile)
         ap.write_nearest_features(peak,features1)
         ap.close()
         # Write second set of nearest features
         ap = AnalysisReportWriter(output.MULTI_LINE,
                                   fields=fields,
                                   outfile=outfile,
                                   append=True)
         ap.write_nearest_features(peak,features2)
         ap.close()
         # Read back the actual output, closing the file afterwards
         with open(outfile,'r') as written:
             actual_output = written.read()
     finally:
         # Remove the temp file whether or not the writing succeeded
         os.remove(outfile)
     # Check that output matches
     self.assertEqual(expected_output,actual_output)
 def test_sort_by_distance(self):
     # Sorting by distance from a position should order the
     # features so that the distance from each TSS to that position
     # never decreases from one feature to the next
     rna_sort = FeatureSet('Transcripts-ex1.txt')
     position = 4250000
     # Do sort on distance
     # Sort is done in place, so assignment is not required
     # however the sort function should return a reference to
     # the initial object
     result = rna_sort.sortByDistanceFrom(position)
     self.assertEqual(result,rna_sort,
                      "Returned object doesn't match subject")
     # Check that each distance is greater than (or equal to) the
     # previous one. The comparison is made inside the loop for
     # every consecutive pair, and the "previous" distance is moved
     # on each time round so that neighbours are compared
     previous_distance = None
     for rna_data in rna_sort:
         distance = abs(rna_data.getTSS() - position)
         if previous_distance is not None:
             self.assertTrue(distance >= previous_distance,
                             "Sort by distance failed")
         previous_distance = distance
 def test_sort_by_closest_TSS_to_edge(self):
     # Sorting by closest TSS to a pair of edge positions should
     # order the features so that the smaller of the two TSS-to-edge
     # distances never decreases from one feature to the next
     rna_sort = FeatureSet('Transcripts-ex1.txt')
     position = (16000000,17500000)
     # Do sort
     # Sort is done in place, so assignment is not required
     # however the sort function should return a reference to
     # the initial object
     result = rna_sort.sortByClosestTSSTo(*position)
     self.assertEqual(result,rna_sort,
                      "Returned object doesn't match subject")
     # Check that the closest distances are in ascending order.
     # The "previous" distance is moved on each time round so that
     # every feature is compared with its immediate predecessor
     # (not just with the first feature in the set)
     previous_closest = None
     for rna_data in rna_sort:
         tss = rna_data.getTSS()
         closest = min(abs(tss - position[0]),
                       abs(tss - position[1]))
         if previous_closest is not None:
             self.assertTrue(closest >= previous_closest,
                             "Sort by closest TSS to edge failed")
         previous_closest = closest
Example #10
0
 def test_closest_transcript_to_peak(self):
     # Choose the nearer of two transcripts to the first example
     # peak, then check the case where only one transcript is given
     transcripts = FeatureSet('Transcripts-ex1.txt')
     candidate_a = transcripts[1]
     candidate_b = transcripts[2]
     first_peak = PeakSet('ChIP_peaks-ex1.txt')[0]
     # With both candidates supplied, the transcript taken from
     # index 1 is the expected result
     # NOTE(review): confirm against the example data files
     chosen = GetNearestTranscriptToPeak(candidate_a,candidate_b,
                                         first_peak)
     self.assertEqual(chosen,candidate_a,
                      "Wrong transcript selected as nearest")
     # With None in place of the first candidate, the remaining
     # transcript is the expected result
     chosen = GetNearestTranscriptToPeak(None,candidate_b,first_peak)
     self.assertEqual(chosen,candidate_b,
                      "Wrong transcript selected as nearest")
 def test__eq__(self):
     # Step through equality and inequality of two FeatureSets as
     # features are added to each of them
     left = FeatureSet()
     right = FeatureSet()
     # Two empty sets compare equal
     self.assertEqual(left,right)
     # The same single feature in each set
     for feature_set in (left,right):
         feature_set.addFeature(Feature('CG1000','chr1','1','2','+'))
     self.assertEqual(left,right)
     # A second feature added to one set only...
     left.addFeature(Feature('CG2000','chr1','1','2','+'))
     self.assertNotEqual(left,right)
     # ...and then to the other set as well
     right.addFeature(Feature('CG2000','chr1','1','2','+'))
     self.assertEqual(left,right)
     # A different third feature in each set
     left.addFeature(Feature('CG2001','chr2',3,4,'-'))
     right.addFeature(Feature('CG2002','chr2',3,5,'+'))
     self.assertNotEqual(left,right)
 def test_filter_on_flag(self):
     # Filtering the example transcripts on a flag value of 1 is
     # expected to keep four features
     transcripts = FeatureSet('Transcripts-ex1.txt')
     flagged_count = len(transcripts.filterByFlag(1))
     self.assertEqual(flagged_count,4,
                      "Wrong number of flagged data lines")
 def test_reading_in_RNAseq_data(self):
     # Loading the example file is expected to give ten features
     # and a feature set that reports itself as flagged
     transcripts = FeatureSet('Transcripts-ex1.txt')
     feature_count = len(transcripts)
     self.assertEqual(feature_count,10,
                      "Wrong number of lines from RNA-seq file")
     flagged = transcripts.isFlagged()
     self.assertTrue(flagged,
                     "Data should be flagged")