예제 #1
0
 def test_get_uorf_start_stop_baisc_negative(self):
     """
     Check ``_get_uorf_start_stop`` in all three reading frames, minus strand.

     Three single-exon genes share the core sequence ``ATGGGGTAG``; ENSG2 and
     ENSG3 prepend one and two extra ``A`` bases, and their intervals end one
     and two positions later.  The expected output is identical for every
     gene: a ``uORF_start`` record at 7-9 followed by a ``uORF_end`` record
     at 1-3.
     """

     def as_record(bases):
         # Every fixture sequence is wrapped the same way.
         return SeqRecord(Seq(bases, IUPAC.unambiguous_dna))

     exon_bed = pybedtools.BedTool("""  chr1    1    9    ENSG1    0    -
                                         chr1    1    10    ENSG2    0    -
                                         chr1    1    11    ENSG3    0    -
                                     """, from_string=True)

     # Gene id -> list of (interval, sequence record) pairs, one exon each.
     exons_by_gene = {
         "ENSG1": [(exon_bed[0], as_record("ATGGGGTAG"))],
         "ENSG2": [(exon_bed[1], as_record("AATGGGGTAG"))],
         "ENSG3": [(exon_bed[2], as_record("AAATGGGGTAG"))],
     }
     observed = UORF_detector()._get_uorf_start_stop(exons_by_gene,
                                                     uorf_length=0)

     # (feature type, start, end, attributes) for each expected record; the
     # remaining fields are the same on every row.
     expected_rows = [
         ("uORF_start", "7", "9", "ID=uORF_start:ENSG1;Parent=ENSG1"),
         ("uORF_end", "1", "3", "ID=uORF_end:ENSG1;Parent=ENSG1"),
         ("uORF_start", "7", "9", "ID=uORF_start:ENSG2;Parent=ENSG2"),
         ("uORF_end", "1", "3", "ID=uORF_end:ENSG2;Parent=ENSG2"),
         ("uORF_start", "7", "9", "ID=uORF_start:ENSG3;Parent=ENSG3"),
         ("uORF_end", "1", "3", "ID=uORF_end:ENSG3;Parent=ENSG3"),
     ]
     expected = pybedtools.BedTool([
         pybedtools.create_interval_from_list(
             ["chr1", "protein_coding", feature, start, stop, ".",
              "-", ".", attrs])
         for feature, start, stop, attrs in expected_rows
     ])

     # Compare the string form of each record, in order.
     observed_lines = list(map(str, observed))
     expected_lines = list(map(str, expected))

     self.assertListEqual(observed_lines, expected_lines)
예제 #2
0
 def test_get_uorf_start_stop_split_negative(self):
     """
     Start / stop codon detection spanning introns, negative strand.

     Every gene has two exons, given as chr1 12-18 followed by chr1 1-6.
     The ``ATG`` sits at offset 3, 1 and 2 of the first exon's sequence for
     ENSG1, ENSG2 and ENSG3 respectively, and the ``TAG`` at the same offset
     of the second exon's sequence.  For ENSG2 and ENSG3 the ``GGG`` codon
     between them is therefore split across the two exons.

     The result of ``_get_uorf_start_stop`` is compared, record by record
     and in order, against hand-written ``uORF_start`` / ``uORF_end``
     records.
     """

     intervals = pybedtools.BedTool("""  chr1    12    18    ENSG1    0    -
                                         chr1    1    6    ENSG1    0    -
                                         chr1    12    18    ENSG2    0    -
                                         chr1    1    6    ENSG2    0    -
                                         chr1    12    18    ENSG3    0    -
                                         chr1    1    6    ENSG3    0    -
                                     """, from_string=True)

     # Gene id -> list of (interval, sequence record) pairs, one per exon.
     test_dict = {
         "ENSG1": [(intervals[0], SeqRecord(Seq("GGGATG", IUPAC.unambiguous_dna))),
                   (intervals[1], SeqRecord(Seq("GGGTAG", IUPAC.unambiguous_dna)))],
         "ENSG2": [(intervals[2], SeqRecord(Seq("GATGGG", IUPAC.unambiguous_dna))),
                   (intervals[3], SeqRecord(Seq("GTAGGG", IUPAC.unambiguous_dna)))],
         "ENSG3": [(intervals[4], SeqRecord(Seq("GGATGG", IUPAC.unambiguous_dna))),
                   (intervals[5], SeqRecord(Seq("GGTAGG", IUPAC.unambiguous_dna)))],
     }

     detector = UORF_detector()
     # NOTE(review): uorf_length=0 presumably disables any minimum-length
     # filtering -- confirm against UORF_detector._get_uorf_start_stop.
     test_result = detector._get_uorf_start_stop(test_dict, uorf_length=0)

     # Expected records: a start and an end record per gene, in gene order.
     true_result = pybedtools.BedTool(
         [
             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_start", "13", "15", ".",
                                                   "-", ".",
                                                   "ID=uORF_start:ENSG1;Parent=ENSG1"]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_end", "1", "3", ".",
                                                   "-", ".",
                                                   "ID=uORF_end:ENSG1;Parent=ENSG1"]),

             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_start", "15", "17", ".",
                                                   "-", ".",
                                                   "ID=uORF_start:ENSG2;Parent=ENSG2"]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_end", "3", "5", ".",
                                                   "-", ".",
                                                   "ID=uORF_end:ENSG2;Parent=ENSG2"]),

             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_start", "14", "16", ".",
                                                   "-", ".",
                                                   "ID=uORF_start:ENSG3;Parent=ENSG3"]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding",
                                                   "uORF_end", "2", "4", ".",
                                                   "-", ".",
                                                   "ID=uORF_end:ENSG3;Parent=ENSG3"]),
         ]
     )
     true_result = [str(interval) for interval in true_result]
     test_result = [str(interval) for interval in test_result]

     # No debug printing: on a mismatch assertListEqual already reports the
     # differing elements, and the old Python 2 ``print`` statements were a
     # SyntaxError on Python 3.
     self.assertListEqual(test_result, true_result)