예제 #1
0
    def test_get_five_prime_utr_sequences(self):
        """
        
        Tests 5' UTR dictionary creation
        
        """
        
        gff_file = pybedtools.BedTool(tests.get_file("gff_uorf_test.gff"))
        fa_file = tests.get_file("gff_uorf_test.fa")
        intervals = pybedtools.BedTool("""  chr1    5    10    ENSG1    0    +
                                chr1    15    20    ENSG2    0    -
                                chr1    25    30    ENSG3    0    +
                                chr1    35    40    ENSG3    0    +
                                chr1    45    50    ENSG4    0    -
                                chr1    55    60    ENSG4    0    -

                              """, from_string=True)
        detector = UORF_detector()
        test_dict = detector._get_five_prime_utr_sequences(gff_file, fa_file)
        test_dict = {name : [(interval, str(seq.seq)) for interval, seq in tuple] 
                     for name, tuple in test_dict.items()}
        true_dict = {
                       "ENSG1" : [(intervals[0], "GGGGG")],
                       "ENSG2" : [(intervals[1], "AAAAA")],
                       "ENSG3" : [(intervals[2], "GGGGG"), (intervals[3], "TTTTT")],
                       "ENSG4" : [(intervals[5], "GAAAA"), (intervals[4], "CCCCG")],
                       }
        
        self.assertDictEqual(test_dict, true_dict)
예제 #2
0
 def test_get_uorf_start_stop_baisc_negative(self):
     
     
     """
     
     Test 3 reading frames orf detection, negative strand
     
     """
     
     intervals = pybedtools.BedTool("""  chr1    1    9    ENSG1    0    -
                                         chr1    1    10    ENSG2    0    -
                                         chr1    1    11    ENSG3    0    -
                                     """, from_string=True)
     
     test_dict = {
                    "ENSG1" : [(intervals[0], SeqRecord(Seq("ATGGGGTAG", IUPAC.unambiguous_dna)))],
                    "ENSG2" : [(intervals[1], SeqRecord(Seq("AATGGGGTAG", IUPAC.unambiguous_dna)))],
                    "ENSG3" : [(intervals[2], SeqRecord(Seq("AAATGGGGTAG", IUPAC.unambiguous_dna)))],
                    }
     detector = UORF_detector()
     test_result = detector._get_uorf_start_stop(test_dict, uorf_length=0)
     
     true_result = pybedtools.BedTool(
         [
              pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "7", "9", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG1;Parent=ENSG1" ]),
             
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "1", "3", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG1;Parent=ENSG1" ]),
             
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "7", "9", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG2;Parent=ENSG2" ]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "1", "3", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG2;Parent=ENSG2" ]),
          
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "7", "9", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG3;Parent=ENSG3" ]),
             
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "1", "3", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG3;Parent=ENSG3" ]),
          ]
                        )
     test_result = [str(x) for x in test_result]
     true_result = [str(x) for x in true_result]
     
     self.assertListEqual(test_result, true_result)
예제 #3
0
 def test_get_uorf_start_stop_split_negative(self):
     
     
     """
     
     start / stop codon detection spanning introns negative strand
     
     """
     
     intervals = pybedtools.BedTool("""  chr1    12    18    ENSG1    0    -
                                         chr1    1    6    ENSG1    0    -
                                         chr1    12    18    ENSG2    0    -
                                         chr1    1    6    ENSG2    0    -
                                         chr1    12    18    ENSG3    0    -
                                         chr1    1    6    ENSG3    0    -
                                     """, from_string=True)
     
     test_dict = {
                    "ENSG1" : [(intervals[0], SeqRecord(Seq("GGGATG", IUPAC.unambiguous_dna))), 
                               (intervals[1], SeqRecord(Seq("GGGTAG", IUPAC.unambiguous_dna)))],
                    "ENSG2" : [(intervals[2], SeqRecord(Seq("GATGGG", IUPAC.unambiguous_dna))),
                               (intervals[3], SeqRecord(Seq("GTAGGG", IUPAC.unambiguous_dna)))],
                    "ENSG3" : [(intervals[4], SeqRecord(Seq("GGATGG", IUPAC.unambiguous_dna))),
                               (intervals[5], SeqRecord(Seq("GGTAGG", IUPAC.unambiguous_dna)))]
                    }
     
     detector = UORF_detector()
     test_result = detector._get_uorf_start_stop(test_dict, uorf_length=0)
     
     true_result = pybedtools.BedTool(
         [
              pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "13", "15", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG1;Parent=ENSG1" ]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "1", "3", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG1;Parent=ENSG1" ]),
          
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "15", "17", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG2;Parent=ENSG2" ]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "3", "5", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG2;Parent=ENSG2" ]),
          
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_start", "14", "16", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_start:ENSG3;Parent=ENSG3" ]),
             pybedtools.create_interval_from_list(["chr1", "protein_coding", 
                                                   "uORF_end", "2", "4", ".", 
                                                   "-", ".", 
                                                   "ID=uORF_end:ENSG3;Parent=ENSG3" ]),
          ]
                        )
     true_result = [str(interval) for interval in true_result]
     test_result = [str(interval) for interval in test_result]
     print test_result
     print true_result
     self.assertEqual(test_result, true_result)