def test_get_uorf_start_stop_baisc_negative(self):
    """
    Test 3 reading frames orf detection, negative strand

    Three single-exon genes on the minus strand of chr1 are fed to
    ``UORF_detector._get_uorf_start_stop`` with ``uorf_length=0``.  Their
    sequences all contain ``ATGGGGTAG`` and differ only in the number of
    leading ``A`` bases (0, 1 and 2).  Every gene is expected to yield a
    ``uORF_start`` feature at 7-9 and a ``uORF_end`` feature at 1-3.
    """
    gene_ids = ["ENSG1", "ENSG2", "ENSG3"]
    sequences = ["ATGGGGTAG", "AATGGGGTAG", "AAATGGGGTAG"]

    # One BED record per gene; pybedtools splits the string on newlines
    # and normalises the whitespace between fields.
    regions = pybedtools.BedTool(
        """
        chr1 1 9 ENSG1 0 -
        chr1 1 10 ENSG2 0 -
        chr1 1 11 ENSG3 0 -
        """,
        from_string=True)

    # gene id -> [(interval, sequence record)], the structure handed to
    # the detector below.
    transcripts = {}
    for index, (gene_id, sequence) in enumerate(zip(gene_ids, sequences)):
        record = SeqRecord(Seq(sequence, IUPAC.unambiguous_dna))
        transcripts[gene_id] = [(regions[index], record)]

    detector = UORF_detector()
    observed = detector._get_uorf_start_stop(transcripts, uorf_length=0)

    # Expected GFF features: start then end for each gene, genes in order.
    feature_coordinates = (("uORF_start", "7", "9"), ("uORF_end", "1", "3"))
    expected_features = []
    for gene_id in gene_ids:
        for feature, first, last in feature_coordinates:
            attributes = "ID={0}:{1};Parent={1}".format(feature, gene_id)
            expected_features.append(
                pybedtools.create_interval_from_list(
                    ["chr1", "protein_coding", feature, first, last,
                     ".", "-", ".", attributes]))
    expected = pybedtools.BedTool(expected_features)

    # Compare the textual form of every interval.
    observed_lines = [str(feature) for feature in observed]
    expected_lines = [str(feature) for feature in expected]
    self.assertListEqual(observed_lines, expected_lines)
def test_get_uorf_start_stop_split_negative(self):
    """
    start / stop codon detection spanning introns negative strand

    Each of the three genes is described by two minus-strand intervals on
    chr1 (12-18 and 1-6).  The first interval's sequence contains ``ATG``
    and the second contains ``TAG``; the codons sit at a different offset
    in each gene.  ``UORF_detector._get_uorf_start_stop`` is called with
    ``uorf_length=0`` and its output is compared, interval by interval,
    against the expected ``uORF_start`` / ``uORF_end`` GFF features.
    """
    # Two BED records per gene; pybedtools splits the string on newlines
    # and normalises the whitespace between fields.
    intervals = pybedtools.BedTool(
        """
        chr1 12 18 ENSG1 0 -
        chr1 1 6 ENSG1 0 -
        chr1 12 18 ENSG2 0 -
        chr1 1 6 ENSG2 0 -
        chr1 12 18 ENSG3 0 -
        chr1 1 6 ENSG3 0 -
        """,
        from_string=True)

    # gene id -> [(interval, sequence record), ...]
    test_dict = {
        "ENSG1": [
            (intervals[0], SeqRecord(Seq("GGGATG", IUPAC.unambiguous_dna))),
            (intervals[1], SeqRecord(Seq("GGGTAG", IUPAC.unambiguous_dna))),
        ],
        "ENSG2": [
            (intervals[2], SeqRecord(Seq("GATGGG", IUPAC.unambiguous_dna))),
            (intervals[3], SeqRecord(Seq("GTAGGG", IUPAC.unambiguous_dna))),
        ],
        "ENSG3": [
            (intervals[4], SeqRecord(Seq("GGATGG", IUPAC.unambiguous_dna))),
            (intervals[5], SeqRecord(Seq("GGTAGG", IUPAC.unambiguous_dna))),
        ],
    }

    detector = UORF_detector()
    test_result = detector._get_uorf_start_stop(test_dict, uorf_length=0)

    # (feature type, start, end, gene id) for every expected GFF record,
    # in the order the detector is expected to report them.
    expected_rows = [
        ("uORF_start", "13", "15", "ENSG1"),
        ("uORF_end", "1", "3", "ENSG1"),
        ("uORF_start", "15", "17", "ENSG2"),
        ("uORF_end", "3", "5", "ENSG2"),
        ("uORF_start", "14", "16", "ENSG3"),
        ("uORF_end", "2", "4", "ENSG3"),
    ]
    true_result = pybedtools.BedTool([
        pybedtools.create_interval_from_list([
            "chr1", "protein_coding", feature, start, end, ".", "-", ".",
            "ID={0}:{1};Parent={1}".format(feature, gene_id),
        ])
        for feature, start, end, gene_id in expected_rows
    ])

    # Compare the textual form of every interval.  The debug prints that
    # used to sit here were removed: assertListEqual already reports a
    # diff of both lists on failure.
    true_result = [str(interval) for interval in true_result]
    test_result = [str(interval) for interval in test_result]
    self.assertListEqual(test_result, true_result)