def test_annotate_prioritize_cds_1():
    """Annotate a minus-strand interval on III with CDS ranked above exon.

    The same priority list is supplied for both priority arguments of
    ``a.annotate``. The call must report region ``CDS`` and the transcript
    names ``H10E21.3a,H10E21.3b``.
    """
    # NOTE(review): a later test in this file is also named
    # test_annotate_prioritize_cds_1. The later ``def`` rebinds the name, so
    # this one is presumably never collected by pytest -- consider renaming
    # one of the two.
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'III', '13465', '13466', 'H10E21.3', '0', '-' "
          "Should return CDS")
    qchrom = 'III'
    qstart = 13465
    qstop = 13466
    qname = 'H10E21.3'
    qscore = 0
    qstrand = '-'
    region_priority = [
        ['protein_coding', 'CDS'],
        ['protein_coding', 'exon'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    # Only the transcript name and region are checked; the ten-way unpack
    # still verifies the shape of the returned record.
    (_chrom, _start, _end, _name, _score, _strand,
     _gene, rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert rname == 'H10E21.3a,H10E21.3b'
    assert region == 'CDS'
def test_annotate_prioritize_cds_2():
    """Annotate a minus-strand interval on III with 3UTR ranked above CDS.

    The same priority list is supplied for both priority arguments of
    ``a.annotate``. Although CDS is only second in the list, the call must
    still report region ``CDS`` and the transcript names
    ``H10E21.3a,H10E21.3b``.
    """
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'III', '13465', '13466', 'H10E21.3', '0', '-' "
          "Should return CDS even if it's the 2nd priority.")
    qchrom = 'III'
    qstart = 13465
    qstop = 13466
    qname = 'H10E21.3'
    qscore = 0
    qstrand = '-'
    region_priority = [
        ['protein_coding', '3UTR'],
        ['protein_coding', 'CDS'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    # Only the transcript name and region are checked; the ten-way unpack
    # still verifies the shape of the returned record.
    (_chrom, _start, _end, _name, _score, _strand,
     _gene, rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert rname == 'H10E21.3a,H10E21.3b'
    assert region == 'CDS'
def test_utr_classification_2():
    """Annotate chr11:70266235-70266302 (+) with 3utr ranked above 5utr.

    Definitions are built locally with ``a.create_definitions`` from the
    chr11 70M-71M GENCODE v19 test database, then the interval is annotated
    with the priority order 3utr, 5utr, CDS. The reported region must be
    ``3utr``.
    """
    print("This tests the same region (chr11:70266235-70266302:+ "
          "as test_utr_classification_1, but re-orders the priority "
          "such that 3utr is higher than 5utr. Supposed to return 3utr")
    chroms = ['chr11']
    db = 'test/data/gencode.v19.annotation.chr11.70M-71M.gtf.db'
    species = 'hg19'
    # The exon and transcript dictionaries are returned but not needed here.
    (_exons_dict, _transcripts_dict,
     cds_dict, features_dict, keys) = a.create_definitions(
        db, chroms, species
    )

    qchrom = 'chr11'
    qstart = 70266235
    qstop = 70266302
    qname = 'CTTN'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', '3utr'],
        ['protein_coding', '5utr'],
        ['protein_coding', 'CDS'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    # Only the region is checked; the ten-way unpack still verifies the
    # shape of the returned record.
    (_chrom, _start, _end, _name, _score, _strand,
     _gene, _rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert region == '3utr'
def test_annotate_prioritize_4():
    """Annotate a plus-strand interval on III expected to be a 5' UTR.

    The priority order is three_prime_utr, five_prime_utr, CDS, supplied for
    both priority arguments of ``a.annotate``. The reported region must be
    ``five_prime_utr``; either ``H10E21.1a`` or ``H10E21.1b`` is accepted as
    the transcript name.
    """
    print("Tests annotation priority for overlapping transcripts (-/+). "
          "Interval: 'III', '16600', '16601', 'H10E21.1b', '0', '+' "
          "Should return 5'UTR.")
    qchrom = 'III'
    qstart = 16600
    qstop = 16601
    qname = 'H10E21.1b'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', 'three_prime_utr'],
        ['protein_coding', 'five_prime_utr'],
        ['protein_coding', 'CDS'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    (_chrom, _start, _end, _name, _score, _strand,
     _gene, rname, region, annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    print(annotation)
    # TODO: it is not known which of the two transcripts is returned; pin
    # this down and assert the exact name.
    assert rname in ('H10E21.1a', 'H10E21.1b')
    assert region == 'five_prime_utr'
def test_intergenic_1():
    """Annotate a plus-strand chr19 interval expected to be intergenic.

    The priority order is 3utr, 5utr, CDS, supplied for both priority
    arguments of ``a.annotate``. Both the reported transcript name and the
    reported region must be ``intergenic``.
    """
    print("Tests a region that is intergenic (+)")
    qchrom = 'chr19'
    # NOTE(review): qstart (10050000) is larger than qstop (10006000).
    # This looks like a typo, but the intended coordinates cannot be
    # determined from this test alone -- confirm before changing them.
    qstart = 10050000
    qstop = 10006000
    qname = 'intergenic'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', '3utr'],
        ['protein_coding', '5utr'],
        ['protein_coding', 'CDS'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    (_chrom, _start, _end, _name, _score, _strand,
     _gene, rname, region, annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    print(annotation)
    assert rname == 'intergenic'
    assert region == 'intergenic'
def test_annotate_distintron_1():
    """Annotate chr21:46553734-46553788 (+) and expect ``distintron500``.

    Unlike tests that reuse one list, this one passes two different
    orderings: the transcript priority ranks protein_coding/distintron500
    first, while the gene priority ranks non_coding/proxintron500 first.
    The call must report name ``ADARB1`` and region ``distintron500``.
    """
    print("Tests annotation priority for overlapping transcripts (+). "
          "Interval: 'chr21:46553734-46553788', 'ADARB1', '0', '+' "
          "Should return distintron")
    qchrom = 'chr21'
    qstart = 46553734
    qstop = 46553788
    qname = 'ADARB1'
    qscore = 0
    qstrand = '+'
    transcript_priority = [
        ['protein_coding', 'distintron500'],
        ['non_coding', 'proxintron500'],
    ]
    gene_priority = [
        ['non_coding', 'proxintron500'],
        ['protein_coding', 'distintron500'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    # Only the name and region are checked; the ten-way unpack still
    # verifies the shape of the returned record.
    (_chrom, _start, _end, _name, _score, _strand,
     _gene, rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, transcript_priority, gene_priority,
        chr21_features_dict, chr21_cds_dict, chr21_keys
    )
    assert rname == 'ADARB1'
    assert region == 'distintron500'
def test_annotate_cds_2():
    """Annotate chr19:4852152-4852191 (-) and expect a CDS call for PLIN3.

    The priority order is protein_coding/CDS then non_coding/exon, supplied
    for both priority arguments of ``a.annotate``. The call must report
    region ``CDS``, name ``PLIN3`` and gene ``ENSG00000105355.4``.
    """
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'chr19:4852152-4852191', 'PLIN3', '0', '-' "
          "Should return CDS")
    qchrom = 'chr19'
    qstart = 4852152
    qstop = 4852191
    qname = 'PLIN3'
    qscore = 0
    qstrand = '-'
    region_priority = [
        ['protein_coding', 'CDS'],
        ['non_coding', 'exon'],
    ]
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    (_chrom, _start, _end, _name, _score, _strand,
     gene, rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    # NOTE(review): a previously disabled assertion expected transcript id
    # 'ENST00000589163.1'; the transcript id is not checked here.
    assert region == 'CDS'
    assert rname == 'PLIN3'
    assert gene == 'ENSG00000105355.4'
def test_annotate_prioritize_noncoding_exon_2():
    """Annotate chr19:5071035-5071036 (+) with non_coding/exon ranked first.

    The priority order is non_coding/exon then protein_coding/CDS, supplied
    for both priority arguments of ``a.annotate``. The call must report
    name ``KDM4B``, gene ``ENSG00000127663.10`` and region
    ``noncoding_exon``.
    """
    print("Tests annotation priority for overlapping transcripts (+). "
          "Interval: 'chr19:5071035-5071036', 'KDM4B', '0', '+' "
          "Should return a retained intron (noncoding exon)")
    region_priority = [
        ['non_coding', 'exon'],
        ['protein_coding', 'CDS'],
    ]
    qchrom = 'chr19'
    qstart = 5071035
    qstop = 5071036
    qname = 'KDM4B'
    qscore = 0
    qstrand = '+'
    # Must be a plain bool: a trailing comma here would silently build the
    # one-element tuple ``(True,)`` instead.
    stranded = True

    (_chrom, _start, _end, _name, _score, _strand,
     gene, rname, region, _annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    assert rname == 'KDM4B'
    assert gene == 'ENSG00000127663.10'
    assert region == 'noncoding_exon'
def test_annotate_prioritize_cds_1():
    """Annotate chr19:5071035-5071036 (+) with CDS ranked above exon.

    One priority list is handed to ``a.annotate`` for both of its priority
    arguments. The result must name ``KDM4B``, gene ``ENSG00000127663.10``
    and region ``CDS``.
    """
    # NOTE(review): an earlier test in this file carries the same name;
    # being defined later, this one replaces it at module level.
    print("Tests annotation priority for overlapping transcripts (+). "
          "Interval: 'chr19', '5071035', '5071036', 'KDM4B', '0', '+' "
          "Should return CDS")

    priorities = [
        ['protein_coding', 'CDS'],
        ['protein_coding', 'exon'],
    ]
    # Query interval: chrom, start, stop, name, score, strand.
    query = ('chr19', 5071035, 5071036, 'KDM4B', 0, '+')
    is_stranded = True
    call_args = query + (
        is_stranded,
        priorities,
        priorities,
        chr19_features_dict,
        chr19_cds_dict,
        chr19_keys,
    )

    record = a.annotate(*call_args)
    # Ten-way unpack keeps the check on the shape of the returned record.
    (_chrom, _start, _end, _name, _score, _strand,
     hit_gene, hit_name, hit_region, _annotation) = record

    # NOTE(review): a previously disabled assertion expected transcript id
    # 'ENST00000159111.4'; the transcript id is not checked here.
    assert hit_name == 'KDM4B'
    assert hit_gene == 'ENSG00000127663.10'
    assert hit_region == 'CDS'