Example #1
0
def test_annotate_prioritize_cds_1():
    """Check that a minus-strand query at III:13465-13466 is annotated as CDS.

    Calls ``a.annotate`` with a priority list that ranks protein-coding CDS
    above protein-coding exon, then asserts the returned transcript names and
    region. ``features_dict``, ``cds_dict`` and ``keys`` are defined outside
    this function.
    """
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'III', '13465', '13466', 'H10E21.3', '0', '-' "
          "Should return CDS")
    qchrom = 'III'
    qstart = 13465
    qstop = 13466
    qname = 'H10E21.3'
    qscore = 0
    qstrand = '-'

    region_priority = [
        ['protein_coding', 'CDS'],
        ['protein_coding', 'exon'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    # Only the transcript name(s) and region are checked; the other returned
    # fields are discarded.
    (_, _, _, _, _, _,
     _, rname, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert rname == 'H10E21.3a,H10E21.3b'
    assert region == 'CDS'
Example #2
0
def test_annotate_prioritize_cds_2():
    """Check that III:13465-13466 (-) is annotated as CDS when CDS ranks 2nd.

    The priority list puts protein-coding 3UTR first and protein-coding CDS
    second; the test asserts that ``a.annotate`` still reports CDS for this
    query. ``features_dict``, ``cds_dict`` and ``keys`` are defined outside
    this function.
    """
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'III', '13465', '13466', 'H10E21.3', '0', '-' "
          "Should return CDS even if it's the 2nd priority.")

    qchrom = 'III'
    qstart = 13465
    qstop = 13466
    qname = 'H10E21.3'
    qscore = 0
    qstrand = '-'
    region_priority = [
        ['protein_coding', '3UTR'],
        ['protein_coding', 'CDS'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    # Only the transcript name(s) and region are checked.
    (_, _, _, _, _, _,
     _, rname, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert rname == 'H10E21.3a,H10E21.3b'
    assert region == 'CDS'
Example #3
0
def test_utr_classification_2():
    """Check that chr11:70266235-70266302 (+) is classified as '3utr'.

    Builds the lookup structures with ``a.create_definitions`` from the chr11
    test database, then calls ``a.annotate`` with a priority list ordered
    3utr, 5utr, CDS and asserts that the returned region is ``'3utr'``.
    """
    print("This tests the same region (chr11:70266235-70266302:+ "
          "as test_utr_classification_1, but re-orders the priority "
          "such that 3utr is higher than 5utr. Supposed to return 3utr")
    chroms = ['chr11']
    db = 'test/data/gencode.v19.annotation.chr11.70M-71M.gtf.db'
    species = 'hg19'

    # create_definitions() returns five values; the first two are not needed
    # by annotate() in this test.
    (_exons_dict, _transcripts_dict,
     cds_dict, features_dict, keys) = a.create_definitions(
        db, chroms, species
    )
    qchrom = 'chr11'
    qstart = 70266235
    qstop = 70266302
    qname = 'CTTN'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', '3utr'],
        ['protein_coding', '5utr'],
        ['protein_coding', 'CDS'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    # Only the region is checked.
    (_, _, _, _, _, _,
     _, _, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    assert region == '3utr'
Example #4
0
def test_annotate_prioritize_4():
    """Check that III:16600-16601 (+) is annotated as ``five_prime_utr``.

    The priority list is ordered three_prime_utr, five_prime_utr, CDS. The
    test accepts either 'H10E21.1a' or 'H10E21.1b' as the returned transcript
    name. ``features_dict``, ``cds_dict`` and ``keys`` are defined outside
    this function.
    """
    print("Tests annotation priority for overlapping transcripts (-/+). "
          "Interval: 'III', '16600', '16601', 'H10E21.1b', '0', '+' "
          "Should return 5'UTR.")
    qchrom = 'III'
    qstart = 16600
    qstop = 16601
    qname = 'H10E21.1b'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', 'three_prime_utr'],
        ['protein_coding', 'five_prime_utr'],
        ['protein_coding', 'CDS'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    (_, _, _, _, _, _,
     _, rname, region, annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        features_dict, cds_dict, keys
    )
    print(annotation)
    # TODO: it is not yet pinned down which of the two transcripts is
    # returned, so either name is accepted.
    assert rname in ('H10E21.1a', 'H10E21.1b')
    assert region == 'five_prime_utr'
Example #5
0
def test_intergenic_1():
    """Check that a chr19 (+) query is reported as intergenic.

    Calls ``a.annotate`` with a priority list ordered 3utr, 5utr, CDS and
    asserts that both the returned name and region are ``'intergenic'``.
    ``chr19_features_dict``, ``chr19_cds_dict`` and ``chr19_keys`` are
    defined outside this function.
    """
    print("Tests a region that is intergenic (+)")

    qchrom = 'chr19'
    # NOTE(review): qstart (10050000) is greater than qstop (10006000).
    # Confirm whether this inverted interval is intentional; the values are
    # left unchanged so the test exercises the same query as before.
    qstart = 10050000
    qstop = 10006000
    qname = 'intergenic'
    qscore = 0
    qstrand = '+'
    region_priority = [
        ['protein_coding', '3utr'],
        ['protein_coding', '5utr'],
        ['protein_coding', 'CDS'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    (_, _, _, _, _, _,
     _, rname, region, annotation) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    print(annotation)
    # The test expects 'intergenic' for both the name and the region.
    assert rname == 'intergenic'
    assert region == 'intergenic'
Example #6
0
def test_annotate_distintron_1():
    """Check that chr21:46553734-46553788 (+) is annotated as distintron500.

    Unlike a single shared list, this test passes two differently ordered
    priority lists to ``a.annotate``: ``transcript_priority`` ranks
    protein-coding distintron500 first, while ``gene_priority`` ranks
    non-coding proxintron500 first. ``chr21_features_dict``,
    ``chr21_cds_dict`` and ``chr21_keys`` are defined outside this function.
    """
    print("Tests annotation priority for overlapping transcripts (+). "
          "Interval: 'chr21:46553734-46553788', 'ADARB1', '0', '+' "
          "Should return distintron")

    qchrom = 'chr21'
    qstart = 46553734
    qstop = 46553788
    qname = 'ADARB1'
    qscore = 0
    qstrand = '+'
    transcript_priority = [
        ['protein_coding', 'distintron500'],
        ['non_coding', 'proxintron500'],
    ]
    gene_priority = [
        ['non_coding', 'proxintron500'],
        ['protein_coding', 'distintron500'],
    ]

    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # Only the returned name and region are checked.
    (_, _, _, _, _, _,
     _, rname, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, transcript_priority, gene_priority,
        chr21_features_dict, chr21_cds_dict, chr21_keys
    )
    assert rname == 'ADARB1'
    assert region == 'distintron500'
Example #7
0
def test_annotate_cds_2():
    """Check that chr19:4852152-4852191 (-) is annotated as CDS of PLIN3.

    Calls ``a.annotate`` with a priority list ranking protein-coding CDS
    above non-coding exon and asserts the returned region, name and gene ID.
    ``chr19_features_dict``, ``chr19_cds_dict`` and ``chr19_keys`` are
    defined outside this function.
    """
    print("Tests annotation priority for overlapping transcripts (-). "
          "Interval: 'chr19:4852152-4852191', 'PLIN3', '0', '-' "
          "Should return CDS")
    qchrom = 'chr19'
    qstart = 4852152
    qstop = 4852191
    qname = 'PLIN3'
    qscore = 0
    qstrand = '-'
    region_priority = [
        ['protein_coding', 'CDS'],
        ['non_coding', 'exon'],
    ]
    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    (_, _, _, _, _, _,
     gene, rname, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    # NOTE: a check against transcript ID 'ENST00000589163.1' used to sit
    # here but is disabled; the transcript ID is not verified by this test.
    assert region == 'CDS'
    assert rname == 'PLIN3'
    assert gene == 'ENSG00000105355.4'
Example #8
0
def test_annotate_prioritize_noncoding_exon_2():
    """Check that chr19:5071035-5071036 (+) is annotated as noncoding_exon.

    The priority list ranks non-coding exon above protein-coding CDS; the
    test asserts that ``a.annotate`` reports region ``'noncoding_exon'`` for
    KDM4B. ``chr19_features_dict``, ``chr19_cds_dict`` and ``chr19_keys`` are
    defined outside this function.
    """
    print("Tests annotation priority for overlapping transcripts (+). "
          "Interval: 'chr19:5071035-5071036', 'KDM4B', '0', '+' "
          "Should return a retained intron (noncoding exon)")
    region_priority = [['non_coding', 'exon'], ['protein_coding', 'CDS']]
    qchrom = 'chr19'
    qstart = 5071035
    qstop = 5071036
    qname = 'KDM4B'
    qscore = 0
    qstrand = '+'
    # Must be a plain bool: a trailing comma here would bind the tuple
    # (True,) instead of True.
    stranded = True

    # The same list is passed for both priority arguments of annotate().
    (_, _, _, _, _, _,
     gene, rname, region, _) = a.annotate(
        qchrom, qstart, qstop, qname, qscore, qstrand,
        stranded, region_priority, region_priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys
    )
    assert rname == 'KDM4B'
    assert gene == 'ENSG00000127663.10'
    assert region == 'noncoding_exon'
Example #9
0
def test_annotate_prioritize_cds_1():
    """Check that chr19:5071035-5071036 (+) is annotated as CDS of KDM4B.

    The priority list ranks protein-coding CDS above protein-coding exon.
    ``chr19_features_dict``, ``chr19_cds_dict`` and ``chr19_keys`` are
    defined outside this function.
    """
    print(
        "Tests annotation priority for overlapping transcripts (+). "
        "Interval: 'chr19', '5071035', '5071036', 'KDM4B', '0', '+' "
        "Should return CDS"
    )

    # Query interval as (chrom, start, stop, name, score, strand).
    query = ('chr19', 5071035, 5071036, 'KDM4B', 0, '+')
    priority = [['protein_coding', 'CDS'], ['protein_coding', 'exon']]
    use_strand = True

    # The same priority list fills both priority arguments of annotate().
    call_args = query + (
        use_strand, priority, priority,
        chr19_features_dict, chr19_cds_dict, chr19_keys,
    )
    result = a.annotate(*call_args)

    # annotate() is unpacked into exactly ten fields; only gene, transcript
    # name and region are inspected.
    _, _, _, _, _, _, gene, rname, region, _ = result

    # NOTE: a check against transcript ID 'ENST00000159111.4' used to sit
    # here but is disabled; the transcript ID is not verified by this test.
    assert rname == 'KDM4B'
    assert gene == 'ENSG00000127663.10'
    assert region == 'CDS'