def test_get_chrom_ref_tree(chrom, expected):
    '''
    Looks up chrom in a small fixture of per-chromosome
    reference trees and checks the returned entry
    against expected.
    '''
    # plain strings stand in for the trees; the keys mix
    # 'chr'-prefixed contig names with a bare one ('3')
    contigs = ('chr1', 'chr2', 'chrM', '3')
    placeholders = ('intervals_a', 'intervals_b',
                    'intervals_c', 'intervals_d')
    fixture = dict(zip(contigs, placeholders))

    found = ac.get_chrom_ref_tree(chrom, fixture)
    assert found == expected
Exemple #2
0
def overlaps_gene(row, gene_tree):
    '''
    Return True if the variant described by row
    overlaps any gene in the reference.

    row['pos1'] and row['pos2'] are split with
    get_pos_parts into (chrom, position, strand).
    When both ends are on the same chromosome the
    range pos1-pos2 is queried; otherwise each end
    is queried separately as a 1 bp window on its
    own chromosome. A chromosome without a usable
    tree contributes no overlap.
    '''
    chrom_a, coord_a, _ = get_pos_parts(row['pos1'])
    chrom_b, coord_b, _ = get_pos_parts(row['pos2'])

    if chrom_a == chrom_b:
        # NOTE(review): a range query presumably expects
        # coord_a < coord_b -- confirm against callers
        tree = ac.get_chrom_ref_tree(chrom_a, gene_tree)
        if not tree:
            return False
        return tree.overlaps(coord_a, coord_b)

    # ends lie on different chromosomes: test each end on its own
    tree_a = ac.get_chrom_ref_tree(chrom_a, gene_tree)
    hit = tree_a.overlaps(coord_a, coord_a + 1) if tree_a else False

    tree_b = ac.get_chrom_ref_tree(chrom_b, gene_tree)
    if tree_b:
        # second end is only queried if the first end did not hit
        hit = hit or tree_b.overlaps(coord_b, coord_b + 1)
    return hit
Exemple #3
0
def overlaps_same_exon(sv, ex_trees):
    '''
    Checks whether variant is contained
    completely within a single exon.

    sv['pos1'] and sv['pos2'] are split with
    get_pos_parts into (chrom, position, strand).
    Returns True only when both positions are on the
    same chromosome, each 1 bp window at the two
    positions overlaps at least one exon, and both
    windows overlap exactly the same set of exons.
    Returns False when the chromosomes differ or when
    no usable exon tree exists for the chromosome.
    '''
    chr1, start, _ = get_pos_parts(sv['pos1'])
    chr2, end, _ = get_pos_parts(sv['pos2'])

    # a variant whose ends lie on different chromosomes cannot sit
    # inside one exon; without this guard the pos2 coordinate would
    # be looked up in the tree belonging to pos1's chromosome
    if chr1 != chr2:
        return False

    ex_tree = ac.get_chrom_ref_tree(chr1, ex_trees)
    if not ex_tree:
        return False

    exons_at_start = ex_tree.overlap(start, start + 1)
    exons_at_end = ex_tree.overlap(end, end + 1)

    # equal and non-empty at the start implies non-empty at the end
    return len(exons_at_start) > 0 and exons_at_start == exons_at_end
def check_overlap(ex_trees, chrom, start, end, size=0):
    '''
    Checks whether variant overlaps an exonic region.
    For deletions, at least MIN_GAP bp of the deletion
    must be within the exon body (the threshold is
    passed in as size -- presumably MIN_GAP at the
    call site; confirm against callers).

    ex_trees: per-chromosome exon trees, resolved via
              ac.get_chrom_ref_tree
    chrom:    chromosome of the variant
    start:    variant start coordinate
    end:      variant end coordinate
    size:     minimum number of bases of start-end that
              must fall inside one exon; with the default
              of 0 (or any value <= 0) any overlap counts

    Returns False when no usable tree exists for chrom.
    Otherwise returns the tree's overlaps() answer, further
    restricted (when size > 0) to whether at least one
    overlapping exon shares >= size bases with the variant.
    '''
    ex_tree = ac.get_chrom_ref_tree(chrom, ex_trees)
    if not ex_tree:
        return False

    olap = ex_tree.overlaps(start, end)
    if not olap or size <= 0:
        return olap

    # Measure every overlapping exon rather than an arbitrary one,
    # and clamp the shared region to the exon on both sides so bases
    # of the variant that extend past the exon are not counted.
    # Each item is indexed as (exon start, exon end, ...).
    return any(
        min(exon[1], end) - max(exon[0], start) >= size
        for exon in ex_tree.overlap(start, end)
    )
Exemple #5
0
def get_overlap_size(ex_trees, chrom, start, end):
    '''
    Returns the number of bases by which the variant
    overlaps a reference exon (i.e. for truncated exons
    and novel introns). Returns nan when there is no
    usable exon tree for chrom or when neither end of
    the variant touches an exon. A value of 0 indicates
    adjacent elements.

    Exons are looked up in the 1 bp window just before
    start and the 1 bp window at end. If both windows
    hit different exon sets, the larger of the two
    overlap sizes is returned.
    '''
    nan = float('nan')
    tree = ac.get_chrom_ref_tree(chrom, ex_trees)
    if not tree:
        return nan

    def bases_within(hits):
        # size of the variant/exon overlap, measured against the
        # first exon yielded by the hit collection
        exon = list(hits)[0]
        ex_start, ex_end = exon[0], exon[1]
        if start >= ex_start:
            return min(ex_end, end) - start
        return end - max(ex_start, start)

    touches_left = tree.overlaps(start - 1, start)
    touches_right = tree.overlaps(end, end + 1)
    if not (touches_left or touches_right):
        return nan

    left_hits = tree.overlap(start - 1, start)
    right_hits = tree.overlap(end, end + 1)

    both_ends_hit = touches_left and touches_right
    if both_ends_hit and left_hits != right_hits:
        # the two ends land in different exons: report the larger
        # of the two overlaps
        return max(bases_within(left_hits), bases_within(right_hits))

    # only one exon set is involved (one-sided hit, or the same
    # exons at both ends)
    return bases_within(left_hits if touches_left else right_hits)