def test_get_chrom_ref_tree(chrom, expected):
    '''
    Look up chrom in a small fixture of per-chromosome reference entries
    and check that ac.get_chrom_ref_tree returns the expected entry.

    NOTE(review): chrom and expected are presumably supplied by a
    parametrize decorator that sits above this function -- confirm.
    '''
    # The fixture mixes 'chr'-prefixed and bare chromosome names; the
    # values are placeholder strings standing in for real trees.
    fixture_trees = {
        'chr1': 'intervals_a',
        'chr2': 'intervals_b',
        'chrM': 'intervals_c',
        '3': 'intervals_d',
    }
    result = ac.get_chrom_ref_tree(chrom, fixture_trees)
    assert result == expected
def overlaps_gene(row, gene_tree):
    '''
    Tell whether the variant described by a contig row touches any gene
    in the reference.

    row       -- record with 'pos1' and 'pos2' entries, each parsed by
                 get_pos_parts into (chromosome, position, strand)
    gene_tree -- per-chromosome gene trees, resolved through
                 ac.get_chrom_ref_tree

    When both positions are on the same chromosome the span pos1..pos2
    is queried; otherwise each position is queried on its own
    chromosome as a 1 bp window. A chromosome with no tree contributes
    no overlap.
    '''
    # strand is parsed but plays no part in the overlap decision
    chrom_a, pos_a, _ = get_pos_parts(row['pos1'])
    chrom_b, pos_b, _ = get_pos_parts(row['pos2'])

    if chrom_a == chrom_b:
        tree = ac.get_chrom_ref_tree(chrom_a, gene_tree)
        if not tree:
            return False
        return tree.overlaps(pos_a, pos_b)

    # positions on different chromosomes: test each end separately
    hit = False
    tree_a = ac.get_chrom_ref_tree(chrom_a, gene_tree)
    if tree_a:
        hit = tree_a.overlaps(pos_a, pos_a + 1)

    tree_b = ac.get_chrom_ref_tree(chrom_b, gene_tree)
    if tree_b:
        hit = hit or tree_b.overlaps(pos_b, pos_b + 1)
    return hit
def overlaps_same_exon(sv, ex_trees):
    '''
    Checks whether variant is contained completely within a single exon.

    sv       -- record with 'pos1' and 'pos2' entries, each parsed by
                get_pos_parts into (chromosome, position, strand)
    ex_trees -- per-chromosome exon trees, resolved through
                ac.get_chrom_ref_tree

    Returns True only when both positions lie on the same chromosome
    and the set of exons covering the first position is non-empty and
    identical to the set covering the second position. Returns False
    when the chromosomes differ or no exon tree exists for the
    chromosome.
    '''
    chr1, start, _ = get_pos_parts(sv['pos1'])
    chr2, end, _ = get_pos_parts(sv['pos2'])

    # Positions on different chromosomes can never share an exon.
    # Without this guard the second position would be looked up in the
    # first chromosome's tree and could match an unrelated exon.
    if chr1 != chr2:
        return False

    ex_tree = ac.get_chrom_ref_tree(chr1, ex_trees)
    if not ex_tree:
        return False

    # 1 bp windows at each end of the variant
    olap1 = ex_tree.overlap(start, start + 1)
    olap2 = ex_tree.overlap(end, end + 1)

    # equal and non-empty implies both ends are covered by the same exons
    return len(olap1) > 0 and olap1 == olap2
def check_overlap(ex_trees, chrom, start, end, size=0):
    '''
    Checks whether variant overlaps an exonic region.

    ex_trees   -- per-chromosome exon trees, resolved through
                  ac.get_chrom_ref_tree
    chrom      -- chromosome of the variant
    start, end -- variant coordinates queried against the exon tree
    size       -- when greater than 0, the minimum number of variant
                  bases that must fall within an overlapping exon
                  (the original notes say deletions need at least
                  MIN_GAP bp inside the exon body)

    Returns the plain overlap result when size is not positive, and
    False when there is no exon tree for chrom. With a positive size,
    returns True if ANY overlapping exon satisfies the threshold, so
    the answer does not depend on the iteration order of the set of
    overlapping exons.
    '''
    ex_tree = ac.get_chrom_ref_tree(chrom, ex_trees)
    if not ex_tree:
        return False

    olap = ex_tree.overlaps(start, end)
    if not (olap and size > 0):
        return olap

    for exon in ex_tree.overlap(start, end):
        es, ee = exon[0], exon[1]
        if start >= es:
            # variant starts inside the exon: count up to whichever
            # of exon end / variant end comes first
            size_within = min(ee, end) - start
        else:
            # variant starts before the exon: count from the exon start
            # NOTE(review): this runs to the variant end even when the
            # variant extends past the exon end (end > ee), so bases
            # beyond the exon are counted -- confirm whether clamping
            # to ee is wanted here.
            size_within = end - max(es, start)
        if size_within >= size:
            return True
    return False
def get_overlap_size(ex_trees, chrom, start, end):
    '''
    Returns by how many bases the variant overlaps any reference exon
    (i.e. for truncated exons and novel introns), otherwise returns nan.
    A value of 0 indicates adjacent elements.

    ex_trees   -- per-chromosome exon trees, resolved through
                  ac.get_chrom_ref_tree
    chrom      -- chromosome of the variant
    start, end -- variant coordinates

    Exons are looked up in the 1 bp windows just before start and just
    after end. nan is returned when chrom has no exon tree or neither
    window hits an exon. Otherwise the largest overlap across all exons
    touching either boundary is returned, so the result does not depend
    on the iteration order of the sets of matching exons.
    '''
    ex_tree = ac.get_chrom_ref_tree(chrom, ex_trees)
    if not ex_tree:
        return float('nan')

    match_left = ex_tree.overlap(start - 1, start)
    match_right = ex_tree.overlap(end, end + 1)

    # every exon touching either boundary, each considered once
    flanking = set(match_left) | set(match_right)
    if not flanking:
        return float('nan')

    def size_within(exon):
        # number of variant bases that fall inside this exon
        es, ee = exon[0], exon[1]
        if start >= es:
            return min(ee, end) - start
        return end - max(es, start)

    return max(size_within(exon) for exon in flanking)