def crossmap_to_internal_setup(coordinate_system, selector_model=None):
    """
    Build the setup used to convert locations of the given coordinate
    system to internal coordinates.

    :param coordinate_system: One of "g", "c" or "n".
    :param selector_model: Mapping with the "exon" and "inverted" entries
        (and "cds" for "c"); only read for the "c" and "n" systems.
    :returns: Dictionary with the "crossmap_function" and "point_function"
        entries, plus "inverted" for "c" and "n". ``None`` is returned for
        any other coordinate system.
    """
    if coordinate_system == "g":
        return {
            "crossmap_function": Genomic().genomic_to_coordinate,
            "point_function": get_point_value,
        }

    if coordinate_system == "c":
        exons = selector_model["exon"]
        cds = selector_model["cds"][0]
    elif coordinate_system == "n":
        # No CDS is read here: the span from the start of the first exon
        # to the end of the last exon is used in its place.
        exons = selector_model["exon"]
        cds = (exons[0][0], exons[-1][-1])
    else:
        # Unsupported systems yield no setup.
        return None

    inverted = selector_model["inverted"]
    return {
        "crossmap_function": Coding(exons, cds, inverted).coding_to_coordinate,
        "point_function": point_to_x_coding,
        "inverted": selector_model["inverted"],
    }
def test_Coding_no_utr_degenerate_return():
    """Degenerate positions with the CDS covering the whole exon."""
    coding = Coding([(10, 11)], (10, 11))
    expected = [
        (8, (-2, 0, -1, -2)),
        (9, (-1, 0, -1, -1)),
        (11, (1, 0, 1, 1)),
        (12, (2, 0, 1, 2)),
    ]
    for coordinate, position in expected:
        assert coding.coordinate_to_coding(coordinate, True) == position
def test_Coding_protein():
    """Protein positions around the region boundaries."""
    coding = Coding(_exons, _cds)
    cases = [
        # Boundary between 5' UTR and CDS.
        (31, (-1, 3, 0, -1, 0)),
        (32, (1, 1, 0, 0, 0)),
        # Intron boundary.
        (34, (1, 3, 0, 0, 0)),
        (35, (1, 3, 1, 0, 0)),
        # Boundary between CDS and 3' UTR.
        (42, (2, 3, 0, 0, 0)),
        (43, (1, 1, 0, 1, 0)),
    ]
    for coordinate, protein_position in cases:
        invariant(
            coding.coordinate_to_protein, coordinate,
            coding.protein_to_coordinate, protein_position)
def crossmap_to_hgvs_setup(coordinate_system, selector_model=None, degenerate=False):
    """
    Return the setup used to convert locations from the internal system
    to the hgvs system.

    :param coordinate_system: One of "g", "c" or "n".
    :param selector_model: Mapping with the "exon" and "inverted" entries
        (and "cds" for "c"); only read for the "c" and "n" systems.
    :param degenerate: Passed through as the "degenerate" entry of the
        "c" setup; not used for the other systems.
    :returns: Dictionary with the "crossmap_function" and "point_function"
        entries, plus "inverted" for "c" and "n" and "degenerate" for "c".
    :raises Exception: If the coordinate system is not supported.
    """
    if coordinate_system == "g":
        crossmap = Genomic()
        return {
            "crossmap_function": crossmap.coordinate_to_genomic,
            "point_function": genomic_to_point,
        }
    elif coordinate_system == "c":
        crossmap = Coding(
            selector_model["exon"], selector_model["cds"][0], selector_model["inverted"]
        )
        return {
            "crossmap_function": crossmap.coordinate_to_coding,
            "point_function": coding_to_point,
            "degenerate": degenerate,
            "inverted": selector_model["inverted"],
        }
    elif coordinate_system == "n":
        crossmap = NonCoding(selector_model["exon"], selector_model["inverted"])
        return {
            "crossmap_function": crossmap.coordinate_to_noncoding,
            "point_function": noncoding_to_point,
            "inverted": selector_model["inverted"],
        }
    else:
        # Every supported system has returned above, so nothing follows
        # this branch.
        raise Exception("Unsupported coordinate system: {}.".format(coordinate_system))
def test_Coding_inverted_degenerate():
    """Degenerate upstream and downstream positions, inverted transcript."""
    coding = Coding([(10, 20)], (11, 19), True)
    cases = [
        (20, [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]),
        (9, [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]),
    ]
    for coordinate, degenerate_positions in cases:
        degenerate_equal(
            coding.coding_to_coordinate, coordinate, degenerate_positions)
def test_Coding_inverted_no_utr_degenerate():
    """Degenerate positions with the CDS covering the whole exon, inverted."""
    coding = Coding([(10, 11)], (10, 11), True)
    cases = [
        (11, [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]),
        (9, [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]),
    ]
    for coordinate, degenerate_positions in cases:
        degenerate_equal(
            coding.coding_to_coordinate, coordinate, degenerate_positions)
def test_Coding_no_utr3_inverted():
    """Inverted transcript without a 3' UTR."""
    coding = Coding([(10, 20)], (10, 15), True)
    # Direct transition from CDS to downstream.
    cases = [
        (10, (5, 0, 0, 0)),
        (9, (5, 1, 0, 1)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_no_utr5_inverted():
    """Inverted transcript without a 5' UTR."""
    coding = Coding([(10, 20)], (15, 20), True)
    # Direct transition from upstream to CDS.
    cases = [
        (20, (1, -1, 0, -1)),
        (19, (1, 0, 0, 0)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_small_utr3_inverted():
    """A 3' UTR may be of length one (inverted transcript)."""
    coding = Coding([(10, 20)], (11, 15), True)
    # Transition from CDS to 3' UTR to downstream.
    cases = [
        (11, (4, 0, 0, 0)),
        (10, (1, 0, 1, 0)),
        (9, (1, 1, 1, 1)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_small_utr5_inverted():
    """A 5' UTR may be of length one (inverted transcript)."""
    coding = Coding([(10, 20)], (15, 19), True)
    # Transition from upstream to 5' UTR to CDS.
    cases = [
        (20, (-1, -1, -1, -1)),
        (19, (-1, 0, -1, 0)),
        (18, (1, 0, 0, 0)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_small_utr5():
    """A 5' UTR may be of length one."""
    coding = Coding([(10, 20)], (11, 15))
    # Transition from upstream to 5' UTR to CDS.
    cases = [
        (9, (-1, -1, -1, -1)),
        (10, (-1, 0, -1, 0)),
        (11, (1, 0, 0, 0)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_small_utr3():
    """A 3' UTR may be of length one."""
    coding = Coding([(10, 20)], (15, 19))
    # Transition from CDS to 3' UTR to downstream.
    cases = [
        (18, (4, 0, 0, 0)),
        (19, (1, 0, 1, 0)),
        (20, (1, 1, 1, 1)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding():
    """Coding transcript in forward orientation."""
    coding = Coding(_exons, _cds)
    cases = [
        # Boundary between 5' and CDS.
        (31, (-1, 0, -1, 0)),
        (32, (1, 0, 0, 0)),
        # Boundary between CDS and 3'.
        (42, (6, 0, 0, 0)),
        (43, (1, 0, 1, 0)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def test_Coding_regions_inverted():
    """The CDS can start or end on a region boundary (inverted transcript)."""
    coding = Coding([(10, 21), (30, 40), (49, 60)], (30, 40), True)
    cases = [
        # Upstream odd length intron between two regions.
        (44, (-1, 5, -1, 0)),
        (43, (1, -4, 0, 0)),
        # Downstream odd length intron between two regions.
        (25, (10, 5, 0, 0)),
        (24, (1, -4, 1, 0)),
    ]
    for coordinate, position in cases:
        invariant(
            coding.coordinate_to_coding, coordinate,
            coding.coding_to_coordinate, position)
def to_cds_coordinate(variants, sequences, selector_model):
    """
    Converts the locations to cds equivalent.

    :param variants: Variants with locations in the coordinate system.
    :param sequences: Passed on to `reverse_variants` and
        `variant_to_cds_coordinate`.
    :param selector_model: Selector from which the exons and cds are
        derived; its "exon" and "inverted" entries are read directly.
    :returns: Tuple of the converted variants and the splice site hits.
    """
    exons, cds = _get_exons_and_cds(selector_model)
    coding_crossmap = Coding(
        selector_model["exon"], cds, selector_model["inverted"]
    )

    # Variants are reversed before the splice site hits and the exon
    # positions are computed.
    if selector_model.get("inverted"):
        variants = reverse_variants(variants, sequences)

    splice_site_hits = _get_splice_site_hits(variants, exons, cds)
    cds_variants = [
        variant_to_cds_coordinate(
            variant, sequences, selector_model, coding_crossmap
        )
        for variant in to_exon_positions(variants, exons, cds)
    ]
    return cds_variants, splice_site_hits
def test_Coding_inverted_degenerate_return():
    """Degenerate upstream and downstream positions, inverted transcript."""
    coding = Coding([(10, 20)], (11, 19), True)
    expected = [
        (20, (-2, 0, -1, -1)),
        (9, (2, 0, 1, 1)),
    ]
    for coordinate, position in expected:
        assert coding.coordinate_to_coding(coordinate, True) == position
def test_Coding_inverted_degenerate_no_return():
    """An internal position is the same with or without the degenerate flag."""
    coding = Coding([(10, 20), (30, 40)], (10, 40), True)
    regular = coding.coordinate_to_coding(25)
    degenerate = coding.coordinate_to_coding(25, True)
    assert regular == degenerate