def test_without_groupname(self):
    """Check ``combine_patterns`` when it is called without a group name.

    The two input patterns share a nested group name (``x``). The expected
    combined pattern is a non-capturing alternation in which each nested
    group is renamed with its enclosing group's name as a prefix
    (``a_x`` and ``b_x``).
    """
    inputs = (
        "(?P<a>(?P<x>[1-9]))",
        "(?P<b>(?P<x>[1-9]))",
    )
    expected = "(?:(?P<a>(?P<a_x>[1-9]))|(?P<b>(?P<b_x>[1-9])))"
    cases = [(inputs, expected)]

    # One sub-test per (inputs, expected) pair so failures are reported individually.
    for p1, p2 in cases:
        with self.subTest(p1=p1, p2=p2):
            self.assertEqual(combine_patterns(p1), p2)
"""str: Pattern matching a protein deletion.
"""

pro_dup: str = (
    r"(?P<pro_dup>"
    rf"(?:(?P<start>{aa_pos})_(?P<end>{aa_pos})dup)"
    rf"|(?:(?P<position>{aa_pos})dup)"
    r")"
)
"""str: Pattern matching a protein duplication, written either as a
``start_end`` range or as a single position.
"""

pro_ins: str = (
    r"(?P<pro_ins>"
    rf"(?P<start>{aa_pos})_(?P<end>{aa_pos})"
    rf"ins(?P<seq>{amino_acid}+)"
    r")"
)
"""str: Pattern matching a protein insertion between a start and an end
position, followed by the inserted sequence.
"""

pro_delins: str = (
    r"(?P<pro_delins>"
    rf"(?:(?:(?P<start>{aa_pos})_(?P<end>{aa_pos}))|(?P<position>{aa_pos}))"
    rf"delins(?P<seq>{amino_acid}+)"
    r")"
)
"""str: Pattern matching a protein deletion-insertion over a range or a
single position, followed by the inserted sequence.
"""

pro_variant: str = combine_patterns(
    [
        pro_equal,
        pro_sub,
        pro_del,
        pro_dup,
        pro_ins,
        pro_delins,
    ],
    None,
)
"""str: Pattern matching any single protein variant event.
"""

pro_single_variant: str = (
    r"(?P<pro>p\."
    rf"{pro_variant}"
    r")"
)
"""str: Pattern matching any complete protein variant, including the prefix
character.
"""

pro_multi_variant: str = (
    r"(?P<pro_multi>p\.\["
    rf"{remove_named_groups(pro_variant)}"
    rf"(?:;{remove_named_groups(pro_variant)}){{1,}}"
    r"\])"
)
"""str: Pattern matching any complete protein multi-variant, including the
prefix character.

Named capture groups have been removed from the variant patterns because they
would not be unique. Another application of the single-variant regular
expressions is needed to recover the named groups from each individual variant
in the multi-variant.
"""
"""str: Pattern matching an RNA deletion with numeric or relative-to-transcript positions.
"""

# The "dup" suffix is matched exactly once, after the range-or-position
# alternation (the same shape as ``rna_delins`` below). It previously also
# appeared inside the range alternative, so a range duplication only matched
# when written as "<start>_<end>dupdup".
rna_dup: str = rf"(?P<rna_dup>(?:(?:(?P<start>{pos_intron})_(?P<end>{pos_intron}))|(?P<position>{pos_intron}))dup)"
"""str: Pattern matching an RNA duplication with numeric or relative-to-transcript positions.

The duplicated region is either a ``start_end`` range or a single position.
"""

rna_ins: str = rf"(?P<rna_ins>(?P<start>{pos_intron})_(?P<end>{pos_intron})ins(?P<seq>{rna_nt}+))"
"""str: Pattern matching an RNA insertion with numeric or relative-to-transcript positions.
"""

rna_delins: str = rf"(?P<rna_delins>(?:(?:(?P<start>{pos_intron})_(?P<end>{pos_intron}))|(?P<position>{pos_intron}))delins(?P<seq>{rna_nt}+))"
"""str: Pattern matching an RNA deletion-insertion with numeric or relative-to-transcript positions.
"""

rna_variant: str = combine_patterns(
    [rna_equal, rna_sub, rna_del, rna_dup, rna_ins, rna_delins], None
)
"""str: Pattern matching any single RNA variant event.
"""

rna_single_variant: str = rf"(?P<rna>r\.{rna_variant})"
"""str: Pattern matching any complete RNA variant, including the prefix character.
"""

# ``{{1,}}`` is an escaped ``{1,}`` quantifier: at least one additional
# ";"-separated variant must follow the first one inside the brackets.
rna_multi_variant: str = rf"(?P<rna_multi>r\.\[{remove_named_groups(rna_variant)}(?:;{remove_named_groups(rna_variant)}){{1,}}\])"
"""str: Pattern matching any complete RNA multi-variant, including the prefix character.

Named capture groups have been removed from the variant patterns because they
would not be unique. Another application of the single-variant regular
expressions is needed to recover the named groups from each individual variant
in the multi-variant.
"""
""" dna_ins_gmo: str = dna_ins_c.replace(pos_intron_utr, pos).replace( "(?P<dna_ins_c>", "(?P<dna_ins_gmo>" ) """str: Pattern matching a DNA insertion with only numeric positions for genomic-style variants. """ dna_delins_gmo: str = dna_delins_c.replace(pos_intron_utr, pos).replace( "(?P<dna_delins_c>", "(?P<dna_delins_gmo>" ) """str: Pattern matching a DNA deletion-insertion with only numeric positions for genomic-style variants. """ dna_variant_c: str = combine_patterns( [dna_equal_c, dna_sub_c, dna_del_c, dna_dup_c, dna_ins_c, dna_delins_c], None ) """str: Pattern matching any of the coding DNA variants. """ dna_variant_n: str = combine_patterns( [dna_equal_n, dna_sub_n, dna_del_n, dna_dup_n, dna_ins_n, dna_delins_n], None ) """str: Pattern matching any of the non-coding DNA variants. """ dna_variant_gmo: str = combine_patterns( [dna_equal_gmo, dna_sub_gmo, dna_del_gmo, dna_dup_gmo, dna_ins_gmo, dna_delins_gmo], None, ) """str: Pattern matching any of the genomic-style DNA variants.