Example #1
0
    def test_without_groupname(self):
        # Each case pairs the patterns handed to ``combine_patterns`` (called
        # with no group name) with the combined pattern it must return: the
        # nested group ``x`` is expected back as ``a_x`` / ``b_x`` and the
        # alternatives joined inside a single ``(?:...|...)`` group.
        cases = [
            (
                ("(?P<a>(?P<x>[1-9]))", "(?P<b>(?P<x>[1-9]))"),
                "(?:(?P<a>(?P<a_x>[1-9]))|(?P<b>(?P<b_x>[1-9])))",
            ),
        ]

        for case in cases:
            p1, p2 = case
            with self.subTest(p1=p1, p2=p2):
                combined = combine_patterns(p1)
                self.assertEqual(combined, p2)
Example #2
0
"""str: Pattern matching a protein deletion.
"""

pro_dup: str = (
    r"(?P<pro_dup>"
    # Range form: <start>_<end>dup
    rf"(?:(?P<start>{aa_pos})_(?P<end>{aa_pos})dup)"
    r"|"
    # Single-position form: <position>dup
    rf"(?:(?P<position>{aa_pos})dup)"
    r")"
)
"""str: Pattern matching a protein duplication.

Accepts either a ``<start>_<end>dup`` range or a single ``<position>dup``.
"""

pro_ins: str = (
    r"(?P<pro_ins>"
    rf"(?P<start>{aa_pos})_(?P<end>{aa_pos})"
    rf"ins(?P<seq>{amino_acid}+)"
    r")"
)
"""str: Pattern matching a protein insertion.

The insertion is written as ``<start>_<end>ins<seq>``, where ``seq`` is one or more amino acids.
"""

pro_delins: str = (
    r"(?P<pro_delins>"
    r"(?:"
    # Range form: <start>_<end>
    rf"(?:(?P<start>{aa_pos})_(?P<end>{aa_pos}))"
    r"|"
    # Single-position form: <position>
    rf"(?P<position>{aa_pos})"
    r")"
    rf"delins(?P<seq>{amino_acid}+)"
    r")"
)
"""str: Pattern matching a protein deletion-insertion.

The location is either a ``<start>_<end>`` range or a single ``<position>``, followed by ``delins`` and one or more
amino acids captured as ``seq``.
"""

pro_variant: str = combine_patterns(
    [
        pro_equal,
        pro_sub,
        pro_del,
        pro_dup,
        pro_ins,
        pro_delins,
    ],
    None,
)
"""str: Pattern matching any single protein variant event.

Built by passing the individual protein event patterns to ``combine_patterns``.
"""

pro_single_variant: str = (
    r"(?P<pro>p\."
    rf"{pro_variant}"
    r")"
)
"""str: Pattern matching any complete protein variant, including the prefix character.

The ``p.`` prefix and the variant event are captured together in the ``pro`` group.
"""

pro_multi_variant: str = (
    r"(?P<pro_multi>p\.\["
    # First variant in the bracketed list.
    rf"{remove_named_groups(pro_variant)}"
    # One or more further variants, each preceded by a semicolon.
    rf"(?:;{remove_named_groups(pro_variant)})"
    r"{1,}"
    r"\])"
)
"""str: Pattern matching any complete protein multi-variant, including the prefix character.

Named capture groups have been removed from the variant patterns because of non-uniqueness.
Another application of the single-variant regular expressions is needed to recover the named groups from each
individual variant in the multi-variant.
"""
Example #3
0
"""str: Pattern matching a RNA deletion with numeric or relative-to-transcript positions.
"""

rna_dup: str = (
    r"(?P<rna_dup>"
    r"(?:"
    # Range form: <start>_<end>
    rf"(?:(?P<start>{pos_intron})_(?P<end>{pos_intron}))"
    r"|"
    # Single-position form: <position>
    rf"(?P<position>{pos_intron})"
    r")"
    # The ``dup`` suffix is written once, after the alternation, so it applies to
    # both forms. A second ``dup`` inside the range branch would make the range
    # form match only ``<start>_<end>dupdup``.
    r"dup"
    r")"
)
"""str: Pattern matching an RNA duplication with numeric or relative-to-transcript positions.

Accepts either a ``<start>_<end>dup`` range or a single ``<position>dup``.
"""

rna_ins: str = (
    r"(?P<rna_ins>"
    rf"(?P<start>{pos_intron})_(?P<end>{pos_intron})"
    rf"ins(?P<seq>{rna_nt}+)"
    r")"
)
"""str: Pattern matching an RNA insertion with numeric or relative-to-transcript positions.

The insertion is written as ``<start>_<end>ins<seq>``, where ``seq`` is one or more RNA nucleotides.
"""

rna_delins: str = (
    r"(?P<rna_delins>"
    r"(?:"
    # Range form: <start>_<end>
    rf"(?:(?P<start>{pos_intron})_(?P<end>{pos_intron}))"
    r"|"
    # Single-position form: <position>
    rf"(?P<position>{pos_intron})"
    r")"
    rf"delins(?P<seq>{rna_nt}+)"
    r")"
)
"""str: Pattern matching an RNA deletion-insertion with numeric or relative-to-transcript positions.

The location is either a ``<start>_<end>`` range or a single ``<position>``, followed by ``delins`` and one or more
RNA nucleotides captured as ``seq``.
"""

rna_variant: str = combine_patterns(
    [
        rna_equal,
        rna_sub,
        rna_del,
        rna_dup,
        rna_ins,
        rna_delins,
    ],
    None,
)
"""str: Pattern matching any single RNA variant event.

Built by passing the individual RNA event patterns to ``combine_patterns``.
"""

rna_single_variant: str = (
    r"(?P<rna>r\."
    rf"{rna_variant}"
    r")"
)
"""str: Pattern matching any complete RNA variant, including the prefix character.

The ``r.`` prefix and the variant event are captured together in the ``rna`` group.
"""

rna_multi_variant: str = (
    r"(?P<rna_multi>r\.\["
    # First variant in the bracketed list.
    rf"{remove_named_groups(rna_variant)}"
    # One or more further variants, each preceded by a semicolon.
    rf"(?:;{remove_named_groups(rna_variant)})"
    r"{1,}"
    r"\])"
)
"""str: Pattern matching any complete RNA multi-variant, including the prefix character.

Named capture groups have been removed from the variant patterns because of non-uniqueness.
Another application of the single-variant regular expressions is needed to recover the named groups from each
individual variant in the multi-variant.
"""
Example #4
0
"""

dna_ins_gmo: str = (
    dna_ins_c
    .replace(pos_intron_utr, pos)
    .replace("(?P<dna_ins_c>", "(?P<dna_ins_gmo>")
)
"""str: Pattern matching a DNA insertion with only numeric positions for genomic-style variants.

Derived from ``dna_ins_c``: each occurrence of ``pos_intron_utr`` is replaced with ``pos`` and the ``dna_ins_c``
named group is renamed to ``dna_ins_gmo``.
"""

dna_delins_gmo: str = (
    dna_delins_c
    .replace(pos_intron_utr, pos)
    .replace("(?P<dna_delins_c>", "(?P<dna_delins_gmo>")
)
"""str: Pattern matching a DNA deletion-insertion with only numeric positions for genomic-style variants.

Derived from ``dna_delins_c``: each occurrence of ``pos_intron_utr`` is replaced with ``pos`` and the
``dna_delins_c`` named group is renamed to ``dna_delins_gmo``.
"""

dna_variant_c: str = combine_patterns(
    [
        dna_equal_c,
        dna_sub_c,
        dna_del_c,
        dna_dup_c,
        dna_ins_c,
        dna_delins_c,
    ],
    None,
)
"""str: Pattern matching any of the coding DNA variants.

Built by passing the individual ``*_c`` event patterns to ``combine_patterns``.
"""

dna_variant_n: str = combine_patterns(
    [
        dna_equal_n,
        dna_sub_n,
        dna_del_n,
        dna_dup_n,
        dna_ins_n,
        dna_delins_n,
    ],
    None,
)
"""str: Pattern matching any of the non-coding DNA variants.

Built by passing the individual ``*_n`` event patterns to ``combine_patterns``.
"""

# Built by passing the individual ``*_gmo`` event patterns to ``combine_patterns``.
dna_variant_gmo: str = combine_patterns(
    [
        dna_equal_gmo,
        dna_sub_gmo,
        dna_del_gmo,
        dna_dup_gmo,
        dna_ins_gmo,
        dna_delins_gmo,
    ],
    None,
)
"""str: Pattern matching any of the genomic-style DNA variants.