Exemple #1
0
 def test_repr(self) -> None:
     """Check that ``repr()`` of a parsed position gives back the input string."""
     round_trip_inputs = (
         # plain positions
         "8",
         "92380",
         # positions with a leading '*' or '-'
         "*8",
         "-80",
         # positions with a trailing +/- offset
         "122-6",
         "78+10",
         "*89+67",
         "-127+6",
         "*73-105",
         "-45-1",
         # positions with an amino acid code
         "Cys234",
         "Ala9",
     )
     for text in round_trip_inputs:
         with self.subTest(s=text):
             parsed = VariantPosition(text)
             self.assertEqual(text, repr(parsed))
Exemple #2
0
    def _target_validate_substitution(pos: VariantPosition, ref: str,
                                      target: str) -> None:
        """Check a substitution's reference symbol against the target sequence.

        The position is treated as 1-based: the symbol compared with `ref` is
        ``target[pos.position - 1]``.

        Variants that use extended syntax cannot be checked by this method; when
        one is encountered it is accepted as valid/matching without inspection.

        Parameters
        ----------
        pos : VariantPosition
            Position of the substitution.
        ref : str
            Reference base or amino acid taken from the variant.
        target : str
            Target sequence. This must be an amino acid sequence for protein variants or a nucleotide sequence
            for coding/noncoding/genomic variants.
            RNA sequences should be in lowercase, DNA sequences should be in uppercase.

        Returns
        -------
        None

        Raises
        ------
        MaveHgvsParseError
            If the position is greater than the length of the target.
        MaveHgvsParseError
            If the target symbol at the position differs from `ref`.

        """
        # extended positions are not validated here
        if pos.is_extended():
            return

        if pos.position > len(target):
            raise MaveHgvsParseError("variant coordinate out of bounds")

        # convert the 1-based position into a 0-based string index
        target_symbol = target[pos.position - 1]
        if target_symbol != ref:
            raise MaveHgvsParseError(
                "substitution reference does not match target")
Exemple #3
0
    def test_non_adjacent_pairs(self) -> None:
        """Check that ``is_adjacent`` is False for every ordered pair of these positions."""
        non_adjacent_inputs = (
            "-45-1",
            "-12",
            "8",
            "99",
            "99+88",
            "99+122",
            "100-12",
            "103",
            "202-12",
            "202-1",
            "205",
            "*1",
            "*12",
            "*73-105",
        )
        parsed = list(map(VariantPosition, non_adjacent_inputs))

        # permutations() yields both (a, b) and (b, a), so each direction is checked
        for first, second in itertools.permutations(parsed, 2):
            with self.subTest(v1=first, v2=second):
                self.assertFalse(first.is_adjacent(second))
Exemple #4
0
 def test_not_adjacent_to_self(self) -> None:
     """Check that a position never reports itself as its own neighbour."""
     inputs = (
         "-45-1",
         "-12",
         "8",
         "99",
         "99+88",
         "99+122",
         "100-12",
         "100",
         "103",
         "202-12",
         "202-1",
         "205",
         "*1",
         "*12",
         "*73-105",
     )
     # parse everything up front so a parsing failure surfaces before any subtest runs
     parsed = tuple(VariantPosition(text) for text in inputs)
     for position in parsed:
         with self.subTest(v=position):
             self.assertFalse(position.is_adjacent(position))
Exemple #5
0
 def test_invalid_strings(self) -> None:
     """Check that each malformed position string raises ``MaveHgvsParseError``."""
     malformed_inputs = (
         "08",
         "+12",
         "*-99",
         "A",
         "TCGA",
         "g",
         "*",
         "-",
         "+",
         "**6",
         "800 + 12",
         "-12*5",
         "Glu-12",
         "*5Trp",
         "Xyz12",
         "ALA12",
     )
     for text in malformed_inputs:
         # one subtest per string so every rejected/accepted input is reported
         with self.subTest(s=text), self.assertRaises(MaveHgvsParseError):
             VariantPosition(text)
Exemple #6
0
    def setUp(self) -> None:
        """Build the fixture positions and an iterator over consecutive pairs of them.

        ``self.sorted_variants`` holds the parsed positions in the order listed
        below. ``self.sorted_variant_pairs`` is a ``zip`` iterator yielding each
        element together with its successor; being an iterator, it can only be
        consumed once per test.
        """
        ordered_inputs = (
            "-45-1",
            "-12",
            "8",
            "99",
            "99+88",
            "99+122",
            "100-12",
            "100",
            "101",
            "202-12",
            "202-1",
            "202",
            "*1",
            "*73-105",
        )

        self.sorted_variants = [VariantPosition(text) for text in ordered_inputs]

        # pair every element with the one that follows it
        following = self.sorted_variants[1:]
        self.sorted_variant_pairs = zip(self.sorted_variants, following)
Exemple #7
0
 def test_adjacent_pairs(self) -> None:
     """Check that ``is_adjacent`` is True in both directions for each listed pair."""
     neighbouring_inputs = (
         ("-45-2", "-45-1"),
         ("-45-1", "-45"),
         ("-12", "-13"),
         ("-1", "1"),
         ("8", "9"),
         ("202-1", "202"),
         ("99", "99+1"),
         ("99+88", "99+89"),
         ("100-12", "100-11"),
         ("100", "101"),
         ("*1", "*2"),
         ("*73-1", "*73"),
     )
     for left_text, right_text in neighbouring_inputs:
         v1 = VariantPosition(left_text)
         v2 = VariantPosition(right_text)
         # forward direction first, then reverse; both subtests carry the same labels
         for subject, other in ((v1, v2), (v2, v1)):
             with self.subTest(v1=v1, v2=v2):
                 self.assertTrue(subject.is_adjacent(other))
Exemple #8
0
    def _process_string_variant(
        self, match_dict: Dict[str, str], relaxed_ordering: bool
    ) -> Tuple[str, Optional[Union[VariantPosition, Tuple[
            VariantPosition, VariantPosition]]], Optional[Union[str, Tuple[
                str, str]]], ]:
        """Break the regular expression match for one variant into type, position(s) and sequence(s).

        Parameters
        ----------
        match_dict : Dict[str, str]
            Match dictionary from the MAVE-HGVS regular expression.
        relaxed_ordering : bool
            If True, variants that do not observe the 3-prime rule for variant position ordering are allowed:
            a start/end pair where start is not before end is swapped instead of being rejected.

        Returns
        -------
        Tuple[str, Optional[Union[VariantPosition, Tuple[VariantPosition, VariantPosition]]], Optional[Union[str, Tuple[str, str]]]]
            A 3-tuple of the variant type, the position (or a start/end pair) if any, and the
            before/after substitution symbols or the inserted/identity sequence if any.
            The variant type is None when no variant-type group matched.

        Raises
        ------
        MaveHgvsParseError
            If the start position is not before the end position and `relaxed_ordering` is False.
        MaveHgvsParseError
            If the two positions of an insertion are not adjacent.
        MaveHgvsParseError
            If no position is found for a variant type other than "equal".
        ValueError
            If the prefix is not recognised, or more than one variant-type group matched.

        """
        prefix = self._prefix

        # pick the naming scheme of the regex groups that belong to this prefix
        if prefix == "p":
            group_template = "pro_{}"
        elif prefix == "r":
            group_template = "rna_{}"
        elif prefix in ("c", "n"):
            group_template = "dna_{}_" + prefix
        elif prefix in ("g", "m", "o"):
            group_template = "dna_{}_gmo"
        else:  # pragma: no cover
            raise ValueError("unexpected prefix")

        # find the single variant-type group that matched
        variant_type = None
        pattern_group = None
        for candidate_type in self.VTYPES:
            group_name = group_template.format(candidate_type)
            if match_dict[group_name] is None:
                continue
            if pattern_group is not None:  # pragma: no cover
                raise ValueError(
                    f"ambiguous match: '{group_name}' and '{pattern_group}'")
            variant_type = candidate_type
            pattern_group = group_name

        positions = None
        sequences = None

        if variant_type == "sub":
            positions = VariantPosition(
                match_dict[f"{pattern_group}_position"])
            new_key = f"{pattern_group}_new"
            if prefix == "p":
                # for proteins the reference residue is carried by the position itself
                sequences = (positions.amino_acid, match_dict[new_key])
            elif prefix in ("g", "m", "o", "c", "n", "r"):
                sequences = (match_dict[f"{pattern_group}_ref"],
                             match_dict[new_key])
            else:  # pragma: no cover
                raise ValueError("unexpected prefix")
        elif variant_type in ("del", "dup", "ins", "delins", "equal"):
            # get() is used because not every pattern defines all of these
            # groups (e.g. the ins pattern has no single position group)
            single_text = match_dict.get(f"{pattern_group}_position")
            start_text = match_dict.get(f"{pattern_group}_start")
            end_text = match_dict.get(f"{pattern_group}_end")

            if single_text is not None:
                positions = VariantPosition(single_text)
            elif start_text is not None and end_text is not None:
                first = VariantPosition(start_text)
                last = VariantPosition(end_text)
                # extra validation on the pair
                if first >= last:
                    if not relaxed_ordering:
                        raise MaveHgvsParseError(
                            "start position must be before end position")
                    first, last = last, first
                positions = (first, last)
                if variant_type == "ins" and not first.is_adjacent(last):
                    raise MaveHgvsParseError(
                        "insertion positions must be adjacent")
            elif variant_type != "equal":  # pragma: no cover
                raise MaveHgvsParseError("variant position not found")

            # attach the sequence for the variant types that carry one
            if variant_type in ("ins", "delins"):
                sequences = match_dict[f"{pattern_group}_seq"]
            elif variant_type == "equal":
                # special case for target identity
                identity = match_dict[f"{pattern_group}_equal"]
                if identity is not None:
                    sequences = identity
                else:
                    # NOTE(review): this fallback group is protein-specific by name
                    # and is consulted for every prefix - confirm against the regex
                    fallback = match_dict["pro_equal_equal_sy"]
                    if fallback is not None:
                        sequences = fallback

        return variant_type, positions, sequences
Exemple #9
0
    def test_intron(self) -> None:
        """Check the parsed fields and predicates for positions with a +/- offset.

        Both inputs are expected to be intronic and extended, and neither UTR
        nor protein.
        """
        expectations = (
            ("122-6", (122, None, -6, None)),
            ("78+10", (78, None, 10, None)),
        )
        for text, fields in expectations:
            v = VariantPosition(text)
            observed = (v.position, v.amino_acid, v.intronic_position, v.utr)
            self.assertTupleEqual(observed, fields)
            self.assertFalse(v.is_utr())
            self.assertTrue(v.is_intronic())
            self.assertFalse(v.is_protein())
            self.assertTrue(v.is_extended())
Exemple #10
0
    def test_position_only(self) -> None:
        """Check the parsed fields and predicates for bare numeric positions.

        Only ``position`` is expected to be set, and every ``is_*`` predicate
        is expected to be False.
        """
        expectations = (
            ("8", (8, None, None, None)),
            ("92380", (92380, None, None, None)),
        )
        for text, fields in expectations:
            v = VariantPosition(text)
            observed = (v.position, v.amino_acid, v.intronic_position, v.utr)
            self.assertTupleEqual(observed, fields)
            self.assertFalse(v.is_utr())
            self.assertFalse(v.is_intronic())
            self.assertFalse(v.is_protein())
            self.assertFalse(v.is_extended())
Exemple #11
0
    def test_amino_acid(self) -> None:
        """Check the parsed fields and predicates for positions with an amino acid code.

        ``amino_acid`` and ``position`` are expected to be set, and only
        ``is_protein()`` is expected to be True.
        """
        expectations = (
            ("Gly8", (8, "Gly", None, None)),
            ("Cys92380", (92380, "Cys", None, None)),
        )
        for text, fields in expectations:
            v = VariantPosition(text)
            observed = (v.position, v.amino_acid, v.intronic_position, v.utr)
            self.assertTupleEqual(observed, fields)
            self.assertFalse(v.is_utr())
            self.assertFalse(v.is_intronic())
            self.assertTrue(v.is_protein())
            self.assertFalse(v.is_extended())
Exemple #12
0
    def test_utr_intron(self) -> None:
        """Check the parsed fields and predicates for positions that are both UTR and intronic.

        Every input has a leading '*' or '-' and a trailing +/- offset; each is
        expected to be UTR, intronic and extended, and not protein.
        """
        expectations = (
            ("*89+67", (89, None, 67, True)),
            ("-127+6", (-127, None, 6, True)),
            ("*73-105", (73, None, -105, True)),
            ("-45-1", (-45, None, -1, True)),
        )
        for text, fields in expectations:
            v = VariantPosition(text)
            observed = (v.position, v.amino_acid, v.intronic_position, v.utr)
            self.assertTupleEqual(observed, fields)
            self.assertTrue(v.is_utr())
            self.assertTrue(v.is_intronic())
            self.assertFalse(v.is_protein())
            self.assertTrue(v.is_extended())
    def test_position(self) -> None:
        """Check that ``Variant(s).positions`` equals the expected position(s) for each variant string.

        Each case pairs the expected value with the variant string to parse.
        The expected value is a single ``VariantPosition``, a ``(start, end)``
        tuple, or a list of either for a multi-variant, in which case the
        entries are compared one by one.
        """
        variant_tuples = [
            (VariantPosition("Glu27"), "p.Glu27Trp"),
            (VariantPosition("122-6"), "c.122-6T>A"),
            (VariantPosition("44"), "g.44del"),
            ((VariantPosition("78+5"), VariantPosition("78+10")),
             "c.78+5_78+10del"),
            (VariantPosition("77"), "c.77dup"),
            ((VariantPosition("Pro12"), VariantPosition("Gly18")),
             "p.Pro12_Gly18dup"),
            (
                (VariantPosition("Ala12"), VariantPosition("Pro13")),
                "p.Ala12_Pro13insGlyProCys",
            ),
            ((VariantPosition("22"), VariantPosition("23")), "r.22_23insauc"),
            (
                (VariantPosition("43-6"), VariantPosition("595+12")),
                "c.43-6_595+12delinsCTT",
            ),
            (
                (VariantPosition("Ile71"), VariantPosition("Cys80")),
                "p.Ile71_Cys80delinsSer",
            ),
        ]

        for p, s in variant_tuples:
            with self.subTest(p=p, s=s):
                v = Variant(s)
                if isinstance(p, list):  # multi-variant
                    self.assertEqual(len(p), len(v.positions))
                    for q, vp in zip(p, v.positions):
                        if isinstance(q, tuple):
                            self.assertTupleEqual(q, vp)
                        else:
                            self.assertEqual(q, vp)
                # elif, not if: a list expectation is fully handled above and
                # must not fall through to the single-position comparison
                elif isinstance(p, tuple):
                    self.assertTupleEqual(p, v.positions)
                else:
                    self.assertEqual(p, v.positions)