Esempio n. 1
0
 def test_init(self):
     """Verify that constructor arguments are exposed by the Sequence properties.

     One fully specified sequence is checked property by property; small
     one-off sequences cover the alternative ways of passing ``parent``
     (bare string, bare ``Strand``, bare interval, or no parent at all).
     """
     location = SingleInterval(5, 9, Strand.MINUS, parent="parent")
     parent = Parent(id="parent",
                     sequence_type="seqtype_2",
                     location=location)
     sequence = Sequence("ACTG",
                         Alphabet.NT_STRICT,
                         id="id",
                         type="seqtype_1",
                         parent=parent)

     # Sequence data and its string form
     assert sequence.sequence == Seq("ACTG")
     assert str(sequence) == "ACTG"

     # Alphabet, ID and sequence type are stored as given
     assert sequence.alphabet == Alphabet.NT_STRICT
     assert sequence.id == "id"
     assert sequence.sequence_type == "seqtype_1"

     # Parent ID: from a Parent, a bare string, or the location's parent
     assert sequence.parent_id == "parent"
     named_parent = Sequence("A", Alphabet.NT_STRICT, parent="parent")
     assert named_parent.parent_id == "parent"
     located_parent = Sequence(
         "A",
         Alphabet.NT_STRICT,
         parent=Parent(location=SingleInterval(
             5, 6, Strand.MINUS, parent="parent")),
     )
     assert located_parent.parent_id == "parent"
     assert Sequence("A", Alphabet.NT_STRICT).parent_id is None

     # Parent type
     assert sequence.parent_type == "seqtype_2"

     # Parent strand: from the location, a bare Strand, or a bare interval
     assert sequence.parent_strand == Strand.MINUS
     strand_only = Sequence("A",
                            Alphabet.NT_STRICT,
                            parent=Strand.UNSTRANDED)
     assert strand_only.parent_strand == Strand.UNSTRANDED
     interval_only = Sequence(
         "A",
         Alphabet.NT_STRICT,
         parent=SingleInterval(3, 4, Strand.UNSTRANDED),
     )
     assert interval_only.parent_strand == Strand.UNSTRANDED
     assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None

     # Location on parent
     expected_location = SingleInterval(5, 9, Strand.MINUS, parent="parent")
     assert sequence.location_on_parent == expected_location

     # With validation switched off, construction must not raise
     Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)
Esempio n. 2
0
 def test_require_location_has_parent_with_sequence(self):
     """Exercise the three outcomes of the location/parent/sequence check.

     A location without a parent must raise ``NullParentException``, a
     parent without sequence data must raise ``NullSequenceException``, and
     a parent that carries a sequence must pass without raising.
     """
     validate = ObjectValidation.require_location_has_parent_with_sequence

     with pytest.raises(NullParentException):
         orphan = SingleInterval(0, 5, Strand.PLUS)
         validate(orphan)

     with pytest.raises(NullSequenceException):
         no_sequence = SingleInterval(0, 5, Strand.PLUS, parent="parent")
         validate(no_sequence)

     # Fully specified location: the call is expected to return quietly.
     parent_sequence = Sequence("AAAAA", Alphabet.NT_STRICT)
     complete = SingleInterval(
         0,
         5,
         Strand.PLUS,
         parent=Parent(id="parent", sequence=parent_sequence),
     )
     validate(complete)
Esempio n. 3
0
    def extract_sequence(self) -> Sequence:
        """
        Build the in-frame coding sequence of this CDS; its length is always a multiple of 3.

        The result is the concatenation, in iteration order, of the sequence extracted from
        every location in ``chunk_relative_codon_locations``. Bases annotated as CDS that do
        not belong to one of those codon locations (leading, trailing or internal bases that
        cannot form a complete codon) therefore do not appear in the output.

        Incomplete internal codons are identified by comparing the annotated CDSFrame of each
        exon with its expected CDSFrame, which lets an annotation represent programmed
        frameshifts as well as indels that may be assembly errors.
        """
        codon_strings = [
            str(location.extract_sequence())
            for location in self.chunk_relative_codon_locations
        ]
        cds_bases = "".join(codon_strings)
        # Every codon location contributes exactly three bases.
        assert len(cds_bases) % 3 == 0
        return Sequence(cds_bases,
                        Alphabet.NT_EXTENDED,
                        validate_alphabet=False)
Esempio n. 4
0
    def translate(
        self,
        truncate_at_in_frame_stop: Optional[bool] = False,
        translation_table: Optional[TranslationTable] = TranslationTable.DEFAULT,
    ) -> Sequence:
        """
        Translate this CDS and return the amino acid sequence.

        When ``truncate_at_in_frame_stop`` is ``True`` the translation ends at the first
        in-frame stop codon.

        For now ``translation_table`` only affects start codons: a non-standard table changes
        which start codons are translated as Methionine and leaves every other codon alone.
        """
        residues = self._translate_iter(truncate_at_in_frame_stop,
                                        translation_table)
        protein = "".join(residues)
        return Sequence(protein, Alphabet.AA, validate_alphabet=False)
Esempio n. 5
0
 def test_init_invalid_params(
     self,
     data,
     alphabet,
     parent_id,
     parent_type,
     parent_strand,
     location_on_parent,
     expected_exception,
 ):
     """Expect ``expected_exception`` when a Sequence is built from these arguments.

     The parent is assembled inside the ``pytest.raises`` block, so an
     exception raised while constructing the ``Parent`` counts as well.
     """
     parent_fields = {
         "id": parent_id,
         "sequence_type": parent_type,
         "strand": parent_strand,
         "location": location_on_parent,
     }
     with pytest.raises(expected_exception):
         invalid_parent = Parent(**parent_fields)
         Sequence(data, alphabet, parent=invalid_parent)
Esempio n. 6
0
 def test_validate_alphabet_error(self, sequence, alphabet):
     """Validated construction must raise ``AlphabetError`` for this data."""
     with pytest.raises(AlphabetError):
         Sequence(
             sequence,
             alphabet,
             validate_alphabet=True,
         )
Esempio n. 7
0
 def test_validate_alphabet(self, sequence, alphabet, validate_alphabet):
     """Construction must succeed (not raise) for the given combination."""
     Sequence(
         sequence,
         alphabet,
         validate_alphabet=validate_alphabet,
     )
Esempio n. 8
0
 def test_getitem_error(self):
     """Slicing a sequence whose parent location is unstranded must raise."""
     with pytest.raises(InvalidStrandException):
         unstranded = SingleInterval(0, 8, Strand.UNSTRANDED)
         sequence = Sequence("actgactg",
                             Alphabet.NT_STRICT,
                             parent=unstranded)
         _ = sequence[3:6]
Esempio n. 9
0
 def test_len(self):
     """``len`` must report the number of characters, including zero."""
     for data, expected_length in (("", 0), ("AAAAt", 5)):
         sequence = Sequence(data, Alphabet.NT_EXTENDED_GAPPED)
         assert len(sequence) == expected_length
Esempio n. 10
0
 def test_str(self):
     """``str`` must return the data unchanged, with case preserved."""
     data = "AAAAt"
     sequence = Sequence(data, Alphabet.NT_EXTENDED_GAPPED)
     assert str(sequence) == data
Esempio n. 11
0
class TestSequence:
    def test_init(self):
        """Verify that constructor arguments are exposed by the Sequence properties.

        One fully specified sequence is checked property by property; small
        one-off sequences cover the alternative ways of passing ``parent``
        (bare string, bare ``Strand``, bare interval, or no parent at all).
        """
        location = SingleInterval(5, 9, Strand.MINUS, parent="parent")
        parent = Parent(id="parent",
                        sequence_type="seqtype_2",
                        location=location)
        sequence = Sequence("ACTG",
                            Alphabet.NT_STRICT,
                            id="id",
                            type="seqtype_1",
                            parent=parent)

        # Sequence data and its string form
        assert sequence.sequence == Seq("ACTG")
        assert str(sequence) == "ACTG"

        # Alphabet, ID and sequence type are stored as given
        assert sequence.alphabet == Alphabet.NT_STRICT
        assert sequence.id == "id"
        assert sequence.sequence_type == "seqtype_1"

        # Parent ID: from a Parent, a bare string, or the location's parent
        assert sequence.parent_id == "parent"
        named_parent = Sequence("A", Alphabet.NT_STRICT, parent="parent")
        assert named_parent.parent_id == "parent"
        located_parent = Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=Parent(location=SingleInterval(
                5, 6, Strand.MINUS, parent="parent")),
        )
        assert located_parent.parent_id == "parent"
        assert Sequence("A", Alphabet.NT_STRICT).parent_id is None

        # Parent type
        assert sequence.parent_type == "seqtype_2"

        # Parent strand: from the location, a bare Strand, or a bare interval
        assert sequence.parent_strand == Strand.MINUS
        strand_only = Sequence("A",
                               Alphabet.NT_STRICT,
                               parent=Strand.UNSTRANDED)
        assert strand_only.parent_strand == Strand.UNSTRANDED
        interval_only = Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=SingleInterval(3, 4, Strand.UNSTRANDED),
        )
        assert interval_only.parent_strand == Strand.UNSTRANDED
        assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None

        # Location on parent
        expected_location = SingleInterval(5,
                                           9,
                                           Strand.MINUS,
                                           parent="parent")
        assert sequence.location_on_parent == expected_location

        # With validation switched off, construction must not raise
        Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)

    @pytest.mark.parametrize(
        "data,alphabet,parent_id,parent_type,parent_strand,location_on_parent,expected_exception",
        [
            # Data contains a gap character while the alphabet is NT_STRICT
            ("A-C", Alphabet.NT_STRICT, None, None, None, None, AlphabetError),
            # Location covers 4 positions but the data has only 3 characters
            (
                "ACG",
                Alphabet.NT_STRICT,
                None,
                None,
                None,
                SingleInterval(0, 4, Strand.PLUS),
                ParentException,
            ),
            # Parent ID differs from the ID of the location's parent
            (
                "ATT",
                Alphabet.NT_STRICT,
                "parent1",
                None,
                None,
                SingleInterval(0, 3, Strand.PLUS, parent="parent2"),
                ParentException,
            ),
            # Parent strand differs from the strand of the location
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                None,
                Strand.MINUS,
                SingleInterval(0, 3, Strand.PLUS),
                InvalidStrandException,
            ),
            # Parent sequence type differs from the type of the location's parent
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                "seqtype_2",
                None,
                SingleInterval(
                    0,
                    3,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype_3"),
                ),
                ParentException,
            ),
        ],
    )
    def test_init_invalid_params(
        self,
        data,
        alphabet,
        parent_id,
        parent_type,
        parent_strand,
        location_on_parent,
        expected_exception,
    ):
        """Expect ``expected_exception`` when a Sequence is built from these arguments.

        The four parent-related parameters are bundled into a single ``Parent``
        inside the ``pytest.raises`` block, so an exception raised while
        constructing the ``Parent`` itself also satisfies the test.
        """
        with pytest.raises(expected_exception):
            Sequence(
                data,
                alphabet,
                parent=Parent(
                    id=parent_id,
                    sequence_type=parent_type,
                    strand=parent_strand,
                    location=location_on_parent,
                ),
            )

    @pytest.mark.parametrize(
        "sequence,other,expected",
        [
            # Compared against a plain str holding the same characters
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                "AAAA",
                False,
            ),
            # Only the validate_alphabet flag differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=True),
                True,
            ),
            # Every attribute identical
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                True,
            ),
            # Sequence data differs only in letter case
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAa",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Alphabet differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_EXTENDED,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # ID differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq2",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Sequence type differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_2",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Strand of the parent location differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.UNSTRANDED, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Parent given as a string differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent2"),
                False,
            ),
            # String parent versus no parent
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent"),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # Parent given as a bare Strand differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.PLUS),
                False,
            ),
            # Strand-only parent versus no parent
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # NOTE(review): a bare string parent is read as a parent ID by
            # test_init in this class, so this looks like a repeat of the
            # "string parent versus no parent" case above. The value
            # "seqtype" suggests a parent *sequence type* may have been
            # intended - confirm.
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    parent="seqtype",
                ),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
        ],
    )
    def test_equals(self, sequence, other, expected):
        """Check ``==`` in both operand orders so the comparison is symmetric.

        ``is expected`` (rather than ``== expected``) additionally requires
        the comparison to return the actual ``True``/``False`` singletons.
        """
        assert (sequence == other) is expected
        assert (other == sequence) is expected

    def test_str(self):
        """``str`` must return the data unchanged, with case preserved."""
        data = "AAAAt"
        sequence = Sequence(data, Alphabet.NT_EXTENDED_GAPPED)
        assert str(sequence) == data

    def test_len(self):
        """``len`` must report the number of characters, including zero."""
        for data, expected_length in (("", 0), ("AAAAt", 5)):
            sequence = Sequence(data, Alphabet.NT_EXTENDED_GAPPED)
            assert len(sequence) == expected_length

    @pytest.mark.parametrize(
        "seq,key,exp",
        [
            # No parent: integer index, slice, and slice running past the end
            (Sequence("acgtacgt", Alphabet.NT_STRICT), 3,
             Sequence("t", Alphabet.NT_STRICT)),
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(
                3, 6), Sequence("tac", Alphabet.NT_STRICT)),
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(
                3, 10), Sequence("tacgt", Alphabet.NT_STRICT)),
            # Parent with location; slice
            # Plus strand: [3, 6) of the sequence maps to [3, 6) on the parent
            (
                Sequence("actgactg",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(0, 8, Strand.PLUS)),
                slice(3, 6),
                Sequence("gac",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(3, 6, Strand.PLUS)),
            ),
            # Minus strand: coordinates are mirrored, [3, 6) maps to [2, 5)
            (
                Sequence("actgactg",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(0, 8, Strand.MINUS)),
                slice(3, 6),
                Sequence("gac",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(2, 5, Strand.MINUS)),
            ),
            # Parent with location; single position
            # Plus strand: index 3 maps to [3, 4); minus strand: to [4, 5)
            (
                Sequence("actgactg",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(0, 8, Strand.PLUS)),
                3,
                Sequence("g",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(3, 4, Strand.PLUS)),
            ),
            (
                Sequence("actgactg",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(0, 8, Strand.MINUS)),
                3,
                Sequence("g",
                         Alphabet.NT_STRICT,
                         parent=SingleInterval(4, 5, Strand.MINUS)),
            ),
            # Parent without full location: the parent is carried over as is
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent="parent"),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent="parent"),
            ),
            (
                Sequence(
                    "actgactg", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
            ),
        ],
    )
    def test_getitem(self, seq, key, exp):
        """Indexing or slicing ``seq`` with ``key`` must equal the Sequence ``exp``.

        ``exp`` carries the expected parent as well, so the comparison covers
        both the extracted characters and the adjusted parent location.
        """
        assert seq[key] == exp

    def test_getitem_error(self):
        """Slicing a sequence whose parent location is unstranded must raise."""
        with pytest.raises(InvalidStrandException):
            unstranded = SingleInterval(0, 8, Strand.UNSTRANDED)
            sequence = Sequence("actgactg",
                                Alphabet.NT_STRICT,
                                parent=unstranded)
            _ = sequence[3:6]

    @pytest.mark.parametrize(
        "sequence,alphabet,validate_alphabet",
        [
            # Each alphabet appears with validation on and with validation
            # off; the rows with validation off are the ones whose data is
            # not required to belong to the alphabet.
            ("", Alphabet.NT_STRICT, True),
            ("acgtACGT", Alphabet.NT_STRICT, True),
            ("N", Alphabet.NT_STRICT, False),
            ("acNNNw", Alphabet.NT_EXTENDED, True),
            ("AN-", Alphabet.NT_EXTENDED, False),
            ("GG--AAA", Alphabet.NT_STRICT_GAPPED, True),
            ("AN-", Alphabet.NT_STRICT_GAPPED, False),
            ("nnAAw-cg", Alphabet.NT_EXTENDED_GAPPED, True),
            ("xxx", Alphabet.NT_EXTENDED_GAPPED, False),
            ("MWT*", Alphabet.AA, True),
            ("T*-", Alphabet.AA, False),
            ("ABCDE-", Alphabet.GENERIC, True),
            ("*", Alphabet.GENERIC, False),
        ],
    )
    def test_validate_alphabet(self, sequence, alphabet, validate_alphabet):
        """Construction must succeed (not raise) for the given combination."""
        Sequence(
            sequence,
            alphabet,
            validate_alphabet=validate_alphabet,
        )

    @pytest.mark.parametrize(
        "sequence,alphabet",
        [
            # One rejected input per alphabet
            ("N", Alphabet.NT_STRICT),
            ("A-", Alphabet.NT_EXTENDED),
            ("AN-", Alphabet.NT_STRICT_GAPPED),
            ("E", Alphabet.NT_EXTENDED_GAPPED),
            ("R-", Alphabet.AA),
            ("?", Alphabet.GENERIC),
        ],
    )
    def test_validate_alphabet_error(self, sequence, alphabet):
        """Validated construction must raise ``AlphabetError`` for this data."""
        with pytest.raises(AlphabetError):
            Sequence(
                sequence,
                alphabet,
                validate_alphabet=True,
            )

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # Parent given as a bare string
            (Sequence("A", Alphabet.NT_STRICT, parent="parent"), "parent"),
            # Parent with an explicit ID; its location has no parent of its own
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(
                                      10, 11, Strand.UNSTRANDED)),
                ),
                "parent",
            ),
            # Same ID given on the Parent and on the location's parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        location=SingleInterval(
                            10, 11, Strand.UNSTRANDED, parent="parent"),
                    ),
                ),
                "parent",
            ),
            # ID only available through the location's parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        10, 11, Strand.UNSTRANDED, parent="parent")),
                ),
                "parent",
            ),
            # Bare location that names no parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.UNSTRANDED),
                ),
                None,
            ),
            # No parent at all
            (Sequence("A", Alphabet.NT_STRICT), None),
        ],
    )
    def test_parent_id(self, sequence, expected):
        """``parent_id`` must equal ``expected`` for each way of supplying a parent."""
        assert sequence.parent_id == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # No parent at all
            (Sequence("A", Alphabet.NT_STRICT), None),
            # Type given directly on the Parent
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(sequence_type="seqtype"),
                ),
                "seqtype",
            ),
            # Type only available through the Parent of the location
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Parent(sequence_type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
            # Location's parent is a Sequence; its ``type`` is the parent type
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Sequence(
                            "AA", Alphabet.NT_STRICT, type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
        ],
    )
    def test_parent_type(self, sequence, expected):
        """``parent_type`` must equal ``expected`` for each way of supplying a parent."""
        assert sequence.parent_type == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            # No parent at all
            (Sequence("A", Alphabet.NT_STRICT), None),
            # Parent given as a bare Strand
            (
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.MINUS,
            ),
            # Parent given as a bare location; strand comes from the location
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            # NOTE(review): this case is identical to the previous one. A
            # different parent form may have been intended here - confirm,
            # or drop the duplicate.
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            # Parent with an explicit strand that matches its location's strand
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        strand=Strand.MINUS,
                        location=SingleInterval(10, 11, Strand.MINUS),
                    ),
                ),
                Strand.MINUS,
            ),
        ],
    )
    def test_parent_strand(self, sequence, expected):
        """``parent_strand`` must equal ``expected`` for each way of supplying a parent."""
        assert sequence.parent_strand == expected

    @pytest.mark.parametrize(
        "sequence,new_id,new_type,expected",
        [
            # Empty sequence
            (
                Sequence("", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("", Alphabet.NT_STRICT),
            ),
            # Mixed case: each complemented base keeps the case of its source
            (
                Sequence("ACGtacgT", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("AcgtaCGT", Alphabet.NT_STRICT),
            ),
            # Extended alphabet: ambiguity codes, and U/u complemented to A/a
            (
                Sequence("ATUGCYRSWKMBdhvnNVHDbmkwsrycguta",
                         Alphabet.NT_EXTENDED),
                None,
                None,
                Sequence("taacgryswmkvHDBNnbdhVKMWSYRGCAAT",
                         Alphabet.NT_EXTENDED),
            ),
            # Gapped alphabets: gap characters are reversed but not changed
            (
                Sequence("--A-CGta", Alphabet.NT_STRICT_GAPPED),
                None,
                None,
                Sequence("taCG-T--", Alphabet.NT_STRICT_GAPPED),
            ),
            (
                Sequence("AtUC-N-", Alphabet.NT_EXTENDED_GAPPED),
                None,
                None,
                Sequence("-N-GAaT", Alphabet.NT_EXTENDED_GAPPED),
            ),
            # new_id / new_type become the id and type of the result
            (
                Sequence("ACGta", Alphabet.NT_STRICT),
                "new_id",
                "seqtype",
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    type="seqtype",
                ),
            ),
            # Strand-only parent: the strand is flipped
            (
                Sequence("ACGta", Alphabet.NT_STRICT, parent=Strand.PLUS),
                None,
                None,
                Sequence("taCGT", Alphabet.NT_STRICT, parent=Strand.MINUS),
            ),
            # Parent location: same coordinates, opposite strand
            (
                Sequence(
                    "ACGta",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.PLUS),
                ),
                None,
                None,
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.MINUS),
                ),
            ),
        ],
    )
    def test_reverse_complement(self, sequence, new_id, new_type, expected):
        """``reverse_complement(new_id, new_type)`` must equal the Sequence ``expected``.

        ``expected`` includes id, type and parent, so the comparison covers
        the metadata of the result as well as its characters.
        """
        assert sequence.reverse_complement(new_id=new_id,
                                           new_type=new_type) == expected

    @pytest.mark.parametrize(
        "sequence",
        [
            # AA and GENERIC alphabets
            Sequence("AAA", Alphabet.AA),
            Sequence("AAA", Alphabet.GENERIC),
            # Nucleotide alphabet, but data that was never validated
            Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False),
        ],
    )
    def test_reverse_complement_error(self, sequence):
        """Reverse-complementing these sequences must raise ``AlphabetError``."""
        with pytest.raises(AlphabetError):
            _ = sequence.reverse_complement()

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only,expected",
        [
            (
                Sequence("", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("TT", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("AATT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype_2",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.MINUS,
                        location=SingleInterval(20, 22, Strand.MINUS),
                    ),
                ),
                None,
                True,
                Sequence("AATT", Alphabet.NT_STRICT),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent",
                                  location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent",
                                  location=SingleInterval(10, 12,
                                                          Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent",
                                  location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent",
                                  location=SingleInterval(0, 2, Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, id="seq1", parent="parent"),
                Sequence("", Alphabet.NT_STRICT, id="seq2", parent="parent"),
                None,
                False,
                Sequence("AA", Alphabet.NT_STRICT, parent="parent"),
            ),
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(2, 5, Strand.PLUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(8, 11, Strand.PLUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.PLUS),
                            SingleInterval(8, 11, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(8, 11,
                                                          Strand.MINUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent",
                                  location=SingleInterval(2, 5, Strand.MINUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.MINUS),
                            SingleInterval(8, 11, Strand.MINUS),
                        ]),
                    ),
                ),
            ),
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 8, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "GCG",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(15, 18, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "ACTGCG",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(5, 8, Strand.PLUS),
                            SingleInterval(15, 18, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
        ],
    )
    def test_append(self, seq1, seq2, new_id, data_only, expected):
        """Check that ``seq1.append`` returns the parametrized ``expected``.

        ``seq2``, ``new_id`` and ``data_only`` are passed positionally, in
        that order.
        """
        appended = seq1.append(seq2, new_id, data_only)
        assert appended == expected

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only",
        [
            (
                Sequence("AA", Alphabet.NT_STRICT),
                Sequence("TT", Alphabet.NT_EXTENDED),
                None,
                True,
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_1"),
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_2"),
                None,
                False,
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AA", Alphabet.NT_STRICT, parent="parent2"),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.PLUS),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(2, 4, Strand.MINUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.MINUS),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(11, 13, Strand.PLUS),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.UNSTRANDED),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(15, 17, Strand.UNSTRANDED),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent1",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent2",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAAT", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent1",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent2",
                    ),
                ),
                None,
                False,
            ),
        ],
    )
    def test_append_error(self, seq1, seq2, new_id, data_only):
        """Check that ``seq1.append`` raises ``ValueError`` for every case."""
        # Arguments are bundled outside the ``raises`` block so that only
        # the ``append`` call itself is expected to fail.
        append_args = (seq2, new_id, data_only)
        with pytest.raises(ValueError):
            seq1.append(*append_args)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(sequence=Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                )),
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent",
                                      sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, sequence, sequence_type,
                                    include_self, expected):
        """Check the ancestor returned for ``sequence_type``.

        The result of ``sequence.first_ancestor_of_type`` (called with the
        parametrized ``include_self`` flag) must equal ``expected``.
        """
        ancestor = sequence.first_ancestor_of_type(sequence_type,
                                                   include_self=include_self)
        assert ancestor == expected

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self",
        [
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent",
                                      sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, sequence, sequence_type,
                                          include_self):
        """Check that the ancestor lookup raises for every parametrized case.

        ``sequence.first_ancestor_of_type`` must raise
        ``NoSuchAncestorException``.
        """
        lookup_kwargs = {"include_self": include_self}
        with pytest.raises(NoSuchAncestorException):
            sequence.first_ancestor_of_type(sequence_type, **lookup_kwargs)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent",
                                      sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent",
                                      sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, sequence, sequence_type, include_self,
                                  expected):
        """Check the boolean answer of ``sequence.has_ancestor_of_type``.

        The comparison uses ``is``, so the method must return the exact
        ``True``/``False`` object given by ``expected``.
        """
        found = sequence.has_ancestor_of_type(sequence_type,
                                              include_self=include_self)
        assert found is expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT,
                      id="test"), ">test\nATGCATATTT\nGGAAACCAA"),
            (Sequence("ATGCATATTTGGAAACCAA",
                      Alphabet.NT_STRICT), ">None\nATGCATATTT\nGGAAACCAA"),
            (Sequence("GGAAACCAA", Alphabet.NT_STRICT,
                      id="test"), ">test\nGGAAACCAA"),
            (
                Sequence("ATGCATATTTGGAAACCAAGGAAACCAA",
                         Alphabet.NT_STRICT,
                         id="test"),
                ">test\nATGCATATTT\nGGAAACCAAG\nGAAACCAA",
            ),
            (
                Sequence(
                    data="AAAAAAA",
                    alphabet=Alphabet.NT_STRICT,
                    id="test",
                    parent=Parent(
                        location=SingleInterval(33, 40, Strand.MINUS)),
                ),
                ">test\nAAAAAAA",
            ),
        ],
    )
    def test_to_fasta(self, sequence, expected):
        """Check the FASTA text produced with ``num_chars=10``.

        The parametrized expectations consist of a ``>`` header line followed
        by the sequence data in lines of at most 10 characters.
        """
        assert sequence.to_fasta(num_chars=10) == expected

    def test_empty_to_fasta(self):
        """Check that an empty sequence cannot be written as FASTA.

        ``to_fasta`` must raise ``EmptySequenceFastaError``.
        """
        empty_sequence = Sequence("", Alphabet.NT_STRICT)
        with pytest.raises(EmptySequenceFastaError):
            empty_sequence.to_fasta()
Esempio n. 12
0
 def test_empty_to_fasta(self):
     """Check that ``to_fasta`` on an empty sequence raises
     ``EmptySequenceFastaError``."""
     no_data = Sequence("", Alphabet.NT_STRICT)
     with pytest.raises(EmptySequenceFastaError):
         no_data.to_fasta()
Esempio n. 13
0
 def test_sequence(self):
     """Check that ``Parent.sequence`` equals the sequence it was built with."""
     held = Parent(sequence=Sequence("A", Alphabet.NT_STRICT)).sequence
     # Compare against an independently constructed, equal sequence.
     assert held == Sequence("A", Alphabet.NT_STRICT)
Esempio n. 14
0
class TestParent:
    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,expected",
        [
            (None, None, None, None, None, Parent()),
            (
                "id",
                "seqtype",
                Strand.MINUS,
                SingleInterval(
                    0,
                    1,
                    Strand.MINUS,
                    Parent(
                        id="id",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence(
                            "AAA",
                            Alphabet.NT_STRICT,
                            id="id",
                            type="seqtype",
                            parent=Parent(
                                id="id2",
                                sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                            ),
                        ),
                    ),
                ),
                Sequence(
                    "AAA",
                    Alphabet.NT_STRICT,
                    id="id",
                    type="seqtype",
                    parent=Parent(id="id2", sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2")),
                ),
                Parent(
                    id="id",
                    sequence_type="seqtype",
                    strand=Strand.MINUS,
                    location=SingleInterval(
                        0,
                        1,
                        Strand.MINUS,
                        Parent(
                            id="id",
                            sequence_type="seqtype",
                            strand=Strand.MINUS,
                            sequence=Sequence(
                                "AAA",
                                Alphabet.NT_STRICT,
                                id="id",
                                type="seqtype",
                                parent=Parent(
                                    id="id2",
                                    sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                                ),
                            ),
                        ),
                    ),
                    sequence=Sequence(
                        "AAA",
                        Alphabet.NT_STRICT,
                        id="id",
                        type="seqtype",
                        parent=Parent(
                            id="id2",
                            sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                        ),
                    ),
                ),
            ),
        ],
    )
    def test_init(self, id, sequence_type, strand, location, sequence, expected):
        """Check that ``Parent`` built from the keyword arguments equals ``expected``."""
        constructed = Parent(
            id=id,
            sequence_type=sequence_type,
            strand=strand,
            location=location,
            sequence=sequence,
        )
        # ``expected`` stays on the left-hand side of the comparison.
        assert expected == constructed

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,parent,expected_exception",
        [
            ("id1", None, None, SingleInterval(0, 5, Strand.PLUS, parent="id2"), None, None, ParentException),
            ("id1", None, None, None, Sequence("AAA", Alphabet.NT_STRICT, id="id2"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent="id1"),
                Sequence("AAC", Alphabet.NT_STRICT, id="id2"),
                None,
                ParentException,
            ),
            (
                None,
                "seqtype",
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                None,
                None,
                ParentException,
            ),
            (None, "seqtype", None, None, Sequence("AAT", Alphabet.NT_STRICT, type="unknown"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                Sequence("AAG", Alphabet.NT_STRICT, type="seqtype"),
                None,
                ParentException,
            ),
            (None, None, Strand.MINUS, SingleInterval(0, 5, Strand.PLUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                SingleInterval(0, 10, Strand.PLUS),
                Sequence("A", Alphabet.NT_STRICT),
                None,
                InvalidPositionException,
            ),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                LocationException,
            ),
            (None, None, Strand.PLUS, SingleInterval(5, 10, Strand.MINUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT, parent="id1"),
                Parent(id="id2"),
                MismatchedParentException,
            ),
        ],
    )
    def test_init_error(self, id, sequence_type, strand, location, sequence, parent, expected_exception):
        """Check that constructing ``Parent`` raises ``expected_exception``."""
        # Collect the constructor arguments first so that only the
        # ``Parent(...)`` call runs inside the ``raises`` block.
        init_kwargs = {
            "id": id,
            "sequence_type": sequence_type,
            "strand": strand,
            "location": location,
            "sequence": sequence,
            "parent": parent,
        }
        with pytest.raises(expected_exception):
            Parent(**init_kwargs)

    @pytest.mark.parametrize(
        "obj,expected",
        [
            (
                Sequence("AAA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
            ),
            ("parent", Parent(id="parent")),
            (
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS)),
            ),
            (
                CompoundInterval([5], [10], Strand.PLUS),
                Parent(location=CompoundInterval([5], [10], Strand.PLUS)),
            ),
            (EmptyLocation(), Parent(location=EmptyLocation())),
            (Strand.MINUS, Parent(strand=Strand.MINUS)),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
            ),
        ],
    )
    def test_make_parent(self, obj, expected):
        """Check that ``make_parent(obj)`` equals the parametrized ``expected``."""
        converted = make_parent(obj)
        assert converted == expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(), Parent(id=None, sequence_type=None), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), False),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent=Parent(id="parent"))),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_eq(self, parent1, parent2, expected):
        """Check that ``parent1 == parent2`` evaluates to exactly ``expected``."""
        are_equal = parent1 == parent2
        # Identity check: the comparison must yield the ``True``/``False`` object itself.
        assert are_equal is expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), True),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent="parent")),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_equals_except_location(self, parent1, parent2, expected):
        """Check that ``parent1.equals_except_location(parent2)`` returns exactly ``expected``."""
        outcome = parent1.equals_except_location(parent2)
        assert outcome is expected

    @pytest.mark.parametrize(
        "id,location,sequence,expected",
        [
            ("id", None, None, "id"),
            (
                None,
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                None,
                "id",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
            (
                "id",
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
        ],
    )
    def test_id(self, id, location, sequence, expected):
        """Check the ``id`` of a ``Parent`` built from ``id``, ``location`` and ``sequence``."""
        parent = Parent(id=id, location=location, sequence=sequence)
        assert parent.id == expected

    @pytest.mark.parametrize(
        "sequence_type,location,sequence,expected",
        [
            ("seqtype", None, None, "seqtype"),
            (
                None,
                SingleInterval(
                    0,
                    5,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype"),
                ),
                None,
                "seqtype",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, type="seqtype"),
                "seqtype",
            ),
            (
                None,
                None,
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(sequence_type="seqtype_2"),
                ),
                "seqtype",
            ),
        ],
    )
    def test_sequence_type(self, sequence_type, location, sequence, expected):
        """Check the ``sequence_type`` of a ``Parent`` built from the parametrized arguments."""
        parent = Parent(sequence_type=sequence_type, location=location, sequence=sequence)
        assert parent.sequence_type == expected

    @pytest.mark.parametrize(
        "strand,location,sequence,expected",
        [
            (Strand.PLUS, None, None, Strand.PLUS),
            (None, SingleInterval(0, 5, Strand.MINUS), None, Strand.MINUS),
            (
                Strand.PLUS,
                None,
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.PLUS,
            ),
        ],
    )
    def test_strand(self, strand, location, sequence, expected):
        """Check the ``strand`` of a ``Parent`` built from ``strand``, ``location`` and ``sequence``."""
        parent = Parent(strand=strand, location=location, sequence=sequence)
        assert parent.strand == expected

    def test_location(self):
        """Check that ``Parent.location`` equals the interval it was built with."""
        stored = Parent(location=SingleInterval(0, 1, Strand.PLUS)).location
        # Compare against an independently constructed, equal interval.
        assert stored == SingleInterval(0, 1, Strand.PLUS)

    def test_sequence(self):
        """Check that ``Parent.sequence`` equals the sequence it was built with."""
        stored = Parent(sequence=Sequence("A", Alphabet.NT_STRICT)).sequence
        # Compare against an independently constructed, equal sequence.
        assert stored == Sequence("A", Alphabet.NT_STRICT)

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(parent="id"), Parent(id="id")),
            (
                Parent(
                    sequence=Sequence(
                        "AA",
                        Alphabet.NT_STRICT,
                        parent=Parent(sequence_type="chr"),
                    )
                ),
                Parent(sequence_type="chr"),
            ),
        ],
    )
    def test_parent(self, parent, expected):
        """Check that the ``parent`` attribute of ``parent`` equals ``expected``."""
        grandparent = parent.parent
        assert grandparent == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(), Parent()),
            (Parent(strand=Strand.PLUS), Parent()),
            (
                Parent(strand=Strand.PLUS, location=SingleInterval(5, 10, Strand.PLUS)),
                Parent(),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(0, 1, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
            ),
        ],
    )
    def test_strip_location_info(self, parent, expected):
        """Check that ``parent.strip_location_info()`` equals ``expected``.

        In the parametrized cases the expected parent carries no ``strand``
        or ``location`` arguments.
        """
        stripped = parent.strip_location_info()
        assert stripped == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """Check the result of ``first_ancestor_of_type`` against ``expected``.

        ``include_self`` is forwarded as a keyword argument.
        """
        ancestor = parent.first_ancestor_of_type(
            sequence_type,
            include_self=include_self,
        )
        assert ancestor == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self",
        [
            (Parent(id="self"), "seqtype_2", True),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, parent, sequence_type, include_self):
        """Check that ``first_ancestor_of_type`` raises ``NoSuchAncestorException``."""
        expect_no_ancestor = pytest.raises(NoSuchAncestorException)
        with expect_no_ancestor:
            parent.first_ancestor_of_type(
                sequence_type,
                include_self=include_self,
            )

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Parent(id="self"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """Check the result of ``has_ancestor_of_type`` against ``expected``.

        The comparison is by identity (``is``), not equality, so the returned
        object must be the very same object as the parametrized ``expected``.
        """
        found = parent.has_ancestor_of_type(
            sequence_type,
            include_self=include_self,
        )
        assert found is expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 5, Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                SingleInterval(195, 200, Strand.MINUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(6, 9, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=SingleInterval(6, 8, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.UNSTRANDED, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_single_interval(self, parent, expected):
        """Check ``lift_child_location_to_parent()`` against ``expected``.

        Each parametrized case gives the Parent a SingleInterval location and
        its own parent a SingleInterval location.
        """
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval([13, 17], [15, 20], Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 10], [5, 15], Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [185, 195],
                    [190, 200],
                    Strand.MINUS,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([6], [9], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=CompoundInterval([6], [8], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval(
                    [13, 17],
                    [15, 20],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3], [5], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_single_interval(self, parent, expected):
        """Check ``lift_child_location_to_parent()`` against ``expected``.

        Each parametrized case gives the Parent a CompoundInterval location and
        its own parent a SingleInterval location.
        """
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # No location
            (
                Parent(parent=SingleInterval(5, 10, Strand.PLUS)),
                NullParentException,
            ),
            # Parent has no location
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent="grandparent",
                ),
                NullParentException,
            ),
            # Location on parent can't be unstranded
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 100, Strand.UNSTRANDED),
                    ),
                ),
                InvalidStrandException,
            ),
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(30, 31, Strand.PLUS)),
                ),
                ValueError,
            ),
        ],
    )
    def test_lift_child_location_to_parent_single_interval_error(self, parent, expected_error):
        """Check that ``lift_child_location_to_parent()`` raises ``expected_error``."""
        expect_failure = pytest.raises(expected_error)
        with expect_failure:
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(10, 20, Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [14, 20],
                    [18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=SingleInterval(5, 10, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 4, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                SingleInterval(41, 45, Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_compound_interval(self, parent, expected):
        """Check ``lift_child_location_to_parent()`` against ``expected``.

        Each parametrized case gives the Parent a SingleInterval location and
        its own parent a CompoundInterval location.
        """
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 5], [5, 10], Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([10, 22], [20, 23], Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [11, 14, 20],
                    [12, 18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([5], [10], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 3], [1, 4], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                CompoundInterval([41, 44], [42, 45], Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_compound_interval(self, parent, expected):
        """Check ``lift_child_location_to_parent()`` against ``expected``.

        Each parametrized case gives both the Parent and its own parent a
        CompoundInterval location.
        """
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 50, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20], [15, 25], Strand.PLUS),
                    ),
                ),
                InvalidPositionException,
            ),
        ],
    )
    def test_lift_child_location_to_parent_compound_interval_error(self, parent, expected_error):
        """Check that ``lift_child_location_to_parent()`` raises ``expected_error``."""
        expect_failure = pytest.raises(expected_error)
        with expect_failure:
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,location,expected",
        [
            (
                Parent(),
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS), strand=Strand.PLUS),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                SingleInterval(2, 3, Strand.PLUS),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(2, 3, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                None,
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
        ],
    )
    def test_reset_location(self, parent, location, expected):
        """Check that ``reset_location(location)`` returns an object equal to ``expected``.

        ``location`` is ``None`` in one of the parametrized cases.
        """
        updated = parent.reset_location(location)
        assert updated == expected

    @pytest.mark.parametrize(
        "parent,location,expected_exception",
        [
            (
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(0, 5, Strand.PLUS),
                InvalidPositionException,
            ),
            (
                Parent(id="id1", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(
                    0,
                    1,
                    Strand.PLUS,
                    parent=Parent(id="id2", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                ),
                ParentException,
            ),
        ],
    )
    def test_reset_location_error(self, parent, location, expected_exception):
        """Check that ``reset_location(location)`` raises ``expected_exception``."""
        expect_failure = pytest.raises(expected_exception)
        with expect_failure:
            parent.reset_location(location)

    @pytest.mark.parametrize(
        "parent,sequence,include_self,expected",
        [
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), True, False),
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), False, False),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                True,
                True,
            ),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                False,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AA", Alphabet.NT_STRICT),
                ),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT, id="id"),
                True,
                False,
            ),
            (
                Parent(
                    parent=Parent(parent=Parent(parent=Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT, id="seq"))))
                ),
                Sequence("AAA", Alphabet.NT_STRICT, id="seq"),
                True,
                True,
            ),
        ],
    )
    def test_has_ancestor_sequence(self, parent, sequence, include_self, expected):
        """Check the result of ``has_ancestor_sequence`` against ``expected``.

        ``include_self`` is passed positionally as the second argument.
        """
        found = parent.has_ancestor_sequence(sequence, include_self)
        assert found == expected