def test_init(self):
    """Check that a Sequence reports the values it was constructed with."""
    full = Sequence(
        "ACTG",
        Alphabet.NT_STRICT,
        id="id",
        type="seqtype_1",
        parent=Parent(
            id="parent",
            sequence_type="seqtype_2",
            location=SingleInterval(5, 9, Strand.MINUS, parent="parent"),
        ),
    )

    # Sequence data, alphabet, ID and sequence type
    assert full.sequence == Seq("ACTG")
    assert str(full) == "ACTG"
    assert full.alphabet == Alphabet.NT_STRICT
    assert full.id == "id"
    assert full.sequence_type == "seqtype_1"

    # Parent ID: given explicitly, as a bare string, via the location's
    # parent, or absent altogether
    assert full.parent_id == "parent"
    string_parent = Sequence("A", Alphabet.NT_STRICT, parent="parent")
    assert string_parent.parent_id == "parent"
    located_only = Sequence(
        "A",
        Alphabet.NT_STRICT,
        parent=Parent(location=SingleInterval(5, 6, Strand.MINUS, parent="parent")),
    )
    assert located_only.parent_id == "parent"
    orphan = Sequence("A", Alphabet.NT_STRICT)
    assert orphan.parent_id is None

    # Parent type
    assert full.parent_type == "seqtype_2"

    # Parent strand: from the full parent, a bare strand, a bare location,
    # or absent altogether
    assert full.parent_strand == Strand.MINUS
    strand_parent = Sequence("A", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED)
    assert strand_parent.parent_strand == Strand.UNSTRANDED
    interval_parent = Sequence(
        "A",
        Alphabet.NT_STRICT,
        parent=SingleInterval(3, 4, Strand.UNSTRANDED),
    )
    assert interval_parent.parent_strand == Strand.UNSTRANDED
    assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None

    # Location on parent
    expected_location = SingleInterval(5, 9, Strand.MINUS, parent="parent")
    assert full.location_on_parent == expected_location

    # No alphabet validation: construction must not raise
    Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)
def test_require_location_has_parent_with_sequence(self):
    """Exercise the validator with no parent, a parent lacking a sequence, and a complete parent."""
    # A location without any parent is rejected.
    with pytest.raises(NullParentException):
        no_parent = SingleInterval(0, 5, Strand.PLUS)
        ObjectValidation.require_location_has_parent_with_sequence(no_parent)

    # A parent given only as a string carries no sequence and is rejected.
    with pytest.raises(NullSequenceException):
        id_only_parent = SingleInterval(0, 5, Strand.PLUS, parent="parent")
        ObjectValidation.require_location_has_parent_with_sequence(id_only_parent)

    # A parent that carries a sequence passes without raising.
    complete = SingleInterval(
        0,
        5,
        Strand.PLUS,
        parent=Parent(id="parent", sequence=Sequence("AAAAA", Alphabet.NT_STRICT)),
    )
    ObjectValidation.require_location_has_parent_with_sequence(complete)
def extract_sequence(self) -> Sequence:
    """
    Returns a continuous, in-frame CDS sequence whose length is always a multiple of 3.

    The result is the concatenation of the sequences extracted from each location in
    ``chunk_relative_codon_locations``. Leading or trailing bases that are annotated as CDS
    but cannot form a full codon are removed, as are incomplete internal codons.

    Incomplete internal codons are determined by comparing the CDSFrame of each exon as
    annotated to the expected value of the CDSFrame. This allows an annotation to model
    things like programmed frameshifts and indels that may be assembly errors.
    """
    codon_strings = [
        str(location.extract_sequence())
        for location in self.chunk_relative_codon_locations
    ]
    cds = "".join(codon_strings)
    # Internal invariant: only whole codons were concatenated.
    assert len(cds) % 3 == 0
    return Sequence(cds, Alphabet.NT_EXTENDED, validate_alphabet=False)
def translate(
    self,
    truncate_at_in_frame_stop: Optional[bool] = False,
    translation_table: Optional[TranslationTable] = TranslationTable.DEFAULT,
) -> Sequence:
    """
    Returns the amino acid sequence of this CDS.

    When ``truncate_at_in_frame_stop`` is ``True``, translation stops at the first in-frame stop.

    Currently ``translation_table`` only controls the start codon: a non-standard table changes
    the set of start codons that code for Methionine and leaves every other codon unchanged.
    """
    residues = self._translate_iter(truncate_at_in_frame_stop, translation_table)
    protein = "".join(residues)
    return Sequence(protein, Alphabet.AA, validate_alphabet=False)
def test_init_invalid_params(
    self,
    data,
    alphabet,
    parent_id,
    parent_type,
    parent_strand,
    location_on_parent,
    expected_exception,
):
    """Building a Sequence from the given data and parent attributes must raise ``expected_exception``."""
    with pytest.raises(expected_exception):
        # The parent is built inside the block as well, so an error raised
        # while constructing it also counts.
        candidate_parent = Parent(
            id=parent_id,
            sequence_type=parent_type,
            strand=parent_strand,
            location=location_on_parent,
        )
        Sequence(data, alphabet, parent=candidate_parent)
def test_validate_alphabet_error(self, sequence, alphabet):
    """With validation enabled, constructing a Sequence from this data must raise ``AlphabetError``."""
    with pytest.raises(AlphabetError):
        Sequence(
            sequence,
            alphabet,
            validate_alphabet=True,
        )
def test_validate_alphabet(self, sequence, alphabet, validate_alphabet):
    """Constructing a Sequence with the given validation flag must not raise."""
    Sequence(
        sequence,
        alphabet,
        validate_alphabet=validate_alphabet,
    )
def test_getitem_error(self):
    """Slicing a Sequence whose parent location is unstranded raises ``InvalidStrandException``."""
    with pytest.raises(InvalidStrandException):
        unstranded = Sequence(
            "actgactg",
            Alphabet.NT_STRICT,
            parent=SingleInterval(0, 8, Strand.UNSTRANDED),
        )
        unstranded[3:6]
def test_len(self):
    """``len()`` of a Sequence equals the number of characters in its data."""
    empty = Sequence("", Alphabet.NT_EXTENDED_GAPPED)
    assert len(empty) == 0

    five_bases = Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)
    assert len(five_bases) == 5
def test_str(self):
    """``str()`` of a Sequence returns its data unchanged, including letter case."""
    rendered = str(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED))
    assert rendered == "AAAAt"
class TestSequence:
    """Tests for ``Sequence``.

    Covers construction and its error cases, equality, ``str``/``len``, indexing and slicing,
    alphabet validation, the parent accessors (``parent_id``, ``parent_type``,
    ``parent_strand``), ``reverse_complement``, ``append``, ancestor lookup by sequence type,
    and FASTA output.
    """

    def test_init(self):
        """Constructor arguments are exposed through the corresponding properties."""
        sequence = Sequence(
            "ACTG",
            Alphabet.NT_STRICT,
            id="id",
            type="seqtype_1",
            parent=Parent(
                id="parent",
                sequence_type="seqtype_2",
                location=SingleInterval(5, 9, Strand.MINUS, parent="parent"),
            ),
        )
        # Sequence data
        assert sequence.sequence == Seq("ACTG")
        assert str(sequence) == "ACTG"
        # Alphabet
        assert sequence.alphabet == Alphabet.NT_STRICT
        # ID
        assert sequence.id == "id"
        # Sequence type
        assert sequence.sequence_type == "seqtype_1"
        # Parent ID (explicit parent, string parent, parent of the location, no parent)
        assert sequence.parent_id == "parent"
        assert Sequence("A", Alphabet.NT_STRICT, parent="parent").parent_id == "parent"
        assert (Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=Parent(
                location=SingleInterval(5, 6, Strand.MINUS, parent="parent")),
        ).parent_id == "parent")
        assert Sequence("A", Alphabet.NT_STRICT).parent_id is None
        # Parent type
        assert sequence.parent_type == "seqtype_2"
        # Parent strand (explicit parent, strand-only parent, location-only parent, no parent)
        assert sequence.parent_strand == Strand.MINUS
        assert Sequence(
            "A", Alphabet.NT_STRICT,
            parent=Strand.UNSTRANDED).parent_strand == Strand.UNSTRANDED
        assert (Sequence(
            "A",
            Alphabet.NT_STRICT,
            parent=SingleInterval(3, 4, Strand.UNSTRANDED),
        ).parent_strand == Strand.UNSTRANDED)
        assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None
        # Location on parent
        assert sequence.location_on_parent == SingleInterval(5, 9, Strand.MINUS, parent="parent")
        # No alphabet validation: "xxx" is accepted when validation is switched off
        Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)

    @pytest.mark.parametrize(
        "data,alphabet,parent_id,parent_type,parent_strand,location_on_parent,expected_exception",
        [
            ("A-C", Alphabet.NT_STRICT, None, None, None, None, AlphabetError),
            (
                "ACG",
                Alphabet.NT_STRICT,
                None,
                None,
                None,
                SingleInterval(0, 4, Strand.PLUS),
                ParentException,
            ),
            (
                "ATT",
                Alphabet.NT_STRICT,
                "parent1",
                None,
                None,
                SingleInterval(0, 3, Strand.PLUS, parent="parent2"),
                ParentException,
            ),
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                None,
                Strand.MINUS,
                SingleInterval(0, 3, Strand.PLUS),
                InvalidStrandException,
            ),
            (
                "GGG",
                Alphabet.NT_STRICT,
                None,
                "seqtype_2",
                None,
                SingleInterval(
                    0,
                    3,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype_3"),
                ),
                ParentException,
            ),
        ],
    )
    def test_init_invalid_params(
        self,
        data,
        alphabet,
        parent_id,
        parent_type,
        parent_strand,
        location_on_parent,
        expected_exception,
    ):
        """Constructing a Sequence from inconsistent data/parent attributes raises the expected exception."""
        with pytest.raises(expected_exception):
            Sequence(
                data,
                alphabet,
                parent=Parent(
                    id=parent_id,
                    sequence_type=parent_type,
                    strand=parent_strand,
                    location=location_on_parent,
                ),
            )

    @pytest.mark.parametrize(
        "sequence,other,expected",
        [
            # Compared against a plain string
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                "AAAA",
                False,
            ),
            # Only the validate_alphabet flag differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False),
                Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=True),
                True,
            ),
            # All attributes identical
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                True,
            ),
            # Data differs by letter case
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAa",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Alphabet differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_EXTENDED,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # ID differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq2",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Sequence type differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_2",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Strand of the parent location differs
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.PLUS, None),
                    validate_alphabet=False,
                ),
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype",
                    parent=SingleInterval(0, 4, Strand.UNSTRANDED, None),
                    validate_alphabet=False,
                ),
                False,
            ),
            # Parent ID differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent2"),
                False,
            ),
            # Parent present on one side only
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent="parent"),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # Parent strand differs
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.PLUS),
                False,
            ),
            # Parent strand present on one side only
            (
                Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
            # NOTE(review): parent="seqtype" is a plain string here, which other cases in this
            # class use as a parent ID; this may have been meant to exercise the parent's
            # sequence type instead - confirm.
            (
                Sequence(
                    "AAAA",
                    Alphabet.NT_STRICT,
                    parent="seqtype",
                ),
                Sequence("AAAA", Alphabet.NT_STRICT),
                False,
            ),
        ],
    )
    def test_equals(self, sequence, other, expected):
        """``==`` yields the expected boolean regardless of operand order."""
        assert (sequence == other) is expected
        assert (other == sequence) is expected

    def test_str(self):
        """``str()`` returns the sequence data unchanged."""
        assert str(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == "AAAAt"

    def test_len(self):
        """``len()`` returns the number of characters in the sequence data."""
        assert len(Sequence("", Alphabet.NT_EXTENDED_GAPPED)) == 0
        assert len(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == 5

    @pytest.mark.parametrize(
        "seq,key,exp",
        [
            # No parent
            (Sequence("acgtacgt", Alphabet.NT_STRICT), 3, Sequence("t", Alphabet.NT_STRICT)),
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(3, 6), Sequence("tac", Alphabet.NT_STRICT)),
            (Sequence("acgtacgt", Alphabet.NT_STRICT), slice(3, 10), Sequence("tacgt", Alphabet.NT_STRICT)),
            # Parent with location; slice
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(3, 6, Strand.PLUS)),
            ),
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(2, 5, Strand.MINUS)),
            ),
            # Parent with location; single position
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)),
                3,
                Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(3, 4, Strand.PLUS)),
            ),
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)),
                3,
                Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(4, 5, Strand.MINUS)),
            ),
            # Parent without full location
            (
                Sequence("actgactg", Alphabet.NT_STRICT, parent="parent"),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent="parent"),
            ),
            (
                Sequence(
                    "actgactg", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
                slice(3, 6),
                Sequence("gac", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED),
            ),
        ],
    )
    def test_getitem(self, seq, key, exp):
        """Indexing or slicing returns the expected sub-Sequence, including its parent."""
        assert seq[key] == exp

    def test_getitem_error(self):
        """Slicing a Sequence whose parent location is unstranded raises ``InvalidStrandException``."""
        with pytest.raises(InvalidStrandException):
            Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.UNSTRANDED))[3:6]

    @pytest.mark.parametrize(
        "sequence,alphabet,validate_alphabet",
        [
            ("", Alphabet.NT_STRICT, True),
            ("acgtACGT", Alphabet.NT_STRICT, True),
            ("N", Alphabet.NT_STRICT, False),
            ("acNNNw", Alphabet.NT_EXTENDED, True),
            ("AN-", Alphabet.NT_EXTENDED, False),
            ("GG--AAA", Alphabet.NT_STRICT_GAPPED, True),
            ("AN-", Alphabet.NT_STRICT_GAPPED, False),
            ("nnAAw-cg", Alphabet.NT_EXTENDED_GAPPED, True),
            ("xxx", Alphabet.NT_EXTENDED_GAPPED, False),
            ("MWT*", Alphabet.AA, True),
            ("T*-", Alphabet.AA, False),
            ("ABCDE-", Alphabet.GENERIC, True),
            ("*", Alphabet.GENERIC, False),
        ],
    )
    def test_validate_alphabet(self, sequence, alphabet, validate_alphabet):
        """Construction succeeds for each data/alphabet/validation-flag combination."""
        Sequence(sequence, alphabet, validate_alphabet=validate_alphabet)

    @pytest.mark.parametrize(
        "sequence,alphabet",
        [
            ("N", Alphabet.NT_STRICT),
            ("A-", Alphabet.NT_EXTENDED),
            ("AN-", Alphabet.NT_STRICT_GAPPED),
            ("E", Alphabet.NT_EXTENDED_GAPPED),
            ("R-", Alphabet.AA),
            ("?", Alphabet.GENERIC),
        ],
    )
    def test_validate_alphabet_error(self, sequence, alphabet):
        """With validation enabled, each data/alphabet pair raises ``AlphabetError``."""
        with pytest.raises(AlphabetError):
            Sequence(sequence, alphabet, validate_alphabet=True)

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            (Sequence("A", Alphabet.NT_STRICT, parent="parent"), "parent"),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(10, 11, Strand.UNSTRANDED)),
                ),
                "parent",
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        location=SingleInterval(
                            10, 11, Strand.UNSTRANDED, parent="parent"),
                    ),
                ),
                "parent",
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        10, 11, Strand.UNSTRANDED, parent="parent")),
                ),
                "parent",
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.UNSTRANDED),
                ),
                None,
            ),
            (Sequence("A", Alphabet.NT_STRICT), None),
        ],
    )
    def test_parent_id(self, sequence, expected):
        """``parent_id`` returns the expected ID, or ``None`` when no ID is available."""
        assert sequence.parent_id == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            (Sequence("A", Alphabet.NT_STRICT), None),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(sequence_type="seqtype"),
                ),
                "seqtype",
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Parent(sequence_type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(location=SingleInterval(
                        0,
                        1,
                        Strand.PLUS,
                        parent=Sequence(
                            "AA", Alphabet.NT_STRICT, type="seqtype"),
                    )),
                ),
                "seqtype",
            ),
        ],
    )
    def test_parent_type(self, sequence, expected):
        """``parent_type`` returns the expected sequence type, or ``None`` without a parent."""
        assert sequence.parent_type == expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            (Sequence("A", Alphabet.NT_STRICT), None),
            (
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.MINUS,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            # NOTE(review): this case is identical to the previous one - possibly meant to
            # cover a different way of supplying the parent; confirm.
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 11, Strand.MINUS),
                ),
                Strand.MINUS,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        strand=Strand.MINUS,
                        location=SingleInterval(10, 11, Strand.MINUS),
                    ),
                ),
                Strand.MINUS,
            ),
        ],
    )
    def test_parent_strand(self, sequence, expected):
        """``parent_strand`` returns the expected strand, or ``None`` without a parent."""
        assert sequence.parent_strand == expected

    @pytest.mark.parametrize(
        "sequence,new_id,new_type,expected",
        [
            (
                Sequence("", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("", Alphabet.NT_STRICT),
            ),
            (
                Sequence("ACGtacgT", Alphabet.NT_STRICT),
                None,
                None,
                Sequence("AcgtaCGT", Alphabet.NT_STRICT),
            ),
            (
                Sequence("ATUGCYRSWKMBdhvnNVHDbmkwsrycguta", Alphabet.NT_EXTENDED),
                None,
                None,
                Sequence("taacgryswmkvHDBNnbdhVKMWSYRGCAAT", Alphabet.NT_EXTENDED),
            ),
            (
                Sequence("--A-CGta", Alphabet.NT_STRICT_GAPPED),
                None,
                None,
                Sequence("taCG-T--", Alphabet.NT_STRICT_GAPPED),
            ),
            (
                Sequence("AtUC-N-", Alphabet.NT_EXTENDED_GAPPED),
                None,
                None,
                Sequence("-N-GAaT", Alphabet.NT_EXTENDED_GAPPED),
            ),
            # New ID and type are applied to the result
            (
                Sequence("ACGta", Alphabet.NT_STRICT),
                "new_id",
                "seqtype",
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    type="seqtype",
                ),
            ),
            # The expected result carries the opposite parent strand
            (
                Sequence("ACGta", Alphabet.NT_STRICT, parent=Strand.PLUS),
                None,
                None,
                Sequence("taCGT", Alphabet.NT_STRICT, parent=Strand.MINUS),
            ),
            (
                Sequence(
                    "ACGta",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.PLUS),
                ),
                None,
                None,
                Sequence(
                    "taCGT",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 10, Strand.MINUS),
                ),
            ),
        ],
    )
    def test_reverse_complement(self, sequence, new_id, new_type, expected):
        """``reverse_complement`` returns the expected Sequence, including ID, type and parent."""
        assert sequence.reverse_complement(new_id=new_id, new_type=new_type) == expected

    @pytest.mark.parametrize(
        "sequence",
        [
            Sequence("AAA", Alphabet.AA),
            Sequence("AAA", Alphabet.GENERIC),
            Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False),
        ],
    )
    def test_reverse_complement_error(self, sequence):
        """``reverse_complement`` raises ``AlphabetError`` for each of these sequences."""
        with pytest.raises(AlphabetError):
            sequence.reverse_complement()

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only,expected",
        [
            # data_only=True: expected results carry no type or parent
            (
                Sequence("", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("TT", Alphabet.NT_STRICT, parent="parent2"),
                "new_id",
                True,
                Sequence("AATT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype_2",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.MINUS,
                        location=SingleInterval(20, 22, Strand.MINUS),
                    ),
                ),
                None,
                True,
                Sequence("AATT", Alphabet.NT_STRICT),
            ),
            # data_only=False, only one operand has a location: expected parent has ID only
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent1", strand=Strand.PLUS),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(
                        id="parent1",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 7, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "AATT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent="parent1",
                ),
            ),
            # data_only=True with locations on the same parent, in either order
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(10, 12, Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            (
                Sequence(
                    "CC",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)),
                ),
                Sequence(
                    "TT",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(id="parent", location=SingleInterval(0, 2, Strand.PLUS)),
                ),
                "new_id",
                True,
                Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"),
            ),
            # Appending an empty sequence with the same parent
            (
                Sequence("AA", Alphabet.NT_STRICT, id="seq1", parent="parent"),
                Sequence("", Alphabet.NT_STRICT, id="seq2", parent="parent"),
                None,
                False,
                Sequence("AA", Alphabet.NT_STRICT, parent="parent"),
            ),
            # data_only=False with both locations: expected parent location is a CompoundInterval
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.PLUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.PLUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.PLUS),
                            SingleInterval(8, 11, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.MINUS)),
                ),
                Sequence(
                    "GGA",
                    Alphabet.NT_STRICT,
                    parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.MINUS)),
                ),
                "new_id",
                False,
                Sequence(
                    "ACTGGA",
                    Alphabet.NT_STRICT,
                    id="new_id",
                    parent=Parent(
                        id="parent",
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(2, 5, Strand.MINUS),
                            SingleInterval(8, 11, Strand.MINUS),
                        ]),
                    ),
                ),
            ),
            (
                Sequence(
                    "ACT",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(5, 8, Strand.PLUS),
                    ),
                ),
                Sequence(
                    "GCG",
                    Alphabet.NT_STRICT,
                    id="seq1",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=SingleInterval(15, 18, Strand.PLUS),
                    ),
                ),
                None,
                False,
                Sequence(
                    "ACTGCG",
                    Alphabet.NT_STRICT,
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        location=CompoundInterval.from_single_intervals([
                            SingleInterval(5, 8, Strand.PLUS),
                            SingleInterval(15, 18, Strand.PLUS),
                        ]),
                    ),
                ),
            ),
        ],
    )
    def test_append(self, seq1, seq2, new_id, data_only, expected):
        """``seq1.append(seq2, new_id, data_only)`` returns the expected combined Sequence."""
        assert seq1.append(seq2, new_id, data_only) == expected

    @pytest.mark.parametrize(
        "seq1,seq2,new_id,data_only",
        [
            # Different alphabets
            (
                Sequence("AA", Alphabet.NT_STRICT),
                Sequence("TT", Alphabet.NT_EXTENDED),
                None,
                True,
            ),
            # Different sequence types
            (
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_1"),
                Sequence("AA", Alphabet.NT_STRICT, type="seqtype_2"),
                None,
                False,
            ),
            # Different parent IDs
            (
                Sequence("AA", Alphabet.NT_STRICT, parent="parent1"),
                Sequence("AA", Alphabet.NT_STRICT, parent="parent2"),
                None,
                False,
            ),
            # Plus-strand locations where the second precedes the first
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.PLUS),
                ),
                None,
                False,
            ),
            # Minus-strand locations where the second has the larger coordinates
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(2, 4, Strand.MINUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(5, 7, Strand.MINUS),
                ),
                None,
                False,
            ),
            # Overlapping locations
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.PLUS),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(11, 13, Strand.PLUS),
                ),
                None,
                False,
            ),
            # Unstranded locations
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(10, 12, Strand.UNSTRANDED),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=SingleInterval(15, 17, Strand.UNSTRANDED),
                ),
                None,
                False,
            ),
            # Parents differ only in ID
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent1",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent2",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in sequence type
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in strand
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in sequence data
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAAT", Alphabet.NT_STRICT),
                        parent="grandparent",
                    ),
                ),
                None,
                False,
            ),
            # Parents differ only in grandparent
            (
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent1",
                    ),
                ),
                Sequence(
                    "AA",
                    Alphabet.NT_STRICT,
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype",
                        strand=Strand.PLUS,
                        sequence=Sequence("AAA", Alphabet.NT_STRICT),
                        parent="grandparent2",
                    ),
                ),
                None,
                False,
            ),
        ],
    )
    def test_append_error(self, seq1, seq2, new_id, data_only):
        """``append`` raises ``ValueError`` for each incompatible pair of sequences."""
        with pytest.raises(ValueError):
            seq1.append(seq2, new_id, data_only)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            # include_self=True and the sequence itself has the requested type
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(sequence=Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                )),
            ),
            # include_self=False: the parent is expected instead
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            # Parent and grandparent share the type: the parent is expected
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, sequence, sequence_type, include_self, expected):
        """``first_ancestor_of_type`` returns the expected ``Parent`` for the requested type."""
        assert sequence.first_ancestor_of_type(
            sequence_type, include_self=include_self) == expected

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self",
        [
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent", sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, sequence, sequence_type, include_self):
        """``first_ancestor_of_type`` raises ``NoSuchAncestorException`` when no type matches."""
        with pytest.raises(NoSuchAncestorException):
            sequence.first_ancestor_of_type(sequence_type, include_self=include_self)

    @pytest.mark.parametrize(
        "sequence,sequence_type,include_self,expected",
        [
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_1",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"),
                "seqtype_1",
                True,
                False,
            ),
            (
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    id="self",
                    type="seqtype_2",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_1",
                        parent=Parent(id="grandparent", sequence_type="seqtype_1"),
                    ),
                ),
                "seqtype_3",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, sequence, sequence_type, include_self, expected):
        """``has_ancestor_of_type`` returns exactly ``True`` or ``False`` as expected."""
        assert sequence.has_ancestor_of_type(
            sequence_type, include_self=include_self) is expected

    @pytest.mark.parametrize(
        "sequence,expected",
        [
            (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT, id="test"), ">test\nATGCATATTT\nGGAAACCAA"),
            # A missing ID is rendered as the literal "None" in the header
            (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT), ">None\nATGCATATTT\nGGAAACCAA"),
            (Sequence("GGAAACCAA", Alphabet.NT_STRICT, id="test"), ">test\nGGAAACCAA"),
            (
                Sequence("ATGCATATTTGGAAACCAAGGAAACCAA", Alphabet.NT_STRICT, id="test"),
                ">test\nATGCATATTT\nGGAAACCAAG\nGAAACCAA",
            ),
            (
                Sequence(
                    data="AAAAAAA",
                    alphabet=Alphabet.NT_STRICT,
                    id="test",
                    parent=Parent(
                        location=SingleInterval(33, 40, Strand.MINUS)),
                ),
                ">test\nAAAAAAA",
            ),
        ],
    )
    def test_to_fasta(self, sequence, expected):
        """``to_fasta(num_chars=10)`` emits a ``>id`` header and wraps the data at 10 characters."""
        s_fa = sequence.to_fasta(num_chars=10)
        assert s_fa == expected

    def test_empty_to_fasta(self):
        """``to_fasta`` on an empty Sequence raises ``EmptySequenceFastaError``."""
        s = Sequence("", Alphabet.NT_STRICT)
        with pytest.raises(EmptySequenceFastaError):
            s.to_fasta()
def test_empty_to_fasta(self):
    """Rendering an empty Sequence as FASTA raises ``EmptySequenceFastaError``."""
    empty_sequence = Sequence("", Alphabet.NT_STRICT)
    with pytest.raises(EmptySequenceFastaError):
        empty_sequence.to_fasta()
def test_sequence(self):
    """A Parent built from a Sequence returns an equal Sequence from ``.sequence``."""
    wrapper = Parent(sequence=Sequence("A", Alphabet.NT_STRICT))
    expected = Sequence("A", Alphabet.NT_STRICT)
    assert wrapper.sequence == expected
class TestParent:
    """Parametrized tests for Parent.

    Covers construction, equality, attribute access, ancestor lookup,
    lifting child locations to the parent, and resetting locations.
    """

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,expected",
        [
            (None, None, None, None, None, Parent()),
            (
                "id",
                "seqtype",
                Strand.MINUS,
                SingleInterval(
                    0,
                    1,
                    Strand.MINUS,
                    Parent(
                        id="id",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence(
                            "AAA",
                            Alphabet.NT_STRICT,
                            id="id",
                            type="seqtype",
                            parent=Parent(
                                id="id2",
                                sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                            ),
                        ),
                    ),
                ),
                Sequence(
                    "AAA",
                    Alphabet.NT_STRICT,
                    id="id",
                    type="seqtype",
                    parent=Parent(id="id2", sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2")),
                ),
                Parent(
                    id="id",
                    sequence_type="seqtype",
                    strand=Strand.MINUS,
                    location=SingleInterval(
                        0,
                        1,
                        Strand.MINUS,
                        Parent(
                            id="id",
                            sequence_type="seqtype",
                            strand=Strand.MINUS,
                            sequence=Sequence(
                                "AAA",
                                Alphabet.NT_STRICT,
                                id="id",
                                type="seqtype",
                                parent=Parent(
                                    id="id2",
                                    sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                                ),
                            ),
                        ),
                    ),
                    sequence=Sequence(
                        "AAA",
                        Alphabet.NT_STRICT,
                        id="id",
                        type="seqtype",
                        parent=Parent(
                            id="id2",
                            sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                        ),
                    ),
                ),
            ),
        ],
    )
    def test_init(self, id, sequence_type, strand, location, sequence, expected):
        """A Parent built from the given keyword arguments equals the expected Parent."""
        built = Parent(
            id=id,
            sequence_type=sequence_type,
            strand=strand,
            location=location,
            sequence=sequence,
        )
        assert expected == built

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,parent,expected_exception",
        [
            (
                "id1",
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent="id2"),
                None,
                None,
                ParentException,
            ),
            (
                "id1",
                None,
                None,
                None,
                Sequence("AAA", Alphabet.NT_STRICT, id="id2"),
                None,
                ParentException,
            ),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent="id1"),
                Sequence("AAC", Alphabet.NT_STRICT, id="id2"),
                None,
                ParentException,
            ),
            (
                None,
                "seqtype",
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                None,
                None,
                ParentException,
            ),
            (
                None,
                "seqtype",
                None,
                None,
                Sequence("AAT", Alphabet.NT_STRICT, type="unknown"),
                None,
                ParentException,
            ),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                Sequence("AAG", Alphabet.NT_STRICT, type="seqtype"),
                None,
                ParentException,
            ),
            (
                None,
                None,
                Strand.MINUS,
                SingleInterval(0, 5, Strand.PLUS),
                None,
                None,
                InvalidStrandException,
            ),
            (
                None,
                None,
                None,
                SingleInterval(0, 10, Strand.PLUS),
                Sequence("A", Alphabet.NT_STRICT),
                None,
                InvalidPositionException,
            ),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                LocationException,
            ),
            (
                None,
                None,
                Strand.PLUS,
                SingleInterval(5, 10, Strand.MINUS),
                None,
                None,
                InvalidStrandException,
            ),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT, parent="id1"),
                Parent(id="id2"),
                MismatchedParentException,
            ),
        ],
    )
    def test_init_error(self, id, sequence_type, strand, location, sequence, parent, expected_exception):
        """Constructing a Parent from each argument combination raises the listed exception."""
        with pytest.raises(expected_exception):
            Parent(
                id=id,
                sequence_type=sequence_type,
                strand=strand,
                location=location,
                sequence=sequence,
                parent=parent,
            )

    @pytest.mark.parametrize(
        "obj,expected",
        [
            (
                Sequence("AAA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
            ),
            ("parent", Parent(id="parent")),
            (
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS)),
            ),
            (
                CompoundInterval([5], [10], Strand.PLUS),
                Parent(location=CompoundInterval([5], [10], Strand.PLUS)),
            ),
            (EmptyLocation(), Parent(location=EmptyLocation())),
            (Strand.MINUS, Parent(strand=Strand.MINUS)),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
            ),
        ],
    )
    def test_make_parent(self, obj, expected):
        """make_parent converts each supported object into the expected Parent."""
        converted = make_parent(obj)
        assert converted == expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(), Parent(id=None, sequence_type=None), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), False),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent=Parent(id="parent"))),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_eq(self, parent1, parent2, expected):
        """The == comparison of two Parents yields exactly the expected bool."""
        are_equal = parent1 == parent2
        assert are_equal is expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), True),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent="parent")),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_equals_except_location(self, parent1, parent2, expected):
        """equals_except_location yields exactly the expected bool for each pair."""
        outcome = parent1.equals_except_location(parent2)
        assert outcome is expected

    @pytest.mark.parametrize(
        "id,location,sequence,expected",
        [
            ("id", None, None, "id"),
            (
                None,
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                None,
                "id",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
            (
                "id",
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
        ],
    )
    def test_id(self, id, location, sequence, expected):
        """The id attribute of the constructed Parent matches the expected value."""
        built = Parent(id=id, location=location, sequence=sequence)
        assert built.id == expected

    @pytest.mark.parametrize(
        "sequence_type,location,sequence,expected",
        [
            ("seqtype", None, None, "seqtype"),
            (
                None,
                SingleInterval(
                    0,
                    5,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype"),
                ),
                None,
                "seqtype",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, type="seqtype"),
                "seqtype",
            ),
            (
                None,
                None,
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(sequence_type="seqtype_2"),
                ),
                "seqtype",
            ),
        ],
    )
    def test_sequence_type(self, sequence_type, location, sequence, expected):
        """The sequence_type attribute of the constructed Parent matches the expected value."""
        built = Parent(sequence_type=sequence_type, location=location, sequence=sequence)
        assert built.sequence_type == expected

    @pytest.mark.parametrize(
        "strand,location,sequence,expected",
        [
            (Strand.PLUS, None, None, Strand.PLUS),
            (None, SingleInterval(0, 5, Strand.MINUS), None, Strand.MINUS),
            (
                Strand.PLUS,
                None,
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.PLUS,
            ),
        ],
    )
    def test_strand(self, strand, location, sequence, expected):
        """The strand attribute of the constructed Parent matches the expected value."""
        built = Parent(strand=strand, location=location, sequence=sequence)
        assert built.strand == expected

    def test_location(self):
        """The location attribute of a Parent equals the location it was built with."""
        holder = Parent(location=SingleInterval(0, 1, Strand.PLUS))
        assert holder.location == SingleInterval(0, 1, Strand.PLUS)

    def test_sequence(self):
        """The sequence attribute of a Parent equals the Sequence it was built with."""
        holder = Parent(sequence=Sequence("A", Alphabet.NT_STRICT))
        assert holder.sequence == Sequence("A", Alphabet.NT_STRICT)

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(parent="id"), Parent(id="id")),
            (
                Parent(
                    sequence=Sequence(
                        "AA",
                        Alphabet.NT_STRICT,
                        parent=Parent(sequence_type="chr"),
                    )
                ),
                Parent(sequence_type="chr"),
            ),
        ],
    )
    def test_parent(self, parent, expected):
        """The parent attribute of a Parent matches the expected Parent."""
        grandparent = parent.parent
        assert grandparent == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(), Parent()),
            (Parent(strand=Strand.PLUS), Parent()),
            (
                Parent(strand=Strand.PLUS, location=SingleInterval(5, 10, Strand.PLUS)),
                Parent(),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(0, 1, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
            ),
        ],
    )
    def test_strip_location_info(self, parent, expected):
        """strip_location_info returns the expected Parent for each case."""
        stripped = parent.strip_location_info()
        assert stripped == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """first_ancestor_of_type returns the expected ancestor Parent."""
        ancestor = parent.first_ancestor_of_type(sequence_type, include_self=include_self)
        assert ancestor == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self",
        [
            (Parent(id="self"), "seqtype_2", True),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, parent, sequence_type, include_self):
        """Each case must raise NoSuchAncestorException from first_ancestor_of_type."""
        with pytest.raises(NoSuchAncestorException):
            parent.first_ancestor_of_type(sequence_type, include_self=include_self)

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Parent(id="self"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """has_ancestor_of_type must return exactly the expected bool for each case."""
        found = parent.has_ancestor_of_type(sequence_type, include_self=include_self)
        assert found is expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 5, Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                SingleInterval(195, 200, Strand.MINUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(6, 9, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=SingleInterval(6, 8, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.UNSTRANDED, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_single_interval(self, parent, expected):
        """Lift a SingleInterval child location when the grandparent location is a SingleInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval([13, 17], [15, 20], Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 10], [5, 15], Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [185, 195],
                    [190, 200],
                    Strand.MINUS,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([6], [9], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=CompoundInterval([6], [8], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval(
                    [13, 17],
                    [15, 20],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3], [5], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_single_interval(self, parent, expected):
        """Lift a CompoundInterval child location when the grandparent location is a SingleInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # No location
            (
                Parent(parent=SingleInterval(5, 10, Strand.PLUS)),
                NullParentException,
            ),
            # Parent has no location
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent="grandparent",
                ),
                NullParentException,
            ),
            # Location on parent can't be unstranded
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 100, Strand.UNSTRANDED),
                    ),
                ),
                InvalidStrandException,
            ),
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(30, 31, Strand.PLUS)),
                ),
                ValueError,
            ),
        ],
    )
    def test_lift_child_location_to_parent_single_interval_error(self, parent, expected_error):
        """Each case must raise the listed error from lift_child_location_to_parent."""
        with pytest.raises(expected_error):
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(10, 20, Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [14, 20],
                    [18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=SingleInterval(5, 10, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 4, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                SingleInterval(41, 45, Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_compound_interval(self, parent, expected):
        """Lift a SingleInterval child location when the grandparent location is a CompoundInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 5], [5, 10], Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([10, 22], [20, 23], Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [11, 14, 20],
                    [12, 18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([5], [10], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 3], [1, 4], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                CompoundInterval([41, 44], [42, 45], Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_compound_interval(self, parent, expected):
        """Lift a CompoundInterval child location when the grandparent location is a CompoundInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 50, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20], [15, 25], Strand.PLUS),
                    ),
                ),
                InvalidPositionException,
            ),
        ],
    )
    def test_lift_child_location_to_parent_compound_interval_error(self, parent, expected_error):
        """Each case must raise the listed error from lift_child_location_to_parent."""
        with pytest.raises(expected_error):
            parent.lift_child_location_to_parent()

    @pytest.mark.parametrize(
        "parent,location,expected",
        [
            (
                Parent(),
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS), strand=Strand.PLUS),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                SingleInterval(2, 3, Strand.PLUS),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(2, 3, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                None,
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
        ],
    )
    def test_reset_location(self, parent, location, expected):
        """reset_location returns the expected Parent for each new location (or None)."""
        updated = parent.reset_location(location)
        assert updated == expected

    @pytest.mark.parametrize(
        "parent,location,expected_exception",
        [
            (
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(0, 5, Strand.PLUS),
                InvalidPositionException,
            ),
            (
                Parent(id="id1", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(
                    0,
                    1,
                    Strand.PLUS,
                    parent=Parent(id="id2", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                ),
                ParentException,
            ),
        ],
    )
    def test_reset_location_error(self, parent, location, expected_exception):
        """Each case must raise the listed exception from reset_location."""
        with pytest.raises(expected_exception):
            parent.reset_location(location)

    @pytest.mark.parametrize(
        "parent,sequence,include_self,expected",
        [
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), True, False),
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), False, False),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                True,
                True,
            ),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                False,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AA", Alphabet.NT_STRICT),
                ),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT, id="id"),
                True,
                False,
            ),
            (
                Parent(
                    parent=Parent(parent=Parent(parent=Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT, id="seq"))))
                ),
                Sequence("AAA", Alphabet.NT_STRICT, id="seq"),
                True,
                True,
            ),
        ],
    )
    def test_has_ancestor_sequence(self, parent, sequence, include_self, expected):
        """has_ancestor_sequence returns the expected value for each parent chain."""
        found = parent.has_ancestor_sequence(sequence, include_self)
        assert found == expected