def test_require_parents_equal_except_location(self):
    """Parents with different IDs are rejected; parents differing only by location pass."""
    with pytest.raises(MismatchedParentException):
        left, right = Parent(id="parent1"), Parent(id="parent2")
        ObjectValidation.require_parents_equal_except_location(left, right)

    # Same ID, different locations: the validation must not raise.
    plus_parent = Parent(id="parent", location=SingleInterval(0, 5, Strand.PLUS))
    minus_parent = Parent(id="parent", location=SingleInterval(10, 20, Strand.MINUS))
    ObjectValidation.require_parents_equal_except_location(plus_parent, minus_parent)
def test_require_location_nonempty(self):
    """Locations whose blocks all have start == end raise; a one-base interval passes."""
    with pytest.raises(LocationException):
        empty_single = SingleInterval(5, 5, Strand.PLUS)
        ObjectValidation.require_location_nonempty(empty_single)

    with pytest.raises(LocationException):
        empty_compound = CompoundInterval([5, 10], [5, 10], Strand.PLUS)
        ObjectValidation.require_location_nonempty(empty_compound)

    # Must not raise.
    one_base = SingleInterval(5, 6, Strand.PLUS)
    ObjectValidation.require_location_nonempty(one_base)
def test_require_object_has_type(self):
    """A CompoundInterval fails the SingleInterval type check and passes its own."""
    with pytest.raises(TypeError):
        compound = CompoundInterval.from_single_intervals([SingleInterval(5, 10, Strand.PLUS)])
        ObjectValidation.require_object_has_type(compound, SingleInterval)

    # Must not raise when the requested type matches.
    compound = CompoundInterval.from_single_intervals([SingleInterval(5, 10, Strand.PLUS)])
    ObjectValidation.require_object_has_type(compound, CompoundInterval)
def test_init(self):
    """Check the attributes exposed by Sequence objects built with various parent arguments."""
    sequence = Sequence(
        "ACTG",
        Alphabet.NT_STRICT,
        id="id",
        type="seqtype_1",
        parent=Parent(
            id="parent",
            sequence_type="seqtype_2",
            location=SingleInterval(5, 9, Strand.MINUS, parent="parent"),
        ),
    )

    # Sequence data
    assert sequence.sequence == Seq("ACTG")
    assert str(sequence) == "ACTG"

    # Alphabet
    assert sequence.alphabet == Alphabet.NT_STRICT

    # ID
    assert sequence.id == "id"

    # Sequence type
    assert sequence.sequence_type == "seqtype_1"

    # Parent ID: given directly, via the parent's location, or absent
    assert sequence.parent_id == "parent"
    parent_by_id = Sequence("A", Alphabet.NT_STRICT, parent="parent")
    assert parent_by_id.parent_id == "parent"
    parent_by_location = Sequence(
        "A",
        Alphabet.NT_STRICT,
        parent=Parent(location=SingleInterval(5, 6, Strand.MINUS, parent="parent")),
    )
    assert parent_by_location.parent_id == "parent"
    no_parent = Sequence("A", Alphabet.NT_STRICT)
    assert no_parent.parent_id is None

    # Parent type
    assert sequence.parent_type == "seqtype_2"

    # Parent strand: from the full parent, a bare strand, a bare location, or absent
    assert sequence.parent_strand == Strand.MINUS
    parent_is_strand = Sequence("A", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED)
    assert parent_is_strand.parent_strand == Strand.UNSTRANDED
    parent_is_interval = Sequence(
        "A",
        Alphabet.NT_STRICT,
        parent=SingleInterval(3, 4, Strand.UNSTRANDED),
    )
    assert parent_is_interval.parent_strand == Strand.UNSTRANDED
    assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None

    # Location on parent
    expected_location = SingleInterval(5, 9, Strand.MINUS, parent="parent")
    assert sequence.location_on_parent == expected_location

    # No alphabet validation: construction must not raise
    Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False)
def test_require_location_has_parent_with_sequence(self):
    """No parent raises NullParentException; a parent lacking a sequence raises NullSequenceException."""
    with pytest.raises(NullParentException):
        orphan = SingleInterval(0, 5, Strand.PLUS)
        ObjectValidation.require_location_has_parent_with_sequence(orphan)

    with pytest.raises(NullSequenceException):
        id_only_parent = SingleInterval(0, 5, Strand.PLUS, parent="parent")
        ObjectValidation.require_location_has_parent_with_sequence(id_only_parent)

    # A parent that carries sequence data must pass without raising.
    parent_with_sequence = Parent(id="parent", sequence=Sequence("AAAAA", Alphabet.NT_STRICT))
    ObjectValidation.require_location_has_parent_with_sequence(
        SingleInterval(0, 5, Strand.PLUS, parent=parent_with_sequence)
    )
def test_intersection_error(self):
    """Strict parent comparison rejects intersecting with a location that has a parent."""
    with pytest.raises(MismatchedParentException):
        other = SingleInterval(0, 1, Strand.PLUS, parent="seq")
        EmptyLocation().intersection(other, strict_parent_compare=True)
def chunk_relative_interval_to_cds(self, chr_start: int, chr_end: int, chr_strand: Strand) -> Location:
    """Map a contiguous chunk-relative interval onto CDS-relative coordinates.

    Args:
        chr_start: Start of the interval on the chunk-relative sequence.
        chr_end: End of the interval on the chunk-relative sequence.
        chr_strand: Strand of the interval.

    Returns:
        The location of the interval relative to the CDS.

    Raises:
        NoncodingTranscriptError: If this transcript is not coding.
    """
    if not self.is_coding:
        raise NoncodingTranscriptError("No CDS positions on non-coding transcript")

    cds_location = self.cds.chunk_relative_location
    # The query interval must share the CDS location's parent to be convertible.
    query = SingleInterval(chr_start, chr_end, chr_strand, parent=cds_location.parent)
    return cds_location.parent_to_relative_location(query)
def test_intersection_exception(self):
    """Intersecting a feature with a non-overlapping interval raises EmptyLocationException."""
    model_fields = {
        "interval_starts": [10],
        "interval_ends": [15],
        "strand": Strand.MINUS.name,
    }
    model = FeatureIntervalModel.Schema().load(model_fields)
    feature = model.to_feature_interval()
    disjoint = SingleInterval(0, 5, Strand.PLUS)

    with pytest.raises(EmptyLocationException):
        feature.intersect(disjoint)
def chromosome_location(self) -> Location:
    """Return the span of this object as a plus-strand interval in *chromosome coordinates*.

    When the stored parent has a chromosome-typed ancestor, that ancestor is attached
    as the parent of the returned interval. Otherwise (no parent, or no chromosome
    ancestor) the interval is returned without a parent.

    The result always covers the full ``start``..``end`` span, even if this object
    exists in chunk-relative coordinates. As a consequence, if this object was built
    on chunk-relative coordinates, sequence information will not be present.
    """
    chunk_parent = self._parent_or_seq_chunk_parent
    if not chunk_parent or not chunk_parent.has_ancestor_of_type(SequenceType.CHROMOSOME):
        return SingleInterval(self.start, self.end, Strand.PLUS)

    chromosome_parent = chunk_parent.first_ancestor_of_type(SequenceType.CHROMOSOME)
    return SingleInterval(self.start, self.end, Strand.PLUS, chromosome_parent)
def test_seq_chunk_to_parent(strand):
    """seq_chunk_to_parent builds a chunk Parent with both chunk and chromosome ancestry."""
    obs = seq_chunk_to_parent("ATGCATGC", "TestSeq", 200, 208, strand=strand)

    chunk_id = "TestSeq:200-208"

    # Chromosome parent referenced by the chunk's location.
    location_parent = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand),
        sequence=None,
        parent=None,
    )
    # Chromosome parent referenced by the chunk sequence's own parent.
    chromosome_parent = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand, parent=None),
        sequence=None,
        parent=None,
    )
    sequence_parent = Parent(
        id="TestSeq",
        sequence_type=SequenceType.CHROMOSOME,
        strand=strand,
        location=SingleInterval(200, 208, strand, parent=location_parent),
        sequence=None,
        parent=chromosome_parent,
    )
    chunk_sequence = Sequence(
        data="ATGCATGC",
        id=chunk_id,
        alphabet=Alphabet.NT_EXTENDED_GAPPED,
        type=SequenceType.SEQUENCE_CHUNK,
        parent=sequence_parent,
    )
    expected = Parent(
        id=chunk_id,
        sequence_type=SequenceType.SEQUENCE_CHUNK,
        strand=None,
        location=None,
        sequence=chunk_sequence,
    )

    assert obs == expected
    assert obs.has_ancestor_of_type(SequenceType.CHROMOSOME)
    assert obs.has_ancestor_of_type(SequenceType.SEQUENCE_CHUNK)
def test_seq_to_parent():
    """seq_to_parent wraps a string in a chromosome Parent spanning the full sequence."""
    seq_id = "TestSeq"
    seq = "ATGCATGC"

    obs = seq_to_parent(seq, seq_id=seq_id)

    expected_sequence = Sequence(
        seq,
        Alphabet.NT_EXTENDED_GAPPED,
        type=SequenceType.CHROMOSOME,
        id=seq_id,
    )
    full_span = SingleInterval(0, len(seq), Strand.PLUS)
    assert obs == Parent(sequence=expected_sequence, location=full_span)
def test_require_parent_has_parent_with_location(self):
    """A missing grandparent, or one given only by ID, raises NullParentException."""
    with pytest.raises(NullParentException):
        no_grandparent = Parent(id="parent")
        ObjectValidation.require_parent_has_parent_with_location(no_grandparent)

    with pytest.raises(NullParentException):
        id_only_grandparent = Parent(id="parent", parent="grandparent")
        ObjectValidation.require_parent_has_parent_with_location(id_only_grandparent)

    # A grandparent that carries a location must pass without raising.
    grandparent = Parent(id="grandparent", location=SingleInterval(0, 5, Strand.PLUS))
    ObjectValidation.require_parent_has_parent_with_location(
        Parent(id="parent", parent=grandparent)
    )
def seq_chunk_to_parent(
    seq: str,
    sequence_name: Union[UUID, str],
    start: int,
    end: int,
    strand: Optional[Strand] = Strand.PLUS,
    alphabet: Optional[Alphabet] = Alphabet.NT_EXTENDED_GAPPED,
) -> Parent:
    """Build a sequence-chunk :class:`Parent` from a slice of a larger sequence.

    Intended for instantiating an annotation collection with only a subset of a
    genome sequence.

    NOTE: The sequence is assumed to be a subset of a chromosome; this function
    has no way to validate that.

    Args:
        seq: Sequence subset to use.
        sequence_name: The name of the sequence.
        start: The genomic start position of this sequence.
        end: The genomic end position of this sequence.
        strand: The strand this chunk is relative to the genome.
        alphabet: The alphabet the sequence is in.

    Returns:
        An instantiated Parent object ready to be passed to a constructor.
    """
    chunk_id = f"{sequence_name}:{start}-{end}"

    # Build the hierarchy from the inside out: chromosome -> chunk location -> chunk sequence.
    chromosome = Parent(id=sequence_name, sequence_type=SequenceType.CHROMOSOME)
    location_on_chromosome = SingleInterval(start, end, strand, parent=chromosome)
    chunk_sequence = Sequence(
        seq,
        alphabet,
        id=chunk_id,
        type=SequenceType.SEQUENCE_CHUNK,
        parent=Parent(location=location_on_chromosome),
    )
    return Parent(id=chunk_id, sequence=chunk_sequence)
def test_require_locations_overlap(self):
    """Adjacent intervals and strand mismatches (with match_strand) raise; a true overlap passes."""
    with pytest.raises(LocationOverlapException):
        left = SingleInterval(0, 5, Strand.PLUS)
        right = SingleInterval(5, 10, Strand.PLUS)
        ObjectValidation.require_locations_overlap(left, right)

    with pytest.raises(LocationOverlapException):
        plus = SingleInterval(0, 5, Strand.PLUS)
        minus = SingleInterval(0, 5, Strand.MINUS)
        ObjectValidation.require_locations_overlap(plus, minus, match_strand=True)

    # Same strand, overlapping coordinates: must not raise.
    first = SingleInterval(0, 5, Strand.PLUS)
    second = SingleInterval(3, 6, Strand.PLUS)
    ObjectValidation.require_locations_overlap(first, second, match_strand=True)
def initialize_location(
    starts: List[int],
    ends: List[int],
    strand: Strand,
    parent_or_seq_chunk_parent: Optional[Parent] = None,
) -> Location:
    """
    Build the :class:`Location` object for this interval.

    A single start/end pair produces a :class:`SingleInterval`; any other number of
    pairs produces a :class:`CompoundInterval`. The result is then passed through
    ``AbstractInterval.liftover_location_to_seq_chunk_parent`` with the given parent.

    Args:
        starts: Start positions relative to the chromosome.
        ends: End positions relative to the chromosome.
        strand: Strand relative to the chromosome.
        parent_or_seq_chunk_parent: An optional parent, either as a full chromosome or as a sequence chunk.

    Returns:
        The location produced by the liftover call.

    Raises:
        ValidationException: If ``starts`` and ``ends`` differ in length.
    """
    num_starts = len(starts)
    if num_starts != len(ends):
        raise ValidationException("Number of interval starts does not match number of interval ends")

    location = (
        SingleInterval(starts[0], ends[0], strand)
        if num_starts == 1
        else CompoundInterval(starts, ends, strand)
    )
    return AbstractInterval.liftover_location_to_seq_chunk_parent(location, parent_or_seq_chunk_parent)
def seq_to_parent(
    seq: str,
    alphabet: Optional[Alphabet] = Alphabet.NT_EXTENDED_GAPPED,
    seq_id: Optional[str] = None,
    seq_type: Optional[str] = SequenceType.CHROMOSOME,
) -> Parent:
    """Wrap a sequence string in a :class:`Parent` object.

    This is the intermediate that transfers a BioPython sequence object to a
    BioCantor sequence object.

    NOTE: The sequence is assumed to be the entire chromosome, so the returned
    parent's location spans ``0`` to ``len(seq)`` on the plus strand.

    Args:
        seq: String of sequence.
        alphabet: Alphabet this sequence is in.
        seq_id: ID to attach to the Parent.
        seq_type: Sequence type to attach to the Parent.

    Returns:
        A :class:`Parent` object.
    """
    wrapped_sequence = Sequence(seq, alphabet, type=seq_type, id=seq_id)
    full_span = SingleInterval(0, len(seq), Strand.PLUS)
    return Parent(sequence=wrapped_sequence, location=full_span)
def test_getitem_error(self):
    """Slicing a sequence whose parent location is unstranded raises InvalidStrandException."""
    with pytest.raises(InvalidStrandException):
        unstranded = Sequence(
            "actgactg",
            Alphabet.NT_STRICT,
            parent=SingleInterval(0, 8, Strand.UNSTRANDED),
        )
        unstranded[slice(3, 6)]
class TestEmptyLocation:
    """Tests for the behavior of ``EmptyLocation``."""

    def test_is_singleton(self):
        """Two constructions yield the very same object."""
        first = EmptyLocation()
        second = EmptyLocation()
        assert first is second

    def test_len(self):
        empty = EmptyLocation()
        assert len(empty) == 0

    def test_length(self):
        empty = EmptyLocation()
        assert empty.length == 0

    def test_parent(self):
        empty = EmptyLocation()
        assert empty.parent is None

    def test_strand(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.strand

    def test_start(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.start

    def test_end(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.end

    def test_is_contiguous(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.is_contiguous

    def test_blocks(self):
        empty = EmptyLocation()
        assert empty.blocks == []

    def test_scan_blocks(self):
        scanned = EmptyLocation().scan_blocks()
        assert not scanned

    def test_num_blocks(self):
        empty = EmptyLocation()
        assert empty.num_blocks == 0

    def test_optimize_blocks(self):
        optimized = EmptyLocation().optimize_blocks()
        assert optimized == EmptyLocation()

    def test_gap_list(self):
        gaps = EmptyLocation().gap_list()
        assert gaps == []

    def test_gaps_location(self):
        gaps = EmptyLocation().gaps_location()
        assert gaps == EmptyLocation()

    def test_extract_sequence(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.extract_sequence()

    def test_parent_to_relative_pos(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.parent_to_relative_pos(0)

    def test_relative_to_parent_pos(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.relative_to_parent_pos(0)

    def test_parent_to_relative_location(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.parent_to_relative_location(SingleInterval(0, 0, Strand.PLUS))

    @pytest.mark.parametrize(
        "other",
        [
            EmptyLocation(),
            SingleInterval(5, 10, Strand.PLUS),
            CompoundInterval([5], [10], Strand.PLUS),
        ],
    )
    def test_intersection(self, other):
        """Intersecting the empty location with anything yields the empty location."""
        result = EmptyLocation().intersection(other)
        assert result == EmptyLocation()

    def test_intersection_error(self):
        with pytest.raises(MismatchedParentException):
            other = SingleInterval(0, 1, Strand.PLUS, parent="seq")
            EmptyLocation().intersection(other, strict_parent_compare=True)

    @pytest.mark.parametrize(
        "other",
        [
            EmptyLocation(),
            SingleInterval(5, 10, Strand.PLUS),
            CompoundInterval([5], [10], Strand.PLUS),
        ],
    )
    def test_minus(self, other):
        """Subtracting anything from the empty location yields the empty location."""
        result = EmptyLocation().minus(other)
        assert result == EmptyLocation()

    def test_minus_error(self):
        with pytest.raises(MismatchedParentException):
            other = SingleInterval(0, 1, Strand.PLUS, parent="seq")
            EmptyLocation().minus(other, strict_parent_compare=True)

    @pytest.mark.parametrize(
        "other",
        [
            EmptyLocation(),
            SingleInterval(5, 10, Strand.PLUS),
            CompoundInterval([5], [10], Strand.PLUS),
        ],
    )
    def test_location_relative_to(self, other):
        result = EmptyLocation().location_relative_to(other)
        assert result == EmptyLocation()

    def test_relative_interval_to_parent_location(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.relative_interval_to_parent_location(0, 1, Strand.PLUS)

    def test_has_overlap(self):
        overlaps = EmptyLocation().has_overlap(EmptyLocation())
        assert overlaps is False

    def test_has_overlap_error(self):
        with pytest.raises(MismatchedParentException):
            other = SingleInterval(0, 1, Strand.PLUS, parent="seq")
            EmptyLocation().has_overlap(other, strict_parent_compare=True)

    def test_contains(self):
        """The empty location contains neither itself nor a real interval."""
        contains_empty = EmptyLocation().contains(EmptyLocation())
        assert not contains_empty
        contains_interval = EmptyLocation().contains(SingleInterval(0, 1, Strand.PLUS))
        assert not contains_interval

    def test_contains_error(self):
        with pytest.raises(MismatchedParentException):
            other = SingleInterval(0, 1, Strand.PLUS, parent="seq")
            EmptyLocation().contains(other, strict_parent_compare=True)

    def test_reverse(self):
        reversed_location = EmptyLocation().reverse()
        assert reversed_location == EmptyLocation()

    def test_reverse_strand(self):
        flipped = EmptyLocation().reverse_strand()
        assert flipped == EmptyLocation()

    def test_reset_strand(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.reset_strand(Strand.PLUS)

    def test_reset_parent(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.reset_parent(None)

    def test_extend_absolute(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.extend_absolute(0, 0)

    def test_extend_relative(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.extend_relative(0, 0)

    def test_shift_position(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.shift_position(0)

    def test_distance_to(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.distance_to(EmptyLocation())

    def test_merge_overlapping(self):
        merged = EmptyLocation().merge_overlapping()
        assert merged == EmptyLocation()

    def test_first_ancestor_of_type(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.first_ancestor_of_type("seqtype")

    def test_has_ancestor_of_type(self):
        has_ancestor = EmptyLocation().has_ancestor_of_type("seqtype")
        assert has_ancestor is False

    def test_union(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.union(SingleInterval(0, 1, Strand.PLUS))

    def test_union_preserve_overlaps(self):
        with pytest.raises(EmptyLocationException):
            empty = EmptyLocation()
            empty.union_preserve_overlaps(SingleInterval(0, 1, Strand.PLUS))
class TestSequence: def test_init(self): sequence = Sequence( "ACTG", Alphabet.NT_STRICT, id="id", type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", location=SingleInterval(5, 9, Strand.MINUS, parent="parent"), ), ) # Sequence data assert sequence.sequence == Seq("ACTG") assert str(sequence) == "ACTG" # Alphabet assert sequence.alphabet == Alphabet.NT_STRICT # ID assert sequence.id == "id" # Sequence type assert sequence.sequence_type == "seqtype_1" # Parent ID assert sequence.parent_id == "parent" assert Sequence("A", Alphabet.NT_STRICT, parent="parent").parent_id == "parent" assert (Sequence( "A", Alphabet.NT_STRICT, parent=Parent( location=SingleInterval(5, 6, Strand.MINUS, parent="parent")), ).parent_id == "parent") assert Sequence("A", Alphabet.NT_STRICT).parent_id is None # Parent type assert sequence.parent_type == "seqtype_2" # Parent strand assert sequence.parent_strand == Strand.MINUS assert Sequence( "A", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED).parent_strand == Strand.UNSTRANDED # Location on parent assert (Sequence( "A", Alphabet.NT_STRICT, parent=SingleInterval(3, 4, Strand.UNSTRANDED), ).parent_strand == Strand.UNSTRANDED) assert Sequence("A", Alphabet.NT_STRICT).parent_strand is None assert sequence.location_on_parent == SingleInterval(5, 9, Strand.MINUS, parent="parent") # No alphabet validation Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False) @pytest.mark.parametrize( "data,alphabet,parent_id,parent_type,parent_strand,location_on_parent,expected_exception", [ ("A-C", Alphabet.NT_STRICT, None, None, None, None, AlphabetError), ( "ACG", Alphabet.NT_STRICT, None, None, None, SingleInterval(0, 4, Strand.PLUS), ParentException, ), ( "ATT", Alphabet.NT_STRICT, "parent1", None, None, SingleInterval(0, 3, Strand.PLUS, parent="parent2"), ParentException, ), ( "GGG", Alphabet.NT_STRICT, None, None, Strand.MINUS, SingleInterval(0, 3, Strand.PLUS), InvalidStrandException, ), ( "GGG", Alphabet.NT_STRICT, None, 
"seqtype_2", None, SingleInterval( 0, 3, Strand.PLUS, parent=Parent(sequence_type="seqtype_3"), ), ParentException, ), ], ) def test_init_invalid_params( self, data, alphabet, parent_id, parent_type, parent_strand, location_on_parent, expected_exception, ): with pytest.raises(expected_exception): Sequence( data, alphabet, parent=Parent( id=parent_id, sequence_type=parent_type, strand=parent_strand, location=location_on_parent, ), ) @pytest.mark.parametrize( "sequence,other,expected", [ ( Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False), "AAAA", False, ), ( Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=False), Sequence("AAAA", Alphabet.NT_STRICT, validate_alphabet=True), True, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), True, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), Sequence( "AAAa", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), False, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), Sequence( "AAAA", Alphabet.NT_EXTENDED, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), False, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), Sequence( "AAAA", Alphabet.NT_STRICT, id="seq2", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), False, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype_1", parent=SingleInterval(0, 4, Strand.PLUS, 
None), validate_alphabet=False, ), Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype_2", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), False, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.PLUS, None), validate_alphabet=False, ), Sequence( "AAAA", Alphabet.NT_STRICT, id="seq1", type="seqtype", parent=SingleInterval(0, 4, Strand.UNSTRANDED, None), validate_alphabet=False, ), False, ), ( Sequence("AAAA", Alphabet.NT_STRICT, parent="parent1"), Sequence("AAAA", Alphabet.NT_STRICT, parent="parent2"), False, ), ( Sequence("AAAA", Alphabet.NT_STRICT, parent="parent"), Sequence("AAAA", Alphabet.NT_STRICT), False, ), ( Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED), Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.PLUS), False, ), ( Sequence("AAAA", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED), Sequence("AAAA", Alphabet.NT_STRICT), False, ), ( Sequence( "AAAA", Alphabet.NT_STRICT, parent="seqtype", ), Sequence("AAAA", Alphabet.NT_STRICT), False, ), ], ) def test_equals(self, sequence, other, expected): assert (sequence == other) is expected assert (other == sequence) is expected def test_str(self): assert str(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == "AAAAt" def test_len(self): assert len(Sequence("", Alphabet.NT_EXTENDED_GAPPED)) == 0 assert len(Sequence("AAAAt", Alphabet.NT_EXTENDED_GAPPED)) == 5 @pytest.mark.parametrize( "seq,key,exp", [ # No parent (Sequence("acgtacgt", Alphabet.NT_STRICT), 3, Sequence("t", Alphabet.NT_STRICT)), (Sequence("acgtacgt", Alphabet.NT_STRICT), slice( 3, 6), Sequence("tac", Alphabet.NT_STRICT)), (Sequence("acgtacgt", Alphabet.NT_STRICT), slice( 3, 10), Sequence("tacgt", Alphabet.NT_STRICT)), # Parent with location; slice ( Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)), slice(3, 6), Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(3, 6, Strand.PLUS)), ), ( 
Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)), slice(3, 6), Sequence("gac", Alphabet.NT_STRICT, parent=SingleInterval(2, 5, Strand.MINUS)), ), # Parent with location; single position ( Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.PLUS)), 3, Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(3, 4, Strand.PLUS)), ), ( Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.MINUS)), 3, Sequence("g", Alphabet.NT_STRICT, parent=SingleInterval(4, 5, Strand.MINUS)), ), # Parent without full location ( Sequence("actgactg", Alphabet.NT_STRICT, parent="parent"), slice(3, 6), Sequence("gac", Alphabet.NT_STRICT, parent="parent"), ), ( Sequence( "actgactg", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED), slice(3, 6), Sequence("gac", Alphabet.NT_STRICT, parent=Strand.UNSTRANDED), ), ], ) def test_getitem(self, seq, key, exp): assert seq[key] == exp def test_getitem_error(self): with pytest.raises(InvalidStrandException): Sequence("actgactg", Alphabet.NT_STRICT, parent=SingleInterval(0, 8, Strand.UNSTRANDED))[3:6] @pytest.mark.parametrize( "sequence,alphabet,validate_alphabet", [ ("", Alphabet.NT_STRICT, True), ("acgtACGT", Alphabet.NT_STRICT, True), ("N", Alphabet.NT_STRICT, False), ("acNNNw", Alphabet.NT_EXTENDED, True), ("AN-", Alphabet.NT_EXTENDED, False), ("GG--AAA", Alphabet.NT_STRICT_GAPPED, True), ("AN-", Alphabet.NT_STRICT_GAPPED, False), ("nnAAw-cg", Alphabet.NT_EXTENDED_GAPPED, True), ("xxx", Alphabet.NT_EXTENDED_GAPPED, False), ("MWT*", Alphabet.AA, True), ("T*-", Alphabet.AA, False), ("ABCDE-", Alphabet.GENERIC, True), ("*", Alphabet.GENERIC, False), ], ) def test_validate_alphabet(self, sequence, alphabet, validate_alphabet): Sequence(sequence, alphabet, validate_alphabet=validate_alphabet) @pytest.mark.parametrize( "sequence,alphabet", [ ("N", Alphabet.NT_STRICT), ("A-", Alphabet.NT_EXTENDED), ("AN-", Alphabet.NT_STRICT_GAPPED), ("E", Alphabet.NT_EXTENDED_GAPPED), ("R-", 
Alphabet.AA), ("?", Alphabet.GENERIC), ], ) def test_validate_alphabet_error(self, sequence, alphabet): with pytest.raises(AlphabetError): Sequence(sequence, alphabet, validate_alphabet=True) @pytest.mark.parametrize( "sequence,expected", [ (Sequence("A", Alphabet.NT_STRICT, parent="parent"), "parent"), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent(id="parent", location=SingleInterval( 10, 11, Strand.UNSTRANDED)), ), "parent", ), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent( id="parent", location=SingleInterval( 10, 11, Strand.UNSTRANDED, parent="parent"), ), ), "parent", ), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent(location=SingleInterval( 10, 11, Strand.UNSTRANDED, parent="parent")), ), "parent", ), ( Sequence( "A", Alphabet.NT_STRICT, parent=SingleInterval(10, 11, Strand.UNSTRANDED), ), None, ), (Sequence("A", Alphabet.NT_STRICT), None), ], ) def test_parent_id(self, sequence, expected): assert sequence.parent_id == expected @pytest.mark.parametrize( "sequence,expected", [ (Sequence("A", Alphabet.NT_STRICT), None), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent(sequence_type="seqtype"), ), "seqtype", ), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent(location=SingleInterval( 0, 1, Strand.PLUS, parent=Parent(sequence_type="seqtype"), )), ), "seqtype", ), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent(location=SingleInterval( 0, 1, Strand.PLUS, parent=Sequence( "AA", Alphabet.NT_STRICT, type="seqtype"), )), ), "seqtype", ), ], ) def test_parent_type(self, sequence, expected): assert sequence.parent_type == expected @pytest.mark.parametrize( "sequence,expected", [ (Sequence("A", Alphabet.NT_STRICT), None), ( Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS), Strand.MINUS, ), ( Sequence( "A", Alphabet.NT_STRICT, parent=SingleInterval(10, 11, Strand.MINUS), ), Strand.MINUS, ), ( Sequence( "A", Alphabet.NT_STRICT, parent=SingleInterval(10, 11, Strand.MINUS), ), Strand.MINUS, ), ( Sequence( "A", Alphabet.NT_STRICT, parent=Parent( 
strand=Strand.MINUS, location=SingleInterval(10, 11, Strand.MINUS), ), ), Strand.MINUS, ), ], ) def test_parent_strand(self, sequence, expected): assert sequence.parent_strand == expected @pytest.mark.parametrize( "sequence,new_id,new_type,expected", [ ( Sequence("", Alphabet.NT_STRICT), None, None, Sequence("", Alphabet.NT_STRICT), ), ( Sequence("ACGtacgT", Alphabet.NT_STRICT), None, None, Sequence("AcgtaCGT", Alphabet.NT_STRICT), ), ( Sequence("ATUGCYRSWKMBdhvnNVHDbmkwsrycguta", Alphabet.NT_EXTENDED), None, None, Sequence("taacgryswmkvHDBNnbdhVKMWSYRGCAAT", Alphabet.NT_EXTENDED), ), ( Sequence("--A-CGta", Alphabet.NT_STRICT_GAPPED), None, None, Sequence("taCG-T--", Alphabet.NT_STRICT_GAPPED), ), ( Sequence("AtUC-N-", Alphabet.NT_EXTENDED_GAPPED), None, None, Sequence("-N-GAaT", Alphabet.NT_EXTENDED_GAPPED), ), ( Sequence("ACGta", Alphabet.NT_STRICT), "new_id", "seqtype", Sequence( "taCGT", Alphabet.NT_STRICT, id="new_id", type="seqtype", ), ), ( Sequence("ACGta", Alphabet.NT_STRICT, parent=Strand.PLUS), None, None, Sequence("taCGT", Alphabet.NT_STRICT, parent=Strand.MINUS), ), ( Sequence( "ACGta", Alphabet.NT_STRICT, parent=SingleInterval(5, 10, Strand.PLUS), ), None, None, Sequence( "taCGT", Alphabet.NT_STRICT, parent=SingleInterval(5, 10, Strand.MINUS), ), ), ], ) def test_reverse_complement(self, sequence, new_id, new_type, expected): assert sequence.reverse_complement(new_id=new_id, new_type=new_type) == expected @pytest.mark.parametrize( "sequence", [ Sequence("AAA", Alphabet.AA), Sequence("AAA", Alphabet.GENERIC), Sequence("xxx", Alphabet.NT_STRICT, validate_alphabet=False), ], ) def test_reverse_complement_error(self, sequence): with pytest.raises(AlphabetError): sequence.reverse_complement() @pytest.mark.parametrize( "seq1,seq2,new_id,data_only,expected", [ ( Sequence("", Alphabet.NT_STRICT, parent="parent1"), Sequence("", Alphabet.NT_STRICT, parent="parent2"), "new_id", True, Sequence("", Alphabet.NT_STRICT, id="new_id"), ), ( Sequence("AA", 
Alphabet.NT_STRICT, parent="parent1"), Sequence("TT", Alphabet.NT_STRICT, parent="parent2"), "new_id", True, Sequence("AATT", Alphabet.NT_STRICT, id="new_id"), ), ( Sequence( "AA", Alphabet.NT_STRICT, type="seqtype_1", parent=Parent( id="parent1", strand=Strand.PLUS, location=SingleInterval(5, 7, Strand.PLUS), ), ), Sequence( "TT", Alphabet.NT_STRICT, type="seqtype_2", parent=Parent( id="parent1", strand=Strand.MINUS, location=SingleInterval(20, 22, Strand.MINUS), ), ), None, True, Sequence("AATT", Alphabet.NT_STRICT), ), ( Sequence( "AA", Alphabet.NT_STRICT, type="seqtype", parent=Parent( id="parent1", strand=Strand.PLUS, location=SingleInterval(5, 7, Strand.PLUS), ), ), Sequence( "TT", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent1", strand=Strand.PLUS), ), None, False, Sequence( "AATT", Alphabet.NT_STRICT, type="seqtype", parent="parent1", ), ), ( Sequence( "AA", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent1", strand=Strand.PLUS), ), Sequence( "TT", Alphabet.NT_STRICT, type="seqtype", parent=Parent( id="parent1", strand=Strand.PLUS, location=SingleInterval(5, 7, Strand.PLUS), ), ), None, False, Sequence( "AATT", Alphabet.NT_STRICT, type="seqtype", parent="parent1", ), ), ( Sequence( "CC", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)), ), Sequence( "TT", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent", location=SingleInterval(10, 12, Strand.PLUS)), ), "new_id", True, Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"), ), ( Sequence( "CC", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent", location=SingleInterval(3, 5, Strand.PLUS)), ), Sequence( "TT", Alphabet.NT_STRICT, type="seqtype", parent=Parent(id="parent", location=SingleInterval(0, 2, Strand.PLUS)), ), "new_id", True, Sequence("CCTT", Alphabet.NT_STRICT, id="new_id"), ), ( Sequence("AA", Alphabet.NT_STRICT, id="seq1", parent="parent"), Sequence("", Alphabet.NT_STRICT, id="seq2", 
parent="parent"), None, False, Sequence("AA", Alphabet.NT_STRICT, parent="parent"), ), ( Sequence( "ACT", Alphabet.NT_STRICT, parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.PLUS)), ), Sequence( "GGA", Alphabet.NT_STRICT, parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.PLUS)), ), "new_id", False, Sequence( "ACTGGA", Alphabet.NT_STRICT, id="new_id", parent=Parent( id="parent", location=CompoundInterval.from_single_intervals([ SingleInterval(2, 5, Strand.PLUS), SingleInterval(8, 11, Strand.PLUS), ]), ), ), ), ( Sequence( "ACT", Alphabet.NT_STRICT, parent=Parent(id="parent", location=SingleInterval(8, 11, Strand.MINUS)), ), Sequence( "GGA", Alphabet.NT_STRICT, parent=Parent(id="parent", location=SingleInterval(2, 5, Strand.MINUS)), ), "new_id", False, Sequence( "ACTGGA", Alphabet.NT_STRICT, id="new_id", parent=Parent( id="parent", location=CompoundInterval.from_single_intervals([ SingleInterval(2, 5, Strand.MINUS), SingleInterval(8, 11, Strand.MINUS), ]), ), ), ), ( Sequence( "ACT", Alphabet.NT_STRICT, id="seq1", type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", strand=Strand.PLUS, location=SingleInterval(5, 8, Strand.PLUS), ), ), Sequence( "GCG", Alphabet.NT_STRICT, id="seq1", type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", strand=Strand.PLUS, location=SingleInterval(15, 18, Strand.PLUS), ), ), None, False, Sequence( "ACTGCG", Alphabet.NT_STRICT, type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", strand=Strand.PLUS, location=CompoundInterval.from_single_intervals([ SingleInterval(5, 8, Strand.PLUS), SingleInterval(15, 18, Strand.PLUS), ]), ), ), ), ], ) def test_append(self, seq1, seq2, new_id, data_only, expected): assert seq1.append(seq2, new_id, data_only) == expected @pytest.mark.parametrize( "seq1,seq2,new_id,data_only", [ ( Sequence("AA", Alphabet.NT_STRICT), Sequence("TT", Alphabet.NT_EXTENDED), None, True, ), ( Sequence("AA", Alphabet.NT_STRICT, 
type="seqtype_1"), Sequence("AA", Alphabet.NT_STRICT, type="seqtype_2"), None, False, ), ( Sequence("AA", Alphabet.NT_STRICT, parent="parent1"), Sequence("AA", Alphabet.NT_STRICT, parent="parent2"), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(10, 12, Strand.PLUS), ), Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(5, 7, Strand.PLUS), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(2, 4, Strand.MINUS), ), Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(5, 7, Strand.MINUS), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(10, 12, Strand.PLUS), ), Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(11, 13, Strand.PLUS), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(10, 12, Strand.UNSTRANDED), ), Sequence( "AA", Alphabet.NT_STRICT, parent=SingleInterval(15, 17, Strand.UNSTRANDED), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent1", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent2", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype_1", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype_2", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", 
sequence_type="seqtype", strand=Strand.MINUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent", ), ), Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAAT", Alphabet.NT_STRICT), parent="grandparent", ), ), None, False, ), ( Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent1", ), ), Sequence( "AA", Alphabet.NT_STRICT, parent=Parent( id="parent", sequence_type="seqtype", strand=Strand.PLUS, sequence=Sequence("AAA", Alphabet.NT_STRICT), parent="grandparent2", ), ), None, False, ), ], ) def test_append_error(self, seq1, seq2, new_id, data_only): with pytest.raises(ValueError): seq1.append(seq2, new_id, data_only) @pytest.mark.parametrize( "sequence,sequence_type,include_self,expected", [ ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype", parent=Parent(id="parent", sequence_type="seqtype"), ), "seqtype", True, Parent(sequence=Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype", parent=Parent(id="parent", sequence_type="seqtype"), )), ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype", parent=Parent(id="parent", sequence_type="seqtype"), ), "seqtype", False, Parent(id="parent", sequence_type="seqtype"), ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", parent=Parent(id="grandparent", sequence_type="seqtype_2"), ), ), "seqtype_2", True, Parent( id="parent", sequence_type="seqtype_2", parent=Parent(id="grandparent", sequence_type="seqtype_2"), ), ), ], ) def test_first_ancestor_of_type(self, sequence, sequence_type, include_self, expected): assert 
sequence.first_ancestor_of_type( sequence_type, include_self=include_self) == expected @pytest.mark.parametrize( "sequence,sequence_type,include_self", [ ( Sequence("A", Alphabet.NT_STRICT, id="self"), "seqtype_1", True, ), ( Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"), "seqtype_1", True, ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype_2", parent=Parent( id="parent", sequence_type="seqtype_1", parent=Parent(id="grandparent", sequence_type="seqtype_1"), ), ), "seqtype_3", True, ), ], ) def test_first_ancestor_of_type_error(self, sequence, sequence_type, include_self): with pytest.raises(NoSuchAncestorException): sequence.first_ancestor_of_type(sequence_type, include_self=include_self) @pytest.mark.parametrize( "sequence,sequence_type,include_self,expected", [ ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype", parent=Parent(id="parent", sequence_type="seqtype"), ), "seqtype", True, True, ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype", parent=Parent(id="parent", sequence_type="seqtype"), ), "seqtype", False, True, ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype_1", parent=Parent( id="parent", sequence_type="seqtype_2", parent=Parent(id="grandparent", sequence_type="seqtype_2"), ), ), "seqtype_2", True, True, ), ( Sequence("A", Alphabet.NT_STRICT, id="self"), "seqtype_1", True, False, ), ( Sequence("A", Alphabet.NT_STRICT, id="self", parent="parent"), "seqtype_1", True, False, ), ( Sequence( "A", Alphabet.NT_STRICT, id="self", type="seqtype_2", parent=Parent( id="parent", sequence_type="seqtype_1", parent=Parent(id="grandparent", sequence_type="seqtype_1"), ), ), "seqtype_3", True, False, ), ], ) def test_has_ancestor_of_type(self, sequence, sequence_type, include_self, expected): assert sequence.has_ancestor_of_type( sequence_type, include_self=include_self) is expected @pytest.mark.parametrize( "sequence,expected", [ (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT, id="test"), 
">test\nATGCATATTT\nGGAAACCAA"), (Sequence("ATGCATATTTGGAAACCAA", Alphabet.NT_STRICT), ">None\nATGCATATTT\nGGAAACCAA"), (Sequence("GGAAACCAA", Alphabet.NT_STRICT, id="test"), ">test\nGGAAACCAA"), ( Sequence("ATGCATATTTGGAAACCAAGGAAACCAA", Alphabet.NT_STRICT, id="test"), ">test\nATGCATATTT\nGGAAACCAAG\nGAAACCAA", ), ( Sequence( data="AAAAAAA", alphabet=Alphabet.NT_STRICT, id="test", parent=Parent( location=SingleInterval(33, 40, Strand.MINUS)), ), ">test\nAAAAAAA", ), ], ) def test_to_fasta(self, sequence, expected): s_fa = sequence.to_fasta(num_chars=10) assert s_fa == expected def test_empty_to_fasta(self): s = Sequence("", Alphabet.NT_STRICT) with pytest.raises(EmptySequenceFastaError): s.to_fasta()
def test_contains(self):
    """An EmptyLocation must report that it contains neither an empty nor a real interval."""
    empty = EmptyLocation()
    for candidate in (EmptyLocation(), SingleInterval(0, 1, Strand.PLUS)):
        assert not empty.contains(candidate)
def test_union_preserve_overlaps(self):
    """union_preserve_overlaps() on an EmptyLocation must raise EmptyLocationException."""
    empty = EmptyLocation()
    other = SingleInterval(0, 1, Strand.PLUS)
    with pytest.raises(EmptyLocationException):
        empty.union_preserve_overlaps(other)
def test_parent_to_relative_location(self):
    """parent_to_relative_location() on an EmptyLocation must raise EmptyLocationException."""
    empty = EmptyLocation()
    with pytest.raises(EmptyLocationException):
        # The zero-length interval is built inside the context manager, exactly as before,
        # so any error raised while constructing it is still seen by pytest.raises.
        empty.parent_to_relative_location(SingleInterval(0, 0, Strand.PLUS))
def _liftover_chunk_anchor(chrom_id, start, end, chrom_bases=None):
    """Build the Parent that places a chunk at ``start``..``end`` (plus strand) on chromosome ``chrom_id``.

    When ``chrom_bases`` is given, the returned Parent additionally carries that
    chromosome-typed sequence.
    """
    placement = SingleInterval(
        start,
        end,
        Strand.PLUS,
        parent=Parent(id=chrom_id, sequence_type=SequenceType.CHROMOSOME),
    )
    if chrom_bases is None:
        return Parent(location=placement)
    return Parent(
        sequence=Sequence(chrom_bases, Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.CHROMOSOME),
        location=placement,
    )


def _liftover_chunk_parent(chunk_id, chunk_bases, chrom_id, start, end, chrom_bases=None):
    """Build a Parent holding a SEQUENCE_CHUNK sequence ``chunk_bases`` anchored on ``chrom_id``."""
    return Parent(
        id=chunk_id,
        sequence=Sequence(
            chunk_bases,
            Alphabet.NT_EXTENDED_GAPPED,
            id=chunk_id,
            type=SequenceType.SEQUENCE_CHUNK,
            parent=_liftover_chunk_anchor(chrom_id, start, end, chrom_bases),
        ),
    )


class TestAbstractInterval:
    """Tests for AbstractInterval.liftover_location_to_seq_chunk_parent."""

    @pytest.mark.parametrize(
        "location,parent,exp",
        [
            # no-op
            (
                SingleInterval(0, 10, Strand.PLUS),
                None,
                SingleInterval(0, 10, Strand.PLUS),
            ),
            # non-specified parent
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(),
                SingleInterval(0, 10, Strand.PLUS, parent=Parent()),
            ),
            # specified parent that is non-standard
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(sequence_type="nonstandard"),
                SingleInterval(0, 10, Strand.PLUS, parent=Parent(sequence_type="nonstandard")),
            ),
            # chromosome parent
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(sequence_type=SequenceType.CHROMOSOME),
                SingleInterval(0, 10, Strand.PLUS, parent=Parent(sequence_type=SequenceType.CHROMOSOME)),
            ),
            # chromosome parent with real sequence
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(
                    sequence_type=SequenceType.CHROMOSOME,
                    sequence=Sequence("ATACGATCAAA", Alphabet.NT_EXTENDED_GAPPED),
                ),
                SingleInterval(
                    0,
                    10,
                    Strand.PLUS,
                    parent=Parent(
                        sequence_type=SequenceType.CHROMOSOME,
                        sequence=Sequence("ATACGATCAAA", Alphabet.NT_EXTENDED_GAPPED),
                    ),
                ),
            ),
            # chromosome parent as would be built by seq_to_parent
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(
                    sequence=Sequence(
                        "ATACGATCAAA", Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.CHROMOSOME, id="test"
                    ),
                    location=SingleInterval(0, 11, Strand.PLUS),
                ),
                SingleInterval(
                    0,
                    10,
                    Strand.PLUS,
                    Parent(
                        sequence=Sequence(
                            "ATACGATCAAA", Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.CHROMOSOME, id="test"
                        ),
                        location=SingleInterval(0, 11, Strand.PLUS),
                    ),
                ),
            ),
            # chunk parent as would be built by seq_chunk_to_parent
            (
                SingleInterval(2, 10, Strand.PLUS),
                _liftover_chunk_parent("test:1-11", "TACGATCAAA", "test", 1, 11),
                # chunk-relative location is 1-9
                SingleInterval(
                    1,
                    9,
                    Strand.PLUS,
                    _liftover_chunk_parent("test:1-11", "TACGATCAAA", "test", 1, 11),
                ),
            ),
        ],
    )
    def test_liftover_location_to_seq_chunk_parent(self, location, parent, exp):
        """Lifting ``location`` onto ``parent`` must produce exactly ``exp``."""
        lifted = AbstractInterval.liftover_location_to_seq_chunk_parent(location, parent)
        assert lifted == exp

    @pytest.mark.parametrize(
        "location,parent,exception",
        [
            # cannot have a naked chunk parent
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(sequence_type=SequenceType.SEQUENCE_CHUNK),
                NoSuchAncestorException,
            ),
            # cannot have a sequence-less chunk parent
            (
                SingleInterval(0, 10, Strand.PLUS),
                Parent(
                    id="test:1-11",
                    sequence_type=SequenceType.SEQUENCE_CHUNK,
                    parent=_liftover_chunk_anchor("test", 1, 11),
                ),
                NullSequenceException,
            ),
            # if location is on a chunk, must be a proper chunk with a chromosome
            (
                SingleInterval(0, 10, Strand.PLUS, parent=Parent(sequence_type=SequenceType.SEQUENCE_CHUNK)),
                Parent(),
                NoSuchAncestorException,
            ),
            # if location is on a chunk, that chunk's chromosome must match the new parent
            (
                SingleInterval(
                    5,
                    8,
                    Strand.PLUS,
                    parent=_liftover_chunk_parent("test:0-9", "ATACGATCA", "test", 0, 9),
                ),
                _liftover_chunk_parent("wrongchrom:0-9", "ATACGATCA", "wrongchrom", 0, 9),
                MismatchedParentException,
            ),
            # if location is on a chunk, and the chromosome parent has sequence, sequence must match
            (
                SingleInterval(
                    5,
                    8,
                    Strand.PLUS,
                    parent=_liftover_chunk_parent(
                        "test:0-9", "ATACGATCA", "test", 0, 9, chrom_bases="ATACGATCAAAA"
                    ),
                ),
                _liftover_chunk_parent("test:0-9", "ATACGATCA", "test", 0, 9, chrom_bases="ATACGATCAATA"),
                MismatchedParentException,
            ),
        ],
    )
    def test_liftover_location_to_seq_chunk_parent_exceptions(self, location, parent, exception):
        """Lifting ``location`` onto an incompatible ``parent`` must raise ``exception``."""
        with pytest.raises(exception):
            _ = AbstractInterval.liftover_location_to_seq_chunk_parent(location, parent)
class TestFeatureIntervalSequenceSubset:
    """Test the ability to slice the genomic sequence of a feature interval and still get useful results."""

    @pytest.mark.parametrize(
        "schema,absolute_value,relative_value,expected,parent",
        [
            (e3_spliced, 2, 1, 0, parent_genome2_1_15),
            (e3_spliced, 7, 6, 4, parent_genome2_1_15),
            (e3_spliced, 14, 13, 9, parent_genome2_1_15),
            (e3_spliced_minus, 2, 1, 9, parent_genome2_1_15),
            (e3_spliced_minus, 7, 6, 5, parent_genome2_1_15),
            (e3_spliced_minus, 14, 13, 0, parent_genome2_1_15),
        ],
    )
    def test_sequence_pos_to_feature(self, schema, absolute_value, relative_value, expected, parent):
        """Absolute and chunk-relative positions must map to the same feature position."""
        feat = schema.to_feature_interval(parent)
        assert feat.sequence_pos_to_feature(absolute_value) == expected
        assert feat.chunk_relative_pos_to_feature(relative_value) == expected

    @pytest.mark.parametrize(
        "schema,absolute_value,relative_value,expected,parent",
        [
            (
                e3_spliced,
                (7, 13, Strand.PLUS),
                (6, 12, Strand.PLUS),
                SingleInterval(4, 8, Strand.PLUS),
                parent_genome2_1_15,
            ),
            (
                e3_spliced_minus,
                (7, 13, Strand.PLUS),
                (6, 12, Strand.PLUS),
                SingleInterval(2, 6, Strand.MINUS),
                parent_genome2_1_15,
            ),
        ],
    )
    def test_sequence_interval_to_feature(self, schema, absolute_value, relative_value, expected, parent):
        """Absolute and chunk-relative intervals must map to the same feature interval."""
        feat = schema.to_feature_interval(parent)
        assert feat.sequence_interval_to_feature(*absolute_value) == expected
        assert feat.chunk_relative_interval_to_feature(*relative_value) == expected

    @pytest.mark.parametrize(
        "schema,value,absolute_expected,relative_expected,parent",
        [
            (e3_spliced, 0, 2, 1, parent_genome2_1_15),
            (e3_spliced, 9, 14, 13, parent_genome2_1_15),
            (e3_spliced, 4, 7, 6, parent_genome2_1_15),
            (e3_spliced_minus, 0, 14, 13, parent_genome2_1_15),
            (e3_spliced_minus, 9, 2, 1, parent_genome2_1_15),
            (e3_spliced_minus, 5, 7, 6, parent_genome2_1_15),
        ],
    )
    def test_feature_pos_to_sequence(self, schema, value, absolute_expected, relative_expected, parent):
        """A feature position must map to the expected absolute and chunk-relative positions."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert feat.feature_pos_to_sequence(value) == absolute_expected
        assert feat.feature_pos_to_chunk_relative(value) == relative_expected

    @pytest.mark.parametrize(
        "schema,value,absolute_expected,relative_expected,parent",
        [
            (
                e3_spliced,
                (0, 5, Strand.PLUS),
                CompoundInterval([2, 7], [6, 8], Strand.PLUS),
                CompoundInterval([1, 6], [5, 7], Strand.PLUS),
                parent_genome2_1_15,
            ),
            (
                e3_spliced_minus,
                (0, 5, Strand.PLUS),
                CompoundInterval([8, 12], [10, 15], Strand.MINUS),
                CompoundInterval([7, 11], [9, 14], Strand.MINUS),
                parent_genome2_1_15,
            ),
        ],
    )
    def test_feature_interval_to_sequence(self, schema, value, absolute_expected, relative_expected, parent):
        """A feature interval must map to the expected absolute and chunk-relative locations.

        Parents are stripped with ``reset_parent(None)`` before comparing, so only
        the coordinates and strand are checked.
        """
        feat = schema.to_feature_interval(parent)
        assert feat.feature_interval_to_sequence(*value).reset_parent(None) == absolute_expected
        assert feat.feature_interval_to_chunk_relative(*value).reset_parent(None) == relative_expected

    @pytest.mark.parametrize(
        "schema,parent,expected_spliced",
        [
            (e3_spliced, parent_genome2_1_15, "GTATCTTACC"),
            (e3_spliced_minus, parent_genome2_1_15, "GGTAAGATAC"),
        ],
    )
    def test_spliced_sequence(self, schema, parent, expected_spliced):
        """The spliced sequence extracted through a chunk parent must match the expected bases."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert str(feat.get_spliced_sequence()) == expected_spliced

    @pytest.mark.parametrize(
        "schema,parent,expected_genomic,expected_stranded_genomic",
        [
            (e3_spliced, parent_genome2_1_15, "GTATTCTTGGACC", "GTATTCTTGGACC"),
            (e3_spliced_minus, parent_genome2_1_15, "GTATTCTTGGACC", "GGTCCAAGAATAC"),
        ],
    )
    def test_genomic_sequence(self, schema, parent, expected_genomic, expected_stranded_genomic):
        """Reference and stranded genomic sequences must match the expected bases."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert str(feat.get_reference_sequence()) == expected_genomic
        assert str(feat.get_genomic_sequence()) == expected_stranded_genomic

    def test_sequence_exceptions(self):
        """All sequence accessors should raise good errors when attempted without sequence info"""
        feat = e3_spliced.to_feature_interval(parent_or_seq_chunk_parent=parent_no_seq)
        with pytest.raises(NullSequenceException):
            _ = feat.get_reference_sequence()
        with pytest.raises(NullSequenceException):
            _ = feat.get_spliced_sequence()
        with pytest.raises(NullSequenceException):
            _ = feat.get_genomic_sequence()

    def test_start_end(self):
        """For the genome2_1_15 chunk, chunk-relative start/end are one less than start/end."""
        feat = e3_spliced.to_feature_interval(parent_or_seq_chunk_parent=parent_genome2_1_15)
        assert feat.chunk_relative_start + 1 == feat.start
        assert feat.chunk_relative_end + 1 == feat.end

    def test_dict(self):
        """to_dict() is parent-independent by default and shifts by one in chunk-relative mode."""
        feat = e3_spliced.to_feature_interval(parent_genome2_1_15)
        feat2 = e3_spliced.to_feature_interval()
        d = feat.to_dict()
        d2 = feat2.to_dict()
        # The GUID is removed from both dictionaries so it cannot affect the comparison.
        del d["feature_interval_guid"]
        del d2["feature_interval_guid"]
        assert d == d2

        rel_d = feat.to_dict(chromosome_relative_coordinates=False)
        del rel_d["feature_interval_guid"]
        assert rel_d != d
        assert [x + 1 for x in rel_d["interval_starts"]] == list(d["interval_starts"])

    def test_nonstandard_parents(self):
        """Compare chromosome vs. chunk-relative locations across several kinds of parent."""
        feat0 = e3_spliced.to_feature_interval(parent)
        seq0 = feat0.get_spliced_sequence()

        feat1 = e3_spliced.to_feature_interval(parent_nonstandard_type)
        with pytest.raises(NullSequenceException):
            _ = feat1.get_spliced_sequence()

        feat2 = e3_spliced.to_feature_interval(parent_no_seq)
        with pytest.raises(NullSequenceException):
            _ = feat2.get_spliced_sequence()

        feat3 = e3_spliced.to_feature_interval(parent_nonstandard_type_with_sequence)
        seq = feat3.get_spliced_sequence()
        assert seq == seq0

        assert feat0.chromosome_location == feat0.chunk_relative_location
        assert feat1.chromosome_location != feat1.chunk_relative_location
        assert feat1._chunk_relative_bounded_chromosome_location == feat1.chunk_relative_location
        assert feat2.chromosome_location == feat2.chunk_relative_location
        assert feat3.chromosome_location != feat3.chunk_relative_location
        assert feat3._chunk_relative_bounded_chromosome_location == feat3.chunk_relative_location

        # OTOH, this is not the same
        feat4 = e3_spliced.to_feature_interval(parent_genome2_1_15)
        assert feat4.chromosome_location != feat4.chunk_relative_location

    def test_liftover_to_parent_or_seq_chunk_parent(self):
        """Lifting a feature between chromosome and chunk parents keeps its sequence consistent."""
        feat0 = e3_spliced.to_feature_interval(parent_genome2)
        feat1 = feat0.liftover_to_parent_or_seq_chunk_parent(parent_genome2_1_15)
        assert str(feat0.get_spliced_sequence()) == str(feat1.get_spliced_sequence())
        assert feat0.chromosome_location.reset_parent(None) == feat1.chromosome_location.reset_parent(None)

        # bringing in a null parent means no sequence anymore
        feat2 = feat0.liftover_to_parent_or_seq_chunk_parent(parent_no_seq_with_id)
        with pytest.raises(NullSequenceException):
            _ = feat2.get_spliced_sequence()

        # we can also start in chunk coordinates, then lift to different chunk coordinates
        feat_chunk = e3_spliced.to_feature_interval(parent_genome2_1_15)
        feat_subchunk = feat_chunk.liftover_to_parent_or_seq_chunk_parent(parent_genome2_2_8)
        # this is now a subset
        assert str(feat_subchunk.get_spliced_sequence()) in str(feat_chunk.get_spliced_sequence())

    def test_liftover_to_parent_or_seq_chunk_parent_exception(self):
        """Lifting onto each of the parents tried here must raise MismatchedParentException."""
        feat0 = e3_spliced.to_feature_interval(parent_genome2_1_15)
        with pytest.raises(MismatchedParentException):
            _ = feat0.liftover_to_parent_or_seq_chunk_parent(parent_named)

        feat1 = e3_spliced.to_feature_interval(parent_genome2)
        with pytest.raises(MismatchedParentException):
            _ = feat1.liftover_to_parent_or_seq_chunk_parent(parent_named)

        feat2 = e3_spliced.to_feature_interval(parent_genome2_1_15)
        with pytest.raises(MismatchedParentException):
            _ = feat2.liftover_to_parent_or_seq_chunk_parent(parent_no_seq)
        with pytest.raises(MismatchedParentException):
            _ = feat2.liftover_to_parent_or_seq_chunk_parent(parent)

    @pytest.mark.parametrize(
        "feature,parent,expected",
        [
            (
                e3_spliced,
                parent_genome2_2_8,
                CompoundInterval(
                    [2, 7, 12],
                    [6, 10, 15],
                    Strand.PLUS,
                    parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
            (
                e3_spliced,
                parent_genome2_1_15,
                CompoundInterval(
                    [2, 7, 12],
                    [6, 10, 15],
                    Strand.PLUS,
                    parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
        ],
    )
    def test_chromosome_location(self, feature, parent, expected):
        """chromosome_location must hold the full, unsliced coordinates whichever chunk is used."""
        feat = feature.to_feature_interval(parent)
        assert feat.chromosome_location == expected

    @pytest.mark.parametrize(
        "feature,parent,expected",
        [
            # parent_genome2_2_8 keeps the first exon, truncates the second and drops the third
            (
                e3_spliced,
                parent_genome2_2_8,
                CompoundInterval(
                    [2, 7],
                    [6, 8],
                    Strand.PLUS,
                    parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
            # parent_genome2_1_15 does not slice off anything
            (
                e3_spliced,
                parent_genome2_1_15,
                CompoundInterval(
                    [2, 7, 12],
                    [6, 10, 15],
                    Strand.PLUS,
                    parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
        ],
    )
    def test__chunk_relative_bounded_chromosome_location(self, feature, parent, expected):
        """The bounded chromosome location must be clipped to what the chunk covers."""
        feat = feature.to_feature_interval(parent)
        assert feat._chunk_relative_bounded_chromosome_location == expected

    @pytest.mark.parametrize(
        "feature,parent,expected_gaps",
        [
            (
                e3_spliced,
                parent_genome2_1_15,
                CompoundInterval([5, 9], [6, 11], Strand.PLUS, parent=parent_genome2_1_15),
            ),
            (
                e3_spliced_minus,
                parent_genome2_1_15,
                CompoundInterval([5, 9], [6, 11], Strand.MINUS, parent=parent_genome2_1_15),
            ),
            (e3_spliced, parent_genome2_2_8, SingleInterval(4, 5, Strand.PLUS, parent=parent_genome2_2_8)),
            (e3_spliced_minus, parent_genome2_2_8, SingleInterval(4, 5, Strand.MINUS, parent=parent_genome2_2_8)),
        ],
    )
    def test_chunk_relative_gaps_location(self, feature, parent, expected_gaps):
        """The chunk-relative gaps location must pass the same-nonempty-parent validation."""
        feat = feature.to_feature_interval(parent)
        # NOTE(review): judging by its name this helper presumably validates only the parents,
        # so the expected coordinates are never compared -- confirm whether an equality
        # assertion was intended here.
        ObjectValidation.require_locations_have_same_nonempty_parent(feat.chunk_relative_gaps_location, expected_gaps)

    @pytest.mark.parametrize(
        "feature,parent,expected_span",
        [
            (e3_spliced, parent_genome2_1_15, SingleInterval(2, 14, Strand.PLUS, parent=parent_genome2_1_15)),
            (e3_spliced_minus, parent_genome2_1_15, SingleInterval(2, 14, Strand.MINUS, parent=parent_genome2_1_15)),
            (e3_spliced, parent_genome2_2_8, SingleInterval(2, 6, Strand.PLUS, parent=parent_genome2_2_8)),
            (e3_spliced_minus, parent_genome2_2_8, SingleInterval(2, 6, Strand.MINUS, parent=parent_genome2_2_8)),
        ],
    )
    def test_chunk_relative_span(self, feature, parent, expected_span):
        """The chunk-relative span must pass the same-nonempty-parent validation."""
        feat = feature.to_feature_interval(parent)
        # NOTE(review): judging by its name this helper presumably validates only the parents,
        # so the expected coordinates are never compared -- confirm whether an equality
        # assertion was intended here.
        ObjectValidation.require_locations_have_same_nonempty_parent(expected_span, feat.chunk_relative_span)
def test_require_location_has_parent(self):
    """require_location_has_parent() rejects a parentless interval and accepts one with a parent."""
    orphan = SingleInterval(0, 5, Strand.PLUS)
    with pytest.raises(NullParentException):
        ObjectValidation.require_location_has_parent(orphan)

    # Must not raise once a parent is attached.
    with_parent = SingleInterval(0, 5, Strand.PLUS, parent="parent")
    ObjectValidation.require_location_has_parent(with_parent)
class TestFeatureInterval:
    """Test constructing features of various types"""

    @pytest.mark.parametrize(
        "schema,expected",
        [
            (se_unspliced, se_unspliced_repr),
            (e3_spliced, e3_spliced_repr),
            (se_unspliced_minus, se_unspliced_repr_minus),
            (e3_spliced_minus, e3_spliced_repr_minus),
        ],
    )
    def test_feature_constructor(self, schema, expected):
        """The string form of a feature must match ``expected`` with and without a parent."""
        assert str(schema.to_feature_interval()) == expected
        assert str(schema.to_feature_interval(parent_or_seq_chunk_parent=parent)) == expected

    @pytest.mark.parametrize(
        "schema,expected_exception",
        [
            (
                FeatureIntervalModel.Schema().load(
                    dict(interval_starts=[0], interval_ends=[], strand=Strand.PLUS.name)
                ),
                ValidationException,
            ),
            (
                FeatureIntervalModel.Schema().load(
                    dict(interval_starts=[0], interval_ends=[2, 5], strand=Strand.PLUS.name)
                ),
                ValidationException,
            ),
            (
                FeatureIntervalModel.Schema().load(
                    dict(interval_starts=[], interval_ends=[2, 5], strand=Strand.PLUS.name)
                ),
                ValidationException,
            ),
        ],
    )
    def test_feature_excecptions(self, schema, expected_exception):
        """Models whose start and end lists differ in length must fail to build a feature."""
        with pytest.raises(expected_exception):
            _ = schema.to_feature_interval()

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (e3_spliced, 2, 0),
            (e3_spliced, 7, 4),
            (e3_spliced, 14, 9),
            (e3_spliced_minus, 2, 9),
            (e3_spliced_minus, 7, 5),
            (e3_spliced_minus, 14, 0),
        ],
    )
    def test_sequence_pos_to_feature(self, schema, value, expected):
        """Without a parent, sequence and chunk-relative positions map identically."""
        feat = schema.to_feature_interval()
        assert feat.sequence_pos_to_feature(value) == expected
        assert feat.chunk_relative_pos_to_feature(value) == expected

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (e3_spliced, (7, 13, Strand.PLUS), SingleInterval(4, 8, Strand.PLUS)),
            (e3_spliced_minus, (7, 13, Strand.PLUS), SingleInterval(2, 6, Strand.MINUS)),
        ],
    )
    def test_sequence_interval_to_feature(self, schema, value, expected):
        """Without a parent, sequence and chunk-relative intervals map identically."""
        feat = schema.to_feature_interval()
        assert feat.sequence_interval_to_feature(*value) == expected
        assert feat.chunk_relative_interval_to_feature(*value) == expected

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (e3_spliced, 0, 2),
            (e3_spliced, 9, 14),
            (e3_spliced, 4, 7),
            (e3_spliced_minus, 0, 14),
            (e3_spliced_minus, 9, 2),
            (e3_spliced_minus, 5, 7),
        ],
    )
    def test_feature_pos_to_sequence(self, schema, value, expected):
        """Feature positions map to the same coordinate with no parent and with ``parent``."""
        for kwargs in ({}, {"parent_or_seq_chunk_parent": parent}):
            feat = schema.to_feature_interval(**kwargs)
            assert feat.feature_pos_to_sequence(value) == expected
            assert feat.feature_pos_to_chunk_relative(value) == expected

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (e3_spliced, (0, 5, Strand.PLUS), CompoundInterval([2, 7], [6, 8], Strand.PLUS)),
            (e3_spliced_minus, (0, 5, Strand.PLUS), CompoundInterval([8, 12], [10, 15], Strand.MINUS)),
        ],
    )
    def test_feature_interval_to_sequence(self, schema, value, expected):
        """Feature intervals map to the expected location once parents are stripped."""
        feat = schema.to_feature_interval()
        assert feat.feature_interval_to_sequence(*value).reset_parent(None) == expected
        assert feat.feature_interval_to_chunk_relative(*value).reset_parent(None) == expected

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (e3_spliced, (0, 5, Strand.PLUS), CompoundInterval([2, 7], [6, 8], Strand.PLUS, parent=parent)),
            (
                e3_spliced_minus,
                (0, 5, Strand.PLUS),
                CompoundInterval([8, 12], [10, 15], Strand.MINUS, parent=parent),
            ),
        ],
    )
    def test_feature_interval_to_sequence_parent(self, schema, value, expected):
        """With a parent attached, the mapped location must carry that parent."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert feat.feature_interval_to_sequence(*value) == expected
        assert feat.feature_interval_to_chunk_relative(*value) == expected

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (
                e3_spliced,
                SingleInterval(0, 10, Strand.PLUS, parent=parent),
                dict(interval_starts=(2, 7), interval_ends=(6, 10), strand=Strand.PLUS.name),
            ),
            (
                e3_spliced_minus,
                SingleInterval(0, 10, Strand.PLUS, parent=parent),
                dict(interval_starts=(2, 7), interval_ends=(6, 10), strand=Strand.MINUS.name),
            ),
        ],
    )
    def test_intersection(self, schema, value, expected):
        """Intersecting with a parented interval yields the expected feature (compared via ``str``)."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        val = feat.intersect(value)
        expected = FeatureIntervalModel.Schema().load(expected).to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert str(expected) == str(val)

    @pytest.mark.parametrize(
        "schema,value,expected",
        [
            (
                e3_spliced,
                SingleInterval(0, 10, Strand.PLUS),
                dict(interval_starts=(2, 7), interval_ends=(6, 10), strand=Strand.PLUS.name),
            ),
            (
                e3_spliced_minus,
                SingleInterval(0, 10, Strand.PLUS),
                dict(interval_starts=(2, 7), interval_ends=(6, 10), strand=Strand.MINUS.name),
            ),
        ],
    )
    def test_intersection_no_parent(self, schema, value, expected):
        """Intersecting with a parentless interval yields the expected feature (compared via ``str``)."""
        feat = schema.to_feature_interval()
        val = feat.intersect(value)
        expected = FeatureIntervalModel.Schema().load(expected).to_feature_interval()
        assert str(expected) == str(val)

    @pytest.mark.parametrize(
        "schema,parent,expected_spliced",
        [
            (e3_spliced, parent, "ATTCTGGCTA"),
            (e3_spliced, parent_genome2, "GTATCTTACC"),
            # explicit strand on Parent has no effect
            (e3_spliced, parent_genome2_minus, "GTATCTTACC"),
            (e3_spliced_minus, parent, "TAGCCAGAAT"),
            (e3_spliced_minus, parent_genome2, "GGTAAGATAC"),
            # explicit strand on Parent has no effect
            (e3_spliced_minus, parent_genome2_minus, "GGTAAGATAC"),
        ],
    )
    def test_spliced_sequence(self, schema, parent, expected_spliced):
        """The spliced sequence must match the expected bases for each parent."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert str(feat.get_spliced_sequence()) == expected_spliced

    @pytest.mark.parametrize(
        "schema,parent,expected_genomic,expected_stranded_genomic",
        [
            # positive strand, so genomic == genomic_stranded
            (e3_spliced, parent, "ATTCTTGGACCTA", "ATTCTTGGACCTA"),
            (e3_spliced, parent_genome2, "GTATTCTTGGACC", "GTATTCTTGGACC"),
            # explicit strand on Parent has no effect
            (e3_spliced, parent_genome2_minus, "GTATTCTTGGACC", "GTATTCTTGGACC"),
            (e3_spliced_minus, parent, "ATTCTTGGACCTA", "TAGGTCCAAGAAT"),
            (e3_spliced_minus, parent_genome2, "GTATTCTTGGACC", "GGTCCAAGAATAC"),
            # explicit strand on Parent has no effect here either: the expected values are the
            # same as for parent_genome2, the reverse complement comes from the minus-strand feature
            (e3_spliced_minus, parent_genome2_minus, "GTATTCTTGGACC", "GGTCCAAGAATAC"),
        ],
    )
    def test_genomic_sequence(self, schema, parent, expected_genomic, expected_stranded_genomic):
        """Reference and stranded genomic sequences must match the expected bases."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        assert str(feat.get_reference_sequence()) == expected_genomic
        assert str(feat.get_genomic_sequence()) == expected_stranded_genomic

    @pytest.mark.parametrize(
        "schema,expected",
        [(e3_spliced, "GTATCTTACC"), (e3_spliced_minus, "GGTAAGATAC")],
    )
    def test_reset_parent(self, schema, expected):
        """After _reset_parent(), the spliced sequence must come from the new parent."""
        feat = schema.to_feature_interval(parent_or_seq_chunk_parent=parent)
        feat._reset_parent(parent_genome2)
        assert str(feat.get_spliced_sequence()) == str(expected)

    def test_object_conversion(self):
        """A model must round-trip through a feature interval, with and without a parent."""
        for model in [se_unspliced, e3_spliced_minus, e3_spliced]:
            for kwargs in ({}, {"parent_or_seq_chunk_parent": parent}):
                obj = model.to_feature_interval(**kwargs)
                new_model = FeatureIntervalModel.from_feature_interval(obj)
                # The GUID is cleared before comparing against the source model.
                new_model.feature_interval_guid = None
                assert model == new_model

    def test_identifiers(self):
        """identifiers and identifiers_dict must expose the feature name and id that were set."""
        feat = se_unspliced.to_feature_interval()
        feat.feature_name = "test"
        feat.feature_id = "testid"
        assert feat.identifiers == {"test", "testid"}
        assert feat.identifiers_dict == {"feature_name": "test", "feature_id": "testid"}

    def test_intersection_exception(self):
        """Intersecting with a non-overlapping interval must raise EmptyLocationException."""
        schema = FeatureIntervalModel.Schema().load(
            dict(interval_starts=[10], interval_ends=[15], strand=Strand.MINUS.name)
        )
        feat = schema.to_feature_interval()
        s = SingleInterval(0, 5, Strand.PLUS)
        with pytest.raises(EmptyLocationException):
            _ = feat.intersect(s)

    def test_gff_export_exceptions(self):
        """GFF export must fail without a sequence name, and without a chunk ancestor in relative mode."""
        feat = se_unspliced.to_feature_interval(parent_or_seq_chunk_parent=parent)
        with pytest.raises(GFF3MissingSequenceNameError):
            _ = "\n".join(str(x) for x in feat.to_gff())
        feat.sequence_name = "myseq"
        with pytest.raises(NoSuchAncestorException):
            _ = "\n".join(str(x) for x in feat.to_gff(chromosome_relative_coordinates=False))

    def test_gff_export(self):
        """GFF export of an unspliced feature must produce the exact expected rows."""
        feat = se_unspliced.to_feature_interval(parent_or_seq_chunk_parent=parent)
        feat.sequence_name = "myseq"
        assert (
            "\n".join(str(x) for x in feat.to_gff())
            == "myseq\tBioCantor\tfeature_interval\t1\t18\t.\t+\t.\tID=6940c467-070a-3524-2dcb-a478a6fa0388\n"
            "myseq\tBioCantor\tsubregion\t1\t18\t.\t+\t.\tID=feature-6940c467-070a-3524-2dcb-a478a6fa0388-1;"
            "Parent=6940c467-070a-3524-2dcb-a478a6fa0388"
        )

    def test_gff_export_subset(self):
        """GFF export on a chunk parent must give the expected rows in both coordinate modes."""
        feat = e3_spliced.to_feature_interval(parent_or_seq_chunk_parent=parent_genome2_1_15)
        feat.sequence_name = "myseq"
        assert (
            "\n".join(str(x) for x in feat.to_gff())
            == "myseq\tBioCantor\tfeature_interval\t3\t15\t.\t+\t.\tID=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t3\t6\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-1;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t8\t10\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-2;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t13\t15\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-3;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80"
        )
        assert (
            "\n".join(str(x) for x in feat.to_gff(chromosome_relative_coordinates=False))
            == "myseq\tBioCantor\tfeature_interval\t2\t14\t.\t+\t.\tID=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t2\t5\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-1;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t7\t9\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-2;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80\n"
            "myseq\tBioCantor\tsubregion\t12\t14\t.\t+\t.\tID=feature-c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80-3;"
            "Parent=c45e8d7b-cbd6-43b2-bb08-429d9cb7fe80"
        )

    @pytest.mark.parametrize(
        "feature,parent,expected_span",
        [
            (e3_spliced, None, SingleInterval(2, 15, Strand.PLUS)),
            (e3_spliced_minus, None, SingleInterval(2, 15, Strand.MINUS)),
            (se_unspliced, None, SingleInterval(0, 18, Strand.PLUS)),
            (e3_spliced, parent_genome2, SingleInterval(2, 15, Strand.PLUS, parent=parent_genome2)),
            (e3_spliced_minus, parent_genome2, SingleInterval(2, 15, Strand.MINUS, parent=parent_genome2)),
            (se_unspliced, parent_genome2, SingleInterval(0, 18, Strand.PLUS, parent=parent_genome2)),
            (
                e3_spliced,
                parent_genome2_1_15,
                SingleInterval(
                    2, 15, Strand.PLUS, parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME)
                ),
            ),
            (
                e3_spliced_minus,
                parent_genome2_1_15,
                SingleInterval(
                    2, 15, Strand.MINUS, parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME)
                ),
            ),
            (
                e3_spliced,
                parent_genome2_2_8,
                SingleInterval(
                    2, 15, Strand.PLUS, parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME)
                ),
            ),
            (
                e3_spliced_minus,
                parent_genome2_2_8,
                SingleInterval(
                    2, 15, Strand.MINUS, parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME)
                ),
            ),
        ],
    )
    def test_chromosome_span(self, feature, parent, expected_span):
        """chromosome_span must cover the whole feature regardless of which parent is supplied."""
        feat = feature.to_feature_interval(parent)
        assert feat.chromosome_span == expected_span

    @pytest.mark.parametrize(
        "feature,parent,expected_gaps",
        [
            (e3_spliced, None, CompoundInterval([6, 10], [7, 12], Strand.PLUS)),
            (e3_spliced_minus, None, CompoundInterval([6, 10], [7, 12], Strand.MINUS)),
            (se_unspliced, None, EmptyLocation()),
            (e3_spliced, parent_genome2, CompoundInterval([6, 10], [7, 12], Strand.PLUS, parent=parent_genome2)),
            (
                e3_spliced_minus,
                parent_genome2,
                CompoundInterval([6, 10], [7, 12], Strand.MINUS, parent=parent_genome2),
            ),
            (se_unspliced, parent_genome2, EmptyLocation()),
            (
                e3_spliced,
                parent_genome2_1_15,
                CompoundInterval(
                    [6, 10],
                    [7, 12],
                    Strand.PLUS,
                    parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
            (
                e3_spliced_minus,
                parent_genome2_1_15,
                CompoundInterval(
                    [6, 10],
                    [7, 12],
                    Strand.MINUS,
                    parent=parent_genome2_1_15.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
            (se_unspliced, parent_genome2_1_15, EmptyLocation()),
            # The expected parent is derived from the same chunk the feature is built on.
            (
                e3_spliced,
                parent_genome2_2_8,
                CompoundInterval(
                    [6, 10],
                    [7, 12],
                    Strand.PLUS,
                    parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
            (
                e3_spliced_minus,
                parent_genome2_2_8,
                CompoundInterval(
                    [6, 10],
                    [7, 12],
                    Strand.MINUS,
                    parent=parent_genome2_2_8.first_ancestor_of_type(SequenceType.CHROMOSOME),
                ),
            ),
        ],
    )
    def test_chromosome_gaps_location(self, feature, parent, expected_gaps):
        """chromosome_gaps_location must hold the gaps between blocks, or be empty when unspliced."""
        feat = feature.to_feature_interval(parent)
        assert feat.chromosome_gaps_location == expected_gaps
id="genome2_minus", sequence=Sequence(genome2, Alphabet.NT_STRICT), strand=Strand.MINUS, sequence_type=SequenceType.CHROMOSOME, ) # slice the genome down to contain some of the transcripts parent_genome2_1_15 = Parent( id="genome2_1_15", sequence=Sequence( genome2[1:15], Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.SEQUENCE_CHUNK, parent=Parent( location=SingleInterval( 1, 15, Strand.PLUS, parent=Parent(id="genome2", sequence_type=SequenceType.CHROMOSOME) ) ), ), ) parent_genome2_2_8 = Parent( id="genome2_2_8", sequence=Sequence( genome2[2:8], Alphabet.NT_EXTENDED_GAPPED, type=SequenceType.SEQUENCE_CHUNK, parent=Parent( location=SingleInterval( 2, 8, Strand.PLUS, parent=Parent(id="genome2", sequence_type=SequenceType.CHROMOSOME) )
def test_location(self):
    """A Parent built from a location reports an equal location via ``.location``."""
    located_parent = Parent(location=SingleInterval(0, 1, Strand.PLUS))
    assert located_parent.location == SingleInterval(0, 1, Strand.PLUS)
class TestParent:
    """Tests for ``Parent``: construction, equality, derived attributes,
    ancestor lookup, lifting a child location to the parent, and resetting
    the location."""

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,expected",
        [
            (None, None, None, None, None, Parent()),
            (
                "id",
                "seqtype",
                Strand.MINUS,
                SingleInterval(
                    0,
                    1,
                    Strand.MINUS,
                    Parent(
                        id="id",
                        sequence_type="seqtype",
                        strand=Strand.MINUS,
                        sequence=Sequence(
                            "AAA",
                            Alphabet.NT_STRICT,
                            id="id",
                            type="seqtype",
                            parent=Parent(
                                id="id2",
                                sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                            ),
                        ),
                    ),
                ),
                Sequence(
                    "AAA",
                    Alphabet.NT_STRICT,
                    id="id",
                    type="seqtype",
                    parent=Parent(id="id2", sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2")),
                ),
                Parent(
                    id="id",
                    sequence_type="seqtype",
                    strand=Strand.MINUS,
                    location=SingleInterval(
                        0,
                        1,
                        Strand.MINUS,
                        Parent(
                            id="id",
                            sequence_type="seqtype",
                            strand=Strand.MINUS,
                            sequence=Sequence(
                                "AAA",
                                Alphabet.NT_STRICT,
                                id="id",
                                type="seqtype",
                                parent=Parent(
                                    id="id2",
                                    sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                                ),
                            ),
                        ),
                    ),
                    sequence=Sequence(
                        "AAA",
                        Alphabet.NT_STRICT,
                        id="id",
                        type="seqtype",
                        parent=Parent(
                            id="id2",
                            sequence=Sequence("CCC", Alphabet.NT_STRICT, id="id2"),
                        ),
                    ),
                ),
            ),
        ],
    )
    def test_init(self, id, sequence_type, strand, location, sequence, expected):
        """A Parent built from the given keyword arguments equals ``expected``."""
        constructed = Parent(
            id=id,
            sequence_type=sequence_type,
            strand=strand,
            location=location,
            sequence=sequence,
        )
        assert expected == constructed

    @pytest.mark.parametrize(
        "id,sequence_type,strand,location,sequence,parent,expected_exception",
        [
            ("id1", None, None, SingleInterval(0, 5, Strand.PLUS, parent="id2"), None, None, ParentException),
            ("id1", None, None, None, Sequence("AAA", Alphabet.NT_STRICT, id="id2"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent="id1"),
                Sequence("AAC", Alphabet.NT_STRICT, id="id2"),
                None,
                ParentException,
            ),
            (
                None,
                "seqtype",
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                None,
                None,
                ParentException,
            ),
            (None, "seqtype", None, None, Sequence("AAT", Alphabet.NT_STRICT, type="unknown"), None, ParentException),
            (
                None,
                None,
                None,
                SingleInterval(0, 5, Strand.PLUS, parent=Parent(sequence_type="unknown")),
                Sequence("AAG", Alphabet.NT_STRICT, type="seqtype"),
                None,
                ParentException,
            ),
            (None, None, Strand.MINUS, SingleInterval(0, 5, Strand.PLUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                SingleInterval(0, 10, Strand.PLUS),
                Sequence("A", Alphabet.NT_STRICT),
                None,
                InvalidPositionException,
            ),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                LocationException,
            ),
            (None, None, Strand.PLUS, SingleInterval(5, 10, Strand.MINUS), None, None, InvalidStrandException),
            (
                None,
                None,
                None,
                None,
                Sequence("AA", Alphabet.NT_STRICT, parent="id1"),
                Parent(id="id2"),
                MismatchedParentException,
            ),
        ],
    )
    def test_init_error(self, id, sequence_type, strand, location, sequence, parent, expected_exception):
        """Constructing a Parent from the given arguments raises ``expected_exception``."""
        ctor_kwargs = {
            "id": id,
            "sequence_type": sequence_type,
            "strand": strand,
            "location": location,
            "sequence": sequence,
            "parent": parent,
        }
        with pytest.raises(expected_exception):
            Parent(**ctor_kwargs)

    @pytest.mark.parametrize(
        "obj,expected",
        [
            (
                Sequence("AAA", Alphabet.NT_STRICT),
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
            ),
            ("parent", Parent(id="parent")),
            (
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS)),
            ),
            (
                CompoundInterval([5], [10], Strand.PLUS),
                Parent(location=CompoundInterval([5], [10], Strand.PLUS)),
            ),
            (EmptyLocation(), Parent(location=EmptyLocation())),
            (Strand.MINUS, Parent(strand=Strand.MINUS)),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    parent=Parent(id="grandparent"),
                ),
            ),
        ],
    )
    def test_make_parent(self, obj, expected):
        """``make_parent(obj)`` equals the expected Parent."""
        made = make_parent(obj)
        assert made == expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(), Parent(id=None, sequence_type=None), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), False),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent=Parent(id="parent"))),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_eq(self, parent1, parent2, expected):
        """``parent1 == parent2`` evaluates to exactly the expected boolean."""
        are_equal = parent1 == parent2
        assert are_equal is expected

    @pytest.mark.parametrize(
        "parent1,parent2,expected",
        [
            (Parent(), Parent(), True),
            (Parent(id="id1"), Parent(id="id2"), False),
            (
                Parent(sequence_type=None),
                Parent(sequence_type="unknown"),
                False,
            ),
            (Parent(strand=Strand.UNSTRANDED), Parent(strand=Strand.MINUS), True),
            (
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id1")),
                Parent(location=SingleInterval(0, 5, Strand.PLUS, parent="id2")),
                False,
            ),
            (
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT)),
                Parent(sequence=Sequence("A", Alphabet.NT_STRICT, parent="parent")),
                False,
            ),
            (
                Parent(parent="parent1"),
                Parent(parent="parent2"),
                False,
            ),
        ],
    )
    def test_equals_except_location(self, parent1, parent2, expected):
        """``equals_except_location`` returns exactly the expected boolean."""
        outcome = parent1.equals_except_location(parent2)
        assert outcome is expected

    @pytest.mark.parametrize(
        "id,location,sequence,expected",
        [
            ("id", None, None, "id"),
            (
                None,
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                None,
                "id",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
            (
                "id",
                SingleInterval(0, 1, Strand.PLUS, parent="id"),
                Sequence("A", Alphabet.NT_STRICT, id="id", parent="id2"),
                "id",
            ),
        ],
    )
    def test_id(self, id, location, sequence, expected):
        """The ``id`` attribute of the constructed Parent equals ``expected``."""
        built = Parent(id=id, location=location, sequence=sequence)
        assert built.id == expected

    @pytest.mark.parametrize(
        "sequence_type,location,sequence,expected",
        [
            ("seqtype", None, None, "seqtype"),
            (
                None,
                SingleInterval(
                    0,
                    5,
                    Strand.PLUS,
                    parent=Parent(sequence_type="seqtype"),
                ),
                None,
                "seqtype",
            ),
            (
                None,
                None,
                Sequence("A", Alphabet.NT_STRICT, type="seqtype"),
                "seqtype",
            ),
            (
                None,
                None,
                Sequence(
                    "A",
                    Alphabet.NT_STRICT,
                    type="seqtype",
                    parent=Parent(sequence_type="seqtype_2"),
                ),
                "seqtype",
            ),
        ],
    )
    def test_sequence_type(self, sequence_type, location, sequence, expected):
        """The ``sequence_type`` attribute of the constructed Parent equals ``expected``."""
        built = Parent(sequence_type=sequence_type, location=location, sequence=sequence)
        assert built.sequence_type == expected

    @pytest.mark.parametrize(
        "strand,location,sequence,expected",
        [
            (Strand.PLUS, None, None, Strand.PLUS),
            (None, SingleInterval(0, 5, Strand.MINUS), None, Strand.MINUS),
            (
                Strand.PLUS,
                None,
                Sequence("A", Alphabet.NT_STRICT, parent=Strand.MINUS),
                Strand.PLUS,
            ),
        ],
    )
    def test_strand(self, strand, location, sequence, expected):
        """The ``strand`` attribute of the constructed Parent equals ``expected``."""
        built = Parent(strand=strand, location=location, sequence=sequence)
        assert built.strand == expected

    def test_location(self):
        """A Parent built from a location reports an equal location via ``.location``."""
        located_parent = Parent(location=SingleInterval(0, 1, Strand.PLUS))
        assert located_parent.location == SingleInterval(0, 1, Strand.PLUS)

    def test_sequence(self):
        """A Parent built from a sequence reports an equal sequence via ``.sequence``."""
        sequence_parent = Parent(sequence=Sequence("A", Alphabet.NT_STRICT))
        assert sequence_parent.sequence == Sequence("A", Alphabet.NT_STRICT)

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(parent="id"), Parent(id="id")),
            (
                Parent(
                    sequence=Sequence(
                        "AA",
                        Alphabet.NT_STRICT,
                        parent=Parent(sequence_type="chr"),
                    )
                ),
                Parent(sequence_type="chr"),
            ),
        ],
    )
    def test_parent(self, parent, expected):
        """The ``parent`` attribute equals the expected Parent."""
        grandparent = parent.parent
        assert grandparent == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (Parent(), Parent()),
            (Parent(strand=Strand.PLUS), Parent()),
            (
                Parent(strand=Strand.PLUS, location=SingleInterval(5, 10, Strand.PLUS)),
                Parent(),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(0, 1, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                    parent="grandparent",
                ),
            ),
        ],
    )
    def test_strip_location_info(self, parent, expected):
        """``strip_location_info()`` returns a Parent equal to ``expected``."""
        stripped = parent.strip_location_info()
        assert stripped == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                Parent(id="parent", sequence_type="seqtype"),
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                Parent(
                    id="parent",
                    sequence_type="seqtype_2",
                    parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                ),
            ),
        ],
    )
    def test_first_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """``first_ancestor_of_type`` returns a Parent equal to ``expected``."""
        ancestor = parent.first_ancestor_of_type(sequence_type, include_self=include_self)
        assert ancestor == expected

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self",
        [
            (Parent(id="self"), "seqtype_2", True),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
            ),
        ],
    )
    def test_first_ancestor_of_type_error(self, parent, sequence_type, include_self):
        """``first_ancestor_of_type`` raises NoSuchAncestorException for these inputs."""
        lookup_kwargs = {"include_self": include_self}
        with pytest.raises(NoSuchAncestorException):
            parent.first_ancestor_of_type(sequence_type, **lookup_kwargs)

    @pytest.mark.parametrize(
        "parent,sequence_type,include_self,expected",
        [
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                True,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(id="parent", sequence_type="seqtype"),
                ),
                "seqtype",
                False,
                True,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "seqtype_2",
                True,
                True,
            ),
            (
                Parent(id="self"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(id="self", parent="parent"),
                "seqtype_2",
                True,
                False,
            ),
            (
                Parent(
                    id="self",
                    sequence_type="seqtype",
                    parent=Parent(
                        id="parent",
                        sequence_type="seqtype_2",
                        parent=Parent(id="grandparent", sequence_type="seqtype_2"),
                    ),
                ),
                "chr",
                True,
                False,
            ),
        ],
    )
    def test_has_ancestor_of_type(self, parent, sequence_type, include_self, expected):
        """``has_ancestor_of_type`` returns exactly the expected boolean."""
        found = parent.has_ancestor_of_type(sequence_type, include_self=include_self)
        assert found is expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 5, Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                SingleInterval(195, 200, Strand.MINUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(6, 9, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=SingleInterval(6, 8, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                SingleInterval(13, 15, Strand.UNSTRANDED, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=SingleInterval(3, 5, Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_single_interval(self, parent, expected):
        """Lift a SingleInterval location through a parent located by a SingleInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval([13, 17], [15, 20], Strand.PLUS, parent="grandparent"),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 10], [5, 15], Strand.PLUS),
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(100, 200, Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [185, 195],
                    [190, 200],
                    Strand.MINUS,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([6], [9], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 10, Strand.PLUS),
                        sequence_type="chr",
                        strand=Strand.PLUS,
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    6,
                    9,
                    Strand.MINUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="chr",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="chr",
                    strand=Strand.MINUS,
                    location=CompoundInterval([6], [8], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        strand=Strand.MINUS,
                        location=SingleInterval(5, 15, Strand.MINUS),
                        parent="great grandparent",
                    ),
                ),
                SingleInterval(
                    7,
                    9,
                    Strand.PLUS,
                    parent=Parent(
                        id="grandparent",
                        sequence_type="unknown",
                        parent="great grandparent",
                    ),
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3, 7], [5, 10], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.PLUS)),
                ),
                CompoundInterval(
                    [13, 17],
                    [15, 20],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([3], [5], Strand.UNSTRANDED),
                    parent=Parent(id="grandparent", location=SingleInterval(10, 20, Strand.MINUS)),
                ),
                SingleInterval(15, 17, Strand.UNSTRANDED, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_single_interval(self, parent, expected):
        """Lift a CompoundInterval location through a parent located by a SingleInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # No location
            (
                Parent(parent=SingleInterval(5, 10, Strand.PLUS)),
                NullParentException,
            ),
            # Parent has no location
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent="grandparent",
                ),
                NullParentException,
            ),
            # Location on parent can't be unstranded
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=SingleInterval(0, 100, Strand.UNSTRANDED),
                    ),
                ),
                InvalidStrandException,
            ),
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 10, Strand.PLUS),
                    parent=Parent(id="grandparent", location=SingleInterval(30, 31, Strand.PLUS)),
                ),
                ValueError,
            ),
        ],
    )
    def test_lift_child_location_to_parent_single_interval_error(self, parent, expected_error):
        """``lift_child_location_to_parent`` raises ``expected_error`` for these parents."""
        lift = parent.lift_child_location_to_parent
        with pytest.raises(expected_error):
            lift()

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 10, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(10, 20, Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [14, 20],
                    [18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=SingleInterval(5, 10, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=SingleInterval(0, 4, Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                SingleInterval(41, 45, Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_contiguous_to_parent_compound_interval(self, parent, expected):
        """Lift a SingleInterval location through a parent located by a CompoundInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected",
        [
            # Location takes up entire parent location
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 5], [5, 10], Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([5, 20], [10, 25], Strand.PLUS),
                    ),
                ),
                CompoundInterval([5, 20], [10, 25], Strand.PLUS, parent="grandparent"),
            ),
            # Location (unstranded) takes up part of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([10, 22], [20, 23], Strand.UNSTRANDED),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20, 30], [18, 28, 38], Strand.MINUS),
                    ),
                ),
                CompoundInterval(
                    [11, 14, 20],
                    [12, 18, 26],
                    Strand.UNSTRANDED,
                    parent="grandparent",
                ),
            ),
            # Location (minus) takes up one block of parent location (plus); location is at end of sequence
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([5], [10], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.PLUS),
                    ),
                ),
                SingleInterval(40, 45, Strand.MINUS, parent="grandparent"),
            ),
            # Location (minus) takes up part of one block of parent location (minus)
            (
                Parent(
                    id="parent",
                    location=CompoundInterval([0, 3], [1, 4], Strand.MINUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([30, 40], [35, 45], Strand.MINUS),
                    ),
                ),
                CompoundInterval([41, 44], [42, 45], Strand.PLUS, parent="grandparent"),
            ),
        ],
    )
    def test_lift_child_location_discontiguous_to_parent_compound_interval(self, parent, expected):
        """Lift a CompoundInterval location through a parent located by a CompoundInterval."""
        lifted = parent.lift_child_location_to_parent()
        assert lifted == expected

    @pytest.mark.parametrize(
        "parent,expected_error",
        [
            # Location must fit inside location on parent
            (
                Parent(
                    location=SingleInterval(5, 50, Strand.PLUS),
                    parent=Parent(
                        id="grandparent",
                        location=CompoundInterval([10, 20], [15, 25], Strand.PLUS),
                    ),
                ),
                InvalidPositionException,
            ),
        ],
    )
    def test_lift_child_location_to_parent_compound_interval_error(self, parent, expected_error):
        """``lift_child_location_to_parent`` raises ``expected_error`` for these parents."""
        lift = parent.lift_child_location_to_parent
        with pytest.raises(expected_error):
            lift()

    @pytest.mark.parametrize(
        "parent,location,expected",
        [
            (
                Parent(),
                SingleInterval(5, 10, Strand.PLUS),
                Parent(location=SingleInterval(5, 10, Strand.PLUS), strand=Strand.PLUS),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                SingleInterval(2, 3, Strand.PLUS),
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.PLUS,
                    location=SingleInterval(2, 3, Strand.PLUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
            (
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    strand=Strand.MINUS,
                    location=SingleInterval(0, 2, Strand.MINUS),
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
                None,
                Parent(
                    id="parent",
                    sequence_type="unknown",
                    sequence=Sequence("AAA", Alphabet.NT_STRICT),
                ),
            ),
        ],
    )
    def test_reset_location(self, parent, location, expected):
        """``reset_location(location)`` returns a Parent equal to ``expected``."""
        relocated = parent.reset_location(location)
        assert relocated == expected

    @pytest.mark.parametrize(
        "parent,location,expected_exception",
        [
            (
                Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(0, 5, Strand.PLUS),
                InvalidPositionException,
            ),
            (
                Parent(id="id1", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                SingleInterval(
                    0,
                    1,
                    Strand.PLUS,
                    parent=Parent(id="id2", sequence=Sequence("AAA", Alphabet.NT_STRICT)),
                ),
                ParentException,
            ),
        ],
    )
    def test_reset_location_error(self, parent, location, expected_exception):
        """``reset_location(location)`` raises ``expected_exception`` for these inputs."""
        reset = parent.reset_location
        with pytest.raises(expected_exception):
            reset(location)

    @pytest.mark.parametrize(
        "parent,sequence,include_self,expected",
        [
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), True, False),
            (Parent(), Sequence("AA", Alphabet.NT_STRICT), False, False),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                True,
                True,
            ),
            (
                Parent(sequence=Sequence("AA", Alphabet.NT_STRICT)),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                False,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AA", Alphabet.NT_STRICT),
                ),
                Sequence("AA", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT),
                False,
                True,
            ),
            (
                Parent(
                    sequence=Sequence("AA", Alphabet.NT_STRICT),
                    parent=Sequence("AAT", Alphabet.NT_STRICT),
                ),
                Sequence("AAT", Alphabet.NT_STRICT, id="id"),
                True,
                False,
            ),
            (
                Parent(
                    parent=Parent(parent=Parent(parent=Parent(sequence=Sequence("AAA", Alphabet.NT_STRICT, id="seq"))))
                ),
                Sequence("AAA", Alphabet.NT_STRICT, id="seq"),
                True,
                True,
            ),
        ],
    )
    def test_has_ancestor_sequence(self, parent, sequence, include_self, expected):
        """``has_ancestor_sequence(sequence, include_self)`` compares equal to ``expected``."""
        found = parent.has_ancestor_sequence(sequence, include_self)
        assert found == expected
def test_require_parent_has_location(self):
    """``require_parent_has_location`` raises NullParentException for a Parent
    built with only an id and does not raise for a Parent built with a location."""
    require_location = ObjectValidation.require_parent_has_location

    # Rejected: the parent is constructed with an id only.
    with pytest.raises(NullParentException):
        require_location(Parent(id="parent"))

    # Accepted: the parent is constructed with a location.
    located_parent = Parent(location=SingleInterval(5, 6, Strand.PLUS))
    require_location(located_parent)