def __init__(
    self,
    cds_starts: List[int],
    cds_ends: List[int],
    strand: Strand,
    frames_or_phases: List[Union[CDSFrame, CDSPhase]],
    sequence_guid: Optional[UUID] = None,
    sequence_name: Optional[str] = None,
    protein_id: Optional[str] = None,
    product: Optional[str] = None,
    qualifiers: Optional[Dict[Hashable, QualifierValue]] = None,
    guid: Optional[UUID] = None,
    parent_or_seq_chunk_parent: Optional[Parent] = None,
):
    """Build a CDS interval from parallel lists of starts, ends and frames/phases.

    Args:
        cds_starts: Start coordinate of each CDS block; the first entry becomes ``self.start``.
        cds_ends: End coordinate of each CDS block; the last entry becomes ``self.end``.
        strand: Strand passed through to ``initialize_location``.
        frames_or_phases: One entry per CDS block, either all ``CDSFrame`` or all
            ``CDSPhase``. Phases are converted with ``to_frame()`` before being stored
            on ``self.frames``.
        sequence_guid: Stored as-is on the instance.
        sequence_name: Stored as-is on the instance.
        protein_id: Stored as-is on the instance; also part of the computed guid.
        product: Stored as-is on the instance; also part of the computed guid.
        qualifiers: Handed to ``_import_qualifiers_from_list``.
        guid: Explicit identifier. When ``None`` one is derived with ``digest_object``.
        parent_or_seq_chunk_parent: Optional parent passed to ``initialize_location``.

    Raises:
        MismatchedFrameException: If the number of frames/phases differs from the number
            of starts, or if frames and phases are mixed.
        InvalidCDSIntervalError: If ``self.chromosome_location`` has length zero.
    """
    self._location = self.initialize_location(cds_starts, cds_ends, strand, parent_or_seq_chunk_parent)
    self._genomic_starts, self._genomic_ends = cds_starts, cds_ends
    self.start = cds_starts[0]
    self.end = cds_ends[-1]
    self._strand = strand
    self._parent_or_seq_chunk_parent = parent_or_seq_chunk_parent

    # Plain metadata carried along with the interval.
    self.sequence_guid = sequence_guid
    self.sequence_name = sequence_name
    self.product = product
    self.protein_id = protein_id
    self._import_qualifiers_from_list(qualifiers)

    if len(self._genomic_starts) != len(frames_or_phases):
        raise MismatchedFrameException("Number of frame or phase entries must match number of exons")

    if not len(self.chromosome_location):
        raise InvalidCDSIntervalError("Cannot have an empty CDS interval")

    # The first entry decides the flavour; any later entry of the other flavour is an error.
    first_is_frame = isinstance(frames_or_phases[0], CDSFrame)
    disallowed_type = CDSPhase if first_is_frame else CDSFrame
    if any(isinstance(entry, disallowed_type) for entry in frames_or_phases[1:]):
        raise MismatchedFrameException("Cannot mix frame and phase")

    # Frames are stored as given; phases are normalised to frames.
    self.frames = frames_or_phases if first_is_frame else [entry.to_frame() for entry in frames_or_phases]

    # Derive the identifier from the interval's content unless the caller supplied one.
    self.guid = (
        digest_object(
            self._genomic_starts,
            self._genomic_ends,
            self.frames,
            self.product,
            self.protein_id,
            self.qualifiers,
        )
        if guid is None
        else guid
    )
class TestBedWriter:
    """Checks the BED12 text produced by ``to_bed12()`` for transcript and feature models."""

    # Single-exon coding transcript with an explicit sequence name.
    tx1 = {
        "exon_starts": [2],
        "exon_ends": [18],
        "strand": Strand.PLUS.name,
        "cds_starts": [5],
        "cds_ends": [9],
        "cds_frames": [CDSFrame.ZERO.name],
        "sequence_name": "chr1",
    }
    # Three-exon coding transcript with a symbol but no sequence name.
    tx2 = {
        "exon_starts": [2, 7, 12],
        "exon_ends": [6, 10, 15],
        "strand": Strand.PLUS.name,
        "cds_starts": [4, 7, 12],
        "cds_ends": [6, 10, 13],
        "cds_frames": [CDSFrame.ZERO.name, CDSFrame.TWO.name, CDSFrame.TWO.name],
        "transcript_interval_guid": digest_object(123),
        "transcript_symbol": "name",
    }
    # Feature intervals: single block with a sequence name, three blocks, and a minus-strand block.
    feat1 = {
        "interval_starts": [2],
        "interval_ends": [5],
        "strand": Strand.PLUS.name,
        "sequence_name": "chr10",
    }
    feat2 = {
        "interval_starts": [2, 7, 12],
        "interval_ends": [6, 10, 15],
        "strand": Strand.PLUS.name,
    }
    feat3 = {
        "interval_starts": [25],
        "interval_ends": [30],
        "strand": Strand.MINUS.name,
    }

    @pytest.mark.parametrize(
        "tx,expected",
        [
            (
                tx1,
                ["chr1", "2", "18", "None", "0", "+", "5", "9", "0,0,0", "1", "16", "0"],
            ),
            (
                tx2,
                ["None", "2", "15", "name", "0", "+", "4", "13", "0,0,0", "3", "4,3,3", "0,5,10"],
            ),
        ],
    )
    def test_tx(self, tx, expected):
        """A transcript model renders to the expected tab-separated BED12 fields."""
        transcript = TranscriptIntervalModel.Schema().load(tx).to_transcript_interval()
        bed_line = str(transcript.to_bed12())
        assert bed_line == "\t".join(expected)

    @pytest.mark.parametrize(
        "feat,expected",
        [
            (
                feat1,
                ["chr10", "2", "5", "None", "0", "+", "0", "0", "0,0,0", "1", "3", "0"],
            ),
            (
                feat2,
                ["None", "2", "15", "None", "0", "+", "0", "0", "0,0,0", "3", "4,3,3", "0,5,10"],
            ),
            (
                feat3,
                ["None", "25", "30", "None", "0", "-", "0", "0", "0,0,0", "1", "5", "0"],
            ),
        ],
    )
    def test_feat(self, feat, expected):
        """A feature model renders to the expected tab-separated BED12 fields."""
        feature = FeatureIntervalModel.Schema().load(feat).to_feature_interval()
        bed_line = str(feature.to_bed12())
        assert bed_line == "\t".join(expected)

    @pytest.mark.parametrize(
        "tx,score,rgb,name,expected",
        [
            (
                tx2,
                10,
                RGB(128, 128, 128),
                "transcript_symbol",
                ["None", "2", "15", "name", "10", "+", "4", "13", "128,128,128", "3", "4,3,3", "0,5,10"],
            ),
            (
                # a name that is not an attribute is expected to appear verbatim in the output
                tx2,
                10,
                RGB(128, 128, 128),
                "test",
                ["None", "2", "15", "test", "10", "+", "4", "13", "128,128,128", "3", "4,3,3", "0,5,10"],
            ),
        ],
    )
    def test_changed_metadata(self, tx, score, rgb, name, expected):
        """Score, RGB and name passed to ``to_bed12`` show up in the rendered line."""
        transcript = TranscriptIntervalModel.Schema().load(tx).to_transcript_interval()
        bed_line = str(transcript.to_bed12(score, rgb, name))
        assert bed_line == "\t".join(expected)
def __init__(
    self,
    exon_starts: List[int],
    exon_ends: List[int],
    strand: Strand,
    cds_starts: Optional[List[int]] = None,
    cds_ends: Optional[List[int]] = None,
    cds_frames: Optional[List[CDSFrame]] = None,
    qualifiers: Optional[Dict[Hashable, QualifierValue]] = None,
    is_primary_tx: Optional[bool] = None,
    transcript_id: Optional[str] = None,
    transcript_symbol: Optional[str] = None,
    transcript_type: Optional[Biotype] = None,
    sequence_guid: Optional[UUID] = None,
    sequence_name: Optional[str] = None,
    protein_id: Optional[str] = None,
    product: Optional[str] = None,
    guid: Optional[UUID] = None,
    transcript_guid: Optional[UUID] = None,
    parent_or_seq_chunk_parent: Optional[Parent] = None,
):
    """Build a transcript interval, optionally carrying a CDS.

    The transcript is coding when both ``cds_starts`` and ``cds_ends`` are given; in
    that case ``cds_frames`` is required too and a ``CDSInterval`` is built. When
    neither is given the transcript is non-coding and ``self.cds`` is ``None``.

    Args:
        exon_starts: Start coordinate of each exon; the first entry becomes ``self.start``.
        exon_ends: End coordinate of each exon; the last entry becomes ``self.end``.
        strand: Strand of the transcript.
        cds_starts: Start coordinate of each CDS block, or ``None`` for non-coding.
        cds_ends: End coordinate of each CDS block, or ``None`` for non-coding.
        cds_frames: One frame per CDS block; required when the CDS is defined.
        qualifiers: Handed to ``_import_qualifiers_from_list``.
        is_primary_tx: Stored on ``self._is_primary_feature``.
        transcript_id: Stored as-is; part of the computed guid.
        transcript_symbol: Stored as-is; part of the computed guid.
        transcript_type: Stored as-is; part of the computed guid.
        sequence_guid: Stored as-is and forwarded to the ``CDSInterval``.
        sequence_name: Stored as-is and forwarded to the ``CDSInterval``; part of the computed guid.
        protein_id: Stored as-is and forwarded to the ``CDSInterval``; part of the computed guid.
        product: Stored as-is and forwarded to the ``CDSInterval``.
        guid: Explicit identifier. When ``None`` one is derived with ``digest_object``.
        transcript_guid: Stored as-is on the instance.
        parent_or_seq_chunk_parent: Optional parent used when building the locations.

    Raises:
        InvalidCDSIntervalError: If only one of ``cds_starts``/``cds_ends`` is given, if
            their lengths differ, if they are empty, if the CDS extends past the first
            exon start or last exon end, or if ``cds_frames`` is missing or has the
            wrong length.
    """
    self._location = TranscriptInterval.initialize_location(
        exon_starts,
        exon_ends,
        strand,
        parent_or_seq_chunk_parent=parent_or_seq_chunk_parent,
    )

    if cds_starts is not None and cds_ends is None:
        raise InvalidCDSIntervalError("If CDS start is defined, CDS end must be defined")
    elif cds_starts is None and cds_ends is not None:
        raise InvalidCDSIntervalError("If CDS end is defined, CDS start must be defined")
    elif cds_starts is not None and cds_ends is not None:
        # must be coding
        if len(cds_starts) != len(cds_ends):
            raise InvalidCDSIntervalError("Number of CDS starts does not match number of CDS ends")
        elif not cds_starts:
            # Equal-length but empty lists would otherwise fail below with a bare IndexError.
            raise InvalidCDSIntervalError("Cannot have an empty CDS interval")
        elif cds_starts[0] < exon_starts[0]:
            raise InvalidCDSIntervalError("CDS start must be greater than or equal to exon start")
        elif cds_ends[-1] > exon_ends[-1]:
            raise InvalidCDSIntervalError("CDS end must be less than or equal to exon end")
        elif cds_frames is None:
            raise InvalidCDSIntervalError("If CDS interval is defined, CDS frames must be defined")
        elif len(cds_frames) != len(cds_starts):
            raise InvalidCDSIntervalError("Number of CDS frames must match number of CDS starts/ends")

        # as a result of a parent or seq chunk parent constructor, it may be the case that this CDS is entirely
        # sliced out. Check this case, and then void out the CDS.
        try:
            self.cds = CDSInterval(
                cds_starts,
                cds_ends,
                strand,
                cds_frames,
                sequence_guid,
                sequence_name,
                protein_id,
                product,
                parent_or_seq_chunk_parent=parent_or_seq_chunk_parent,
            )
        except LocationOverlapException:
            self.cds = None
        # The frames are kept even when the CDS itself was voided out above.
        self._cds_frames = cds_frames
    else:
        # NOTE(review): _cds_start/_cds_end are only assigned in this non-coding branch.
        self.cds = self._cds_frames = self._cds_start = self._cds_end = None

    self._genomic_starts = exon_starts
    self._genomic_ends = exon_ends
    self._strand = strand
    self._parent_or_seq_chunk_parent = parent_or_seq_chunk_parent
    self.start = self.genomic_start = exon_starts[0]
    self.end = self.genomic_end = exon_ends[-1]
    self._is_primary_feature = is_primary_tx
    self.transcript_id = transcript_id
    self.transcript_symbol = transcript_symbol
    self.transcript_type = transcript_type
    self.protein_id = protein_id
    self.product = product
    self.sequence_guid = sequence_guid
    self.sequence_name = sequence_name
    self.bin = bins(self.start, self.end, fmt="bed")

    # qualifiers come in as a List, convert to Set
    self._import_qualifiers_from_list(qualifiers)

    if guid is None:
        # Derive the identifier from the transcript's content, including the CDS guid if any.
        self.guid = digest_object(
            self._genomic_starts,
            self._genomic_ends,
            self._cds_frames,
            self.qualifiers,
            self.transcript_id,
            self.transcript_symbol,
            self.transcript_type,
            self.protein_id,
            self.sequence_name,
            self.is_primary_tx,
            self.cds.guid if self.cds else None,
        )
    else:
        self.guid = guid
    self.transcript_guid = transcript_guid
def test_nested_kwargs(kwargs, str_rep, uuid):
    """Keyword arguments encode to ``str_rep`` and digest to ``uuid``."""
    encoded = list(_encode_object_for_digest(**kwargs))
    assert encoded == str_rep

    digest = digest_object(**kwargs)
    assert digest == uuid
def test_nested(val, str_rep, uuid):
    """A single value encodes to ``str_rep`` and digests to ``uuid``."""
    encoded = list(_encode_object_for_digest(val))
    assert encoded == str_rep

    digest = digest_object(val)
    assert digest == uuid
def test_dicts_of_sets(val, str_rep, uuid):
    """The given value encodes to ``str_rep`` and digests to ``uuid``."""
    encoded = list(_encode_object_for_digest(val))
    assert encoded == str_rep

    digest = digest_object(val)
    assert digest == uuid
def test_sets(val, str_rep, uuid):
    """The given value encodes to ``str_rep`` and digests to ``uuid``."""
    encoded = list(_encode_object_for_digest(val))
    assert str_rep == encoded

    digest = digest_object(val)
    assert digest == uuid
def test_same_values_different_keys(args, kwargs, str_rep, uuid):
    """Positional plus keyword arguments encode to ``str_rep`` and digest to ``uuid``."""
    encoded = list(_encode_object_for_digest(*args, **kwargs))
    assert encoded == str_rep

    digest = digest_object(*args, **kwargs)
    assert digest == uuid