def join(self, other=None, offset=0, keepself=True):
    """
    Join two SeqRecordEM2 objects into a new one representing the merged sequence.

    :param other: the other SeqRecordEM2 object (required, despite the ``None`` default)
    :param offset: the offset between the end of this record's sequence and the start of
        the other record's sequence. A positive value leaves a gap that is filled with
        ``self.seq.any_residue``; a negative value means the two sequences overlap.
    :param keepself: if True and the overlapping subsequences are different, keep the
        sequence from this record, otherwise keep the sequence of the other record.
    :return: the result of merging the records as a new SeqRecordEM2 object
    :raises TypeError: if ``other`` is not supplied
    :raises ValueError: if exactly one of the two sequences is a protein
    """
    if other is None:
        raise TypeError('join() requires another record to join with, got None.')

    self_len = len(self.seq)
    # Index, in this record's coordinates, at which the other sequence starts.
    other_start = self_len + offset

    if other_start < 0:
        # The other sequence starts before this one: swap the roles so that the
        # leftmost record is always the receiver, and translate the offset and
        # the keepself preference accordingly.
        return other.join(self,
                          offset=-self_len - len(other.seq) - offset,
                          keepself=not keepself)

    self_str = str(self.seq)
    other_str = str(other.seq)

    if offset >= 0:
        new_seq = self_str + self.seq.any_residue * offset + other_str
    else:
        # The overlap can never be longer than the other sequence: when the other
        # sequence is fully contained in this one, only len(other) residues are
        # shared and the tail of this sequence sticks out past it.
        other_end = other_start + len(other_str)
        overlap_len = min(-offset, len(other_str))
        if self_str[other_start:other_end] != other_str[:overlap_len]:
            warnings.warn(
                'Warning!!! Overlapping subsequences are different.')
        # Identity check kept on purpose: only the literal True selects this record.
        if keepself is True:
            new_seq = self_str + other_str[-offset:]
        else:
            # Re-append whatever part of this sequence extends beyond the other
            # one (empty for an ordinary partial overlap).
            new_seq = self_str[:other_start] + other_str + self_str[other_end:]

    if self.seq.is_protein() and other.seq.is_protein():
        new_record = SeqRecordEM2(SeqEM2.protein(new_seq))
    elif not (self.seq.is_protein() or other.seq.is_protein()):
        new_record = SeqRecordEM2(SeqEM2.dna(new_seq))
    else:
        raise ValueError(
            'Sequences are not of the same type. It is impossible to join them.'
        )

    new_record.id = self.id + '_' + other.id

    # Features of this record keep their coordinates in the merged record.
    for feature in self.features:
        new_record.add_feature(location=feature.location,
                               strand=feature.strand,
                               id=feature.id)
    # Features of the other record are shifted to where its sequence now starts.
    for feature in other.features:
        new_record.add_feature(
            location=FeatureLocation(feature.location.start + other_start,
                                     feature.location.end + other_start),
            strand=feature.strand,
            id=feature.id)
    return new_record
class SeqFeatureTests(unittest.TestCase):
    """Tests for the positional helpers of SeqFeatureEM2.

    All tests share one dummy protein record (``sprot``) carrying eight
    features ``d1`` .. ``d8``; ``d5`` ends on an ``AfterPosition`` and ``d6``
    and ``d8`` start on a ``BeforePosition``.
    """

    # Shared record; the trailing comments on the features are the original
    # author's annotation of the residues each one is meant to span.
    sprot: SeqRecord = SeqRecord(
        SeqEM2.protein('MYNAMEISFREDHEREIAMWHEREARETHEYALLTHISISEXCELLENT'),
        id='X',
        name='DummyProt')
    sprot.features = [
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(0, 11),
                      type='domain',
                      id='d1'),  # MYNAMEISFRED
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(8, 18),
                      type='domain',
                      id='d2'),  # FREDHEREIAM
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(19, 30),
                      type='domain',
                      id='d3'),  # WHEREARETHEY
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(6, 23),
                      type='domain',
                      id='d4'),  # ISFREDHEREIAMWHERE
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(34, AfterPosition(39)),
                      id='d5'),  # THISIS
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(BeforePosition(2), 5),
                      type='domain',
                      id='d6'),  # MYNAME
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(19, 23),
                      type='domain',
                      id='d7'),  # WHERE
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(BeforePosition(30), 37),
                      type='domain',
                      id='d8')  # YALLTHI
    ]

    @classmethod
    def test_parent(cls):
        """Feature ids are in order and the second feature's parent mirrors the record."""
        record = cls.sprot
        feature_ids = [feature.id for feature in record.features]
        assert feature_ids == ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8']

        owner = record.features[1].parent
        assert owner.id == record.id
        assert owner.name == record.name
        assert owner.seq._data == record.seq._data

    @classmethod
    def test_lies_within(cls):
        """d2 (8..18) lies within 5..25 but not within 10..25 or 19..25."""
        d2 = cls.sprot.features[1]
        assert d2.lies_within(5, 25)
        assert not d2.lies_within(10, 25)
        assert not d2.lies_within(19, 25)

    @classmethod
    def test_lies_within_fuzzy(cls):
        """lies_within on fuzzy-bounded features emits a UserWarning.

        Both calls share one ``pytest.warns`` block, so the block is satisfied
        as soon as at least one of them warns.
        """
        features = cls.sprot.features
        fuzzy_end, fuzzy_start = features[4], features[5]
        with pytest.warns(UserWarning):
            fuzzy_end.lies_within(30, 42)
            fuzzy_start.lies_within(0, 10)

    @classmethod
    def test_overlaps(cls):
        """d3 (19..30) overlaps ranges and single positions that touch it only."""
        d3 = cls.sprot.features[2]
        assert d3.overlaps(20, 25)
        assert d3.overlaps(20, 40)
        assert d3.overlaps(20)
        assert not d3.overlaps(35)
        assert not d3.overlaps(2, 5)

    @classmethod
    def test_overlaps_fuzzy(cls):
        """overlaps on fuzzy-bounded features emits a UserWarning (one shared block)."""
        features = cls.sprot.features
        fuzzy_end, fuzzy_start = features[4], features[5]
        with pytest.warns(UserWarning):
            fuzzy_end.overlaps(35)
            fuzzy_start.overlaps(3)

    @classmethod
    def test_covers(cls):
        """d4 (6..23) covers 15..20 but not 4..20."""
        d4 = cls.sprot.features[3]
        assert d4.covers(15, 20)
        assert not d4.covers(4, 20)

    @classmethod
    def test_covers_fuzzy(cls):
        """covers on fuzzy-bounded features emits a UserWarning (one shared block)."""
        features = cls.sprot.features
        fuzzy_end, fuzzy_start = features[4], features[5]
        with pytest.warns(UserWarning):
            fuzzy_end.covers(35, 38)
            fuzzy_start.covers(3, 4)

    @classmethod
    def test_intersect(cls):
        """Intersections of overlapping features yield the expected locations."""
        features = cls.sprot.features

        d5_and_d8 = features[4].intersect(features[7])
        assert d5_and_d8.location == FeatureLocation(34, 37)

        d3_and_d4 = features[2].intersect(features[3])
        assert d3_and_d4.location == features[6].location

        d2_and_d4 = features[1].intersect(features[3])
        assert d2_and_d4.location == FeatureLocation(8, 18)

    @classmethod
    def test_intersect_errors(cls):
        """Intersecting d1 with a parent-less feature raises ValueError('Undetermined ...')."""
        d1 = cls.sprot.features[0]
        with pytest.raises(ValueError, match=r'Undetermined .*'):
            # The orphan feature is built inside the block, as in the original test.
            d1.intersect(SeqFeatureEM2(location=FeatureLocation(30, 37)))

    @classmethod
    def test_intersect_fuzzy(cls):
        """Intersecting the fuzzy-start feature d6 with d1 emits a UserWarning."""
        features = cls.sprot.features
        with pytest.warns(UserWarning):
            features[5].intersect(features[0])

    @classmethod
    def test_move(cls):
        """Moving d1 (0..11) by 5 gives a feature located at 5..16."""
        shifted = cls.sprot.features[0].move(5)
        assert shifted.location == FeatureLocation(5, 16)
def protein_seq():
    """Return a SeqEM2 protein sequence built from the residues 'HITHEREFREDANDGREG'."""
    residues = 'HITHEREFREDANDGREG'
    return SeqEM2.protein(residues)