Beispiel #1
0
    def join(self, other=None, offset=0, keepself=True):
        """
        Join two SeqRecordEM2 objects into a new record holding the merged sequence.

        ``offset`` is the distance between the end of ``self`` and the start of
        ``other``: a positive value leaves a gap that is filled with
        ``self.seq.any_residue``, zero concatenates the sequences, and a
        negative value makes them overlap. If ``other`` would start before
        ``self`` (``len(self.seq) + offset < 0``) the roles are swapped and the
        join is delegated to ``other.join(self, ...)``.

        :param other: the other SeqRecordEM2 object. Although the default is
            None, a record must be supplied; the method reads ``other.seq``,
            ``other.id`` and ``other.features``.
        :param offset: the offset of the two sequences. If the value is
            negative, then the two sequences overlap.
        :param keepself: if true and the overlapping subsequences differ, keep
            the residues of self in the overlap, otherwise keep those of other.
        :return: the result of merging records as a new SeqRecordEM2 object.
            Its id is ``self.id + '_' + other.id`` and it carries the features
            of both records (location, strand and id), with the locations of
            other's features shifted into the merged coordinates.
        :raises ValueError: if exactly one of the two sequences is a protein.
        """
        self_len = len(self.seq)
        # Index, in self's coordinates, at which other begins.
        start = self_len + offset

        if start < 0:
            # other begins before self: swap the roles so the record that
            # starts first is always the receiver. keepself is inverted so the
            # same physical record still wins in the overlap.
            return other.join(self,
                              offset=-self_len - len(other.seq) - offset,
                              keepself=not keepself)

        self_str = str(self.seq)
        other_str = str(other.seq)

        if offset >= 0:
            # No overlap: pad the gap (possibly empty) with the placeholder.
            new_seq = self_str + self.seq.any_residue * offset + other_str
        else:
            # other may end before self does (other lies inside self), so the
            # shared region is at most len(other) residues long.
            shared = min(-offset, len(other_str))
            if self_str[start:start + shared] != other_str[:shared]:
                warnings.warn(
                    'Warning!!! Overlapping subsequences are different.')
            if keepself:
                # Keep all of self; append whatever of other extends past it.
                new_seq = self_str + other_str[shared:]
            else:
                # Splice other over self, keeping any part of self that
                # continues after the end of other.
                new_seq = (self_str[:start] + other_str +
                           self_str[start + len(other_str):])

        self_is_protein = self.seq.is_protein()
        other_is_protein = other.seq.is_protein()
        if self_is_protein and other_is_protein:
            new_record = SeqRecordEM2(SeqEM2.protein(new_seq))
        elif not (self_is_protein or other_is_protein):
            new_record = SeqRecordEM2(SeqEM2.dna(new_seq))
        else:
            raise ValueError(
                'Sequences are not of the same type. It is impossible to join them.'
            )

        new_record.id = self.id + '_' + other.id

        # self's residues keep their positions in the merged sequence.
        for feature in self.features:
            new_record.add_feature(location=feature.location,
                                   strand=feature.strand,
                                   id=feature.id)

        # other's residues begin at index `start` of the merged sequence.
        for feature in other.features:
            new_record.add_feature(location=FeatureLocation(
                feature.location.start + start,
                feature.location.end + start),
                                   strand=feature.strand,
                                   id=feature.id)
        return new_record
Beispiel #2
0
class SeqFeatureTests(unittest.TestCase):
    """
    Tests for the positional helpers of SeqFeatureEM2 (lies_within, overlaps,
    covers, intersect, move) against one shared dummy protein record.

    NOTE(review): each ``pytest.warns`` block below wraps two calls. The
    context manager is satisfied by a single matching warning from anywhere
    in the block, so the second call is not verified independently.
    """

    # Shared fixture: one protein record and eight features d1..d8. The
    # trailing comments are the original author's notes on each feature.
    sprot: SeqRecord = SeqRecord(
        SeqEM2.protein('MYNAMEISFREDHEREIAMWHEREARETHEYALLTHISISEXCELLENT'),
        id='X',
        name='DummyProt',
    )
    sprot.features = [
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(0, 11),
                      type='domain', id='d1'),  # MYNAMEISFRED
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(8, 18),
                      type='domain', id='d2'),  # FREDHEREIAM
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(19, 30),
                      type='domain', id='d3'),  # WHEREARETHEY
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(6, 23),
                      type='domain', id='d4'),  # ISFREDHEREIAMWHERE
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(34, AfterPosition(39)),
                      id='d5'),  # THISIS
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(BeforePosition(2), 5),
                      type='domain', id='d6'),  # MYNAME
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(19, 23),
                      type='domain', id='d7'),  # WHERE
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(BeforePosition(30), 37),
                      type='domain', id='d8'),  # YALLTHI
    ]

    @classmethod
    def test_parent(cls):
        """Features keep their order and point back at the owning record."""
        record = cls.sprot
        expected_ids = ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8']
        assert [feat.id for feat in record.features] == expected_ids

        owner = record.features[1].parent
        assert owner.id == record.id
        assert owner.name == record.name
        assert owner.seq._data == record.seq._data

    @classmethod
    def test_lies_within(cls):
        """d2 (8..18) lies within 5..25 but not within 10..25 or 19..25."""
        d2 = cls.sprot.features[1]
        assert d2.lies_within(5, 25)
        assert not d2.lies_within(10, 25)
        assert not d2.lies_within(19, 25)

    @classmethod
    def test_lies_within_fuzzy(cls):
        """lies_within on the fuzzy-bounded d5/d6 emits a UserWarning."""
        feats = cls.sprot.features
        with pytest.warns(UserWarning):
            feats[4].lies_within(30, 42)
            feats[5].lies_within(0, 10)

    @classmethod
    def test_overlaps(cls):
        """d3 (19..30) overlaps ranges and single positions as asserted."""
        d3 = cls.sprot.features[2]
        assert d3.overlaps(20, 25)
        assert d3.overlaps(20, 40)
        assert d3.overlaps(20)
        assert not d3.overlaps(35)
        assert not d3.overlaps(2, 5)

    @classmethod
    def test_overlaps_fuzzy(cls):
        """overlaps on the fuzzy-bounded d5/d6 emits a UserWarning."""
        feats = cls.sprot.features
        with pytest.warns(UserWarning):
            feats[4].overlaps(35)
            feats[5].overlaps(3)

    @classmethod
    def test_covers(cls):
        """d4 (6..23) covers 15..20 but not 4..20."""
        d4 = cls.sprot.features[3]
        assert d4.covers(15, 20)
        assert not d4.covers(4, 20)

    @classmethod
    def test_covers_fuzzy(cls):
        """covers on the fuzzy-bounded d5/d6 emits a UserWarning."""
        feats = cls.sprot.features
        with pytest.warns(UserWarning):
            feats[4].covers(35, 38)
            feats[5].covers(3, 4)

    @classmethod
    def test_intersect(cls):
        """Intersections of feature pairs have the expected locations."""
        feats = cls.sprot.features

        d5_and_d8 = feats[4].intersect(feats[7])
        assert d5_and_d8.location == FeatureLocation(34, 37)

        d3_and_d4 = feats[2].intersect(feats[3])
        assert d3_and_d4.location == feats[6].location

        d2_and_d4 = feats[1].intersect(feats[3])
        assert d2_and_d4.location == FeatureLocation(8, 18)

    @classmethod
    def test_intersect_errors(cls):
        """Intersecting d1 with a parent-less feature raises ValueError."""
        d1 = cls.sprot.features[0]
        with pytest.raises(ValueError, match=r'Undetermined .*'):
            d1.intersect(SeqFeatureEM2(location=FeatureLocation(30, 37)))

    @classmethod
    def test_intersect_fuzzy(cls):
        """Intersecting the fuzzy-bounded d6 with d1 emits a UserWarning."""
        feats = cls.sprot.features
        with pytest.warns(UserWarning):
            feats[5].intersect(feats[0])

    @classmethod
    def test_move(cls):
        """Moving d1 (0..11) by 5 yields a feature located at 5..16."""
        shifted = cls.sprot.features[0].move(5)
        assert shifted.location == FeatureLocation(5, 16)
Beispiel #3
0
def protein_seq():
    """Build and return a SeqEM2 protein for the text 'HITHEREFREDANDGREG'."""
    residues = 'HITHEREFREDANDGREG'
    return SeqEM2.protein(residues)