Esempio n. 1
0
def joined_rec1NNNrec2():
    """
    Build the 'Rec1_NNN_Rec2' record (name 'R1R2') from ``dna_seq('rec1NNNrec2')``.

    The record carries four forward-strand features: A1 (0-5), B1 (28-33),
    A2 (38-43) and B2 (66-71).

    :return: a new SeqRecordEM2 object
    """
    sequence = dna_seq('rec1NNNrec2')
    # (id, start, end) of every feature; all of them lie on strand +1.
    layout = (
        ('A1', 0, 5),
        ('B1', 28, 33),
        ('A2', 38, 43),
        ('B2', 66, 71),
    )
    features = [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=1, id=label)
        for label, begin, end in layout
    ]
    return SeqRecordEM2(sequence, id='Rec1_NNN_Rec2', name='R1R2',
                        features=features)
Esempio n. 2
0
    def orfs_to_features(self,
                         start=['ATG'],
                         stop=None,
                         filter=None,
                         add=False):
        """
        Turn the open reading frames of this record into 'ORF' features.

        ORFs are obtained from ``self.seq.get_orfs`` (reported with strand=1) and then from
        the ``get_orfs`` of the reverse-complemented record (reported with strand=-1). The
        two first items of each ORF are used as the feature start and end.
        NOTE(review): reverse-strand coordinates are used exactly as returned for the
        reverse-complemented record - confirm which coordinate system callers expect.
        NOTE(review): the list default of ``start`` is shared between calls; it is only
        passed through to ``get_orfs`` here.

        :param start: a list of accepted start codons
        :param stop: a list of accepted stop codons
        :param filter: an optional object whose ``apply`` method receives the list of ORF
         features and returns the selected ones (a FeatureFilter)
        :param add: if True, the selected ORFs are added to the record's features
        :return: a list of ORFs as SeqFeatureEM2 objects
        """
        def as_features(spans, strand):
            # One 'ORF' feature per span, all on the given strand.
            return [
                SeqFeatureEM2(location=FeatureLocation(span[0], span[1]),
                              type='ORF',
                              strand=strand)
                for span in spans
            ]

        # Forward strand first, then the reverse complement (same order as the output list).
        orf_features = as_features(self.seq.get_orfs(start=start, stop=stop), 1)
        orf_features += as_features(
            self.reverse_complement().seq.get_orfs(start=start, stop=stop), -1)

        if filter is not None:
            orf_features = filter.apply(orf_features)
        if add:
            self.features.extend(orf_features)
        return orf_features
Esempio n. 3
0
def dna_stitch_rec1_overlap_rec2():
    """
    Return the ``joined_rec1_overlap_rec2()`` record with three extra features.

    The added forward-strand features are 'Rec1' (0-35), 'Rec2' (25-60) and
    'stitcher' (22-45); they are appended after the features already present.

    :return: the extended record
    """
    record = joined_rec1_overlap_rec2()
    # (id, start, end) of the appended features, all on strand +1.
    additions = (
        ('Rec1', 0, 35),
        ('Rec2', 25, 60),
        ('stitcher', 22, 45),
    )
    record.features += [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=1, id=label)
        for label, begin, end in additions
    ]
    return record
Esempio n. 4
0
def dna_stitch_rec1_NNN_rec2rev():
    """
    Return the ``joined_rec1NNNrec2()`` record with three extra features.

    The added features are 'Rec1' (0-35, strand +1), 'Rec2' (38-73, strand -1)
    and 'stitcher' (30-48, strand +1); they are appended after the features
    already present.

    :return: the extended record
    """
    record = joined_rec1NNNrec2()
    # (id, start, end, strand) of the appended features.
    additions = (
        ('Rec1', 0, 35, 1),
        ('Rec2', 38, 73, -1),
        ('stitcher', 30, 48, 1),
    )
    record.features += [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=strand, id=label)
        for label, begin, end, strand in additions
    ]
    return record
Esempio n. 5
0
def joined_rec1_overlap_rec2():
    """
    Build the 'Rec1_overlap_Rec2' record (name 'R1R2') from
    ``dna_seq('rec1_overlap_rec2/rec3')``.

    The record carries four forward-strand features: A1 (0-5), B1 (28-33),
    A2 (25-30) and B2 (53-58).

    :return: a new SeqRecordEM2 object
    """
    sequence = dna_seq('rec1_overlap_rec2/rec3')
    # (id, start, end) of every feature; all of them lie on strand +1.
    layout = (
        ('A1', 0, 5),
        ('B1', 28, 33),
        ('A2', 25, 30),
        ('B2', 53, 58),
    )
    features = [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=1, id=label)
        for label, begin, end in layout
    ]
    return SeqRecordEM2(sequence,
                        id='Rec1_overlap_Rec2', name='R1R2',
                        features=features)
Esempio n. 6
0
    def add_feature(self, **kwargs):
        """
        Create a SeqFeatureEM2 whose parent is this record and append it to the
        record's features.

        :param kwargs: keyword arguments forwarded to the SeqFeatureEM2 class
        """
        features = self.features
        features.append(SeqFeatureEM2(parent=self, **kwargs))
Esempio n. 7
0
def dna_rec():
    """
    Build the 'Rec' record (name 'Rec') from ``dna_seq('rec')``.

    The record carries seven features A, B, C, D, F, G and H, on the forward
    (+1), reverse (-1) and unspecified (0) strands; B and G additionally have
    an explicit type ('yyy' and 'xxx').

    :return: a new SeqRecordEM2 object
    """
    sequence = dna_seq('rec')
    # (id, start, end, strand, extra keyword arguments) of every feature.
    layout = (
        ('A', 0, 2, 1, {}),
        ('B', 6, 10, 1, {'type': 'yyy'}),
        ('C', 15, 20, 1, {}),
        ('D', 20, 30, 1, {}),
        ('F', 6, 10, -1, {}),
        ('G', 18, 25, -1, {'type': 'xxx'}),
        ('H', 16, 19, 0, {}),
    )
    features = [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=strand,
                      id=label, **extra)
        for label, begin, end, strand, extra in layout
    ]
    return SeqRecordEM2(sequence, id='Rec', name='Rec', features=features)
Esempio n. 8
0
class SeqFeatureTests(unittest.TestCase):
    """
    Tests of the positional helpers of SeqFeatureEM2 (lies_within, overlaps, covers,
    intersect, move) on a dummy protein record shared by all tests.
    """
    sprot: SeqRecord = SeqRecord(
        SeqEM2.protein('MYNAMEISFREDHEREIAMWHEREARETHEYALLTHISISEXCELLENT'),
        id='X', name='DummyProt')
    # Features d1..d8 are addressed by list index in the tests below (d1 is index 0).
    # The trailing comments are the original author's residue labels for each span.
    sprot.features = [
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(0, 11),
                      type='domain', id='d1'),  # MYNAMEISFRED
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(8, 18),
                      type='domain', id='d2'),  # FREDHEREIAM
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(19, 30),
                      type='domain', id='d3'),  # WHEREARETHEY
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(6, 23),
                      type='domain', id='d4'),  # ISFREDHEREIAMWHERE
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(34, AfterPosition(39)),
                      id='d5'),  # THISIS
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(BeforePosition(2), 5),
                      type='domain', id='d6'),  # MYNAME
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(19, 23),
                      type='domain', id='d7'),  # WHERE
        SeqFeatureEM2(parent=sprot, location=FeatureLocation(BeforePosition(30), 37),
                      type='domain', id='d8')  # YALLTHI
    ]

    @classmethod
    def test_parent(cls):
        """Features keep their order and their parent exposes the record's id, name and data."""
        record = cls.sprot
        feature_ids = [feature.id for feature in record.features]
        assert feature_ids == ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8']
        d2 = record.features[1]
        assert d2.parent.id == record.id
        assert d2.parent.name == record.name
        assert d2.parent.seq._data == record.seq._data

    @classmethod
    def test_lies_within(cls):
        """d2 (8-18) lies within 5-25 but neither within 10-25 nor within 19-25."""
        d2 = cls.sprot.features[1]
        assert d2.lies_within(5, 25)
        assert not d2.lies_within(10, 25)
        assert not d2.lies_within(19, 25)

    @classmethod
    def test_lies_within_fuzzy(cls):
        """A UserWarning is expected when lies_within is called on the fuzzy features d5 and d6."""
        fuzzy_end, fuzzy_start = cls.sprot.features[4], cls.sprot.features[5]
        # Both calls share one warning context, as in the original test.
        with pytest.warns(UserWarning):
            fuzzy_end.lies_within(30, 42)
            fuzzy_start.lies_within(0, 10)

    @classmethod
    def test_overlaps(cls):
        """d3 (19-30) overlaps ranges and single positions inside it, and nothing outside."""
        d3 = cls.sprot.features[2]
        assert d3.overlaps(20, 25)
        assert d3.overlaps(20, 40)
        assert d3.overlaps(20)
        assert not d3.overlaps(35)
        assert not d3.overlaps(2, 5)

    @classmethod
    def test_overlaps_fuzzy(cls):
        """A UserWarning is expected when overlaps is called on the fuzzy features d5 and d6."""
        fuzzy_end, fuzzy_start = cls.sprot.features[4], cls.sprot.features[5]
        with pytest.warns(UserWarning):
            fuzzy_end.overlaps(35)
            fuzzy_start.overlaps(3)

    @classmethod
    def test_covers(cls):
        """d4 (6-23) covers 15-20 but not 4-20."""
        d4 = cls.sprot.features[3]
        assert d4.covers(15, 20)
        assert not d4.covers(4, 20)

    @classmethod
    def test_covers_fuzzy(cls):
        """A UserWarning is expected when covers is called on the fuzzy features d5 and d6."""
        fuzzy_end, fuzzy_start = cls.sprot.features[4], cls.sprot.features[5]
        with pytest.warns(UserWarning):
            fuzzy_end.covers(35, 38)
            fuzzy_start.covers(3, 4)

    @classmethod
    def test_intersect(cls):
        """The location of an intersection is the span shared by the two features."""
        feats = cls.sprot.features
        d5_and_d8 = feats[4].intersect(feats[7])
        assert d5_and_d8.location == FeatureLocation(34, 37)
        d3_and_d4 = feats[2].intersect(feats[3])
        assert d3_and_d4.location == feats[6].location
        d2_and_d4 = feats[1].intersect(feats[3])
        assert d2_and_d4.location == FeatureLocation(8, 18)

    @classmethod
    def test_intersect_errors(cls):
        """Intersecting d1 with a feature built without a parent raises ValueError ('Undetermined ...')."""
        d1 = cls.sprot.features[0]
        with pytest.raises(ValueError, match=r'Undetermined .*'):
            orphan = SeqFeatureEM2(location=FeatureLocation(30, 37))
            d1.intersect(orphan)

    @classmethod
    def test_intersect_fuzzy(cls):
        """A UserWarning is expected when the fuzzy feature d6 is intersected with d1."""
        feats = cls.sprot.features
        with pytest.warns(UserWarning):
            feats[5].intersect(feats[0])

    @classmethod
    def test_move(cls):
        """Moving d1 (0-11) by 5 gives the location 5-16."""
        moved = cls.sprot.features[0].move(5)
        assert moved.location == FeatureLocation(5, 16)
Esempio n. 9
0
 def test_intersect_errors(cls):
     """
     Check that intersecting the first feature of ``cls.sprot`` with a feature built
     without a parent raises a ValueError whose message starts with 'Undetermined '.
     """
     first = cls.sprot.features[0]
     with pytest.raises(ValueError, match=r'Undetermined .*'):
         orphan = SeqFeatureEM2(location=FeatureLocation(30, 37))
         first.intersect(orphan)
Esempio n. 10
0
def dna_rec2_rev():
    """
    Build the 'Rec2' record (name 'R2') from ``dna_seq('rec2')`` with the
    forward-strand features A2 (0-5) and B2 (28-33), and return the result of
    its ``reverse_complement()``.

    :return: the reverse-complemented record
    """
    sequence = dna_seq('rec2')
    features = [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=1, id=label)
        for label, begin, end in (('A2', 0, 5), ('B2', 28, 33))
    ]
    forward = SeqRecordEM2(sequence, id='Rec2', name='R2', features=features)
    return forward.reverse_complement()
Esempio n. 11
0
def dna_rec1():
    """
    Build the 'Rec1' record (name 'R1') from ``dna_seq('rec1')`` with the
    forward-strand features A1 (0-5) and B1 (28-33).

    :return: a new SeqRecordEM2 object
    """
    sequence = dna_seq('rec1')
    features = [
        SeqFeatureEM2(location=FeatureLocation(begin, end), strand=1, id=label)
        for label, begin, end in (('A1', 0, 5), ('B1', 28, 33))
    ]
    return SeqRecordEM2(sequence, id='Rec1', name='R1', features=features)
Esempio n. 12
0
class GFFtests(unittest.TestCase):
    """
    Tests of the conversions between SeqFeatureEM2 objects and the DataFrame held by GFF,
    using a dummy DNA record with three features (strands +1, 0 and -1).
    """
    sprot: SeqRecord = SeqRecord(
        SeqEM2.dna('ATGAGTCGGTAACGATGCATGCATGCAGCTGACGC'),
        id='X', name='DummyDNA')
    sprot.features = [
        # Start codon on the forward strand; it has no explicit id.
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(0, 2),
                      type='start',
                      strand=1,
                      qualifiers={'codon': 'start', 'source': '', 'phase': '0', 'score': '0'}),
        # Domain with strand 0.
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(8, 18),
                      type='domain',
                      id='d1',
                      strand=0,
                      qualifiers={'source': '', 'phase': '0', 'score': '0'}),
        # Domain on the reverse strand.
        SeqFeatureEM2(parent=sprot,
                      location=FeatureLocation(16, 30),
                      type='domain',
                      id='d2',
                      strand=-1,
                      qualifiers={'source': '', 'phase': '0', 'score': '0'})
    ]

    # Expected frame for the first feature alone.
    df0 = DataFrame({
        'seq_id': ['X'],
        'source': [''],
        'type': ['start'],
        'start': ['0'],
        'end': ['2'],
        'score': ['0'],
        'strand': ['+'],
        'phase': ['0'],
        'attributes': ['codon=start;id=<unknown id>']
    })

    # Expected frame for the three features, one row per feature.
    df1 = DataFrame({
        'seq_id': ['X', 'X', 'X'],
        'source': ['', '', ''],
        'type': ['start', 'domain', 'domain'],
        'start': ['0', '8', '16'],
        'end': ['2', '18', '30'],
        'score': ['0', '0', '0'],
        'strand': ['+', '?', '-'],
        'phase': ['0', '0', '0'],
        'attributes': ['codon=start;id=<unknown id>', 'id=d1', 'id=d2']
    })

    @staticmethod
    def _empty_frame():
        # Fresh empty frame with the nine expected columns and a reset index.
        return DataFrame(columns=[
            'seq_id', 'source', 'type', 'start', 'end', 'score', 'strand',
            'phase', 'attributes'
        ]).reset_index(drop=True)

    @classmethod
    def test_df_from_feature(cls):
        """A single feature gives ``df0``; ``None`` gives an empty frame with the same columns."""
        from_first = GFF.df_from_feature(cls.sprot.features[0])
        assert_frame_equal(from_first.reset_index(drop=True),
                           cls.df0.reset_index(drop=True))
        assert_frame_equal(GFF.df_from_feature(None).reset_index(drop=True),
                           cls._empty_frame())

    @classmethod
    def test_from_feature_list(cls):
        """A feature list gives ``df1``; an empty list, ``None`` or no argument give an empty frame."""
        assert_frame_equal(
            GFF(cls.sprot.features[0:3]).df.reset_index(drop=True),
            cls.df1.reset_index(drop=True))
        assert_frame_equal(GFF([]).df.reset_index(drop=True),
                           cls._empty_frame())
        assert_frame_equal(GFF(None).df.reset_index(drop=True),
                           cls._empty_frame())
        assert_frame_equal(GFF().df.reset_index(drop=True),
                           cls._empty_frame())

    @classmethod
    def test_add_list(cls):
        """Adding the two last features to a GFF built from the first one gives ``df1``."""
        features = cls.sprot.features
        extended = GFF([features[0]]).add_feature_list(features[1:3])
        assert_frame_equal(extended.df.reset_index(drop=True),
                           cls.df1.reset_index(drop=True))

    @classmethod
    def test_to_feature_list(cls):
        """
        Features rebuilt from ``df1`` print like the originals; their parent is the given
        record (single record or one record per feature) or None when no parent is given.
        A list of parents of the wrong length raises ValueError.
        """
        def rebuilt(**kwargs):
            # A new GFF is built for every call, as in the original assertions.
            return GFF(input_df=cls.df1).to_feature_list(**kwargs)

        count = len(cls.sprot.features)
        for i in range(count):
            assert str(rebuilt(parents=cls.sprot)[i]) == str(cls.sprot.features[i])
            assert rebuilt(parents=cls.sprot)[i].parent.id == \
                cls.sprot.features[i].parent.id
            assert rebuilt()[i].parent is None
        for i in range(count):
            per_feature = rebuilt(parents=[cls.sprot, cls.sprot, cls.sprot])
            assert str(per_feature[i]) == str(cls.sprot.features[i])
        with pytest.raises(
                ValueError,
                match=r'The number of parents should match the number of features .*'):
            rebuilt(parents=[cls.sprot])