コード例 #1
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_intersection(self):
     '''Interval.intersection() gives None for disjoint intervals, otherwise the shared range'''
     base = intervals.Interval(5, 10)
     overlapping = intervals.Interval(8, 15)
     disjoint = intervals.Interval(12, 20)

     # (5, 10) and (12, 20) have no position in common
     self.assertEqual(base.intersection(disjoint), None)

     # (5, 10) and (8, 15) share positions 8..10
     shared = base.intersection(overlapping)
     self.assertEqual(shared, intervals.Interval(8, 10))
コード例 #2
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_length_sum_from_list(self):
        '''length_sum_from_list() adds up the lengths of all intervals in a list'''
        # Expected total is 14 for these three intervals
        coord_pairs = [(1, 2), (4, 5), (10, 19)]
        interval_list = [
            intervals.Interval(start, end) for start, end in coord_pairs
        ]

        total = intervals.length_sum_from_list(interval_list)
        self.assertEqual(14, total)
コード例 #3
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_init(self):
     '''Interval() must raise intervals.Error for a non-int coordinate or when end < start'''
     # The last pair has int coordinates but end < start
     invalid_args = [('a', 1), (1, 'a'), ('a', 'a'), (3, 2)]

     for start, end in invalid_args:
         with self.assertRaises(intervals.Error):
             intervals.Interval(start, end)
コード例 #4
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_union_flll_gap(self):
     '''union_fill_gap() spans from the lowest start to the highest end, whether or not the intervals intersect'''
     a = intervals.Interval(5, 10)
     b = intervals.Interval(8, 15)
     c = intervals.Interval(12, 20)
     d = intervals.Interval(21, 22)

     # (first, second, expected); each pair is checked in both call orders
     cases = [
         (a, c, intervals.Interval(5, 20)),
         (a, b, intervals.Interval(5, 15)),
         (c, d, intervals.Interval(12, 22)),
     ]

     for first, second, expected in cases:
         self.assertEqual(first.union_fill_gap(second), expected)
         self.assertEqual(second.union_fill_gap(first), expected)
コード例 #5
0
def _orfs_from_aa_seq(seq):
    orfs = []
    pos = 0
    while pos < len(seq):
        next_stop = seq.find('*', pos)
        if next_stop == -1:
            orfs.append(intervals.Interval(pos, len(seq) - 1))
            break
        elif next_stop > pos:
            orfs.append(intervals.Interval(pos, next_stop))
        pos = next_stop + 1
    return orfs
コード例 #6
0
 def gaps(self, min_length=1):
     '''Finds the positions of all gaps (runs of N or n) in the sequence that are at least min_length long.

     Returns a list of Intervals in order of position. Coords are zero-based,
     and the end coordinate is the last gap character of the run.'''
     gap_regex = re.compile('N+', re.IGNORECASE)
     found = []
     for match in gap_regex.finditer(self.seq):
         start, end = match.span()
         # span() is half-open (end is one past the last N), so the run
         # length is end - start; adding 1 would let a run one character
         # shorter than min_length through
         if end - start >= min_length:
             found.append(intervals.Interval(start, end - 1))
     return found
コード例 #7
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_union(self):
     '''union() gives None when no union is formed, otherwise the combined interval'''
     a = intervals.Interval(5, 10)
     b = intervals.Interval(8, 15)
     c = intervals.Interval(12, 20)
     d = intervals.Interval(21, 22)

     # (first, second, expected); each pair is checked in both call orders.
     # c and d do not overlap but sit next to each other (20 and 21).
     cases = [
         (a, c, None),
         (a, b, intervals.Interval(5, 15)),
         (c, d, intervals.Interval(12, 22)),
     ]

     for first, second, expected in cases:
         self.assertEqual(first.union(second), expected)
         self.assertEqual(second.union(first), expected)
コード例 #8
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_merge_overlapping_in_list(self):
        '''merge_overlapping_in_list() merges the intervals of a list in place'''
        # Input is deliberately unsorted and contains a duplicate
        input_coords = [(1, 2), (51, 60), (10, 20), (20, 30), (20, 30),
                        (29, 50), (65, 70)]
        merged_coords = [(1, 2), (10, 60), (65, 70)]

        to_merge = [intervals.Interval(s, e) for s, e in input_coords]
        expected = [intervals.Interval(s, e) for s, e in merged_coords]

        # The list passed in is what gets checked, not a return value
        intervals.merge_overlapping_in_list(to_merge)
        self.assertSequenceEqual(to_merge, expected)
コード例 #9
0
    def contig_coords(self):
        '''Finds coords of contigs, i.e. every stretch of the sequence that is not part of a gap as reported by gaps(). Returns a list of Intervals. Coords are zero-based'''
        # Contigs are the complement of the gaps, so derive them from the gap coords
        gap_list = self.gaps()
        seq_length = len(self)

        if not gap_list:
            return [intervals.Interval(0, seq_length - 1)]

        # Flat list of alternating contig start / contig end coordinates
        boundaries = [0]
        for gap in gap_list:
            if gap.start == 0:
                # sequence begins with a gap: first contig starts after it
                boundaries = [gap.end + 1]
            else:
                boundaries.extend((gap.start - 1, gap.end + 1))

        # A trailing start inside the sequence still needs its end coordinate
        if boundaries[-1] < seq_length:
            boundaries.append(seq_length - 1)

        # Pair up (start, end); an unpaired trailing start is dropped
        return [
            intervals.Interval(start, end)
            for start, end in zip(boundaries[0::2], boundaries[1::2])
        ]
コード例 #10
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_intersects(self):
        '''intersects() is True when two intervals share a position, including just an end coordinate'''
        a = intervals.Interval(5, 10)

        disjoint_coords = [(3, 4), (11, 20)]
        overlapping_coords = [(3, 5), (3, 6), (9, 12), (10, 12), (6, 7),
                              (1, 20)]

        no_intersect = [intervals.Interval(s, e) for s, e in disjoint_coords]
        intersect = [intervals.Interval(s, e) for s, e in overlapping_coords]

        for other in no_intersect:
            result = a.intersects(other)
            message = 'shouldn\'t intersect: ' + str(a) + ', ' + str(other)
            self.assertFalse(result, message)

        for other in intersect:
            result = a.intersects(other)
            message = 'should intersect: ' + str(a) + ', ' + str(other)
            self.assertTrue(result, message)
コード例 #11
0
ファイル: sequences_test.py プロジェクト: js21/Fastaq
    def test_orfs_from_aa_seq(self):
        '''_orfs_from_aa_seq() should split an amino acid string at its '*' characters'''
        # (amino acid string, expected ORF coords)
        cases = [
            ('', []),
            ('*', []),
            ('**', []),
            ('A', [(0, 0)]),
            ('A*A*A', [(0, 1), (2, 3), (4, 4)]),
            ('AB**CDE*AB', [(0, 2), (4, 7), (8, 9)]),
            ('*ABCDE*', [(1, 6)]),
            ('**ABCDE**', [(2, 7)]),
        ]

        expected_orfs = [
            [intervals.Interval(s, e) for s, e in coords]
            for _, coords in cases
        ]

        for (aa_seq, _), expected in zip(cases, expected_orfs):
            found = sequences._orfs_from_aa_seq(aa_seq)
            self.assertListEqual(expected, found)
コード例 #12
0
    def orfs(self, frame=0, revcomp=False):
        '''Finds the ORFs of this sequence in a single reading frame.

        frame: reading frame offset; must be 0, 1 or 2.
        revcomp: if True, the sequence is reverse complemented in place for the
            translation and then reverse complemented back again.

        Returns a list of Intervals. The coords from _orfs_from_aa_seq() are
        scaled by 3 and shifted by frame; when revcomp is True they are also
        mirrored about len(self).'''
        assert frame in [0, 1, 2]

        if revcomp:
            self.revcomp()
        try:
            # trailing 'X' characters are stripped from the translation
            # NOTE(review): presumably these come from incomplete codons - confirm
            aa_seq = self.translate(frame=frame).seq.rstrip('X')
        finally:
            # always undo the in-place reverse complement, even if the
            # translation raised, so the object is never left flipped
            if revcomp:
                self.revcomp()

        seq_length = len(self)
        nucleotide_orfs = []
        for aa_orf in _orfs_from_aa_seq(aa_seq):
            if revcomp:
                start = seq_length - (aa_orf.end * 3 + 3) - frame
                end = seq_length - (aa_orf.start * 3) - 1 - frame
            else:
                start = aa_orf.start * 3 + frame
                end = aa_orf.end * 3 + 2 + frame

            nucleotide_orfs.append(intervals.Interval(start, end))

        return nucleotide_orfs
コード例 #13
0
ファイル: sequences_test.py プロジェクト: js21/Fastaq
    def test_gaps(self):
        '''gaps() should report each run of N in a sequence as one Interval'''
        # (sequence, expected gap coords)
        cases = [
            ('ACGT', []),
            ('NACGT', [(0, 0)]),
            ('NACGTN', [(0, 0), (5, 5)]),
            ('ANNCGT', [(1, 2)]),
            ('NANNCGTNN', [(0, 0), (2, 3), (7, 8)]),
        ]

        fastas = [sequences.Fasta('ID', bases) for bases, _ in cases]
        expected_gaps = [
            [intervals.Interval(s, e) for s, e in coords]
            for _, coords in cases
        ]

        for fasta, expected in zip(fastas, expected_gaps):
            self.assertListEqual(expected, fasta.gaps())
コード例 #14
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_comparisons(self):
     '''<, <=, == and != should work as expected'''
     # Separate objects with equal coords are used on purpose, so that the
     # checks compare values rather than identity
     one_two = intervals.Interval(1, 2)
     one_two_again = intervals.Interval(1, 2)
     one_three = intervals.Interval(1, 3)
     two_two = intervals.Interval(2, 2)
     two_two_again = intervals.Interval(2, 2)

     # ordering
     self.assertTrue(one_two < two_two)
     self.assertTrue(one_two <= two_two)
     self.assertFalse(two_two <= one_two)
     self.assertFalse(two_two < one_two)
     self.assertFalse(two_two < two_two_again)

     # equality and inequality
     self.assertTrue(one_two == one_two_again)
     self.assertFalse(one_two == one_three)
     self.assertTrue(one_two != one_three)
     self.assertFalse(one_two != one_two_again)
コード例 #15
0
ファイル: sequences_test.py プロジェクト: js21/Fastaq
    def test_all_orfs(self):
        '''all_orfs(min_length=120) should return the expected (Interval, flag) pairs for sequence '1' of sequences_test_orfs.fa'''
        seqs = {}
        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_orfs.fa'), seqs)
        seq = seqs['1']
        orfs = seq.all_orfs(min_length=120)

        # Each entry is (expected interval, expected boolean flag).
        # NOTE(review): the flag presumably marks reverse-strand ORFs - confirm
        expected = [(intervals.Interval(27, 221), False),
                    (intervals.Interval(44, 226), False),
                    (intervals.Interval(48, 170), True),
                    (intervals.Interval(109, 240), False),
                    (intervals.Interval(143, 265), True),
                    (intervals.Interval(227, 421), False),
                    (intervals.Interval(277, 432), True),
                    (intervals.Interval(286, 477), False),
                    (intervals.Interval(288, 518), True),
                    (intervals.Interval(562, 702), False),
                    (intervals.Interval(600, 758), False),
                    (intervals.Interval(605, 817), False),
                    (intervals.Interval(818, 937), False),
                    (intervals.Interval(835, 987), False),
                    (intervals.Interval(864, 998), False)]

        # Lengths are checked first, so zip() below cannot hide a missing entry
        self.assertEqual(len(orfs), len(expected))

        for found, wanted in zip(orfs, expected):
            self.assertEqual(found[0], wanted[0])
            self.assertEqual(found[1], wanted[1])
コード例 #16
0
ファイル: sequences_test.py プロジェクト: js21/Fastaq
    def test_orfs(self):
        '''orfs() should return the expected Intervals for each frame, with and without revcomp'''
        # (sequence, frame, revcomp, expected ORF coords)
        cases = [
            ('AAACCCGG', 0, False, [(0, 5)]),
            ('AAAACCCGG', 1, False, [(1, 6)]),
            ('AAAAACCCGG', 2, False, [(2, 7)]),
            ('CCGGGTTT', 0, True, [(2, 7)]),
            ('CCGGGTTTT', 1, True, [(2, 7)]),
            ('CCGGGTTTTT', 2, True, [(2, 7)]),
            ('AAACCCTGA', 0, False, [(0, 8)]),
            ('AAACCCTGATAG', 0, False, [(0, 8)]),
            ('AAACCCTGA', 1, False, [(1, 6)]),
            ('', 0, False, []),
            ('A', 0, False, []),
            ('AA', 0, False, []),
            ('AAA', 0, False, [(0, 2)]),
            ('AAAAAA', 0, False, [(0, 5)]),
            ('AAA', 1, False, []),
            ('AAA', 2, False, []),
            ('AAA', 0, True, [(0, 2)]),
            ('AAA', 1, True, []),
            ('AAA', 2, True, []),
            ('TAA', 0, False, []),
            ('CTA', 0, True, []),
        ]

        # Build every fixture before running any check
        test_seqs = [
            (sequences.Fasta('ID', bases), frame, revcomp,
             [intervals.Interval(s, e) for s, e in coords])
            for bases, frame, revcomp, coords in cases
        ]

        for fasta, frame, revcomp, expected in test_seqs:
            found = fasta.orfs(frame=frame, revcomp=revcomp)
            self.assertListEqual(found, expected)
コード例 #17
0
ファイル: sequences_test.py プロジェクト: js21/Fastaq
    def test_contig_coords(self):
        '''contig_coords() should return the coords of every non-gap stretch of a sequence'''
        # (sequence, expected contig coords)
        cases = [
            ('ACGT', [(0, 3)]),
            ('NACGT', [(1, 4)]),
            ('NNACGT', [(2, 5)]),
            ('ACGTN', [(0, 3)]),
            ('ACGTNN', [(0, 3)]),
            ('NANNCGT', [(1, 1), (4, 6)]),
            ('ACNNNGTNA', [(0, 1), (5, 6), (8, 8)]),
            ('ANNCGTNNAAAAA', [(0, 0), (3, 5), (8, 12)]),
        ]

        fastas = [sequences.Fasta('ID', bases) for bases, _ in cases]
        expected_contigs = [
            [intervals.Interval(s, e) for s, e in coords]
            for _, coords in cases
        ]

        for fasta, expected in zip(fastas, expected_contigs):
            contigs = fasta.contig_coords()
            self.assertListEqual(expected, contigs)
コード例 #18
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
 def test_len(self):
     '''len() of an Interval counts both end coordinates'''
     # (start, end, expected length)
     cases = [(1, 2, 2), (1, 1, 1), (10, 20, 11)]
     for start, end, expected in cases:
         self.assertEqual(len(intervals.Interval(start, end)), expected)
コード例 #19
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_contains(self):
        '''contains() is True only when the other interval lies entirely inside this one (shared end coords allowed)'''
        a = intervals.Interval(5, 10)

        outside_coords = [(1, 2), (4, 5), (4, 10), (4, 11), (5, 11), (1, 2),
                          (9, 11), (10, 11), (11, 20)]
        inside_coords = [(5, 5), (5, 10), (6, 7), (6, 10), (10, 10)]

        not_contained = [intervals.Interval(s, e) for s, e in outside_coords]
        contained = [intervals.Interval(s, e) for s, e in inside_coords]

        for other in not_contained:
            result = a.contains(other)
            message = 'shouldn\'t contain: ' + str(a) + ', ' + str(other)
            self.assertFalse(result, message)

        for other in contained:
            result = a.contains(other)
            message = 'should contain: ' + str(a) + ', ' + str(other)
            self.assertTrue(result, message)
コード例 #20
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_remove_contained_in_list(self):
        '''remove_contained_in_list() drops, in place, every interval that is contained in another one from the list'''
        input_coords = [
            (1, 2), (4, 4), (4, 5), (5, 6), (7, 9), (8, 10), (9, 11),
            (20, 25), (20, 24), (20, 26),
            (30, 38), (30, 37), (30, 36), (30, 35), (30, 35), (32, 33),
            (38, 50), (65, 70), (67, 70),
        ]
        # Intervals that only partly overlap, e.g. (7, 9) and (8, 10), must survive
        kept_coords = [
            (1, 2), (4, 5), (5, 6), (7, 9), (8, 10), (9, 11),
            (20, 26), (30, 38), (38, 50), (65, 70),
        ]

        to_filter = [intervals.Interval(s, e) for s, e in input_coords]
        expected = [intervals.Interval(s, e) for s, e in kept_coords]

        # The list passed in is what gets checked, not a return value
        intervals.remove_contained_in_list(to_filter)
        self.assertSequenceEqual(to_filter, expected)
コード例 #21
0
ファイル: intervals_test.py プロジェクト: js21/Fastaq
    def test_intersection(self):
        '''intersection() should return the intervals shared by two lists of intervals, in either argument order'''

        def build(coord_pairs):
            # Turn (start, end) tuples into a list of Intervals
            return [intervals.Interval(s, e) for s, e in coord_pairs]

        first = build([(1, 2), (10, 20), (51, 52), (54, 55), (57, 58)])
        second = build([(5, 6), (9, 11), (13, 14), (17, 18), (20, 25),
                        (50, 60)])
        far_away = build([(100, 200)])

        shared = build([(10, 11), (13, 14), (17, 18), (20, 20), (51, 52),
                        (54, 55), (57, 58)])

        # Same result whichever list comes first
        self.assertSequenceEqual(intervals.intersection(first, second), shared)
        self.assertSequenceEqual(intervals.intersection(second, first), shared)

        # Nothing shared, or an empty list on either side, gives an empty result
        self.assertSequenceEqual(intervals.intersection(far_away, first), [])
        self.assertEqual(intervals.intersection([], first), [])
        self.assertEqual(intervals.intersection(first, []), [])