Beispiel #1
0
 def testSplit(self):
     """Check Interval.split for the 'left', 'middle' and 'right' flags."""
     # The fixture is stored on the instance; every split call below reads it.
     self.b = [[5, 12, 'I'], [20, 22, 'II'], [23, 28, 'III']]
     # (first bound, second bound, flag, expected result), run in order.
     cases = (
         (21, 24, 'left', [[5, 12, 'I'], [20, 21, 'II']]),
         (24, 26, 'middle', [[24, 26, 'III']]),
         (8, 21, 'right', [[21, 22, 'II'], [23, 28, 'III']]),
     )
     for first, second, flag, expected in cases:
         result = Interval.split(self.b, first, second, flag=flag)
         self.assertListEqual(result, expected, 'Failed in Split')
Beispiel #2
0
 def testInstanceSubstract(self):
     """Check `-` and `-=` when both operands are Interval instances."""
     # Wrap both fixtures so the operators run instance against instance.
     self.a = Interval(self.a)
     self.b = Interval(self.b)
     # Expected for both `a - b` and the in-place `a -= b`.
     a_minus_b = [[1, 5, 'a', 'c'], [17, 20, 'b', 'd'],
                  [22, 23, 'b', 'd'], [30, 35, 'e']]

     difference = self.a - self.b
     self.assertListEqual(difference.interval, a_minus_b,
                          'Failed in instance c = a - b')

     difference = self.b - self.a
     self.assertListEqual(difference.interval, [[25, 28, 'III']],
                          'Failed in instance c = b - a')

     self.a -= self.b
     self.assertListEqual(self.a.interval, a_minus_b,
                          'Failed in instance a -= b')
Beispiel #3
0
def check_gDNA_reads(read_info, read_seq, target_fa):
    """Return ``read_info``, or freshly mapped records when padded hits merge.

    The second field of every entry in a deep copy of ``read_info`` is
    increased by 10.  If building an ``Interval`` from that copy yields fewer
    items than the copy holds, ``read_seq`` is mapped again with
    ``target_fa.map(read_seq, MD=True)`` and a list with one
    ``[index1, index2, r_st, r_en, strand, aln, alignment]`` record per hit
    is returned.  Otherwise ``read_info`` itself is returned unchanged.
    """
    query_len = len(read_seq)
    # Pad a copy so the caller's records are never modified.
    padded = deepcopy(read_info)
    for entry in padded:
        entry[1] += 10
    if len(Interval(padded)) >= len(padded):
        # Nothing collapsed after padding: keep the alignments as given.
        return read_info

    realigned = []
    for hit in target_fa.map(read_seq, MD=True):
        # Soft-clip the parts of the read outside the aligned query span.
        head_clip = '{}S'.format(hit.q_st) if hit.q_st > 0 else ''
        tail_clip = ('{}S'.format(query_len - hit.q_en)
                     if query_len > hit.q_en else '')
        on_plus = hit.strand == 1
        if on_plus:
            strand = '+'
            cigar = head_clip + hit.cigar_str + tail_clip
        else:
            # Minus strand: the clip strings swap sides.
            strand = '-'
            cigar = tail_clip + hit.cigar_str + head_clip
        # convert_CIGAR is asked to reverse the alignment for minus hits.
        alignment = convert_CIGAR(cigar, hit.MD, reverse=not on_plus)
        pieces = ['{}{}'.format(seg[0], seg[1]) for seg in alignment]
        aln = ''.join(pieces)
        index1, index2 = index_alignment(aln)
        record = [index1, index2, hit.r_st, hit.r_en, strand, aln, alignment]
        realigned.append(record)
    return realigned
Beispiel #4
0
 def testMapto(self):
     """Check Interval.mapto(self.d, self.c) against the expected records."""
     expected = [
         [3, 4, 'a', 'I'],
         [3, 7, 'b', 'I'],
         [4, 6, 'e', 'I'],
         [4, 7, 'd', 'I'],
         [5, 7, 'f', 'I'],
         [10, 11, 'd', 'II'],
         [10, 12, 'x', 'II'],
         [16, 20, 'x', 'III'],
         [16, 17, 'h', 'III'],
         [18, 20, 'i', 'III'],
         [23, 24, 'x', 'IV'],
     ]
     mapped = Interval.mapto(self.d, self.c)
     self.assertListEqual(mapped, expected, 'Failed in Mapto')
Beispiel #5
0
 def testInstanceAdd(self):
     """Check `+` and `+=` when both operands are Interval instances."""
     # Both fixtures become Interval instances before any operator is used.
     self.a = Interval(self.a)
     self.b = Interval(self.b)
     # All three additions below are expected to give this same result.
     union = [[1, 12, 'a', 'c', 'I'],
              [17, 28, 'b', 'd', 'II', 'III'],
              [30, 35, 'e']]

     total = self.a + self.b
     self.assertListEqual(total.interval, union,
                          'Failed in instance c = a + b')

     # TODO: unclear why swapping the operands does not change the order
     total = self.b + self.a
     self.assertListEqual(total.interval, union,
                          'Failed in instance c = b + a')

     self.a += self.b
     self.assertListEqual(self.a.interval, union,
                          'Failed in instance a += b')
Beispiel #6
0
 def testInstanceMultiple(self):
     """Check `*` and `*=` when both operands are Interval instances."""
     # Both fixtures become Interval instances before any operator is used.
     self.a = Interval(self.a)
     self.b = Interval(self.b)
     # Expected for `a * b` and `a *= b`: tags of a come before tags of b.
     a_times_b = [[5, 12, 'a', 'c', 'I'],
                  [20, 22, 'b', 'd', 'II'],
                  [23, 25, 'b', 'd', 'III']]
     # Expected for `b * a`: same spans, tags of b come first.
     b_times_a = [[5, 12, 'I', 'a', 'c'],
                  [20, 22, 'II', 'b', 'd'],
                  [23, 25, 'III', 'b', 'd']]

     product = self.a * self.b
     self.assertListEqual(product.interval, a_times_b,
                          'Failed in instance c = a * b')

     product = self.b * self.a
     self.assertListEqual(product.interval, b_times_a,
                          'Failed in instance c = b * a')

     self.a *= self.b
     self.assertListEqual(self.a.interval, a_times_b,
                          'Failed in instance a *= b')
Beispiel #7
0
 def testAdd(self):
     """Check `+` and `+=` between an Interval instance and the raw self.b."""
     # Only self.a is wrapped here; self.b is used as the fixture provides it.
     self.a = Interval(self.a)
     # All three additions below are expected to give this same result.
     union = [[1, 12, 'a', 'c', 'I'],
              [17, 28, 'b', 'd', 'II', 'III'],
              [30, 35, 'e']]

     total = self.a + self.b
     self.assertListEqual(total.interval, union, 'Failed in c = a + b')

     # Reflected form: the non-wrapped operand is on the left.
     total = self.b + self.a
     self.assertListEqual(total.interval, union, 'Failed in c = b + a')

     self.a += self.b
     self.assertListEqual(self.a.interval, union, 'Failed in a += b')
Beispiel #8
0
 def testSlice(self):
     """Check indexing, slicing and the `in` operator on an Interval."""
     self.a = Interval(self.a)
     self.assertEqual(self.a[1], [17, 25, 'b', 'd'], 'Failed in a[1]')
     first_two = [[1, 12, 'a', 'c'], [17, 25, 'b', 'd']]
     self.assertListEqual(self.a[:2], first_two, 'Failed in a[:2]')

     # Single spans first, then pairs of spans; each must test as `in` a.
     candidates = (
         [27, 34],
         [31, 34],
         [31, 37],
         [27, 37],
         [[27, 32], [33, 34]],
         [[31, 32], [33, 34]],
         [[31, 32], [33, 37]],
         [[27, 32], [33, 37]],
     )
     for candidate in candidates:
         # Build the message up front so it shows the candidate as written.
         message = 'Failed in {} in a'.format(candidate)
         self.assertTrue(candidate in self.a, message)
Beispiel #9
0
def parse_indel(read_info, target_name, target, bond_name=None, gDNA=False):
    """Summarise the indels of every alignment of one read against a target.

    Args:
        read_info: iterable of ``[index1, index2, start, end, strand, aln,
            alignment]`` records, where ``alignment`` is a sequence of
            ``(length, op)`` pairs.
        target_name: key into ``target`` for the reference being evaluated.
        target: dict of per-name dicts.  ``target[target_name]`` must provide
            ``'total'``, ``'interval'`` and ``'rev_interval'``, plus
            ``'cut_left'`` / ``'cut_right'`` when ``gDNA`` is true.
        bond_name: key into ``target`` whose entry provides ``'left_bond'``
            and ``'right_bond'``; only read when ``gDNA`` is false.
        gDNA: selects the gDNA branch (tolerance 20, indels counted only
            around the cut window) instead of the donor branch (tolerance 5,
            every indel counted).

    Returns:
        ``[indel_info, final_tag, min_indel, min_cut_indel, target_name]``.
        ``indel_info`` holds one ``'|'``-joined summary string per record;
        the three middle values come from the record with the smallest total
        indel (the first one on ties).  When ``read_info`` is empty they are
        all ``None`` and ``indel_info`` is ``[]``.
    """
    # Initialise every "best record" slot up front so an empty read_info
    # yields None placeholders instead of UnboundLocalError at the return.
    min_indel = None
    min_cut_indel = None
    final_tag = None
    indel_info = []
    target_len = target[target_name]['total']
    if not gDNA:  # donor
        left_bond = target[bond_name]['left_bond']
        right_bond = target[bond_name]['right_bond']
        dis = 5
    else:  # gDNA
        left_bond = 0
        right_bond = 0
        cut_left = target[target_name]['cut_left']
        cut_right = target[target_name]['cut_right']
        dis = 20
    for index1, index2, start, end, strand, aln, alignment in read_info:
        if strand == '+':
            interval = target[target_name]['interval']
        else:
            interval = target[target_name]['rev_interval']
            # reverse start and end
            start, end = target_len - end, target_len - start
        aln_info = '{}|{}|{}|{}|{}|{}'.format(strand, start, end, index1,
                                              index2, aln)
        if start >= dis or target_len - end >= dis:
            # The hit stops at least `dis` short of one target end: name the
            # segments it maps to instead of assuming the full layout.
            target_tag = 'partially'
            seg_type = '-'.join(
                x[2] for x in Interval.mapto([start, end], interval))
        else:
            target_tag = 'full'
            if strand == '+':
                seg_type = 'L-LH-C-RH-R' if gDNA else 'LH-I-RH'
            else:
                seg_type = 'R-RH-C-LH-L' if gDNA else 'RH-I-LH'
        cut_indel = 0
        total_indel = 0
        reference_pos = start
        read_tag = 'full'
        for n, (num, t) in enumerate(alignment):
            if t in ('S', 'H'):
                # A clip at index 0 is compared with left_bond; any other
                # clip is treated as the trailing one.
                if n == 0:  # first position
                    if num - left_bond >= dis:
                        read_tag = 'partially'
                else:  # last position
                    if num - right_bond >= dis:
                        read_tag = 'partially'
            if gDNA:
                # Only events touching [cut_left, cut_right] count as cut
                # indels; reference_pos follows the reference-consuming ops.
                if t == 'I' and cut_left <= reference_pos <= cut_right:
                    cut_indel += num
                if t in ('D', 'U'):
                    if (cut_left <= reference_pos + num
                            and reference_pos <= cut_right):
                        cut_indel += num
                if t in ('M', 'U', 'D'):
                    reference_pos += num
            else:
                if t in ('D', 'U', 'I'):
                    cut_indel += num
            if t in ('D', 'U', 'I'):
                total_indel += num
        if target_tag == 'partially' or read_tag == 'partially':
            tag = 'partially'
        else:
            tag = 'full'
        # Strict '<' keeps the first record when totals tie.
        if min_indel is None or total_indel < min_indel:
            min_indel = total_indel
            min_cut_indel = cut_indel
            final_tag = tag
        indel_info.append('{}|{}|{}|{}|{}'.format(target_name, seg_type,
                                                  total_indel, cut_indel,
                                                  aln_info))
    return [indel_info, final_tag, min_indel, min_cut_indel, target_name]
Beispiel #10
0
 def testLen(self):
     """Check that len() of an Interval built from each fixture is 3."""
     a = Interval(self.a)
     self.assertEqual(len(a), 3, 'Failed in length')
     b = Interval(self.b)
     # The message previously read 'Fbiled in length' (typo).
     self.assertEqual(len(b), 3, 'Failed in length')
Beispiel #11
0
 def testInit(self):
     """Check that building an Interval from self.a merges its intervals."""
     expected = [[1, 12, 'a', 'c'], [17, 25, 'b', 'd'], [30, 35, 'e']]
     merged = Interval(self.a)
     self.assertListEqual(merged.interval, expected, 'Failed in initiation')
Beispiel #12
0
 def testOverlapwith(self):
     """Check Interval.overlapwith(self.c, self.d) against expected rows."""
     expected = [
         [3, 7, 'I', 'a', 'b', 'e', 'd', 'f'],
         [10, 12, 'II', 'd', 'x'],
         [16, 20, 'III', 'x', 'h', 'i'],
         [23, 25, 'IV', 'x'],
     ]
     overlaps = Interval.overlapwith(self.c, self.d)
     self.assertListEqual(overlaps, expected, 'Failed in Overlapwith')
Beispiel #13
0
 def testConvert(self):
     """Check that Interval turns the flat self.e into a nested list."""
     converted = Interval(self.e)
     expected = [[10, 15, 't']]
     self.assertListEqual(converted.interval, expected, 'Failed in convert')