def testSplit(self):
    """Check Interval.split with the 'left', 'middle' and 'right' flags."""
    self.b = [[5, 12, 'I'], [20, 22, 'II'], [23, 28, 'III']]
    # One row per call: (start, end, flag, expected result).
    cases = (
        (21, 24, 'left', [[5, 12, 'I'], [20, 21, 'II']]),
        (24, 26, 'middle', [[24, 26, 'III']]),
        (8, 21, 'right', [[21, 22, 'II'], [23, 28, 'III']]),
    )
    for start, end, flag, expected in cases:
        result = Interval.split(self.b, start, end, flag=flag)
        self.assertListEqual(result, expected, 'Failed in Split')
def testInstanceSubstract(self):
    """Check ``-`` and ``-=`` when both operands are Interval instances."""
    # Wrap both fixtures so the operators act on two Interval instances.
    self.a = Interval(self.a)
    self.b = Interval(self.b)

    a_minus_b = [
        [1, 5, 'a', 'c'],
        [17, 20, 'b', 'd'],
        [22, 23, 'b', 'd'],
        [30, 35, 'e'],
    ]
    b_minus_a = [[25, 28, 'III']]

    difference = self.a - self.b
    self.assertListEqual(difference.interval, a_minus_b,
                         'Failed in instance c = a - b')

    difference = self.b - self.a
    self.assertListEqual(difference.interval, b_minus_a,
                         'Failed in instance c = b - a')

    # The in-place form is expected to give the same result as a - b.
    self.a -= self.b
    self.assertListEqual(self.a.interval, a_minus_b,
                         'Failed in instance a -= b')
def check_gDNA_reads(read_info, read_seq, target_fa):
    """Re-align a read when its padded segments overlap each other.

    Each record of ``read_info`` gets 10 added to its second field (on a
    deep copy, so the caller's data is untouched).  If building an
    ``Interval`` from the padded records yields fewer entries than there
    are records, the read is mapped again with ``target_fa`` and a fresh
    list of records is returned; otherwise ``read_info`` is returned
    unchanged.

    Args:
        read_info: list of mutable records; only index 1 is modified on
            the copy here.
        read_seq: the read sequence passed to ``target_fa.map``.
        target_fa: object exposing ``map(seq, MD=True)`` whose hits carry
            ``q_st``, ``q_en``, ``r_st``, ``r_en``, ``strand``,
            ``cigar_str`` and ``MD``.
            NOTE(review): looks like a mappy aligner - confirm.

    Returns:
        Either ``read_info`` itself, or a list of
        ``[index1, index2, r_st, r_en, strand, aln, alignment]`` records.
    """
    read_len = len(read_seq)

    # add padding
    padded = deepcopy(read_info)
    for record in padded:
        record[1] += 10

    # Nothing collapsed after padding: keep the original records.
    if len(Interval(padded)) >= len(padded):
        return read_info

    # realign
    realigned = []
    for hit in target_fa.map(read_seq, MD=True):
        # Unaligned query bases on each side are written as soft clips.
        left_clip = '{}S'.format(hit.q_st) if hit.q_st > 0 else ''
        right_clip = (
            '{}S'.format(read_len - hit.q_en) if read_len > hit.q_en else ''
        )

        if hit.strand == 1:
            # plus strand
            strand = '+'
            is_reverse = False
            cigar = left_clip + hit.cigar_str + right_clip
        else:
            # minus strand: the clips swap sides
            strand = '-'
            is_reverse = True
            cigar = right_clip + hit.cigar_str + left_clip

        # reverse alignment if possible
        alignment = convert_CIGAR(cigar, hit.MD, reverse=is_reverse)
        aln = ''.join('{}{}'.format(part[0], part[1]) for part in alignment)
        index1, index2 = index_alignment(aln)
        realigned.append(
            [index1, index2, hit.r_st, hit.r_en, strand, aln, alignment])
    return realigned
def testMapto(self):
    """Check the result of Interval.mapto(self.d, self.c)."""
    expected = [
        [3, 4, 'a', 'I'],
        [3, 7, 'b', 'I'],
        [4, 6, 'e', 'I'],
        [4, 7, 'd', 'I'],
        [5, 7, 'f', 'I'],
        [10, 11, 'd', 'II'],
        [10, 12, 'x', 'II'],
        [16, 20, 'x', 'III'],
        [16, 17, 'h', 'III'],
        [18, 20, 'i', 'III'],
        [23, 24, 'x', 'IV'],
    ]
    mapped = Interval.mapto(self.d, self.c)
    self.assertListEqual(mapped, expected, 'Failed in Mapto')
def testInstanceAdd(self):
    """Check ``+`` and ``+=`` when both operands are Interval instances."""
    # Wrap both fixtures so self.a and self.b are Interval instances.
    self.a = Interval(self.a)
    self.b = Interval(self.b)

    # The same result is expected for a + b, b + a and a += b.
    union = [
        [1, 12, 'a', 'c', 'I'],
        [17, 28, 'b', 'd', 'II', 'III'],
        [30, 35, 'e'],
    ]

    total = self.a + self.b
    self.assertListEqual(total.interval, union,
                         'Failed in instance c = a + b')

    # TODO: why this addition did not change the order
    total = self.b + self.a
    self.assertListEqual(total.interval, union,
                         'Failed in instance c = b + a')

    self.a += self.b
    self.assertListEqual(self.a.interval, union,
                         'Failed in instance a += b')
def testInstanceMultiple(self):
    """Check ``*`` and ``*=`` when both operands are Interval instances."""
    # Wrap both fixtures so self.a and self.b are Interval instances.
    self.a = Interval(self.a)
    self.b = Interval(self.b)

    # Expected labels follow operand order: left operand's labels first.
    a_times_b = [
        [5, 12, 'a', 'c', 'I'],
        [20, 22, 'b', 'd', 'II'],
        [23, 25, 'b', 'd', 'III'],
    ]
    b_times_a = [
        [5, 12, 'I', 'a', 'c'],
        [20, 22, 'II', 'b', 'd'],
        [23, 25, 'III', 'b', 'd'],
    ]

    product = self.a * self.b
    self.assertListEqual(product.interval, a_times_b,
                         'Failed in instance c = a * b')

    product = self.b * self.a
    self.assertListEqual(product.interval, b_times_a,
                         'Failed in instance c = b * a')

    self.a *= self.b
    self.assertListEqual(self.a.interval, a_times_b,
                         'Failed in instance a *= b')
def testAdd(self):
    """Check ``+`` and ``+=`` between an Interval instance and raw self.b."""
    # Only self.a is wrapped; self.b is used exactly as the fixture has it.
    self.a = Interval(self.a)

    # The same result is expected for a + b, b + a and a += b.
    union = [
        [1, 12, 'a', 'c', 'I'],
        [17, 28, 'b', 'd', 'II', 'III'],
        [30, 35, 'e'],
    ]

    total = self.a + self.b
    self.assertListEqual(total.interval, union, 'Failed in c = a + b')

    total = self.b + self.a
    self.assertListEqual(total.interval, union, 'Failed in c = b + a')

    self.a += self.b
    self.assertListEqual(self.a.interval, union, 'Failed in a += b')
def testSlice(self):
    """Check indexing, slicing and ``in`` membership on an Interval."""
    self.a = Interval(self.a)

    self.assertEqual(self.a[1], [17, 25, 'b', 'd'], 'Failed in a[1]')
    self.assertListEqual(
        self.a[:2],
        [[1, 12, 'a', 'c'], [17, 25, 'b', 'd']],
        'Failed in a[:2]')

    # Each query (single range or list of ranges) must be reported as
    # contained in self.a; the paired message is used on failure.
    membership_cases = (
        ([27, 34], 'Failed in [27, 34] in a'),
        ([31, 34], 'Failed in [31, 34] in a'),
        ([31, 37], 'Failed in [31, 37] in a'),
        ([27, 37], 'Failed in [27, 37] in a'),
        ([[27, 32], [33, 34]], 'Failed in [[27, 32], [33, 34]] in a'),
        ([[31, 32], [33, 34]], 'Failed in [[31, 32], [33, 34]] in a'),
        ([[31, 32], [33, 37]], 'Failed in [[31, 32], [33, 37]] in a'),
        ([[27, 32], [33, 37]], 'Failed in [[27, 32], [33, 37]] in a'),
    )
    for query, message in membership_cases:
        self.assertTrue(query in self.a, message)
def parse_indel(read_info, target_name, target, bond_name=None, gDNA=False):
    """Summarise the indels of every alignment of a read against one target.

    Args:
        read_info: iterable of 7-item records
            ``(index1, index2, start, end, strand, aln, alignment)`` where
            ``alignment`` is a sequence of ``(length, op)`` pairs.
        target_name: key of the target inside ``target``.
        target: dict of per-target settings.  ``target[target_name]`` must
            provide ``'total'``, ``'interval'`` and ``'rev_interval'``, and
            also ``'cut_left'`` / ``'cut_right'`` when ``gDNA`` is true.
            ``target[bond_name]`` must provide ``'left_bond'`` and
            ``'right_bond'`` when ``gDNA`` is false.
        bond_name: key used to look up the bond sizes (donor mode only).
        gDNA: select gDNA mode (tolerance 20, indels counted against the
            cut window) instead of donor mode (tolerance 5, every indel
            counted as a cut indel).

    Returns:
        ``[indel_info, final_tag, min_indel, min_cut_indel, target_name]``.
        ``indel_info`` holds one ``'|'``-joined summary string per record.
        The three middle values come from the record with the smallest
        total indel (the first one wins on ties) and are all ``None``
        when ``read_info`` is empty.
    """
    # Best-record trackers are initialised together so that an empty
    # ``read_info`` returns ``None`` values instead of raising
    # UnboundLocalError at the final ``return``.
    min_indel = None
    min_cut_indel = None
    final_tag = None
    indel_info = []
    target_len = target[target_name]['total']

    if not gDNA:
        # donor
        left_bond = target[bond_name]['left_bond']
        right_bond = target[bond_name]['right_bond']
        cut_left = cut_right = None  # not used in donor mode
        dis = 5
    else:
        # gDNA
        left_bond = 0
        right_bond = 0
        cut_left = target[target_name]['cut_left']
        cut_right = target[target_name]['cut_right']
        dis = 20

    for index1, index2, start, end, strand, aln, alignment in read_info:
        if strand == '+':
            interval = target[target_name]['interval']
        else:
            interval = target[target_name]['rev_interval']
            # reverse start and end
            start, end = target_len - end, target_len - start
        aln_info = '{}|{}|{}|{}|{}|{}'.format(
            strand, start, end, index1, index2, aln)

        # The target counts as fully covered only when both uncovered
        # ends are shorter than the tolerance.
        if start >= dis or target_len - end >= dis:
            target_tag = 'partially'
            seg_type = '-'.join(
                x[2] for x in Interval.mapto([start, end], interval))
        else:
            target_tag = 'full'
            if strand == '+':
                seg_type = 'L-LH-C-RH-R' if gDNA else 'LH-I-RH'
            else:
                seg_type = 'R-RH-C-LH-L' if gDNA else 'RH-I-LH'

        cut_indel = 0
        total_indel = 0
        reference_pos = start
        read_tag = 'full'
        for n, (num, t) in enumerate(alignment):
            if t in ('S', 'H'):
                if n == 0:
                    # first position
                    if num - left_bond >= dis:
                        read_tag = 'partially'
                else:
                    # last position
                    if num - right_bond >= dis:
                        read_tag = 'partially'
            if gDNA:
                # Only indels touching [cut_left, cut_right] are counted.
                if t == 'I' and cut_left <= reference_pos <= cut_right:
                    cut_indel += num
                if t in ('D', 'U'):
                    if (cut_left <= reference_pos + num
                            and reference_pos <= cut_right):
                        cut_indel += num
                # These operations advance the reference position.
                if t in ('M', 'U', 'D'):
                    reference_pos += num
            else:
                if t in ('D', 'U', 'I'):
                    cut_indel += num
            if t in ('D', 'U', 'I'):
                total_indel += num

        if target_tag == 'partially' or read_tag == 'partially':
            tag = 'partially'
        else:
            tag = 'full'

        # Strict '<' keeps the first record among equal minima.
        if min_indel is None or total_indel < min_indel:
            min_indel = total_indel
            min_cut_indel = cut_indel
            final_tag = tag
        indel_info.append('{}|{}|{}|{}|{}'.format(
            target_name, seg_type, total_indel, cut_indel, aln_info))

    return [indel_info, final_tag, min_indel, min_cut_indel, target_name]
def testLen(self):
    """Check that len() is 3 for an Interval built from each fixture."""
    a = Interval(self.a)
    self.assertEqual(len(a), 3, 'Failed in length')
    b = Interval(self.b)
    # The failure message previously read 'Fbiled in length' (typo).
    self.assertEqual(len(b), 3, 'Failed in length')
def testInit(self):
    """Check that constructing an Interval from self.a merges intervals."""
    merged = [[1, 12, 'a', 'c'], [17, 25, 'b', 'd'], [30, 35, 'e']]
    instance = Interval(self.a)
    self.assertListEqual(instance.interval, merged, 'Failed in initiation')
def testOverlapwith(self):
    """Check the result of Interval.overlapwith(self.c, self.d)."""
    expected = [
        [3, 7, 'I', 'a', 'b', 'e', 'd', 'f'],
        [10, 12, 'II', 'd', 'x'],
        [16, 20, 'III', 'x', 'h', 'i'],
        [23, 25, 'IV', 'x'],
    ]
    overlapped = Interval.overlapwith(self.c, self.d)
    self.assertListEqual(overlapped, expected, 'Failed in Overlapwith')
def testConvert(self):
    """Check that Interval(self.e) exposes a nested list as ``.interval``."""
    # convert list to nested list
    converted = Interval(self.e)
    expected = [[10, 15, 't']]
    self.assertListEqual(converted.interval, expected, 'Failed in convert')