Beispiel #1
0
 def testMapto(self):
     """Interval.mapto(self.d, self.c) must yield the expected segments."""
     mapped = Interval.mapto(self.d, self.c)
     expected = [
         [3, 4, 'a', 'I'],
         [3, 7, 'b', 'I'],
         [4, 6, 'e', 'I'],
         [4, 7, 'd', 'I'],
         [5, 7, 'f', 'I'],
         [10, 11, 'd', 'II'],
         [10, 12, 'x', 'II'],
         [16, 20, 'x', 'III'],
         [16, 17, 'h', 'III'],
         [18, 20, 'i', 'III'],
         [23, 24, 'x', 'IV'],
     ]
     self.assertListEqual(mapped, expected, 'Failed in Mapto')
Beispiel #2
0
def parse_indel(read_info, target_name, target, bond_name=None, gDNA=False):
    """Summarise the indels of every alignment of a read against one target.

    Args:
        read_info: Iterable of 7-item records
            ``(index1, index2, start, end, strand, aln, alignment)``.
            ``alignment`` is an iterable of ``(num, t)`` pairs where ``t`` is
            a one-letter operation code; the codes handled here are
            ``'S'``/``'H'`` (clipping), ``'M'``, ``'I'``, ``'D'`` and ``'U'``.
            NOTE(review): these look like CIGAR operations plus a custom
            ``'U'`` code -- confirm against the caller.
        target_name: Key into ``target`` for the reference being analysed.
        target: Mapping of names to per-target dicts. ``target[target_name]``
            must provide ``'total'``, ``'interval'`` and ``'rev_interval'``,
            plus ``'cut_left'``/``'cut_right'`` when ``gDNA`` is true.
        bond_name: Key into ``target`` whose entry provides ``'left_bond'``
            and ``'right_bond'``; only read when ``gDNA`` is false.
        gDNA: Selects gDNA mode (clip/coverage tolerance of 20, indels counted
            only around the cut window) instead of donor mode (tolerance of
            5, every indel counted).

    Returns:
        ``[indel_info, final_tag, min_indel, min_cut_indel, target_name]``.
        ``indel_info`` holds one ``'|'``-joined description per alignment.
        The other three values come from the alignment with the smallest
        total indel length (the first one wins ties); they are all ``None``
        when ``read_info`` is empty.
    """
    min_indel = None
    # Bound up front so that an empty ``read_info`` returns ``None`` values
    # instead of raising UnboundLocalError at the return statement.
    min_cut_indel = None
    final_tag = None
    indel_info = []
    target_entry = target[target_name]
    target_len = target_entry['total']
    if not gDNA:  # donor
        left_bond = target[bond_name]['left_bond']
        right_bond = target[bond_name]['right_bond']
        cut_left = cut_right = None  # unused in donor mode
        dis = 5
    else:  # gDNA
        left_bond = 0
        right_bond = 0
        cut_left = target_entry['cut_left']
        cut_right = target_entry['cut_right']
        dis = 20

    for index1, index2, start, end, strand, aln, alignment in read_info:
        if strand == '+':
            interval = target_entry['interval']
        else:
            interval = target_entry['rev_interval']
            # Minus strand: mirror the coordinates onto the reversed target.
            start, end = target_len - end, target_len - start
        aln_info = '{}|{}|{}|{}|{}|{}'.format(strand, start, end, index1,
                                              index2, aln)

        # The target counts as fully covered only when the alignment reaches
        # within ``dis`` of both of its ends.
        if start >= dis or target_len - end >= dis:
            target_tag = 'partially'
            seg_type = '-'.join(
                x[2] for x in Interval.mapto([start, end], interval))
        else:
            target_tag = 'full'
            if strand == '+':
                seg_type = 'L-LH-C-RH-R' if gDNA else 'LH-I-RH'
            else:
                seg_type = 'R-RH-C-LH-L' if gDNA else 'RH-I-LH'

        cut_indel = 0
        total_indel = 0
        reference_pos = start
        read_tag = 'full'
        for n, (num, t) in enumerate(alignment):
            if t in ('S', 'H'):
                # A clip at index 0 is the leading clip; a clip at any other
                # index is treated as the trailing one.
                bond = left_bond if n == 0 else right_bond
                if num - bond >= dis:
                    read_tag = 'partially'
            if gDNA:
                # Insertions count when they sit inside the cut window.
                if t == 'I' and cut_left <= reference_pos <= cut_right:
                    cut_indel += num
                # 'D'/'U' count when their reference span overlaps it.
                if t in ('D', 'U'):
                    if (cut_left <= reference_pos + num
                            and reference_pos <= cut_right):
                        cut_indel += num
                # Only these operations advance the reference position.
                if t in ('M', 'U', 'D'):
                    reference_pos += num
            else:
                # Donor mode: every indel contributes to cut_indel.
                if t in ('D', 'U', 'I'):
                    cut_indel += num
            if t in ('D', 'U', 'I'):
                total_indel += num

        if target_tag == 'partially' or read_tag == 'partially':
            tag = 'partially'
        else:
            tag = 'full'
        # Strict '<' keeps the first alignment among equal minima.
        if min_indel is None or total_indel < min_indel:
            min_indel = total_indel
            min_cut_indel = cut_indel
            final_tag = tag
        indel_info.append('{}|{}|{}|{}|{}'.format(target_name, seg_type,
                                                  total_indel, cut_indel,
                                                  aln_info))
    return [indel_info, final_tag, min_indel, min_cut_indel, target_name]