def test_cigar(self):
    """Print the two sequences returned by ``cigar_correction``.

    The call uses a 20-nt first sequence, a 19-nt second sequence and a
    three-entry cigar given as ``[code, length]`` pairs.  Nothing is
    asserted; the two returned sequences are only printed for inspection.
    """
    from mirtop.mirna.realign import cigar_correction
    # NOTE(review): pairs look like (operation, length) with 0 = match and
    # 1 = insertion, as in pysam cigartuples -- confirm against the caller.
    cigar = [[0, 14], [1, 1], [0, 5]]
    fixed = cigar_correction(cigar,
                             "AAAAGCTGGGTTGAGGAGGA",
                             "AAAAGCTGGGTTGAGAGGA")
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare ``print x`` statement is a SyntaxError on Python 3.
    print("\n testing cigar correction")
    print(fixed[0])
    print(fixed[1])
def test_cigar(self):
    """Check ``cigar_correction`` and ``make_cigar`` on fixed inputs.

    Raises:
        ValueError: if either sequence returned by ``cigar_correction`` or
            the string returned by ``make_cigar`` differs from the expected
            value.
    """
    from mirtop.mirna.realign import cigar_correction, make_cigar
    cigar = [[0, 14], [1, 1], [0, 5]]
    fixed = cigar_correction(cigar,
                             "AAAAGCTGGGTTGAGGAGGA",
                             "AAAAGCTGGGTTGAGAGGA")
    if not fixed[0] == "AAAAGCTGGGTTGAGGAGGA":
        raise ValueError("Sequence 1 is not right.")
    if not fixed[1] == "AAAAGCTGGGTTGA-GAGGA":
        raise ValueError("Sequence 2 is not right.")
    # Compute the cigar once and build the message from the same expected
    # value that is compared, so a failure never reports a different target.
    expected_cigar = "MGMD3MI3M"
    observed_cigar = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
    if not observed_cigar == expected_cigar:
        raise ValueError("Cigar not eq to %s: %s" %
                         (expected_cigar, observed_cigar))
def tune(seq, precursor, start, cigar):
    """
    Realign a read against its precursor and list where the two differ.

    Args:
        seq: sequence of the read.
        precursor: sequence of the precursor; it is sliced from ``start``.
        start: index into ``precursor`` where the slice begins.
        cigar: when truthy it is passed to ``cigar_correction``; otherwise
            the read is aligned with ``align`` and a cigar is built with
            ``make_cigar`` (used only for the debug message).

    Returns:
        ``(subs, add, cigar)`` where

        * ``subs`` is a list of ``[position, read_nt, reference_nt]``,
        * ``add`` is the read tail (gaps removed) starting at the first
          mismatch within the last three positions when those positions
          match one of the accepted mismatch patterns, and an empty list
          otherwise,
        * ``cigar`` is ``make_cigar`` applied to the final aligned pair.
    """
    # Mismatch layouts (1 = mismatch) of the last three positions that are
    # treated as a 3' addition instead of substitutions.
    addition_patterns = [[1, 1, 0], [1, 0, 1], [0, 1, 0],
                         [0, 1, 1], [0, 0, 1], [1, 1, 1]]

    if not cigar:
        seq, mature, _score, _pos, _size = align(
            seq, precursor[start:start + len(seq)])
        cigar = make_cigar(seq, mature)
    else:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])

    # Trim a single gap character from each end of the aligned read.
    if seq[:1] == "-":
        seq = seq[1:]
    if seq[-1:] == "-":
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    tail_start = len(seq) - 3
    mismatches = {i for i in range(len(seq)) if seq[i] != mature[i]}

    # Mismatches before the last three positions are always substitutions.
    subs = [[i, seq[i], mature[i]] for i in mismatches if i < tail_start]

    tail = range(tail_start, len(seq))
    pattern = [int(i in mismatches) for i in tail]
    tail_errors = [i for i in tail if i in mismatches]

    add = []
    if pattern in addition_patterns:
        add = seq[tail_errors[0]:].replace("-", "")
    # With no addition detected, tail mismatches count as substitutions.
    if not add and tail_errors:
        subs.extend([i, seq[i], mature[i]] for i in tail_errors)
    return subs, add, make_cigar(seq, mature)
def test_cigar(self):
    """Check the cigar helpers of ``mirtop.mirna.realign`` on fixed inputs.

    Covers ``cigar_correction``, ``make_cigar``, ``expand_cigar`` and
    ``cigar2snp``.

    Raises:
        ValueError: if any helper returns something other than the expected
            value; the message carries the value that was observed.
    """
    from mirtop.mirna.realign import cigar_correction, make_cigar, \
        cigar2snp, expand_cigar
    cigar = [[0, 14], [1, 1], [0, 5]]
    fixed = cigar_correction(cigar,
                             "AAAAGCTGGGTTGAGGAGGA",
                             "AAAAGCTGGGTTGAGAGGA")
    if not fixed[0] == "AAAAGCTGGGTTGAGGAGGA":
        raise ValueError("Sequence 1 is not right.")
    if not fixed[1] == "AAAAGCTGGGTTGA-GAGGA":
        raise ValueError("Sequence 2 is not right.")

    # Each helper is called once; the observed value is reused in the
    # failure message instead of being recomputed.
    observed_cigar = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
    if not observed_cigar == "MAMD3MI3M":
        raise ValueError("Cigar not eq to MAMD3MI3M: %s" % observed_cigar)

    # test expand cigar
    expanded = expand_cigar("3MA3M")
    if not expanded == "MMMAMMM":
        raise ValueError("Cigar 3MA3M not equal to MMMAMMM but to %s" %
                         expanded)

    # test cigar to snp
    snps = cigar2snp("3MA3M", "AAATCCC")
    if not snps[0] == [3, "A", "T"]:
        # Wrap in a tuple so %-formatting works whatever sequence type
        # cigar2snp returns.
        raise ValueError("cigar2snp of 3MA3M on AAATCCC not equal to "
                         "[3, 'A', 'T'] but %s" % (snps,))
def tune(seq, precursor, start, cigar):
    """
    Realign a read to its precursor and report substitutions and the
    nucleotides added at the 3' end.

    Args:
        *seq (str)*: sequence of the read.

        *precursor (str)*: sequence of the precursor.

        *start (int)*: index into the precursor where the read starts; a
            negative value is reset to 0 and shortens the aligned window
            by the same amount.

        *cigar*: when truthy it is passed to ``cigar_correction``;
            otherwise the read is aligned with ``align``.

    Returns:
        *tuple* with:

            subs (list): ``[position, read_nt, reference_nt]`` entries for
                mismatches that are not part of the 3' addition.

            add (str): nts assigned to the 3' addition, joined in the
                order they were visited (from the 3' end backwards).

            cigar (str): ``make_cigar`` output for the aligned pair.
    """
    window = len(seq)
    if start < 0:
        window += start
        start = 0

    if not cigar:
        seq, mature, _score, _pos, _size = align(
            seq, precursor[start:start + window])
        cigar = make_cigar(seq, mature)
    else:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])

    # Trim a single gap character from each end of the aligned read.
    if seq[:1] == "-":
        seq = seq[1:]
    if seq[-1:] == "-":
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    mismatches = {i for i in range(len(seq)) if seq[i] != mature[i]}

    add, add_position = [], []
    seen_mismatch = False
    # Walk backwards over (at most) the last five positions of the read.
    for idx in range(len(seq) - 1, len(seq) - 6, -1):
        is_mismatch = idx in mismatches
        if is_mismatch:
            seen_mismatch = True
        if seen_mismatch:
            add.append(seq[idx])
            add_position.append(idx)
        if is_mismatch:
            continue
        # From here on the position matches the reference.
        if not seen_mismatch and seq[idx] in ["A", "T"]:
            # A matching A/T before any mismatch is kept tentatively.
            add.append(seq[idx])
            add_position.append(idx)
            continue
        # A match ends the scan: drop the last collected nt, and discard
        # everything when no mismatch was seen at all.
        if add:
            add.pop()
            add_position.pop()
        if not seen_mismatch:
            add = []
            add_position = []
        break

    subs = [[i, seq[i], mature[i]]
            for i in mismatches if i not in add_position]
    logger.debug("TUNE:: %s %s" % (subs, add))
    return subs, "".join(add), make_cigar(seq, mature)