def test_cigar(self):
     """Smoke-test ``cigar_correction`` by printing the corrected pair.

     The cigar ``[[0, 14], [1, 1], [0, 5]]`` is applied to a 20-character
     read and a 19-character reference. Nothing is asserted here: the two
     strings returned by ``cigar_correction`` are only written to stdout so
     they can be inspected by eye.
     """
     from mirtop.mirna.realign import cigar_correction
     cigar = [[0, 14], [1, 1], [0, 5]]
     fixed = cigar_correction(cigar, "AAAAGCTGGGTTGAGGAGGA",
                              "AAAAGCTGGGTTGAGAGGA")
     # Single-argument print(...) calls produce the same output under
     # Python 2 and Python 3, whereas the bare ``print x`` statement is a
     # SyntaxError on Python 3.
     print("\n testing cigar correction")
     print(fixed[0])
     print(fixed[1])
 def test_cigar(self):
     """Check ``cigar_correction`` and ``make_cigar`` on fixed inputs.

     Raises:
         ValueError: if either corrected sequence, or the cigar built from
             the two gapped strings, differs from its expected value.
     """
     from mirtop.mirna.realign import cigar_correction, make_cigar
     cigar = [[0, 14], [1, 1], [0, 5]]
     fixed = cigar_correction(cigar, "AAAAGCTGGGTTGAGGAGGA",
                              "AAAAGCTGGGTTGAGAGGA")
     if not fixed[0] == "AAAAGCTGGGTTGAGGAGGA":
         raise ValueError("Sequence 1 is not right.")
     if not fixed[1] == "AAAAGCTGGGTTGA-GAGGA":
         raise ValueError("Sequence 2 is not right.")
     # Build the message from the value that is actually compared, so the
     # report can never name a different cigar than the one being checked.
     expected = "MGMD3MI3M"
     observed = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
     if not observed == expected:
         raise ValueError("Cigar not eq to %s: %s" % (expected, observed))
Exemple #3
0
def tune(seq, precursor, start, cigar):
    """
    Realign a read on its precursor and list how the two differ.

    When ``cigar`` is truthy the aligned pair comes from
    ``cigar_correction`` applied to ``precursor[start:]``; otherwise
    ``align`` is run against the ``len(seq)``-character window of the
    precursor beginning at ``start`` and the cigar is built with
    ``make_cigar``.

    Returns a tuple ``(subs, add, cigar)``:

    * ``subs``: list of ``[position, read_char, reference_char]`` entries.
    * ``add``: the read suffix starting at the first mismatch found in the
      last three positions, with ``-`` removed, when the mismatch pattern of
      those positions is one of the accepted ones; otherwise an empty list.
    * ``cigar``: ``make_cigar`` of the final read/reference pair.
    """
    if cigar:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])
    else:
        window = precursor[start:start + len(seq)]
        seq, mature, _score, _p, _size = align(seq, window)
        cigar = make_cigar(seq, mature)

    # At most one gap character is removed from each end of the read.
    if seq.startswith("-"):
        seq = seq[1:]
    if seq.endswith("-"):
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    # Mismatch layouts (1 = mismatch) over the last three positions that are
    # reported as an addition instead of as substitutions.
    addition_layouts = [[1, 1, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1],
                        [0, 0, 1], [1, 1, 1]]

    mismatches = {idx for idx, nt in enumerate(seq) if nt != mature[idx]}

    tail_start = len(seq) - 3
    tail = range(tail_start, len(seq))

    # Mismatches before the three-position tail are always substitutions.
    subs = [[idx, seq[idx], mature[idx]]
            for idx in mismatches if idx < tail_start]

    tail_hits = [idx for idx in tail if idx in mismatches]
    layout = [1 if idx in mismatches else 0 for idx in tail]

    add = []
    if layout in addition_layouts:
        add = seq[tail_hits[0]:].replace("-", "")

    # Tail mismatches that did not yield an addition become substitutions.
    if tail_hits and not add:
        subs.extend([idx, seq[idx], mature[idx]] for idx in tail_hits)

    return subs, add, make_cigar(seq, mature)
Exemple #4
0
 def test_cigar(self):
     """Check the cigar helpers of ``mirtop.mirna.realign`` on fixed inputs.

     Covers ``cigar_correction``, ``make_cigar``, ``expand_cigar`` and
     ``cigar2snp``.

     Raises:
         ValueError: if any helper returns something other than the
             expected value; the message carries the observed value.
     """
     from mirtop.mirna.realign import cigar_correction, make_cigar, \
         cigar2snp, expand_cigar
     cigar = [[0, 14], [1, 1], [0, 5]]
     fixed = cigar_correction(cigar, "AAAAGCTGGGTTGAGGAGGA",
                              "AAAAGCTGGGTTGAGAGGA")
     if not fixed[0] == "AAAAGCTGGGTTGAGGAGGA":
         raise ValueError("Sequence 1 is not right.")
     if not fixed[1] == "AAAAGCTGGGTTGA-GAGGA":
         raise ValueError("Sequence 2 is not right.")
     # Each helper is evaluated once and its result reused in the message.
     # Results are wrapped in a one-element tuple so that "%" formatting
     # cannot fail (and mask the real error) if a helper returns a tuple.
     observed = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
     if not observed == "MAMD3MI3M":
         raise ValueError("Cigar not eq to MAMD3MI3M: %s" % (observed,))
     # test expand cigar
     expanded = expand_cigar("3MA3M")
     if not expanded == "MMMAMMM":
         raise ValueError("Cigar 3MA3M not equal to MMMAMMM but to %s" %
                          (expanded,))
     # test cigar to snp
     snps = cigar2snp("3MA3M", "AAATCCC")
     if not snps[0] == [3, "A", "T"]:
         raise ValueError("3MA3M not equal AAATCCC but %s" % (snps,))
Exemple #5
0
 def test_cigar(self):
     """Check the cigar helpers of ``mirtop.mirna.realign`` on fixed inputs.

     Covers ``cigar_correction``, ``make_cigar``, ``expand_cigar`` and
     ``cigar2snp``.

     Raises:
         ValueError: if any helper returns something other than the
             expected value; the message carries the observed value.
     """
     from mirtop.mirna.realign import cigar_correction, make_cigar, \
         cigar2snp, expand_cigar
     cigar = [[0, 14], [1, 1], [0, 5]]
     fixed = cigar_correction(cigar, "AAAAGCTGGGTTGAGGAGGA",
                              "AAAAGCTGGGTTGAGAGGA")
     if not fixed[0] == "AAAAGCTGGGTTGAGGAGGA":
         raise ValueError("Sequence 1 is not right.")
     if not fixed[1] == "AAAAGCTGGGTTGA-GAGGA":
         raise ValueError("Sequence 2 is not right.")
     # Each helper is evaluated once and its result reused in the message.
     # Results are wrapped in a one-element tuple so that "%" formatting
     # cannot fail (and mask the real error) if a helper returns a tuple.
     observed = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
     if not observed == "MAMD3MI3M":
         raise ValueError("Cigar not eq to MAMD3MI3M: %s" % (observed,))
     # test expand cigar
     expanded = expand_cigar("3MA3M")
     if not expanded == "MMMAMMM":
         raise ValueError("Cigar 3MA3M not equal to MMMAMMM but to %s" %
                          (expanded,))
     # test cigar to snp
     snps = cigar2snp("3MA3M", "AAATCCC")
     if not snps[0] == [3, "A", "T"]:
         raise ValueError("3MA3M not equal AAATCCC but %s" % (snps,))
Exemple #6
0
def tune(seq, precursor, start, cigar):
    """
    The actual fn that will realign the sequence to find the nt changes
    at 5', 3' sequence and nt variations.

    Args:
        *seq (str)*: sequence of the read.

        *precursor (str)*: sequence of the precursor.

        *start (int)*: start position of sequence on the precursor, +1.
        A negative value is reset to 0 and the aligned precursor window
        is shortened by the same amount.

        *cigar*: similar to SAM CIGAR attribute. When truthy it is handed
        to ``cigar_correction`` unchanged; otherwise the read is aligned
        with ``align``. NOTE(review): the exact format expected is defined
        by ``cigar_correction`` -- confirm against that function.

    Returns:

        *tuple* with:

            subs (list): substitutions, as ``[position, read_nt,
            reference_nt]`` entries.

            add (str): nt added to the end, joined in the order they were
            collected (scanning from the last position backwards).

            cigar (str): updated cigar, from ``make_cigar``.
    """
    end = len(seq)
    if start < 0:
        end = end + start
        start = 0
    if cigar:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])
    else:
        seq, mature, score, p, size = align(seq, precursor[start:start + end])
        cigar = make_cigar(seq, mature)
    # Drop at most one gap character from each end of the aligned read.
    if seq.startswith("-"):
        seq = seq[1:]
    if seq.endswith("-"):
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    # Positions where the read and the reference disagree.
    error = set()
    for pos in range(0, len(seq)):
        if seq[pos] != mature[pos]:
            error.add(pos)

    subs, add = [], []

    # prob becomes 1 once a mismatch has been seen during the backward scan.
    prob = 0
    add_position = []
    # Scan at most the last five positions, last one first. The stop value
    # is clamped at -1 so that reads shorter than five characters never
    # produce a negative index, which would silently wrap around to the
    # end of the read (or raise IndexError for very short reads).
    for e in range(len(seq) - 1, max(len(seq) - 6, -1), -1):
        if e in error:
            prob = 1
        if prob == 1:
            add.append(seq[e])
            add_position.append(e)
        # Before any mismatch, a matching A/T is provisionally collected.
        if e not in error and prob == 0 and seq[e] in ["A", "T"]:
            add.append(seq[e])
            add_position.append(e)
            continue
        if e not in error:
            # A matching position ends the scan: undo the last collected
            # entry and, if no mismatch was ever seen, discard everything.
            if add:
                add.pop()
                add_position.pop()
            if prob == 0:
                add = []
                add_position = []
            break

    # Mismatches not absorbed into the addition are substitutions.
    for e in error:
        if e not in add_position:
            subs.append([e, seq[e], mature[e]])

    logger.debug("TUNE:: %s %s" % (subs, add))

    return subs, "".join(add), make_cigar(seq, mature)