Exemple #1
0
def _align_to_mature(seq, hairpin, mature):
    """Return the start position of *seq* derived from its mature alignment.

    The mature reference is fetched with ``get_mature_sequence`` and the
    read is aligned to it with ``align``.  The start is the number of gap
    characters found in the first 8 columns of the aligned read, minus 4,
    plus ``int(mature[0])``.

    NOTE(review): the constant 4 presumably matches a flank added by
    ``get_mature_sequence`` -- confirm against that helper.
    """
    reference = get_mature_sequence(hairpin, mature)
    aligned_read = align(seq, reference)[0]
    annotated_start = mature[0]
    leading_gaps = aligned_read[:8].count("-")
    start = leading_gaps - 4 + int(annotated_start)
    logger.debug("PROST::align:sequence to mature %s" % aligned_read)
    logger.debug("PROST::align:start: %s -> %s" % (annotated_start, start))
    return start
 def test_locala(self):
     """Smoke-test the pairwise aligner by printing three alignments.

     Each call prints the first element of the result of ``align`` for a
     read against the same reference.  Nothing is asserted: the test only
     fails if ``align`` raises.
     """
     from mirtop.mirna.realign import align
     # print() with a single argument behaves the same on Python 2 and 3,
     # whereas the print statement is a SyntaxError on Python 3.
     print("\nExamples of perfect match, deletion, mutation")
     print(align("TGAGGTAGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")[0])
     print(align("TGAGGTGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")[0])
     print(align("TGAGGTAGTAGGCTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")[0])
Exemple #3
0
 def test_locala(self):
     """Print the aligned read for a fixed set of read/reference pairs.

     This is a smoke test: it prints the first element of each ``align``
     result and asserts nothing, so it only fails if ``align`` raises.
     """
     from mirtop.mirna.realign import align
     # (read, reference) pairs, printed in this order.
     pairs = (
         ("TGAGTAGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT"),
         ("TGAGGTGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT"),
         ("TGAGGTAGTAGGCTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT"),
         ("TGANTAGTAGNTTGTATNGTT", "TGAGTAGTAGGTTGTATAGTTT"),
         ("TGANTAGTNGNTTGTATNGTT", "TGAGTATAGGCCTTGTATAGTT"),
         ("NCANAGTCCAAGNTCATN", "TCATAGTCCAAGGTCATG"),
     )
     print("\nExamples of perfect match, deletion, mutation")
     for read, reference in pairs:
         print(align(read, reference)[0])
Exemple #4
0
 def test_locala(self):
     """Exercise ``align`` on six read/reference pairs and print the results.

     Only the first element of each alignment result is printed.  There are
     no assertions; the test passes as long as ``align`` does not raise.
     """
     from mirtop.mirna.realign import align

     def show(read, reference):
         # Print the aligned form of the read only.
         print(align(read, reference)[0])

     print("\nExamples of perfect match, deletion, mutation")
     show("TGAGTAGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")
     show("TGAGGTGTAGGTTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")
     show("TGAGGTAGTAGGCTGTATAGTT", "TGAGGTAGTAGGTTGTATAGTT")
     show("TGANTAGTAGNTTGTATNGTT", "TGAGTAGTAGGTTGTATAGTTT")
     show("TGANTAGTNGNTTGTATNGTT", "TGAGTATAGGCCTTGTATAGTT")
     show("NCANAGTCCAAGNTCATN", "TCATAGTCCAAGGTCATG")
Exemple #5
0
def tune(seq, precursor, start, cigar):
    """
    Realign the read to the precursor and classify its mismatches.

    When *cigar* is truthy the aligned read/reference pair comes from
    ``cigar_correction``; otherwise the read is aligned with ``align``
    against the precursor slice that begins at *start* and is as long as
    the read, and a cigar is built with ``make_cigar``.

    Mismatches in the last three columns are reported as a 3' addition when
    their pattern is one of the accepted ones; all remaining mismatches are
    reported as substitutions.

    Returns a tuple ``(subs, add, cigar)`` where *subs* is a list of
    ``[position, read_nt, reference_nt]``, *add* is the added nucleotides
    as a string (an empty list when no addition was detected) and *cigar*
    is ``make_cigar`` applied to the final aligned pair.
    """
    if cigar:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])
    else:
        window = precursor[start:start + len(seq)]
        seq, mature, _score, _pos, _size = align(seq, window)
        cigar = make_cigar(seq, mature)
    # Drop a single gap column from either end of the aligned read.
    if seq.startswith("-"):
        seq = seq[1:]
    if seq.endswith("-"):
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    tail_start = len(seq) - 3
    # Columns where the aligned read differs from the reference.
    error = {pos for pos in range(len(seq)) if seq[pos] != mature[pos]}

    # Anything upstream of the last three columns is a substitution.
    subs = [[pos, seq[pos], mature[pos]] for pos in error if pos < tail_start]

    # Describe the last three columns as a 0/1 mismatch pattern.
    tail = range(tail_start, len(seq))
    error_add = [pos for pos in tail if pos in error]
    pattern = [1 if pos in error else 0 for pos in tail]

    accepted_patterns = (
        [1, 1, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1], [0, 0, 1], [1, 1, 1],
    )
    add = []
    if pattern in accepted_patterns:
        # The addition runs from the first mismatching tail column to the end.
        add = seq[error_add[0]:].replace("-", "")
    if error_add and not add:
        # Tail mismatches that do not form an addition stay substitutions.
        subs.extend([pos, seq[pos], mature[pos]] for pos in error_add)

    return subs, add, make_cigar(seq, mature)
Exemple #6
0
def create_iso(name, mir, seq, numsim, exp):
    """Simulate isomiR reads for every mature miRNA of one precursor.

    For each miRNA listed under ``mir[name]``, ``numsim`` variants are drawn
    with ``variation``; a variant whose sequence was already produced is
    skipped.  Each new variant is aligned back to the precursor slice it
    covers to build its cigar and is stored as a ``realign.hits`` entry.

    Args:
        name: precursor name, used as key into *mir*.
        mir: mapping of precursor name to a mapping of miRNA name to the
            information passed on to ``variation``.
        seq: precursor sequence.
        numsim: number of variants to draw per miRNA (converted with int()).
        exp: when truthy, the read count is drawn from a negative binomial
            distribution with random parameters; otherwise it is 1.

    Returns:
        The value of ``body.create(reads, "miRBase21", "sim1")``.

    Side effects:
        Calls ``write_fastq`` and ``write_collapse_fastq`` with ``full_fq``
        and ``clean_fq``, which are not parameters and therefore must be
        defined at module level before this function is called.
    """
    reads = dict()
    full_read = list()
    clean_read = list()
    seen = set()
    for mirna in mir[name]:
        info = mir[name][mirna]
        for _ in range(int(numsim)):
            # Expression is drawn before the duplicate check on purpose:
            # this keeps the sequence of random draws unchanged.
            e = 1
            if exp:
                trial = random.randint(1, 100)
                p = random.randint(1, 50) / 50.0
                e = numpy.random.negative_binomial(trial, p, 1)[0]
            iso = realign.isomir()
            randSeq, iso.start, iso.t5, iso.t3, iso.subs, iso.add = variation(
                info, seq)
            if randSeq in seen:
                continue
            seen.add(randSeq)
            iso.end = iso.start + len(randSeq)
            aln = realign.align(randSeq, seq[iso.start:iso.end])
            iso.cigar = realign.make_cigar(aln[0], aln[1])
            iso.mirna = mirna
            query_name = "%s.%s.%s" % (mirna, iso.format_id("."), randSeq)
            hit = realign.hits()
            reads[query_name] = hit
            hit.set_sequence(randSeq)
            hit.counts = e
            hit.set_precursor(name, iso)
            full_read.extend(create_read(randSeq, e))
            clean_read.append([randSeq, e])
    write_fastq(full_read, full_fq)
    write_collapse_fastq(clean_read, clean_fq)
    gff = body.create(reads, "miRBase21", "sim1")
    return gff
Exemple #7
0
def create_iso(name, mir, seq, numsim, exp):
    """Generate simulated isomiR reads for one precursor and build the GFF.

    Every miRNA under ``mir[name]`` gets ``numsim`` candidate variants from
    ``variation``.  Candidates whose sequence has already been seen are
    dropped; the others are realigned to the precursor slice they span so a
    cigar can be attached, and are recorded as ``realign.hits`` objects.

    Args:
        name: precursor name, used as key into *mir*.
        mir: mapping of precursor name to a mapping of miRNA name to the
            information handed to ``variation``.
        seq: precursor sequence.
        numsim: number of candidates per miRNA (converted with int()).
        exp: when truthy, read counts come from a negative binomial draw
            with random parameters; otherwise every read has count 1.

    Returns:
        Whatever ``body.create(reads, "miRBase21", "sim1")`` returns.

    Side effects:
        Passes the simulated reads to ``write_fastq`` and
        ``write_collapse_fastq`` together with ``full_fq`` and ``clean_fq``.
        Those two names are not parameters, so they have to exist at module
        level when this function runs.
    """
    reads = dict()
    full_read = list()
    clean_read = list()
    seen = set()
    for mirna in mir[name]:
        info = mir[name][mirna]
        for _ in range(int(numsim)):
            # The count is drawn ahead of the duplicate check so that the
            # order of random draws stays exactly as before.
            e = 1
            if exp:
                trial = random.randint(1, 100)
                p = random.randint(1, 50) / 50.0
                e = numpy.random.negative_binomial(trial, p, 1)[0]
            iso = realign.isomir()
            randSeq, iso.start, iso.t5, iso.t3, iso.subs, iso.add = variation(info, seq)
            if randSeq in seen:
                continue
            seen.add(randSeq)
            iso.end = iso.start + len(randSeq)
            aln = realign.align(randSeq, seq[iso.start:iso.end])
            iso.cigar = realign.make_cigar(aln[0], aln[1])
            iso.mirna = mirna
            query_name = "%s.%s.%s" % (mirna, iso.format_id("."), randSeq)
            hit = realign.hits()
            reads[query_name] = hit
            hit.set_sequence(randSeq)
            hit.counts = e
            hit.set_precursor(name, iso)
            full_read.extend(create_read(randSeq, e))
            clean_read.append([randSeq, e])
    write_fastq(full_read, full_fq)
    write_collapse_fastq(clean_read, clean_fq)
    gff = body.create(reads, "miRBase21", "sim1")
    return gff
Exemple #8
0
def tune(seq, precursor, start, cigar):
    """
    The actual fn that will realign the sequence to find the nt changes
    at 5', 3' sequence and nt variations.

    Args:
        *seq (str)*: sequence of the read.

        *precursor (str)*: sequence of the precursor.

        *start (int)*: start position of sequence on the precursor, +1.
        A negative value is clamped to 0 and shortens the precursor
        window used for the alignment by the same amount.

        *cigar (str)*: similar to SAM CIGAR attribute. When empty, the
        read is aligned with ``align`` and a cigar is built from it.

    Returns:

        *tuple* with:

            subs (list): substitutions, each as
            ``[position, read_nt, reference_nt]``

            add (str): nt added to the end, joined in the order they are
            collected (walking from the 3' end towards the 5' end)

            cigar (str): updated cigar
    """
    end = len(seq)
    if start < 0:
        end = end + start
        start = 0
    if cigar:
        seq, mature = cigar_correction(cigar, seq, precursor[start:])
    else:
        seq, mature, _score, _pos, _size = align(
            seq, precursor[start:start + end])
        cigar = make_cigar(seq, mature)
    # Drop a single gap column from either end of the aligned read.
    if seq.startswith("-"):
        seq = seq[1:]
    if seq.endswith("-"):
        seq = seq[:-1]
    logger.debug("TUNE:: %s %s %s" % (cigar, seq, mature))

    # Columns where the aligned read differs from the reference.
    error = set()
    for pos in range(0, len(seq)):
        if seq[pos] != mature[pos]:
            error.add(pos)

    subs, add = [], []

    # prob becomes 1 once a mismatch has been seen during the 3' scan.
    prob = 0
    add_position = []
    # Scan at most five columns from the 3' end towards the 5' end.  The
    # lower bound is clamped at -1 so that reads shorter than five columns
    # never produce negative indexes, which would wrap around to the 3' end
    # (or raise IndexError on an empty read).
    scan_stop = max(len(seq) - 6, -1)
    for e in range(len(seq) - 1, scan_stop, -1):
        if e in error:
            prob = 1
        if prob == 1:
            add.append(seq[e])
            add_position.append(e)
        if e not in error and prob == 0 and seq[e] in ("A", "T"):
            # Matching A/T before any mismatch is kept tentatively.
            add.append(seq[e])
            add_position.append(e)
            continue
        if e not in error:
            # First matching column that ends the scan: undo the entry
            # pushed for it (or the last tentative one) and stop.
            if add:
                add.pop()
                add_position.pop()
            if prob == 0:
                # No mismatch was seen at all, so nothing was added.
                add = []
                add_position = []
            break
    # NOTE(review): when all scanned columns are matching A/T the loop ends
    # without break and the tentative entries are kept -- confirm intended.

    # Mismatches not explained by the 3' addition are substitutions.
    for e in error:
        if e not in add_position:
            subs.append([e, seq[e], mature[e]])

    logger.debug("TUNE:: %s %s" % (subs, add))

    return subs, "".join(add), make_cigar(seq, mature)