Beispiel #1
0
def getMapPeptide2Cds(peptide_sequence, cds_sequence, options):
    """Align a peptide sequence against a cds sequence.

    The peptide is turned into a wobble sequence with
    ``Genomics.Protein2Wobble`` and aligned against the cds by
    ``AlignCodonBased``.  The alignment is in nucleotides.

    peptide_sequence
        peptide as a string; blanks are removed before use.
    cds_sequence
        cds as a string; blanks, ``.`` and ``-`` are removed before use.
    options
        object providing ``loglevel`` and ``stdlog``; it is also handed
        on to ``AlignCodonBased``.

    Raises ValueError if ``AlignCodonBased`` raises ValueError.

    NOTE(review): this listing ends right after the alignment call and
    contains no ``return`` statement, so as shown the function returns
    None -- confirm against the complete source.
    """

    # remove blanks from the protein sequence
    p = re.sub(" ", "", peptide_sequence)

    # remove gaps and blanks from the cds
    c = re.sub("[ .-]", "", cds_sequence)

    # the wobble sequence is derived from the upper-cased peptide
    w = Genomics.Protein2Wobble(p.upper())

    if options.loglevel >= 6:
        options.stdlog.write("# peptide original (%5i): %s\n" % (len(p), p))
        options.stdlog.write("# cds original     (%5i): %s\n" % (len(c), c))
        options.stdlog.write("# wobble sequence  (%5i): %s\n" % (len(w), w))
        options.stdlog.flush()

    seq_wobble = alignlib_lite.py_makeSequence(w)
    seq_cds = alignlib_lite.py_makeSequence(c.upper())
    seq_peptide = alignlib_lite.py_makeSequence(p)

    # container that AlignCodonBased receives as its output argument
    map_p2c = alignlib_lite.py_makeAlignmentVector()

    try:
        AlignCodonBased(seq_wobble,
                        seq_cds,
                        seq_peptide,
                        map_p2c,
                        options=options)
    # "except X as name" is valid on Python 2.6+ and Python 3; the old
    # "except X, name" spelling is a SyntaxError on Python 3.
    except ValueError as msg:
        raise ValueError("mapping error for sequence: %s" % (msg))
Beispiel #2
0
def getMapPeptide2Cds(peptide_sequence, cds_sequence, options):
    """Build the alignment that maps a peptide onto its cds.

    The peptide is converted into a wobble sequence with
    ``Genomics.Protein2Wobble`` and aligned against the cds using
    ``AlignCodonBased``.  If that alignment has more than five gaps
    beyond those accounted for by peptide gaps and the length
    difference, the pair is realigned with ``AlignExhaustive``.
    Codons that are not aligned at all three positions are removed.

    The returned alignment is in nucleotides: its rows are checked
    against the wobble sequence and its columns against the cds.

    peptide_sequence
        peptide as a string; blanks are removed before use.
    cds_sequence
        cds as a string; blanks, ``.`` and ``-`` are removed before use.
    options
        object providing ``loglevel`` and ``stdlog``; it is also handed
        on to the alignment and pretty-printing helpers.

    Raises ValueError if the codon based alignment fails or if the
    final alignment is empty.
    """

    def _log(text):
        # stdlog is looked up lazily so it is only touched when logging
        options.stdlog.write(text)

    peptide = re.sub(" ", "", peptide_sequence)
    cds = re.sub("[ .-]", "", cds_sequence)
    wobble = Genomics.Protein2Wobble(peptide.upper())

    if options.loglevel >= 6:
        for template, sequence in (
                ("# peptide original (%5i): %s\n", peptide),
                ("# cds original     (%5i): %s\n", cds),
                ("# wobble sequence  (%5i): %s\n", wobble)):
            _log(template % (len(sequence), sequence))
        options.stdlog.flush()

    seq_wobble = alignlib_lite.py_makeSequence(wobble)
    seq_cds = alignlib_lite.py_makeSequence(cds.upper())
    seq_peptide = alignlib_lite.py_makeSequence(peptide)

    alignment = alignlib_lite.py_makeAlignmentVector()

    try:
        AlignCodonBased(
            seq_wobble, seq_cds, seq_peptide, alignment, options=options)
    except ValueError as error:
        raise ValueError("mapping error for sequence: %s" % (error))

    # Gaps explained by peptide gaps (three nucleotides each) and by the
    # length difference of the two sequences are not counted; more than
    # max_gaps remaining gaps trigger the exhaustive alignment.
    max_gaps = 5
    peptide_gap_count = peptide.count("-")
    total_gaps = alignment.getNumGaps()
    ngaps = total_gaps - 3 * peptide_gap_count - abs(len(wobble) - len(cds))

    if options.loglevel >= 6:
        _log("# alignment between wobble and cds: ngaps=%i, npeptide_gaps=%i\n"
             % (ngaps, peptide_gap_count))
        printPrettyAlignment(seq_wobble, seq_cds, peptide, alignment, options)

    if ngaps > max_gaps:
        if options.loglevel >= 2:
            _log("# too many gaps (%i>%i), realigning exhaustively.\n"
                 % (ngaps, max_gaps))
            options.stdlog.flush()

        exhaustive = alignlib_lite.py_makeAlignmentVector()
        AlignExhaustive(seq_wobble, seq_cds, seq_peptide, exhaustive, options)

        if options.loglevel >= 6:
            _log("# full alignment between wobble and cds:\n")
            options.stdlog.flush()
            printPrettyAlignment(
                seq_wobble, seq_cds, peptide, exhaustive, options)

        alignment = exhaustive

    # Walk the rows one codon (three positions) per peptide residue and
    # drop every codon that has at least one unaligned position.
    for codon_start in range(0, len(peptide) * 3, 3):
        codon_rows = (codon_start, codon_start + 1, codon_start + 2)
        if any(alignment.mapRowToCol(row) < 0 for row in codon_rows):
            alignment.removeRowRegion(codon_start, codon_start + 3)

    if alignment.getLength() == 0:
        if options.loglevel >= 1:
            _log("# WARNING: empty alignment\n")
            if options.loglevel >= 6:
                _log("# peptide original: %s\n" % peptide)
                _log("# cds original    : %s\n" % cds)
                _log("# wobble sequence : %s\n" % wobble)
        raise ValueError("empty alignment")

    # sanity checks: the alignment must not extend past either sequence
    assert alignment.getRowTo() <= seq_wobble.getLength()
    assert alignment.getColTo() <= seq_cds.getLength()

    return alignment