Esempio n. 1
0
def Alignment2DNA(alignment, query_from=0, sbjct_from=0):
    """Convert a peptide2genome alignment into a nucleotide2nucleotide
    alignment.

    Query lengths are scaled by three, so the query side of the result is
    expressed in codon (nucleotide) coordinates instead of peptide
    coordinates.

    Arguments
    ---------
    alignment : list
        List of tuples ``(state, l_query, l_sbjct)`` of the alignment in
        CIGAR format.
    query_from : int
        Start position of alignment on peptide sequence.
    sbjct_from : int
        Start position of alignment on nucleotide sequence.

    Returns
    -------
    alignment : object
       The alignment, as created by
       ``alignlib_lite.py_makeAlignmentVector()``.
    """

    # States whose query length is fixed, whatever the tuple carries.
    fixed_query_length = {
        "A": 0, "B": 1, "C": 2,
        "a": 0, "b": 2, "c": 1,
    }

    result = alignlib_lite.py_makeAlignmentVector()

    # the query cursor counts nucleotides, the sbjct cursor is used as given
    sbjct_cursor = sbjct_from
    query_cursor = query_from * 3

    for state, n_query, n_sbjct in alignment:

        if state in fixed_query_length:
            n_query = fixed_query_length[state]
        elif state == "S":
            # state "S" consumes as much query as sbjct
            n_query = n_sbjct
        else:
            # every other state: count query residues as nucleotides
            n_query = n_query * 3

        # only segments that advance on both sequences become a diagonal
        if n_query > 0 and n_sbjct > 0:
            alignlib_lite.addDiagonal2Alignment(
                result,
                query_cursor,
                query_cursor + n_query,
                sbjct_cursor - query_cursor)

        query_cursor += n_query
        sbjct_cursor += n_sbjct

    return result
Esempio n. 2
0
def PrintCluster(cluster,
                 cluster_id,
                 lengths,
                 peptide_sequences=None,
                 regex_preferred=None):
    """Print one tab-separated line per member of a cluster.

    The representative is the member with the largest entry in *lengths*
    (members missing from *lengths* count as length 0; on ties the first
    member wins). If *regex_preferred* is given and a member with non-zero
    length matches it, the longest matching member is taken instead.

    Each line holds representative, member and member length. When
    *peptide_sequences* is non-empty, a fourth column with the
    ``alignlib_lite.AlignmentFormatEmissions`` rendering of the
    representative-to-member alignment is appended.

    Arguments
    ---------
    cluster : iterable
        Member identifiers; iterated twice.
    cluster_id : object
        Returned unchanged.
    lengths : dict
        Maps member identifier to sequence length.
    peptide_sequences : dict, optional
        Maps member identifier to peptide sequence.
    regex_preferred : string, optional
        Regular expression selecting preferred representatives.

    Returns
    -------
    cluster_id : object
        The *cluster_id* argument.
    """

    rx = re.compile(regex_preferred) if regex_preferred else None

    def length_of(member):
        # members without a known length are treated as length 0
        return lengths[member] if member in lengths else 0

    longest, longest_len = None, 0
    preferred, preferred_len = None, 0
    for member in cluster:
        length = length_of(member)

        if length > longest_len:
            longest, longest_len = member, length

        if rx and rx.search(member) and length > preferred_len:
            preferred, preferred_len = member, length

    # a preferred member only wins if it has a non-zero length
    rep = preferred if preferred_len > 0 else longest

    # sys.stdout.write / str.join emit the same bytes as the former
    # ``print string.join(...)`` but also work under Python 3.
    write = sys.stdout.write

    for mem in cluster:
        columns = [rep, mem, length_of(mem)]

        if peptide_sequences:
            map_rep2mem = alignlib_lite.makeAlignmentVector()

            if rep == mem and rep in lengths:
                # the representative maps onto itself with one diagonal
                alignlib_lite.addDiagonal2Alignment(
                    map_rep2mem, 1, lengths[rep], 0)
            elif mem in peptide_sequences and rep in peptide_sequences:
                # local dynamic-programming alignment; -10.0 / -1.0 are
                # presumably gap open / extension penalties -- TODO confirm
                alignator = alignlib_lite.makeAlignatorDPFull(
                    alignlib_lite.ALIGNMENT_LOCAL, -10.0, -1.0)
                alignator.align(
                    map_rep2mem,
                    alignlib_lite.makeSequence(peptide_sequences[rep]),
                    alignlib_lite.makeSequence(peptide_sequences[mem]))
            # otherwise the alignment stays empty and is printed as such

            columns.append(alignlib_lite.AlignmentFormatEmissions(map_rep2mem))

        write("\t".join(map(str, columns)) + "\n")

    sys.stdout.flush()

    return cluster_id
Esempio n. 3
0
        nexons += 1

        if last_exon.mQueryToken != this_exon.mQueryToken:

            if last_exon.mQueryToken:
                f = alignlib_lite.AlignmentFormatEmissions(
                    map_prediction2genome)
                print string.join(
                    map(str, (last_exon.mQueryToken, last_exon.mSbjctToken,
                              last_exon.mSbjctStrand, f)), "\t")

                npairs += 1
            map_prediction2genome.clear()

        alignlib_lite.addDiagonal2Alignment(
            map_prediction2genome, this_exon.mPeptideFrom + 1,
            this_exon.mPeptideTo + 1,
            this_exon.mGenomeFrom - this_exon.mPeptideFrom)

        last_exon = this_exon

    f = alignlib_lite.AlignmentFormatEmissions(map_prediction2genome)
    print string.join(
        map(str, (last_exon.mQueryToken, last_exon.mSbjctToken,
                  last_exon.mSbjctStrand, f)), "\t")
    npairs += 1

    print "# nexons=%i, npairs=%i" % (nexons, npairs)

    print E.GetFooter()

Esempio n. 4
0
        if last_exon.mQueryToken != this_exon.mQueryToken:

            if last_exon.mQueryToken:
                f = alignlib_lite.AlignmentFormatEmissions(
                    map_prediction2genome)
                print string.join(map(str, (last_exon.mQueryToken,
                                            last_exon.mSbjctToken,
                                            last_exon.mSbjctStrand,
                                            f)), "\t")

                npairs += 1
            map_prediction2genome.clear()

        alignlib_lite.addDiagonal2Alignment(map_prediction2genome,
                                            this_exon.mPeptideFrom + 1,
                                            this_exon.mPeptideTo + 1,
                                            this_exon.mGenomeFrom - this_exon.mPeptideFrom)

        last_exon = this_exon

    f = alignlib_lite.AlignmentFormatEmissions(map_prediction2genome)
    print string.join(map(str, (last_exon.mQueryToken,
                                last_exon.mSbjctToken,
                                last_exon.mSbjctStrand,
                                f)), "\t")
    npairs += 1

    print "# nexons=%i, npairs=%i" % (nexons, npairs)

    print E.GetFooter()
Esempio n. 5
0
def Alignment2DNA(alignment, query_from=0, sbjct_from=0):
    """Convert a peptide2genome alignment into a nucleotide2nucleotide
    alignment.

    Query lengths are scaled by three, so the query side of the result is
    expressed in codon (nucleotide) coordinates instead of peptide
    coordinates.

    Arguments
    ---------
    alignment : list
        List of tuples ``(state, l_query, l_sbjct)`` of the alignment in
        CIGAR format.
    query_from : int
        Start position of alignment on peptide sequence.
    sbjct_from : int
        Start position of alignment on nucleotide sequence.

    Returns
    -------
    alignment : object
       The alignment, as created by
       ``alignlib_lite.py_makeAlignmentVector()``.
    """

    # States whose query length is fixed, whatever the tuple carries.
    fixed_query_length = {
        "A": 0, "B": 1, "C": 2,
        "a": 0, "b": 2, "c": 1,
    }

    result = alignlib_lite.py_makeAlignmentVector()

    # the query cursor counts nucleotides, the sbjct cursor is used as given
    sbjct_cursor = sbjct_from
    query_cursor = query_from * 3

    for state, n_query, n_sbjct in alignment:

        if state in fixed_query_length:
            n_query = fixed_query_length[state]
        elif state == "S":
            # state "S" consumes as much query as sbjct
            n_query = n_sbjct
        else:
            # every other state: count query residues as nucleotides
            n_query = n_query * 3

        # only segments that advance on both sequences become a diagonal
        if n_query > 0 and n_sbjct > 0:
            alignlib_lite.addDiagonal2Alignment(
                result,
                query_cursor,
                query_cursor + n_query,
                sbjct_cursor - query_cursor)

        query_cursor += n_query
        sbjct_cursor += n_sbjct

    return result
Esempio n. 6
0
def PrintCluster(cluster,
                 cluster_id,
                 lengths,
                 peptide_sequences=None,
                 regex_preferred=None):
    """Print one tab-separated line per member of a cluster.

    The representative is the member with the largest entry in *lengths*
    (members missing from *lengths* count as length 0; on ties the first
    member wins). If *regex_preferred* is given and a member with non-zero
    length matches it, the longest matching member is taken instead.

    Each line holds representative, member and member length. When
    *peptide_sequences* is non-empty, a fourth column with the
    ``alignlib_lite.AlignmentFormatEmissions`` rendering of the
    representative-to-member alignment is appended.

    Arguments
    ---------
    cluster : iterable
        Member identifiers; iterated twice.
    cluster_id : object
        Returned unchanged.
    lengths : dict
        Maps member identifier to sequence length.
    peptide_sequences : dict, optional
        Maps member identifier to peptide sequence.
    regex_preferred : string, optional
        Regular expression selecting preferred representatives.

    Returns
    -------
    cluster_id : object
        The *cluster_id* argument.
    """

    rx = re.compile(regex_preferred) if regex_preferred else None

    def length_of(member):
        # members without a known length are treated as length 0
        return lengths[member] if member in lengths else 0

    longest, longest_len = None, 0
    preferred, preferred_len = None, 0
    for member in cluster:
        length = length_of(member)

        if length > longest_len:
            longest, longest_len = member, length

        if rx and rx.search(member) and length > preferred_len:
            preferred, preferred_len = member, length

    # a preferred member only wins if it has a non-zero length
    rep = preferred if preferred_len > 0 else longest

    # sys.stdout.write / str.join emit the same bytes as the former
    # ``print string.join(...)`` but also work under Python 3.
    write = sys.stdout.write

    for mem in cluster:
        columns = [rep, mem, length_of(mem)]

        if peptide_sequences:
            map_rep2mem = alignlib_lite.makeAlignmentVector()

            if rep == mem and rep in lengths:
                # the representative maps onto itself with one diagonal
                alignlib_lite.addDiagonal2Alignment(
                    map_rep2mem, 1, lengths[rep], 0)
            elif mem in peptide_sequences and rep in peptide_sequences:
                # local dynamic-programming alignment; -10.0 / -1.0 are
                # presumably gap open / extension penalties -- TODO confirm
                alignator = alignlib_lite.makeAlignatorDPFull(
                    alignlib_lite.ALIGNMENT_LOCAL, -10.0, -1.0)
                alignator.align(
                    map_rep2mem,
                    alignlib_lite.makeSequence(peptide_sequences[rep]),
                    alignlib_lite.makeSequence(peptide_sequences[mem]))
            # otherwise the alignment stays empty and is printed as such

            columns.append(alignlib_lite.AlignmentFormatEmissions(map_rep2mem))

        write("\t".join(map(str, columns)) + "\n")

    sys.stdout.flush()

    return cluster_id
Esempio n. 7
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Prediction lines are read from stdin; lines starting with ``#`` or
    ``id`` are ignored. Each prediction is converted into exons with
    ``Exons.Alignment2Exons`` and written to ``options.stdout`` in the
    layout selected by ``--format``. Lines that fail to parse, and
    predictions yielding no exons, are logged to ``options.stdlog`` and
    skipped.
    """

    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is never handed to E.Start() below, so a caller
    # supplied argv appears to be ignored -- confirm against E.Start.

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-g", "--genome-file", dest="genome_file", type="string",
        help="filename with genome.")

    parser.add_option(
        "-o", "--is-forward-coordinates", dest="forward_coordinates",
        action="store_true",
        help="input uses forward coordinates.")

    # "gff-match" and "gff-exon" are implemented below and must be listed
    # here to be selectable. "gff" is kept for backward compatibility; it
    # has no branch of its own and produces the default output.
    parser.add_option(
        "-f", "--format", dest="format", type="choice",
        choices=(
            "default", "cds", "cdnas", "map", "gff", "gff-match",
            "gff-exon", "intron-fasta", "exons"),
        help="output format.")

    parser.add_option(
        "-r", "--reset-to-start", dest="reset_to_start", action="store_true",
        help="move genomic coordinates to begin from 0.")

    parser.add_option("--reset-query", dest="reset_query", action="store_true",
                      help="move peptide coordinates to begin from 0.")

    parser.set_defaults(
        genome_file=None,
        forward_coordinates=False,
        format="default",
        reset_to_start=False,
        reset_query=False)

    (options, args) = E.Start(parser, add_pipe_options=True)

    if len(args) > 0:
        sys.stdout.write("%s no arguments required.\n" % (USAGE,))
        sys.exit(2)

    def write_row(*fields):
        # one tab-separated record on the configured output stream
        options.stdout.write("\t".join(map(str, fields)) + "\n")

    cds_id = 1

    entry = PredictionParser.PredictionParserEntry()

    fasta = IndexedFasta.IndexedFasta(options.genome_file)

    ninput, noutput, nskipped, nerrors = 0, 0, 0, 0

    for line in sys.stdin:

        # comments and the header line carry no prediction
        if line.startswith(("#", "id")):
            continue

        ninput += 1

        try:
            entry.Read(line)
        except ValueError as msg:
            options.stdlog.write(
                "# parsing failed with msg %s in line %s" % (msg, line))
            nerrors += 1
            continue

        cds = Exons.Alignment2Exons(entry.mMapPeptide2Genome,
                                    query_from=entry.mQueryFrom,
                                    sbjct_from=entry.mSbjctGenomeFrom,
                                    add_stop_codon=0)

        # cds[-1] below would raise IndexError on an empty exon list
        if not cds:
            options.stdlog.write("# no exons derived for line %s" % line)
            nskipped += 1
            continue

        for cd in cds:
            cd.mSbjctToken = entry.mSbjctToken
            cd.mSbjctStrand = entry.mSbjctStrand

        if cds[-1].mGenomeTo != entry.mSbjctGenomeTo:
            options.stdlog.write(
                "# WARNING: discrepancy in exon calculation!!!\n")
            for cd in cds:
                options.stdlog.write("# %s\n" % str(cd))
            options.stdlog.write("# %s\n" % entry)

        lsequence = fasta.getLength(entry.mSbjctToken)
        genomic_sequence = fasta.getSequence(entry.mSbjctToken,
                                             entry.mSbjctStrand,
                                             entry.mSbjctGenomeFrom,
                                             entry.mSbjctGenomeTo)

        # deal with forward coordinates: convert them to negative strand
        # coordinates
        if options.forward_coordinates and entry.mSbjctStrand == "-":
            entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo = (
                lsequence - entry.mSbjctGenomeTo,
                lsequence - entry.mSbjctGenomeFrom)
            for cd in cds:
                cd.InvertGenomicCoordinates(lsequence)

        # attach sequence to cds
        for cd in cds:
            start = cd.mGenomeFrom - entry.mSbjctGenomeFrom
            end = cd.mGenomeTo - entry.mSbjctGenomeFrom
            cd.mSequence = genomic_sequence[start:end]

        # reset coordinates for query
        # NOTE(review): options.reset_query is parsed but never consulted;
        # the query reset is tied to --reset-to-start instead. It also
        # reads entry.mPeptideFrom although the entry is otherwise
        # addressed through mQueryFrom -- confirm both before rewiring.
        if options.reset_to_start:
            offset = entry.mPeptideFrom
            for cd in cds:
                cd.mPeptideFrom -= offset
                cd.mPeptideTo -= offset

        # move genomic coordinates so that the prediction starts at 0
        if options.reset_to_start:
            offset = entry.mSbjctGenomeFrom
            for cd in cds:
                cd.mGenomeFrom -= offset
                cd.mGenomeTo -= offset
        else:
            offset = 0

        # Exactly one layout is written per prediction. "cds" is part of
        # the chain so that it no longer also emits the default records.
        if options.format == "cds":
            for rank, cd in enumerate(cds, 1):
                cd.mQueryToken = entry.mQueryToken
                cd.mSbjctToken = entry.mSbjctToken
                cd.mSbjctStrand = entry.mSbjctStrand
                cd.mRank = rank
                options.stdout.write(str(cd) + "\n")

        elif options.format == "exons":
            for rank, cd in enumerate(cds, 1):
                write_row(entry.mPredictionId,
                          cd.mSbjctToken,
                          cd.mSbjctStrand,
                          rank,
                          cd.frame,
                          cd.mPeptideFrom,
                          cd.mPeptideTo,
                          cd.mGenomeFrom,
                          cd.mGenomeTo)

        elif options.format == "cdnas":
            write_row(entry.mPredictionId,
                      entry.mQueryToken,
                      entry.mSbjctToken,
                      entry.mSbjctStrand,
                      entry.mSbjctGenomeFrom - offset,
                      entry.mSbjctGenomeTo - offset,
                      genomic_sequence)

        elif options.format == "map":

            map_prediction2genome = alignlib_lite.makeAlignmentSet()

            for cd in cds:
                alignlib_lite.addDiagonal2Alignment(
                    map_prediction2genome,
                    cd.mPeptideFrom + 1,
                    cd.mPeptideTo,
                    (cd.mGenomeFrom - offset) - cd.mPeptideFrom)

            write_row(entry.mPredictionId,
                      entry.mSbjctToken,
                      entry.mSbjctStrand,
                      alignlib_lite.AlignmentFormatEmissions(
                          map_prediction2genome))

        elif options.format == "intron-fasta":
            # a single exon has no introns
            if len(cds) == 1:
                nskipped += 1
                continue

            # NOTE(review): the key ends with entry.mSbjctGenomeFrom while
            # the slice ends at cd.mGenomeFrom, and the slice subtracts
            # entry.mSbjctGenomeFrom even after --reset-to-start already
            # shifted the exon coordinates -- verify intended behaviour.
            last = cds[0].mGenomeTo
            for rank, cd in enumerate(cds[1:], 1):
                key = "%s %i %s:%s:%i:%i" % (
                    entry.mPredictionId, rank, entry.mSbjctToken,
                    entry.mSbjctStrand, last, entry.mSbjctGenomeFrom)
                sequence = genomic_sequence[
                    last - entry.mSbjctGenomeFrom:
                    cd.mGenomeFrom - entry.mSbjctGenomeFrom]
                options.stdout.write(">%s\n%s\n" % (key, sequence))
                last = cd.mGenomeTo

        elif options.format == "gff-match":
            record = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tTarget \"%s\" %i %i; Score %i; Introns %i; Frameshifts %i; Stops %i" % \
                (entry.mSbjctToken,
                 "gpipe", "similarity",
                 entry.mSbjctGenomeFrom,
                 entry.mSbjctGenomeTo,
                 entry.mPercentIdentity,
                 entry.mSbjctStrand,
                 ".",
                 entry.mQueryToken,
                 entry.mQueryFrom,
                 entry.mQueryTo,
                 entry.score,
                 entry.mNIntrons,
                 entry.mNFrameShifts,
                 entry.mNStopCodons)
            options.stdout.write(record + "\n")

        elif options.format == "gff-exon":
            for rank, cd in enumerate(cds, 1):
                # floor division: exon peptide coordinates are divided by
                # three to give the 1-based Target range
                record = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tTarget \"%s\" %i %i; Score %i; Rank %i/%i; Prediction %i" % \
                    (entry.mSbjctToken,
                     "gpipe", "similarity",
                     cd.mGenomeFrom,
                     cd.mGenomeTo,
                     entry.mPercentIdentity,
                     entry.mSbjctStrand,
                     ".",
                     entry.mQueryToken,
                     cd.mPeptideFrom // 3 + 1,
                     cd.mPeptideTo // 3 + 1,
                     entry.score,
                     rank,
                     len(cds),
                     entry.mPredictionId)
                options.stdout.write(record + "\n")

        else:
            # default layout: peptide coordinates are replaced by running
            # offsets accumulated from the exon lengths on the genome
            exon_from = 0
            for cd in cds:
                cd.mPeptideFrom = exon_from
                exon_from += cd.mGenomeTo - cd.mGenomeFrom
                cd.mPeptideTo = exon_from
                write_row(cds_id, entry.mPredictionId,
                          cd.mPeptideFrom, cd.mPeptideTo,
                          cd.frame,
                          cd.mGenomeFrom, cd.mGenomeTo,
                          cd.mSequence)
                cds_id += 1

        noutput += 1
Esempio n. 8
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Prediction lines are read from stdin; lines starting with ``#`` or
    ``id`` are ignored. Each prediction is converted into exons with
    ``Exons.Alignment2Exons`` and written to ``options.stdout`` in the
    layout selected by ``--format``. Lines that fail to parse, and
    predictions yielding no exons, are logged to ``options.stdlog`` and
    skipped.
    """

    if argv is None:
        argv = sys.argv
    # NOTE(review): argv is never handed to E.Start() below, so a caller
    # supplied argv appears to be ignored -- confirm against E.Start.

    parser = E.OptionParser(
        version=
        "%prog version: $Id: gpipe/predictions2cds.py 1858 2008-05-13 15:07:05Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-g",
                      "--genome-file",
                      dest="genome_file",
                      type="string",
                      help="filename with genome.")

    parser.add_option("-o",
                      "--forward-coordinates",
                      dest="forward_coordinates",
                      action="store_true",
                      help="input uses forward coordinates.")

    # "gff-match" and "gff-exon" are implemented below and must be listed
    # here to be selectable. "gff" is kept for backward compatibility; it
    # has no branch of its own and produces the default output.
    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="choice",
                      choices=("default", "cds", "cdnas", "map", "gff",
                               "gff-match", "gff-exon", "intron-fasta",
                               "exons"),
                      help="output format.")

    parser.add_option("-r",
                      "--reset-to-start",
                      dest="reset_to_start",
                      action="store_true",
                      help="move genomic coordinates to begin from 0.")

    parser.add_option("--reset-query",
                      dest="reset_query",
                      action="store_true",
                      help="move peptide coordinates to begin from 0.")

    parser.set_defaults(genome_file=None,
                        forward_coordinates=False,
                        format="default",
                        reset_to_start=False,
                        reset_query=False)

    (options, args) = E.Start(parser, add_pipe_options=True)

    if len(args) > 0:
        sys.stdout.write("%s no arguments required.\n" % (USAGE,))
        sys.exit(2)

    def write_row(*fields):
        # one tab-separated record on the configured output stream
        options.stdout.write("\t".join(map(str, fields)) + "\n")

    cds_id = 1

    entry = PredictionParser.PredictionParserEntry()

    fasta = IndexedFasta.IndexedFasta(options.genome_file)

    ninput, noutput, nskipped, nerrors = 0, 0, 0, 0

    for line in sys.stdin:

        # comments and the header line carry no prediction
        if line.startswith(("#", "id")):
            continue

        ninput += 1

        try:
            entry.Read(line)
        except ValueError as msg:
            options.stdlog.write("# parsing failed with msg %s in line %s" %
                                 (msg, line))
            nerrors += 1
            continue

        cds = Exons.Alignment2Exons(entry.mMapPeptide2Genome,
                                    query_from=entry.mQueryFrom,
                                    sbjct_from=entry.mSbjctGenomeFrom,
                                    add_stop_codon=0)

        # cds[-1] below would raise IndexError on an empty exon list
        if not cds:
            options.stdlog.write("# no exons derived for line %s" % line)
            nskipped += 1
            continue

        for cd in cds:
            cd.mSbjctToken = entry.mSbjctToken
            cd.mSbjctStrand = entry.mSbjctStrand

        if cds[-1].mGenomeTo != entry.mSbjctGenomeTo:
            options.stdlog.write(
                "# WARNING: discrepancy in exon calculation!!!\n")
            for cd in cds:
                options.stdlog.write("# %s\n" % str(cd))
            options.stdlog.write("# %s\n" % entry)

        lsequence = fasta.getLength(entry.mSbjctToken)
        genomic_sequence = fasta.getSequence(entry.mSbjctToken,
                                             entry.mSbjctStrand,
                                             entry.mSbjctGenomeFrom,
                                             entry.mSbjctGenomeTo)

        # deal with forward coordinates: convert them to negative strand
        # coordinates
        if options.forward_coordinates and entry.mSbjctStrand == "-":
            entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo = (
                lsequence - entry.mSbjctGenomeTo,
                lsequence - entry.mSbjctGenomeFrom)
            for cd in cds:
                cd.InvertGenomicCoordinates(lsequence)

        # attach sequence to cds
        for cd in cds:
            start = cd.mGenomeFrom - entry.mSbjctGenomeFrom
            end = cd.mGenomeTo - entry.mSbjctGenomeFrom
            cd.mSequence = genomic_sequence[start:end]

        # reset coordinates for query
        # NOTE(review): options.reset_query is parsed but never consulted;
        # the query reset is tied to --reset-to-start instead. It also
        # reads entry.mPeptideFrom although the entry is otherwise
        # addressed through mQueryFrom -- confirm both before rewiring.
        if options.reset_to_start:
            offset = entry.mPeptideFrom
            for cd in cds:
                cd.mPeptideFrom -= offset
                cd.mPeptideTo -= offset

        # move genomic coordinates so that the prediction starts at 0
        if options.reset_to_start:
            offset = entry.mSbjctGenomeFrom
            for cd in cds:
                cd.mGenomeFrom -= offset
                cd.mGenomeTo -= offset
        else:
            offset = 0

        # Exactly one layout is written per prediction. "cds" is part of
        # the chain so that it no longer also emits the default records.
        if options.format == "cds":
            for rank, cd in enumerate(cds, 1):
                cd.mQueryToken = entry.mQueryToken
                cd.mSbjctToken = entry.mSbjctToken
                cd.mSbjctStrand = entry.mSbjctStrand
                cd.mRank = rank
                options.stdout.write(str(cd) + "\n")

        elif options.format == "exons":
            for rank, cd in enumerate(cds, 1):
                write_row(entry.mPredictionId, cd.mSbjctToken,
                          cd.mSbjctStrand, rank, cd.frame, cd.mPeptideFrom,
                          cd.mPeptideTo, cd.mGenomeFrom, cd.mGenomeTo)

        elif options.format == "cdnas":
            write_row(entry.mPredictionId, entry.mQueryToken,
                      entry.mSbjctToken, entry.mSbjctStrand,
                      entry.mSbjctGenomeFrom - offset,
                      entry.mSbjctGenomeTo - offset, genomic_sequence)

        elif options.format == "map":

            map_prediction2genome = alignlib_lite.makeAlignmentSet()

            for cd in cds:
                alignlib_lite.addDiagonal2Alignment(
                    map_prediction2genome, cd.mPeptideFrom + 1, cd.mPeptideTo,
                    (cd.mGenomeFrom - offset) - cd.mPeptideFrom)

            write_row(entry.mPredictionId, entry.mSbjctToken,
                      entry.mSbjctStrand,
                      alignlib_lite.AlignmentFormatEmissions(
                          map_prediction2genome))

        elif options.format == "intron-fasta":
            # a single exon has no introns
            if len(cds) == 1:
                nskipped += 1
                continue

            # NOTE(review): the key ends with entry.mSbjctGenomeFrom while
            # the slice ends at cd.mGenomeFrom, and the slice subtracts
            # entry.mSbjctGenomeFrom even after --reset-to-start already
            # shifted the exon coordinates -- verify intended behaviour.
            last = cds[0].mGenomeTo
            for rank, cd in enumerate(cds[1:], 1):
                key = "%s %i %s:%s:%i:%i" % (
                    entry.mPredictionId, rank, entry.mSbjctToken,
                    entry.mSbjctStrand, last, entry.mSbjctGenomeFrom)
                intron_start = last - entry.mSbjctGenomeFrom
                intron_end = cd.mGenomeFrom - entry.mSbjctGenomeFrom
                sequence = genomic_sequence[intron_start:intron_end]
                options.stdout.write(">%s\n%s\n" % (key, sequence))
                last = cd.mGenomeTo

        elif options.format == "gff-match":
            record = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tTarget \"%s\" %i %i; Score %i; Introns %i; Frameshifts %i; Stops %i" % \
                (entry.mSbjctToken,
                 "gpipe", "similarity",
                 entry.mSbjctGenomeFrom,
                 entry.mSbjctGenomeTo,
                 entry.mPercentIdentity,
                 entry.mSbjctStrand,
                 ".",
                 entry.mQueryToken,
                 entry.mQueryFrom,
                 entry.mQueryTo,
                 entry.score,
                 entry.mNIntrons,
                 entry.mNFrameShifts,
                 entry.mNStopCodons)
            options.stdout.write(record + "\n")

        elif options.format == "gff-exon":
            for rank, cd in enumerate(cds, 1):
                # floor division: exon peptide coordinates are divided by
                # three to give the 1-based Target range
                record = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tTarget \"%s\" %i %i; Score %i; Rank %i/%i; Prediction %i" % \
                    (entry.mSbjctToken,
                     "gpipe", "similarity",
                     cd.mGenomeFrom,
                     cd.mGenomeTo,
                     entry.mPercentIdentity,
                     entry.mSbjctStrand,
                     ".",
                     entry.mQueryToken,
                     cd.mPeptideFrom // 3 + 1,
                     cd.mPeptideTo // 3 + 1,
                     entry.score,
                     rank,
                     len(cds),
                     entry.mPredictionId)
                options.stdout.write(record + "\n")

        else:
            # default layout: peptide coordinates are replaced by running
            # offsets accumulated from the exon lengths on the genome
            exon_from = 0
            for cd in cds:
                cd.mPeptideFrom = exon_from
                exon_from += cd.mGenomeTo - cd.mGenomeFrom
                cd.mPeptideTo = exon_from
                write_row(cds_id, entry.mPredictionId, cd.mPeptideFrom,
                          cd.mPeptideTo, cd.frame, cd.mGenomeFrom,
                          cd.mGenomeTo, cd.mSequence)
                cds_id += 1

        noutput += 1