Example #1
0
File: psl.py Project: rrane/jcvi
def gff(args):
    """
    %prog gff pslfile

    Convert to gff format.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser below), so keep it terse.
    parser = OptionParser(gff.__doc__)
    parser.add_option("--source", default="GMAP",
                      help="specify GFF source [default: %default]")
    parser.add_option("--type", default="EST_match",
                      help="specify GFF feature type [default: %default]")
    parser.add_option("--suffix", default=".match",
                      help="match ID suffix [default: \"%default\"]")
    parser.add_option("--swap", default=False, action="store_true",
                      help="swap query and target features [default: %default]")
    parser.add_option("--simple_score", default=False, action="store_true",
                      help="calculate a simple percent score [default: %default]")
    parser.set_outfile()

    opts, args = parser.parse_args(args)

    # Exactly one positional argument (the PSL file) is required;
    # otherwise show the help text and exit.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    pslfile = args[0]
    fw = must_open(opts.outfile, "w")

    print >> fw, "##gff-version 3"
    psl = Psl(pslfile)
    for rec in psl:
        if opts.swap:
            # Bare attribute access: this only has an effect if `swap` is
            # implemented as a property on the record -- TODO confirm.
            rec.swap

        psl.trackMatches(rec.qName)

        # Shift the start coordinates from 0-origin to 1-origin.
        rec.qStart += 1
        rec.tStart += 1
        if rec.strand == "-":
            # Minus-strand records carry their query interval reversed.
            rec.qStart, rec.qEnd = rec.qEnd, rec.qStart

        # Parent feature line for the whole alignment.
        parent_line = rec.gffline(source=opts.source, type=opts.type,
                                  suffix=opts.suffix, primary_tag="ID",
                                  alt_score=opts.simple_score,
                                  count=psl.getMatchCount(rec.qName))
        print >> fw, parent_line

        # Build a blank PslLine (every column "0") and copy over only the
        # target name, query name and strand; it is reused for every block.
        part = PslLine("\t".join(["0"] * rec.nargs))
        part.tName = rec.tName
        part.qName = rec.qName
        part.strand = rec.strand

        nblocks = len(rec.qStarts)
        for idx in xrange(nblocks):
            q_begin = rec.qStarts[idx]
            t_begin = rec.tStarts[idx]
            size = rec.blockSizes[idx]

            # 1-origin start, end = 0-origin start + block length.
            part.qStart = q_begin + 1
            part.qEnd = q_begin + size
            part.tStart = t_begin + 1
            part.tEnd = t_begin + size

            if part.strand == "-":
                # On the minus strand the query interval is taken from the
                # mirrored block (counted from the other end of the block
                # list) and written with start > end.
                mirror = nblocks - 1 - idx
                part.aLen = rec.blockSizes[mirror]
                part.qEnd = rec.qStarts[mirror]
                part.qStart = part.qEnd + part.aLen
                part.qEnd += 1

            # Child line for this block, using gffline()'s default
            # type and primary tag.
            block_line = part.gffline(source=opts.source, suffix=opts.suffix,
                                      count=psl.getMatchCount(part.qName))
            print >> fw, block_line