def gff(args):
    """
    %prog gff pslfile

    Convert a PSL alignment file to GFF3 format.

    Emits one ``EST_match`` (or ``--type``) feature per PSL record, followed
    by one ``match_part`` child feature per alignment block.  Coordinates are
    converted from PSL's 0-origin to GFF's 1-origin convention.
    """
    p = OptionParser(gff.__doc__)
    p.add_option("--source", default="GMAP",
                 help="specify GFF source [default: %default]")
    p.add_option("--type", default="EST_match",
                 help="specify GFF feature type [default: %default]")
    p.add_option("--suffix", default=".match",
                 help="match ID suffix [default: \"%default\"]")
    p.add_option("--swap", default=False, action="store_true",
                 help="swap query and target features [default: %default]")
    p.add_option("--simple_score", default=False, action="store_true",
                 help="calculate a simple percent score [default: %default]")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pslfile, = args
    fw = must_open(opts.outfile, "w")
    print("##gff-version 3", file=fw)

    psl = Psl(pslfile)
    # Use a distinct loop variable: the original reused `p`, shadowing the
    # OptionParser above.
    for rec in psl:
        if opts.swap:
            # NOTE(review): the original wrote the bare expression `p.swap`,
            # which only has an effect if PslLine.swap is a property; if it is
            # a plain method this was a silent no-op and should be rec.swap().
            # Confirm against the PslLine class definition.
            rec.swap
        psl.trackMatches(rec.qName)
        # switch from 0-origin (PSL) to 1-origin (GFF)
        rec.qStart += 1
        rec.tStart += 1
        print(rec.gffline(source=opts.source, type=opts.type,
                          suffix=opts.suffix, primary_tag="ID",
                          alt_score=opts.simple_score,
                          count=psl.getMatchCount(rec.qName)), file=fw)

        # create an empty PslLine() object and load only
        # the targetName, queryName and strand info
        part = PslLine("\t".join(str(x) for x in [0] * rec.nargs))
        part.tName, part.qName, part.strand = rec.tName, rec.qName, rec.strand

        # One match_part per alignment block.  (xrange was Python 2 only and
        # is a NameError under Python 3; range behaves identically here.)
        nparts = len(rec.qStarts)
        for n in range(nparts):
            aLen = rec.blockSizes[n]
            part.qStart = rec.qStarts[n] + 1
            part.tStart = rec.tStarts[n] + 1
            part.qEnd = part.qStart + aLen - 1
            part.tEnd = part.tStart + aLen - 1

            # PSL stores minus-strand query block starts in reverse-complement
            # coordinates; convert back to forward-strand coordinates.
            if part.strand == "-":
                part.qStart = rec.qSize - (rec.qStarts[n] + rec.blockSizes[n]) + 1
                part.qEnd = rec.qSize - rec.qStarts[n]

            print(part.gffline(source=opts.source, suffix=opts.suffix,
                               count=psl.getMatchCount(part.qName)), file=fw)