def main():
    """Filter Cuffcompare output by sample count and/or exon proportion.

    Command-line options:
      -f  combined.gtf file (always required)
      -t  tracking file (required when -s > 0)
      -s  number of samples an ID must appear in (filter active when > 0)
      -e  minimum exon proportion threshold (filter active when > 0)

    Results are written next to the inputs with a ".filtered" suffix.
    Prints the usage text and exits with status -1 when no filter is
    active or a required input is missing; prints a message and exits
    with status -1 when an output file already exists.
    """
    parser = OptionParser()
    parser.add_option("-f", dest="gtf", default=None,
                      help="combined.gtf file from Cuffcompare")
    parser.add_option("-t", dest="tracking", default=None,
                      help="tracking file from Cuffcompare")
    parser.add_option("-s", dest="samples", default=0, type="int",
                      help="number of samples a ID must appear in")
    parser.add_option("-e", dest="exon_thresh", default=0, type="float",
                      help="minimum proportion of exons of longest isoform "
                           "that must be in a transcript")
    (options, args) = parser.parse_args()

    use_samples = options.samples > 0
    use_exons = options.exon_thresh > 0

    # At least one filter must be active.  Testing each value separately
    # (rather than their sum) keeps negative values from slipping through.
    if not (use_samples or use_exons):
        parser.print_help()
        exit(-1)

    # The GTF is needed by both filters; the tracking file only by -s.
    if options.gtf is None or (use_samples and options.tracking is None):
        # print_help() writes the usage itself and returns None, so it
        # must not be wrapped in a print.
        parser.print_help()
        exit(-1)

    # Refuse to overwrite earlier results.
    trackout = None
    if use_samples:
        trackout = options.tracking + ".filtered"
        if os.path.isfile(trackout):
            print("%s already exists, aborting." % (trackout))
            exit(-1)

    gtfout = options.gtf + ".filtered"
    if os.path.isfile(gtfout):
        print("%s already exists, aborting." % (gtfout))
        exit(-1)

    gtflines = GTFtoDict(options.gtf)

    if use_samples:
        tracklines = parseTracking(options.tracking)
        tracklines = filterByNumsamples(tracklines, int(options.samples))
        gtflines = filterGTFByTracking(gtflines, tracklines)
        outputTracking(tracklines, trackout)

    if use_exons:
        gtflines = filterGTFByExonCount(gtflines, options.exon_thresh)

    outputGTF(gtflines, gtfout)
def main():
    """Report transcript lengths of a GTF file or filter it by size.

    Command-line options:
      -g/--gtf       GTF file to analyze (required, must exist)
      -l/--length    write the length of each transcript and stop
      -m/--min_size  remove transcripts below this size
      -M/--max_size  remove transcripts greater than this size
      -o/--outfile   output filename (must be given and must not exist)

    Exits with status -1 on invalid input or when the output file already
    exists, and with status 0 after a successful --length run.
    """
    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)
    description = "Counts up size of transcripts in a GTF file. Can also " \
                  "filter on minimum transcript size."
    parser = ArgumentParser(description=description)
    parser.add_argument("-g", "--gtf", dest="gtf", default=False, type=str,
                        required=True, help="gtf file to analyze")
    parser.add_argument("-l", "--length", dest="length", default=False,
                        action="store_true",
                        help="output length of each transcript")
    parser.add_argument("-m", "--min_size", dest="min_size", default=False,
                        type=int, help="remove transcripts below this size.")
    parser.add_argument("-M", "--max_size", dest="max_size", default=False,
                        type=int,
                        help="remove transcripts greater than this size.")
    parser.add_argument("-o", "--outfile", dest="outfn", default=False,
                        type=str)
    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        exit(-1)

    # Every code path below writes to args.outfn, so validate it once, up
    # front, before spending time parsing the GTF.
    if not args.outfn:
        logging.error("need to provide an output filename.")
        parser.print_help()
        exit(-1)
    if os.path.isfile(args.outfn):
        logging.error("%s already exists, aborting.", args.outfn)
        exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if args.length:
        lengths = calculateLengths(gtflines)
        outputLengths(lengths, args.outfn)
        # Successful completion: report status 0, not an error code.
        exit(0)

    if args.min_size:
        gtflines = filterByMinLength(gtflines, args.min_size)

    if args.max_size:
        gtflines = filterByMaxLength(gtflines, args.max_size)

    outputGTF(gtflines, args.outfn)
def main():
    """Edit the attributes of a GTF file according to command-line actions.

    Command-line options:
      -g/--gtf      input GTF file (required, must exist)
      -o/--output   output filename (required, must not exist)
      -s/--source   attributes to replace the -r attributes
      -r/--replace  attributes to be replaced (same count as -s)
      -d/--delete   attributes to be deleted
      -a/--add      file of attributes to be added
      -f/--filter   file of attribute to filter on
      -m/--move     attributes to move to the front

    Actions run in the order swap, add, delete, filter, move, and the
    result is written to the output file.  Exits with status -1 on any
    invalid combination of arguments or missing input file.
    """
    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)
    description = "Replaces the attributes in -r with the ones from -s. " \
                  "Deletes the attributes in -d."
    parser = ArgumentParser(description=description)
    parser.add_argument("-g", "--gtf", dest="gtf", default=False, type=str,
                        required=True,
                        help="combined.gtf file from Cuffcompare")
    parser.add_argument("-s", "--source", nargs="*", dest="source",
                        default=False,
                        help="attributes to replace the -r attributes")
    parser.add_argument("-r", "--replace", nargs="*", dest="replace",
                        default=False, help="attributes to be replaced")
    parser.add_argument("-d", "--delete", nargs="*", dest="delete",
                        default=False, help="attributes to be deleted")
    parser.add_argument("-a", "--add", dest="add", default=False, type=str,
                        help="file of attributes to be added")
    parser.add_argument("-f", "--filter", dest="filter", default=False,
                        type=str, help="file of attribute to filter on")
    parser.add_argument("-o", "--output", dest="output", default=False,
                        type=str, required=True, help="output filename")
    # NOTE(review): the help text says "list" but no nargs is set, so
    # args.move is a single string - confirm what reorderAttributes expects.
    parser.add_argument("-m", "--move", dest="move", default=False,
                        help="list of attributes to move to the front")
    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        exit(-1)

    if os.path.isfile(args.output):
        logging.error("%s already exists.", args.output)
        parser.print_help()
        exit(-1)

    # check to make sure arguments make sense
    # -s / -r are False when omitted; normalise to lists so that giving
    # only one of them yields the error below instead of len(False).
    source = args.source or []
    replace = args.replace or []
    if len(source) != len(replace):
        logging.error("Source and replacement lengths must be the same.")
        parser.print_help()
        exit(-1)

    if not (source or args.delete or args.add or args.filter or args.move):
        logging.error("Must provide at least one action.")
        parser.print_help()
        exit(-1)

    # Verify auxiliary input files before the GTF is parsed.
    for aux_file in (args.add, args.filter):
        if aux_file and not os.path.isfile(aux_file):
            logging.error("%s cannot be found.", aux_file)
            exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if source:
        logging.info("Swapping attributes.")
        gtflines = swapAttributes(gtflines, source, replace)

    if args.add:
        logging.info("Adding attributes.")
        gtflines = addAttribute(gtflines, args.add)

    if args.delete:
        logging.info("Deleting attributes.")
        gtflines = delAttributes(gtflines, args.delete)

    if args.filter:
        logging.info("Filtering by attribute in file %s.", args.filter)
        gtflines = filterAttributes(gtflines, args.filter)

    if args.move:
        logging.info("Moving %s to the front of the attributes.", args.move)
        gtflines = reorderAttributes(gtflines, args.move)

    outputGTF(gtflines, args.output)
def main():
    """Report transcript lengths of a GTF file or filter it by size.

    Command-line options:
      -g/--gtf       GTF file to analyze (required, must exist)
      -l/--length    write the length of each transcript and stop
      -m/--min_size  remove transcripts below this size
      -M/--max_size  remove transcripts greater than this size
      -o/--outfile   output filename (must be given and must not exist)

    Exits with status -1 on invalid input or when the output file already
    exists, and with status 0 after a successful --length run.
    """
    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)
    description = "Counts up size of transcripts in a GTF file. Can also " \
                  "filter on minimum transcript size."
    parser = ArgumentParser(description=description)
    parser.add_argument("-g", "--gtf", dest="gtf", default=False, type=str,
                        required=True, help="gtf file to analyze")
    parser.add_argument("-l", "--length", dest="length", default=False,
                        action="store_true",
                        help="output length of each transcript")
    parser.add_argument("-m", "--min_size", dest="min_size", default=False,
                        type=int, help="remove transcripts below this size.")
    parser.add_argument("-M", "--max_size", dest="max_size", default=False,
                        type=int,
                        help="remove transcripts greater than this size.")
    parser.add_argument("-o", "--outfile", dest="outfn", default=False,
                        type=str)
    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        exit(-1)

    # All branches below write to args.outfn; check it a single time here
    # so the no-action path cannot reach outputGTF with a missing or
    # already-existing output file.
    if not args.outfn:
        logging.error("need to provide an output filename.")
        parser.print_help()
        exit(-1)
    if os.path.isfile(args.outfn):
        logging.error("%s already exists, aborting.", args.outfn)
        exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if args.length:
        lengths = calculateLengths(gtflines)
        outputLengths(lengths, args.outfn)
        # The length report succeeded, so exit with a success status.
        exit(0)

    if args.min_size:
        gtflines = filterByMinLength(gtflines, args.min_size)

    if args.max_size:
        gtflines = filterByMaxLength(gtflines, args.max_size)

    outputGTF(gtflines, args.outfn)