예제 #1
0
def main():
    """Filter Cuffcompare output by sample count and/or exon proportion.

    Command-line options:
        -f  combined.gtf file from Cuffcompare (always required).
        -t  tracking file from Cuffcompare (required together with -s).
        -s  number of samples an ID must appear in; enables tracking-based
            filtering when greater than zero.
        -e  minimum proportion of exons of the longest isoform that must be
            in a transcript; enables exon-count filtering when greater than
            zero.

    Results are written next to the inputs as ``<input>.filtered``. The
    program refuses to overwrite an existing ``.filtered`` file.

    Exits with status -1 (after printing the help text or an explanatory
    message) when no filter is requested, when a required input option is
    missing, or when an output file already exists.
    """
    # Imported locally so the block does not depend on the interactive-only
    # ``exit`` helper that the ``site`` module injects into builtins.
    import sys

    parser = OptionParser()
    parser.add_option("-f", dest="gtf", default=None,
                      help="combined.gtf file from Cuffcompare")
    parser.add_option("-t", dest="tracking", default=None,
                      help="tracking file from Cuffcompare")
    parser.add_option("-s", dest="samples", default=0,
                      type="int",
                      help="number of samples a ID must appear in")
    parser.add_option("-e", dest="exon_thresh", default=0,
                      type="float",
                      help="minimum proportion of exons of longest isoform "
                      "that must be in a transcript")

    options, _ = parser.parse_args()

    filter_by_samples = options.samples > 0
    filter_by_exons = options.exon_thresh > 0

    # Test each threshold on its own: summing them lets a lone negative value
    # slip past and reach the output step with nothing to do.
    if not (filter_by_samples or filter_by_exons):
        parser.print_help()
        sys.exit(-1)

    trackout = None
    if filter_by_samples:
        if options.tracking is None:
            # print_help() writes the text itself and returns None, so it
            # must not be wrapped in a print.
            parser.print_help()
            sys.exit(-1)
        trackout = options.tracking + ".filtered"
        if os.path.isfile(trackout):
            print("%s already exists, aborting." % trackout)
            sys.exit(-1)

    # Both filters read the GTF and write the filtered GTF.
    if options.gtf is None:
        parser.print_help()
        sys.exit(-1)
    gtfout = options.gtf + ".filtered"
    if os.path.isfile(gtfout):
        print("%s already exists, aborting." % gtfout)
        sys.exit(-1)

    gtflines = GTFtoDict(options.gtf)
    if filter_by_samples:
        tracklines = parseTracking(options.tracking)
        tracklines = filterByNumsamples(tracklines, options.samples)
        gtflines = filterGTFByTracking(gtflines, tracklines)
        outputTracking(tracklines, trackout)

    if filter_by_exons:
        gtflines = filterGTFByExonCount(gtflines, options.exon_thresh)

    outputGTF(gtflines, gtfout)
예제 #2
0
def main():
    """Report transcript lengths from a GTF file, or filter it by size.

    Command-line options:
        -g/--gtf       GTF file to analyze (required, must exist).
        -l/--length    write the length of each transcript to the output
                       file and stop.
        -m/--min_size  remove transcripts below this size.
        -M/--max_size  remove transcripts greater than this size.
        -o/--outfile   output filename (must be given and must not exist).

    Exit status is -1 when the input GTF is missing, when no output filename
    is given, or when the output file already exists. A successful length
    report exits with status 0.
    """
    # Imported locally so the block does not depend on the interactive-only
    # ``exit`` helper that the ``site`` module injects into builtins.
    import sys

    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)

    description = "Counts up size of transcripts in a GTF file. Can also " \
                  "filter on minimum transcript size."

    parser = ArgumentParser(description=description)

    parser.add_argument("-g", "--gtf", dest="gtf", default=False,
                        type=str, required=True,
                        help="gtf file to analyze")
    parser.add_argument("-l", "--length", dest="length", default=False,
                        action="store_true",
                        help="output length of each transcript")
    parser.add_argument("-m", "--min_size", dest="min_size", default=False,
                        type=int,
                        help="remove transcripts below this size.")
    parser.add_argument("-M", "--max_size", dest="max_size", default=False,
                        type=int,
                        help="remove transcripts greater than this size.")
    parser.add_argument("-o", "--outfile", dest="outfn", default=False,
                        type=str)

    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        sys.exit(-1)

    # Every code path below writes to the output file, so validate it once,
    # before the GTF is loaded. Previously the plain pass-through path (no
    # -l/-m/-M) skipped this check entirely.
    if not args.outfn:
        logging.error("need to provide an output filename.")
        parser.print_help()
        sys.exit(-1)
    if os.path.isfile(args.outfn):
        logging.error("%s already exists, aborting.", args.outfn)
        sys.exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if args.length:
        lengths = calculateLengths(gtflines)
        outputLengths(lengths, args.outfn)
        # Success: report status 0 (a non-zero status signals failure).
        sys.exit(0)

    if args.min_size:
        gtflines = filterByMinLength(gtflines, args.min_size)

    if args.max_size:
        gtflines = filterByMaxLength(gtflines, args.max_size)

    outputGTF(gtflines, args.outfn)
예제 #3
0
def main():
    """Edit the attributes of a GTF file according to command-line actions.

    Command-line options:
        -g/--gtf      input GTF file (required, must exist).
        -o/--output   output filename (required, must not exist).
        -s/--source   attributes to replace the -r attributes.
        -r/--replace  attributes to be replaced; must be as many as -s.
        -d/--delete   attributes to be deleted.
        -a/--add      file of attributes to be added (must exist).
        -f/--filter   file of attribute to filter on (must exist).
        -m/--move     attributes to move to the front.

    At least one action must be given. Actions are applied in the order
    swap, add, delete, filter, move, and the result is written with
    ``outputGTF``.

    Exits with status -1 on any invalid combination of arguments or missing
    input file.
    """
    # Imported locally so the block does not depend on the interactive-only
    # ``exit`` helper that the ``site`` module injects into builtins.
    import sys

    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)

    description = "Replaces the attributes in -r with the ones from -s. " \
                  "Deletes the attributes in -d."
    parser = ArgumentParser(description=description)
    parser.add_argument("-g", "--gtf", dest="gtf", default=False,
                        type=str, required=True,
                        help="combined.gtf file from Cuffcompare")
    parser.add_argument("-s", "--source", nargs="*", dest="source",
                        default=False,
                        help="attributes to replace the -r attributes")
    parser.add_argument("-r", "--replace", nargs="*", dest="replace",
                        default=False,
                        help="attributes to be replaced")
    parser.add_argument("-d", "--delete", nargs="*", dest="delete",
                        default=False,
                        help="attributes to be deleted")
    parser.add_argument("-a", "--add", dest="add", default=False,
                        type=str,
                        help="file of attributes to be added")
    parser.add_argument("-f", "--filter", dest="filter", default=False,
                        type=str,
                        help="file of attribute to filter on")
    parser.add_argument("-o", "--output", dest="output", default=False,
                        type=str, required=True,
                        help="output filename")
    parser.add_argument("-m", "--move", dest="move", default=False,
                        help="list of attributes to move to the front")
    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        sys.exit(-1)

    if os.path.isfile(args.output):
        logging.error("%s already exists.", args.output)
        parser.print_help()
        sys.exit(-1)

    # An omitted -s/-r is False rather than a list; normalize both so the
    # length comparison cannot raise TypeError when only one is supplied.
    source = args.source or []
    replace = args.replace or []
    if len(source) != len(replace):
        logging.error("Source and replacement lengths must be the same.")
        parser.print_help()
        sys.exit(-1)

    if not (source or replace or args.delete or
            args.add or args.filter or args.move):
        logging.error("Must provide at least one action.")
        parser.print_help()
        sys.exit(-1)

    # Validate auxiliary input files before doing any work on the GTF.
    for aux_path in (args.add, args.filter):
        if aux_path and not os.path.isfile(aux_path):
            logging.error("%s cannot be found.", aux_path)
            sys.exit(-1)

    gtflines = GTFtoDict(args.gtf)
    if source:
        logging.info("Swapping attributes.")
        gtflines = swapAttributes(gtflines, source, replace)

    if args.add:
        logging.info("Adding attributes.")
        gtflines = addAttribute(gtflines, args.add)

    if args.delete:
        logging.info("Deleting attributes.")
        gtflines = delAttributes(gtflines, args.delete)

    if args.filter:
        logging.info("Filtering by attribute in file %s.", args.filter)
        gtflines = filterAttributes(gtflines, args.filter)

    if args.move:
        logging.info("Moving %s to the front of the attributes.", args.move)
        gtflines = reorderAttributes(gtflines, args.move)

    outputGTF(gtflines, args.output)
예제 #4
0
def main():
    """Report transcript lengths from a GTF file, or filter it by size.

    Command-line options:
        -g/--gtf       GTF file to analyze (required, must exist).
        -l/--length    write the length of each transcript to the output
                       file and stop.
        -m/--min_size  remove transcripts below this size.
        -M/--max_size  remove transcripts greater than this size.
        -o/--outfile   output filename (must be given and must not exist).

    Exit status is -1 when the input GTF is missing, when no output filename
    is given, or when the output file already exists. A successful length
    report exits with status 0.
    """
    # Imported locally so the block does not depend on the interactive-only
    # ``exit`` helper that the ``site`` module injects into builtins.
    import sys

    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)

    description = "Counts up size of transcripts in a GTF file. Can also " \
                  "filter on minimum transcript size."

    parser = ArgumentParser(description=description)

    parser.add_argument("-g",
                        "--gtf",
                        dest="gtf",
                        default=False,
                        type=str,
                        required=True,
                        help="gtf file to analyze")
    parser.add_argument("-l",
                        "--length",
                        dest="length",
                        default=False,
                        action="store_true",
                        help="output length of each transcript")
    parser.add_argument("-m",
                        "--min_size",
                        dest="min_size",
                        default=False,
                        type=int,
                        help="remove transcripts below this size.")
    parser.add_argument("-M",
                        "--max_size",
                        dest="max_size",
                        default=False,
                        type=int,
                        help="remove transcripts greater than this size.")
    parser.add_argument("-o",
                        "--outfile",
                        dest="outfn",
                        default=False,
                        type=str)

    args = parser.parse_args()

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        sys.exit(-1)

    # Every code path below writes to the output file, so validate it once,
    # before the GTF is loaded. Previously the plain pass-through path (no
    # -l/-m/-M) skipped this check entirely.
    if not args.outfn:
        logging.error("need to provide an output filename.")
        parser.print_help()
        sys.exit(-1)
    if os.path.isfile(args.outfn):
        logging.error("%s already exists, aborting.", args.outfn)
        sys.exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if args.length:
        lengths = calculateLengths(gtflines)
        outputLengths(lengths, args.outfn)
        # Success: report status 0 (a non-zero status signals failure).
        sys.exit(0)

    if args.min_size:
        gtflines = filterByMinLength(gtflines, args.min_size)

    if args.max_size:
        gtflines = filterByMaxLength(gtflines, args.max_size)

    outputGTF(gtflines, args.outfn)