def main(): #Parse the inputs args/options parser = OptionParser( usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--identity", dest="identity", help="Print identity of alignments", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option( "--localAlignment", dest="localAlignment", help= "Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option( "--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option( "--noStats", dest="noStats", help= "Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") parser.add_option( "--printAlignmentData", dest="printAlignmentData", help= "Print all stats for each read alignment in tabular format; include unaligned with --includeUnaligned", default=False, action="store_true") parser.add_option( "--includeUnaligned", dest="includeUnaligned", help="Includes unaligned reads when printing alignment data", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: 
parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats( samFile, readFastqFile, referenceFastaFile, globalAlignment=not options.localAlignment, includeUnaligned=options.includeUnaligned) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) def report_alignment_data(): name = map(lambda rAS: rAS.readName(), readAlignmentStats) ref_id = map(lambda rAS: rAS.referenceID(), readAlignmentStats) read_type = map(lambda rAS: rAS.readType(), readAlignmentStats) length = map(lambda rAS: rAS.readLength(), readAlignmentStats) identity = map(lambda rAS: rAS.identity(), readAlignmentStats) read_coverage = map(lambda rAS: rAS.readCoverage(), readAlignmentStats) ref_coverage = map(lambda rAS: rAS.referenceCoverage(), readAlignmentStats) mismatch = map(lambda rAS: rAS.mismatchesPerAlignedBase(), readAlignmentStats) insertion = map(lambda rAS: rAS.insertionsPerReadBase(), readAlignmentStats) deletion = map(lambda rAS: rAS.deletionsPerReadBase(), readAlignmentStats) mean_quality = map(lambda rAS: rAS.readMeanQuality(), readAlignmentStats) aligned = map(lambda rAS: rAS.isAligned(), readAlignmentStats) aligned_length = map(lambda rAS: rAS.alignedReadLength(), readAlignmentStats) ref_c_content = map(lambda rAS: rAS.getRefCContent(), readAlignmentStats) ref_gc_content = map(lambda rAS: rAS.getRefGcContent(), readAlignmentStats) print "\t".join(["Name", "ReferenceID", "ReadType", "Length", "Aligned", \ "AlignedLength", "Identity", "ReadCoverage", \ 
"ReferenceCoverage", "MismatchPerBase", \ "InsertionPerBase", "DeletionPerBase", "MeanQuality", "RefCContent", "RefGcContent"]) for read in zip(name, ref_id, read_type, length, aligned, aligned_length, \ identity, read_coverage, ref_coverage, mismatch, insertion,\ deletion, mean_quality, ref_c_content, ref_gc_content): print "\t".join(map(str, read)) if options.printAlignmentData: report_alignment_data() else: if options.identity: report(map(lambda rAS: rAS.identity(), readAlignmentStats), "Identity") if options.readCoverage: report(map(lambda rAS: rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report( map(lambda rAS: rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report( map(lambda rAS: rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report( map(lambda rAS: rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase")
def main(): #Parse the inputs args/options parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--identity", dest="identity", help="Print identity of alignments", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option("--localAlignment", dest="localAlignment", help="Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option("--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option("--noStats", dest="noStats", help="Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(samFile, readFastqFile, referenceFastaFile, globalAlignment=not 
options.localAlignment) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) if options.identity: report(map(lambda rAS : rAS.identity(), readAlignmentStats), "Identity") if options.readCoverage: report(map(lambda rAS : rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report(map(lambda rAS : rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report(map(lambda rAS : rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report(map(lambda rAS : rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase")
def addOptions(parser):
    """Add the standard jobTree command-line options to *parser*.

    Registers logging options (via addLoggingOptions) plus the batch-system,
    retry, timing, memory/cpu and throttling options every jobTree script
    shares.  Mutates *parser* in place; returns nothing.
    """
    addLoggingOptions(parser)  # This adds the logging stuff.
    parser.add_option("--command", dest="command",
                      help="The command to run (which will generate subsequent jobs)",
                      default=None)
    # Fixed typos in this help text: "existin" -> "existing", and a missing
    # space that fused "then" and "try" across a string continuation.
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory in which to place job management files "
                           "(this needs to be globally accessible by all machines running jobs).\n"
                           "If you pass an existing directory it will check if it's a valid "
                           "existing job tree, then try and restart the jobs in it",
                      default=None)
    parser.add_option("--batchSystem", dest="batchSystem",
                      help="The type of batch system to run the job(s) with, currently can be "
                           "'singleMachine'/'parasol'/'acidTest'/'gridEngine'",
                      default=detectQueueSystem())
    parser.add_option("--retryCount", dest="retryCount",
                      help="Number of times to try a failing job before giving up and labelling job failed",
                      default=0)
    # No default here: the batch system supplies one when the option is unset.
    parser.add_option("--waitDuration", dest="waitDuration",
                      help="Period of time to pause after updating the running jobs (default is set by batch system)")
    parser.add_option("--rescueJobsFrequency", dest="rescueJobsFrequency",
                      help="Period of time to wait (in seconds) between checking for missing/overlong jobs "
                           "(default is set by the batch system)")
    # Note: default is a *string* (str(sys.maxint)), unlike the numeric defaults below.
    parser.add_option("--maxJobDuration", dest="maxJobDuration",
                      help="Maximum runtime of a job (in seconds) before we kill it (this is an approximate time, "
                           "and the actual time before killing the job may be longer)",
                      default=str(sys.maxint))
    # Fixed typo in this help text: "parallised" -> "parallelised".
    parser.add_option("--jobTime", dest="jobTime",
                      help="The approximate time (in seconds) that you'd like a list of child jobs to be run "
                           "serially before being parallelised. This parameter allows one to avoid over "
                           "parallelising tiny jobs, and therefore paying significant scheduling overhead, by "
                           "running tiny jobs in series on a single node/core of the cluster.",
                      default=30)
    # NOTE(review): help says "50 kilobytes" but the default is 50120 bytes,
    # not 51200 -- left unchanged to preserve behaviour; confirm intent.
    parser.add_option("--maxLogFileSize", dest="maxLogFileSize",
                      help="The maximum size of a log file to keep (in bytes), log files larger than this will "
                           "be truncated to the last X bytes. Default is 50 kilobytes",
                      default=50120)
    parser.add_option("--defaultMemory", dest="defaultMemory",
                      help="The default amount of memory to request for a job (in bytes), "
                           "by default is 2^31 = 2 gigabytes",
                      default=2147483648)
    parser.add_option("--defaultCpu", dest="defaultCpu",
                      help="The default the number of cpus to dedicate a job, the default is 1",
                      default=1)
    parser.add_option("--maxJobs", dest="maxJobs",
                      help="The maximum number of jobs to issue to the batch system at any one time",
                      default=sys.maxint)
    parser.add_option("--maxThreads", dest="maxThreads",
                      help="The maximum number of threads to use when running in single machine mode",
                      default=4)
    parser.add_option("--stats", dest="stats", action="store_true",
                      help="Records statistics about the job-tree to be used by jobTreeStats",
                      default=False)
def main(): #Parse the inputs args/options parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--readIdentity", dest="readIdentity", help="Print readIdentity of alignments", default=False, action="store_true") parser.add_option("--alignmentIdentity", dest="alignmentIdentity", help="Print alignmentIdentity", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option("--readLength", dest="readLength", help="Print read lengths of aligned reads", default=False, action="store_true") parser.add_option("--localAlignment", dest="localAlignment", help="Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option("--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option("--noStats", dest="noStats", help="Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise 
RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(samFile, readFastqFile, referenceFastaFile, globalAlignment=not options.localAlignment) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) if options.readIdentity: report(map(lambda rAS : rAS.readIdentity(), readAlignmentStats), "ReadIdentity") if options.alignmentIdentity: report(map(lambda rAS : rAS.alignmentIdentity(), readAlignmentStats), "AlignmentIdentity") if options.readCoverage: report(map(lambda rAS : rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report(map(lambda rAS : rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report(map(lambda rAS : rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report(map(lambda rAS : rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase") if options.readLength: report(map(lambda rAS : rAS.readLength(), readAlignmentStats), "ReadLength")