Example #1
0
def main():
    """Command-line entry point: print alignment statistics for a SAM file.

    Reads three positional arguments from the command line (samFile,
    readFastqFile, referenceFastaFile), obtains per-read alignment
    statistics from ReadAlignmentStats.getReadAlignmentStats and prints
    either one tab-separated table row per read (--printAlignmentData) or,
    for every statistic selected by its own flag, a summary
    (average/median/min/max) and/or the raw per-read values.

    When the program is invoked without any command-line argument the
    option help is printed and the process exits with status 0.

    Raises:
        RuntimeError: if the number of positional arguments is not three.
    """
    parser = OptionParser(
        usage="usage: samFile, readFastqFile, referenceFastaFile [options]",
        version="%prog 0.1")

    # Every option is a boolean switch that is off by default and whose
    # destination has the same name as the flag. The order below is the
    # registration order, i.e. the order shown by --help.
    switches = (
        ("identity", "Print identity of alignments"),
        ("readCoverage", "Print read coverage of alignments"),
        ("mismatchesPerAlignedBase", "Print mismatches per aligned base"),
        ("deletionsPerReadBase", "Print deletions per base of alignments"),
        ("insertionsPerReadBase", "Print insertions per base of alignments"),
        ("localAlignment",
         "Ignore unaligned prefix and suffix of each read in making calculation"),
        ("printValuePerReadAlignment",
         "Prints the value of statistics for each read alignment"),
        ("noStats",
         "Do not print stats (avg, median, min, max, mode) of desired statistic"),
        ("printAlignmentData",
         "Print all stats for each read alignment in tabular format; include unaligned with --includeUnaligned"),
        ("includeUnaligned",
         "Includes unaligned reads when printing alignment data"),
    )
    for switchName, helpText in switches:
        parser.add_option("--" + switchName,
                          dest=switchName,
                          help=helpText,
                          default=False,
                          action="store_true")

    addLoggingOptions(parser)

    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    # Nothing at all on the command line: show the help instead of failing
    # the argument-count check below.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    if len(args) != 3:
        raise RuntimeError("Expected three arguments, got: %s" %
                           " ".join(args))

    samFile, readFastqFile, referenceFastaFile = args

    readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(
        samFile,
        readFastqFile,
        referenceFastaFile,
        globalAlignment=not options.localAlignment,
        includeUnaligned=options.includeUnaligned)

    def emit(label, value):
        # A single parenthesised argument prints "label value" identically
        # whether print is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (label, value))

    def report(values, statisticName):
        """Print the summary and/or the raw values of one statistic."""
        values = list(values)
        if not options.noStats:
            if values:
                emit("Average" + statisticName, numpy.average(values))
                emit("Median" + statisticName, numpy.median(values))
                emit("Min" + statisticName, min(values))
                emit("Max" + statisticName, max(values))
            else:
                # min()/max() raise ValueError on an empty sequence, so the
                # summary is skipped (with a note on stderr) instead of
                # aborting the whole run with a traceback.
                sys.stderr.write(
                    "No read alignments to summarise for %s\n" % statisticName)
        if options.printValuePerReadAlignment:
            emit("Values" + statisticName, "\t".join(map(str, values)))

    def report_alignment_data():
        """Print a header line followed by one tab-separated row per read."""
        header = ("Name", "ReferenceID", "ReadType", "Length", "Aligned",
                  "AlignedLength", "Identity", "ReadCoverage",
                  "ReferenceCoverage", "MismatchPerBase",
                  "InsertionPerBase", "DeletionPerBase", "MeanQuality",
                  "RefCContent", "RefGcContent")

        # Collect every row in a single pass before printing anything; the
        # tuple order must stay in step with the header above.
        rows = [(rAS.readName(),
                 rAS.referenceID(),
                 rAS.readType(),
                 rAS.readLength(),
                 rAS.isAligned(),
                 rAS.alignedReadLength(),
                 rAS.identity(),
                 rAS.readCoverage(),
                 rAS.referenceCoverage(),
                 rAS.mismatchesPerAlignedBase(),
                 rAS.insertionsPerReadBase(),
                 rAS.deletionsPerReadBase(),
                 rAS.readMeanQuality(),
                 rAS.getRefCContent(),
                 rAS.getRefGcContent()) for rAS in readAlignmentStats]

        print("\t".join(header))
        for row in rows:
            print("\t".join(map(str, row)))

    if options.printAlignmentData:
        # The table already contains every statistic, so the individual
        # statistic flags are ignored in this mode.
        report_alignment_data()
    else:
        # (option/method name, label used in the printed output); each
        # option shares its name with the accessor method it selects.
        statistics = (
            ("identity", "Identity"),
            ("readCoverage", "ReadCoverage"),
            ("mismatchesPerAlignedBase", "MismatchesPerAlignedBase"),
            ("deletionsPerReadBase", "DeletionsPerReadBase"),
            ("insertionsPerReadBase", "InsertionsPerReadBase"),
        )
        for accessor, label in statistics:
            if getattr(options, accessor):
                report([getattr(rAS, accessor)() for rAS in readAlignmentStats],
                       label)
Example #2
0
def main():
    """Command-line entry point: print alignment statistics for a SAM file.

    Reads three positional arguments from the command line (samFile,
    readFastqFile, referenceFastaFile), obtains per-read alignment
    statistics from ReadAlignmentStats.getReadAlignmentStats and, for every
    statistic selected by its own flag, prints a summary
    (average/median/min/max) and/or the raw per-read values.

    When the program is invoked without any command-line argument the
    option help is printed and the process exits with status 0.

    Raises:
        RuntimeError: if the number of positional arguments is not three.
    """
    parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]",
                          version="%prog 0.1")

    # Every option is a boolean switch that is off by default and whose
    # destination has the same name as the flag. The order below is the
    # registration order, i.e. the order shown by --help.
    switches = (
        ("identity", "Print identity of alignments"),
        ("readCoverage", "Print read coverage of alignments"),
        ("mismatchesPerAlignedBase", "Print mismatches per aligned base"),
        ("deletionsPerReadBase", "Print deletions per base of alignments"),
        ("insertionsPerReadBase", "Print insertions per base of alignments"),
        ("localAlignment",
         "Ignore unaligned prefix and suffix of each read in making calculation"),
        ("printValuePerReadAlignment",
         "Prints the value of statistics for each read alignment"),
        ("noStats",
         "Do not print stats (avg, median, min, max, mode) of desired statistic"),
    )
    for switchName, helpText in switches:
        parser.add_option("--" + switchName, dest=switchName,
                          help=helpText,
                          default=False, action="store_true")

    addLoggingOptions(parser)

    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    # Nothing at all on the command line: show the help instead of failing
    # the argument-count check below.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    if len(args) != 3:
        raise RuntimeError("Expected three arguments, got: %s" % " ".join(args))

    samFile, readFastqFile, referenceFastaFile = args

    readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(
        samFile, readFastqFile, referenceFastaFile,
        globalAlignment=not options.localAlignment)

    def emit(label, value):
        # A single parenthesised argument prints "label value" identically
        # whether print is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (label, value))

    def report(values, statisticName):
        """Print the summary and/or the raw values of one statistic."""
        values = list(values)
        if not options.noStats:
            if values:
                emit("Average" + statisticName, numpy.average(values))
                emit("Median" + statisticName, numpy.median(values))
                emit("Min" + statisticName, min(values))
                emit("Max" + statisticName, max(values))
            else:
                # min()/max() raise ValueError on an empty sequence, so the
                # summary is skipped (with a note on stderr) instead of
                # aborting the whole run with a traceback.
                sys.stderr.write(
                    "No read alignments to summarise for %s\n" % statisticName)
        if options.printValuePerReadAlignment:
            emit("Values" + statisticName, "\t".join(map(str, values)))

    # (option/method name, label used in the printed output); each option
    # shares its name with the accessor method it selects.
    statistics = (
        ("identity", "Identity"),
        ("readCoverage", "ReadCoverage"),
        ("mismatchesPerAlignedBase", "MismatchesPerAlignedBase"),
        ("deletionsPerReadBase", "DeletionsPerReadBase"),
        ("insertionsPerReadBase", "InsertionsPerReadBase"),
    )
    for accessor, label in statistics:
        if getattr(options, accessor):
            report([getattr(rAS, accessor)() for rAS in readAlignmentStats], label)
Example #3
0
def addOptions(parser):
    """Register the job-tree command-line options on an optparse parser.

    The logging options are added first through addLoggingOptions, followed
    by the job-tree specific options below.

    None of the value-taking options declares a ``type``, so optparse treats
    them as strings: a value given on the command line is stored as a
    string, while an untouched option keeps the Python object passed as
    ``default`` (mostly ints here). Code reading these options has to cope
    with both.

    Args:
        parser: the option parser to extend; it is modified in place.
    """
    addLoggingOptions(parser)  # This adds the logging stuff..

    # sys.maxint only exists on Python 2; fall back to sys.maxsize where it
    # is missing so that registering the options does not fail there.
    maxInt = getattr(sys, "maxint", None)
    if maxInt is None:
        maxInt = sys.maxsize

    parser.add_option("--command", dest="command",
                      help="The command to run (which will generate subsequent jobs)",
                      default=None)

    # The help text is assembled from adjacent literals: the previous
    # backslash continuations glued "then" and "try" into "thentry".
    parser.add_option("--jobTree", dest="jobTree",
                      help=("Directory in which to place job management files "
                            "(this needs to be globally accessible by all machines running jobs).\n"
                            "If you pass an existing directory it will check if it's a valid existing job tree, then "
                            "try and restart the jobs in it"),
                      default=None)

    # detectQueueSystem() runs once, when the option is registered.
    parser.add_option("--batchSystem", dest="batchSystem",
                      help="The type of batch system to run the job(s) with, currently can be 'singleMachine'/'parasol'/'acidTest'/'gridEngine'",
                      default=detectQueueSystem())

    parser.add_option("--retryCount", dest="retryCount",
                      help="Number of times to try a failing job before giving up and labelling job failed",
                      default=0)

    # No explicit default: the option is None unless given.
    parser.add_option("--waitDuration", dest="waitDuration",
                      help="Period of time to pause after updating the running jobs (default is set by batch system)")

    # No explicit default: the option is None unless given.
    parser.add_option("--rescueJobsFrequency", dest="rescueJobsFrequency",
                      help="Period of time to wait (in seconds) between checking for missing/overlong jobs (default is set by the batch system)")

    # Unlike the other numeric options this default is already a string.
    parser.add_option("--maxJobDuration", dest="maxJobDuration",
                      help="Maximum runtime of a job (in seconds) before we kill it (this is an approximate time, and the actual time before killing the job may be longer)",
                      default=str(maxInt))

    # Adjacent literals again: the previous backslash continuations embedded
    # the source indentation (long runs of spaces) in the help text.
    parser.add_option("--jobTime", dest="jobTime",
                      help=("The approximate time (in seconds) that you'd like a list of child jobs to be run serially before being parallelised. "
                            "This parameter allows one to avoid over parallelising tiny jobs, and therefore paying significant scheduling overhead, by "
                            "running tiny jobs in series on a single node/core of the cluster."),
                      default=30)

    # NOTE(review): the help says 50 kilobytes but 50120 is neither 50 * 1000
    # nor 50 * 1024 (51200) - confirm the intended value before changing it.
    parser.add_option("--maxLogFileSize", dest="maxLogFileSize",
                      help="The maximum size of a log file to keep (in bytes), log files larger than this will be truncated to the last X bytes. Default is 50 kilobytes",
                      default=50120)

    parser.add_option("--defaultMemory", dest="defaultMemory",
                      help="The default amount of memory to request for a job (in bytes), by default is 2^31 = 2 gigabytes",
                      default=2147483648)

    parser.add_option("--defaultCpu", dest="defaultCpu",
                      help="The default the number of cpus to dedicate a job, the default is 1",
                      default=1)

    parser.add_option("--maxJobs", dest="maxJobs",
                      help="The maximum number of jobs to issue to the batch system at any one time",
                      default=maxInt)

    parser.add_option("--maxThreads", dest="maxThreads",
                      help="The maximum number of threads to use when running in single machine mode",
                      default=4)

    parser.add_option("--stats", dest="stats", action="store_true",
                      help="Records statistics about the job-tree to be used by jobTreeStats",
                      default=False)
Example #4
0
def main():
    """Command-line entry point: print alignment statistics for a SAM file.

    Reads three positional arguments from the command line (samFile,
    readFastqFile, referenceFastaFile), obtains per-read alignment
    statistics from ReadAlignmentStats.getReadAlignmentStats and, for every
    statistic selected by its own flag, prints a summary
    (average/median/min/max) and/or the raw per-read values.

    When the program is invoked without any command-line argument the
    option help is printed and the process exits with status 0.

    Raises:
        RuntimeError: if the number of positional arguments is not three.
    """
    parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]",
                          version="%prog 0.1")

    # Every option is a boolean switch that is off by default and whose
    # destination has the same name as the flag. The order below is the
    # registration order, i.e. the order shown by --help.
    switches = (
        ("readIdentity", "Print readIdentity of alignments"),
        ("alignmentIdentity", "Print alignmentIdentity"),
        ("readCoverage", "Print read coverage of alignments"),
        ("mismatchesPerAlignedBase", "Print mismatches per aligned base"),
        ("deletionsPerReadBase", "Print deletions per base of alignments"),
        ("insertionsPerReadBase", "Print insertions per base of alignments"),
        ("readLength", "Print read lengths of aligned reads"),
        ("localAlignment",
         "Ignore unaligned prefix and suffix of each read in making calculation"),
        ("printValuePerReadAlignment",
         "Prints the value of statistics for each read alignment"),
        ("noStats",
         "Do not print stats (avg, median, min, max, mode) of desired statistic"),
    )
    for switchName, helpText in switches:
        parser.add_option("--" + switchName, dest=switchName,
                          help=helpText,
                          default=False, action="store_true")

    addLoggingOptions(parser)

    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    # Nothing at all on the command line: show the help instead of failing
    # the argument-count check below.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    if len(args) != 3:
        raise RuntimeError("Expected three arguments, got: %s" % " ".join(args))

    samFile, readFastqFile, referenceFastaFile = args

    readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(
        samFile, readFastqFile, referenceFastaFile,
        globalAlignment=not options.localAlignment)

    def emit(label, value):
        # A single parenthesised argument prints "label value" identically
        # whether print is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (label, value))

    def report(values, statisticName):
        """Print the summary and/or the raw values of one statistic."""
        values = list(values)
        if not options.noStats:
            if values:
                emit("Average" + statisticName, numpy.average(values))
                emit("Median" + statisticName, numpy.median(values))
                emit("Min" + statisticName, min(values))
                emit("Max" + statisticName, max(values))
            else:
                # min()/max() raise ValueError on an empty sequence, so the
                # summary is skipped (with a note on stderr) instead of
                # aborting the whole run with a traceback.
                sys.stderr.write(
                    "No read alignments to summarise for %s\n" % statisticName)
        if options.printValuePerReadAlignment:
            emit("Values" + statisticName, "\t".join(map(str, values)))

    # (option/method name, label used in the printed output); each option
    # shares its name with the accessor method it selects.
    statistics = (
        ("readIdentity", "ReadIdentity"),
        ("alignmentIdentity", "AlignmentIdentity"),
        ("readCoverage", "ReadCoverage"),
        ("mismatchesPerAlignedBase", "MismatchesPerAlignedBase"),
        ("deletionsPerReadBase", "DeletionsPerReadBase"),
        ("insertionsPerReadBase", "InsertionsPerReadBase"),
        ("readLength", "ReadLength"),
    )
    for accessor, label in statistics:
        if getattr(options, accessor):
            report([getattr(rAS, accessor)() for rAS in readAlignmentStats], label)