def main(argv=None):
    """Write a per-read quality table for a fastq stream.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read from ``options.stdin``. If ``--target-format`` is
    given they are iterated with ``Fastq.iterate_convert``, otherwise
    with ``Fastq.iterate_guess``; ``--guess-format`` is passed to either
    iterator as ``guess``.

    One tab-separated row is written to ``options.stdout`` per record,
    preceded by a header line:

    * ``read`` - the record identifier
    * ``nfailed`` - number of values from ``record.toPhred()`` that are
      below ``min_quality``
    * ``nN`` - number of ``N`` and ``.`` characters in the sequence
    * the columns produced by ``Stats.Summary`` for the quality values

    ``min_quality`` is taken from the parser defaults (10); no command
    line option for it is defined in this function.
    """
    # Fall back to sys.argv only when no argument list was supplied at
    # all; an explicitly passed (even empty) list is honoured.
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer'),
        help="The default behaviour of the script is to guess the quality "
        "format of the input fastq file. The user can specify the "
        "quality format of the input file using the --guess-format option. "
        "The script will use this format if the "
        "sequence qualities are ambiguous.[default=%default].")

    parser.add_option(
        "--target-format", dest="target_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer'),
        help="If given, the script will convert quality scores to this "
        "destination format [default=%default].")

    parser.set_defaults(
        target_format=None,
        guess_format=None,
        min_quality=10,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    c = E.Counter()

    # Convert qualities only when a target format was requested.
    if options.target_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.target_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    options.stdout.write("read\tnfailed\tnN\t%s\n" %
                         ("\t".join(Stats.Summary().getHeaders())))

    min_quality = options.min_quality

    for record in iterator:
        c.input += 1
        quals = record.toPhred()
        # count the bases whose quality is below the threshold
        nfailed = sum(1 for x in quals if x < min_quality)
        # both "N" and "." are counted as uncalled bases
        nns = record.seq.count("N") + record.seq.count(".")
        options.stdout.write(
            "%s\t%i\t%i\t%s\n" % (record.identifier,
                                  nfailed,
                                  nns,
                                  str(Stats.Summary(quals))))
        c.output += 1

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.stop()
def main(argv=None):
    """Write a one-row quality summary for a fastq stream.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read from ``options.stdin``. If ``--target-format`` is
    given they are iterated with ``Fastq.iterate_convert``, otherwise
    with ``Fastq.iterate_guess``; ``--guess-format`` is passed to either
    iterator as ``guess``.

    A header line and a single tab-separated data row are written to
    ``options.stdout``:

    * ``reads`` - number of records
    * ``bases`` - total number of quality values over all records
    * ``mean_length`` / ``median_length`` - of the per-read lengths,
      rounded to two decimals
    * ``mean_quality`` / ``median_quality`` - of the per-read mean
      qualities, rounded to two decimals
    * ``nfailed`` - total number of quality values below ``min_quality``

    ``min_quality`` is taken from the parser defaults (10); no command
    line option for it is defined in this function. Per-read lengths and
    mean qualities are kept in memory until the end because the medians
    are computed over all reads.
    """
    # Fall back to sys.argv only when no argument list was supplied at
    # all; an explicitly passed (even empty) list is honoured.
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # Help texts use adjacent string literals rather than backslash
    # continuations so that no source indentation leaks into --help.
    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer'),
        help="The default behaviour of the script is to guess "
        "the quality format of the input fastq file. The user "
        "can specify the quality format of the input file using "
        "the --guess-format option. The script will use this format if "
        "sequence qualities are ambiguous.[default=%default].")

    parser.add_option(
        "-f", "--target-format", dest="change_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer'),
        help="If given, the script converts quality scores to this "
        "destination format. The quality format of the input file is "
        "guessed unless it is ambiguous and --guess-format is "
        "specified [default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        min_quality=10)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # Convert qualities only when a target format was requested.
    if options.change_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.change_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    min_quality = options.min_quality
    number_of_reads = 0
    number_of_bases = 0
    read_lengths = []
    read_qualities = []
    bases_below_min = 0

    for record in iterator:
        number_of_reads += 1
        quals = record.toPhred()
        length_read = len(quals)
        number_of_bases += length_read
        # count the bases whose quality is below the threshold
        bases_below_min += sum(1 for x in quals if x < min_quality)
        read_lengths.append(length_read)
        read_qualities.append(np.mean(quals))

    if read_lengths:
        mean_length = round(np.mean(read_lengths), 2)
        median_length = round(np.median(read_lengths), 2)
        mean_quality = round(np.mean(read_qualities), 2)
        median_quality = round(np.median(read_qualities), 2)
    else:
        # np.mean/np.median of an empty list evaluate to nan but emit a
        # RuntimeWarning; report nan directly for empty input instead.
        mean_length = median_length = float("nan")
        mean_quality = median_quality = float("nan")

    options.stdout.write(
        "reads\tbases\tmean_length\tmedian_length\t"
        "mean_quality\tmedian_quality\tnfailed\n")
    options.stdout.write(
        "%i\t%i\t%s\t%s\t%s\t%s\t%i\n" % (number_of_reads,
                                          number_of_bases,
                                          mean_length,
                                          median_length,
                                          mean_quality,
                                          median_quality,
                                          bases_below_min))
    E.stop()