def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    PSL records are read from ``options.stdin`` through ``Blat.iterator``.
    The methods given with ``-m/--method`` are applied in command line
    order:

    * ``map``, ``filter-keep``, ``filter-remove``, ``merge``,
      ``add-sequence``, ``complement`` and ``select-query`` call the
      corresponding ``psl*`` function and stop any further method
      processing.
    * ``test``, ``rename-query``, ``sanitize``, ``filter-fasta``,
      ``remove-overlapping-query`` and ``remove-overlapping-target`` wrap
      the current record iterator, so several of them can be chained.

    Whatever the iterator yields afterwards is written to
    ``options.stdout``, one record per line.

    Arguments
    ---------
    argv : list, optional
        Command line arguments including the program name. Defaults to
        ``sys.argv``.

    Raises
    ------
    ValueError
        If ``add-sequence`` is requested without both
        ``--queries-tsv-file`` and ``--target-psl-file``.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "--filter-query", dest="filename_filter_query", type="string",
        help="filename with intervals in the query "
        "to filter (in gff format) [default=%default].")

    parser.add_option(
        "--filter-target", dest="filename_filter_target", type="string",
        help="filename with intervals in the target to "
        "filter (in gff format) [default=%default].")

    parser.add_option(
        "-m", "--method", dest="methods", type="choice", action="append",
        choices=("map", "merge",
                 "add-sequence", "complement",
                 "select-query", "test",
                 "filter-keep", "filter-remove",
                 "rename-query",
                 "sanitize",
                 "filter-fasta",
                 "remove-overlapping-query",
                 "remove-overlapping-target"),
        help="action to perform [default=%default].")

    parser.add_option(
        "--select", dest="select", type="choice",
        choices=("most-nmatches", "least-nmatches",
                 "most-nmismatches", "least-nmismatches"),
        help="entry to select [default=%default].")

    parser.add_option(
        "--header-names", dest="header", type="choice",
        choices=("none", "table", "full"),
        help="output psl header [default=%default].")

    parser.add_option(
        "--format", dest="format", type="choice",
        choices=("gff", "gtf"),
        help="format of intervals [default=%default].")

    parser.add_option(
        "--queries-tsv-file", dest="filename_queries", type="string",
        help="fasta filename with queries.")

    parser.add_option(
        "--target-psl-file", dest="filename_sbjcts", type="string",
        help="fasta filename with sbjct [default=%default].")

    parser.add_option(
        "--id-format", dest="id_format", type="string",
        help="format of new identifiers for the rename "
        "function [default=%default].")

    parser.add_option(
        "--unique", dest="unique", action="store_true",
        help="in the rename function, make each match "
        "unique [default=%default].")

    parser.add_option(
        "--output-filename-map", dest="output_filename_map", type="string",
        help="filename with map of old to new labels for "
        "rename function [default=%default].")

    parser.add_option(
        "--complement-min-length", dest="complement_min_length", type="int",
        help="minimum length for complemented blocks "
        "[default=%default].")

    parser.add_option(
        "--complement-border", dest="complement_border", type="int",
        help="number of residues to exclude before alignment "
        "at either end [default=%default].")

    parser.add_option(
        "--complement-aligner", dest="complement_aligner", type="choice",
        choices=("clustal", "dba", "dialign", "dialign-lgs"),
        help="aligner for complemented segments "
        "[default=%default].")

    parser.add_option(
        "--threshold-merge-distance", dest="threshold_merge_distance",
        type="int",
        help="distance in nucleotides at which two adjacent "
        "reads shall be merged even if they are not "
        "overlapping [%default].")

    parser.add_option(
        "--test", dest="test", type="int",
        help="for debugging purposes - stop after x "
        "iterations [default=%default].")

    parser.set_defaults(
        filename_filter_target=None,
        filename_filter_query=None,
        filename_queries=None,
        filename_sbjcts=None,
        threshold_merge_distance=0,
        report_step=100000,
        min_aligned=100,
        methods=[],
        format="gff",
        select="most-nmatches",
        id_format="%06i",
        unique=False,
        output_filename_map=None,
        header=None,
        test=None)

    # Hand the explicit argument list on; without it a caller-supplied
    # *argv* would be ignored in favour of the process command line.
    (options, _args) = E.Start(parser, argv=argv, add_pipe_options=True)

    # Sequence databases are optional; they are only opened when a
    # filename was given.
    if options.filename_queries:
        query_fasta = IndexedFasta.IndexedFasta(options.filename_queries)
    else:
        query_fasta = None

    if options.filename_sbjcts:
        sbjct_fasta = IndexedFasta.IndexedFasta(options.filename_sbjcts)
    else:
        sbjct_fasta = None

    if "add-sequence" in options.methods and \
            (sbjct_fasta is None or query_fasta is None):
        raise ValueError(
            "please supply both indexed query and "
            "target/genome sequence data.")

    iterator = Blat.iterator(options.stdin)

    # Only "table" and "full" produce a header; None and "none" print
    # nothing. (The former outer guard `is not None or != "none"` was
    # always true and therefore redundant.)
    if options.header == "table":
        options.stdout.write("\t".join(Blat.FIELDS) + "\n")
    elif options.header == "full":
        options.stdout.write(Blat.HEADER + "\n")

    for method in options.methods:

        # stand-alone methods: run once and stop processing methods
        if method == "map":
            pslMap(options)
            break
        elif method == "filter-keep":
            pslFilter(options, keep=True)
            break
        elif method == "filter-remove":
            pslFilter(options, keep=False)
            break
        elif method == "merge":
            pslMerge(options)
            break
        elif method == "add-sequence":
            pslAddSequence(query_fasta, sbjct_fasta, options)
            break
        elif method == "complement":
            pslComplement(query_fasta, sbjct_fasta, options)
            break
        elif method == "select-query":
            pslSelectQuery(options)
            break

        # chainable methods: each one wraps the current iterator
        elif method == "test":
            iterator = Blat.iterator_test(iterator, options.report_step)
        elif method == "rename-query":
            iterator = iterator_rename_query(iterator, options)
        elif method == "sanitize":
            iterator = iterator_sanitize(
                iterator, query_fasta, sbjct_fasta, options)
        elif method == "filter-fasta":
            iterator = iterator_filter_fasta(
                iterator, query_fasta, sbjct_fasta, options)
        elif method == "remove-overlapping-query":
            iterator = iterator_filter_overlapping_query(iterator, options)
        elif method == "remove-overlapping-target":
            iterator = iterator_filter_overlapping_target(iterator, options)

    # Write out whatever the (possibly wrapped) iterator still yields.
    for psl in iterator:
        options.stdout.write("%s\n" % str(psl))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    PSL records are read from ``options.stdin`` through ``Blat.iterator``.
    The methods given with ``-m/--method`` are applied in command line
    order:

    * ``map``, ``filter-keep``, ``filter-remove``, ``merge``,
      ``add-sequence``, ``complement`` and ``select-query`` call the
      corresponding ``psl*`` function and stop any further method
      processing.
    * ``test``, ``rename-query``, ``sanitize``, ``filter-fasta``,
      ``remove-overlapping-query`` and ``remove-overlapping-target`` wrap
      the current record iterator, so several of them can be chained.

    Whatever the iterator yields afterwards is written to
    ``options.stdout``, one record per line.

    Arguments
    ---------
    argv : list, optional
        Command line arguments including the program name. Defaults to
        ``sys.argv``.

    Raises
    ------
    ValueError
        If ``add-sequence`` is requested without both
        ``--queries-tsv-file`` and ``--target-psl-file``.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "--filter-query", dest="filename_filter_query", type="string",
        help="filename with intervals in the query "
        "to filter (in gff format) [default=%default].")

    parser.add_option(
        "--filter-target", dest="filename_filter_target", type="string",
        help="filename with intervals in the target to "
        "filter (in gff format) [default=%default].")

    parser.add_option(
        "-m", "--method", dest="methods", type="choice", action="append",
        choices=("map", "merge",
                 "add-sequence", "complement",
                 "select-query", "test",
                 "filter-keep", "filter-remove",
                 "rename-query",
                 "sanitize",
                 "filter-fasta",
                 "remove-overlapping-query",
                 "remove-overlapping-target"),
        help="action to perform [default=%default].")

    parser.add_option(
        "--select", dest="select", type="choice",
        choices=("most-nmatches", "least-nmatches",
                 "most-nmismatches", "least-nmismatches"),
        help="entry to select [default=%default].")

    parser.add_option(
        "--header-names", dest="header", type="choice",
        choices=("none", "table", "full"),
        help="output psl header [default=%default].")

    parser.add_option(
        "--format", dest="format", type="choice",
        choices=("gff", "gtf"),
        help="format of intervals [default=%default].")

    parser.add_option(
        "--queries-tsv-file", dest="filename_queries", type="string",
        help="fasta filename with queries.")

    parser.add_option(
        "--target-psl-file", dest="filename_sbjcts", type="string",
        help="fasta filename with sbjct [default=%default].")

    parser.add_option(
        "--id-format", dest="id_format", type="string",
        help="format of new identifiers for the rename "
        "function [default=%default].")

    parser.add_option(
        "--unique", dest="unique", action="store_true",
        help="in the rename function, make each match "
        "unique [default=%default].")

    parser.add_option(
        "--output-filename-map", dest="output_filename_map", type="string",
        help="filename with map of old to new labels for "
        "rename function [default=%default].")

    parser.add_option(
        "--complement-min-length", dest="complement_min_length", type="int",
        help="minimum length for complemented blocks "
        "[default=%default].")

    parser.add_option(
        "--complement-border", dest="complement_border", type="int",
        help="number of residues to exclude before alignment "
        "at either end [default=%default].")

    parser.add_option(
        "--complement-aligner", dest="complement_aligner", type="choice",
        choices=("clustal", "dba", "dialign", "dialign-lgs"),
        help="aligner for complemented segments "
        "[default=%default].")

    parser.add_option(
        "--threshold-merge-distance", dest="threshold_merge_distance",
        type="int",
        help="distance in nucleotides at which two adjacent "
        "reads shall be merged even if they are not "
        "overlapping [%default].")

    parser.add_option(
        "--test", dest="test", type="int",
        help="for debugging purposes - stop after x "
        "iterations [default=%default].")

    parser.set_defaults(
        filename_filter_target=None,
        filename_filter_query=None,
        filename_queries=None,
        filename_sbjcts=None,
        threshold_merge_distance=0,
        report_step=100000,
        min_aligned=100,
        methods=[],
        format="gff",
        select="most-nmatches",
        id_format="%06i",
        unique=False,
        output_filename_map=None,
        header=None,
        test=None)

    # Hand the explicit argument list on; without it a caller-supplied
    # *argv* would be ignored in favour of the process command line.
    (options, _args) = E.start(parser, argv=argv, add_pipe_options=True)

    # Sequence databases are optional; they are only opened when a
    # filename was given.
    if options.filename_queries:
        query_fasta = IndexedFasta.IndexedFasta(options.filename_queries)
    else:
        query_fasta = None

    if options.filename_sbjcts:
        sbjct_fasta = IndexedFasta.IndexedFasta(options.filename_sbjcts)
    else:
        sbjct_fasta = None

    if "add-sequence" in options.methods and \
            (sbjct_fasta is None or query_fasta is None):
        raise ValueError(
            "please supply both indexed query and "
            "target/genome sequence data.")

    iterator = Blat.iterator(options.stdin)

    # Only "table" and "full" produce a header; None and "none" print
    # nothing. (The former outer guard `is not None or != "none"` was
    # always true and therefore redundant.)
    if options.header == "table":
        options.stdout.write("\t".join(Blat.FIELDS) + "\n")
    elif options.header == "full":
        options.stdout.write(Blat.HEADER + "\n")

    for method in options.methods:

        # stand-alone methods: run once and stop processing methods
        if method == "map":
            pslMap(options)
            break
        elif method == "filter-keep":
            pslFilter(options, keep=True)
            break
        elif method == "filter-remove":
            pslFilter(options, keep=False)
            break
        elif method == "merge":
            pslMerge(options)
            break
        elif method == "add-sequence":
            pslAddSequence(query_fasta, sbjct_fasta, options)
            break
        elif method == "complement":
            pslComplement(query_fasta, sbjct_fasta, options)
            break
        elif method == "select-query":
            pslSelectQuery(options)
            break

        # chainable methods: each one wraps the current iterator
        elif method == "test":
            iterator = Blat.iterator_test(iterator, options.report_step)
        elif method == "rename-query":
            iterator = iterator_rename_query(iterator, options)
        elif method == "sanitize":
            iterator = iterator_sanitize(
                iterator, query_fasta, sbjct_fasta, options)
        elif method == "filter-fasta":
            iterator = iterator_filter_fasta(
                iterator, query_fasta, sbjct_fasta, options)
        elif method == "remove-overlapping-query":
            iterator = iterator_filter_overlapping_query(iterator, options)
        elif method == "remove-overlapping-target":
            iterator = iterator_filter_overlapping_target(iterator, options)

    # Write out whatever the (possibly wrapped) iterator still yields.
    for psl in iterator:
        options.stdout.write("%s\n" % str(psl))

    E.stop()
pslFilter( options, keep = False ) break elif "merge" == method: pslMerge( options ) break elif "add-sequence" == method: pslAddSequence( query_fasta, sbjct_fasta, options ) break elif "complement" == method: pslComplement( query_fasta, sbjct_fasta, options ) break elif "select-query" == method: pslSelectQuery( options ) break elif "test" == method: iterator = Blat.iterator_test( iterator, options.report_step ) elif "rename-query" == method: iterator = iterator_rename_query( iterator, options ) elif "sanitize" == method: iterator = iterator_sanitize( iterator, query_fasta, sbjct_fasta, options ) elif "filter-fasta" == method: iterator = iterator_filter_fasta( iterator, query_fasta, sbjct_fasta, options ) elif "remove-overlapping-query" == method: iterator = iterator_filter_overlapping_query( iterator, options ) elif "remove-overlapping-target" == method: iterator = iterator_filter_overlapping_target( iterator, options ) for psl in iterator: options.stdout.write( "%s\n" % str( psl ) ) E.Stop()