Esempio n. 1
0
def main(argv=None):
    """Script entry point: transform PSL records read from stdin.

    Builds the command line parser, optionally opens the indexed fasta
    files named by ``--queries-tsv-file`` / ``--target-psl-file``, writes
    the requested header and then applies every ``--method`` in the order
    given on the command line.  Whatever remains in the PSL iterator
    afterwards is written to ``options.stdout``.

    Arguments
    ---------
    argv : list, optional
        Command line arguments.  ``sys.argv`` is used when omitted.

    Raises
    ------
    ValueError
        If the ``add-sequence`` method is requested without both the
        query and the target fasta file.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option("--filter-query", dest="filename_filter_query",
                      type="string",
                      help="filename with intervals in the query "
                      "to filter (in gff format) [default=%default].")

    parser.add_option("--filter-target", dest="filename_filter_target",
                      type="string",
                      help="filename with intervals in the target to "
                      "filter (in gff format) [default=%default].")

    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("map", "merge",
                               "add-sequence", "complement",
                               "select-query", "test",
                               "filter-keep", "filter-remove",
                               "rename-query",
                               "sanitize",
                               "filter-fasta",
                               "remove-overlapping-query",
                               "remove-overlapping-target"),
                      help="""action to perform [default=%default].""")

    parser.add_option("--select", dest="select", type="choice",
                      choices=("most-nmatches", "least-nmatches",
                               "most-nmismatches", "least-nmismatches"),
                      help="entry to select [default=%default].")

    parser.add_option("--header-names", dest="header", type="choice",
                      choices=("none", "table", "full"),
                      help="output psl header [default=%default].")

    parser.add_option("--format", dest="format", type="choice",
                      choices=("gff", "gtf"),
                      help="format of intervals [default=%default].")

    parser.add_option("--queries-tsv-file", dest="filename_queries",
                      type="string",
                      help="fasta filename with queries.")

    parser.add_option("--target-psl-file", dest="filename_sbjcts",
                      type="string",
                      help="fasta filename with sbjct [default=%default].")

    parser.add_option("--id-format", dest="id_format", type="string",
                      help="format of new identifiers for the rename "
                      "function [default=%default].")

    parser.add_option("--unique", dest="unique", action="store_true",
                      help="in the rename function, make each match "
                      "unique [default=%default].")

    parser.add_option("--output-filename-map", dest="output_filename_map",
                      type="string",
                      help="filename with map of old to new labels for "
                      "rename function [default=%default].")

    parser.add_option("--complement-min-length", dest="complement_min_length",
                      type="int",
                      help="minimum length for complemented blocks "
                      "[default=%default].")

    parser.add_option("--complement-border", dest="complement_border",
                      type="int",
                      help="number of residues to exclude before alignment "
                      "at either end [default=%default].")

    parser.add_option("--complement-aligner", dest="complement_aligner",
                      type="choice",
                      choices=("clustal", "dba", "dialign", "dialign-lgs"),
                      help="aligner for complemented segments "
                      "[default=%default].")

    parser.add_option("--threshold-merge-distance",
                      dest="threshold_merge_distance", type="int",
                      help="distance in nucleotides at which two adjacent "
                      "reads shall be merged even if they are not "
                      "overlapping [%default].")

    parser.add_option("--test", dest="test", type="int",
                      help="for debugging purposes - stop after x "
                      "iterations [default=%default].")

    parser.set_defaults(filename_filter_target=None,
                        filename_filter_query=None,
                        filename_queries=None,
                        filename_sbjcts=None,
                        threshold_merge_distance=0,
                        report_step=100000,
                        min_aligned=100,
                        methods=[],
                        format="gff",
                        select="most-nmatches",
                        id_format="%06i",
                        unique=False,
                        output_filename_map=None,
                        header=None,
                        test=None)

    # Hand the caller's argument list to the parser; previously *argv* was
    # computed but never used, so an explicit argv was silently ignored.
    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    # The fasta indices are optional; only some methods need them.
    if options.filename_queries:
        query_fasta = IndexedFasta.IndexedFasta(options.filename_queries)
    else:
        query_fasta = None

    if options.filename_sbjcts:
        sbjct_fasta = IndexedFasta.IndexedFasta(options.filename_sbjcts)
    else:
        sbjct_fasta = None

    if "add-sequence" in options.methods and \
       (sbjct_fasta is None or query_fasta is None):
        raise ValueError(
            "please supply both indexed query and "
            "target/genome sequence data.")

    iterator = Blat.iterator(options.stdin)

    # No header is written for None or "none".  The former guard
    # ("is not None or != 'none'") was always true and has been dropped.
    if options.header == "table":
        options.stdout.write("\t".join(Blat.FIELDS) + "\n")
    elif options.header == "full":
        options.stdout.write(Blat.HEADER + "\n")

    for method in options.methods:

        # The first group of methods receives the options object directly
        # and ends the method loop; any later methods are not applied.
        # NOTE(review): presumably these read options.stdin themselves -
        # confirm against the psl* helpers.
        if "map" == method:
            pslMap(options)
            break
        elif "filter-keep" == method:
            pslFilter(options, keep=True)
            break
        elif "filter-remove" == method:
            pslFilter(options, keep=False)
            break
        elif "merge" == method:
            pslMerge(options)
            break
        elif "add-sequence" == method:
            pslAddSequence(query_fasta, sbjct_fasta, options)
            break
        elif "complement" == method:
            pslComplement(query_fasta, sbjct_fasta, options)
            break
        elif "select-query" == method:
            pslSelectQuery(options)
            break
        # The remaining methods wrap the iterator, so they can be chained
        # in command line order.
        elif "test" == method:
            iterator = Blat.iterator_test(iterator, options.report_step)
        elif "rename-query" == method:
            iterator = iterator_rename_query(iterator, options)
        elif "sanitize" == method:
            iterator = iterator_sanitize(
                iterator, query_fasta, sbjct_fasta, options)
        elif "filter-fasta" == method:
            iterator = iterator_filter_fasta(
                iterator, query_fasta, sbjct_fasta, options)
        elif "remove-overlapping-query" == method:
            iterator = iterator_filter_overlapping_query(iterator, options)
        elif "remove-overlapping-target" == method:
            iterator = iterator_filter_overlapping_target(iterator, options)

    # Write out whatever the (possibly wrapped) iterator still yields.
    for psl in iterator:
        options.stdout.write("%s\n" % str(psl))

    E.Stop()
Esempio n. 2
0
def main(argv=None):
    """Script entry point: transform PSL records read from stdin.

    Builds the command line parser, optionally opens the indexed fasta
    files named by ``--queries-tsv-file`` / ``--target-psl-file``, writes
    the requested header and then applies every ``--method`` in the order
    given on the command line.  Whatever remains in the PSL iterator
    afterwards is written to ``options.stdout``.

    Arguments
    ---------
    argv : list, optional
        Command line arguments.  ``sys.argv`` is used when omitted.

    Raises
    ------
    ValueError
        If the ``add-sequence`` method is requested without both the
        query and the target fasta file.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option("--filter-query", dest="filename_filter_query",
                      type="string",
                      help="filename with intervals in the query "
                      "to filter (in gff format) [default=%default].")

    parser.add_option("--filter-target", dest="filename_filter_target",
                      type="string",
                      help="filename with intervals in the target to "
                      "filter (in gff format) [default=%default].")

    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("map", "merge",
                               "add-sequence", "complement",
                               "select-query", "test",
                               "filter-keep", "filter-remove",
                               "rename-query",
                               "sanitize",
                               "filter-fasta",
                               "remove-overlapping-query",
                               "remove-overlapping-target"),
                      help="""action to perform [default=%default].""")

    parser.add_option("--select", dest="select", type="choice",
                      choices=("most-nmatches", "least-nmatches",
                               "most-nmismatches", "least-nmismatches"),
                      help="entry to select [default=%default].")

    parser.add_option("--header-names", dest="header", type="choice",
                      choices=("none", "table", "full"),
                      help="output psl header [default=%default].")

    parser.add_option("--format", dest="format", type="choice",
                      choices=("gff", "gtf"),
                      help="format of intervals [default=%default].")

    parser.add_option("--queries-tsv-file", dest="filename_queries",
                      type="string",
                      help="fasta filename with queries.")

    parser.add_option("--target-psl-file", dest="filename_sbjcts",
                      type="string",
                      help="fasta filename with sbjct [default=%default].")

    parser.add_option("--id-format", dest="id_format", type="string",
                      help="format of new identifiers for the rename "
                      "function [default=%default].")

    parser.add_option("--unique", dest="unique", action="store_true",
                      help="in the rename function, make each match "
                      "unique [default=%default].")

    parser.add_option("--output-filename-map", dest="output_filename_map",
                      type="string",
                      help="filename with map of old to new labels for "
                      "rename function [default=%default].")

    parser.add_option("--complement-min-length", dest="complement_min_length",
                      type="int",
                      help="minimum length for complemented blocks "
                      "[default=%default].")

    parser.add_option("--complement-border", dest="complement_border",
                      type="int",
                      help="number of residues to exclude before alignment "
                      "at either end [default=%default].")

    parser.add_option("--complement-aligner", dest="complement_aligner",
                      type="choice",
                      choices=("clustal", "dba", "dialign", "dialign-lgs"),
                      help="aligner for complemented segments "
                      "[default=%default].")

    parser.add_option("--threshold-merge-distance",
                      dest="threshold_merge_distance", type="int",
                      help="distance in nucleotides at which two adjacent "
                      "reads shall be merged even if they are not "
                      "overlapping [%default].")

    parser.add_option("--test", dest="test", type="int",
                      help="for debugging purposes - stop after x "
                      "iterations [default=%default].")

    parser.set_defaults(filename_filter_target=None,
                        filename_filter_query=None,
                        filename_queries=None,
                        filename_sbjcts=None,
                        threshold_merge_distance=0,
                        report_step=100000,
                        min_aligned=100,
                        methods=[],
                        format="gff",
                        select="most-nmatches",
                        id_format="%06i",
                        unique=False,
                        output_filename_map=None,
                        header=None,
                        test=None)

    # Hand the caller's argument list to the parser; previously *argv* was
    # computed but never used, so an explicit argv was silently ignored.
    (options, args) = E.start(parser, argv=argv, add_pipe_options=True)

    # The fasta indices are optional; only some methods need them.
    if options.filename_queries:
        query_fasta = IndexedFasta.IndexedFasta(options.filename_queries)
    else:
        query_fasta = None

    if options.filename_sbjcts:
        sbjct_fasta = IndexedFasta.IndexedFasta(options.filename_sbjcts)
    else:
        sbjct_fasta = None

    if "add-sequence" in options.methods and \
       (sbjct_fasta is None or query_fasta is None):
        raise ValueError(
            "please supply both indexed query and "
            "target/genome sequence data.")

    iterator = Blat.iterator(options.stdin)

    # No header is written for None or "none".  The former guard
    # ("is not None or != 'none'") was always true and has been dropped.
    if options.header == "table":
        options.stdout.write("\t".join(Blat.FIELDS) + "\n")
    elif options.header == "full":
        options.stdout.write(Blat.HEADER + "\n")

    for method in options.methods:

        # The first group of methods receives the options object directly
        # and ends the method loop; any later methods are not applied.
        # NOTE(review): presumably these read options.stdin themselves -
        # confirm against the psl* helpers.
        if "map" == method:
            pslMap(options)
            break
        elif "filter-keep" == method:
            pslFilter(options, keep=True)
            break
        elif "filter-remove" == method:
            pslFilter(options, keep=False)
            break
        elif "merge" == method:
            pslMerge(options)
            break
        elif "add-sequence" == method:
            pslAddSequence(query_fasta, sbjct_fasta, options)
            break
        elif "complement" == method:
            pslComplement(query_fasta, sbjct_fasta, options)
            break
        elif "select-query" == method:
            pslSelectQuery(options)
            break
        # The remaining methods wrap the iterator, so they can be chained
        # in command line order.
        elif "test" == method:
            iterator = Blat.iterator_test(iterator, options.report_step)
        elif "rename-query" == method:
            iterator = iterator_rename_query(iterator, options)
        elif "sanitize" == method:
            iterator = iterator_sanitize(
                iterator, query_fasta, sbjct_fasta, options)
        elif "filter-fasta" == method:
            iterator = iterator_filter_fasta(
                iterator, query_fasta, sbjct_fasta, options)
        elif "remove-overlapping-query" == method:
            iterator = iterator_filter_overlapping_query(iterator, options)
        elif "remove-overlapping-target" == method:
            iterator = iterator_filter_overlapping_target(iterator, options)

    # Write out whatever the (possibly wrapped) iterator still yields.
    for psl in iterator:
        options.stdout.write("%s\n" % str(psl))

    E.stop()
Esempio n. 3
0
            pslFilter( options, keep = False )
            break
        elif "merge" == method:
            pslMerge( options )
            break
        elif "add-sequence" == method:
            pslAddSequence( query_fasta, sbjct_fasta, options )
            break
        elif "complement" == method:
            pslComplement( query_fasta, sbjct_fasta, options )
            break
        elif "select-query" == method:
            pslSelectQuery( options )
            break
        elif "test" == method:
            iterator = Blat.iterator_test( iterator, options.report_step )
        elif "rename-query" == method:
            iterator = iterator_rename_query( iterator, options )
        elif "sanitize" == method:
            iterator = iterator_sanitize( iterator, query_fasta, sbjct_fasta, options )
        elif "filter-fasta" == method:
            iterator = iterator_filter_fasta( iterator, query_fasta, sbjct_fasta, options )
        elif "remove-overlapping-query" == method:
            iterator = iterator_filter_overlapping_query( iterator, options )
        elif "remove-overlapping-target" == method:
            iterator = iterator_filter_overlapping_target( iterator, options )

    for psl in iterator:
        options.stdout.write( "%s\n" % str( psl ) )

    E.Stop()