def generate_ngrams_subparser(subparsers):
    """Add an 'ngrams' sub-command parser to `subparsers`.

    The sub-command generates n-grams data and stores it in the data
    store; minimum and maximum n-gram sizes are required positionals.
    """
    subparser = subparsers.add_parser(
        'ngrams', description=constants.NGRAMS_DESCRIPTION,
        epilog=constants.NGRAMS_EPILOG, formatter_class=ParagraphFormatter,
        help=constants.NGRAMS_HELP)
    subparser.set_defaults(func=generate_ngrams)
    utils.add_common_arguments(subparser)
    # Optional catalogue restricting which works get n-grams generated.
    subparser.add_argument(
        '-c', '--catalogue', dest='catalogue',
        help=constants.NGRAMS_CATALOGUE_HELP, metavar='CATALOGUE')
    utils.add_db_arguments(subparser)
    utils.add_corpus_arguments(subparser)
    subparser.add_argument('min_size', help=constants.NGRAMS_MINIMUM_HELP,
                           metavar='MINIMUM', type=int)
    subparser.add_argument('max_size', help=constants.NGRAMS_MAXIMUM_HELP,
                           metavar='MAXIMUM', type=int)
def main():
    """Run a paternity test from command-line arguments.

    Builds the argument parser, assembles the catalogue, data store and
    tokenizer from the parsed arguments, then constructs and processes a
    PaternityTest writing results to the given output directory. Any
    exception raised during processing is reported through
    ``parser.error`` (which prints the message and exits with status 2).
    """
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=ParagraphFormatter)
    utils.add_db_arguments(parser)
    utils.add_tokenizer_argument(parser)
    utils.add_query_arguments(parser)
    parser.add_argument('parent', help=PARENT_LABEL_HELP,
                        metavar='PARENT_LABEL')
    parser.add_argument('child', help=CHILD_LABEL_HELP,
                        metavar='CHILD_LABEL')
    parser.add_argument('unrelated', help=UNRELATED_LABEL_HELP,
                        metavar='UNRELATED_LABEL')
    parser.add_argument('max_works', help=MAX_WORKS_HELP,
                        metavar='MAXIMUM', type=int)
    parser.add_argument('output_dir', help=OUTPUT_DIR_HELP,
                        metavar='DIRECTORY')
    args = parser.parse_args()
    catalogue = utils.get_catalogue(args)
    data_store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    try:
        test = taclextra.paternity.PaternityTest(
            data_store, catalogue, tokenizer, args.parent, args.child,
            args.unrelated, args.max_works, args.output_dir)
        test.process()
    except Exception as e:
        # parser.error expects a message string; convert explicitly so
        # exceptions whose args are not strings still render correctly.
        parser.error(str(e))
def generate_counts_subparser(subparsers):
    """Add a 'counts' sub-command parser to `subparsers`.

    The sub-command reports n-gram counts for the catalogued works.
    """
    subparser = subparsers.add_parser(
        'counts', description=constants.COUNTS_DESCRIPTION,
        epilog=constants.COUNTS_EPILOG, formatter_class=ParagraphFormatter,
        help=constants.COUNTS_HELP)
    subparser.set_defaults(func=ngram_counts)
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser)
    utils.add_corpus_arguments(subparser)
    utils.add_query_arguments(subparser)
def generate_supplied_diff_subparser(subparsers):
    """Add an 'sdiff' sub-command parser to `subparsers`.

    The sub-command runs a diff query over supplied results sets rather
    than a corpus catalogue.
    """
    subparser = subparsers.add_parser(
        'sdiff', description=constants.SUPPLIED_DIFF_DESCRIPTION,
        epilog=constants.SUPPLIED_DIFF_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SUPPLIED_DIFF_HELP)
    subparser.set_defaults(func=supplied_diff)
    utils.add_common_arguments(subparser)
    utils.add_tokenizer_argument(subparser)
    # True: the database may be created if it does not already exist.
    utils.add_db_arguments(subparser, True)
    utils.add_supplied_query_arguments(subparser)
def generate_intersect_subparser(subparsers):
    """Add an 'intersect' sub-command parser to `subparsers`.

    The sub-command makes an intersection query across labelled works.
    """
    subparser = subparsers.add_parser(
        'intersect', description=constants.INTERSECT_DESCRIPTION,
        epilog=constants.INTERSECT_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.INTERSECT_HELP)
    subparser.set_defaults(func=ngram_intersection)
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser)
    utils.add_corpus_arguments(subparser)
    utils.add_query_arguments(subparser)
def generate_supplied_intersect_subparser(subparsers):
    """Add an 'sintersect' sub-command parser to `subparsers`.

    The sub-command runs an intersect query over supplied results sets.
    """
    subparser = subparsers.add_parser(
        'sintersect', description=constants.SUPPLIED_INTERSECT_DESCRIPTION,
        epilog=constants.SUPPLIED_INTERSECT_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SUPPLIED_INTERSECT_HELP)
    subparser.set_defaults(func=supplied_intersect)
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser, True)
    utils.add_supplied_query_arguments(subparser)
def generate_supplied_intersect_subparser(subparsers):
    """Add an 'sintersect' sub-command to `subparsers` for running an
    intersect query using supplied results sets."""
    sub = subparsers.add_parser(
        'sintersect',
        description=constants.SUPPLIED_INTERSECT_DESCRIPTION,
        epilog=constants.SUPPLIED_INTERSECT_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SUPPLIED_INTERSECT_HELP)
    sub.set_defaults(func=supplied_intersect)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub, True)
    utils.add_supplied_query_arguments(sub)
def generate_intersect_subparser(subparsers):
    """Add an 'intersect' sub-command to `subparsers` for making an
    intersection query."""
    sub = subparsers.add_parser(
        'intersect',
        description=constants.INTERSECT_DESCRIPTION,
        epilog=constants.INTERSECT_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.INTERSECT_HELP)
    sub.set_defaults(func=ngram_intersection)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
def generate_counts_subparser(subparsers):
    """Add a 'counts' sub-command to `subparsers` for making a counts
    query."""
    sub = subparsers.add_parser(
        'counts',
        description=constants.COUNTS_DESCRIPTION,
        epilog=constants.COUNTS_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.COUNTS_HELP)
    sub.set_defaults(func=ngram_counts)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
def generate_supplied_diff_subparser(subparsers):
    """Add an 'sdiff' sub-command to `subparsers` for running a diff
    query using supplied results sets."""
    sub = subparsers.add_parser(
        'sdiff',
        description=constants.SUPPLIED_DIFF_DESCRIPTION,
        epilog=constants.SUPPLIED_DIFF_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SUPPLIED_DIFF_HELP)
    sub.set_defaults(func=supplied_diff)
    utils.add_common_arguments(sub)
    utils.add_tokenizer_argument(sub)
    utils.add_db_arguments(sub, True)
    utils.add_supplied_query_arguments(sub)
def generate_parser():
    """Build and return the command's argument parser.

    The parser takes common, database, corpus and query arguments plus
    a work label and an output path.
    """
    cli = argparse.ArgumentParser(
        description=JITC_DESCRIPTION, epilog=JITC_EPILOG,
        formatter_class=ParagraphFormatter)
    utils.add_common_arguments(cli)
    utils.add_db_arguments(cli)
    utils.add_corpus_arguments(cli)
    utils.add_query_arguments(cli)
    cli.add_argument('label', help=JITC_LABEL_HELP, metavar='LABEL')
    cli.add_argument('output', help=constants.REPORT_OUTPUT_HELP,
                     metavar='OUTPUT')
    return cli
def generate_search_subparser(subparsers):
    """Add a 'search' sub-command parser to `subparsers`.

    The sub-command generates search results for a set of n-grams,
    supplied as zero or more positional arguments.
    """
    subparser = subparsers.add_parser(
        'search', description=constants.SEARCH_DESCRIPTION,
        epilog=constants.SEARCH_EPILOG, formatter_class=ParagraphFormatter,
        help=constants.SEARCH_HELP)
    subparser.set_defaults(func=search_texts)
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser)
    utils.add_corpus_arguments(subparser)
    utils.add_query_arguments(subparser)
    subparser.add_argument('ngrams', help=constants.SEARCH_NGRAMS_HELP,
                           nargs='*', metavar='NGRAMS')
def main():
    """Run a lifetime report from command-line arguments.

    Assembles the data store, catalogue and tokenizer from the parsed
    arguments and writes the report into the given output directory.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)
    utils.add_db_arguments(cli)
    utils.add_tokenizer_argument(cli)
    utils.add_query_arguments(cli)
    cli.add_argument('output', help=HELP_OUTPUT, metavar='DIRECTORY')
    args = cli.parse_args()
    store = utils.get_data_store(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    # Normalise the user-supplied output path before handing it on.
    out_dir = os.path.abspath(args.output)
    reporter = lifetime.LifetimeReporter(store, catalogue, tokenizer,
                                         out_dir)
    reporter.process()
def generate_diff_subparser(subparsers):
    """Add a 'diff' sub-command parser to `subparsers`.

    The sub-command makes a diff query; ``-a``/``--asymmetric`` limits
    results to n-grams unique to the given label.
    """
    subparser = subparsers.add_parser(
        'diff', description=constants.DIFF_DESCRIPTION,
        epilog=constants.DIFF_EPILOG, formatter_class=ParagraphFormatter,
        help=constants.DIFF_HELP)
    subparser.set_defaults(func=ngram_diff)
    exclusive = subparser.add_mutually_exclusive_group()
    exclusive.add_argument('-a', '--asymmetric',
                           help=constants.ASYMMETRIC_HELP, metavar='LABEL')
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser)
    utils.add_corpus_arguments(subparser)
    utils.add_query_arguments(subparser)
def generate_search_subparser(subparsers):
    """Add a 'search' sub-command to `subparsers` for generating search
    results for a set of n-grams."""
    sub = subparsers.add_parser(
        'search',
        description=constants.SEARCH_DESCRIPTION,
        epilog=constants.SEARCH_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SEARCH_HELP)
    sub.set_defaults(func=search_texts)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
    # Zero or more n-grams may be supplied on the command line.
    sub.add_argument('ngrams', help=constants.SEARCH_NGRAMS_HELP,
                     nargs='*', metavar='NGRAMS')
def generate_diff_subparser(subparsers):
    """Add a 'diff' sub-command to `subparsers` for making a diff
    query."""
    sub = subparsers.add_parser(
        'diff',
        description=constants.DIFF_DESCRIPTION,
        epilog=constants.DIFF_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.DIFF_HELP)
    sub.set_defaults(func=ngram_diff)
    exclusive_group = sub.add_mutually_exclusive_group()
    exclusive_group.add_argument('-a', '--asymmetric',
                                 help=constants.ASYMMETRIC_HELP,
                                 metavar='LABEL')
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
def generate_ngrams_subparser(subparsers):
    """Add an 'ngrams' sub-command to `subparsers` for adding n-grams
    data to the data store."""
    sub = subparsers.add_parser(
        'ngrams',
        description=constants.NGRAMS_DESCRIPTION,
        epilog=constants.NGRAMS_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.NGRAMS_HELP)
    sub.set_defaults(func=generate_ngrams)
    utils.add_common_arguments(sub)
    sub.add_argument('-c', '--catalogue', dest='catalogue',
                     help=constants.NGRAMS_CATALOGUE_HELP,
                     metavar='CATALOGUE')
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    sub.add_argument('min_size', help=constants.NGRAMS_MINIMUM_HELP,
                     metavar='MINIMUM', type=int)
    sub.add_argument('max_size', help=constants.NGRAMS_MAXIMUM_HELP,
                     metavar='MAXIMUM', type=int)
def generate_query_subparser(subparsers):
    """Add a 'query' sub-command parser to `subparsers`.

    The sub-command sends raw SQL (with optional parameters) to the
    data store and returns the results.
    """
    subparser = subparsers.add_parser(
        'query', description=constants.QUERY_DESCRIPTION,
        formatter_class=ParagraphFormatter, help=constants.QUERY_HELP)
    utils.add_common_arguments(subparser)
    utils.add_db_arguments(subparser, True)
    subparser.add_argument('-q', '--query',
                           help=constants.QUERY_QUERY_HELP,
                           metavar='QUERY', required=True)
    subparser.add_argument('-p', '--parameters',
                           help=constants.QUERY_PARAMETERS_HELP,
                           metavar='PARAMETER', nargs='*')
    subparser.set_defaults(func=query_data_store)
def main():
    """Run pairwise intersections from command-line arguments.

    Assembles the corpus, data store, tokenizer and catalogue from the
    parsed arguments and intersects all catalogued pairs, writing
    results to the output directory and recording progress in the
    tracking file.
    """
    cli = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=ParagraphFormatter)
    cli.add_argument('--min_size', default=1, help=MINIMUM_HELP,
                     metavar='MINIMUM', type=int)
    cli.add_argument('--max_size', default=10, help=MAXIMUM_HELP,
                     metavar='MAXIMUM', type=int)
    utils.add_common_arguments(cli)
    utils.add_db_arguments(cli)
    utils.add_corpus_arguments(cli)
    utils.add_query_arguments(cli)
    cli.add_argument('output_dir', help='Path to output directory',
                     metavar='DIRECTORY')
    cli.add_argument('tracker_path', help='Path to tracking file',
                     metavar='TRACKING')
    args = cli.parse_args()
    log = logging.getLogger('taclextra')
    # Only configure logging when the common arguments provided a
    # verbosity setting.
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, log)
    corpus = utils.get_corpus(args)
    # 'memory' means no persistent data store is used.
    if args.db == 'memory':
        store = None
    else:
        store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    catalogue = utils.get_catalogue(args)
    intersector = paired_intersector.PairedIntersector(
        store, corpus, tokenizer, catalogue, args.output_dir,
        args.tracker_path, args.min_size, args.max_size)
    intersector.intersect_all()