Example #1
0
def generate_ngrams_subparser(subparsers):
    """Register the `ngrams` sub-command parser on `subparsers`.

    The sub-command dispatches to `generate_ngrams`. It accepts an
    optional catalogue (``-c``/``--catalogue``) and two positional
    integers, ``min_size`` and ``max_size``, alongside whatever the
    `utils` helpers add.

    """
    ngrams_parser = subparsers.add_parser(
        'ngrams',
        description=constants.NGRAMS_DESCRIPTION,
        epilog=constants.NGRAMS_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.NGRAMS_HELP)
    ngrams_parser.set_defaults(func=generate_ngrams)
    utils.add_common_arguments(ngrams_parser)
    ngrams_parser.add_argument(
        '-c', '--catalogue',
        dest='catalogue',
        help=constants.NGRAMS_CATALOGUE_HELP,
        metavar='CATALOGUE')
    utils.add_db_arguments(ngrams_parser)
    utils.add_corpus_arguments(ngrams_parser)
    # The two size bounds differ only in name, help text and metavar.
    size_arguments = (
        ('min_size', constants.NGRAMS_MINIMUM_HELP, 'MINIMUM'),
        ('max_size', constants.NGRAMS_MAXIMUM_HELP, 'MAXIMUM'),
    )
    for name, help_text, metavar in size_arguments:
        ngrams_parser.add_argument(name, help=help_text, metavar=metavar,
                                   type=int)
Example #2
0
def main():
    """Parse the command line and run a paternity test.

    Builds the argument parser, resolves the catalogue, data store and
    tokenizer from the parsed arguments, then constructs and processes
    a `PaternityTest`. Any exception raised while constructing or
    processing the test is passed to the parser's ``error`` method.

    """
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        epilog=EPILOG,
        formatter_class=ParagraphFormatter)
    utils.add_db_arguments(parser)
    utils.add_tokenizer_argument(parser)
    utils.add_query_arguments(parser)
    # The three label positionals share the same shape.
    label_arguments = (
        ('parent', PARENT_LABEL_HELP, 'PARENT_LABEL'),
        ('child', CHILD_LABEL_HELP, 'CHILD_LABEL'),
        ('unrelated', UNRELATED_LABEL_HELP, 'UNRELATED_LABEL'),
    )
    for name, help_text, metavar in label_arguments:
        parser.add_argument(name, help=help_text, metavar=metavar)
    parser.add_argument(
        'max_works', help=MAX_WORKS_HELP, metavar='MAXIMUM', type=int)
    parser.add_argument(
        'output_dir', help=OUTPUT_DIR_HELP, metavar='DIRECTORY')

    args = parser.parse_args()
    catalogue = utils.get_catalogue(args)
    data_store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    try:
        paternity_test = taclextra.paternity.PaternityTest(
            data_store, catalogue, tokenizer, args.parent, args.child,
            args.unrelated, args.max_works, args.output_dir)
        paternity_test.process()
    except Exception as e:
        # Report the failure via argparse rather than a traceback.
        parser.error(e)
Example #3
0
def generate_counts_subparser(subparsers):
    """Register the `counts` sub-command parser on `subparsers`.

    The sub-command dispatches to `ngram_counts`; its arguments come
    entirely from the `utils` helper functions.

    """
    parser_options = {
        'description': constants.COUNTS_DESCRIPTION,
        'epilog': constants.COUNTS_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.COUNTS_HELP,
    }
    counts_parser = subparsers.add_parser('counts', **parser_options)
    counts_parser.set_defaults(func=ngram_counts)
    utils.add_common_arguments(counts_parser)
    utils.add_db_arguments(counts_parser)
    utils.add_corpus_arguments(counts_parser)
    utils.add_query_arguments(counts_parser)
Example #4
0
def generate_supplied_diff_subparser(subparsers):
    """Register the `sdiff` sub-command parser on `subparsers`.

    The sub-command dispatches to `supplied_diff`, which runs a diff
    query over supplied results sets.

    """
    parser_options = {
        'description': constants.SUPPLIED_DIFF_DESCRIPTION,
        'epilog': constants.SUPPLIED_DIFF_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.SUPPLIED_DIFF_HELP,
    }
    sdiff_parser = subparsers.add_parser('sdiff', **parser_options)
    sdiff_parser.set_defaults(func=supplied_diff)
    utils.add_common_arguments(sdiff_parser)
    utils.add_tokenizer_argument(sdiff_parser)
    utils.add_db_arguments(sdiff_parser, True)
    utils.add_supplied_query_arguments(sdiff_parser)
Example #5
0
def generate_intersect_subparser(subparsers):
    """Register the `intersect` sub-command parser on `subparsers`.

    The sub-command dispatches to `ngram_intersection`; its arguments
    come entirely from the `utils` helper functions.

    """
    parser_options = {
        'description': constants.INTERSECT_DESCRIPTION,
        'epilog': constants.INTERSECT_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.INTERSECT_HELP,
    }
    intersect_parser = subparsers.add_parser('intersect', **parser_options)
    intersect_parser.set_defaults(func=ngram_intersection)
    utils.add_common_arguments(intersect_parser)
    utils.add_db_arguments(intersect_parser)
    utils.add_corpus_arguments(intersect_parser)
    utils.add_query_arguments(intersect_parser)
Example #6
0
def generate_supplied_intersect_subparser(subparsers):
    """Register the `sintersect` sub-command parser on `subparsers`.

    The sub-command dispatches to `supplied_intersect`, which runs an
    intersect query over supplied results sets.

    """
    parser_options = {
        'description': constants.SUPPLIED_INTERSECT_DESCRIPTION,
        'epilog': constants.SUPPLIED_INTERSECT_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.SUPPLIED_INTERSECT_HELP,
    }
    sintersect_parser = subparsers.add_parser('sintersect', **parser_options)
    sintersect_parser.set_defaults(func=supplied_intersect)
    utils.add_common_arguments(sintersect_parser)
    utils.add_db_arguments(sintersect_parser, True)
    utils.add_supplied_query_arguments(sintersect_parser)
Example #7
0
def generate_supplied_intersect_subparser(subparsers):
    """Add the `sintersect` sub-command to `subparsers`.

    The new parser is wired to `supplied_intersect`, for running an
    intersect query using supplied results sets.

    """
    add_sintersect = subparsers.add_parser
    sub = add_sintersect('sintersect',
                         description=constants.SUPPLIED_INTERSECT_DESCRIPTION,
                         epilog=constants.SUPPLIED_INTERSECT_EPILOG,
                         formatter_class=ParagraphFormatter,
                         help=constants.SUPPLIED_INTERSECT_HELP)
    sub.set_defaults(func=supplied_intersect)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub, True)
    utils.add_supplied_query_arguments(sub)
Example #8
0
def generate_intersect_subparser(subparsers):
    """Add the `intersect` sub-command to `subparsers`.

    The new parser is wired to `ngram_intersection` and populated by
    the `utils` argument helpers.

    """
    sub = subparsers.add_parser(
        'intersect',
        description=constants.INTERSECT_DESCRIPTION,
        epilog=constants.INTERSECT_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.INTERSECT_HELP)
    sub.set_defaults(func=ngram_intersection)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
Example #9
0
def generate_counts_subparser(subparsers):
    """Add the `counts` sub-command to `subparsers`.

    The new parser is wired to `ngram_counts` and populated by the
    `utils` argument helpers.

    """
    sub = subparsers.add_parser(
        'counts',
        description=constants.COUNTS_DESCRIPTION,
        epilog=constants.COUNTS_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.COUNTS_HELP)
    sub.set_defaults(func=ngram_counts)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
Example #10
0
def generate_supplied_diff_subparser(subparsers):
    """Add the `sdiff` sub-command to `subparsers`.

    The new parser is wired to `supplied_diff`, for running a diff
    query using supplied results sets.

    """
    add_sdiff = subparsers.add_parser
    sub = add_sdiff('sdiff',
                    description=constants.SUPPLIED_DIFF_DESCRIPTION,
                    epilog=constants.SUPPLIED_DIFF_EPILOG,
                    formatter_class=ParagraphFormatter,
                    help=constants.SUPPLIED_DIFF_HELP)
    sub.set_defaults(func=supplied_diff)
    utils.add_common_arguments(sub)
    utils.add_tokenizer_argument(sub)
    utils.add_db_arguments(sub, True)
    utils.add_supplied_query_arguments(sub)
Example #11
0
def generate_parser():
    """Build and return the argument parser for the command.

    On top of the arguments added by the `utils` helpers, the parser
    takes two positionals: ``label`` and ``output``.

    """
    jitc_parser = argparse.ArgumentParser(
        description=JITC_DESCRIPTION,
        epilog=JITC_EPILOG,
        formatter_class=ParagraphFormatter)
    utils.add_common_arguments(jitc_parser)
    utils.add_db_arguments(jitc_parser)
    utils.add_corpus_arguments(jitc_parser)
    utils.add_query_arguments(jitc_parser)
    jitc_parser.add_argument(
        'label', help=JITC_LABEL_HELP, metavar='LABEL')
    jitc_parser.add_argument(
        'output', help=constants.REPORT_OUTPUT_HELP, metavar='OUTPUT')
    return jitc_parser
Example #12
0
def generate_search_subparser(subparsers):
    """Register the `search` sub-command parser on `subparsers`.

    The sub-command dispatches to `search_texts` and accepts zero or
    more positional ``ngrams`` values after the arguments added by the
    `utils` helpers.

    """
    parser_options = {
        'description': constants.SEARCH_DESCRIPTION,
        'epilog': constants.SEARCH_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.SEARCH_HELP,
    }
    search_parser = subparsers.add_parser('search', **parser_options)
    search_parser.set_defaults(func=search_texts)
    utils.add_common_arguments(search_parser)
    utils.add_db_arguments(search_parser)
    utils.add_corpus_arguments(search_parser)
    utils.add_query_arguments(search_parser)
    search_parser.add_argument(
        'ngrams',
        help=constants.SEARCH_NGRAMS_HELP,
        nargs='*',
        metavar='NGRAMS')
Example #13
0
def main():
    """Parse the command line and run a `LifetimeReporter`.

    The reporter is given the data store, catalogue and tokenizer
    resolved from the parsed arguments, plus the absolute path of the
    ``output`` directory argument.

    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    utils.add_db_arguments(parser)
    utils.add_tokenizer_argument(parser)
    utils.add_query_arguments(parser)
    parser.add_argument(
        'output',
        help=HELP_OUTPUT,
        metavar='DIRECTORY')
    options = parser.parse_args()

    data_store = utils.get_data_store(options)
    catalogue = utils.get_catalogue(options)
    tokenizer = utils.get_tokenizer(options)
    output_dir = os.path.abspath(options.output)
    lifetime.LifetimeReporter(
        data_store, catalogue, tokenizer, output_dir).process()
Example #14
0
def generate_diff_subparser(subparsers):
    """Register the `diff` sub-command parser on `subparsers`.

    The sub-command dispatches to `ngram_diff`. The ``-a`` /
    ``--asymmetric`` option is placed in a mutually exclusive group
    before the `utils` helpers add the remaining arguments.

    """
    parser_options = {
        'description': constants.DIFF_DESCRIPTION,
        'epilog': constants.DIFF_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.DIFF_HELP,
    }
    diff_parser = subparsers.add_parser('diff', **parser_options)
    diff_parser.set_defaults(func=ngram_diff)
    exclusive = diff_parser.add_mutually_exclusive_group()
    exclusive.add_argument(
        '-a', '--asymmetric',
        help=constants.ASYMMETRIC_HELP,
        metavar='LABEL')
    utils.add_common_arguments(diff_parser)
    utils.add_db_arguments(diff_parser)
    utils.add_corpus_arguments(diff_parser)
    utils.add_query_arguments(diff_parser)
Example #15
0
def generate_search_subparser(subparsers):
    """Add the `search` sub-command to `subparsers`.

    The new parser is wired to `search_texts`. After the `utils`
    helpers have added their arguments, a positional ``ngrams``
    argument taking zero or more values is appended.

    """
    sub = subparsers.add_parser(
        'search',
        description=constants.SEARCH_DESCRIPTION,
        epilog=constants.SEARCH_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.SEARCH_HELP)
    sub.set_defaults(func=search_texts)
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
    ngrams_options = {
        'help': constants.SEARCH_NGRAMS_HELP,
        'nargs': '*',
        'metavar': 'NGRAMS',
    }
    sub.add_argument('ngrams', **ngrams_options)
Example #16
0
def generate_diff_subparser(subparsers):
    """Add the `diff` sub-command to `subparsers`.

    The new parser is wired to `ngram_diff`. Its ``-a`` /
    ``--asymmetric`` option lives in a mutually exclusive group; the
    remaining arguments are added by the `utils` helpers.

    """
    sub = subparsers.add_parser(
        'diff',
        description=constants.DIFF_DESCRIPTION,
        epilog=constants.DIFF_EPILOG,
        formatter_class=ParagraphFormatter,
        help=constants.DIFF_HELP)
    sub.set_defaults(func=ngram_diff)
    exclusive = sub.add_mutually_exclusive_group()
    exclusive.add_argument(
        '-a', '--asymmetric', help=constants.ASYMMETRIC_HELP, metavar='LABEL')
    utils.add_common_arguments(sub)
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    utils.add_query_arguments(sub)
Example #17
0
def generate_ngrams_subparser(subparsers):
    """Add the `ngrams` sub-command to `subparsers`.

    The new parser is wired to `generate_ngrams`. It takes an optional
    catalogue (``-c``/``--catalogue``) and two integer positionals,
    ``min_size`` and ``max_size``, in addition to the arguments added
    by the `utils` helpers.

    """
    parser_options = {
        'description': constants.NGRAMS_DESCRIPTION,
        'epilog': constants.NGRAMS_EPILOG,
        'formatter_class': ParagraphFormatter,
        'help': constants.NGRAMS_HELP,
    }
    sub = subparsers.add_parser('ngrams', **parser_options)
    sub.set_defaults(func=generate_ngrams)
    utils.add_common_arguments(sub)
    sub.add_argument(
        '-c', '--catalogue',
        dest='catalogue',
        help=constants.NGRAMS_CATALOGUE_HELP,
        metavar='CATALOGUE')
    utils.add_db_arguments(sub)
    utils.add_corpus_arguments(sub)
    sub.add_argument(
        'min_size',
        help=constants.NGRAMS_MINIMUM_HELP,
        metavar='MINIMUM',
        type=int)
    sub.add_argument(
        'max_size',
        help=constants.NGRAMS_MAXIMUM_HELP,
        metavar='MAXIMUM',
        type=int)
Example #18
0
def generate_query_subparser(subparsers):
    """Register the `query` sub-command parser on `subparsers`.

    The sub-command sends SQL to the data store and returns the
    results; it dispatches to `query_data_store`. ``-q``/``--query``
    is required, and ``-p``/``--parameters`` accepts zero or more
    values.

    """
    query_parser = subparsers.add_parser(
        'query',
        description=constants.QUERY_DESCRIPTION,
        formatter_class=ParagraphFormatter,
        help=constants.QUERY_HELP)
    utils.add_common_arguments(query_parser)
    utils.add_db_arguments(query_parser, True)
    query_parser.add_argument(
        '-q', '--query',
        help=constants.QUERY_QUERY_HELP,
        metavar='QUERY',
        required=True)
    query_parser.add_argument(
        '-p', '--parameters',
        help=constants.QUERY_PARAMETERS_HELP,
        metavar='PARAMETER',
        nargs='*')
    query_parser.set_defaults(func=query_data_store)
Example #19
0
def main():
    """Parse the command line and run a `PairedIntersector`.

    ``--min_size`` and ``--max_size`` default to 1 and 10. When the
    ``db`` argument is the string ``'memory'`` no data store is
    created and ``None`` is passed to the intersector instead.

    """
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        epilog=EPILOG,
        formatter_class=ParagraphFormatter)
    parser.add_argument(
        '--min_size', default=1, help=MINIMUM_HELP, metavar='MINIMUM',
        type=int)
    parser.add_argument(
        '--max_size', default=10, help=MAXIMUM_HELP, metavar='MAXIMUM',
        type=int)
    utils.add_common_arguments(parser)
    utils.add_db_arguments(parser)
    utils.add_corpus_arguments(parser)
    utils.add_query_arguments(parser)
    parser.add_argument(
        'output_dir', help='Path to output directory', metavar='DIRECTORY')
    parser.add_argument(
        'tracker_path', help='Path to tracking file', metavar='TRACKING')
    args = parser.parse_args()

    logger = logging.getLogger('taclextra')
    # Only configure logging when the parsed namespace has `verbose`.
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)

    corpus = utils.get_corpus(args)
    data_store = None if args.db == 'memory' else utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    catalogue = utils.get_catalogue(args)
    intersector = paired_intersector.PairedIntersector(
        data_store, corpus, tokenizer, catalogue, args.output_dir,
        args.tracker_path, args.min_size, args.max_size)
    intersector.intersect_all()