Example #1
def ngram_intersection(args, parser):
    """Outputs the results of performing an intersection query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    store.validate(corpus, catalogue)
    store.intersection(catalogue, sys.stdout)
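Each of these handlers takes (args, parser) and is excerpted from a command-line script, so module-level imports such as sys, os, argparse and the package's utils helpers are not shown. Handlers with this signature are typically attached to argparse subparsers via set_defaults(func=...) and dispatched from main(); the sketch below illustrates that standard pattern with hypothetical names (show_intersection, build_parser) rather than tacl's actual wiring.

import argparse


def show_intersection(args, parser):
    # Hypothetical stand-in for a real handler such as ngram_intersection above.
    print('intersection query against', args.db)


def build_parser():
    parser = argparse.ArgumentParser(description='demo dispatcher')
    subparsers = parser.add_subparsers(help='sub-commands')
    intersect = subparsers.add_parser('intersect', help='run an intersection query')
    intersect.add_argument('db', help='path to the n-gram database', metavar='DATABASE')
    # Attach the handler so main() can dispatch on the chosen sub-command.
    intersect.set_defaults(func=show_intersection)
    return parser


def main():
    parser = build_parser()
    args = parser.parse_args()
    if hasattr(args, 'func'):
        args.func(args, parser)
    else:
        parser.print_help()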
Example #2
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION,
                                     epilog=EPILOG,
                                     formatter_class=ParagraphFormatter)
    utils.add_db_arguments(parser)
    utils.add_tokenizer_argument(parser)
    utils.add_query_arguments(parser)
    parser.add_argument('parent',
                        help=PARENT_LABEL_HELP,
                        metavar='PARENT_LABEL')
    parser.add_argument('child', help=CHILD_LABEL_HELP, metavar='CHILD_LABEL')
    parser.add_argument('unrelated',
                        help=UNRELATED_LABEL_HELP,
                        metavar='UNRELATED_LABEL')
    parser.add_argument('max_works',
                        help=MAX_WORKS_HELP,
                        metavar='MAXIMUM',
                        type=int)
    parser.add_argument('output_dir',
                        help=OUTPUT_DIR_HELP,
                        metavar='DIRECTORY')
    args = parser.parse_args()
    catalogue = utils.get_catalogue(args)
    data_store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    try:
        test = taclextra.paternity.PaternityTest(data_store, catalogue,
                                                 tokenizer, args.parent,
                                                 args.child, args.unrelated,
                                                 args.max_works,
                                                 args.output_dir)
        test.process()
    except Exception as e:
        parser.error(e)
Example #3
def lifetime_report(args, parser):
    """Generates a lifetime report."""
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    results = tacl.Results(args.results, tokenizer)
    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    report = tacl.LifetimeReport()
    report.generate(output_dir, catalogue, results, args.label)
Example #4
def generate_ngrams(args, parser):
    """Adds n-grams data to the data store."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    if args.catalogue:
        catalogue = utils.get_catalogue(args)
    else:
        catalogue = None
    store.add_ngrams(corpus, args.min_size, args.max_size, catalogue)
Example #5
def generate_ngrams(args, parser):
    """Adds n-grams data to the data store."""
    store = utils.get_data_store(args, must_exist=False)
    corpus = utils.get_corpus(args)
    if args.catalogue:
        catalogue = utils.get_catalogue(args)
    else:
        catalogue = None
    store.add_ngrams(corpus, args.min_size, args.max_size, catalogue)
Example #6
def search_texts(args, parser):
    """Searches texts for presence of n-grams."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    store.validate(corpus, catalogue)
    ngrams = []
    for ngram_file in args.ngrams:
        ngrams.extend(utils.get_ngrams(ngram_file))
    store.search(catalogue, ngrams, sys.stdout)
Example #7
def ngram_diff(args, parser):
    """Outputs the results of performing a diff query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    store.validate(corpus, catalogue)
    if args.asymmetric:
        store.diff_asymmetric(catalogue, args.asymmetric, tokenizer,
                              sys.stdout)
    else:
        store.diff(catalogue, tokenizer, sys.stdout)
Example #8
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    utils.add_db_arguments(parser)
    utils.add_tokenizer_argument(parser)
    utils.add_query_arguments(parser)
    parser.add_argument('output', help=HELP_OUTPUT, metavar='DIRECTORY')
    args = parser.parse_args()
    data_store = utils.get_data_store(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    output_dir = os.path.abspath(args.output)
    reporter = lifetime.LifetimeReporter(data_store, catalogue, tokenizer,
                                         output_dir)
    reporter.process()
Example #9
def main():
    parser = generate_parser()
    args = parser.parse_args()
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    try:
        check_catalogue(catalogue, args.label)
    except Exception as e:
        parser.error(str(e))
    store.validate(corpus, catalogue)
    output_dir = os.path.abspath(args.output)
    if os.path.exists(output_dir):
        logger.warning('Output directory already exists; any results therein '
                       'will be reused rather than regenerated.')
    os.makedirs(output_dir, exist_ok=True)
    report = jitc.JitCReport(store, corpus, tokenizer)
    report.generate(output_dir, catalogue, args.label)
Example #10
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION,
                                     epilog=EPILOG,
                                     formatter_class=ParagraphFormatter)
    parser.add_argument('--min_size',
                        default=1,
                        help=MINIMUM_HELP,
                        metavar='MINIMUM',
                        type=int)
    parser.add_argument('--max_size',
                        default=10,
                        help=MAXIMUM_HELP,
                        metavar='MAXIMUM',
                        type=int)
    utils.add_common_arguments(parser)
    utils.add_db_arguments(parser)
    utils.add_corpus_arguments(parser)
    utils.add_query_arguments(parser)
    parser.add_argument('output_dir',
                        help='Path to output directory',
                        metavar='DIRECTORY')
    parser.add_argument('tracker_path',
                        help='Path to tracking file',
                        metavar='TRACKING')
    args = parser.parse_args()
    logger = logging.getLogger('taclextra')
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)
    corpus = utils.get_corpus(args)
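    # A database value of 'memory' is treated as "no persistent data store"
    # (interpretation inferred from this snippet alone).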
    if args.db == 'memory':
        data_store = None
    else:
        data_store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    catalogue = utils.get_catalogue(args)
    pi = paired_intersector.PairedIntersector(data_store, corpus, tokenizer,
                                              catalogue, args.output_dir,
                                              args.tracker_path, args.min_size,
                                              args.max_size)
    pi.intersect_all()