Example #1
0
def ngram_intersection(args, parser):
    """Outputs the results of performing an intersection query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args.catalogue)
    store.validate(corpus, catalogue)
    store.intersection(catalogue, sys.stdout)
Example #2
0
def ngram_intersection(args, parser):
    """Outputs the results of performing an intersection query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    store.validate(corpus, catalogue)
    store.intersection(catalogue, sys.stdout)
Example #3
0
def search_texts(args, parser):
    """Searches texts for presence of n-grams."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    store.validate(corpus, catalogue)
    ngrams = utils.get_ngrams(args.ngrams)
    store.search(catalogue, ngrams, sys.stdout)
Example #4
0
def generate_ngrams(args, parser):
    """Adds n-grams data to the data store."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    if args.catalogue:
        catalogue = utils.get_catalogue(args)
    else:
        catalogue = None
    store.add_ngrams(corpus, args.min_size, args.max_size, catalogue)
Example #5
0
def generate_ngrams(args, parser):
    """Adds n-grams data to the data store."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    if args.catalogue:
        catalogue = utils.get_catalogue(args.catalogue)
    else:
        catalogue = None
    store.add_ngrams(corpus, args.min_size, args.max_size, catalogue)
Example #6
0
def search_texts(args, parser):
    """Searches texts for presence of n-grams."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = tacl.Catalogue()
    if args.catalogue:
        catalogue.load(args.catalogue)
    store.validate(corpus, catalogue)
    ngrams = utils.get_ngrams(args.ngrams)
    store.search(catalogue, ngrams, sys.stdout)
Example #7
0
def ngram_diff(args, parser):
    """Outputs the results of performing a diff query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args.catalogue)
    tokenizer = utils.get_tokenizer(args)
    store.validate(corpus, catalogue)
    if args.asymmetric:
        store.diff_asymmetric(catalogue, args.asymmetric, tokenizer, sys.stdout)
    else:
        store.diff(catalogue, tokenizer, sys.stdout)
Example #8
0
def ngram_diff(args, parser):
    """Outputs the results of performing a diff query."""
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    store.validate(corpus, catalogue)
    if args.asymmetric:
        store.diff_asymmetric(catalogue, args.asymmetric, tokenizer,
                              sys.stdout)
    else:
        store.diff(catalogue, tokenizer, sys.stdout)
Example #9
0
def main():
    parser = generate_parser()
    args = parser.parse_args()
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    check_catalogue(catalogue, args.label)
    store.validate(corpus, catalogue)
    output_dir = os.path.abspath(args.output)
    if os.path.exists(output_dir):
        logger.warning('Output directory already exists; any results therein '
                       'will be reused rather than regenerated.')
    os.makedirs(output_dir, exist_ok=True)
    report = tacl.JitCReport(store, corpus, tokenizer)
    report.generate(output_dir, catalogue, args.label)
Example #10
0
def main():
    parser = generate_parser()
    args = parser.parse_args()
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)
    store = utils.get_data_store(args)
    corpus = utils.get_corpus(args)
    catalogue = utils.get_catalogue(args.catalogue)
    tokenizer = utils.get_tokenizer(args)
    check_catalogue(catalogue, args.label)
    store.validate(corpus, catalogue)
    output_dir = os.path.abspath(args.output)
    if os.path.exists(output_dir):
        logger.warning('Output directory already exists; any results therein '
                       'will be reused rather than regenerated.')
    os.makedirs(output_dir, exist_ok=True)
    report = tacl.JitCReport(store, corpus, tokenizer)
    report.generate(output_dir, catalogue, args.label)
Example #11
0
def supplied_intersect(args, parser):
    labels = args.labels
    results = args.supplied
    store = utils.get_data_store(args)
    store.intersection_supplied(results, labels, sys.stdout)
Example #12
0
def supplied_diff(args, parser):
    labels = args.labels
    results = args.supplied
    store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    store.diff_supplied(results, labels, tokenizer, sys.stdout)
Example #13
0
def supplied_intersect(args, parser):
    labels = args.labels
    results = args.supplied
    store = utils.get_data_store(args)
    store.intersection_supplied(results, labels, sys.stdout)
Example #14
0
def supplied_diff(args, parser):
    labels = args.labels
    results = args.supplied
    store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    store.diff_supplied(results, labels, tokenizer, sys.stdout)