def ngram_intersection(args, parser):
    """Run an intersection query, passing ``sys.stdout`` as its output.

    The data store and corpus are obtained from `args` through the
    ``utils`` helpers, and the catalogue from ``args.catalogue``.
    ``validate`` is called on the store with the corpus and catalogue
    before the intersection is requested.

    :param args: parsed command-line arguments; must provide a
        ``catalogue`` attribute
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args.catalogue)
    # Validation happens first so the query is not attempted if it raises.
    data_store.validate(texts, works)
    data_store.intersection(works, sys.stdout)
def ngram_intersection(args, parser):
    """Run an intersection query, passing ``sys.stdout`` as its output.

    The data store, corpus and catalogue are all obtained from `args`
    through the ``utils`` helpers. ``validate`` is called on the store
    with the corpus and catalogue before the intersection is requested.

    :param args: parsed command-line arguments
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args)
    # Validation happens first so the query is not attempted if it raises.
    data_store.validate(texts, works)
    data_store.intersection(works, sys.stdout)
def search_texts(args, parser):
    """Search for the n-grams named by ``args.ngrams``.

    The data store, corpus and catalogue are obtained from `args`
    through the ``utils`` helpers, and ``validate`` is called on the
    store with the corpus and catalogue. The n-grams are then fetched
    with ``utils.get_ngrams`` and handed to the store's ``search``
    together with the catalogue and ``sys.stdout``.

    :param args: parsed command-line arguments; must provide an
        ``ngrams`` attribute
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args)
    data_store.validate(texts, works)
    # The n-grams are only fetched once validation has passed.
    wanted_ngrams = utils.get_ngrams(args.ngrams)
    data_store.search(works, wanted_ngrams, sys.stdout)
def generate_ngrams(args, parser):
    """Ask the data store to add n-grams for the corpus.

    A catalogue is obtained with ``utils.get_catalogue(args)`` only
    when ``args.catalogue`` is truthy; otherwise ``None`` is passed in
    its place.

    :param args: parsed command-line arguments; must provide
        ``catalogue``, ``min_size`` and ``max_size`` attributes
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args) if args.catalogue else None
    data_store.add_ngrams(texts, args.min_size, args.max_size, works)
def generate_ngrams(args, parser):
    """Ask the data store to add n-grams for the corpus.

    A catalogue is obtained with ``utils.get_catalogue(args.catalogue)``
    only when ``args.catalogue`` is truthy; otherwise ``None`` is passed
    in its place.

    :param args: parsed command-line arguments; must provide
        ``catalogue``, ``min_size`` and ``max_size`` attributes
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args.catalogue) if args.catalogue else None
    data_store.add_ngrams(texts, args.min_size, args.max_size, works)
def search_texts(args, parser):
    """Search for the n-grams named by ``args.ngrams``.

    A new ``tacl.Catalogue`` is created and, when ``args.catalogue`` is
    truthy, ``load`` is called on it with that value; otherwise the
    catalogue is used as constructed. ``validate`` is called on the
    store with the corpus and catalogue before the n-grams are fetched
    and ``search`` is invoked with ``sys.stdout``.

    :param args: parsed command-line arguments; must provide
        ``catalogue`` and ``ngrams`` attributes
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = tacl.Catalogue()
    catalogue_source = args.catalogue
    if catalogue_source:
        works.load(catalogue_source)
    data_store.validate(texts, works)
    wanted_ngrams = utils.get_ngrams(args.ngrams)
    data_store.search(works, wanted_ngrams, sys.stdout)
def ngram_diff(args, parser):
    """Run a diff query, passing ``sys.stdout`` as its output.

    The catalogue is obtained from ``args.catalogue``. After
    ``validate`` has been called on the store, one of two store methods
    is used: ``diff_asymmetric`` (given ``args.asymmetric``) when that
    attribute is truthy, and ``diff`` otherwise.

    :param args: parsed command-line arguments; must provide
        ``catalogue`` and ``asymmetric`` attributes
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args.catalogue)
    tokenizer = utils.get_tokenizer(args)
    data_store.validate(texts, works)
    if not args.asymmetric:
        # No asymmetric value supplied: plain diff.
        data_store.diff(works, tokenizer, sys.stdout)
        return
    data_store.diff_asymmetric(works, args.asymmetric, tokenizer,
                               sys.stdout)
def ngram_diff(args, parser):
    """Run a diff query, passing ``sys.stdout`` as its output.

    The data store, corpus, catalogue and tokenizer are obtained from
    `args` through the ``utils`` helpers. After ``validate`` has been
    called on the store, one of two store methods is used:
    ``diff_asymmetric`` (given ``args.asymmetric``) when that attribute
    is truthy, and ``diff`` otherwise.

    :param args: parsed command-line arguments; must provide an
        ``asymmetric`` attribute
    :param parser: not used by this function

    """
    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    data_store.validate(texts, works)
    if not args.asymmetric:
        # No asymmetric value supplied: plain diff.
        data_store.diff(works, tokenizer, sys.stdout)
        return
    data_store.diff_asymmetric(works, args.asymmetric, tokenizer,
                               sys.stdout)
def main():
    """Parse the command line and generate a ``tacl.JitCReport``.

    Logging is configured only when the parsed arguments carry a
    ``verbose`` attribute. The catalogue is checked against
    ``args.label`` with ``check_catalogue`` and ``validate`` is called
    on the store before any output is produced. The output directory
    (``args.output``, made absolute) is created if missing; if it
    already exists a warning is logged and it is used as is.

    """
    args = generate_parser().parse_args()
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)

    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args)
    tokenizer = utils.get_tokenizer(args)
    check_catalogue(works, args.label)
    data_store.validate(texts, works)

    target_dir = os.path.abspath(args.output)
    # Test for existence before makedirs, which would otherwise hide
    # whether the directory was already there.
    already_present = os.path.exists(target_dir)
    if already_present:
        logger.warning('Output directory already exists; any results therein '
                       'will be reused rather than regenerated.')
    os.makedirs(target_dir, exist_ok=True)

    tacl.JitCReport(data_store, texts, tokenizer).generate(
        target_dir, works, args.label)
def main():
    """Parse the command line and generate a ``tacl.JitCReport``.

    Logging is configured only when the parsed arguments carry a
    ``verbose`` attribute. The catalogue is obtained from
    ``args.catalogue``, checked against ``args.label`` with
    ``check_catalogue``, and ``validate`` is called on the store before
    any output is produced. The output directory (``args.output``, made
    absolute) is created if missing; if it already exists a warning is
    logged and it is used as is.

    """
    args = generate_parser().parse_args()
    if hasattr(args, 'verbose'):
        utils.configure_logging(args.verbose, logger)

    data_store = utils.get_data_store(args)
    texts = utils.get_corpus(args)
    works = utils.get_catalogue(args.catalogue)
    tokenizer = utils.get_tokenizer(args)
    check_catalogue(works, args.label)
    data_store.validate(texts, works)

    target_dir = os.path.abspath(args.output)
    # Test for existence before makedirs, which would otherwise hide
    # whether the directory was already there.
    already_present = os.path.exists(target_dir)
    if already_present:
        logger.warning('Output directory already exists; any results therein '
                       'will be reused rather than regenerated.')
    os.makedirs(target_dir, exist_ok=True)

    tacl.JitCReport(data_store, texts, tokenizer).generate(
        target_dir, works, args.label)
def supplied_intersect(args, parser):
    """Call the store's ``intersection_supplied`` on supplied results.

    ``args.supplied`` and ``args.labels`` are forwarded to the data
    store's ``intersection_supplied`` method along with ``sys.stdout``.

    :param args: parsed command-line arguments; must provide ``labels``
        and ``supplied`` attributes
    :param parser: not used by this function

    """
    # Read both attributes before the data store is created.
    result_labels, supplied_results = args.labels, args.supplied
    data_store = utils.get_data_store(args)
    data_store.intersection_supplied(supplied_results, result_labels,
                                     sys.stdout)
def supplied_diff(args, parser):
    """Call the store's ``diff_supplied`` on supplied results.

    ``args.supplied`` and ``args.labels`` are forwarded to the data
    store's ``diff_supplied`` method along with a tokenizer obtained
    from `args` and ``sys.stdout``.

    :param args: parsed command-line arguments; must provide ``labels``
        and ``supplied`` attributes
    :param parser: not used by this function

    """
    # Read both attributes before the data store is created.
    result_labels, supplied_results = args.labels, args.supplied
    data_store = utils.get_data_store(args)
    tokenizer = utils.get_tokenizer(args)
    data_store.diff_supplied(supplied_results, result_labels, tokenizer,
                             sys.stdout)