def test_run_corpus_failure() -> None:
    """Check that ``run_corpus`` raises for a corpus it cannot satisfy.

    The single entry's first field is the literal ``'never produced'``,
    which is presumably a target the parser never generates (inferred from
    the value itself; confirm against ``run_corpus``).
    """
    impossible_entry = ("never produced", "2015-12-12T12:30", ("today", "heute"))
    fail_corpus = []
    fail_corpus.append(impossible_entry)

    with pytest.raises(Exception):
        run_corpus(fail_corpus)
def test_run_corpus() -> None:
    """Run the corpus and sanity-check the types of the returned data.

    The corpus run succeeds when ctparse generates the desired solution for
    every test at least once; otherwise it fails. On success the first
    label must be a ``bool`` and the first element of the first feature
    row must be a ``str``.
    """
    features, labels = run_corpus(corpus)

    first_label = labels[0]
    assert isinstance(first_label, bool)

    first_feature_item = features[0][0]
    assert isinstance(first_feature_item, str)
def main():
    """Train the naive Bayes model on the selected datasets and save it.

    Training samples are accumulated from up to two sources, selected by the
    parsed command-line arguments:

    * ``args.legacy``: the result of ``run_corpus`` over the concatenation of
      ``corpus.corpus`` and ``auto_corpus.corpus``.
    * ``args.dataset``: samples produced by ``make_partial_rule_dataset`` from
      the entries loaded out of the given dataset.

    The trained model is written to ``DEFAULT_MODEL_FILE``.

    Raises:
        ValueError: if no training samples were collected, either because no
            source was selected or because the selected sources were empty.
    """
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )

    X_combined = []
    y_combined = []

    if args.legacy:
        logger.info("Loading legacy dataset")
        X, y = run_corpus(corpus.corpus + auto_corpus.corpus)
        X_combined.extend(X)
        y_combined.extend(y)

    if args.dataset:
        logger.info("Loading dataset %s", args.dataset)
        entries = load_timeparse_corpus(args.dataset)
        # Materialize first: unpacking ``zip(*samples)`` into two names fails
        # with an unhelpful "not enough values to unpack" when there are no
        # samples at all, so an empty dataset is skipped and reported by the
        # guard below instead.
        samples = list(
            make_partial_rule_dataset(
                entries,
                scorer=DummyScorer(),
                timeout=30,
                max_stack_depth=100,
                progress=True,
            )
        )
        if samples:
            X, y = zip(*samples)
            X_combined.extend(X)
            y_combined.extend(y)

    # Check the accumulated samples, not the per-source ``X``: ``X`` is
    # unbound when no source was selected and only reflects the last source
    # otherwise.
    if not X_combined:
        raise ValueError("Need to specify at least a dataset for training")

    mdl = train_naive_bayes(X_combined, y_combined)
    save_naive_bayes(mdl, DEFAULT_MODEL_FILE)
def test_run_corpus_failure() -> None:
    """Expect ``run_corpus`` to raise on a one-entry corpus built to fail.

    The entry's first field is the literal ``"never produced"``; presumably
    no parse ever yields it, which is what makes the run fail (inferred from
    the value; confirm against ``run_corpus``).
    """
    phrases = ("today", "heute")
    failing_entry = ("never produced", "2015-12-12T12:30", phrases)

    with pytest.raises(Exception):
        run_corpus([failing_entry])