예제 #1
0
def test_run_corpus_failure() -> None:
    """Expect ``run_corpus`` to raise when given the failing corpus below."""
    # NOTE(review): the entry layout is presumably
    # (expected result, reference time, input phrases) -- confirm against
    # run_corpus.
    unmatched_entry = ("never produced", "2015-12-12T12:30", ("today", "heute"))
    broken_corpus = [unmatched_entry]
    with pytest.raises(Exception):
        run_corpus(broken_corpus)
예제 #2
0
def test_run_corpus() -> None:
    """Run ``run_corpus`` over ``corpus`` and check the types of its output.

    The corpus passes if ctparse generates the desired solution for each
    test at least once. Otherwise it fails.
    """
    features, labels = run_corpus(corpus)

    # The first label must be a plain boolean.
    first_label = labels[0]
    assert isinstance(first_label, bool)

    # The first element of the first feature sequence must be a string.
    first_feature_item = features[0][0]
    assert isinstance(first_feature_item, str)
예제 #3
0
def main():
    """Collect training samples from the selected sources, then train and save.

    The sources are chosen by the parsed command-line arguments:

    * ``args.legacy``: samples returned by ``run_corpus`` for
      ``corpus.corpus + auto_corpus.corpus``.
    * ``args.dataset``: samples yielded by ``make_partial_rule_dataset`` for
      the entries loaded from that dataset with ``load_timeparse_corpus``.

    The samples of all selected sources are concatenated, passed to
    ``train_naive_bayes`` and the resulting model is stored with
    ``save_naive_bayes`` under ``DEFAULT_MODEL_FILE``.

    Raises:
        ValueError: If no training samples were collected, e.g. because
            neither source was selected.
    """
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s")

    X_combined = []
    y_combined = []

    if args.legacy:
        logger.info("Loading legacy dataset")
        X, y = run_corpus(corpus.corpus + auto_corpus.corpus)
        X_combined.extend(X)
        y_combined.extend(y)

    if args.dataset:
        logger.info("Loading dataset %s", args.dataset)
        entries = load_timeparse_corpus(args.dataset)
        # The dataset is a sequence of (x, y) pairs; transpose it into
        # separate feature and label sequences.
        X, y = zip(*make_partial_rule_dataset(
            entries,
            scorer=DummyScorer(),
            timeout=30,
            max_stack_depth=100,
            progress=True,
        ))
        X_combined.extend(X)
        y_combined.extend(y)

    # Check the combined samples, not the per-source ``X``: ``X`` is unbound
    # when no source was selected and otherwise only reflects the source
    # that was loaded last.
    if not X_combined:
        raise ValueError("Need to specify at least a dataset for training")

    mdl = train_naive_bayes(X_combined, y_combined)
    save_naive_bayes(mdl, DEFAULT_MODEL_FILE)
예제 #4
0
def test_run_corpus_failure() -> None:
    """Verify that ``run_corpus`` raises for a single-entry failing corpus."""
    fail_corpus = [
        (
            "never produced",
            "2015-12-12T12:30",
            ("today", "heute"),
        ),
    ]
    # Any exception type is accepted here.
    with pytest.raises(Exception):
        run_corpus(fail_corpus)