Example #1
0
def main(args):
    """Load the training and test sets, then run and score each tagger.

    Each tagger in the run list is called with the training set and the
    tag-stripped test set. Its predictions are scored through
    ``utils.evaluate_accuracies`` and ``utils.specialword_accuracies`` and
    the results are printed to stdout.

    Args:
        args: Object exposing ``training_file`` and ``test_file`` attributes,
            both of which are handed to ``utils.load_dataset``.
    """
    print("Loading dataset...")
    training_data = utils.load_dataset(args.training_file)
    print("FILE: ", args.training_file)
    test_data = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # Only these two taggers are run; an earlier (disabled) run list also
    # included baseline and viterbi_p1.
    taggers = (
        ('Viterbi_p2', viterbi_p2),
        ('extra', extra),
    )
    for label, tagger in taggers:
        print("Running {}...".format(label))

        # Tags are stripped anew for every tagger, right before it runs.
        untagged = utils.strip_tags(test_data)
        predictions = tagger(training_data, untagged)

        scores = utils.evaluate_accuracies(test_data, predictions)
        overall_acc, correct_counter, wrong_counter = scores
        multitag_acc, unseen_acc = utils.specialword_accuracies(
            training_data, test_data, predictions)

        print("Accuracy: {:.2f}%".format(overall_acc * 100))

        top_wrong = utils.topk_wordtagcounter(wrong_counter, k=4)
        print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))

        top_correct = utils.topk_wordtagcounter(correct_counter, k=4)
        print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))

        print("\tMultitags Accuracy: {:.2f}%".format(multitag_acc * 100))
        print("\tUnseen words Accuracy: {:.2f}%".format(unseen_acc * 100))
        print()
Example #2
0
def main(args):
    """Run the tagger selected by ``args.algorithm`` and print its scores.

    The training and test sets are loaded with ``utils.load_dataset``. The
    chosen tagger is called with the training set and the tag-stripped test
    set, and its predictions are scored through
    ``utils.evaluate_accuracies`` and ``utils.specialword_accuracies``.
    All results are printed to stdout.

    Args:
        args: Object exposing ``training_file``, ``test_file`` and
            ``algorithm`` attributes.

    Raises:
        KeyError: If ``args.algorithm`` is not a key of the dispatch table.
    """
    print("Loading dataset...")
    training_data = utils.load_dataset(args.training_file)
    test_data = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # Dispatch table: value of args.algorithm -> tagging function.
    taggers = dict(
        baseline=baseline,
        viterbi_1=viterbi_1,
        viterbi_2=viterbi_2,
        viterbi_ec=viterbi_ec,
    )
    tagger = taggers[args.algorithm]

    print("Running {}...".format(args.algorithm))
    untagged = utils.strip_tags(test_data)
    predictions = tagger(training_data, untagged)

    # Note the argument order used here: predictions come before the
    # tagged test set in both scoring calls.
    scores = utils.evaluate_accuracies(predictions, test_data)
    overall_acc, correct_counter, wrong_counter = scores
    multitag_acc, unseen_acc = utils.specialword_accuracies(
        training_data, predictions, test_data)

    percent_lines = (
        ("Accuracy: {:.2f}%", overall_acc),
        ("\tMultitags Accuracy: {:.2f}%", multitag_acc),
        ("\tUnseen words Accuracy: {:.2f}%", unseen_acc),
    )
    for template, fraction in percent_lines:
        print(template.format(fraction * 100))

    top_wrong = utils.topk_wordtagcounter(wrong_counter, k=4)
    print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))

    top_correct = utils.topk_wordtagcounter(correct_counter, k=4)
    print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))

    print()