def main(args):
    """Run every enabled tagger on the test set and print a report for each.

    Reads two datasets through ``utils.load_dataset`` (paths taken from
    ``args.training_file`` and ``args.test_file``), then, for each enabled
    tagging function, predicts tags for the test data and prints overall
    accuracy, the top-4 wrong and correct word-tag predictions, and the
    multitag / unseen-word accuracies.

    Args:
        args: Object exposing ``training_file`` and ``test_file`` attributes
            (presumably an ``argparse`` namespace -- confirm against caller).

    NOTE(review): another ``def main`` appears later in this file; if both
    live in the same module, the later definition replaces this one.
    NOTE(review): the evaluation helpers receive ``(test_set, predictions)``
    here, while the other ``main`` passes ``(predictions, test_set)``.
    Confirm which order ``utils`` expects.
    """
    print("Loading dataset...")
    training_data = utils.load_dataset(args.training_file)
    print("FILE: ", args.training_file)
    gold_test_data = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # Only these two taggers are currently enabled; an earlier, disabled
    # variant of this loop also ran `baseline` and `viterbi_p1`.
    enabled_taggers = (
        (viterbi_p2, "Viterbi_p2"),
        (extra, "extra"),
    )

    for tagger, label in enabled_taggers:
        print("Running {}...".format(label))

        # The tagger is given the test data after utils.strip_tags; the
        # unmodified test set is kept for scoring below.
        predictions = tagger(training_data, utils.strip_tags(gold_test_data))

        evaluation = utils.evaluate_accuracies(gold_test_data, predictions)
        overall_acc, correct_counter, wrong_counter = evaluation
        multitag_acc, unseen_acc = utils.specialword_accuracies(
            training_data, gold_test_data, predictions
        )

        print("Accuracy: {:.2f}%".format(overall_acc * 100))

        top_wrong = utils.topk_wordtagcounter(wrong_counter, k=4)
        print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))

        top_correct = utils.topk_wordtagcounter(correct_counter, k=4)
        print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))

        print("\tMultitags Accuracy: {:.2f}%".format(multitag_acc * 100))
        print("\tUnseen words Accuracy: {:.2f}%".format(unseen_acc * 100))
        print()
def main(args):
    """Run the tagger selected by ``args.algorithm`` and print its report.

    Reads two datasets through ``utils.load_dataset`` (paths taken from
    ``args.training_file`` and ``args.test_file``), looks up the requested
    tagging function by name, predicts tags for the test data, and prints
    overall accuracy, multitag / unseen-word accuracies, and the top-4 wrong
    and correct word-tag predictions.

    Args:
        args: Object exposing ``training_file``, ``test_file`` and
            ``algorithm`` attributes (presumably an ``argparse`` namespace --
            confirm against caller). ``algorithm`` must be one of
            ``"baseline"``, ``"viterbi_1"``, ``"viterbi_2"``, ``"viterbi_ec"``.

    Raises:
        KeyError: If ``args.algorithm`` is not one of the names above.

    NOTE(review): the evaluation helpers receive ``(predictions, test_set)``
    here, while the other ``main`` in this file passes
    ``(test_set, predictions)``. Confirm which order ``utils`` expects.
    """
    print("Loading dataset...")
    training_data = utils.load_dataset(args.training_file)
    gold_test_data = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # Name -> tagging function dispatch table.
    taggers = {
        "baseline": baseline,
        "viterbi_1": viterbi_1,
        "viterbi_2": viterbi_2,
        "viterbi_ec": viterbi_ec,
    }
    selected_name = args.algorithm
    tagger = taggers[selected_name]

    print("Running {}...".format(args.algorithm))

    # The tagger is given the test data after utils.strip_tags; the
    # unmodified test set is kept for scoring below.
    predictions = tagger(training_data, utils.strip_tags(gold_test_data))

    evaluation = utils.evaluate_accuracies(predictions, gold_test_data)
    overall_acc, correct_counter, wrong_counter = evaluation
    multitag_acc, unseen_acc = utils.specialword_accuracies(
        training_data, predictions, gold_test_data
    )

    print("Accuracy: {:.2f}%".format(overall_acc * 100))
    print("\tMultitags Accuracy: {:.2f}%".format(multitag_acc * 100))
    print("\tUnseen words Accuracy: {:.2f}%".format(unseen_acc * 100))

    top_wrong = utils.topk_wordtagcounter(wrong_counter, k=4)
    print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))

    top_correct = utils.topk_wordtagcounter(correct_counter, k=4)
    print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))
    print()