# Report the precision of the training corpus before any candidate rule is tried.
tag_order = 1
print("TRAIN: Precision for DS" + str(tag_order) + " " + str(train_corpus.calculate_precision()))

print("Possible rules are generating...")
# Candidate rules are generated from only the first 20 entries of
# train_corpus.tags, since it is expensive to walk through the whole corpus.
rules = PossibleRules(train_corpus.tags[:20]).rules

word_count = len(train_corpus.all_words_in_corpus)
print(str(word_count) + " words in training set.")

# Each rule is tried on a fixed window of word positions (100..399), every
# position paired with the word that follows it. The stop index is clamped to
# word_count - 1 so the i + 1 look-ahead cannot raise IndexError when the
# corpus holds fewer than 401 words; a corpus shorter than the window start
# simply yields no trials.
window_start = 100
window_stop = min(400, word_count - 1)

# (rule, precision) pairs, appended each time a rule application reports a change.
learned_rules_with_precision = []
for rule in rules:
    for i in range(window_start, window_stop):
        original_word = train_corpus.all_words_in_corpus[i]
        word_after = train_corpus.all_words_in_corpus[i + 1]
        rule_changed_any_tag = rule.apply(original_word, word_after, train_corpus.words)
        # Only an exact True result counts; other truthy values are ignored on purpose
        # to keep the original comparison semantics.
        if rule_changed_any_tag is True:
            # NOTE(review): nothing here reverts a rule's changes before the next
            # trial, so these precisions look cumulative — confirm against rule.apply.
            precision = train_corpus.calculate_precision()
            learned_rules_with_precision.append((rule, precision))
            print("Precision for " + rule.name + " is " + str(precision))

train_corpus.output_rules("./Output/rules.txt", learned_rules_with_precision)
print("Rules are trained succesfully...")

# Build the test corpus from the two test files, tag it, and report its precision.
# NOTE(review): the rules collected above are not applied to test_corpus before
# measuring — confirm this baseline-only evaluation is intended.
test_corpus = Corpus(["./dataset/TestSet/file9.txt", "./dataset/TestSet/file10.txt"])
test_corpus.tag_words_with_most_likely_parses()
tag_order = 1
print("TEST: Precision for DS" + str(tag_order) + " " + str(test_corpus.calculate_precision()))
# Candidate rules are generated from only the first 20 entries of
# train_corpus.tags, since it is expensive to walk through the whole corpus.
rules = PossibleRules(train_corpus.tags[:20]).rules

word_count = len(train_corpus.all_words_in_corpus)
print(str(word_count) + " words in training set.")

# Each rule is tried on a fixed window of word positions (100..399), every
# position paired with the word that follows it. The stop index is clamped to
# word_count - 1 so the i + 1 look-ahead cannot raise IndexError when the
# corpus holds fewer than 401 words; a corpus shorter than the window start
# simply yields no trials.
window_start = 100
window_stop = min(400, word_count - 1)

# (rule, precision) pairs, appended each time a rule application reports a change.
learned_rules_with_precision = []
for rule in rules:
    for i in range(window_start, window_stop):
        original_word = train_corpus.all_words_in_corpus[i]
        word_after = train_corpus.all_words_in_corpus[i + 1]
        rule_changed_any_tag = rule.apply(original_word, word_after, train_corpus.words)
        # Only an exact True result counts; other truthy values are ignored on purpose
        # to keep the original comparison semantics.
        if rule_changed_any_tag is True:
            # NOTE(review): nothing here reverts a rule's changes before the next
            # trial, so these precisions look cumulative — confirm against rule.apply.
            precision = train_corpus.calculate_precision()
            learned_rules_with_precision.append((rule, precision))
            print("Precision for " + rule.name + " is " + str(precision))

train_corpus.output_rules("./Output/rules.txt", learned_rules_with_precision)
print("Rules are trained succesfully...")

# Build the test corpus from the two test files, tag it, and report its precision.
# NOTE(review): the rules collected above are not applied to test_corpus before
# measuring — confirm this baseline-only evaluation is intended.
test_corpus = Corpus(["./dataset/TestSet/file9.txt", "./dataset/TestSet/file10.txt"])
test_corpus.tag_words_with_most_likely_parses()
tag_order = 1
print("TEST: Precision for DS" + str(tag_order) + " " + str(test_corpus.calculate_precision()))