예제 #1
0
파일: start.py 프로젝트: erkekin/Brilltag
# Training / evaluation driver: report the baseline precision, try candidate
# rules on a window of the training corpus, store the rules that changed a
# tag, then report precision on the test set.
tag_order = 1
print("TRAIN: Precision for DS" + str(tag_order) + " " + str(train_corpus.calculate_precision()))

print("Possible rules are generating...")
# Just try the first 20 entries of the training corpus tags, since it is
# expensive to walk through all the words.
rules = PossibleRules(train_corpus.tags[:20]).rules
corpus_size = len(train_corpus.all_words_in_corpus)
print(str(corpus_size) + " words in training set.")

# Every rule is tried on the word pairs (i, i + 1) for i in [100, 400).
# The stop index is clamped to the corpus size so that the `i + 1` lookup can
# never run past the last word; an unclamped range(100, 400) raises IndexError
# on a corpus with fewer than 401 words.
window_start = 100
window_stop = min(400, corpus_size - 1)

learned_rules_with_precision = []
for rule in rules:
    for i in range(window_start, window_stop):

        original_word = train_corpus.all_words_in_corpus[i]
        word_after = train_corpus.all_words_in_corpus[i + 1]

        rule_changed_any_tag = rule.apply(original_word, word_after, train_corpus.words)
        # Only an explicit True counts as "changed".
        # NOTE(review): the return type of rule.apply is not visible here, so
        # the identity check is kept as-is rather than relaxed to truthiness.
        if rule_changed_any_tag is True:
            # Precision is re-measured after each application that changed a
            # tag, and recorded together with the rule.
            precision = train_corpus.calculate_precision()
            learned_rules_with_precision.append((rule, precision))
            print("Precision for " + rule.name + " is " + str(precision))

# Hand the collected (rule, precision) pairs to the corpus for output.
train_corpus.output_rules("./Output/rules.txt", learned_rules_with_precision)

print("Rules are trained succesfully...")

# Evaluation on the held-out files.
test_corpus = Corpus(["./dataset/TestSet/file9.txt", "./dataset/TestSet/file10.txt"])

test_corpus.tag_words_with_most_likely_parses()
tag_order = 1
print("TEST: Precision for DS" + str(tag_order) + " " + str(test_corpus.calculate_precision()))

예제 #2
0
파일: start.py 프로젝트: erkekin/Brilltag
# Rule-learning and evaluation part of the driver: try candidate rules on a
# window of the training corpus, store the rules that changed a tag, then
# report precision on the test set.

# Just try the first 20 entries of the training corpus tags, since it is
# expensive to walk through all the words.
rules = PossibleRules(
    train_corpus.tags[:20]
).rules
corpus_size = len(train_corpus.all_words_in_corpus)
print(str(corpus_size) + " words in training set.")

# Every rule is tried on the word pairs (i, i + 1) for i in [100, 400).
# The stop index is clamped to the corpus size so that the `i + 1` lookup can
# never run past the last word; an unclamped range(100, 400) raises IndexError
# on a corpus with fewer than 401 words.
window_start = 100
window_stop = min(400, corpus_size - 1)

learned_rules_with_precision = []
for rule in rules:
    for i in range(window_start, window_stop):

        original_word = train_corpus.all_words_in_corpus[i]
        word_after = train_corpus.all_words_in_corpus[i + 1]

        rule_changed_any_tag = rule.apply(original_word, word_after,
                                          train_corpus.words)
        # Only an explicit True counts as "changed".
        # NOTE(review): the return type of rule.apply is not visible here, so
        # the identity check is kept as-is rather than relaxed to truthiness.
        if rule_changed_any_tag is True:
            # Precision is re-measured after each application that changed a
            # tag, and recorded together with the rule.
            precision = train_corpus.calculate_precision()
            learned_rules_with_precision.append((rule, precision))
            print("Precision for " + rule.name + " is " + str(precision))

# Hand the collected (rule, precision) pairs to the corpus for output.
train_corpus.output_rules("./Output/rules.txt", learned_rules_with_precision)

print("Rules are trained succesfully...")

# Evaluation on the held-out files.
test_corpus = Corpus(
    ["./dataset/TestSet/file9.txt", "./dataset/TestSet/file10.txt"])

test_corpus.tag_words_with_most_likely_parses()
tag_order = 1
print("TEST: Precision for DS" + str(tag_order) + " " +
      str(test_corpus.calculate_precision()))