예제 #1
0
파일: start.py 프로젝트: erkekin/Brilltag
# Script entry: build the training corpus, dump its reports, then prepare candidate rules.
print("Analysis starting...")

# The eight training-set files that make up the corpus, in the order they are passed to Corpus.
training_files = [
    "./dataset/TrainingSet/file1.txt",
    "./dataset/TrainingSet/file2.txt",
    "./dataset/TrainingSet/file3.txt",
    "./dataset/TrainingSet/file4.txt",
    "./dataset/TrainingSet/file5.txt",
    "./dataset/TrainingSet/file6.txt",
    "./dataset/TrainingSet/file7.txt",
    "./dataset/TrainingSet/file8.txt",
]
train_corpus = Corpus(training_files)

# Word report first, then the POS-tag report; each call is followed by a status line naming its target path.
train_corpus.outputWords("./Output/MostLikelyMorphParseForWord.txt")
print("Most likely morphological parses for words are written to ./Output/MostLikelyMorphParseForWord.txt")

train_corpus.outputPOStags("./Output/MostLikelyTag.txt")
print("Most likely tags are written to ./Output/MostLikelyTag.txt")

# Tag the corpus, then report the precision it computes, labelled with the current tag order.
train_corpus.tag_words_with_most_likely_parses()
tag_order = 1
print("".join([
    "TRAIN: Precision for DS",
    str(tag_order),
    " ",
    str(train_corpus.calculate_precision()),
]))

print("Possible rules are generating...")
# Only the first 20 entries of train_corpus.tags seed rule generation,
# because walking through the whole training corpus is expensive.
seed_tags = train_corpus.tags[:20]
rules = PossibleRules(seed_tags).rules
print("".join([str(len(train_corpus.all_words_in_corpus)), " words in training set."]))

learned_rules_with_precision = []
for rule in rules:
    for i in range(100, 400):

        original_word = train_corpus.all_words_in_corpus[i]
예제 #2
0
파일: start.py 프로젝트: erkekin/Brilltag
from Rule import PossibleRules

# Script entry: load the training corpus, write its reports, and derive candidate rules.
print("Analysis starting...")

# One corpus is built from all eight training-set files.
train_corpus = Corpus(
    [
        "./dataset/TrainingSet/file1.txt",
        "./dataset/TrainingSet/file2.txt",
        "./dataset/TrainingSet/file3.txt",
        "./dataset/TrainingSet/file4.txt",
        "./dataset/TrainingSet/file5.txt",
        "./dataset/TrainingSet/file6.txt",
        "./dataset/TrainingSet/file7.txt",
        "./dataset/TrainingSet/file8.txt",
    ]
)

# Each report call is paired with a status message that names the output path.
train_corpus.outputWords("./Output/MostLikelyMorphParseForWord.txt")
print("Most likely morphological parses for words are written to ./Output/MostLikelyMorphParseForWord.txt")

train_corpus.outputPOStags("./Output/MostLikelyTag.txt")
print("Most likely tags are written to ./Output/MostLikelyTag.txt")

# Tag the corpus before asking it for a precision figure.
train_corpus.tag_words_with_most_likely_parses()
tag_order = 1
precision_text = str(train_corpus.calculate_precision())
print("TRAIN: Precision for DS" + str(tag_order) + " " + precision_text)

print("Possible rules are generating...")
# Rule generation is limited to the first 20 entries of train_corpus.tags
# since it is expensive to walk through the entire training corpus.
rule_seed_tags = train_corpus.tags[:20]
rules = PossibleRules(rule_seed_tags).rules

word_total = len(train_corpus.all_words_in_corpus)
print(str(word_total) + " words in training set.")

learned_rules_with_precision = []
for rule in rules: