def test_Accuracy(self):
    """
    Measures how often Lesk's automatic semantic annotation agrees with the semantics stored in a second corpus.

    Every sentence of the corpus under "../../new-sentences" is annotated with Lesk.autoSemantic and then
    compared, word by word and position by position, against the sentence with the same index in
    "../../old-sentences" (presumably the reference annotations; confirm against the fixture data). A word
    counts as correct only when its semantic is set and equals the semantic of its counterpart.
    """
    disambiguator = Lesk(self.wordNet, self.fsm)
    annotatedCorpus = AnnotatedCorpus("../../new-sentences")
    referenceCorpus = AnnotatedCorpus("../../old-sentences")
    wordTotal = 0
    matchCount = 0
    for sentenceIndex in range(annotatedCorpus.sentenceCount()):
        annotated = annotatedCorpus.getSentence(sentenceIndex)
        # Annotate before fetching the counterpart sentence, as the comparison relies on the new semantics.
        disambiguator.autoSemantic(annotated)
        reference = referenceCorpus.getSentence(sentenceIndex)
        for wordIndex in range(annotated.wordCount()):
            wordTotal += 1
            annotatedWord = annotated.getWord(wordIndex)
            referenceWord = reference.getWord(wordIndex)
            # Words left without a semantic never count as a match.
            if annotatedWord.getSemantic() is None:
                continue
            if annotatedWord.getSemantic() == referenceWord.getSemantic():
                matchCount += 1
    self.assertEqual(549, wordTotal)
    self.assertEqual(268, matchCount)
class SentenceDisambiguationCorpusGenerator:
    """
    Builds a DisambiguationCorpus out of the sentences of an AnnotatedCorpus.
    """

    # Source corpus the disambiguation corpus is generated from; loaded in the constructor.
    __annotatedCorpus: AnnotatedCorpus

    def __init__(self, folder: str, pattern: str):
        """
        Loads the annotated corpus that will later be converted by generate. Both arguments are handed to the
        AnnotatedCorpus constructor unchanged.

        PARAMETERS
        ----------
        folder : str
            Directory where the sentence files reside.
        pattern : str
            Pattern of the files to be included in the corpus. Use "." for all files.
        """
        self.__annotatedCorpus = AnnotatedCorpus(folder, pattern)

    def generate(self) -> DisambiguationCorpus:
        """
        Creates a morphological disambiguation corpus from the loaded annotated corpus. For each source sentence
        a new sentence is built that holds one DisambiguatedWord (name and parse) per AnnotatedWord; words that
        are not AnnotatedWord instances are skipped. Every source sentence yields a sentence in the result, even
        when no word of it was converted.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        source = self.__annotatedCorpus
        result = DisambiguationCorpus()
        for sentenceIndex in range(source.sentenceCount()):
            sourceSentence = source.getSentence(sentenceIndex)
            converted = AnnotatedSentence()
            for wordIndex in range(sourceSentence.wordCount()):
                word = sourceSentence.getWord(wordIndex)
                # Only annotated words carry the parse needed for a DisambiguatedWord.
                if not isinstance(word, AnnotatedWord):
                    continue
                name = word.getName()
                parse = word.getParse()
                converted.addWord(DisambiguatedWord(name, parse))
            result.addSentence(converted)
        return result