예제 #1
0
 def test_Accuracy(self):
     """
     Runs Lesk.autoSemantic over every sentence loaded from "../../new-sentences" and compares the resulting
     semantic of each word with the semantic of the word at the same position in "../../old-sentences".

     A word counts as correct only when its semantic is not None and equals the semantic of its counterpart.
     The test passes when exactly 549 words were visited and 268 of them were correct.
     """
     lesk = Lesk(self.wordNet, self.fsm)
     newCorpus = AnnotatedCorpus("../../new-sentences")
     # NOTE(review): presumably the old corpus holds the reference annotations - confirm.
     oldCorpus = AnnotatedCorpus("../../old-sentences")
     visited = 0
     matched = 0
     for sentenceIndex in range(newCorpus.sentenceCount()):
         newSentence = newCorpus.getSentence(sentenceIndex)
         lesk.autoSemantic(newSentence)
         oldSentence = oldCorpus.getSentence(sentenceIndex)
         for wordIndex in range(newSentence.wordCount()):
             visited += 1
             newWord = newSentence.getWord(wordIndex)
             oldWord = oldSentence.getWord(wordIndex)
             newSemantic = newWord.getSemantic()
             # Words left without a semantic can never count as a match.
             if newSemantic is None:
                 continue
             if newSemantic == oldWord.getSemantic():
                 matched += 1
     self.assertEqual(549, visited)
     self.assertEqual(268, matched)
class SentenceDisambiguationCorpusGenerator:
    """
    Builds a DisambiguationCorpus out of the sentences of an AnnotatedCorpus.
    """

    # Corpus loaded in the constructor; generate() reads its sentences.
    __annotatedCorpus: AnnotatedCorpus

    def __init__(self, folder: str, pattern: str):
        """
        Loads the annotated corpus by passing the given folder and pattern to AnnotatedCorpus.

        PARAMETERS
        ----------
        folder : str
            Directory where the sentence files reside.
        pattern : str
            Pattern of the sentence files to be included in the corpus. Use "." for all files.
        """
        self.__annotatedCorpus = AnnotatedCorpus(folder, pattern)

    @staticmethod
    def _toDisambiguationSentence(sentence) -> AnnotatedSentence:
        """
        Converts one sentence of the annotated corpus into a sentence of disambiguated words.

        PARAMETERS
        ----------
        sentence
            Sentence whose words are converted.

        RETURNS
        -------
        AnnotatedSentence
            New sentence holding one DisambiguatedWord (name and parse) per AnnotatedWord of the input.
        """
        converted = AnnotatedSentence()
        for position in range(sentence.wordCount()):
            candidate = sentence.getWord(position)
            # Words that are not AnnotatedWord instances are left out of the result.
            if not isinstance(candidate, AnnotatedWord):
                continue
            name = candidate.getName()
            parse = candidate.getParse()
            converted.addWord(DisambiguatedWord(name, parse))
        return converted

    def generate(self) -> DisambiguationCorpus:
        """
        Creates a disambiguation corpus from the loaded annotated corpus. One sentence is added per sentence of
        the annotated corpus, in the same order.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        result = DisambiguationCorpus()
        source = self.__annotatedCorpus
        for index in range(source.sentenceCount()):
            result.addSentence(self._toDisambiguationSentence(source.getSentence(index)))
        return result