def generate(self) -> DisambiguationCorpus:
        """
        Creates a morphological disambiguation corpus from the corpus.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        corpus = DisambiguationCorpus()
        for i in range(self.__annotatedCorpus.sentenceCount()):
            sentence = self.__annotatedCorpus.getSentence(i)
            disambiguationSentence = AnnotatedSentence()
            for j in range(sentence.wordCount()):
                annotatedWord = sentence.getWord(j)
                if isinstance(annotatedWord, AnnotatedWord):
                    disambiguationSentence.addWord(
                        DisambiguatedWord(annotatedWord.getName(),
                                          annotatedWord.getParse()))
            corpus.addSentence(disambiguationSentence)
        return corpus
    def generate(self) -> DisambiguationCorpus:
        """
        Creates a morphological disambiguation corpus from the treeBank. Calls generateAnnotatedSentence for each parse
        tree in the treebank.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        corpus = DisambiguationCorpus()
        for i in range(self.__treeBank.size()):
            parseTree = self.__treeBank.get(i)
            if parseTree.layerAll(ViewLayerType.INFLECTIONAL_GROUP):
                sentence = parseTree.generateAnnotatedSentence()
                disambiguationSentence = AnnotatedSentence()
                for j in range(sentence.wordCount()):
                    annotatedWord = sentence.getWord(j)
                    if isinstance(annotatedWord, AnnotatedWord):
                        disambiguationSentence.addWord(
                            DisambiguatedWord(annotatedWord.getName(),
                                              annotatedWord.getParse()))
                corpus.addSentence(disambiguationSentence)
        return corpus
 def test_Disambiguation(self):
     fsm = FsmMorphologicalAnalyzer()
     corpus = DisambiguationCorpus("../penntreebank.txt")
     algorithm = HmmDisambiguation()
     algorithm.train(corpus)
     correctParse = 0
     correctRoot = 0
     for i in range(corpus.sentenceCount()):
         sentenceAnalyses = fsm.robustMorphologicalAnalysis(corpus.getSentence(i))
         fsmParses = algorithm.disambiguate(sentenceAnalyses)
         for j in range(corpus.getSentence(i).wordCount()):
             word = corpus.getSentence(i).getWord(j)
             if isinstance(word, DisambiguatedWord):
                 if fsmParses[j].transitionList().lower() == word.getParse().__str__().lower():
                     correctParse = correctParse + 1
                 if fsmParses[j].getWord() == word.getParse().getWord():
                     correctRoot = correctRoot + 1
     self.assertAlmostEqual(0.9233, (correctRoot + 0.0) / corpus.numberOfWords(), 3)
     self.assertAlmostEqual(0.8630, (correctParse + 0.0) / corpus.numberOfWords(), 3)
 def test_Corpus(self):
     corpus = DisambiguationCorpus("../penntreebank.txt")
     self.assertEqual(19109, corpus.sentenceCount())
     self.assertEqual(170211, corpus.numberOfWords())