def test_Disambiguation(self):
    """Check root and full-parse accuracy of RootFirstDisambiguation.

    Trains the disambiguator on the corpus read from ``../penntreebank.txt``,
    then disambiguates the robust morphological analyses of every sentence
    of that same corpus. For each corpus word that is a ``DisambiguatedWord``
    it counts whether the chosen parse's transition list equals the string
    form of the annotated parse, and whether the two roots are equal. Both
    counts are divided by ``corpus.numberOfWords()`` and compared with the
    expected ratios.
    """
    fsm = FsmMorphologicalAnalyzer("../turkish_dictionary.txt",
                                   "../turkish_misspellings.txt",
                                   "../turkish_finite_state_machine.xml")
    corpus = DisambiguationCorpus("../penntreebank.txt")
    algorithm = RootFirstDisambiguation()
    algorithm.train(corpus)
    correctParse = 0
    correctRoot = 0
    for i in range(corpus.sentenceCount()):
        # Look the sentence up once per iteration instead of once per use.
        sentence = corpus.getSentence(i)
        sentenceAnalyses = fsm.robustMorphologicalAnalysis(sentence)
        fsmParses = algorithm.disambiguate(sentenceAnalyses)
        for j in range(sentence.wordCount()):
            word = sentence.getWord(j)
            if isinstance(word, DisambiguatedWord):
                goldParse = word.getParse()
                if fsmParses[j].transitionList() == str(goldParse):
                    correctParse += 1
                if fsmParses[j].getWord() == goldParse.getWord():
                    correctRoot += 1
    wordTotal = corpus.numberOfWords()
    # delta= makes 0.002 an absolute tolerance. Passed positionally to
    # assertEqual it would be taken as the failure message and the floats
    # would be compared for exact equality.
    self.assertAlmostEqual(0.9590, (correctRoot + 0.0) / wordTotal,
                           delta=0.002)
    self.assertAlmostEqual(0.8639, (correctParse + 0.0) / wordTotal,
                           delta=0.002)
def test_Disambiguation(self):
    """Check root and full-parse accuracy of HmmDisambiguation.

    The disambiguator is trained on the corpus read from
    ``../penntreebank.txt`` and then applied to the robust morphological
    analyses of each sentence of that corpus. Words that are
    ``DisambiguatedWord`` instances are scored twice: a case-insensitive
    comparison of the chosen transition list with the annotated parse's
    string form, and an equality check on the roots. Each hit count is
    divided by ``corpus.numberOfWords()`` and must match the expected ratio
    to three decimal places.
    """
    analyzer = FsmMorphologicalAnalyzer()
    corpus = DisambiguationCorpus("../penntreebank.txt")
    disambiguator = HmmDisambiguation()
    disambiguator.train(corpus)

    parse_hits = 0
    root_hits = 0
    for sentence_index in range(corpus.sentenceCount()):
        sentence = corpus.getSentence(sentence_index)
        candidates = analyzer.robustMorphologicalAnalysis(sentence)
        chosen_parses = disambiguator.disambiguate(candidates)
        for word_index in range(sentence.wordCount()):
            word = sentence.getWord(word_index)
            if not isinstance(word, DisambiguatedWord):
                # Only annotated words take part in the scoring.
                continue
            chosen = chosen_parses[word_index]
            gold = word.getParse()
            if chosen.transitionList().lower() == gold.__str__().lower():
                parse_hits += 1
            if chosen.getWord() == gold.getWord():
                root_hits += 1

    root_accuracy = (root_hits + 0.0) / corpus.numberOfWords()
    parse_accuracy = (parse_hits + 0.0) / corpus.numberOfWords()
    self.assertAlmostEqual(0.9233, root_accuracy, places=3)
    self.assertAlmostEqual(0.8630, parse_accuracy, places=3)