class TestSentenceDelayTransform(unittest.TestCase):
    """Checks the output ordering of SentenceDelayTransform streams."""

    maxDiff = None

    def setUp(self):
        self.analyzer = FinHeuristicSentenceAnalyzer()

    def _analyze(self, text):
        """Return the analyzer's sentences for *text* as a list."""
        return list(self.analyzer.analyze_text(text))

    def test_delay(self):
        """Expect two empty sentences first, then the unmodified input."""
        delay = SentenceDelayTransform(buffer_size=2)
        source = self._analyze('Eka lause tää on. Tämä on toka. Kolmatta viedään. Neljäskin löytyi.')
        output = list(delay.transform_stream(source))

        padding = [Sentence(), Sentence()]
        self.assertEqual(output[:2], padding)
        self.assertEqual(output[2:], source)

    def test_delay_loop(self):
        """Expect the last two sentences to repeat after the input ends."""
        delay = SentenceDelayTransform(buffer_size=2, looping=True)
        source = self._analyze('Eka lause tää on. Tämä on toka. Kolmatta viedään. Neljäskin löytyi.')
        output = list(take(10, delay.transform_stream(source)))

        padding = [Sentence(), Sentence()]
        self.assertEqual(output[:2], padding)
        self.assertEqual(output[2:6], source)
        # Two consecutive repetitions of the final two sentences are expected.
        for start in (6, 8):
            self.assertEqual(output[start:start + 2], source[-2:])

    def test_delay_loop_change_state(self):
        """Expect a second stream on the same transform to continue the first."""
        delay = SentenceDelayTransform(buffer_size=2, looping=True)
        first_source = self._analyze('Eka lause tää on. Tämä on toka. Kolmatta viedään. Neljäskin löytyi.')
        second_source = self._analyze('Viides lause menossa. Kuudes tulossa. Seitsemäs päättää.')

        # Only three items are pulled from the first stream before switching.
        first_output = list(take(3, delay.transform_stream(first_source)))
        second_output = list(take(7, delay.transform_stream(second_source)))
        combined = first_output + second_output

        padding = [Sentence(), Sentence()]
        self.assertEqual(combined[:2], padding)
        self.assertEqual(combined[2:5], first_source[:3])
        self.assertEqual(combined[5:8], second_source)
        self.assertEqual(combined[8:], second_source[-2:])
class TestFinNounGenerator(unittest.TestCase):
    """Checks noun generation and lemma replacement via SentenceTokenGenerator."""

    def setUp(self):
        # analyzer returns lemmas only with unambiguous analyses
        self.analyzer = FinHeuristicSentenceAnalyzer()
        self.generator = SentenceTokenGenerator()

    def _reinflect(self, word, lemma):
        """Analyze *word*, swap in *lemma*, and return the resulting text."""
        analyzed = self.analyzer.analyze_word(word)
        replaced = self.generator.token_with_new_lemma(analyzed, lemma)
        return replaced.text

    def test_generate(self):
        """A morphology string is rendered to its surface form."""
        surface = self.generator._generate('vuosi+N+Pl+Par')
        self.assertEqual(surface, 'vuosia')

    def test_token_with_new_lemma_known(self):
        self.assertEqual(self._reinflect('keppien', 'gouda'), 'goudien')

    def test_token_with_new_lemma(self):
        self.assertEqual(self._reinflect('keppien', 'Truuda'), 'Truudien')

    def test_token_with_new_lemma_harder(self):
        self.assertEqual(self._reinflect('kasveilla', 'Truuda'), 'Truudilla')

    def test_token_with_new_lemma_no_replacement(self):
        self.assertEqual(self._reinflect('lakkien', 'Merhab'), 'Merhabien')

    def test_token_with_new_lemma_no_replacement_harder(self):
        self.assertEqual(self._reinflect('hipeillä', 'Mozokoz'), 'Mozokozeilla')
class TestFinAnalysisTense(unittest.TestCase):
    """Checks the morphology strings assigned to verbs in analyzed text."""

    def setUp(self):
        self.analyzer = FinHeuristicSentenceAnalyzer()

    def test_tense_analysis(self):
        """Selected tokens must carry the expected morphology strings."""
        text = 'Tietäisin. Mistä tiesit? Miten olisitte voineet haluta tietää? Miten hän oli tietänyt?'
        sentences = list(self.analyzer.analyze_text(text))

        # (sentence index, token index, expected morphology), checked in order.
        expectations = [
            (0, 0, 'tietää+V+Act+Cond+Sg1'),
            (1, 1, 'tietää+V+Act+Ind+Prt+Sg2'),
            (2, 1, 'olla+V+Act+Cond+Pl2'),
            (2, 2, 'voida+V+Act+PrfPrc+Pl+Nom'),
            (2, 3, 'haluta+V+Act+InfA+Sg+Lat'),
            (2, 4, 'tietää+V+Act+InfA+Sg+Lat'),
            (3, 2, 'olla+V+Act+Ind+Prt+Sg3'),
            (3, 3, 'tietää+V+Act+PrfPrc+Sg+Nom'),
        ]
        for sentence_idx, token_idx, morphology in expectations:
            token = sentences[sentence_idx].tokens[token_idx]
            self.assertEqual(token.morphology, morphology)
class TestFinVerbGenerator(unittest.TestCase):
    """Checks verb generation and lemma replacement via SentenceTokenGenerator."""

    def setUp(self):
        # analyzer returns lemmas only with unambiguous analyses
        self.analyzer = FinHeuristicSentenceAnalyzer()
        self.generator = SentenceTokenGenerator()

    def _reinflect(self, word, lemma):
        """Analyze *word*, swap in *lemma*, and return the resulting text."""
        analyzed = self.analyzer.analyze_word(word)
        replaced = self.generator.token_with_new_lemma(analyzed, lemma)
        return replaced.text

    def test_generate(self):
        """A morphology string is rendered to its surface form."""
        surface = self.generator._generate('juosta+V+Act+Ind+Prt+Pl3')
        self.assertEqual(surface, 'juoksivat')

    def test_token_with_new_lemma_known(self):
        self.assertEqual(self._reinflect('kepitimme', 'kimmeltää'), 'kimmelsimme')

    def test_token_with_new_lemma(self):
        self.assertEqual(self._reinflect('kipitimme', 'huheltaa'), 'huhelsimme')
def setUp(self):
    """Store a new FinHeuristicSentenceAnalyzer on the instance as ``analyzer``."""
    analyzer = FinHeuristicSentenceAnalyzer()
    self.analyzer = analyzer
def setUp(self):
    """Store a new analyzer and a new token generator on the instance."""
    # analyzer returns lemmas only with unambiguous analyses
    analyzer = FinHeuristicSentenceAnalyzer()
    self.analyzer = analyzer
    generator = SentenceTokenGenerator()
    self.generator = generator
def get_sentence(text: str) -> Sentence:
    """Analyze *text* with a new analyzer and return the first sentence.

    The analysis result is fully materialized before indexing, so an
    IndexError is raised when no sentences are produced.
    """
    sentences = list(FinHeuristicSentenceAnalyzer().analyze_text(text))
    return sentences[0]
def get_example_sentence(self) -> Sentence:
    """Return the first sentence from analyzing the fixed text 'On ilo testata.'."""
    sentences = list(FinHeuristicSentenceAnalyzer().analyze_text('On ilo testata.'))
    return sentences[0]