def test_iter_morphemes_prefixes(self):
    """Check iter_morphemes on a word carrying two '#'-delimited prefixes.

    Each prefix is expected as a tuple with an empty second element and
    'pfx' as third element, followed by a single tuple for the stem.
    """
    word = 'pfxone#pfxtwo#stem:POS|stem&FUS=stemgloss'
    expected = [
        ('pfxone', '', 'pfx'),
        ('pfxtwo', '', 'pfx'),
        ('stem&FUS', 'stemgloss', 'stem:POS'),
    ]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def test_iter_morphemes_compound_gloss(self):
    """Check iter_morphemes on a compound word that ends in a gloss.

    Every compound part is expected as its own tuple: the stem prefixed
    with '=', the shared compound gloss, and the part's own POS tag.
    """
    word = 'CMPPOS|+CMPPOSONE|cmpstemone+CMPPOSTWO|cmpstemtwo=cmpgloss'
    expected = [
        ('=cmpstemone', 'cmpgloss', 'CMPPOSONE'),
        ('=cmpstemtwo', 'cmpgloss', 'CMPPOSTWO'),
    ]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def test_iter_morphemes_suffixes_stemgloss(self):
    """Check iter_morphemes on a glossed stem followed by two suffixes.

    The stem tuple is expected first; each '-'-attached suffix follows as
    a tuple with an empty first element, the suffix label as second
    element and 'sfx' as third element.
    """
    word = 'stem:POS|stem&FUS-SFXONE-SFXTWO=stemgloss'
    expected = [
        ('stem&FUS', 'stemgloss', 'stem:POS'),
        ('', 'SFXONE', 'sfx'),
        ('', 'SFXTWO', 'sfx'),
    ]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def test_iter_morphemes_suffixes_colon(self):
    """Check iter_morphemes on suffixes that contain a colon.

    'SFXONE:contr' is expected to be kept whole as the second tuple
    element, while 'SFXTWO:SFXTWOseg' is expected to be split so that
    'SFXTWOseg' becomes the first element and 'SFXTWO' the second.
    """
    word = 'stem:POS|stem&FUS-SFXONE:contr-SFXTWO:SFXTWOseg=stemgloss'
    expected = [
        ('stem&FUS', 'stemgloss', 'stem:POS'),
        ('', 'SFXONE:contr', 'sfx'),
        ('SFXTWOseg', 'SFXTWO', 'sfx'),
    ]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def test_iter_morphemes_compound_suffixes(self):
    """Check iter_morphemes on a compound whose parts each take a suffix.

    The expected sequence interleaves the tuples: first compound part,
    its suffix, second compound part, its suffix.
    """
    first_part = 'CMPPOS|+CMPPOSONE|cmpstemone-SFXONE'
    second_part = '+CMPPOSTWO|cmpstemtwo-SFXTWO=cmpgloss'
    word = first_part + second_part
    expected = [
        ('=cmpstemone', 'cmpgloss', 'CMPPOSONE'),
        ('', 'SFXONE', 'sfx'),
        ('=cmpstemtwo', 'cmpgloss', 'CMPPOSTWO'),
        ('', 'SFXTWO', 'sfx'),
    ]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def test_parse(self):
    """Parse a one-utterance session and verify the resulting objects.

    The session text is served from an in-memory reader that replaces the
    parser's own reader. The first utterance, its first word and its
    first three morphemes are then compared field by field.
    """
    session_str = (
        'tom20010724.cha:*MOT:\tdoozo . 4087868_4089193\n'
        '%xtrn:\tn:prop|Hono-chan co:g|doozo .\n'
        '%ort:\tホノちゃんどうぞ。\n'
        '@End'
    )
    parser = JapaneseMiiProSessionParser(self.dummy_cha_path)
    # Swap in a reader over the literal session text above.
    parser.reader = JapaneseMiiProReader(io.StringIO(session_str))
    first_utt = parser.parse().utterances[0]

    utterance_checks = [
        first_utt.source_id == 'dummy_0',
        first_utt.addressee is None,
        first_utt.utterance_raw == 'doozo .',
        first_utt.utterance == 'doozo',
        first_utt.translation == '',
        first_utt.morpheme_raw == 'n:prop|Hono-chan co:g|doozo .',
        first_utt.gloss_raw == 'n:prop|Hono-chan co:g|doozo .',
        first_utt.pos_raw == 'n:prop|Hono-chan co:g|doozo .',
        first_utt.sentence_type == 'default',
        first_utt.start_raw == '4087868',
        first_utt.end_raw == '4089193',
        first_utt.comment == '',
        first_utt.warning == '',
    ]

    first_word = first_utt.words[0]
    word_checks = [
        first_word.word_language == 'Japanese',
        first_word.word == 'doozo',
        first_word.word_actual == 'doozo',
        first_word.word_target == 'doozo',
        first_word.warning == '',
    ]

    # Morphemes are indexed as [word position][morpheme position].
    stem_one = first_utt.morphemes[0][0]
    suffix_one = first_utt.morphemes[0][1]
    stem_two = first_utt.morphemes[1][0]
    morpheme_checks = [
        stem_one.gloss_raw == '',
        stem_one.morpheme == 'Hono',
        stem_one.morpheme_language == '',
        stem_one.pos_raw == 'n:prop',
        suffix_one.gloss_raw == 'chan',
        suffix_one.morpheme == 'chan',
        suffix_one.morpheme_language == '',
        suffix_one.pos_raw == 'sfx',
        stem_two.gloss_raw == '',
        stem_two.morpheme == 'doozo',
        stem_two.morpheme_language == '',
        stem_two.pos_raw == 'co:g',
    ]

    # A single False in any group fails the test.
    assert False not in utterance_checks + word_checks + morpheme_checks
def test_iter_morphemes_stem_gloss(self):
    """Check iter_morphemes on a bare stem with a '='-attached gloss.

    Exactly one tuple is expected: the stem, its gloss and its POS tag.
    """
    word = 'stem:POS|stem&FUS=stemgloss'
    expected = [('stem&FUS', 'stemgloss', 'stem:POS')]
    morphemes = JapaneseMiiProReader.iter_morphemes(word)
    self.assertEqual(list(morphemes), expected)
def get_reader(session_file):
    """Build a JapaneseMiiProReader for the given session file.

    Args:
        session_file: Passed unchanged to the JapaneseMiiProReader
            constructor.

    Returns:
        The newly created JapaneseMiiProReader instance.
    """
    reader = JapaneseMiiProReader(session_file)
    return reader