def test_infer_poses_empty_string(self):
    """Check that infer_pos returns an empty string for an empty morpheme."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('', 1)
    self.assertEqual(result, '')
def test_iter_morphemes_empty_string(self):
    """Check iter_gloss_pos on an empty morpheme word.

    The iterator is expected to yield a single pair of empty strings.
    """
    sesotho_reader = SesothoReader(io.StringIO(''))
    pairs = [pair for pair in sesotho_reader.iter_gloss_pos('')]
    self.assertEqual(pairs, [('', '')])
def test_get_poses_empty_string(self):
    """Check that get_poses maps an empty word to a list with one empty POS."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = ['']
    self.assertEqual(sesotho_reader.get_poses(''), expected)
def test_infer_poses_ideophone(self):
    """Check that infer_pos tags an ideophone gloss as 'ideoph'."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('id^jump', 1)
    self.assertEqual(result, 'ideoph')
def test_infer_poses_untranscibed(self):
    """Check that infer_pos tags the untranscribed marker 'xxx' as 'none'."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('xxx', 1)
    self.assertEqual(result, 'none')
def test_infer_poses_free_person_marker(self):
    """Check that infer_pos tags a free person marker as 'afx.detached'."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = 'afx.detached'
    self.assertEqual(sesotho_reader.infer_pos('sm1s', 1), expected)
def test_infer_poses_copula(self):
    """Check that infer_pos tags the copula gloss 'cp' as 'cop'."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('cp', 1)
    self.assertEqual(result, 'cop')
def test_infer_poses_nominal_concord(self):
    """Check that infer_pos tags the nominal concord 'obr3' as 'obr'."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = 'obr'
    self.assertEqual(sesotho_reader.infer_pos('obr3', 1), expected)
def test_infer_poses_particle(self):
    """Check that infer_pos returns the particle gloss 'loc' unchanged."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('loc', 1)
    self.assertEqual(result, 'loc')
def test_get_poses_standard_case(self):
    """Check get_poses on a hyphen-separated word of four morphemes."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = ['pfx', 'pfx', 'v', 'sfx']
    result = sesotho_reader.get_poses('sm1-t^p-v^say-m^in')
    self.assertEqual(result, expected)
def test_iter_morphemes_single(self):
    """Check iter_gloss_pos on a morpheme word holding a single morpheme."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    pairs = [pair for pair in sesotho_reader.iter_gloss_pos('id^jump')]
    self.assertEqual(pairs, [('id^jump', 'ideoph')])
def test_iter_morphemes_multiple(self):
    """Check iter_gloss_pos on a morpheme word holding four morphemes."""
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = [
        ('sm1', 'pfx'),
        ('t^p', 'pfx'),
        ('v^say', 'v'),
        ('m^in', 'sfx'),
    ]
    pairs = [pair for pair in
             sesotho_reader.iter_gloss_pos('sm1-t^p-v^say-m^in')]
    self.assertEqual(pairs, expected)
def test_get_utterance(self):
    """Check that get_utterance returns the text of the speaker tier."""
    # Same record as before, written one tier per literal.
    session_text = (
        '*CHI:\tbla blu bli .\n'
        '%gls:\ter-e m-ph-e ntho ena .\n'
        '%cod:\tplaceholder\n'
        '@End'
    )
    sesotho_reader = SesothoReader(io.StringIO(session_text))
    sesotho_reader.load_next_record()
    self.assertEqual(sesotho_reader.get_utterance(), 'bla blu bli .')
def test_get_target_utterance(self):
    """Check get_target_utterance when the gloss tier contains only hyphens."""
    session_text = (
        '*CHI:\tplaceholder\n'
        '%gls:\ter-e m-ph-e ntho ena .\n'
        '%cod:\tplaceholder\n'
        '@End'
    )
    sesotho_reader = SesothoReader(io.StringIO(session_text))
    sesotho_reader.load_next_record()
    expected = 'ere mphe ntho ena .'
    self.assertEqual(sesotho_reader.get_target_utterance(), expected)
def test_join_morph_to_utt_only_hyphens(self):
    """Check _join_morph_to_utt when the gloss tier contains only hyphens."""
    session_text = (
        '*CHI:\tplaceholder\n'
        '%gls:\ter-e m-ph-e ntho ena .\n'
        '%cod:\tplaceholder\n'
        '@End'
    )
    sesotho_reader = SesothoReader(io.StringIO(session_text))
    sesotho_reader.load_next_record()
    joined = sesotho_reader._join_morph_to_utt()
    self.assertEqual(joined, 'ere mphe ntho ena .')
def test_infer_poses_verb_prefix(self):
    """Check that infer_pos tags a verb prefix as 'pfx'.

    The prefix is taken from the morpheme word 'sm2s-t^f1-v^say-m^in'.
    """
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('sm2s', 4)
    self.assertEqual(result, 'pfx')
def test_infer_poses_noun_stem(self):
    """Check that infer_pos tags a noun stem as 'n'.

    The stem is taken from the morpheme word 'n^6-eye(5 , 6)'.
    """
    sesotho_reader = SesothoReader(io.StringIO(''))
    expected = 'n'
    self.assertEqual(sesotho_reader.infer_pos('eye(5 , 6)', 2), expected)
def test_infer_poses_verb_stem(self):
    """Check that infer_pos tags a verb stem as 'v'.

    The stem is taken from the morpheme word 'sm2s-t^f1-v^say-m^in'.
    """
    sesotho_reader = SesothoReader(io.StringIO(''))
    result = sesotho_reader.infer_pos('v^say', 4)
    self.assertEqual(result, 'v')
def test_infer_poses_verb_suffix(self):
    """Check that infer_pos tags a verb suffix as 'sfx'.

    The suffix is taken from the morpheme word 'sm2s-t^f1-v^say-m^in'.
    """
    sesotho_reader = SesothoReader(io.StringIO(''))
    # The stem has to be processed first; per the original note this is
    # what sets the reader's passed_stem state to True.
    sesotho_reader.infer_pos('v^say', 4)
    result = sesotho_reader.infer_pos('m^in', 4)
    self.assertEqual(result, 'sfx')
def test_join_morph_to_utt_empty_string(self):
    """Check _join_morph_to_utt when there is nothing to join.

    The gloss tier is deliberately misnamed ('%gla' rather than '%gls'),
    which leads to an empty string being processed.
    """
    session_text = (
        '*CHI:\tplaceholder\n'
        '%gla:\ter-e m-ph-e (ag)ntho ena .\n'
        '%cod:\tplaceholder\n'
        '@End'
    )
    sesotho_reader = SesothoReader(io.StringIO(session_text))
    sesotho_reader.load_next_record()
    joined = sesotho_reader._join_morph_to_utt()
    self.assertEqual(joined, '')
def get_reader(session_file):
    """Build a SesothoReader around the given session file."""
    sesotho_reader = SesothoReader(session_file)
    return sesotho_reader
def test_parse(self):
    """Test parse() on a session containing a single utterance.

    The parser's reader is replaced by one reading from an in-memory
    record. Fields of the first utterance, its two words and the two
    morphemes of its first morpheme word are compared against their
    expected values. All fields are checked before failing, and the
    assertion message lists every (field, actual, expected) mismatch.
    """
    session_str = ('*NHM:\te tsamo . 113200_115376\n'
                   '%gls:\te tsamay-a .\n'
                   '%xcod:\tv^leave-m^i .\n'
                   '%eng:\tYes go and\n'
                   '@End')
    parser = SesothoSessionParser(self.dummy_cha_path)
    parser.reader = SesothoReader(io.StringIO(session_str))
    session = parser.parse()

    utt = session.utterances[0]
    w1 = utt.words[0]
    w2 = utt.words[1]
    m1 = utt.morphemes[0][0]
    m2 = utt.morphemes[0][1]

    # Each entry: (field label, actual value, expected value).
    checks = [
        # Utterance-level fields.
        ('utt.source_id', utt.source_id, 'dummy_0'),
        # Identity check kept as in the original (`is None`).
        ('utt.addressee is None', utt.addressee is None, True),
        ('utt.utterance_raw', utt.utterance_raw, 'e tsamo .'),
        ('utt.utterance', utt.utterance, 'e tsamaya'),
        ('utt.translation', utt.translation, 'Yes go and'),
        ('utt.morpheme_raw', utt.morpheme_raw, 'e tsamay-a .'),
        ('utt.gloss_raw', utt.gloss_raw, 'v^leave-m^i .'),
        ('utt.pos_raw', utt.pos_raw, 'v^leave-m^i .'),
        ('utt.sentence_type', utt.sentence_type, 'default'),
        ('utt.start_raw', utt.start_raw, '113200'),
        ('utt.end_raw', utt.end_raw, '115376'),
        ('utt.comment', utt.comment, ''),
        ('utt.warning', utt.warning, ''),
        # Word-level fields.
        ('w1.word_language', w1.word_language, ''),
        ('w1.word', w1.word, 'e'),
        ('w1.word_actual', w1.word_actual, 'e'),
        ('w1.word_target', w1.word_target, 'e'),
        ('w1.warning', w1.warning, ''),
        ('w2.word_language', w2.word_language, ''),
        ('w2.word', w2.word, 'tsamaya'),
        ('w2.word_actual', w2.word_actual, 'tsamaya'),
        ('w2.word_target', w2.word_target, 'tsamaya'),
        ('w2.warning', w2.warning, ''),
        # Morpheme-level fields.
        ('m1.gloss_raw', m1.gloss_raw, 'v^leave'),
        ('m1.morpheme', m1.morpheme, ''),
        ('m1.morpheme_language', m1.morpheme_language, ''),
        ('m1.pos_raw', m1.pos_raw, 'v'),
        ('m2.gloss_raw', m2.gloss_raw, 'm^i'),
        ('m2.morpheme', m2.morpheme, ''),
        ('m2.morpheme_language', m2.morpheme_language, ''),
        ('m2.pos_raw', m2.pos_raw, 'sfx'),
    ]

    mismatches = [
        (label, actual, expected)
        for label, actual, expected in checks
        if not (actual == expected)
    ]
    assert not mismatches, (
        'unexpected field values (field, actual, expected): %r'
        % (mismatches,))
def test_get_segments_standard_case(self):
    """Check that get_segments splits a hyphen-separated segment word."""
    expected = ['prefix', 'stem', 'affix']
    segments = SesothoReader.get_segments('prefix-stem-affix')
    self.assertEqual(segments, expected)
def test_get_segments_empty_string(self):
    """Check that get_segments turns an empty word into one empty segment."""
    segments = SesothoReader.get_segments('')
    self.assertEqual(segments, [''])