Exemple #1
0
    def get_record_dict(cls, record):
        """Map each tier's field marker to its whitespace-cleaned content.

        The record is split into tiers with ``cls.get_tiers``; every tier is
        split into a field marker and its content with ``cls.get_tier``, and
        the content is passed through
        ``ToolboxCleaner.remove_redundant_whitespaces`` before being stored.
        If a field marker occurs more than once, the last tier wins.

        Only the tier dictionary is returned; no metadata is collected here.

        Args:
            record (str): Toolbox record.

        Returns:
            dict: Field markers as keys, cleaned tier contents as values.
        """
        # lazily split each tier into its (field marker, content) pair
        marker_content_pairs = (
            cls.get_tier(tier) for tier in cls.get_tiers(record)
        )

        # clean every content while filling the dictionary
        return {
            marker: ToolboxCleaner.remove_redundant_whitespaces(raw_content)
            for marker, raw_content in marker_content_pairs
        }
Exemple #2
0
 def test_clean_word_xxx(self):
     # 'xxx' inside a hyphenated word is expected to become '???'
     expected = '???-less'
     result = ToolboxCleaner.clean_utterance('xxx-less')
     self.assertEqual(result, expected)
Exemple #3
0
 def test_remove_redundant_whitespaces(self):
     # leading, trailing and repeated inner spaces should all collapse
     padded = '  no such     thing       '
     self.assertEqual(
         ToolboxCleaner.remove_redundant_whitespaces(padded),
         'no such thing')
Exemple #4
0
 def test_clean_utterance_stars_xxx(self):
     # both '***' and 'xxx' are expected to be rewritten as '???'
     expected = 'These ??? some good ??? .'
     result = ToolboxCleaner.clean_utterance('These *** some good xxx .')
     self.assertEqual(result, expected)
Exemple #5
0
 def test_unify_unknown_empty_string(self):
     # an empty utterance is expected to stay empty
     result = ToolboxCleaner.unify_unknown('')
     self.assertEqual(result, '')
Exemple #6
0
 def test_unify_unknown_xxx(self):
     # every 'xxx' token is expected to become '???'
     raw = 'xxx are xxx'
     expected = '??? are ???'
     self.assertEqual(ToolboxCleaner.unify_unknown(raw), expected)
Exemple #7
0
 def test_clean_lang_tier(self):
     # this input is expected to come back unchanged
     tier = 'the lang tier'
     cleaned = ToolboxCleaner.clean_lang_tier(tier)
     self.assertEqual(cleaned, 'the lang tier')
Exemple #8
0
 def test_clean_gloss_tier(self):
     # this input is expected to come back unchanged
     tier = 'the gloss tier'
     cleaned = ToolboxCleaner.clean_gloss_tier(tier)
     self.assertEqual(cleaned, 'the gloss tier')
Exemple #9
0
 def test_clean_pos(self):
     # clean_pos_raw is expected to return this input as-is
     expected = 'pos'
     result = ToolboxCleaner.clean_pos_raw('pos')
     self.assertEqual(result, expected)
Exemple #10
0
 def test_clean_gloss(self):
     # clean_gloss_raw is expected to return this input as-is
     expected = 'gloss'
     result = ToolboxCleaner.clean_gloss_raw('gloss')
     self.assertEqual(result, expected)
Exemple #11
0
 def test_clean_seg(self):
     # clean_seg is expected to return this input as-is
     segment = 'seg'
     self.assertEqual(ToolboxCleaner.clean_seg(segment), 'seg')
Exemple #12
0
 def test_clean_morpheme(self):
     # clean_morpheme is expected to return this input as-is
     raw = 'morpheme'
     self.assertEqual(ToolboxCleaner.clean_morpheme(raw), 'morpheme')
Exemple #13
0
 def test_clean_lang_word(self):
     # a hyphenated language word is expected to pass through unchanged
     word = 'lang-word'
     cleaned = ToolboxCleaner.clean_lang_word(word)
     self.assertEqual(cleaned, 'lang-word')
Exemple #14
0
 def test_clean_pos_word(self):
     # a hyphenated POS word is expected to pass through unchanged
     word = 'pos-word'
     cleaned = ToolboxCleaner.clean_pos_word(word)
     self.assertEqual(cleaned, 'pos-word')
Exemple #15
0
 def test_clean_gloss_word(self):
     # a hyphenated gloss word is expected to pass through unchanged
     word = 'gloss-word'
     cleaned = ToolboxCleaner.clean_gloss_word(word)
     self.assertEqual(cleaned, 'gloss-word')
Exemple #16
0
 def test_clean_word_empty_string(self):
     # cleaning an empty utterance is expected to yield an empty string
     result = ToolboxCleaner.clean_utterance('')
     self.assertEqual(result, '')
Exemple #17
0
 def test_clean_morph_tier(self):
     # this input is expected to come back unchanged
     tier = 'the morph tier'
     cleaned = ToolboxCleaner.clean_morph_tier(tier)
     self.assertEqual(cleaned, 'the morph tier')
Exemple #18
0
 def test_clean_lang(self):
     # clean_lang is expected to return this input as-is
     expected = 'lang'
     result = ToolboxCleaner.clean_lang('lang')
     self.assertEqual(result, expected)
Exemple #19
0
 def test_clean_pos_tier(self):
     # this input is expected to come back unchanged
     tier = 'the pos tier'
     cleaned = ToolboxCleaner.clean_pos_tier(tier)
     self.assertEqual(cleaned, 'the pos tier')
Exemple #20
0
 def test_unify_unknown_stars(self):
     # '***' is expected to be rewritten as '???'
     raw = 'here *** examples'
     expected = 'here ??? examples'
     self.assertEqual(ToolboxCleaner.unify_unknown(raw), expected)
Exemple #21
0
 def test_unify_unknown_xxx_stars_www(self):
     # 'xxx', '***' and 'www' are all expected to map to '???',
     # with the adjacent comma kept in place
     raw = 'xxx *** www, ***'
     expected = '??? ??? ???, ???'
     self.assertEqual(ToolboxCleaner.unify_unknown(raw), expected)
Exemple #22
0
 def test_clean_morpheme_word(self):
     # a hyphenated morpheme word is expected to pass through unchanged
     word = 'mor-word'
     cleaned = ToolboxCleaner.clean_morpheme_word(word)
     self.assertEqual(cleaned, 'mor-word')