Example #1
0
 def process(self, context, word_string):
     """Return ``word_string`` with every Chinese character set off by spaces.

     Each character for which ``ChineseLanguage.is_language`` returns a true
     value is wrapped in single spaces; all other characters are copied as
     they are. The joined text is stripped and runs of spaces are collapsed
     into one space.

     ``context`` is only passed on to the debug log call.
     """
     YLogger.debug(context, "Splitting Chinese into parsable words...")
     spaced = "".join(
         " %s " % char if ChineseLanguage.is_language(char) else char
         for char in word_string)
     return re.sub(' +', ' ', spaced.strip())
Example #2
0
 def process(self, context, word_string):
     """Space out the Chinese characters found in ``word_string``.

     Characters accepted by ``ChineseLanguage.is_language`` get one space on
     each side, everything else is kept unchanged. Surrounding whitespace is
     then removed and consecutive spaces are reduced to a single space.

     ``context`` is only used for the debug log message.
     """
     YLogger.debug(context, "Splitting Chinese into parsable words...")
     pieces = [(" %s " % ch) if ChineseLanguage.is_language(ch) else ch
               for ch in word_string]
     joined = "".join(pieces).strip()
     return re.sub(' +', ' ', joined)
Example #3
0
 def process(self, bot, clientid, word_string):
     """Return ``word_string`` with spaces around each Chinese character.

     A character is treated as Chinese when ``ChineseLanguage.is_language``
     returns a true value for it. The result is stripped and runs of spaces
     are collapsed to one. ``bot`` and ``clientid`` are not used here.
     """
     # Only emit the message when the root logger has DEBUG enabled.
     root_logger = logging.getLogger()
     if root_logger.isEnabledFor(logging.DEBUG):
         logging.debug("Splitting Chinese into parsable words...")

     def spaced(char):
         # Pad Chinese characters, pass everything else through untouched.
         if ChineseLanguage.is_language(char):
             return " %s " % char
         return char

     combined = "".join(spaced(char) for char in word_string)
     return re.sub(' +', ' ', combined.strip())
Example #4
0
 def __init__(self, word, userid='*'):
     """Initialise the node and store ``word`` with Chinese characters spaced.

     ``userid`` is forwarded to ``PatternNode.__init__``. Every character of
     ``word`` accepted by ``ChineseLanguage.is_language`` is wrapped in single
     spaces; the text is then stripped, runs of spaces are collapsed to one
     and the result is kept in ``self._word``.
     """
     PatternNode.__init__(self, userid)
     pieces = [" %s " % char if ChineseLanguage.is_language(char) else char
               for char in word]
     self._word = re.sub(' +', ' ', "".join(pieces).strip())
Example #5
0
    def process(self, context, word_string):
        """Join the Chinese tokens of a space-separated string back together.

        ``word_string`` is split on single spaces. Tokens accepted by
        ``ChineseLanguage.is_language`` are appended with no separator, every
        other token is padded with one space on each side. Whitespace runs are
        then collapsed to one space and the result is stripped.

        ``context`` is only passed on to the debug log call.
        """
        YLogger.debug(context, "Merging Chinese into understandable words...")

        pieces = []
        for token in word_string.split(" "):
            if ChineseLanguage.is_language(token):
                pieces.append(token)
            else:
                pieces.append(" " + token + " ")
        merged = re.sub(r'\s+', ' ', "".join(pieces))
        return merged.strip()
Example #6
0
    def process(self, context, word_string):
        """Merge the Chinese tokens of a space-separated string.

        ``word_string`` is split on single spaces. Tokens for which
        ``ChineseLanguage.is_language`` returns a true value are concatenated
        without a separator; any other token is surrounded by one space on
        each side. Runs of whitespace are collapsed to a single space and the
        final text is stripped.

        ``context`` is only used for the debug log message.
        """
        YLogger.debug(context, "Merging Chinese into understandable words...")

        # Collect the pieces and join once rather than growing a string with
        # repeated concatenation; the accumulator is named so that it does not
        # hide the builtin ``str``.
        pieces = []
        for word in word_string.split(" "):
            if ChineseLanguage.is_language(word):
                pieces.append(word)
            else:
                pieces.append(" " + word + " ")
        processed = re.sub(r'\s+', ' ', "".join(pieces))
        return processed.strip()
Example #7
0
 def test_split_with_spaces(self):
     """Check the string ``split_with_spaces`` builds from a list of pieces."""
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which is no longer available in unittest from Python 3.12 onwards.
     self.assertEqual('', ChineseLanguage.split_with_spaces([]))
     self.assertEqual('X', ChineseLanguage.split_with_spaces(['X']))
     self.assertEqual('你', ChineseLanguage.split_with_spaces(['你']))
     self.assertEqual('你  好', ChineseLanguage.split_with_spaces(['你', '好']))
     self.assertEqual('X  你  好', ChineseLanguage.split_with_spaces(['X', '你', '好']))
     self.assertEqual('X  你  好  Y', ChineseLanguage.split_with_spaces(['X', '你', '好', 'Y']))
Example #8
0
 def test_split_unicode(self):
     """Check the list ``split_unicode`` returns for plain and mixed text."""
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which is no longer available in unittest from Python 3.12 onwards.
     self.assertEqual([], ChineseLanguage.split_unicode(""))
     self.assertEqual(['X'], ChineseLanguage.split_unicode("X"))
     self.assertEqual(['你'], ChineseLanguage.split_unicode("你"))
     self.assertEqual(['你', '好'], ChineseLanguage.split_unicode("你好"))
     self.assertEqual(['X', '你', '好'], ChineseLanguage.split_unicode("X你好"))
     self.assertEqual(['X', '你', '好', 'Y'], ChineseLanguage.split_unicode("X你好Y"))
Example #9
0
    def process(self, bot, clientid, word_string):
        """Merge the Chinese tokens of a space-separated string.

        ``word_string`` is split on single spaces. Tokens for which
        ``ChineseLanguage.is_language`` returns a true value are concatenated
        without a separator; any other token is surrounded by one space on
        each side. Runs of whitespace are collapsed to a single space and the
        final text is stripped. ``bot`` and ``clientid`` are not used here.
        """
        # Only emit the message when the root logger has DEBUG enabled.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logging.debug("Merging Chinese into understandable words...")

        # Collect the pieces and join once rather than growing a string with
        # repeated concatenation; the accumulator is named so that it does not
        # hide the builtin ``str``.
        pieces = []
        for word in word_string.split(" "):
            if ChineseLanguage.is_language(word):
                pieces.append(word)
            else:
                pieces.append(" " + word + " ")
        processed = re.sub(r'\s+', ' ', "".join(pieces))
        return processed.strip()
Example #10
0
 def test_split_unicode(self):
     """Check the list ``split_unicode`` returns for plain and mixed text."""
     # (input text, expected list) pairs, checked in order.
     cases = (
         ("", []),
         ("X", ['X']),
         ("你", ['你']),
         ("你好", ['你', '好']),
         ("X你好", ['X', '你', '好']),
         ("X你好Y", ['X', '你', '好', 'Y']),
     )
     for text, expected in cases:
         self.assertEqual(expected, ChineseLanguage.split_unicode(text))
Example #11
0
 def test_split_with_spaces(self):
     """Check the string ``split_with_spaces`` builds from a list of pieces."""
     # (input pieces, expected string) pairs, checked in order.
     cases = (
         ([], ''),
         (['X'], 'X'),
         (['你'], '你'),
         (['你', '好'], '你  好'),
         (['X', '你', '好'], 'X  你  好'),
         (['X', '你', '好', 'Y'], 'X  你  好  Y'),
         (['X', '你', '?Y'], 'X  你 ?Y'),
     )
     for pieces, expected in cases:
         self.assertEqual(expected, ChineseLanguage.split_with_spaces(pieces))
Example #12
0
    def _parse_text(self, pattern_text, current_node):
        """Add a chain of child nodes, one per word of ``pattern_text``.

        The stripped text is split with ``ChineseLanguage.split_unicode`` when
        the parser's brain configuration has ``language.chinese`` set to
        ``True``, otherwise on single spaces. For every non-empty word a node
        is built with ``node_from_text`` and added as a child of the node
        created for the previous word, starting at ``current_node``.

        Returns the node returned by the last ``add_child`` call, or
        ``current_node`` itself when no word produced a node.
        """
        text = pattern_text.strip()

        parser = self._aiml_parser
        chinese_enabled = (
            parser is not None and
            parser.brain is not None and
            parser.brain.configuration.language.chinese is True)

        if chinese_enabled:
            words = ChineseLanguage.split_unicode(text)
        else:
            words = text.split(" ")

        node = current_node
        for word in words:
            # Blank nodes add no value, ignore them
            if word == '':
                continue
            child = self.node_from_text(TextUtils.strip_whitespace(word))
            node = node.add_child(child)

        return node
Example #13
0
    def parse_text(self, graph, text):
        """Append one word node per word of ``text`` to ``self.children``.

        Returns ``False`` when ``text`` is ``None`` or empty after stripping,
        ``True`` otherwise. The stripped text is split with
        ``ChineseLanguage.split_unicode`` when the graph's parser brain
        configuration has ``language.chinese`` set to ``True``, otherwise on
        single spaces. Each non-empty word is stripped and wrapped in the class
        that ``graph.get_node_class_by_name('word')`` returns.
        """
        if text is None:
            return False

        content = text.strip()
        if not content:
            return False

        parser = graph._aiml_parser
        chinese_enabled = (
            parser is not None and
            parser.brain is not None and
            parser.brain.configuration.language.chinese is True)

        if chinese_enabled:
            from programy.utils.language.chinese import ChineseLanguage
            words = ChineseLanguage.split_unicode(content)
        else:
            words = content.split(" ")

        for word in words:
            # Skip None and empty entries.
            if not word:
                continue
            node_class = graph.get_node_class_by_name('word')
            self.children.append(node_class(word.strip()))
        return True
Example #14
0
 def test_is_language(self):
     """Check ``is_language`` on empty, Latin, Chinese and integer input."""
     # (value, whether is_language is expected to be truthy), in order.
     expectations = (
         ("", False),
         ("H", False),
         ("你", True),
         (32, False),
     )
     for value, is_chinese in expectations:
         check = self.assertTrue if is_chinese else self.assertFalse
         check(ChineseLanguage.is_language(value))
Example #15
0
 def test_is_language(self):
     """Check ``is_language`` on empty, Latin and Chinese text."""
     is_language = ChineseLanguage.is_language
     # The empty string and a Latin letter must be rejected.
     for rejected in ("", "H"):
         self.assertFalse(is_language(rejected))
     self.assertTrue(is_language("你"))