def process(self, context, word_string):
    """Space out the characters of ``word_string`` accepted by ChineseLanguage.

    Every character for which ``ChineseLanguage.is_language`` is true is
    wrapped in single spaces; every other character is kept as-is. The
    result is stripped and runs of spaces are collapsed to a single space.

    Args:
        context: Forwarded to ``YLogger.debug``; not otherwise used here.
        word_string: Text to process, iterated one character at a time.

    Returns:
        The re-spaced string.
    """
    YLogger.debug(context, "Splitting Chinese into parsable words...")
    padded = "".join(
        (" %s " % ch) if ChineseLanguage.is_language(ch) else ch
        for ch in word_string
    )
    # Padding neighbouring characters produces doubled spaces; squeeze them.
    return re.sub(' +', ' ', padded.strip())
def process(self, context, word_string):
    """Insert spaces around characters accepted by ``ChineseLanguage.is_language``.

    Args:
        context: Forwarded to ``YLogger.debug``; not otherwise used here.
        word_string: Text to process, visited character by character.

    Returns:
        The text with each accepted character surrounded by single spaces,
        stripped at both ends and with runs of spaces reduced to one.
    """
    YLogger.debug(context, "Splitting Chinese into parsable words...")

    def pad(ch):
        # Accepted characters become standalone, space-delimited tokens;
        # everything else passes through untouched.
        if ChineseLanguage.is_language(ch):
            return " %s " % ch
        return ch

    joined = "".join(map(pad, word_string))
    trimmed = joined.strip()
    return re.sub(' +', ' ', trimmed)
def process(self, bot, clientid, word_string):
    """Space out the characters of ``word_string`` accepted by ChineseLanguage.

    Args:
        bot: Not used by this method.
        clientid: Not used by this method.
        word_string: Text to process, iterated one character at a time.

    Returns:
        The text with every character for which
        ``ChineseLanguage.is_language`` is true wrapped in single spaces,
        stripped, and with runs of spaces collapsed to one.
    """
    root_logger = logging.getLogger()
    if root_logger.isEnabledFor(logging.DEBUG):
        logging.debug("Splitting Chinese into parsable words...")

    pieces = [
        (" %s " % ch) if ChineseLanguage.is_language(ch) else ch
        for ch in word_string
    ]
    spaced = "".join(pieces).strip()
    # Adjacent padded characters leave double spaces behind.
    return re.sub(' +', ' ', spaced)
def __init__(self, word, userid='*'):
    """Initialise the node and store a space-separated form of ``word``.

    Characters of ``word`` for which ``ChineseLanguage.is_language`` is true
    are wrapped in single spaces; the result is stripped, runs of spaces are
    collapsed, and the outcome is kept in ``self._word``.

    Args:
        word: Text for this node, iterated one character at a time.
        userid: Passed to ``PatternNode.__init__``; defaults to ``'*'``.
    """
    PatternNode.__init__(self, userid)
    spaced = "".join(
        (" %s " % ch) if ChineseLanguage.is_language(ch) else ch
        for ch in word
    )
    # Collapse the doubled spaces created between adjacent padded characters.
    self._word = re.sub(' +', ' ', spaced.strip())
def process(self, context, word_string):
    """Rejoin space-separated tokens, gluing ChineseLanguage-accepted ones together.

    ``word_string`` is split on single spaces. Tokens for which
    ``ChineseLanguage.is_language`` is true are concatenated with no
    separator; all other tokens are surrounded by spaces. Whitespace runs in
    the result are then collapsed to one space and the ends are stripped.

    Args:
        context: Forwarded to ``YLogger.debug``; not otherwise used here.
        word_string: Space-separated text to merge.

    Returns:
        The merged string.
    """
    YLogger.debug(context, "Merging Chinese into understandable words...")
    pieces = [
        token if ChineseLanguage.is_language(token) else " " + token + " "
        for token in word_string.split(" ")
    ]
    merged = re.sub(r'\s+', ' ', "".join(pieces))
    return merged.strip()
def process(self, context, word_string):
    """Rejoin space-separated tokens, gluing ChineseLanguage-accepted ones together.

    ``word_string`` is split on single spaces. Tokens for which
    ``ChineseLanguage.is_language`` is true are concatenated with no
    separator; all other tokens are surrounded by spaces. Whitespace runs in
    the result are then collapsed to one space and the ends are stripped.

    Args:
        context: Forwarded to ``YLogger.debug``; not otherwise used here.
        word_string: Space-separated text to merge.

    Returns:
        The merged string.
    """
    YLogger.debug(context, "Merging Chinese into understandable words...")
    # Collect the pieces in a list and join once; the accumulator is not
    # called ``str`` so the builtin stays usable inside this method.
    parts = []
    for word in word_string.split(" "):
        if ChineseLanguage.is_language(word):
            parts.append(word)
        else:
            parts.append(" " + word + " ")
    merged = re.sub(r'\s+', ' ', "".join(parts))
    return merged.strip()
def test_split_with_spaces(self):
    """Check that ``ChineseLanguage.split_with_spaces`` joins tokens with single spaces.

    Covers an empty list, single tokens, and mixed lists of Latin and
    Chinese tokens.
    """
    # ``assertEqual`` is used instead of the deprecated ``assertEquals``
    # alias, which no longer exists in Python 3.12+.
    self.assertEqual('', ChineseLanguage.split_with_spaces([]))
    self.assertEqual('X', ChineseLanguage.split_with_spaces(['X']))
    self.assertEqual('你', ChineseLanguage.split_with_spaces(['你']))
    self.assertEqual('你 好', ChineseLanguage.split_with_spaces(['你', '好']))
    self.assertEqual('X 你 好', ChineseLanguage.split_with_spaces(['X', '你', '好']))
    self.assertEqual(
        'X 你 好 Y',
        ChineseLanguage.split_with_spaces(['X', '你', '好', 'Y']))
def test_split_unicode(self):
    """Check that ``ChineseLanguage.split_unicode`` splits text into single-character tokens.

    Covers the empty string, single characters, and mixed Latin/Chinese
    strings.
    """
    # ``assertEqual`` is used instead of the deprecated ``assertEquals``
    # alias, which no longer exists in Python 3.12+.
    self.assertEqual([], ChineseLanguage.split_unicode(""))
    self.assertEqual(['X'], ChineseLanguage.split_unicode("X"))
    self.assertEqual(['你'], ChineseLanguage.split_unicode("你"))
    self.assertEqual(['你', '好'], ChineseLanguage.split_unicode("你好"))
    self.assertEqual(['X', '你', '好'], ChineseLanguage.split_unicode("X你好"))
    self.assertEqual(['X', '你', '好', 'Y'], ChineseLanguage.split_unicode("X你好Y"))
def process(self, bot, clientid, word_string):
    """Rejoin space-separated tokens, gluing ChineseLanguage-accepted ones together.

    ``word_string`` is split on single spaces. Tokens for which
    ``ChineseLanguage.is_language`` is true are concatenated with no
    separator; all other tokens are surrounded by spaces. Whitespace runs in
    the result are then collapsed to one space and the ends are stripped.

    Args:
        bot: Not used by this method.
        clientid: Not used by this method.
        word_string: Space-separated text to merge.

    Returns:
        The merged string.
    """
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug("Merging Chinese into understandable words...")

    # Collect the pieces in a list and join once; the accumulator is not
    # called ``str`` so the builtin stays usable inside this method.
    parts = []
    for word in word_string.split(" "):
        if ChineseLanguage.is_language(word):
            parts.append(word)
        else:
            parts.append(" " + word + " ")
    merged = re.sub(r'\s+', ' ', "".join(parts))
    return merged.strip()
def test_split_unicode(self):
    """Check ``ChineseLanguage.split_unicode`` against a table of inputs.

    Each case pairs an input string with the list of tokens the splitter is
    expected to return.
    """
    cases = (
        ("", []),
        ("X", ['X']),
        ("你", ['你']),
        ("你好", ['你', '好']),
        ("X你好", ['X', '你', '好']),
        ("X你好Y", ['X', '你', '好', 'Y']),
    )
    for text, expected in cases:
        self.assertEqual(expected, ChineseLanguage.split_unicode(text))
def test_split_with_spaces(self):
    """Check ``ChineseLanguage.split_with_spaces`` against a table of inputs.

    Each case pairs a list of tokens with the string the joiner is expected
    to return.
    """
    cases = (
        ([], ''),
        (['X'], 'X'),
        (['你'], '你'),
        (['你', '好'], '你 好'),
        (['X', '你', '好'], 'X 你 好'),
        (['X', '你', '好', 'Y'], 'X 你 好 Y'),
        # A token starting with '?' is expected to attach without a space.
        (['X', '你', '?Y'], 'X 你 ?Y'),
    )
    for tokens, expected in cases:
        self.assertEqual(expected, ChineseLanguage.split_with_spaces(tokens))
def _parse_text(self, pattern_text, current_node):
    """Add one child node per word of ``pattern_text`` beneath ``current_node``.

    The text is stripped and split into words. When the AIML parser's brain
    configuration has ``language.chinese`` set to ``True`` the split is done
    by ``ChineseLanguage.split_unicode``; otherwise the text is split on
    single spaces. Empty words are skipped. Each remaining word is passed
    through ``TextUtils.strip_whitespace``, turned into a node with
    ``self.node_from_text`` and chained on via ``add_child``.

    Args:
        pattern_text: Raw pattern text to break into words.
        current_node: Node to which the first word node is added.

    Returns:
        The value returned by the last ``add_child`` call, or
        ``current_node`` unchanged when no words were added.
    """
    stripped = pattern_text.strip()

    parser = self._aiml_parser
    use_chinese = (
        parser is not None
        and parser.brain is not None
        and parser.brain.configuration.language.chinese is True
    )
    if use_chinese:
        words = ChineseLanguage.split_unicode(stripped)
    else:
        words = stripped.split(" ")

    for word in words:
        if word == '':
            # Blank nodes add no value, ignore them
            continue
        cleaned = TextUtils.strip_whitespace(word)
        current_node = current_node.add_child(self.node_from_text(cleaned))
    return current_node
def parse_text(self, graph, text):
    """Append a word node to ``self.children`` for every word in ``text``.

    The text is stripped and split into words. When the graph's AIML
    parser has a brain whose configuration sets ``language.chinese`` to
    ``True`` the split is done by ``ChineseLanguage.split_unicode``;
    otherwise the text is split on single spaces. Empty words are skipped.
    Each remaining word is stripped and wrapped in the class returned by
    ``graph.get_node_class_by_name('word')``.

    Args:
        graph: Object providing ``_aiml_parser`` and
            ``get_node_class_by_name``.
        text: Text to parse; may be ``None``.

    Returns:
        ``True`` when ``text`` contained non-whitespace content, ``False``
        when it was ``None`` or blank.
    """
    if text is None:
        return False

    content = text.strip()
    if not content:
        return False

    parser = graph._aiml_parser
    chinese_enabled = (
        parser is not None
        and parser.brain is not None
        and parser.brain.configuration.language.chinese is True
    )
    if chinese_enabled:
        # Imported lazily so the module is only loaded when it is needed.
        from programy.utils.language.chinese import ChineseLanguage
        words = ChineseLanguage.split_unicode(content)
    else:
        words = content.split(" ")

    for word in words:
        if not word:
            # Consecutive spaces yield empty entries; they carry no content.
            continue
        word_class = graph.get_node_class_by_name('word')
        self.children.append(word_class(word.strip()))
    return True
def test_is_language(self):
    """Check ``ChineseLanguage.is_language`` against a table of inputs.

    The empty string, a Latin letter and a non-string value are expected to
    be rejected; a Chinese character is expected to be accepted.
    """
    expectations = (
        ("", False),
        ("H", False),
        ("你", True),
        (32, False),
    )
    for value, expected in expectations:
        verify = self.assertTrue if expected else self.assertFalse
        verify(ChineseLanguage.is_language(value))
def test_is_language(self):
    """Check ``ChineseLanguage.is_language`` on simple inputs.

    The empty string and a Latin letter are expected to be rejected; a
    Chinese character is expected to be accepted.
    """
    is_language = ChineseLanguage.is_language
    self.assertFalse(is_language(""))
    self.assertFalse(is_language("H"))
    self.assertTrue(is_language("你"))