def sentences(s):
    """Split the string s into a list of sentences.

    The input is first passed through splitChinese() (pieces re-joined with
    single spaces) and then through normalChinese().  The result is cut at
    every '.', '?', '!' or '~'; the terminator is dropped and each piece is
    stripped of surrounding whitespace.

    Returns a list of sentence strings.  If the processed string is empty,
    a one-item list containing that string is returned.

    Raises TypeError if s does not support string concatenation.
    """
    # Duck-typed string check.  Catch Exception rather than using a bare
    # except so KeyboardInterrupt/SystemExit are not turned into TypeError.
    try:
        s + ""
    except Exception:
        raise TypeError("s must be a string")

    s = u" ".join(splitChinese(s))
    # TODO: do better for normalChinese
    s = normalChinese(s)

    terminators = ('.', '?', '!', '~')
    length = len(s)
    sentenceList = []
    pos = 0
    while pos < length:
        # Position of the nearest terminator at or after pos.  length + 1
        # stands for "no terminator left", which makes the slice below take
        # the whole remainder and ends the loop on the next check.
        hits = [i for i in (s.find(mark, pos) for mark in terminators)
                if i >= 0]
        end = min(hits) if hits else length + 1
        sentenceList.append(s[pos:end].strip())
        pos = end + 1

    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)
    return sentenceList
def sentences(s):
    """Split the string s into a list of sentences.

    The string is cut at every '.', '?' or '!'; the terminator is dropped
    and each piece is stripped of surrounding whitespace.  Every sentence is
    then passed through splitChinese() and its pieces are re-joined with
    single spaces.

    Returns a list with one entry per sentence.  An empty input produces a
    one-item list built from the input itself.

    Raises TypeError if s does not support string concatenation.
    """
    # Duck-typed string check.  Catch Exception rather than using a bare
    # except so KeyboardInterrupt/SystemExit are not turned into TypeError.
    try:
        s + ""
    except Exception:
        raise TypeError("s must be a string")

    terminators = ('.', '?', '!')
    length = len(s)
    sentenceList = []
    pos = 0
    while pos < length:
        # Position of the nearest terminator at or after pos.  length + 1
        # stands for "no terminator left", which makes the slice below take
        # the whole remainder and ends the loop on the next check.
        hits = [i for i in (s.find(mark, pos) for mark in terminators)
                if i >= 0]
        end = min(hits) if hits else length + 1
        sentenceList.append(s[pos:end].strip())
        pos = end + 1

    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)

    # Auto convert Chinese.  A list comprehension (not map) so that a real
    # list is returned on Python 3 as well as Python 2.
    return [u' '.join(splitChinese(sentence)) for sentence in sentenceList]
def _respond(self, input, sessionID):
    """Private version of respond(), does the real work.

    Pushes `input` onto the session's input stack, normalizes it, matches
    it (together with the bot's last output and the current topic) against
    the brain, and processes the matched element into a response string.

    Returns the response, or "" when the input is empty, the recursion
    limit has been exceeded, or no match was found.
    """
    if len(input) == 0:
        return ""

    # guard against infinite recursion
    inputStack = self.getPredicate(self._inputStack, sessionID)
    if len(inputStack) > self._maxRecursionDepth:
        if self._verboseMode:
            err = "WARNING: maximum recursion depth exceeded (input='%s')" % input.encode(
                self._textEncoding, 'replace')
            sys.stderr.write(err)
        return ""

    # push the input onto the input stack
    inputStack = self.getPredicate(self._inputStack, sessionID)
    inputStack.append(input)
    self.setPredicate(self._inputStack, inputStack, sessionID)

    # Everything between the push above and the pop below runs inside
    # try/finally: if matching or element processing raises, the stack must
    # still be unwound, otherwise stale entries accumulate and later calls
    # trip the recursion guard for no reason.
    try:
        # run the input through the 'normal' subber
        input = u' '.join(splitChinese(input))
        if input.find(" ") < 0:
            input = " " + input
        subbedInput = u" ".join(
            self._subbers['normal'].sub(input).strip().split())

        # fetch the bot's previous response, to pass to the match()
        # function as 'that'.
        outputHistory = self.getPredicate(self._outputHistory, sessionID)
        try:
            that = outputHistory[-1]
        except IndexError:
            that = ""
        subbedThat = self._subbers['normal'].sub(that)

        # fetch the current topic
        topic = self.getPredicate("topic", sessionID)
        subbedTopic = self._subbers['normal'].sub(topic)

        # Determine the final response.
        response = ""
        elem = self._brain.match(subbedInput, subbedThat, subbedTopic)
        if elem is None:
            if self._verboseMode:
                # 'replace' keeps a diagnostic message from raising
                # UnicodeEncodeError on characters the configured encoding
                # cannot represent (same policy as the warning above).
                err = "WARNING: No match found for input: %s\n" % input.encode(
                    self._textEncoding, 'replace')
                sys.stderr.write(err)
        else:
            # Process the element into a response string.
            response = self._processElement(elem, sessionID).strip()
    finally:
        # pop the top entry off the input stack.
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.pop()
        self.setPredicate(self._inputStack, inputStack, sessionID)

    return response
def _respond(self, input, sessionID):
    """Private version of respond(), does the real work.

    The input is recorded on the per-session input stack for the duration
    of the call (this is what the recursion guard counts), normalized with
    splitChinese() and the 'normal' subber, and matched against the brain
    along with the previous bot output ('that') and the current topic.

    Returns the processed response string; "" for empty input, when the
    maximum recursion depth is exceeded, or when nothing matches.
    """
    if len(input) == 0:
        return ""

    # guard against infinite recursion
    inputStack = self.getPredicate(self._inputStack, sessionID)
    if len(inputStack) > self._maxRecursionDepth:
        if self._verboseMode:
            err = "WARNING: maximum recursion depth exceeded (input='%s')" % input.encode(
                self._textEncoding, 'replace')
            sys.stderr.write(err)
        return ""

    # push the input onto the input stack
    inputStack = self.getPredicate(self._inputStack, sessionID)
    inputStack.append(input)
    self.setPredicate(self._inputStack, inputStack, sessionID)

    try:
        # run the input through the 'normal' subber
        input = u' '.join(splitChinese(input))
        if input.find(" ") < 0:
            input = " " + input
        normalSubber = self._subbers['normal']
        subbedInput = u" ".join(normalSubber.sub(input).strip().split())

        # fetch the bot's previous response, to pass to the match()
        # function as 'that'.
        outputHistory = self.getPredicate(self._outputHistory, sessionID)
        try:
            that = outputHistory[-1]
        except IndexError:
            that = ""
        subbedThat = self._subbers['normal'].sub(that)

        # fetch the current topic
        topic = self.getPredicate("topic", sessionID)
        subbedTopic = self._subbers['normal'].sub(topic)

        # Determine the final response.
        response = ""
        elem = self._brain.match(subbedInput, subbedThat, subbedTopic)
        if elem is None:
            if self._verboseMode:
                # Encode with 'replace', as the recursion warning does, so
                # that logging an unmatched input can never raise
                # UnicodeEncodeError.
                err = "WARNING: No match found for input: %s\n" % input.encode(
                    self._textEncoding, 'replace')
                sys.stderr.write(err)
        else:
            # Process the element into a response string.
            response = self._processElement(elem, sessionID).strip()
    finally:
        # pop the top entry off the input stack -- also when an exception
        # is propagating, so a failed call does not leave its entry behind
        # and push later calls toward the recursion limit.
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.pop()
        self.setPredicate(self._inputStack, inputStack, sessionID)

    return response
def _processSrai(self, elem, sessionID):
    """Process a <srai> AIML element.

    The children of the element (everything from index 2 onwards) are
    processed in order and their results concatenated.  The combined text
    is run through splitChinese(), re-joined with single spaces, and fed
    back into _respond() as a fresh input.  Whatever _respond() produces
    for that input is returned.
    """
    fragments = [self._processElement(child, sessionID)
                 for child in elem[2:]]
    segmented = splitChinese("".join(fragments))
    return self._respond(u' '.join(segmented), sessionID)
def sentences(s):
    """Split the string s into a list of sentences.

    Sentences are delimited by '.', '?' and '!'.  The delimiter is not part
    of the sentence and surrounding whitespace is stripped.  Each sentence
    is afterwards segmented with splitChinese() and the segments are joined
    with single spaces.

    Returns a list of sentences (a one-item list derived from the input
    when the input is empty).

    Raises TypeError if s does not support string concatenation.
    """
    # Duck-typed string check.  Exception (not a bare except) so that
    # KeyboardInterrupt/SystemExit are never converted into TypeError.
    try:
        s + ""
    except Exception:
        raise TypeError("s must be a string")

    delimiters = ('.', '?', '!')
    total = len(s)
    sentenceList = []
    pos = 0
    while pos < total:
        # Find the closest delimiter at or after pos.  When none is left,
        # total + 1 is used so the slice takes the remainder of the string
        # and the loop condition fails afterwards.
        found = [i for i in (s.find(d, pos) for d in delimiters) if i >= 0]
        end = min(found) if found else total + 1
        sentenceList.append(s[pos:end].strip())
        pos = end + 1

    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)

    # Auto convert Chinese.  Built as a list so callers get a list on
    # Python 3 too, where map() would return a lazy iterator.
    return [u' '.join(splitChinese(sentence)) for sentence in sentenceList]
def _endElement(self, name):
    """Verify that an AIML end element is valid in the current context.

    Advances the parser state for the closing tag `name`.  On </category>
    the current pattern (after Chinese segmentation), that and topic form
    the key under which the top element of the element stack is stored in
    self.categories.

    Raises an AimlParserError if an illegal end element is encountered.
    """
    state = self._state

    if name == "aiml":
        # </aiml> is only legal in the InsideAiml state.
        if state != self._STATE_InsideAiml:
            raise AimlParserError("Unexpected </aiml> tag " + self._location())
        self._state = self._STATE_OutsideAiml
        self._whitespaceBehaviorStack.pop()
        return

    if name == "topic":
        # </topic> is only legal in the InsideAiml state, and only while
        # _insideTopic is true.
        closesOpenTopic = state == self._STATE_InsideAiml and self._insideTopic
        if not closesOpenTopic:
            raise AimlParserError("Unexpected </topic> tag " + self._location())
        self._insideTopic = False
        self._currentTopic = u""
        return

    if name == "category":
        # </category> is only legal in the AfterTemplate state.
        if state != self._STATE_AfterTemplate:
            raise AimlParserError("Unexpected </category> tag " + self._location())
        self._state = self._STATE_InsideAiml
        # End the current category: store the element under its
        # pattern/that/topic key.
        # NOTE: the current pattern is overwritten here with its
        # Chinese-segmented form (segments joined by single spaces).
        segments = splitChinese(self._currentPattern)
        self._currentPattern = u' '.join(segments)
        key = (
            self._currentPattern.strip(),
            self._currentThat.strip(),
            self._currentTopic.strip(),
        )
        self.categories[key] = self._elemStack[-1]
        self._whitespaceBehaviorStack.pop()
        return

    if name == "pattern":
        # </pattern> is only legal in the InsidePattern state.
        if state != self._STATE_InsidePattern:
            raise AimlParserError("Unexpected </pattern> tag " + self._location())
        self._state = self._STATE_AfterPattern
        return

    if name == "that" and state == self._STATE_InsideThat:
        # </that> is allowed inside <template> elements or in the
        # InsideThat state.  This clause handles the latter case.
        self._state = self._STATE_AfterThat
        return

    if name == "template":
        # </template> is only allowed in the InsideTemplate state.
        if state != self._STATE_InsideTemplate:
            raise AimlParserError("Unexpected </template> tag " + self._location())
        self._state = self._STATE_AfterTemplate
        self._whitespaceBehaviorStack.pop()
        return

    if state in (self._STATE_InsidePattern, self._STATE_InsideThat):
        # Only certain tags may be closed inside <pattern> and <that>
        # elements.
        if name != "bot":
            raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
        return

    if state == self._STATE_InsideTemplate:
        # End of an element inside the current template.  Move the element
        # on top of the stack into the element beneath it.
        finished = self._elemStack.pop()
        self._elemStack[-1].append(finished)
        self._whitespaceBehaviorStack.pop()
        # A finished condition also releases its entry on the
        # foundDefaultLiStack.
        if finished[0] == "condition":
            self._foundDefaultLiStack.pop()
        return

    # Unexpected closing tag
    raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
def _endElement(self, name):
    """Verify that an AIML end element is valid in the current context.

    Advances the parser state for the closing tag `name`.  On </category>
    every element currently on the element stack is stored in
    self.categories and logged.

    Raises an AimlParserError if an illegal end element is encountered.
    """
    if name == "aiml":
        # </aiml> tags are only legal in the InsideAiml state
        if self._state != self._STATE_InsideAiml:
            raise AimlParserError("Unexpected </aiml> tag " + self._location())
        self._state = self._STATE_OutsideAiml
        self._whitespaceBehaviorStack.pop()
    elif name == "topic":
        # </topic> tags are only legal in the InsideAiml state, and
        # only if _insideTopic is true.
        if self._state != self._STATE_InsideAiml or not self._insideTopic:
            raise AimlParserError("Unexpected </topic> tag " + self._location())
        self._insideTopic = False
        self._currentTopic = u""
    elif name == "category":
        # </category> tags are only legal in the AfterTemplate state
        if self._state != self._STATE_AfterTemplate:
            raise AimlParserError("Unexpected </category> tag " + self._location())
        self._state = self._STATE_InsideAiml
        # End the current category.  Store the current pattern/that/topic
        # and element in the categories dictionary.  The pattern is
        # replaced by its Chinese-segmented form first.
        self._currentPattern = u' '.join(splitChinese(self._currentPattern))
        pattern = self._currentPattern.strip()
        that = self._currentThat.strip()
        topic = self._currentTopic.strip()
        # One entry per element on the stack; the topic part of the key is
        # extended with the element's second item so the entries do not
        # overwrite each other.  (The commented-out predecessor of this
        # loop stored only self._elemStack[-1] under the plain
        # pattern/that/topic key.)
        # NOTE(review): template[1] must support .strip() here -- confirm
        # what the start-element handler puts in that slot.
        for template in self._elemStack:
            key = (pattern, that, topic + " " + template[1].strip())
            self.categories[key] = template
            logging.info("-"*80)
            logging.info("ADD Pattern:"+str(key))
            logging.info("ADD Template:"+str(template))
            logging.info("-"*80)
        logging.debug("-"*80)
        logging.debug("End Category:"+str(self._elemStack))
        logging.debug("-"*80)
        self._whitespaceBehaviorStack.pop()
    elif name == "pattern":
        # </pattern> tags are only legal in the InsidePattern state
        if self._state != self._STATE_InsidePattern:
            raise AimlParserError("Unexpected </pattern> tag " + self._location())
        self._state = self._STATE_AfterPattern
    elif name == "that" and self._state == self._STATE_InsideThat:
        # </that> tags are only allowed inside <template> elements or in
        # the InsideThat state.  This clause handles the latter case.
        self._state = self._STATE_AfterThat
    elif name == "template":
        # </template> tags are only allowed in the InsideTemplate state.
        if self._state != self._STATE_InsideTemplate:
            raise AimlParserError("Unexpected </template> tag " + self._location())
        self._state = self._STATE_AfterTemplate
        self._whitespaceBehaviorStack.pop()
    elif self._state == self._STATE_InsidePattern:
        # Certain tags are allowed inside <pattern> elements.
        if name not in ["bot"]:
            raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
    elif self._state == self._STATE_InsideThat:
        # Certain tags are allowed inside <that> elements.
        if name not in ["bot"]:
            raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
    elif self._state == self._STATE_InsideTemplate:
        # End of an element inside the current template.  Append the
        # element at the top of the stack onto the one beneath it.
        elem = self._elemStack.pop()
        self._elemStack[-1].append(elem)
        self._whitespaceBehaviorStack.pop()
        # If the element was a condition, pop an item off the
        # foundDefaultLiStack as well.
        if elem[0] == "condition":
            self._foundDefaultLiStack.pop()
    else:
        # Unexpected closing tag
        raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
def _endElement(self, name):
    """Verify that an AIML end element is valid in the current context.

    Advances the parser state for the closing tag `name`.  On </category>
    the top element of the element stack is stored in self.categories
    under the (pattern, that, topic) key, where the pattern has first been
    segmented with splitChinese().

    Raises an AimlParserError if an illegal end element is encountered.
    """
    if name == "aiml":
        # </aiml> tags are only legal in the InsideAiml state
        if self._state != self._STATE_InsideAiml:
            raise AimlParserError("Unexpected </aiml> tag " + self._location())
        self._state = self._STATE_OutsideAiml
        self._whitespaceBehaviorStack.pop()
    elif name == "topic":
        # </topic> tags are only legal in the InsideAiml state, and
        # only if _insideTopic is true.
        if self._state != self._STATE_InsideAiml or not self._insideTopic:
            raise AimlParserError("Unexpected </topic> tag " + self._location())
        self._insideTopic = False
        self._currentTopic = u""
    elif name == "category":
        # </category> tags are only legal in the AfterTemplate state
        if self._state != self._STATE_AfterTemplate:
            raise AimlParserError("Unexpected </category> tag " + self._location())
        self._state = self._STATE_InsideAiml
        # End the current category.  Store the current pattern/that/topic
        # and element in the categories dictionary.  The stored pattern is
        # the Chinese-segmented form, which also replaces _currentPattern.
        self._currentPattern = u' '.join(splitChinese(self._currentPattern))
        key = (self._currentPattern.strip(),
               self._currentThat.strip(),
               self._currentTopic.strip())
        self.categories[key] = self._elemStack[-1]
        self._whitespaceBehaviorStack.pop()
    elif name == "pattern":
        # </pattern> tags are only legal in the InsidePattern state
        if self._state != self._STATE_InsidePattern:
            raise AimlParserError("Unexpected </pattern> tag " + self._location())
        self._state = self._STATE_AfterPattern
    elif name == "that" and self._state == self._STATE_InsideThat:
        # </that> tags are only allowed inside <template> elements or in
        # the InsideThat state.  This clause handles the latter case.
        self._state = self._STATE_AfterThat
    elif name == "template":
        # </template> tags are only allowed in the InsideTemplate state.
        if self._state != self._STATE_InsideTemplate:
            raise AimlParserError("Unexpected </template> tag " + self._location())
        self._state = self._STATE_AfterTemplate
        self._whitespaceBehaviorStack.pop()
    elif self._state == self._STATE_InsidePattern:
        # Certain tags are allowed inside <pattern> elements.
        if name not in ["bot"]:
            raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
    elif self._state == self._STATE_InsideThat:
        # Certain tags are allowed inside <that> elements.
        if name not in ["bot"]:
            raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())
    elif self._state == self._STATE_InsideTemplate:
        # End of an element inside the current template.  Append the
        # element at the top of the stack onto the one beneath it.
        elem = self._elemStack.pop()
        self._elemStack[-1].append(elem)
        self._whitespaceBehaviorStack.pop()
        # If the element was a condition, pop an item off the
        # foundDefaultLiStack as well.
        if elem[0] == "condition":
            self._foundDefaultLiStack.pop()
    else:
        # Unexpected closing tag
        raise AimlParserError(("Unexpected </%s> tag " % name) + self._location())