def sentences(s):
    """Split the string s into a list of sentences.

    The input is first run through ``splitChinese`` (its pieces are
    re-joined with single spaces) and then through ``normalChinese``; both
    helpers are defined elsewhere in this module.  The processed text is
    then cut at every ``.``, ``?``, ``!`` or ``~``.  The terminator
    characters are dropped and every piece is stripped of surrounding
    whitespace.

    Args:
        s: The text to split.  It must support ``s + ""``.

    Returns:
        A list of sentence strings.  Two adjacent terminators yield an
        empty-string entry, and nothing is emitted for the (empty) text
        that follows a terminator at the very end of the input.  If no
        piece was produced at all, the list holds the processed text as
        its single item.

    Raises:
        TypeError: If ``s`` cannot be concatenated with a string.
    """
    # Duck-typed string check.  Any failure is reported as TypeError, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        s + ""
    except Exception:
        raise TypeError("s must be a string")

    s = u" ".join(splitChinese(s))
    # TODO: do better for normalChinese
    s = normalChinese(s)

    terminators = ".?!~"
    sentence_list = []
    start = 0
    # One pass over the text; cutting at each terminator avoids re-scanning
    # the remainder of the string once per terminator per sentence.
    for position, char in enumerate(s):
        if char in terminators:
            sentence_list.append(s[start:position].strip())
            start = position + 1

    # Text after the last terminator (or the whole text if there is none).
    if start < len(s):
        sentence_list.append(s[start:].strip())

    # If no sentences were found, return a one-item list containing the
    # entire (processed) input string.
    if not sentence_list:
        sentence_list.append(s)
    return sentence_list
def _endElement(self, name):
    """Verify that an AIML end element is valid in the current context.

    Advances the parser state for the closing tag ``name``.  For
    ``</category>`` the finished category is stored in ``self.categories``
    under a ``(pattern, that, topic)`` key; for an element closed inside a
    template, the element on top of ``self._elemStack`` is appended to the
    one beneath it.

    Args:
        name: Tag name of the element being closed (e.g. ``"pattern"``).

    Raises:
        AimlParserError: If the end element is illegal in the current
            parser state.
    """

    def unexpected(tag):
        # Build the error; the caller raises it so the traceback points at
        # the branch that rejected the tag.
        return AimlParserError(
            ("Unexpected </%s> tag " % tag) + self._location())

    if name == "aiml":
        # </aiml> tags are only legal in the InsideAiml state.
        if self._state != self._STATE_InsideAiml:
            raise unexpected("aiml")
        self._state = self._STATE_OutsideAiml
        self._whitespaceBehaviorStack.pop()
    elif name == "topic":
        # </topic> tags are only legal in the InsideAiml state, and
        # only if _insideTopic is true.
        if self._state != self._STATE_InsideAiml or not self._insideTopic:
            raise unexpected("topic")
        self._insideTopic = False
        self._currentTopic = u""
    elif name == "category":
        # </category> tags are only legal in the AfterTemplate state.
        if self._state != self._STATE_AfterTemplate:
            raise unexpected("category")
        self._state = self._STATE_InsideAiml
        # End the current category.  Store the current pattern/that/topic
        # and element in the categories dictionary.  The pattern is first
        # re-joined from splitChinese() pieces and passed through
        # normalChinese() (both defined elsewhere in this module).
        # NOTE(review): only the pattern gets this treatment; `that` and
        # `topic` are merely stripped -- confirm this is what the matching
        # side expects.
        self._currentPattern = u' '.join(splitChinese(self._currentPattern))
        self._currentPattern = normalChinese(self._currentPattern)
        key = (
            self._currentPattern.strip(),
            self._currentThat.strip(),
            self._currentTopic.strip(),
        )
        self.categories[key] = self._elemStack[-1]
        self._whitespaceBehaviorStack.pop()
    elif name == "pattern":
        # </pattern> tags are only legal in the InsidePattern state.
        if self._state != self._STATE_InsidePattern:
            raise unexpected("pattern")
        self._state = self._STATE_AfterPattern
    elif name == "that" and self._state == self._STATE_InsideThat:
        # </that> tags are only allowed inside <template> elements or in
        # the InsideThat state.  This clause handles the latter case.
        self._state = self._STATE_AfterThat
    elif name == "template":
        # </template> tags are only allowed in the InsideTemplate state.
        if self._state != self._STATE_InsideTemplate:
            raise unexpected("template")
        self._state = self._STATE_AfterTemplate
        self._whitespaceBehaviorStack.pop()
    elif self._state in (self._STATE_InsidePattern, self._STATE_InsideThat):
        # Only </bot> may close inside a <pattern> or <that> element.
        if name != "bot":
            raise unexpected(name)
    elif self._state == self._STATE_InsideTemplate:
        # End of an element inside the current template.  Append the
        # element at the top of the stack onto the one beneath it.
        elem = self._elemStack.pop()
        self._elemStack[-1].append(elem)
        self._whitespaceBehaviorStack.pop()
        # If the element was a condition, pop an item off the
        # foundDefaultLiStack as well.
        if elem[0] == "condition":
            self._foundDefaultLiStack.pop()
    else:
        # Unexpected closing tag.
        raise unexpected(name)