Ejemplo n.º 1
0
def sentences(s):
    """Split the string s into a list of sentences.

    The input is first run through splitChinese() (the pieces it returns
    are re-joined with single spaces) and then through normalChinese().
    The resulting text is cut at every '.', '?', '!' or '~' character.
    The terminator itself is dropped and each piece is stripped of
    surrounding whitespace.

    Note that a terminator at the very start of the text, or two
    terminators in a row, produce an empty-string entry in the result.

    Returns a list of sentence strings.  If the processed text is empty,
    a one-item list containing that (empty) text is returned.

    Raises TypeError if s cannot be concatenated with a string.

    """
    # Duck-typed string check: anything that cannot be concatenated with
    # a string literal is rejected up front.
    try:
        s + ""
    except TypeError:
        raise TypeError("s must be a string")
    s = u" ".join(splitChinese(s))
    # TODO do better for normalChinese
    s = normalChinese(s)

    terminators = ('.', '?', '!', '~')
    length = len(s)
    sentenceList = []
    pos = 0
    while pos < length:
        # Find the earliest terminator at or after pos.  When none is
        # left, `end` stays one past the end of the text so that the
        # remainder is taken as the last sentence and the loop stops.
        end = length + 1
        for ch in terminators:
            idx = s.find(ch, pos)
            if idx != -1 and idx < end:
                end = idx
        sentenceList.append(s[pos:end].strip())
        # Skip over the terminator itself.
        pos = end + 1
    # The loop body never runs for empty text; still return one item so
    # callers always get a non-empty list.
    if not sentenceList:
        sentenceList.append(s)
    return sentenceList
Ejemplo n.º 2
0
    def _endElement(self, name):
        """Verify that an AIML end element is valid in the current
        context, and update the parser state accordingly.

        name is the tag name of the element being closed.  Handling by
        tag / state:

        - </aiml>: legal only in the InsideAiml state; moves to
          OutsideAiml and pops the whitespace-behavior stack.
        - </topic>: legal only in the InsideAiml state while inside a
          topic; clears the current topic.
        - </category>: legal only in the AfterTemplate state; moves to
          InsideAiml, stores the element on top of the element stack in
          self.categories under the (pattern, that, topic) key, and pops
          the whitespace-behavior stack.  The current pattern is passed
          through splitChinese()/normalChinese() first, and the processed
          form is written back to self._currentPattern.
        - </pattern>: legal only in the InsidePattern state; moves to
          AfterPattern.
        - </that> while in the InsideThat state: moves to AfterThat.
        - </template>: legal only in the InsideTemplate state; moves to
          AfterTemplate and pops the whitespace-behavior stack.
        - Any other tag inside <pattern> or <that>: only "bot" is
          accepted.
        - Any other tag inside <template> (including a nested </that>):
          the finished element is popped and appended to its parent.

        Raises an AimlParserError if an illegal end element is encountered.

        """
        def parseError(prefix):
            # Build the exception; call sites keep an explicit `raise` so
            # the control flow stays visible.
            return AimlParserError(prefix + self._location())

        if name == "aiml":
            # </aiml> tags are only legal in the InsideAiml state
            if self._state != self._STATE_InsideAiml:
                raise parseError("Unexpected </aiml> tag ")
            self._state = self._STATE_OutsideAiml
            self._whitespaceBehaviorStack.pop()
        elif name == "topic":
            # </topic> tags are only legal in the InsideAiml state, and
            # only if _insideTopic is true.
            if self._state != self._STATE_InsideAiml or not self._insideTopic:
                raise parseError("Unexpected </topic> tag ")
            self._insideTopic = False
            self._currentTopic = u""
        elif name == "category":
            # </category> tags are only legal in the AfterTemplate state
            if self._state != self._STATE_AfterTemplate:
                raise parseError("Unexpected </category> tag ")
            self._state = self._STATE_InsideAiml
            # End the current category.  The pattern is first split by
            # splitChinese(), re-joined with single spaces, and passed
            # through normalChinese(); the result replaces
            # self._currentPattern.
            self._currentPattern = u' '.join(splitChinese(self._currentPattern))
            self._currentPattern = normalChinese(self._currentPattern)
            # Store the current pattern/that/topic and element in the
            # categories dictionary.
            key = (self._currentPattern.strip(),
                   self._currentThat.strip(),
                   self._currentTopic.strip())
            self.categories[key] = self._elemStack[-1]
            self._whitespaceBehaviorStack.pop()
        elif name == "pattern":
            # </pattern> tags are only legal in the InsidePattern state
            if self._state != self._STATE_InsidePattern:
                raise parseError("Unexpected </pattern> tag ")
            self._state = self._STATE_AfterPattern
        elif name == "that" and self._state == self._STATE_InsideThat:
            # </that> tags are only allowed inside <template> elements or in
            # the InsideThat state.  This clause handles the latter case;
            # a </that> in any other state falls through to the
            # state-based branches below.
            self._state = self._STATE_AfterThat
        elif name == "template":
            # </template> tags are only allowed in the InsideTemplate state.
            if self._state != self._STATE_InsideTemplate:
                raise parseError("Unexpected </template> tag ")
            self._state = self._STATE_AfterTemplate
            self._whitespaceBehaviorStack.pop()
        elif self._state in (self._STATE_InsidePattern, self._STATE_InsideThat):
            # Certain tags are allowed inside <pattern> and <that> elements.
            if name not in ("bot",):
                raise parseError("Unexpected </%s> tag " % name)
        elif self._state == self._STATE_InsideTemplate:
            # End of an element inside the current template.  Append the
            # element at the top of the stack onto the one beneath it.
            elem = self._elemStack.pop()
            self._elemStack[-1].append(elem)
            self._whitespaceBehaviorStack.pop()
            # If the element was a condition, pop an item off the
            # foundDefaultLiStack as well.
            if elem[0] == "condition":
                self._foundDefaultLiStack.pop()
        else:
            # Unexpected closing tag
            raise parseError("Unexpected </%s> tag " % name)