Exemple #1
0
def sentences(s):
    """Split the string s into a list of sentences.

    The input is first run through splitChinese() (the returned items are
    re-joined with single spaces) and then through normalChinese().  The
    result is cut at every '.', '?', '!' or '~'; the terminator itself is
    dropped and each piece is stripped of surrounding whitespace.
    Consecutive terminators therefore yield empty strings in the result.

    Returns a list of strings.  If the loop produces nothing (the
    normalised text is empty), the list contains that text as its only
    item.

    Raises TypeError if s cannot be concatenated with a string.
    """
    try:
        s + ""
    except TypeError:
        # Call-form raise is valid on both Python 2 and Python 3.
        raise TypeError("s must be a string")
    s = u" ".join(splitChinese(s))
    # TODO: do better for normalChinese
    s = normalChinese(s)
    pos = 0
    sentenceList = []
    l = len(s)
    while pos < l:
        # Nearest terminator at or after pos.  When none is left, l + 1 makes
        # the slice below take the remainder and ends the loop.
        end = l + 1
        for mark in ('.', '?', '!', '~'):
            hit = s.find(mark, pos)
            if hit != -1 and hit < end:
                end = hit
        sentenceList.append(s[pos:end].strip())
        pos = end + 1
    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)
    return sentenceList
Exemple #2
0
def sentences(s):
    """Split the string s into a list of sentences.

    The text is cut at every '.', '?' or '!'; the terminator itself is
    dropped and each piece is stripped of surrounding whitespace.
    Consecutive terminators therefore yield empty strings.  Every piece is
    then passed through splitChinese() and the returned items are joined
    with single spaces.

    Returns a list of strings.  For an empty input the list holds one item
    derived from the input itself.

    Raises TypeError if s cannot be concatenated with a string.
    """
    try:
        s + ""
    except TypeError:
        # Call-form raise is valid on both Python 2 and Python 3.
        raise TypeError("s must be a string")
    pos = 0
    sentenceList = []
    l = len(s)
    while pos < l:
        # Nearest terminator at or after pos.  When none is left, l + 1 makes
        # the slice below take the remainder and ends the loop.
        end = l + 1
        for mark in ('.', '?', '!'):
            hit = s.find(mark, pos)
            if hit != -1 and hit < end:
                end = hit
        sentenceList.append(s[pos:end].strip())
        pos = end + 1
    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)
    # auto convert chinese; a list comprehension keeps the result a real
    # list on Python 3, where map() would return a lazy iterator.
    return [u' '.join(splitChinese(piece)) for piece in sentenceList]
Exemple #3
0
    def _respond(self, input, sessionID):
        """Private version of respond(), does the real work."""
        if len(input) == 0:
            return ""
        # guard against infinite recursion
        inputStack = self.getPredicate(self._inputStack, sessionID)
        if len(inputStack) > self._maxRecursionDepth:
            if self._verboseMode:
                err = "WARNING: maximum recursion depth exceeded (input='%s')" % input.encode(
                    self._textEncoding, 'replace')
                sys.stderr.write(err)
            return ""

        # push the input onto the input stack
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.append(input)
        self.setPredicate(self._inputStack, inputStack, sessionID)

        # run the input through the 'normal' subber
        input = u' '.join(splitChinese(input))
        if input.find(" ") < 0:
            input = " " + input
        subbedInput = u" ".join(
            self._subbers['normal'].sub(input).strip().split())

        # fetch the bot's previous response, to pass to the match()
        # function as 'that'.
        outputHistory = self.getPredicate(self._outputHistory, sessionID)
        try:
            that = outputHistory[-1]
        except IndexError:
            that = ""
        subbedThat = self._subbers['normal'].sub(that)

        # fetch the current topic
        topic = self.getPredicate("topic", sessionID)
        subbedTopic = self._subbers['normal'].sub(topic)

        # Determine the final response.
        response = ""
        elem = self._brain.match(subbedInput, subbedThat, subbedTopic)
        if elem is None:
            if self._verboseMode:
                err = "WARNING: No match found for input: %s\n" % input.encode(
                    self._textEncoding)
                sys.stderr.write(err)
        else:
            # Process the element into a response string.
            response += self._processElement(elem, sessionID).strip()
            response += " "
        response = response.strip()

        # pop the top entry off the input stack.
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.pop()
        self.setPredicate(self._inputStack, inputStack, sessionID)

        return response
Exemple #4
0
    def _respond(self, input, sessionID):
        """Private version of respond(), does the real work."""
        if len(input) == 0:
            return ""
        # guard against infinite recursion
        inputStack = self.getPredicate(self._inputStack, sessionID)
        if len(inputStack) > self._maxRecursionDepth:
            if self._verboseMode:
                err = "WARNING: maximum recursion depth exceeded (input='%s')" % input.encode(self._textEncoding, 'replace')
                sys.stderr.write(err)
            return ""

        # push the input onto the input stack
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.append(input)
        self.setPredicate(self._inputStack, inputStack, sessionID)

        # run the input through the 'normal' subber
        input=u' '.join(splitChinese(input))
        if input.find(" ") < 0 :
            input=" "+input
        subbedInput = u" ".join(self._subbers['normal'].sub(input).strip().split())
        
        # fetch the bot's previous response, to pass to the match()
        # function as 'that'.
        outputHistory = self.getPredicate(self._outputHistory, sessionID)
        try: that = outputHistory[-1]
        except IndexError: that = ""
        subbedThat = self._subbers['normal'].sub(that)

        # fetch the current topic
        topic = self.getPredicate("topic", sessionID)
        subbedTopic = self._subbers['normal'].sub(topic)

        # Determine the final response.
        response = ""
        elem = self._brain.match(subbedInput, subbedThat, subbedTopic)
        if elem is None:
            if self._verboseMode:
                err = "WARNING: No match found for input: %s\n" % input.encode(self._textEncoding)
                sys.stderr.write(err)
        else:
            # Process the element into a response string.
            response += self._processElement(elem, sessionID).strip()
            response += " "
        response = response.strip()

        # pop the top entry off the input stack.
        inputStack = self.getPredicate(self._inputStack, sessionID)
        inputStack.pop()
        self.setPredicate(self._inputStack, inputStack, sessionID)

        return response
Exemple #5
0
    def _processSrai(self, elem, sessionID):
        """Process a <srai> AIML element.

        Each child of the element (the items from index 2 onward) is
        processed in order and the results are concatenated.  That text
        is passed through splitChinese(), the returned items are joined
        with single spaces, and the result is fed back into _respond()
        as a new piece of input.  Whatever _respond() returns is
        returned.

        """
        pieces = [self._processElement(child, sessionID) for child in elem[2:]]
        rewritten = u' '.join(splitChinese("".join(pieces)))
        return self._respond(rewritten, sessionID)
Exemple #6
0
def sentences(s):
    """Split the string s into a list of sentences.

    The text is cut at every '.', '?' or '!'; the terminator itself is
    dropped and each piece is stripped of surrounding whitespace.
    Consecutive terminators therefore yield empty strings.  Every piece is
    then passed through splitChinese() and the returned items are joined
    with single spaces.

    Returns a list of strings.  For an empty input the list holds one item
    derived from the input itself.

    Raises TypeError if s cannot be concatenated with a string.
    """
    try:
        s + ""
    except TypeError:
        # Call-form raise is valid on both Python 2 and Python 3.
        raise TypeError("s must be a string")
    pos = 0
    sentenceList = []
    l = len(s)
    while pos < l:
        # Position of the closest terminator at or after pos, or l + 1 when
        # none remains (the slice then takes the rest and the loop ends).
        found = [i for i in (s.find(mark, pos) for mark in ('.', '?', '!'))
                 if i != -1]
        end = min(found) if found else l + 1
        sentenceList.append(s[pos:end].strip())
        pos = end + 1
    # If no sentences were found, return a one-item list containing
    # the entire input string.
    if not sentenceList:
        sentenceList.append(s)
    # auto convert chinese; a list comprehension keeps the result a real
    # list on Python 3, where map() would return a lazy iterator.
    return [u' '.join(splitChinese(piece)) for piece in sentenceList]
Exemple #7
0
 def _endElement(self, name):
     """Verify that an AIML end element is valid in the current context.

     Updates the parser state for the closing tag `name` and, on
     </category>, stores the finished category in self.categories.

     Raises an AimlParserError if an illegal end element is encountered.
     """
     if name == "aiml":
         # </aiml> tags are only legal in the InsideAiml state
         if self._state != self._STATE_InsideAiml:
             raise AimlParserError("Unexpected </aiml> tag " +
                                   self._location())
         self._state = self._STATE_OutsideAiml
         self._whitespaceBehaviorStack.pop()
     elif name == "topic":
         # </topic> tags are only legal in the InsideAiml state, and
         # only if _insideTopic is true.
         if self._state != self._STATE_InsideAiml or not self._insideTopic:
             raise AimlParserError("Unexpected </topic> tag " +
                                   self._location())
         self._insideTopic = False
         self._currentTopic = u""
     elif name == "category":
         # </category> tags are only legal in the AfterTemplate state
         if self._state != self._STATE_AfterTemplate:
             raise AimlParserError("Unexpected </category> tag " +
                                   self._location())
         self._state = self._STATE_InsideAiml
         # End the current category.  Store the current pattern/that/topic
         # and element in the categories dictionary.
         # NOTE: the current pattern is modified here -- it is replaced by
         # the output of splitChinese() joined with single spaces.
         self._currentPattern = u' '.join(splitChinese(
             self._currentPattern))
         key = (self._currentPattern.strip(), self._currentThat.strip(),
                self._currentTopic.strip())
         self.categories[key] = self._elemStack[-1]
         self._whitespaceBehaviorStack.pop()
     elif name == "pattern":
         # </pattern> tags are only legal in the InsidePattern state
         if self._state != self._STATE_InsidePattern:
             raise AimlParserError("Unexpected </pattern> tag " +
                                   self._location())
         self._state = self._STATE_AfterPattern
     elif name == "that" and self._state == self._STATE_InsideThat:
         # </that> tags are only allowed inside <template> elements or in
         # the InsideThat state.  This clause handles the latter case.
         self._state = self._STATE_AfterThat
     elif name == "template":
         # </template> tags are only allowed in the InsideTemplate state.
         if self._state != self._STATE_InsideTemplate:
             raise AimlParserError("Unexpected </template> tag " +
                                   self._location())
         self._state = self._STATE_AfterTemplate
         self._whitespaceBehaviorStack.pop()
     elif self._state == self._STATE_InsidePattern:
         # Certain tags are allowed inside <pattern> elements.
         if name not in ["bot"]:
             raise AimlParserError(("Unexpected </%s> tag " % name) +
                                   self._location())
     elif self._state == self._STATE_InsideThat:
         # Certain tags are allowed inside <that> elements.
         if name not in ["bot"]:
             raise AimlParserError(("Unexpected </%s> tag " % name) +
                                   self._location())
     elif self._state == self._STATE_InsideTemplate:
         # End of an element inside the current template.  Append the
         # element at the top of the stack onto the one beneath it.
         elem = self._elemStack.pop()
         self._elemStack[-1].append(elem)
         self._whitespaceBehaviorStack.pop()
         # If the element was a condition, pop an item off the
         # foundDefaultLiStack as well.
         if elem[0] == "condition": self._foundDefaultLiStack.pop()
     else:
         # Unexpected closing tag
         raise AimlParserError(("Unexpected </%s> tag " % name) +
                               self._location())
Exemple #8
0
        def _endElement(self, name):
                """Verify that an AIML end element is valid in the current
                context.

                Raises an AimlParserError if an illegal end element is encountered.

                """
                if name == "aiml":
                        # </aiml> tags are only legal in the InsideAiml state
                        if self._state != self._STATE_InsideAiml:
                                raise AimlParserError, "Unexpected </aiml> tag "+self._location()
                        self._state = self._STATE_OutsideAiml
                        self._whitespaceBehaviorStack.pop()
                elif name == "topic":
                        # </topic> tags are only legal in the InsideAiml state, and
                        # only if _insideTopic is true.
                        if self._state != self._STATE_InsideAiml or not self._insideTopic:
                                raise AimlParserError, "Unexpected </topic> tag "+self._location()
                        self._insideTopic = False
                        self._currentTopic = u""
                elif name == "category":
                        # </category> tags are only legal in the AfterTemplate state
                        if self._state != self._STATE_AfterTemplate:
                                raise AimlParserError, "Unexpected </category> tag "+self._location()
                        self._state = self._STATE_InsideAiml
                        # End the current category.  Store the current pattern/that/topic and
                        # element in the categories dictionary.
                        self._currentPattern = u' '.join(splitChinese(self._currentPattern))

                        for template in self._elemStack:
                                key = (self._currentPattern.strip(), self._currentThat.strip(),
                                        # template[1].strip())
                                        self._currentTopic.strip()+" "+template[1].strip())
                                self.categories[key] = template
                                logging.info("-"*80)
                                logging.info("ADD Pattern:"+str(key))
                                logging.info("ADD Template:"+str(template))
                                logging.info("-"*80)

                        # key = (self._currentPattern.strip(), self._currentThat.strip(),self._currentTopic.strip())
                        # self.categories[key] = self._elemStack[-1]
                        logging.debug("-"*80)
                        logging.debug("End Category:"+str(self._elemStack))
                        logging.debug("-"*80)
                        self._whitespaceBehaviorStack.pop()
                elif name == "pattern":
                        # </pattern> tags are only legal in the InsidePattern state
                        if self._state != self._STATE_InsidePattern:
                                raise AimlParserError, "Unexpected </pattern> tag "+self._location()
                        self._state = self._STATE_AfterPattern
                elif name == "that" and self._state == self._STATE_InsideThat:
                        # </that> tags are only allowed inside <template> elements or in
                        # the InsideThat state.  This clause handles the latter case.
                        self._state = self._STATE_AfterThat
                elif name == "template":
                        # </template> tags are only allowed in the InsideTemplate state.
                        if self._state != self._STATE_InsideTemplate:
                                raise AimlParserError, "Unexpected </template> tag "+self._location()
                        self._state = self._STATE_AfterTemplate
                        self._whitespaceBehaviorStack.pop()
                elif self._state == self._STATE_InsidePattern:
                        # Certain tags are allowed inside <pattern> elements.
                        if name not in ["bot"]:
                                raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
                elif self._state == self._STATE_InsideThat:
                        # Certain tags are allowed inside <that> elements.
                        if name not in ["bot"]:
                                raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
                elif self._state == self._STATE_InsideTemplate:
                        # End of an element inside the current template.  Append the
                        # element at the top of the stack onto the one beneath it.
                        elem = self._elemStack.pop()
                        self._elemStack[-1].append(elem)
                        self._whitespaceBehaviorStack.pop()
                        # If the element was a condition, pop an item off the
                        # foundDefaultLiStack as well.
                        if elem[0] == "condition": self._foundDefaultLiStack.pop()
                else:
                        # Unexpected closing tag
                        raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
Exemple #9
0
        def _endElement(self, name):
                """Verify that an AIML end element is valid in the current
                context.

                Raises an AimlParserError if an illegal end element is encountered.

                """
                if name == "aiml":
                        # </aiml> tags are only legal in the InsideAiml state
                        if self._state != self._STATE_InsideAiml:
                                raise AimlParserError, "Unexpected </aiml> tag "+self._location()
                        self._state = self._STATE_OutsideAiml
                        self._whitespaceBehaviorStack.pop()
                elif name == "topic":
                        # </topic> tags are only legal in the InsideAiml state, and
                        # only if _insideTopic is true.
                        if self._state != self._STATE_InsideAiml or not self._insideTopic:
                                raise AimlParserError, "Unexpected </topic> tag "+self._location()
                        self._insideTopic = False
                        self._currentTopic = u""
                elif name == "category":
                        # </category> tags are only legal in the AfterTemplate state
                        if self._state != self._STATE_AfterTemplate:
                                raise AimlParserError, "Unexpected </category> tag "+self._location()
                        self._state = self._STATE_InsideAiml
                        # End the current category.  Store the current pattern/that/topic and
                        # element in the categories dictionary.
                        self._currentPattern = u' '.join(splitChinese(self._currentPattern))
                        key = (self._currentPattern.strip(), self._currentThat.strip(),self._currentTopic.strip())
                        self.categories[key] = self._elemStack[-1]
                        self._whitespaceBehaviorStack.pop()
                elif name == "pattern":
                        # </pattern> tags are only legal in the InsidePattern state
                        if self._state != self._STATE_InsidePattern:
                                raise AimlParserError, "Unexpected </pattern> tag "+self._location()
                        self._state = self._STATE_AfterPattern
                elif name == "that" and self._state == self._STATE_InsideThat:
                        # </that> tags are only allowed inside <template> elements or in
                        # the InsideThat state.  This clause handles the latter case.
                        self._state = self._STATE_AfterThat
                elif name == "template":
                        # </template> tags are only allowed in the InsideTemplate state.
                        if self._state != self._STATE_InsideTemplate:
                                raise AimlParserError, "Unexpected </template> tag "+self._location()
                        self._state = self._STATE_AfterTemplate
                        self._whitespaceBehaviorStack.pop()
                elif self._state == self._STATE_InsidePattern:
                        # Certain tags are allowed inside <pattern> elements.
                        if name not in ["bot"]:
                                raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
                elif self._state == self._STATE_InsideThat:
                        # Certain tags are allowed inside <that> elements.
                        if name not in ["bot"]:
                                raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
                elif self._state == self._STATE_InsideTemplate:
                        # End of an element inside the current template.  Append the
                        # element at the top of the stack onto the one beneath it.
                        elem = self._elemStack.pop()
                        self._elemStack[-1].append(elem)
                        self._whitespaceBehaviorStack.pop()
                        # If the element was a condition, pop an item off the
                        # foundDefaultLiStack as well.
                        if elem[0] == "condition": self._foundDefaultLiStack.pop()
                else:
                        # Unexpected closing tag
                        raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()