Example #1
0
class TestWordSub(unittest.TestCase):

    longMessage = True

    def setUp(self):
        self.subber = WordSub()
        self.subber["apple"] = "banana"
        self.subber["orange"] = "pear"
        self.subber["banana"] = "apple"
        self.subber["he"] = "she"
        self.subber["I'd"] = "I would"

    def tearDown(self):
        del self.subber

    def test01_sub(self):
        '''test wordsub'''
        inStr = "He said he'd like to go with me"
        outStr = "She said she'd like to go with me"
        self.assertEqual(outStr, self.subber.sub(inStr))

    def test02_case(self):
        '''test case insensitivity'''
        inStr = "I'd like one apple, one Orange and one BANANA."
        outStr = "I Would like one banana, one Pear and one APPLE."
        self.assertEqual(outStr, self.subber.sub(inStr))
Example #2
0
 def setUp(self):
     self.subber = WordSub()
     self.subber["apple"] = "banana"
     self.subber["orange"] = "pear"
     self.subber["banana"] = "apple"
     self.subber["he"] = "she"
     self.subber["I'd"] = "I would"
Example #3
0
    def addSub(self, name, items, reset=False):
        ''' 
        Add a new WordSub instance
          @param name (str): Wordsub name
          @param items (iterable of tuples): subs to add
          @param reset (bool): delete all current subs in this WordSub
          @return (int): number of subs added
        '''
        # Just define default (English) subbers
        if name == 'default':
            import aiml.DefaultSubs as DefaultSubs
            self._subbers = {}
            self._subbers['gender'] = WordSub(DefaultSubs.defaultGender)
            self._subbers['person'] = WordSub(DefaultSubs.defaultPerson)
            self._subbers['person2'] = WordSub(DefaultSubs.defaultPerson2)
            self._subbers['normal'] = WordSub(DefaultSubs.defaultNormal)
            return 'default subs defined'

        # Reset current dictionary, if requested
        if reset and name in self._subbers:
            del self._subbers[name]
        # and ensure it exists
        if name not in self._subbers:
            self._subbers[name] = WordSub()
        # Add all subs
        n = -1
        for n, kv in enumerate(items):
            #print("Sub", n+1, kv)
            self._subbers[name][kv[0]] = kv[1]
        # We need at least one, or substitution will crash
        if n == -1:
            self._subbers[name]['DUMMYSUB'] = 'DUMMYSUB'
        return n + 1
Example #4
0
    def __init__(self):
        self._verboseMode = True
        self._version = "python-aiml {}".format(VERSION)
        self._brain = PatternMgr()
        self._respondLock = threading.RLock()
        self.setTextEncoding( None if PY3 else "utf-8" )

        # 建立会话
        self._sessions = {}
        self._addSession(self._globalSessionID)

        # 设置机器人谓词
        self._botPredicates = {}
        self.setBotPredicate("name", "Nameless")

        # 设置单词替换器 (subbers),来自WordSub文件:
        self._subbers = {}
        self._subbers['gender'] = WordSub(aiml.DefaultSubs.defaultGender)
        self._subbers['person'] = WordSub(aiml.DefaultSubs.defaultPerson)
        self._subbers['person2'] = WordSub(aiml.DefaultSubs.defaultPerson2)
        self._subbers['normal'] = WordSub(aiml.DefaultSubs.defaultNormal)
        
        # 设置元素处理器
        self._elementProcessors = {
            "bot":          self._processBot,
            "condition":    self._processCondition,
            "date":         self._processDate,
            "formal":       self._processFormal,
            "gender":       self._processGender,
            "get":          self._processGet,
            "gossip":       self._processGossip,
            "id":           self._processId,
            "input":        self._processInput,
            "javascript":   self._processJavascript,
            "learn":        self._processLearn,
            "li":           self._processLi,
            "lowercase":    self._processLowercase,
            "person":       self._processPerson,
            "person2":      self._processPerson2,
            "random":       self._processRandom,
            "text":         self._processText,
            "sentence":     self._processSentence,
            "set":          self._processSet,
            "size":         self._processSize,
            "sr":           self._processSr,
            "srai":         self._processSrai,
            "star":         self._processStar,
            "system":       self._processSystem,
            "template":     self._processTemplate,
            "that":         self._processThat,
            "thatstar":     self._processThatstar,
            "think":        self._processThink,
            "topicstar":    self._processTopicstar,
            "uppercase":    self._processUppercase,
            "version":      self._processVersion,
        }
    def __init__(self):
        self._classifier = {
            "#": self._process_hash,
            "-": self._process_hyphen,
            "$": self._process_dollar,
        }

        self._hash_type = {
            "intent": self._process_patent,
            "pattern": self._process_patent,
            "topic": self._process_topic,
        }

        self.categories = []
        self.topic = {
            'topic_name': '',
            'flag': 0,
            'topic_begins': '<topic name="topic_name">\n',
            'topic_ends': '</topic>\n'
        }

        self.subs = WordSub(DefaultSubs.defaultNormal)
Example #6
0
    def loadSubs(self, filename):
        """"加载替换文件。
    该文件必须采用Windows风格的INI格式(有关此格式的信息,请参阅标准的ConfigParser模块文档)。
        文件的每个部分都被加载到自己的替代者中。        """
        parser = ConfigParser()
        with open(filename) as f:
            parser.read_file(f)

        for s in parser.sections():
            # 为此部分添加一个新的WordSub实例。 如果已经存在,请将其删除。
            if s in self._subbers:
                del(self._subbers[s])
            self._subbers[s] = WordSub()
            # 遍历键-值对,并将它们添加到subber   替换者
            for k,v in parser.items(s):
                self._subbers[s][k] = v
class Aiml_Generator(object):
    def print_categories(self):
        # For testing purpose.
        for cat in self.categories:
            print('pattern_head: ', cat['pattern_head'])
            print('srai: ', cat['srai'])
            print('think_dict: ', cat['think_dict'])
            print('condition_flag: ', cat['condition_flag'])
            print('response: ', cat['response'])
            print('topic_flag_begin: ', cat['topic_flag_begin'])
            print('topic_flag_end: ', cat['topic_flag_end'])
            print()

    def __init__(self):
        self._classifier = {
            "#": self._process_hash,
            "-": self._process_hyphen,
            "$": self._process_dollar,
        }

        self._hash_type = {
            "intent": self._process_patent,
            "pattern": self._process_patent,
            "topic": self._process_topic,
        }

        self.categories = []
        self.topic = {
            'topic_name': '',
            'flag': 0,
            'topic_begins': '<topic name="topic_name">\n',
            'topic_ends': '</topic>\n'
        }

        self.subs = WordSub(DefaultSubs.defaultNormal)

    def classify_line(self, line, line_number):
        """ This method takes two inputs line and it's number. The line has been stripped of extra leading and trailing spaces.
            It then checks for the first element and accordingly calls the handler function.
        """
        if line[0] in self._classifier:
            handler_function = self._classifier[line[0]]
            handler_function(line, line_number)
        elif self.categories[-1]['condition_flag'] == 1 and line[0] in [
                'c', 'd', '?'
        ]:
            # It is a valid <condition> format
            self._process_condition(line)
        else:
            print("ERROR: go to line: ", line_number)
            print(line)
            exit()

    def _process_hash(self, line, line_number):
        # line_type should be in ['intent','pattern','topic']
        line_type = line.split(':')[0].split(' ')[1]
        try:
            handler_function = self._hash_type[line_type]
        except:
            print(
                'You can only use "intent", "pattern", or "topic". Error in line number:',
                line_number)
            exit()
        handler_function(line, line_type)

    def _process_hyphen(self, line, line_number):
        text = line.split('-')[1].strip()
        text, key, value = self.extract_entity(text)
        temp = {}
        if key:
            temp[key] = value
        array = [text, temp]
        self.categories[-1]['srai'].append(array)

    def _process_dollar(self, line, line_number):
        text = line.split('$')[1].strip()
        if text[0] == '?':
            # <condition> tag begins, make condition_flag = 1, and remove the '?' and ':'
            condition_name = text[1:-1]
            self.categories[-1]['response'].append('<condition>')
            self.categories[-1]['response'].append([])
            self.categories[-1]['response'][-1].append(condition_name)
            self.categories[-1]['response'][-1].append([])
            self.categories[-1]['condition_flag'] = 1
        elif self.categories[-1]['condition_flag'] == 1:
            self._process_condition(line)
        else:
            try:
                text = self.set_entity(text)
            except:
                pass
            try:
                text = self.get_entity(text)
            except:
                pass
            self.categories[-1]['response'].append(text)

    def _process_condition(self, line):
        if line == '?':
            # condition evaluation done
            self.condition_flag = 0
        elif line[0] == '$':
            # Extract the possible responses
            text = line.split('$')[1].strip()
            self.categories[-1]['response'][-1][-1][-1].append(text)
        elif 'case' in line:
            # Extract the possible condition value
            value = line.split()[1]
            # Remove the quotes and ':'
            value = value[1:-2]
            self.categories[-1]['response'][-1][-1].append(value)
            self.categories[-1]['response'][-1][-1].append([])
        elif 'default' in line:
            value = 'default'
            self.categories[-1]['response'][-1][-1].append(value)
            self.categories[-1]['response'][-1][-1].append([])

    def set_entity(self, text):
        key = text.split('(')[1].split(')')[0]
        value = text.split('[')[1].split(']')[0]
        # Remove the [text]
        temp = re.sub(r'\([^)]*\)', '', text)
        # Replace the (text) with <set name="key">value</set>
        repl = '<set name="' + key + '">' + value + '</set>'
        temp = re.sub(r'\[[^]]*\]', repl, temp)
        return temp

    def get_entity(self, text):
        key = text.split('{')[1].split('}')[0]
        # Replace the {text} with '<get name="text"/>'
        repl = '<get name="' + key + '"/>'
        temp = re.sub(r'\{[^}]*\}', repl, text)
        return temp

    def extract_entity(self, text):
        # Extract entity if present in text within () and remove '[', ']'.
        if '(' in text:
            key = text.split('(')[1].split(')')[0]
            value = text.split('[')[1].split(']')[0]
            result = re.sub(r'\([^)]*\)', '', text)
            brackets = re.compile("|".join([r'\[', r'\]']))
            result = brackets.sub('', result)
            return result, key, value
        return text, '', ''

    def create_dictionary(self, text):
        # This method creates think_dict = {}, setting variable name as it's key and variable value as it's value
        think_dict = {}
        extract_dictionary = text.split(',')
        for elem in extract_dictionary:
            elem = elem.split(':')
            key, value = elem[0].strip(), elem[1].strip()
            think_dict[key.strip()[1:-1]] = value.strip()[1:-1]
        return think_dict

    def create_category(self, pattern_head, srai, think_dict, condition_flag,
                        response, topic_flag_begin, topic_flag_end):
        temp = {}
        temp['pattern_head'] = pattern_head
        temp['srai'] = srai
        temp['think_dict'] = think_dict
        temp['response'] = response
        temp['condition_flag'] = 0
        temp['topic_flag_begin'] = topic_flag_begin
        temp['topic_flag_end'] = topic_flag_end
        return temp

    def _process_patent(self, line, line_type):
        text = line
        think_dict = {}
        if '{' in text:
            extract_dictionary = text[text.index('{') +
                                      1:text.index('}')].strip()
            think_dict = self.create_dictionary(extract_dictionary)
            text = re.sub(r'\{[^}]*\}', '', text)
        text = text.split(':')[1]
        text, key, value = self.extract_entity(text)
        if key:
            think_dict[key] = value
        pattern_head = text
        elem = self.create_category(pattern_head, [], think_dict, 0, [],
                                    self.topic['flag'], 0)
        self.topic['flag'] = 0
        self.categories.append(elem)

    def _process_topic(self, line, line_number):
        temp = line.split(':')[1]
        if temp == 'end_topic':
            self.categories[-1]['topic_flag_end'] = 1
        else:
            self.topic['topic_name'] = temp.upper()
            self.topic['flag'] = 1

    def combine(self):
        # Remove last entry and prepare it's respective tags
        elem = self.categories.pop()
        start = ''
        end = ''
        if elem['topic_flag_begin'] == 1:
            start = re.sub(r'topic_name', self.topic['topic_name'],
                           self.topic['topic_begins'])
        # Template for <srai> tags
        # Remove the punctuations
        regex = re.compile('|'.join([r'\.', r'\?', r'!']))
        pattern_head = self.subs.sub(elem['pattern_head'].lower(),
                                     'normal').upper()
        pattern_head = regex.sub('', pattern_head)
        string = '<category>\n<pattern>srai_data</pattern>\n<template><srai>' + pattern_head + '</srai><think><set name="variable_name">variable_value</set></think></template>\n</category>\n\n'
        srai_category = start
        for srai in elem['srai']:
            srai_text = srai[0].lower()
            srai_think = srai[1]
            srai_text = self.subs.sub(srai_text, 'normal')
            srai_text = srai_text.upper()
            srai_text = regex.sub('', srai_text)
            temp = re.sub(r'srai_data', srai_text, string)
            if bool(srai_think):
                key = list(srai_think.keys())[0]
                value = srai_think[key]
                temp = re.sub(r'variable_name', key, temp)
                temp = re.sub(r'variable_value', value, temp)
            else:
                temp = re.sub(r'<think>.*</think>', '', temp)
            srai_category += temp

        # Prepare the main unit
        str_category = '<category>\n<pattern>' + self.subs.sub(
            elem['pattern_head'].lower(), 'normal').upper() + '</pattern>\n'
        head_category = ''
        that_clause = []
        if len(self.categories):
            # Fetch the previous response of bot for <that>
            that_clause = self._process_that(self.categories[-1]['response'])
            str_category += '<that>that_clause</that>\n'

        # Create <template> tag only if <think> tag or some response is present
        if bool(elem['think_dict']) or len(elem['response']):
            str_category += '<template>'
            if bool(elem['think_dict']):
                # If there are variables to be set using <think> and <set>
                think_dict = elem['think_dict']
                str_category += '\n<think>\n'
                for key in think_dict:
                    value = think_dict[key]
                    str_category += '<set name="' + key + '">' + value + '</set>'
                str_category += '</think>\n'
            if len(elem['response']):
                response = elem['response']
                template = self._form_template(response)
                str_category += template
            str_category += '</template>\n'
            str_category += '</category>\n\n'
            if len(that_clause):
                for t in that_clause:
                    repl = self.subs.sub(t.lower(), 'normal').upper()
                    repl = regex.sub('', repl)
                    head_category += re.sub(r'that_clause', repl, str_category)
            else:
                head_category = str_category
        else:
            # Erase the str_category
            str_category = ''
        if elem['topic_flag_end'] == 1:
            end = self.topic['topic_ends']
            head_category += end
        return srai_category + head_category

    def _process_that(self, response):
        """This method receives the self.categories[-1]['response'] list.
           It then classifies the type of it's last entry and accordingly returns that_clause. 
        """
        that_clause = []
        last_entry = response[-1]
        if len(response) > 1 and self.classify_reply(response[-2]) == 3:
            that_clause.append(last_entry[1][-1][-1])
        elif self.classify_reply(last_entry) == 2:
            text = re.sub(r'\]\s*\[', ']|[', last_entry)
            text = text.split('|')[-1]
            # Remove the '[',']'
            text = text[1:-1]
            if ',' in text:
                text = text.split(',')
                for t in text:
                    t = t.strip()[1:-1]
                    that_clause.append(t)
            else:
                # Remove the leading and trailing quotes
                that_clause.append(text[1:-1])
        else:
            that_clause.append(last_entry)
        return that_clause

    def classify_reply(self, reply):
        # Classify the type of entry of response
        reply_type = 1
        if reply == '<condition>':
            reply_type = 3
        elif re.search(r'\[', reply):
            reply_type = 2
        return reply_type

    def _form_template(self, response):
        """ Reponse is a list. It's each entry needs to be processed and accordingly added to li.
            li[] is a list storing the converted aiml format of each entry of response[].
            It's length determines the usage of <random>.
        """
        index = 0
        li = []
        while index < len(response):
            reply = response[index]
            reply_type = self.classify_reply(reply)
            if reply_type == 3:
                # Move to next element
                index += 1
            text = self._process_response(response[index], reply_type)
            li.append(text)
            index += 1
        # Compile all those responses :)
        if len(li) == 1:
            text = li[0]
        else:
            text = '<random>\n'
            for l in li:
                text += '<li>' + l + '</li>\n'
            text += '</random>'
        return text

    def _process_response(self, reply, reply_type):
        """response[] has three types of entries:
               i. One line response ['Hello. What's up?', 'Hi']
               ii. Multi line response ["['Howdy.','Yo']['How are you?', 'What's up?']", "['good'] ['XYZ','xyz']"]
               iii. Conditional response ['<condition>', "['condition_name', ['hot', ['yes', 'very hot'], 'cold', ['cold'], 'dry' ['very much'], 'default', ['default case'] ] ]" ]
           This method identifies the type of reply and convert it to aiml format accordingly.
        """
        answer = ''
        if reply_type == 3:
            condition_name = reply[0]
            answer = '<condition name="' + condition_name + '">\n'
            count = 0
            text = reply[1]
            while count < len(text):
                item = text[count]
                if count % 2 == 0:
                    # On even indices, possible condition_value is present
                    value = item
                    answer += '<li value="' + value + '">value_text</li>\n'
                    if value == 'default':
                        answer = re.sub(r'\s*value="default"', '', answer)
                else:
                    # On odd indices, responses for previously extracted condition_value are present, item is a list in this case
                    i = 0
                    while i < len(item):
                        reply_type = self.classify_reply(item[i])
                        if reply_type == 3:
                            i += 1
                        value = self._process_response(item[i], reply_type)
                        i += 1
                        answer = re.sub(r'value_text', value, answer)
                count += 1
            answer += '</condition>\n'
        elif reply_type == 2:
            # Replace the '] [' with ']|['
            text = re.sub(r'\]\s*\[', ']|[', reply)
            text = text.split('|')
            for t in text:
                # Remove '[' and ']'
                t = t[1:-1]
                temp = t.split(',')
                if len(temp) == 1:
                    ans = temp[0][1:-1] + '\n'
                else:
                    ans = '\n<random>\n'
                    for v in temp:
                        ans += '<li>' + v.strip()[1:-1] + '</li>\n'
                    ans += '</random>\n'
                answer += ans
        else:
            answer = reply
        return answer