예제 #1
0
파일: robot.py 프로젝트: hustym/spirit
 def __init__(self):
     """Set up an empty instance: two dicts and two freshly built trees.

     ``WordsTree`` and ``SyntaxTree`` are constructed with no arguments;
     both are defined outside this snippet.
     """
     # Starts out empty; nothing in this snippet fills it.
     self.map = dict()
     self.word_tree = WordsTree()
     self.syntax_tree = SyntaxTree()
     # Starts out empty; nothing in this snippet fills it.
     self.mistake_map = dict()
예제 #2
0
파일: robot.py 프로젝트: hustym/spirit
class Robot:
    """Question/answer lookup built on a word tree and a syntax tree.

    Questions are stored as whitespace-separated token strings mapped to
    an answer.  ``analyse`` registers every token in ``word_tree`` and every
    question's token sequence (with its answer) in ``syntax_tree``.  ``ask``
    segments free text into registered tokens and looks that sequence up.

    ``WordsTree`` and ``SyntaxTree`` are defined outside this class.

    The code is written so that it parses on both Python 2 and Python 3
    while producing the same output on Python 2 as before.
    """

    def __init__(self):
        """Create a robot with no questions loaded and empty trees."""
        # question string -> answer string, filled by add_question().
        self.map = {}
        self.word_tree = WordsTree()
        self.syntax_tree = SyntaxTree()
        # Not referenced by any other method visible in this class.
        self.mistake_map = {}

    def trans_gbk(self, string):
        """Turn a string of byte-sized characters into a list of int codes.

        A character whose code is greater than 0x80 is taken as the first
        half of a two-byte character (the GBK convention, going by the
        method name) and merged with the next character as
        ``(first << 8) + second``.  Every other character yields its own
        code.

        :param string: text to convert, iterated one character at a time.
        :return: list of integer codes, one per logical character.

        If the input ends right after a first-half byte, that byte is
        dropped and a message is printed.
        """
        codes = []
        lead = 0  # pending first byte of a two-byte character, 0 if none
        for char in string:
            value = ord(char)
            if lead:
                codes.append((lead << 8) + value)
                lead = 0
            elif value > 0x80:
                lead = value
            else:
                codes.append(value)
        if lead:
            print("trans_gbk ERROR. omit")
        return codes

    # NOTE: a ``pre_process(self, words)`` method was stubbed out here in a
    # comment and never implemented.

    def trans_utf8(self, string):
        """Turn a UTF-8 byte string into a list of int codes.

        Characters below 0x80 yield their own code.  For a multi-byte
        sequence the raw bytes are packed big-endian into one integer
        (``0xE4 0xBD 0xA0`` becomes ``0xE4BDA0``); the sequence is not
        decoded to a Unicode code point.

        :param string: text to convert, iterated one character at a time.
        :return: list of integer codes, one per logical character.
        """
        codes = []
        pending = 0  # bytes of the multi-byte character collected so far
        for char in string:
            value = ord(char)
            if value >= 0x80 and not value & 0x40:
                # Continuation byte (bit 0x40 clear): extend the character.
                pending = (pending << 8) + value
                continue
            # A single-byte character or a new lead byte ends whatever
            # character was being collected.
            if pending:
                codes.append(pending)
            if value < 0x80:
                codes.append(value)
                pending = 0
            else:
                pending = value
        if pending:
            codes.append(pending)
        return codes

    def from_file(self, filename):
        """Load question/answer pairs from a text file.

        The file is read whole and processed line by line after stripping
        surrounding whitespace.  Blank lines are skipped.  A line of the
        form ``[question]`` sets the pending question; the next other
        non-blank line is stored as its answer.  A non-bracketed line with
        no pending question is reported with an ``Error:`` message and
        ignored.

        :param filename: path of the file to read.
        """
        # The context manager closes the handle even if reading fails; the
        # previous ``file(...).read()`` never closed it explicitly.
        with open(filename, 'r') as handle:
            lines = handle.read().split('\n')

        question = None
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line[0] == '[' and line[-1] == ']':
                question = line[1:-1]
            elif question is None:
                # Same output as the old ``print "Error:", l`` statement.
                print("Error: %s" % line)
            else:
                self.add_question(question, line)
                question = None

    def walk(self, dir):
        """Call ``from_file`` on every file below directory *dir*.

        :param dir: root directory to scan recursively.  The name shadows
            the builtin but is kept so keyword callers keep working.
        """
        import os
        for root, _subdirs, files in os.walk(dir):
            for filename in files:
                self.from_file(os.path.join(root, filename))

    def add_question(self, quest, answer):
        """Store *answer* for *quest*, replacing any earlier answer."""
        self.map[quest] = answer

    def analyse(self):
        """Index all stored questions in the word tree and the syntax tree.

        Each distinct whitespace-separated token of every question is
        converted with ``trans_gbk`` and added to ``word_tree``.  Then each
        question's token sequence, as a list of tuples of codes, is added
        to ``syntax_tree`` together with its answer.
        """
        tokens = set()
        for quest in self.map:
            tokens.update(quest.split())
        for token in tokens:
            # The meaning of the second argument is defined by WordsTree.
            self.word_tree.add(self.trans_gbk(token), True)

        for quest, answer in self.map.items():
            # Built as a real list so the result does not depend on map()
            # returning a list.
            sequence = [tuple(self.trans_gbk(token))
                        for token in quest.split()]
            self.syntax_tree.add(sequence, answer)

    def ask(self, quest):
        """Segment *quest* into known words and look the sequence up.

        :param quest: question text, in the form ``trans_gbk`` accepts.
        :return: whatever ``syntax_tree.search`` returns for the list of
            word tuples.
        """
        words = self.split_word(quest)
        return self.syntax_tree.search([tuple(word) for word in words])

    def split_word(self, quest):
        """Split *quest* into the longest words known to ``word_tree``.

        The text is converted with ``trans_gbk`` and scanned left to right.
        At each position the slice is grown one code at a time for as long
        as ``word_tree.search`` keeps reporting a larger first value; the
        longest slice flagged as a complete word is kept.  Positions where
        no word starts are skipped, so unknown characters are dropped.

        :param quest: text to segment.
        :return: list of words, each a list of integer codes.
        """
        sentence = self.trans_gbk(quest)
        length = len(sentence)
        words = []
        pos_start = 0
        while pos_start < length:
            pos_end = pos_start + 1
            # search() returns a pair: the first value is compared with
            # ">" as a measure of match progress, the second is used as a
            # "this slice is a whole word" flag.
            progress, is_word = self.word_tree.search(
                sentence[pos_start:pos_end])
            if progress <= 0:
                # Nothing in the tree starts with this code.
                pos_start += 1
                continue

            word = sentence[pos_start:pos_end] if is_word else None
            probe = pos_end + 1
            while probe <= length:
                next_progress, next_is_word = self.word_tree.search(
                    sentence[pos_start:probe])
                if next_progress <= progress:
                    # The longer slice matched no further; stop growing.
                    break
                progress = next_progress
                if next_is_word:
                    word = sentence[pos_start:probe]
                    pos_end = probe
                probe += 1

            if word:
                words.append(word)
                pos_start = pos_end
            else:
                pos_start += 1
        return words