Example #1
0
def down_select(syntax_tree, sql, relation2):
    """Push the conditions of a SELECT node down onto the operands of a JOIN.

    The tree is walked along ``lfchild`` links:

    * ``SELECT``  - its ``cond`` is split on ``'&'`` into individual
      conditions, the node itself is dropped from the tree (the result of
      the recursive call on its left child takes its place), and the
      conditions are carried down.
    * ``PROJECTION`` - left in place; only its left child is rewritten.
    * ``JOIN`` - the first carried condition is wrapped around the left
      operand and, when present, the second one around the right operand.
      Conditions beyond the second are not used.

    Args:
        syntax_tree: node exposing ``op``, ``cond``, ``lfchild``, ``rchild``.
        sql: conditions carried down from an enclosing SELECT (a sequence of
            strings); replaced when a SELECT node is encountered.
        relation2: list mutated in place; receives ``lookfor2(condition)``
            for every condition where ``lookfor(condition)`` is not None
            (presumably relation names - confirm against the helpers).

    Returns:
        The root of the rewritten (sub)tree.
    """
    if syntax_tree.op == 'SELECT':
        sql = syntax_tree.cond.split('&')
        # Collect the relation names referenced by the conditions.
        for condition in sql:
            if lookfor(condition) is not None:
                relation2.append(lookfor2(condition))
        # Returning the rewritten child removes this SELECT node.
        syntax_tree = down_select(syntax_tree.lfchild, sql, relation2)
    elif syntax_tree.op == 'PROJECTION':
        # Skip over the projection.
        syntax_tree.lfchild = down_select(syntax_tree.lfchild, sql, relation2)
    elif syntax_tree.op == 'JOIN':
        # Nothing to push down (e.g. no SELECT above this JOIN): leave the
        # join untouched instead of failing on sql[0].
        if not sql:
            return syntax_tree

        # Push the first condition onto the left operand.
        first_tree = SyntaxTree()
        first_tree.op = 'SELECT'
        first_tree.cond = sql[0]
        first_tree.lfchild = syntax_tree.lfchild
        syntax_tree.lfchild = first_tree
        if len(sql) == 1:
            return syntax_tree

        # Push the second condition onto the right operand; the wrapped
        # operand is stored in the new node's rchild.
        second_tree = SyntaxTree()
        second_tree.op = 'SELECT'
        second_tree.cond = sql[1]
        second_tree.rchild = syntax_tree.rchild
        syntax_tree.rchild = second_tree
    return syntax_tree
Example #2
0
def parsesql(sql_statement):
    """Parse a whitespace-tokenised relational-algebra statement into a tree.

    Tokens are obtained with ``str.split()``, so every keyword, bracket and
    parenthesis must be separated by whitespace. Recognised forms:

    * ``SELECT [ ... ]`` / ``PROJECTION [ ... ]`` - sets ``op`` to the
      keyword and ``cond`` to the bracketed tokens, each followed by a
      single space. The token right after the keyword is skipped without
      being checked (it is expected to be ``[``).
    * ``X JOIN Y`` - sets ``op`` to ``'JOIN'`` and creates two child nodes
      whose ``attr`` is the token before and the token after ``JOIN``.
    * ``( ... )`` - the tokens up to the first ``)`` (or the end of input)
      are parsed recursively and stored as ``lfchild``.

    Any other token is ignored.

    Args:
        sql_statement: the statement text.

    Returns:
        A ``SyntaxTree`` node for the statement.

    Raises:
        IndexError: if ``JOIN`` is the last token (no right operand). When
            ``JOIN`` is the first token, the left operand wraps around to
            the last token.
    """
    sql = sql_statement.split()
    tree = SyntaxTree()

    index = 0
    while index < len(sql):
        token = sql[index]
        if token == 'SELECT' or token == 'PROJECTION':
            tree.op = token
            index += 2  # skip the keyword and the opening '['
            # Record everything inside the brackets. The scan is bounded so
            # a missing ']' ends at the end of input rather than raising,
            # matching how an unclosed '(' is handled below.
            start = index
            while index < len(sql) and sql[index] != ']':
                index += 1
            tree.cond = ''.join(word + ' ' for word in sql[start:index])
            index += 1  # skip the closing ']'
        elif token == 'JOIN':
            # A join creates one child node per operand.
            tree.op = token
            tree.lfchild = SyntaxTree()
            tree.lfchild.attr = sql[index - 1]
            tree.rchild = SyntaxTree()
            tree.rchild.attr = sql[index + 1]
            index += 1
        elif token == '(':
            # An opening parenthesis starts a nested sub-statement.
            index += 1
            start = index
            while index < len(sql) and sql[index] != ')':
                index += 1
            statement = ''.join(word + ' ' for word in sql[start:index])
            index += 1  # skip the closing ')'
            tree.lfchild = parsesql(statement)  # build the subtree recursively
        else:
            index += 1

    return tree
Example #3
0
 def __init__(self):
     """Start with two empty dicts and freshly constructed trees."""
     # Lookup tables: both begin empty.
     self.map, self.mistake_map = {}, {}
     # Helper structures: a new WordsTree and a new SyntaxTree per instance.
     self.word_tree, self.syntax_tree = WordsTree(), SyntaxTree()
Example #4
0
def down_proj(syntax_tree, sql, relation2, same):
    """Push the attribute list of a PROJECTION down onto the JOIN operands.

    The tree is walked along ``lfchild`` links:

    * ``SELECT`` - left in place; only its left child is rewritten.
    * ``PROJECTION`` - its ``cond`` is split on ``','`` into attributes,
      newly seen ``lookfor2(attribute)`` values are added to ``relation2``,
      and, when at least two relations are known, ``findthesame`` of the
      first two is appended to ``same``.
    * ``JOIN`` - unless the first carried item contains ``'='``, a
      PROJECTION on ``sql[0]`` (plus ``same``) is wrapped around the left
      operand, and a PROJECTION on ``sql[1]`` (plus ``same``), or on
      ``same`` alone, is wrapped around the right operand.

    Args:
        syntax_tree: node exposing ``op``, ``cond``, ``lfchild``, ``rchild``.
        sql: attributes carried down from an enclosing PROJECTION (a
            sequence of strings); replaced when a PROJECTION is encountered.
        relation2: list mutated in place with the values returned by
            ``lookfor2`` (presumably relation names - confirm against the
            helper).
        same: string of attributes shared by the joined relations, extended
            with the result of ``findthesame``.

    Returns:
        The root of the rewritten (sub)tree (always the node passed in).
    """
    if syntax_tree.op == 'SELECT':
        syntax_tree.lfchild = down_proj(syntax_tree.lfchild, sql, relation2,
                                        same)
    elif syntax_tree.op == 'PROJECTION':
        # Push the projection down.
        sql = syntax_tree.cond.split(",")
        for attribute in sql:
            if lookfor(attribute) is not None:
                relation = lookfor2(attribute)
                if relation not in relation2:
                    relation2.append(relation)
        # Find the attributes common to the two relations. With fewer than
        # two relations (e.g. a single-table query) there is nothing to
        # intersect, so skip instead of failing on relation2[1].
        if len(relation2) >= 2:
            same += findthesame(relation2[0], relation2[1])
        syntax_tree.lfchild = down_proj(syntax_tree.lfchild, sql, relation2,
                                        same)
    elif syntax_tree.op == 'JOIN':
        # At a JOIN, push the attributes and the shared attributes down.
        if len(sql) > 0 and '=' not in sql[0]:
            # Only append the shared attributes when there are any, so the
            # attribute list never ends in a dangling ", ".
            suffix = ', ' + same if same != '' else ''

            first_tree = SyntaxTree()
            first_tree.op = 'PROJECTION'
            first_tree.cond = sql[0] + suffix
            first_tree.lfchild = syntax_tree.lfchild
            syntax_tree.lfchild = first_tree

            if len(sql) > 1:
                right_cond = sql[1] + suffix
            elif same != '':
                right_cond = same
            else:
                right_cond = None  # nothing to project on the right side

            if right_cond is not None:
                # The wrapped operand is stored in the new node's rchild.
                second_tree = SyntaxTree()
                second_tree.op = 'PROJECTION'
                second_tree.cond = right_cond
                second_tree.rchild = syntax_tree.rchild
                syntax_tree.rchild = second_tree
    return syntax_tree
Example #5
0
class Robot:
    """Question/answer store backed by a WordsTree and a SyntaxTree.

    Questions and answers are kept in ``self.map`` (question -> answer).
    ``analyse`` feeds the whitespace-separated words of every question into
    ``self.word_tree`` and every question/answer pair into
    ``self.syntax_tree``; ``ask`` segments an incoming question with
    ``split_word`` and looks the result up in ``self.syntax_tree``.

    NOTE(review): the encoders call ``ord()`` on each element of their
    input, so they expect an iterable of single characters (e.g. a Python 2
    byte string) - confirm before feeding them other types.
    """

    def __init__(self):
        self.map = {}  # question text -> answer text, see add_question()
        self.word_tree = WordsTree()
        self.syntax_tree = SyntaxTree()
        self.mistake_map = {}  # not used by any method of this class

    def trans_gbk(self, string):
        """Encode ``string`` as a list of integer codes, pairing high bytes.

        A character whose ordinal is above 0x80 is treated as the first
        half of a two-character code and combined with the following
        character as ``(high << 8) + low``; any other character is emitted
        as its ordinal. A dangling first half at the end of the input is
        reported on stdout and dropped.
        """
        codes = []
        high = 0
        for s in string:
            byte = ord(s)
            if high != 0:
                codes.append((high << 8) + byte)
                high = 0
            elif byte > 0x80:
                high = byte
            else:
                codes.append(byte)
        if high:
            print("trans_gbk ERROR. omit")
        return codes

    def trans_utf8(self, string):
        """Encode ``string`` as a list of integer codes, one per UTF-8 unit.

        Ordinals below 0x80 are emitted as-is. An ordinal with bit 0x40 set
        starts a new multi-character code; ordinals without it are appended
        to the pending code as ``(high << 8) + byte``. The pending code is
        flushed whenever a new code starts and at the end of the input. The
        result packs the raw values together; it is not the decoded code
        point.
        """
        codes = []
        high = 0
        for s in string:
            byte = ord(s)
            if byte < 0x80:
                if high:
                    codes.append(high)
                    high = 0
                codes.append(byte)
            elif byte & 0x40:
                # Lead unit: flush whatever was pending and start afresh.
                if high:
                    codes.append(high)
                high = byte
            else:
                # Continuation unit: extend the pending code.
                high = (high << 8) + byte
        if high:
            codes.append(high)
        return codes

    def from_file(self, filename):
        """Load question/answer pairs from ``filename``.

        Blank lines are ignored. A line of the form ``[question]`` sets the
        current question; the next non-bracketed line is stored as its
        answer. A non-bracketed line with no pending question is reported
        on stdout as ``Error: <line>`` and skipped.
        """
        # Use a context manager so the file handle is always closed.
        with open(filename, 'r') as handle:
            lines = handle.read().split('\n')
        quest = None
        for line in lines:
            line = line.strip()
            if len(line) == 0:
                continue
            if line[0] == '[' and line[-1] == ']':
                quest = line[1:-1]
            elif quest is None:
                print("Error: %s" % line)
            else:
                self.add_question(quest, line)
                quest = None

    def walk(self, dir):
        """Load every file found under directory ``dir`` with from_file()."""
        import os
        for root, _dirs, files in os.walk(dir):
            for filename in files:
                self.from_file(os.path.join(root, filename))

    def add_question(self, quest, answer):
        """Store ``answer`` for ``quest``, replacing any previous answer."""
        self.map[quest] = answer

    def analyse(self):
        """Populate the word tree and syntax tree from the stored questions."""
        tokens = []
        for quest in self.map:
            tokens += quest.split()
        # Each distinct word is added to the word tree once.
        for item in set(tokens):
            self.word_tree.add(self.trans_gbk(item), True)

        for key, value in self.map.items():
            encoded = [tuple(self.trans_gbk(word)) for word in key.split()]
            self.syntax_tree.add(encoded, value)

    def ask(self, quest):
        """Segment ``quest`` and return the syntax tree's search result."""
        words = self.split_word(quest)
        return self.syntax_tree.search([tuple(word) for word in words])

    def split_word(self, quest):
        """Split ``quest`` into the longest words known to the word tree.

        The question is encoded with trans_gbk() and scanned left to right.
        At each position the candidate is grown one code at a time for as
        long as ``word_tree.search`` keeps returning a larger first value;
        the longest candidate flagged by the second value is kept as a word
        and scanning resumes after it. Positions where no word starts are
        skipped.

        Returns:
            A list of words, each a list of integer codes.
        """
        sentence = self.trans_gbk(quest)
        words = []
        pos_start = 0
        length = len(sentence)
        while pos_start < length:
            word = None
            pos_end = pos_start + 1
            pos, target = self.word_tree.search(sentence[pos_start:pos_end])
            if pos > 0:
                if target:
                    word = sentence[pos_start:pos_end]
                # Try successively longer candidates starting at pos_start.
                for i in range(pos_start + 2, length + 1):
                    _pos, _target = self.word_tree.search(
                        sentence[pos_start:i])
                    if _pos > pos:
                        pos = _pos
                        if _target:
                            word = sentence[pos_start:i]
                            pos_end = i
                    else:
                        # No further progress in the tree: stop extending.
                        break
                if word:
                    pos_start = pos_end
                    words.append(word)
                else:
                    pos_start += 1
            else:
                pos_start += 1
        return words