def down_select(syntax_tree, sql, relation2):
    """Push the conditions of a SELECT node down onto the operands of a JOIN.

    The tree is walked along its left children:

    * SELECT     - its ``cond`` is split on ``'&'`` into individual
                   conditions and the SELECT node itself is replaced by its
                   (rewritten) left child.
    * PROJECTION - skipped; only its left child is rewritten.
    * JOIN       - a new SELECT node carrying ``sql[0]`` is inserted above
                   the left operand and, when a second condition exists, one
                   carrying ``sql[1]`` above the right operand.  Conditions
                   beyond the second are not pushed down.

    Args:
        syntax_tree: node to rewrite; must expose ``op``, ``cond``,
            ``lfchild`` and ``rchild``.
        sql: conditions collected so far (replaced when a SELECT is met).
        relation2: list extended in place with ``lookfor2(condition)`` for
            every condition for which ``lookfor(condition)`` is not None
            (presumably relation names - confirm against ``lookfor2``).

    Returns:
        The root of the rewritten subtree.
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation had been lost; confirm against the original layout.
    op = syntax_tree.op

    if op == 'SELECT':
        sql = syntax_tree.cond.split('&')
        # Record the relation referenced by each recognised condition.
        for condition in sql:
            if lookfor(condition) is not None:
                relation2.append(lookfor2(condition))
        # The SELECT node is dropped here; its conditions reappear below
        # as new SELECT nodes directly above the JOIN operands.
        return down_select(syntax_tree.lfchild, sql, relation2)

    if op == 'PROJECTION':
        # Projections are left in place; keep descending.
        syntax_tree.lfchild = down_select(syntax_tree.lfchild, sql, relation2)
        return syntax_tree

    if op == 'JOIN':
        if not sql:
            # No condition was collected, so there is nothing to push down.
            return syntax_tree

        left_select = SyntaxTree()
        left_select.op = 'SELECT'
        left_select.cond = sql[0]
        left_select.lfchild = syntax_tree.lfchild
        syntax_tree.lfchild = left_select

        if len(sql) > 1:
            right_select = SyntaxTree()
            right_select.op = 'SELECT'
            right_select.cond = sql[1]
            # The former right operand hangs off ``rchild`` of the new node.
            right_select.rchild = syntax_tree.rchild
            syntax_tree.rchild = right_select

    return syntax_tree
def parsesql(sql_statement):
    """Parse a whitespace-separated relational-algebra statement into a tree.

    Recognised tokens:

    * ``SELECT`` / ``PROJECTION`` followed by ``[ ... ]`` - sets ``op`` and
      stores the bracket contents in ``cond`` (each token followed by one
      space, so a non-empty ``cond`` always ends with a space).
    * ``JOIN`` - sets ``op`` and creates two leaf nodes whose ``attr`` are
      the tokens immediately before and after ``JOIN``.
    * ``(`` - everything up to the next ``)`` (or the end of the input) is
      parsed recursively and attached as ``lfchild``.

    Any other token is skipped.  An unterminated ``[`` or ``(`` consumes the
    rest of the statement instead of failing.

    Args:
        sql_statement: the statement text; tokens must be separated by
            whitespace, including the brackets and parentheses.

    Returns:
        The ``SyntaxTree`` built from the statement.

    Raises:
        IndexError: if ``JOIN`` is the last token (no right operand).
    """
    tokens = sql_statement.split()
    total = len(tokens)
    tree = SyntaxTree()
    index = 0

    while index < total:
        token = tokens[index]

        if token == 'SELECT' or token == 'PROJECTION':
            tree.op = token
            # Step over the operator and the '[' expected right after it.
            index += 2
            start = index
            # Bounded scan: a missing ']' must not run past the token list.
            while index < total and tokens[index] != ']':
                index += 1
            tree.cond = ''.join(part + ' ' for part in tokens[start:index])
            index += 1  # step over the closing ']'

        elif token == 'JOIN':
            # The operands are the neighbouring tokens; each becomes a leaf.
            tree.op = token
            tree.lfchild = SyntaxTree()
            tree.lfchild.attr = tokens[index - 1]
            tree.rchild = SyntaxTree()
            tree.rchild.attr = tokens[index + 1]
            index += 1

        elif token == '(':
            # A parenthesised sub-statement becomes the left subtree.
            index += 1
            start = index
            while index < total and tokens[index] != ')':
                index += 1
            statement = ' '.join(tokens[start:index])
            index += 1  # step over the closing ')'
            tree.lfchild = parsesql(statement)

        else:
            index += 1

    return tree
def __init__(self):
    """Initialise the instance with empty mappings and fresh search trees."""
    # Two plain dictionaries, both starting out empty.
    self.map = dict()
    # Tree objects are created in the same order as before.
    self.word_tree = WordsTree()
    self.syntax_tree = SyntaxTree()
    self.mistake_map = dict()
def down_proj(syntax_tree, sql, relation2, same):
    """Push a PROJECTION down onto the operands of a JOIN.

    The tree is walked along its left children:

    * SELECT     - left in place; only its left child is rewritten.
    * PROJECTION - its ``cond`` is split on ``','`` into column entries,
                   newly seen ``lookfor2(entry)`` values are added to
                   ``relation2``, and the attributes shared by the first two
                   relations (``findthesame``) are appended to ``same``.
    * JOIN       - if the first entry is not a comparison (contains no
                   ``'='``), a PROJECTION on ``sql[0] + ', ' + same`` is
                   inserted above the left operand; the right operand gets a
                   PROJECTION on ``sql[1] + ', ' + same`` when a second
                   entry exists, otherwise on ``same`` alone when it is not
                   empty.

    Args:
        syntax_tree: node to rewrite; must expose ``op``, ``cond``,
            ``lfchild`` and ``rchild``.
        sql: entries collected so far (replaced when a PROJECTION is met).
        relation2: list of relation identifiers, extended in place.
        same: string of attributes common to both JOIN operands.

    Returns:
        The root of the rewritten subtree.
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation had been lost; in particular, confirm that the right-hand
    # projection belongs inside the "'=' not in sql[0]" guard.
    op = syntax_tree.op

    if op == 'SELECT':
        syntax_tree.lfchild = down_proj(syntax_tree.lfchild, sql, relation2, same)

    elif op == 'PROJECTION':
        sql = syntax_tree.cond.split(",")
        for entry in sql:
            if lookfor(entry) is not None and lookfor2(entry) not in relation2:
                relation2.append(lookfor2(entry))
        # Shared attributes must survive the projection, otherwise the JOIN
        # above could no longer match rows.  With fewer than two relations
        # there is nothing to intersect.
        if len(relation2) >= 2:
            same += findthesame(relation2[0], relation2[1])
        syntax_tree.lfchild = down_proj(syntax_tree.lfchild, sql, relation2, same)

    elif op == 'JOIN':
        if len(sql) > 0 and '=' not in sql[0]:
            left_proj = SyntaxTree()
            left_proj.op = 'PROJECTION'
            left_proj.cond = sql[0] + ', ' + same
            left_proj.lfchild = syntax_tree.lfchild
            syntax_tree.lfchild = left_proj

            # Pick the column list for the right operand, if there is one.
            if len(sql) > 1:
                right_cond = sql[1] + ', ' + same
            elif same != '':
                right_cond = same
            else:
                right_cond = None

            if right_cond is not None:
                right_proj = SyntaxTree()
                right_proj.op = 'PROJECTION'
                right_proj.cond = right_cond
                # The former right operand hangs off ``rchild`` of the new node.
                right_proj.rchild = syntax_tree.rchild
                syntax_tree.rchild = right_proj

    return syntax_tree
class Robot:
    """Question/answer store with dictionary-based word splitting.

    Questions are registered together with their answers, indexed into a
    ``WordsTree`` (individual words) and a ``SyntaxTree`` (word sequences),
    and later looked up with :meth:`ask`.

    Text is handled as sequences of integer character codes; the
    ``trans_*`` helpers call ``ord()`` on every element of their input, so
    they assume byte-oriented strings (Python 2 ``str``) - TODO confirm.
    """

    def __init__(self):
        """Create an empty robot with fresh word and syntax trees."""
        self.map = {}  # question text -> answer text
        self.word_tree = WordsTree()
        self.syntax_tree = SyntaxTree()
        self.mistake_map = {}

    def trans_gbk(self, string):
        """Convert a GBK-style string into a list of integer codes.

        An element whose code is above 0x80 is taken as the high half of a
        two-element character and combined with the following element as
        ``(high << 8) + low``; every other element is emitted as-is.  A
        dangling high half at the end is reported and dropped.
        """
        codes = []
        high = 0
        for char in string:
            value = ord(char)
            if high:
                codes.append((high << 8) + value)
                high = 0
            elif value > 0x80:
                high = value
            else:
                codes.append(value)
        if high:
            print("trans_gbk ERROR. omit")
        return codes

    def trans_utf8(self, string):
        """Convert a UTF-8-style string into a list of integer codes.

        Elements below 0x80 are emitted as-is.  An element with bit 0x40 set
        starts a new multi-element character; any other element is folded
        into the pending character as ``(pending << 8) + value``.
        """
        codes = []
        pending = 0
        for char in string:
            value = ord(char)
            if value < 0x80:
                if pending:
                    codes.append(pending)
                    pending = 0
                codes.append(value)
            elif value & 0x40:
                # Start of a new sequence: flush the one collected so far.
                if pending:
                    codes.append(pending)
                pending = value
            else:
                pending = (pending << 8) + value
        if pending:
            codes.append(pending)
        return codes

    def from_file(self, filename):
        """Load question/answer pairs from a text file.

        A line of the form ``[question]`` sets the pending question; the
        next non-empty line that is not such a header is stored as its
        answer.  Blank lines are ignored and a line with no pending question
        is reported with an ``Error:`` message.
        """
        # The context manager closes the handle even if reading fails.
        with open(filename, 'r') as handle:
            lines = handle.read().split('\n')

        question = None
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line[0] == '[' and line[-1] == ']':
                question = line[1:-1]
            elif question is None:
                print("Error: %s" % line)
            else:
                self.add_question(question, line)
                question = None

    def walk(self, dir):
        """Load every file found under directory ``dir`` with from_file."""
        import os
        for root, _dirs, files in os.walk(dir):
            for filename in files:
                self.from_file(os.path.join(root, filename))

    def add_question(self, quest, answer):
        """Register ``answer`` for ``quest``, replacing any previous answer."""
        self.map[quest] = answer

    def analyse(self):
        """Index all registered questions into the word and syntax trees."""
        all_words = []
        for question in self.map:
            all_words += question.split()
        for item in set(all_words):
            self.word_tree.add(self.trans_gbk(item), True)
        for key, value in self.map.items():
            # Each question becomes a list of code tuples, one per word.
            encoded = [tuple(self.trans_gbk(word)) for word in key.split()]
            self.syntax_tree.add(encoded, value)

    def ask(self, quest):
        """Split ``quest`` into known words and search the syntax tree."""
        words = self.split_word(quest)
        return self.syntax_tree.search([tuple(word) for word in words])

    def split_word(self, quest):
        """Split ``quest`` into the longest words known to the word tree.

        Starting at each position, the candidate slice is grown one code at
        a time while ``word_tree.search`` keeps returning a larger first
        value; the longest slice flagged by the second value is kept as a
        word.  Positions that start no known word are skipped.
        ``word_tree.search`` is defined elsewhere; it presumably returns
        (matched depth, is-complete-word) - confirm against ``WordsTree``.

        Returns:
            A list of words, each a list of integer codes.
        """
        sentence = self.trans_gbk(quest)
        length = len(sentence)
        words = []
        pos_start = 0

        while pos_start < length:
            word = None
            pos_end = pos_start + 1
            pos, target = self.word_tree.search(sentence[pos_start:pos_end])

            if pos <= 0:
                # Nothing in the tree starts with this code.
                pos_start += 1
                continue

            if target:
                word = sentence[pos_start:pos_end]

            # Try to extend the match; stop as soon as it no longer deepens.
            for i in range(pos_end + 1, length + 1):
                _pos, _target = self.word_tree.search(sentence[pos_start:i])
                if _pos <= pos:
                    break
                pos = _pos
                if _target:
                    word = sentence[pos_start:i]
                    pos_end = i

            if word:
                words.append(word)
                pos_start = pos_end
            else:
                pos_start += 1

        return words