def parse_func_2(self):
    """Fill ``self.expr`` from ``self.function`` by looking aggregates up in ``self.input``.

    Unlike ``parse_func_1`` the aggregate elements themselves (for example
    ``sum(a)`` or ``count(*)``) are searched for in ``self.input``; an
    ``avg(...)`` element is looked up under its ``sum(...)`` spelling.

    For every aggregate element found in an entry of ``self.function`` one
    record is appended to ``self.expr``::

        [func_name, input_pos, 0, element, output_pos]

    * ``func_name``  -- text before the first "(" of the element as written
      (so an ``avg`` element keeps the name "avg").
    * ``input_pos``  -- ``self.input.index(element)``.
    * ``element``    -- the element text after the avg->sum rewrite.
    * ``output_pos`` -- index of the whole expression in ``self.output``,
      or -1 when it is not there.

    The first record of each expression is then extended with ``0`` when the
    expression is a single element, or with ``1, outer_ao_pos, callable``
    when it contains outer arithmetic (``sum(a) + 1``).  ``outer_ao_pos``
    lists the ``self.expr`` indexes of the aggregates involved and the
    callable takes one ``_colN`` parameter per aggregate.

    Side effects: may append "count(*)" to ``self.function``; appends to
    ``self.expr``; sets ``self.pos_map`` (expression index -> index of its
    first record in ``self.expr``) on success only.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` when an element
        uses a function that is not in ``conf.GENERAL_FUNC``.  In the failure
        case records appended so far are left in ``self.expr``.

    Raises:
        ValueError: if an aggregate element is not present in ``self.input``.
    """
    # if there is an avg function, it is converted to sum and count, so a
    # count(*) has to be part of the function list
    if any("avg(" in fun for fun in self.function):
        if "count(*)" not in self.function:
            self.function.append("count(*)")

    # sometimes a result column has more than one function, then the position
    # in self.expr and in self.function is not the same: fun_counter counts
    # the position of a function in self.expr and pos_map records the mapping
    # between these two kinds of positions.
    fun_counter = 0
    pos_map = {}
    for expr_num, expr in enumerate(self.function):
        pos_map[expr_num] = fun_counter
        # "count(*)" is tokenised under the placeholder spelling "count(a_a)"
        # and restored further down.
        if "count(*)" in expr:
            expr = expr.replace("count(*)", "count(a_a)")
        # parse "sum(a) + 1" --> ['sum(a)', '+', '1']
        _expr = newparser.parse_column_expr(expr)
        # positions of the elements taking part in the outer arithmetic operation
        outer_ao_pos = []
        new_expr = expr
        new_args = []
        ParaLiteLog.debug(self.input)
        for ele in _expr:
            if not re.match(r"(.*)\((.*)\)", ele):
                # other operator element: + - * / ^
                continue
            # the aggregate is referred to as _col<N> in the outer expression
            col_name = "_col%s" % str(fun_counter)
            new_expr = new_expr.replace(ele, col_name)
            new_args.append(col_name)
            func_name = ele[0:ele.find("(")]
            if func_name not in conf.GENERAL_FUNC:
                return False, "ParaLite cannot support aggregation function %s" % func_name
            if func_name == "avg":
                # NOTE(review): this rewrites every "avg" in the element, so
                # avg(avg_price) becomes sum(sum_price).  Left unchanged
                # because the result must match the spelling used by whatever
                # builds self.input -- confirm against that producer.
                ele = ele.replace("avg", "sum")
            fun_counter += 1
            func_attr = ele[ele.find("(") + 1 : ele.rfind(")")]
            if func_attr == "a_a":
                # restore the real spelling of count(*)
                ele = ele.replace("a_a", "*")
                expr = expr.replace("count(a_a)", "count(*)")
            pos_in_input = self.input.index(ele)
            if expr in self.output:
                pos_in_output = self.output.index(expr)
            else:
                pos_in_output = -1
            self.expr.append([func_name, pos_in_input, 0, ele, pos_in_output])
            outer_ao_pos.append(fun_counter - 1)

        cur_pos = pos_map[expr_num]
        if cur_pos >= len(self.expr):
            # the exception that select sum(a), count(*), avg(a) ... does not
            # need to do anything for avg(a)
            continue
        if len(_expr) == 1:
            self.expr[cur_pos].append(0)
        else:
            self.expr[cur_pos].append(1)
            self.expr[cur_pos].append(outer_ao_pos)
            # NOTE(security): eval() compiles text derived from self.function;
            # if those strings can come from an untrusted query this executes
            # arbitrary code -- confirm they are validated upstream.
            # The parameters are all _col<N> names, so no "." -> "_" renaming
            # is needed here.
            self.expr[cur_pos].append(
                eval("lambda %s:%s" % (",".join(new_args), new_expr)))
    self.pos_map = pos_map
    return True, None
def parse_func_1(self):
    """Fill ``self.expr`` from ``self.function`` by looking arguments up in ``self.input``.

    For every aggregate element (``sum(a)``, ``count(*)``, ``avg(a+b*c)``)
    found in an entry of ``self.function`` one record is appended to
    ``self.expr``.  Two layouts exist, depending on the aggregate argument:

    * single argument (``sum(a)``, ``count(*)``)::

          [func_name, input_pos, 0, argument, output_pos]

      ``input_pos`` is the index of the argument in ``self.input``, else the
      index of the whole expression in ``self.input``, else -1.

    * arithmetic argument (``sum(a+b*c)``)::

          [func_name, [input_pos, ...], 1, callable, output_pos]

      with one position and one callable parameter per distinct column in the
      argument ("." in a column name becomes "_" in the parameter name).

    ``func_name`` is the text before the first "(" as written (an ``avg``
    element keeps the name "avg"); ``output_pos`` is the index of the whole
    expression in ``self.output`` or -1.

    The first record of each expression is then extended with ``0`` when the
    expression is a single element, or with ``1, outer_ao_pos, callable``
    when it contains outer arithmetic (``sum(a) + 1``); the callable takes
    one ``_colN`` parameter per aggregate in the expression.

    Side effects: may append "count(*)" to ``self.function``; appends to
    ``self.expr``; sets ``self.pos_map`` (expression index -> index of its
    first record in ``self.expr``) on success only.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` when an element
        uses a function that is not in ``conf.GENERAL_FUNC``.  In the failure
        case records appended so far are left in ``self.expr``.

    Raises:
        ValueError: if a column used in an arithmetic argument, or a bare
            column element of the expression, is not in ``self.input``.
    """
    # if there is an avg function, it is converted to sum and count, so a
    # count(*) has to be part of the function list
    if any("avg(" in fun for fun in self.function):
        if "count(*)" not in self.function:
            self.function.append("count(*)")

    # sometimes a result column has more than one function, then the position
    # in self.expr and in self.function is not the same: fun_counter counts
    # the position of a function in self.expr and pos_map records the mapping
    # between these two kinds of positions.
    fun_counter = 0
    pos_map = {}
    for expr_num, expr in enumerate(self.function):
        pos_map[expr_num] = fun_counter
        # "count(*)" is tokenised under the placeholder spelling "count(a_a)"
        # and restored further down.
        if "count(*)" in expr:
            expr = expr.replace("count(*)", "count(a_a)")
        # parse "sum(a) + 1" --> ['sum(a)', '+', '1']
        _expr = newparser.parse_column_expr(expr)
        # positions of the elements taking part in the outer arithmetic operation
        outer_ao_pos = []
        new_expr = expr
        new_args = []
        for ele in _expr:
            if re.match(r"(.*)\((.*)\)", ele):
                # aggregate element: sum(a)
                col_name = "_col%s" % str(fun_counter)
                new_expr = new_expr.replace(ele, col_name)
                new_args.append(col_name)
                func_name = ele[0:ele.find("(")]
                if func_name not in conf.GENERAL_FUNC:
                    return False, "ParaLite cannot support aggregation function %s" % func_name
                fun_counter += 1
                # Only the argument text is needed from here on.  It is taken
                # from the element as written: rewriting "avg" to "sum" across
                # the whole element would also corrupt column names that
                # contain "avg" (avg(avg_price) -> sum_price).
                func_attr = ele[ele.find("(") + 1 : ele.rfind(")")]
                parsed_expr = [func_name]
                if func_attr == "a_a":
                    func_attr = "*"
                    expr = expr.replace("count(a_a)", "count(*)")
                    opexpr = [func_attr]
                else:
                    opexpr = newparser.parse_column_expr(func_attr)

                if len(opexpr) == 1:
                    # only a regular argument, the pos in the input has two cases:
                    #   (1) local aggregation is done:     input = [key, sum, count...]
                    #   (2) local aggregation is not done: input = [key, col1, col2]
                    arg = opexpr[0]
                    if arg in self.input:
                        parsed_expr.append(self.input.index(arg))
                    elif expr in self.input:
                        parsed_expr.append(self.input.index(expr))
                    else:
                        parsed_expr.append(-1)
                    parsed_expr.append(0)
                    parsed_expr.append(arg)
                else:
                    # argument with arithmetical operation
                    pos = []
                    args = []
                    for var in opexpr:
                        # A column that occurs twice (sum(a*a)) must become a
                        # single parameter; "lambda a,a: ..." is a SyntaxError.
                        if re.search('^[a-zA-Z][a-zA-Z0-9_.]*$', var) and var not in args:
                            pos.append(self.input.index(var))
                            args.append(var)
                    parsed_expr.append(pos)
                    parsed_expr.append(1)
                    # replace . in func with _ so qualified names are valid
                    # parameter names
                    tempargs = ",".join(args)
                    for eacharg in args:
                        newarg = eacharg.replace(".", "_")
                        func_attr = func_attr.replace(eacharg, newarg)
                        tempargs = tempargs.replace(eacharg, newarg)
                    # NOTE(security): eval() compiles text derived from
                    # self.function; if those strings can come from an
                    # untrusted query this executes arbitrary code -- confirm
                    # they are validated upstream.
                    ao = eval("lambda %s:%s" % (tempargs, func_attr))
                    parsed_expr.append(ao)

                if expr in self.output:
                    parsed_expr.append(self.output.index(expr))
                else:
                    parsed_expr.append(-1)
                self.expr.append(parsed_expr)
                outer_ao_pos.append(fun_counter - 1)
            elif re.search('^[a-zA-Z][a-zA-Z0-9_.]*$', ele):
                # column element: a, b. it should be one of the group key
                # NOTE(review): the position is an index into self.input while
                # the other entries are indexes into self.expr, and the column
                # is not added as a parameter of the outer callable built
                # below -- verify against the code that consumes outer_ao_pos.
                pos = self.input.index(ele)
                outer_ao_pos.append(pos)
            else:
                # other operator element: + - * / ^
                continue

        cur_pos = pos_map[expr_num]
        if cur_pos >= len(self.expr):
            # the exception that select sum(a), count(*), avg(a) ... does not
            # need to do anything for avg(a)
            continue
        if len(_expr) == 1:
            self.expr[cur_pos].append(0)
        else:
            self.expr[cur_pos].append(1)
            self.expr[cur_pos].append(outer_ao_pos)
            # NOTE(security): same eval() caveat as for the argument callable.
            # The parameters are all _col<N> names, so no "." -> "_" renaming
            # is needed here.
            self.expr[cur_pos].append(
                eval("lambda %s:%s" % (",".join(new_args), new_expr)))
    self.pos_map = pos_map
    return True, None