Exemple #1
0
    def parse_func_2(self):
        """
        Parse each expression in self.function and append one descriptor per
        aggregate call to self.expr. Every aggregate call is looked up as a
        whole in self.input.

        input    -->   [count(*), sum(a), sum(a+b*c)]
        output   -->   [count(*), sum(a) + 1, avg(a+b*c)]
        function -->   ['count(*)', 'sum(a) + 1', 'avg(a+b*c)']
        ==> [
             ['count', 0, 0, 'count(*)',   0, 0],
             ['sum',   1, 0, 'sum(a)',     1, 1, [1], lambda _col1:_col1 + 1],
             ['avg',   2, 0, 'sum(a+b*c)', 2, 0]
            ]
        (illustration only: it assumes newparser.parse_column_expr returns
        each aggregate call as a single token and that all three function
        names are listed in conf.GENERAL_FUNC)

        Layout of one descriptor:
            [0] function name as written (an avg keeps the name 'avg')
            [1] index of the call text in self.input
            [2] always 0
            [3] call text, with a leading avg rewritten to sum
            [4] index of the whole expression in self.output, or -1
        The descriptor of the first call of every expression additionally
        receives:
            [5] 0 if the expression is a single token, otherwise 1
            [6] (only if [5] is 1) indexes into self.expr of the calls used
                by the expression
            [7] (only if [5] is 1) a lambda over _col<N> parameters that
                evaluates the outer arithmetic

        Side effects: may append "count(*)" to self.function, appends to
        self.expr and stores the expression-index -> self.expr-index mapping
        in self.pos_map (only on success).

        Returns (True, None) on success, or (False, message) when a function
        name is not in conf.GENERAL_FUNC. The self.input.index() lookup is
        not guarded, so a call text missing from self.input propagates that
        lookup's exception.
        """
        # if there is avg function, convert it to sum and count: make sure a
        # count(*) is available
        if any("avg(" in fun for fun in self.function):
            if "count(*)" not in self.function:
                self.function.append("count(*)")

        # sometimes, a result column has more than one function, then the pos
        # in self.expr and in self.function is not the same; fun_counter
        # counts the pos of a function in self.expr and pos_map records the
        # mapping between these two kinds of positions.
        fun_counter = 0
        pos_map = {}
        for expr_num, expr in enumerate(self.function):
            pos_map[expr_num] = fun_counter

            # "*" is replaced by the placeholder a_a before parsing
            # parse "sum(a) + 1" --> ['sum(a)', '+', '1']
            expr = expr.replace("count(*)", "count(a_a)")
            _expr = newparser.parse_column_expr(expr)

            # positions in self.expr of the functions used by the outer
            # arithmetic operation
            outer_ao_pos = []
            new_expr = expr
            new_args = []
            ParaLiteLog.debug(self.input)
            for ele in _expr:
                if not re.match(r"(.*)\((.*)\)", ele):
                    # other operator element: + - * / ^
                    continue

                # the call becomes the parameter _col<N> of the outer lambda
                col_name = "_col%s" % str(fun_counter)
                new_expr = new_expr.replace(ele, col_name)
                new_args.append(col_name)

                func_name = ele[0:ele.find("(")]
                if func_name not in conf.GENERAL_FUNC:
                    return False, "ParaLite cannot support aggregation function %s" % func_name
                if func_name == "avg":
                    # rewrite only the leading function name; replacing every
                    # "avg" would also corrupt arguments such as avg_price
                    ele = ele.replace("avg", "sum", 1)
                fun_counter += 1
                func_attr = ele[ele.find("(") + 1 : ele.rfind(")")]
                if func_attr == "a_a":
                    # undo the placeholder so the lookups below see count(*)
                    ele = ele.replace("a_a", "*")
                    expr = expr.replace("count(a_a)", "count(*)")

                parsed_expr = [func_name, self.input.index(ele), 0, ele]
                if expr in self.output:
                    parsed_expr.append(self.output.index(expr))
                else:
                    parsed_expr.append(-1)
                self.expr.append(parsed_expr)
                outer_ao_pos.append(fun_counter - 1)

            cur_pos = pos_map[expr_num]
            if cur_pos >= len(self.expr):
                # the exception that select sum(a), count(*), avg(a) ... does
                # not need to do anything for avg(a)
                continue
            if len(_expr) == 1:
                self.expr[cur_pos].append(0)
            else:
                self.expr[cur_pos].append(1)
                self.expr[cur_pos].append(outer_ao_pos)
                # NOTE(review): eval runs text derived from the parsed
                # expression; confirm it can never carry untrusted input.
                # The _col<N> names never contain ".", so they need no
                # rewriting before being used as parameter names.
                self.expr[cur_pos].append(
                    eval("lambda %s:%s" % (",".join(new_args), new_expr)))
        self.pos_map = pos_map
        return True, None
Exemple #2
0
    def parse_func_1(self):
        """
        Parse each expression in self.function and append one descriptor per
        aggregate call to self.expr. The argument of every call is resolved
        against the columns in self.input.

        input    -->   [a, b, c]
        output   -->   [count(*), sum(a) + 1, avg(a+b*c)]
        function -->   ['count(*)', 'sum(a) + 1', 'avg(a+b*c)']
        ==> [
             ['count', -1,      0, '*',                 0, 0],
             ['sum',   0,       0, 'a',                 1, 1, [1], lambda _col1:_col1 + 1],
             ['avg',   [0,1,2], 1, lambda a,b,c:a+b*c,  2, 0]
            ]
        (illustration only: it assumes newparser.parse_column_expr returns
        each aggregate call as a single token, splits a+b*c into operands
        and operators, and that all three function names are listed in
        conf.GENERAL_FUNC)

        Layout of one descriptor:
            [0] function name as written (an avg keeps the name 'avg')
            [1] plain argument: index of the argument in self.input, else
                index of the whole expression in self.input, else -1;
                arithmetic argument: list of self.input indexes of the
                distinct columns it uses
            [2] 0 for a plain argument, 1 for an arithmetic argument
            [3] the argument text, or a lambda over the distinct columns
                (with "." replaced by "_" in their names)
            [4] index of the whole expression in self.output, or -1
        The descriptor of the first call of every expression additionally
        receives:
            [5] 0 if the expression is a single token, otherwise 1
            [6] (only if [5] is 1) positions of the elements of the outer
                arithmetic: self.expr indexes for calls, self.input indexes
                for bare columns
            [7] (only if [5] is 1) a lambda over _col<N> parameters that
                evaluates the outer arithmetic

        Side effects: may append "count(*)" to self.function, appends to
        self.expr and stores the expression-index -> self.expr-index mapping
        in self.pos_map (only on success).

        Returns (True, None) on success, or (False, message) when a function
        name is not in conf.GENERAL_FUNC. The self.input.index() lookups for
        columns are not guarded, so a column missing from self.input
        propagates that lookup's exception.
        """
        # if there is avg function, convert it to sum and count: make sure a
        # count(*) is available
        if any("avg(" in fun for fun in self.function):
            if "count(*)" not in self.function:
                self.function.append("count(*)")

        # sometimes, a result column has more than one function, then the pos
        # in self.expr and in self.function is not the same; fun_counter
        # counts the pos of a function in self.expr and pos_map records the
        # mapping between these two kinds of positions.
        fun_counter = 0
        pos_map = {}
        column_pattern = '^[a-zA-Z][a-zA-Z0-9_.]*$'
        for expr_num, expr in enumerate(self.function):
            pos_map[expr_num] = fun_counter

            # "*" is replaced by the placeholder a_a before parsing
            # parse "sum(a) + 1" --> ['sum(a)', '+', '1']
            expr = expr.replace("count(*)", "count(a_a)")
            _expr = newparser.parse_column_expr(expr)

            # positions of the elements in the outer arithmetic operation
            outer_ao_pos = []
            new_expr = expr
            new_args = []
            for ele in _expr:
                if re.match(r"(.*)\((.*)\)", ele):
                    # aggregate element: sum(a); it becomes the parameter
                    # _col<N> of the outer lambda
                    col_name = "_col%s" % str(fun_counter)
                    new_expr = new_expr.replace(ele, col_name)
                    new_args.append(col_name)

                    func_name = ele[0:ele.find("(")]
                    if func_name not in conf.GENERAL_FUNC:
                        return False, "ParaLite cannot support aggregation function %s" % func_name
                    if func_name == "avg":
                        # rewrite only the leading function name; replacing
                        # every "avg" would also corrupt arguments such as
                        # avg_price
                        ele = ele.replace("avg", "sum", 1)
                    fun_counter += 1
                    func_attr = ele[ele.find("(") + 1 : ele.rfind(")")]
                    parsed_expr = [func_name]
                    if func_attr == "a_a":
                        # undo the placeholder: the argument is really "*"
                        func_attr = "*"
                        expr = expr.replace("count(a_a)", "count(*)")
                        opexpr = [func_attr]
                    else:
                        opexpr = newparser.parse_column_expr(func_attr)

                    if len(opexpr) == 1:
                        # only a regular argument, the pos in the input has
                        # two cases:
                        #   (1) local aggregation is done:
                        #           input = [key, sum, count...]
                        #   (2) local aggregation is not done:
                        #           input = [key, col1, col2]
                        arg = opexpr[0]
                        if arg in self.input:
                            parsed_expr.append(self.input.index(arg))
                        elif expr in self.input:
                            parsed_expr.append(self.input.index(expr))
                        else:
                            parsed_expr.append(-1)
                        parsed_expr.append(0)
                        parsed_expr.append(arg)
                    else:
                        # argument with arithmetical operation
                        pos = []
                        args = []
                        for var in opexpr:
                            # keep each column once: a repeated column such
                            # as sum(a*a) would otherwise yield
                            # "lambda a,a:...", which is a SyntaxError.
                            # NOTE(review): assumes the caller passes one
                            # value per entry of pos - confirm.
                            if re.search(column_pattern, var) and var not in args:
                                pos.append(self.input.index(var))
                                args.append(var)
                        parsed_expr.append(pos)
                        parsed_expr.append(1)

                        # replace . in the column names with _ so that they
                        # are valid parameter names
                        tempargs = ",".join(args)
                        for eacharg in args:
                            newarg = eacharg.replace(".", "_")
                            func_attr = func_attr.replace(eacharg, newarg)
                            tempargs = tempargs.replace(eacharg, newarg)
                        # NOTE(review): eval runs text derived from the
                        # parsed expression; confirm it can never carry
                        # untrusted input.
                        ao = eval("lambda %s:%s" % (tempargs, func_attr))
                        parsed_expr.append(ao)

                    if expr in self.output:
                        parsed_expr.append(self.output.index(expr))
                    else:
                        parsed_expr.append(-1)
                    self.expr.append(parsed_expr)
                    outer_ao_pos.append(fun_counter - 1)
                elif re.search(column_pattern, ele):
                    # column element: a, b. it should be one of the group key
                    # NOTE(review): the outer lambda below only takes the
                    # _col<N> parameters, so this column is not one of its
                    # arguments - confirm how callers evaluate it.
                    outer_ao_pos.append(self.input.index(ele))
                else:
                    # other operator element: + - * / ^
                    continue

            cur_pos = pos_map[expr_num]
            if cur_pos >= len(self.expr):
                # the exception that select sum(a), count(*), avg(a) ... does
                # not need to do anything for avg(a)
                continue
            if len(_expr) == 1:
                self.expr[cur_pos].append(0)
            else:
                self.expr[cur_pos].append(1)
                self.expr[cur_pos].append(outer_ao_pos)
                # NOTE(review): same eval caveat as above. The _col<N> names
                # never contain ".", so they need no rewriting before being
                # used as parameter names.
                self.expr[cur_pos].append(
                    eval("lambda %s:%s" % (",".join(new_args), new_expr)))
        self.pos_map = pos_map
        return True, None