def test_aggregation_parsing(aggregation, expected_function):
    """Check that a raw (function, column, alias) triple parses into the expected expression."""
    events_dataset = get_dataset("events")
    func_name, column, alias = aggregation[0], aggregation[1], aggregation[2]
    parsed = parse_aggregation(
        func_name,
        column,
        alias,
        events_dataset.get_abstract_columnset(),
        set(),
    )
    # The expected function doubles as the failure message for easier debugging.
    assert parsed == expected_function, expected_function
def test_aggregation_parsing(aggregation: Any, expected_function: FunctionCall) -> None:
    """Check that a raw (function, column, alias) triple parses into the expected FunctionCall."""
    events_entity = get_entity(EntityKey.EVENTS)
    func_name, column, alias = aggregation[0], aggregation[1], aggregation[2]
    parsed = parse_aggregation(
        func_name,
        column,
        alias,
        events_entity.get_data_model(),
        set(),
    )
    # The expected function doubles as the failure message for easier debugging.
    assert parsed == expected_function, expected_function
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    """Build a Query AST from a legacy JSON-style query body.

    Parses, in order: aggregations, groupby, selected columns, arrayjoin,
    conditions, having, and orderby. Invalid structures raise
    ParsingException rather than asserting.
    """

    def build_selected_expressions(
        raw_expressions: Sequence[Any],
    ) -> List[SelectedExpression]:
        # Turn each raw selected column into a SelectedExpression, deriving
        # the display name from the raw string or the parsed alias.
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(
                tuplify(raw_expression), entity.get_data_model(), set()
            )
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression
                    if isinstance(raw_expression, str)
                    else exp.alias,
                    expression=exp,
                )
            )
        return output

    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException(
                (
                    f"Invalid aggregation structure {aggregation}. "
                    "It must be a sequence containing expression, column and alias."
                )
            )
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        # Normalize falsy column expressions (None, "") to an empty list.
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        # Normalize falsy aliases (e.g. "") to None.
        alias = alias if alias else None

        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            )
        )

    groupby_clause = build_selected_expressions(to_list(body.get("groupby", [])))

    # The select clause is the concatenation of groupby, aggregations and
    # explicitly selected columns, in that order.
    select_clause = (
        groupby_clause
        + aggregations
        + build_selected_expressions(body.get("selected_columns", []))
    )

    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin}
        )
    else:
        array_join_expr = None
        # No explicit arrayjoin clause: scan the select clause for
        # arrayJoin(...) calls and collect the columns they join on.
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter."
                        )
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accepts columns or functions that do not
                        # reference columns. We could not say whether we are
                        # actually arrayjoining on the values of the column
                        # if it is nested in an arbitrary function. But
                        # functions of literals are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    # array_join_cols is passed so that condition parsing can treat the
    # joined columns appropriately.
    where_expr = parse_conditions_to_expr(
        body.get("conditions", []), entity, array_join_cols
    )
    having_expr = parse_conditions_to_expr(
        body.get("having", []), entity, array_join_cols
    )

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            # A leading "-" on the column name means descending order.
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException(
                    (
                        f"Invalid Order By clause {orderby}. If the Order By is a string, "
                        "it must respect the format `[-]column`"
                    )
                )
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            # For function order-bys the "-" prefix is on the function name.
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException(
                    (
                        f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                        "the function name must respect the format `[-]func_name`"
                    )
                )
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (
                    f"Invalid Order By clause {orderby}. The Clause was neither "
                    "a string nor a function call."
                )
            )
        orderby_parsed = parse_expression(
            tuplify(orderby), entity.get_data_model(), set()
        )
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            )
        )

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        # groupby holds bare expressions; the named versions already live
        # in selected_columns.
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
def _parse_query_impl(body: MutableMapping[str, Any], dataset: Dataset) -> Query:
    """Build a Query AST from a legacy JSON-style query body.

    Parses, in order: aggregations, groupby, selected columns, arrayjoin,
    conditions, having, and orderby, then attaches the dataset's read-schema
    data source.

    Raises:
        ValueError: if an aggregation or order-by clause is malformed.
    """
    aggregate_exprs = []
    for aggregation in body.get("aggregations", []):
        # Validate with a real exception: `assert` is stripped under -O,
        # which would let malformed input crash later with obscure errors.
        if not isinstance(aggregation, (list, tuple)):
            raise ValueError(
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            )
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        # Normalize falsy column expressions (None, "") to an empty list.
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        # Normalize falsy aliases (e.g. "") to None.
        alias = alias if alias else None
        aggregate_exprs.append(
            parse_aggregation(aggregation_function, column_expr, alias)
        )

    groupby_exprs = [
        parse_expression(tuplify(group_by))
        for group_by in to_list(body.get("groupby", []))
    ]
    select_exprs = [
        parse_expression(tuplify(select))
        for select in body.get("selected_columns", [])
    ]
    # Selected columns are groupby + aggregations + explicit selects, in order.
    selected_cols = groupby_exprs + aggregate_exprs + select_exprs

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        array_join_expr: Optional[Expression] = parse_expression(body["arrayjoin"])
    else:
        array_join_expr = None

    # arrayjoin is passed so condition parsing can treat the joined column
    # appropriately.
    where_expr = parse_conditions_to_expr(
        body.get("conditions", []), dataset, arrayjoin
    )
    having_expr = parse_conditions_to_expr(
        body.get("having", []), dataset, arrayjoin
    )

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            # A leading "-" on the column name means descending order.
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ValueError(f"Invalid Order By clause {orderby}")
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            # For function order-bys the "-" prefix is on the function name.
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ValueError(f"Invalid Order By clause {orderby}")
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ValueError(f"Invalid Order By clause {orderby}")
        orderby_parsed = parse_expression(tuplify(orderby))
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            )
        )

    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    return Query(
        body,
        source,
        selected_columns=selected_cols,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=groupby_exprs,
        having=having_expr,
        order_by=orderby_exprs,
    )
def test_aggregation_parsing(aggregation, expected_function):
    """Check that a raw (function, column, alias) triple parses as expected."""
    func_name, column, alias = aggregation[0], aggregation[1], aggregation[2]
    parsed = parse_aggregation(func_name, column, alias)
    assert parsed == expected_function