def column_expr(
    dataset,
    column_name,
    query: Query,
    parsing_context: ParsingContext,
    alias=None,
    aggregate=None,
):
    """
    Expand special column names into more complex expressions and return
    the expression combined with its sanitized alias.

    Needs the body of the request for some extra data used to expand
    column expressions.
    """
    assert column_name or aggregate
    assert not aggregate or (aggregate and (column_name or alias))
    column_name = column_name or ""

    # A complex (function-style) expression: delegate wholesale.
    if is_function(column_name, 0):
        return complex_column_expr(dataset, column_name, query, parsing_context)

    # An aggregate applied over a complex expression: repackage into the
    # [function, parameters, alias] shape and delegate.
    if isinstance(column_name, (list, tuple)) and aggregate:
        return complex_column_expr(
            dataset, [aggregate, column_name, alias], query, parsing_context
        )

    # A quoted literal: strip the surrounding quotes and escape the value.
    if isinstance(column_name, str) and QUOTED_LITERAL_RE.match(column_name):
        return escape_literal(column_name[1:-1])

    expr = dataset.column_expr(column_name, query, parsing_context)
    if aggregate:
        expr = function_expr(aggregate, expr)

    # In the ORDER BY clause, column_expr may receive column names prefixed
    # with `-` (meaning ORDER BY ... DESC). The `-` must be kept outside of
    # the aliased expression when producing (COL AS alias), otherwise the
    # generated syntax is invalid. Worse, escape_alias already keeps `-`
    # outside of the escaped expression, so without this handling we would
    # end up with:
    #
    #   -events.event_id becomes (-events.event_id AS -`events.event_id`)
    #
    # So we strip the `-` before escaping/aliasing and reattach it to the
    # final expression, producing either
    #   -(events.event_id AS `events.event_id`)
    # or, if the alias already existed,
    #   -`events.event_id`
    #
    # The proper solution would be to strip the `-` before getting to column
    # processing, but this will be done with the new column abstraction.
    minus_sign, bare_column = NEGATE_RE.match(column_name).groups()
    sanitized_alias = escape_alias(alias or bare_column)
    expr_minus, stripped_expr = NEGATE_RE.match(expr).groups()
    # minus_sign and expr_minus should never be inconsistent with each other.
    # Though will ensure this works properly before moving the `-` stripping
    # at the beginning of the method to cover tags as well.
    return f"{minus_sign or expr_minus}{alias_expr(stripped_expr, sanitized_alias, parsing_context)}"
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    """
    Parse a raw query body (the legacy dict format) against an Entity and
    build a Query object: selected columns, array join, conditions,
    groupby, having and order by.

    Raises ParsingException when the body is structurally invalid
    (malformed aggregations, multi-parameter arrayJoin, columns nested in
    an arrayJoin function, or an unparseable Order By clause).
    """

    def build_selected_expressions(
        raw_expressions: Sequence[Any],
    ) -> List[SelectedExpression]:
        # Turn each raw selected column (string or nested-list expression)
        # into a SelectedExpression parsed against the entity's data model.
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(tuplify(raw_expression), entity.get_data_model(), set())
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression if isinstance(raw_expression, str) else exp.alias,
                    expression=exp,
                ))
        return output

    # Aggregations come in as [function, column, alias] triples.
    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException((
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            ))
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        # Normalize falsy column/alias values (None, "", []) to [] / None.
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None
        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            ))

    groupby_clause = build_selected_expressions(
        to_list(body.get("groupby", [])))

    # The select clause is groupby columns, then aggregations, then the
    # explicitly selected columns, in that order.
    select_clause = (
        groupby_clause + aggregations +
        build_selected_expressions(body.get("selected_columns", [])))

    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin})
    else:
        array_join_expr = None
        # No explicit arrayjoin clause: scan the select clause for
        # arrayJoin(...) calls and collect the columns they join on.
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter.")
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accepts columns or functions that do not
                        # reference columns. We could not say whether we are
                        # actually arrayjoining on the values of the column
                        # if it is nested in an arbitrary function. But
                        # functions of literals are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    where_expr = parse_conditions_to_expr(body.get("conditions", []), entity,
                                          array_join_cols)
    having_expr = parse_conditions_to_expr(body.get("having", []), entity,
                                           array_join_cols)

    # Order By entries are either "[-]column" strings or function
    # expressions whose name may carry a leading "-" for DESC.
    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is a string, "
                    "it must respect the format `[-]column`"))
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                    "the function name must respect the format `[-]func_name`"
                ))
            direction, col = match.groups()
            # Rebuild the function expression with the `-` stripped from
            # its name before parsing it.
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (f"Invalid Order By clause {orderby}. The Clause was neither "
                 "a string nor a function call."))
        orderby_parsed = parse_expression(tuplify(orderby),
                                          entity.get_data_model(), set())
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        # Only the expressions, not the SelectedExpression wrappers, go
        # into the groupby clause.
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
def _parse_query_impl(body: MutableMapping[str, Any], dataset: Dataset) -> Query:
    """
    Parse a raw query body (the legacy dict format) against a Dataset and
    build a Query object: selected columns, array join, conditions,
    groupby, having and order by.

    Raises ValueError when the body is structurally invalid (malformed
    aggregation triple or an unparseable Order By clause).
    """
    # Aggregations come in as [function, column, alias] triples.
    aggregate_exprs = []
    for aggregation in body.get("aggregations", []):
        # Validate with an explicit raise, not `assert`: asserts are
        # stripped under `python -O`, which would turn a malformed user
        # query into an obscure crash further down.
        if not isinstance(aggregation, (list, tuple)):
            raise ValueError(
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            )
        aggregation_function = aggregation[0]
        # Normalize falsy column/alias values (None, "", []) to [] / None.
        column_expr = aggregation[1] or []
        alias = aggregation[2] or None
        aggregate_exprs.append(
            parse_aggregation(aggregation_function, column_expr, alias))

    groupby_exprs = [
        parse_expression(tuplify(group_by))
        for group_by in to_list(body.get("groupby", []))
    ]
    select_exprs = [
        parse_expression(tuplify(select))
        for select in body.get("selected_columns", [])
    ]

    # The select clause is groupby columns, then aggregations, then the
    # explicitly selected columns, in that order.
    selected_cols = groupby_exprs + aggregate_exprs + select_exprs

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"])
    else:
        array_join_expr = None

    where_expr = parse_conditions_to_expr(body.get("conditions", []), dataset,
                                          arrayjoin)
    having_expr = parse_conditions_to_expr(body.get("having", []), dataset,
                                           arrayjoin)

    # Order By entries are either "[-]column" strings or function
    # expressions whose name may carry a leading "-" for DESC.
    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ValueError(f"Invalid Order By clause {orderby}")
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ValueError(f"Invalid Order By clause {orderby}")
            direction, col = match.groups()
            # Rebuild the function expression with the `-` stripped from
            # its name before parsing it.
            orderby = [col] + orderby[1:]
        else:
            raise ValueError(f"Invalid Order By clause {orderby}")
        orderby_parsed = parse_expression(tuplify(orderby))
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    return Query(
        body,
        source,
        selected_columns=selected_cols,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=groupby_exprs,
        having=having_expr,
        order_by=orderby_exprs,
    )