def add_order_not_null(condition):
    order_field_not_null = BinaryOperation(
        op='is not',
        args=[
            Identifier(parts=[predictor_time_column_name]),
            NullConstant()
        ])
    if condition is not None:
        condition = BinaryOperation(op='and', args=[condition, order_field_not_null])
    else:
        condition = order_field_not_null
    return condition
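
# Hedged usage sketch (not from the source; the import path and the 'ts' column
# name are assumptions). add_order_not_null reads predictor_time_column_name as
# a free variable, so it is bound here before the helper is called.
from mindsdb_sql.parser.ast import BinaryOperation, Constant, Identifier, NullConstant

predictor_time_column_name = 'ts'

where = BinaryOperation('>', args=[Identifier(parts=['ts']), Constant(10)])
combined = add_order_not_null(where)   # roughly: ts > 10 AND ts IS NOT NULL
bare = add_order_not_null(None)        # with no prior condition: ts IS NOT NULL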
def test_select_in_operation(self, dialect):
    sql = """SELECT * FROM t1 WHERE col1 IN ("a", "b")"""
    ast = parse_sql(sql, dialect=dialect)
    assert isinstance(ast, Select)
    assert ast.where

    expected_where = BinaryOperation(op='IN', args=[
        Identifier.from_path_str('col1'),
        Tuple(items=[Constant('a'), Constant('b')]),
    ])
    assert ast.where.to_tree() == expected_where.to_tree()
    assert ast.where == expected_where
def test_operator_chained_and(self, dialect):
    sql = """SELECT column1 AND column2 AND column3"""
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='AND', args=(
            BinaryOperation(op='and', args=(
                Identifier.from_path_str("column1"),
                Identifier.from_path_str("column2"))),
            Identifier.from_path_str("column3"),
        ))
    ])
    assert str(ast).lower() == sql.lower()
    assert ast.to_tree() == expected_ast.to_tree()
def test_where_and_or_precedence(self, dialect):
    sql = "SELECT col1 FROM tab WHERE col1 AND col2 OR col3"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(
        targets=[Identifier.from_path_str('col1')],
        from_table=Identifier.from_path_str('tab'),
        where=BinaryOperation(op='or', args=(
            BinaryOperation(op='and', args=(
                Identifier.from_path_str('col1'),
                Identifier.from_path_str('col2'),
            )),
            Identifier.from_path_str('col3'),
        )))
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()

    sql = "SELECT col1 FROM tab WHERE col1 = 1 AND col2 = 1 OR col3 = 1"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(
        targets=[Identifier.from_path_str('col1')],
        from_table=Identifier.from_path_str('tab'),
        where=BinaryOperation(op='or', args=(
            BinaryOperation(op='and', args=(
                BinaryOperation(op='=', args=(
                    Identifier.from_path_str('col1'),
                    Constant(1),
                )),
                BinaryOperation(op='=', args=(
                    Identifier.from_path_str('col2'),
                    Constant(1),
                )),
            )),
            BinaryOperation(op='=', args=(
                Identifier.from_path_str('col3'),
                Constant(1),
            )),
        )))
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()
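
# Hedged sketch (not from the source): the same precedence can be inspected
# interactively; the tree rendering below is indicative only, the exact
# to_tree() output format may differ between versions.
from mindsdb_sql import parse_sql

ast = parse_sql("SELECT col1 FROM tab WHERE col1 AND col2 OR col3", dialect='mysql')
print(ast.where.to_tree())
# OR binds looser than AND, so the WHERE clause nests as: or(and(col1, col2), col3)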
def test_operator_precedence_sum_mult_parentheses(self, dialect):
    sql = 'SELECT (column1 + column2) * column3'
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='*', args=(
            BinaryOperation(op='+',
                            args=(Identifier.from_path_str('column1'),
                                  Identifier.from_path_str('column2')),
                            parentheses=True),
            Identifier.from_path_str('column3'),
        ))
    ])
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()
def test_not_in(self, dialect):
    sql = 'SELECT column1 NOT IN column2'
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='not in',
                        args=(Identifier.from_path_str('column1'),
                              Identifier.from_path_str('column2')))
    ])
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)
def test_select_variable_complex(self):
    sql = """SELECT * FROM tab1 WHERE column1 in (SELECT column2 + @variable FROM t2)"""
    ast = parse_sql(sql, dialect='mysql')
    expected_ast = Select(
        targets=[Star()],
        from_table=Identifier('tab1'),
        where=BinaryOperation(op='in', args=(
            Identifier('column1'),
            Select(
                targets=[
                    BinaryOperation(op='+', args=[
                        Identifier('column2'),
                        Variable('variable')
                    ])
                ],
                from_table=Identifier('t2'),
                parentheses=True))))
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
def test_is_false(self, dialect):
    sql = "SELECT col1 FROM t1 WHERE col1 IS FALSE"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(
        targets=[Identifier.from_path_str('col1')],
        from_table=Identifier.from_path_str('t1'),
        where=BinaryOperation('is', args=(Identifier.from_path_str('col1'),
                                          Constant(False))))
    assert str(ast).lower() == sql.lower()
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)
def test_operation_converts_to_lowercase(self, dialect):
    sql = 'SELECT column1 IS column2 FROM tab'
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='is', args=(Identifier.from_path_str('column1'),
                                       Identifier.from_path_str('column2'))),
    ], from_table=Identifier.from_path_str('tab'))
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()
def test_between_with_and(self, dialect):
    sql = "SELECT col1 FROM t1 WHERE col2 > 1 AND col1 BETWEEN a AND b"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(
        targets=[Identifier.from_path_str('col1')],
        from_table=Identifier.from_path_str('t1'),
        where=BinaryOperation('and', args=[
            BinaryOperation('>', args=[
                Identifier('col2'),
                Constant(1),
            ]),
            BetweenOperation(args=(Identifier.from_path_str('col1'),
                                   Identifier.from_path_str('a'),
                                   Identifier.from_path_str('b'))),
        ]))
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
def test_operator_precedence_or_and(self, dialect):
    sql = 'SELECT column1 OR column2 AND column3'
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='or', args=(
            Identifier.from_path_str('column1'),
            BinaryOperation(op='and', args=(
                Identifier.from_path_str('column2'),
                Identifier.from_path_str('column3'))),
        ))
    ])
    assert str(ast).lower() == sql.lower()
    assert ast == expected_ast
    assert ast.to_tree() == expected_ast.to_tree()

    sql = 'SELECT column1 AND column2 OR column3'
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[
        BinaryOperation(op='or', args=(
            BinaryOperation(op='and', args=(
                Identifier.from_path_str('column1'),
                Identifier.from_path_str('column2'))),
            Identifier.from_path_str('column3'),
        ))
    ])
    assert str(ast).lower() == sql.lower()
    assert ast == expected_ast
    assert ast.to_tree() == expected_ast.to_tree()
def test_unary_is_special_values(self, dialect):
    args = [('NULL', NullConstant()),
            ('TRUE', Constant(value=True)),
            ('FALSE', Constant(value=False))]
    for sql_arg, python_obj in args:
        sql = f"""SELECT column1 IS {sql_arg}"""
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[
            BinaryOperation(op='IS', args=(Identifier.from_path_str('column1'),
                                           python_obj))
        ])
        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
def test_select_binary_operations(self, dialect):
    for op in ['+', '-', '/', '*', '%', '=', '!=', '>', '<', '>=', '<=',
               'is', 'IS NOT', 'like', 'in', 'and', 'or', '||']:
        sql = f'SELECT column1 {op.upper()} column2 FROM tab'
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[
            BinaryOperation(op=op, args=(Identifier.from_path_str('column1'),
                                         Identifier.from_path_str('column2'))),
        ], from_table=Identifier.from_path_str('tab'))
        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
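
# Hedged sketch (an assumption, not from the source): the `dialect` argument to
# these tests is typically supplied by pytest parametrization; the dialect list
# and test name below are illustrative.
import pytest
from mindsdb_sql import parse_sql

@pytest.mark.parametrize('dialect', ['sqlite', 'mysql'])
def test_simple_binary_op(dialect):
    ast = parse_sql('SELECT column1 = column2 FROM tab', dialect=dialect)
    assert str(ast).lower() == 'select column1 = column2 from tab'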
def plan_select_from_predictor(self, select):
    predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
        select.from_table, self.default_namespace)

    if select.where == BinaryOperation('=', args=[Constant(1), Constant(0)]):
        # Hardcoded mysql way of getting predictor columns
        predictor_step = self.plan.add_step(
            GetPredictorColumns(namespace=predictor_namespace, predictor=predictor))
    else:
        new_query_targets = []
        for target in select.targets:
            if isinstance(target, Identifier):
                new_query_targets.append(
                    disambiguate_predictor_column_identifier(target, predictor))
            elif type(target) in (Star, Constant):
                new_query_targets.append(target)
            else:
                raise PlanningException(f'Unknown select target {type(target)}')

        if select.group_by or select.having:
            raise PlanningException('Unsupported operation when querying predictor. Only WHERE is allowed and required.')

        row_dict = {}
        where_clause = select.where
        if not where_clause:
            raise PlanningException('WHERE clause required when selecting from predictor')

        recursively_extract_column_values(where_clause, row_dict, predictor)

        predictor_step = self.plan.add_step(
            ApplyPredictorRowStep(namespace=predictor_namespace,
                                  predictor=predictor,
                                  row_dict=row_dict))

    project_step = self.plan_project(select, predictor_step.result)
    return predictor_step, project_step
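
# Hedged illustration (inferred from the step names, not from the source): for
#   SELECT * FROM mindsdb.home_rentals_model WHERE sqft = 1000 AND location = 'great'
# recursively_extract_column_values is expected to walk the AND-ed equality
# comparisons in the WHERE clause and fill row_dict roughly as
#   {'sqft': 1000, 'location': 'great'}
# which ApplyPredictorRowStep then passes to the predictor as a single input row.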
def plan_timeseries_predictor(self, query, table, predictor_namespace, predictor):
    predictor_name = predictor.to_string(alias=False).lower()
    # to original case
    predictor_name = self.predictor_names[predictor_name]

    predictor_time_column_name = self.predictor_metadata[predictor_name]['order_by_column']
    predictor_group_by_names = self.predictor_metadata[predictor_name]['group_by_columns']
    if predictor_group_by_names is None:
        predictor_group_by_names = []
    predictor_window = self.predictor_metadata[predictor_name]['window']

    if query.order_by:
        raise PlanningException(
            f'Can\'t provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}')

    saved_limit = query.limit

    if query.group_by or query.having or query.offset:
        raise PlanningException(f'Unsupported query to timeseries predictor: {str(query)}')

    allowed_columns = [predictor_time_column_name.lower()]
    if len(predictor_group_by_names) > 0:
        allowed_columns += [i.lower() for i in predictor_group_by_names]
    validate_ts_where_condition(query.where, allowed_columns=allowed_columns)

    time_filter = find_time_filter(query.where, time_column_name=predictor_time_column_name)

    order_by = [OrderBy(Identifier(parts=[predictor_time_column_name]), direction='DESC')]

    preparation_where = copy.deepcopy(query.where)

    # add {order_by_field} is not null
    def add_order_not_null(condition):
        order_field_not_null = BinaryOperation(
            op='is not',
            args=[
                Identifier(parts=[predictor_time_column_name]),
                NullConstant()
            ])
        if condition is not None:
            condition = BinaryOperation(op='and', args=[condition, order_field_not_null])
        else:
            condition = order_field_not_null
        return condition

    preparation_where2 = copy.deepcopy(preparation_where)
    preparation_where = add_order_not_null(preparation_where)

    # Obtain integration selects
    if isinstance(time_filter, BetweenOperation):
        between_from = time_filter.args[1]
        preparation_time_filter = BinaryOperation('<', args=[Identifier(predictor_time_column_name), between_from])
        preparation_where2 = replace_time_filter(preparation_where2, time_filter, preparation_time_filter)
        integration_select_1 = Select(targets=[Star()],
                                      from_table=table,
                                      where=add_order_not_null(preparation_where2),
                                      order_by=order_by,
                                      limit=Constant(predictor_window))

        integration_select_2 = Select(targets=[Star()],
                                      from_table=table,
                                      where=preparation_where,
                                      order_by=order_by)

        integration_selects = [integration_select_1, integration_select_2]
    elif isinstance(time_filter, BinaryOperation) and time_filter.op == '>' and time_filter.args[1] == Latest():
        integration_select = Select(targets=[Star()],
                                    from_table=table,
                                    where=preparation_where,
                                    order_by=order_by,
                                    limit=Constant(predictor_window))
        integration_select.where = find_and_remove_time_filter(integration_select.where, time_filter)
        integration_selects = [integration_select]
    elif isinstance(time_filter, BinaryOperation) and time_filter.op in ('>', '>='):
        time_filter_date = time_filter.args[1]
        preparation_time_filter_op = {'>': '<=', '>=': '<'}[time_filter.op]

        preparation_time_filter = BinaryOperation(preparation_time_filter_op,
                                                  args=[Identifier(predictor_time_column_name), time_filter_date])
        preparation_where2 = replace_time_filter(preparation_where2, time_filter, preparation_time_filter)
        integration_select_1 = Select(targets=[Star()],
                                      from_table=table,
                                      where=add_order_not_null(preparation_where2),
                                      order_by=order_by,
                                      limit=Constant(predictor_window))

        integration_select_2 = Select(targets=[Star()],
                                      from_table=table,
                                      where=preparation_where,
                                      order_by=order_by)
        integration_selects = [integration_select_1, integration_select_2]
    else:
        integration_select = Select(targets=[Star()],
                                    from_table=table,
                                    where=preparation_where,
                                    order_by=order_by)
        integration_selects = [integration_select]

    if len(predictor_group_by_names) == 0:
        # ts query without grouping, fetched in one step or multiple steps
        if len(integration_selects) == 1:
            select_partition_step = self.get_integration_select_step(integration_selects[0])
        else:
            select_partition_step = MultipleSteps(
                steps=[self.get_integration_select_step(s) for s in integration_selects],
                reduce='union')

        # fetch data step
        data_step = self.plan.add_step(select_partition_step)
    else:
        # inject $var placeholders into the queries
        for integration_select in integration_selects:
            condition = integration_select.where
            for column in predictor_group_by_names:
                cond = BinaryOperation('=', args=[Identifier(column), Constant(f'$var[{column}]')])

                # join to the main condition
                if condition is None:
                    condition = cond
                else:
                    condition = BinaryOperation('and', args=[condition, cond])
            integration_select.where = condition

        # one step or multiple steps
        if len(integration_selects) == 1:
            select_partition_step = self.get_integration_select_step(integration_selects[0])
        else:
            select_partition_step = MultipleSteps(
                steps=[self.get_integration_select_step(s) for s in integration_selects],
                reduce='union')

        # get grouping values
        no_time_filter_query = copy.deepcopy(query)
        no_time_filter_query.where = find_and_remove_time_filter(no_time_filter_query.where, time_filter)
        select_partitions_step = self.plan_fetch_timeseries_partitions(
            no_time_filter_query, table, predictor_group_by_names)

        # sub-query for every grouping value
        map_reduce_step = self.plan.add_step(
            MapReduceStep(values=select_partitions_step.result,
                          reduce='union',
                          step=select_partition_step))
        data_step = map_reduce_step

    predictor_step = self.plan.add_step(
        ApplyTimeseriesPredictorStep(
            output_time_filter=time_filter,
            namespace=predictor_namespace,
            dataframe=data_step.result,
            predictor=predictor,
        ))

    return {
        'predictor': predictor_step,
        'data': data_step,
        'saved_limit': saved_limit,
    }
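
# Hedged walk-through (illustrative, not from the source): suppose a predictor
# has order_by_column 'ts', window 10, and group_by_columns ['location'], and
# the incoming query filters on `ts > LATEST`. That query takes the Latest()
# branch above, so after the time filter is removed and the $var condition is
# injected, the single integration select is roughly:
#   SELECT * FROM tab
#   WHERE ts IS NOT NULL AND location = '$var[location]'
#   ORDER BY ts DESC
#   LIMIT 10
# MapReduceStep then substitutes $var[location] with each distinct value
# returned by plan_fetch_timeseries_partitions and unions the per-group results
# before ApplyTimeseriesPredictorStep runs the predictor over them.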