예제 #1
0
 def add_order_not_null(condition):
     """Combine ``condition`` with an ``<order column> is not NULL`` check.

     ``predictor_time_column_name`` is a free variable resolved outside this
     function.  When ``condition`` is ``None`` the NULL check is returned on
     its own; otherwise both are joined with ``and``.
     """
     not_null_check = BinaryOperation(
         op='is not',
         args=[Identifier(parts=[predictor_time_column_name]),
               NullConstant()])
     if condition is None:
         return not_null_check
     return BinaryOperation(op='and', args=[condition, not_null_check])
예제 #2
0
    def test_select_in_operation(self, dialect):
        """An ``IN (...)`` list in WHERE parses to a binary op over a Tuple."""
        query_text = """SELECT * FROM t1 WHERE col1 IN ("a", "b")"""

        parsed = parse_sql(query_text, dialect=dialect)

        assert isinstance(parsed, Select)
        assert parsed.where

        # Right-hand side of IN: the literal list from the query.
        in_values = Tuple(items=[Constant('a'), Constant("b")])
        expected_where = BinaryOperation(
            op='IN',
            args=[Identifier.from_path_str('col1'), in_values],
        )

        assert parsed.where.to_tree() == expected_where.to_tree()
        assert parsed.where == expected_where
예제 #3
0
    def test_operator_chained_and(self, dialect):
        """Chained ``AND`` groups to the left: ``(c1 AND c2) AND c3``."""
        sql = """SELECT column1 AND column2 AND column3"""
        ast = parse_sql(sql, dialect=dialect)

        # Left-most pair is the nested operand of the outer AND.
        first_pair = BinaryOperation(
            op='and',
            args=(Identifier.from_path_str("column1"),
                  Identifier.from_path_str("column2")))
        expected_ast = Select(targets=[
            BinaryOperation(op='AND',
                            args=(
                                first_pair,
                                Identifier.from_path_str("column3"),
                            ))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
예제 #4
0
    def test_where_and_or_precedence(self, dialect):
        """In WHERE, ``AND`` is grouped before ``OR``: ``(a AND b) OR c``.

        Checked twice: with bare column operands and with ``col = 1``
        comparisons as operands.
        """

        def column(name):
            # Identifier for a plain column/table name.
            return Identifier.from_path_str(name)

        def equals_one(name):
            # ``<name> = 1`` comparison node.
            return BinaryOperation(op='=', args=(column(name), Constant(1)))

        def select_col1_from_tab(where):
            # ``SELECT col1 FROM tab WHERE <where>``
            return Select(targets=[column('col1')],
                          from_table=column('tab'),
                          where=where)

        # Case 1: bare columns as boolean operands.
        sql = "SELECT col1 FROM tab WHERE col1 AND col2 OR col3"
        ast = parse_sql(sql, dialect=dialect)

        conjunction = BinaryOperation(op='and',
                                      args=(column('col1'), column('col2')))
        expected_ast = select_col1_from_tab(
            BinaryOperation(op='or', args=(conjunction, column('col3'))))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()

        # Case 2: comparisons as boolean operands.
        sql = "SELECT col1 FROM tab WHERE col1 = 1 AND col2 = 1 OR col3 = 1"
        ast = parse_sql(sql, dialect=dialect)

        conjunction = BinaryOperation(
            op='and', args=(equals_one('col1'), equals_one('col2')))
        expected_ast = select_col1_from_tab(
            BinaryOperation(op='or', args=(conjunction, equals_one('col3'))))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
예제 #5
0
    def test_operator_precedence_sum_mult_parentheses(self, dialect):
        """A parenthesised sum stays grouped as the left operand of ``*``."""
        sql = 'SELECT (column1 + column2) * column3'
        ast = parse_sql(sql, dialect=dialect)

        # The sum carries parentheses=True so it renders back with brackets.
        grouped_sum = BinaryOperation(
            op='+',
            args=(Identifier.from_path_str('column1'),
                  Identifier.from_path_str('column2')),
            parentheses=True)
        expected_ast = Select(targets=[
            BinaryOperation(
                op='*',
                args=(
                    grouped_sum,
                    Identifier.from_path_str('column3'),
                ),
            )
        ])

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
예제 #6
0
    def test_not_in(self, dialect):
        """``NOT   IN`` with extra spaces parses to a single ``not in`` op."""
        # The irregular spacing is intentional input; the rendered form is
        # compared against the expected AST's rendering, not the raw SQL.
        sql = """SELECT column1 NOT   IN column2"""
        ast = parse_sql(sql, dialect=dialect)

        expected_ast = Select(targets=[
            BinaryOperation(op='not in',
                            args=(Identifier.from_path_str("column1"),
                                  Identifier.from_path_str("column2")))
        ])

        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
예제 #7
0
    def test_select_varialbe_complex(self):
        """A ``@variable`` inside an ``in (SELECT ...)`` subquery is parsed.

        Runs against the ``mysql`` dialect only.
        NOTE: the method name contains a typo ("varialbe"); it is kept so
        that selecting the test by name keeps working.
        """
        sql = """SELECT * FROM tab1 WHERE column1 in (SELECT column2 + @variable FROM t2)"""
        ast = parse_sql(sql, dialect='mysql')

        # Inner SELECT: column2 + @variable FROM t2, wrapped in parentheses.
        subquery = Select(targets=[
            BinaryOperation(
                op='+',
                args=[
                    Identifier('column2'),
                    Variable('variable')
                ])
        ],
                          from_table=Identifier('t2'),
                          parentheses=True)
        expected_ast = Select(targets=[Star()],
                              from_table=Identifier('tab1'),
                              where=BinaryOperation(
                                  op='in',
                                  args=(Identifier('column1'), subquery)))

        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
예제 #8
0
    def test_is_false(self, dialect):
        """``col1 IS FALSE`` in WHERE parses to ``is`` against Constant(False)."""
        query_text = "SELECT col1 FROM t1 WHERE col1 IS FALSE"
        parsed = parse_sql(query_text, dialect=dialect)

        is_false = BinaryOperation(
            'is',
            args=(Identifier.from_path_str('col1'), Constant(False)))
        expected = Select(targets=[Identifier.from_path_str("col1")],
                          from_table=Identifier.from_path_str('t1'),
                          where=is_false)

        assert str(parsed).lower() == query_text.lower()
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed) == str(expected)
예제 #9
0
    def test_operation_converts_to_lowercase(self, dialect):
        """Upper-case ``IS`` in the SQL matches an expected op spelled ``is``."""
        sql = 'SELECT column1 IS column2 FROM tab'
        ast = parse_sql(sql, dialect=dialect)

        is_operation = BinaryOperation(
            op='is',
            args=(Identifier.from_path_str('column1'),
                  Identifier.from_path_str('column2')))
        expected_ast = Select(targets=[is_operation],
                              from_table=Identifier.from_path_str('tab'))

        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
예제 #10
0
    def test_between_with_and(self, dialect):
        """The ``AND`` inside ``BETWEEN a AND b`` is not the boolean ``AND``.

        The expected tree has a single boolean ``and`` whose operands are the
        ``>`` comparison and the whole BETWEEN operation.
        """
        query_text = "SELECT col1 FROM t1 WHERE col2 > 1 AND col1 BETWEEN a AND b"
        parsed = parse_sql(query_text, dialect=dialect)

        greater_than = BinaryOperation('>',
                                       args=[
                                           Identifier('col2'),
                                           Constant(1),
                                       ])
        between = BetweenOperation(args=(Identifier.from_path_str('col1'),
                                         Identifier.from_path_str('a'),
                                         Identifier.from_path_str('b')))
        expected = Select(
            targets=[Identifier.from_path_str("col1")],
            from_table=Identifier.from_path_str('t1'),
            where=BinaryOperation('and', args=[greater_than, between]))

        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed).lower() == query_text.lower()
        assert str(parsed) == str(expected)
예제 #11
0
    def test_operator_precedence_or_and(self, dialect):
        """``AND`` is grouped before ``OR`` regardless of which comes first.

        ``c1 OR c2 AND c3`` -> ``c1 OR (c2 AND c3)`` and
        ``c1 AND c2 OR c3`` -> ``(c1 AND c2) OR c3``.
        """
        # OR first: the AND pair is the right operand.
        sql = 'SELECT column1 OR column2 AND column3'
        ast = parse_sql(sql, dialect=dialect)

        and_pair = BinaryOperation(
            op='and',
            args=(Identifier.from_path_str('column2'),
                  Identifier.from_path_str('column3')))
        expected_ast = Select(targets=[
            BinaryOperation(op='or',
                            args=(
                                Identifier.from_path_str('column1'),
                                and_pair,
                            ))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast == expected_ast
        assert ast.to_tree() == expected_ast.to_tree()

        # AND first: the AND pair is the left operand.
        sql = 'SELECT column1 AND column2 OR column3'
        ast = parse_sql(sql, dialect=dialect)

        and_pair = BinaryOperation(
            op='and',
            args=(Identifier.from_path_str('column1'),
                  Identifier.from_path_str('column2')))
        expected_ast = Select(targets=[
            BinaryOperation(op='or',
                            args=(
                                and_pair,
                                Identifier.from_path_str('column3'),
                            ))
        ])

        assert str(ast).lower() == sql.lower()
        assert ast == expected_ast
        assert ast.to_tree() == expected_ast.to_tree()
예제 #12
0
    def test_unary_is_special_values(self, dialect):
        """``IS NULL`` / ``IS TRUE`` / ``IS FALSE`` yield the matching constant."""
        # (SQL keyword, AST node expected as the right operand)
        cases = (
            ('NULL', NullConstant()),
            ('TRUE', Constant(value=True)),
            ('FALSE', Constant(value=False)),
        )
        for sql_arg, expected_operand in cases:
            sql = f"""SELECT column1 IS {sql_arg}"""
            parsed = parse_sql(sql, dialect=dialect)

            is_operation = BinaryOperation(
                op='IS',
                args=(Identifier.from_path_str("column1"), expected_operand))
            expected = Select(targets=[is_operation])

            assert str(parsed).lower() == sql.lower()
            assert parsed.to_tree() == expected.to_tree()
예제 #13
0
    def test_select_binary_operations(self, dialect):
        """Each listed binary operator parses between two column operands.

        The operator is upper-cased in the SQL text while the expected node
        is built from the spelling in the list below.
        """
        operators = [
            '+', '-', '/', '*', '%', '=', '!=', '>', '<', '>=', '<=', 'is',
            'IS NOT', 'like', 'in', 'and', 'or', '||'
        ]
        for op in operators:
            sql = f'SELECT column1 {op.upper()} column2 FROM tab'
            parsed = parse_sql(sql, dialect=dialect)

            operation = BinaryOperation(
                op=op,
                args=(Identifier.from_path_str('column1'),
                      Identifier.from_path_str('column2')))
            expected = Select(targets=[operation],
                              from_table=Identifier.from_path_str('tab'))

            assert str(parsed).lower() == sql.lower()
            assert str(parsed) == str(expected)
            assert parsed.to_tree() == expected.to_tree()
예제 #14
0
    def plan_select_from_predictor(self, select):
        """Plan a SELECT whose FROM target is a predictor.

        Two shapes are handled:

        * ``WHERE 1 = 0`` adds a ``GetPredictorColumns`` step.
        * Anything else validates the targets, rejects GROUP BY / HAVING,
          requires a WHERE clause, extracts column values from it into a
          row dict and adds an ``ApplyPredictorRowStep``.

        A projection step over the predictor step's result is planned in
        both cases.

        Returns:
            Tuple ``(predictor_step, project_step)``.

        Raises:
            PlanningException: on a target that is not an Identifier, Star
                or Constant, on GROUP BY / HAVING, or on a missing WHERE.
        """
        predictor_namespace, predictor = get_predictor_namespace_and_name_from_identifier(
            select.from_table, self.default_namespace)

        if select.where == BinaryOperation('=',
                                           args=[Constant(1),
                                                 Constant(0)]):
            # Hardcoded mysql way of getting predictor columns
            predictor_step = self.plan.add_step(
                GetPredictorColumns(namespace=predictor_namespace,
                                    predictor=predictor))
        else:
            # Validate every target before planning anything.
            # NOTE(review): the disambiguated identifiers were collected into
            # a list that nothing read; the call is kept for whatever checks
            # it performs. Confirm whether the projection should use them.
            for target in select.targets:
                if isinstance(target, Identifier):
                    disambiguate_predictor_column_identifier(
                        target, predictor)
                # Exact-type match kept as-is: isinstance() would also admit
                # subclasses of Star / Constant.
                elif type(target) not in (Star, Constant):
                    raise PlanningException(
                        f'Unknown select target {type(target)}')

            if select.group_by or select.having:
                raise PlanningException(
                    'Unsupported operation when querying predictor. Only WHERE is allowed and required.'
                )

            where_clause = select.where
            if not where_clause:
                raise PlanningException(
                    'WHERE clause required when selecting from predictor')

            # Filled in place by the helper from the WHERE clause.
            row_dict = {}
            recursively_extract_column_values(where_clause, row_dict,
                                              predictor)

            predictor_step = self.plan.add_step(
                ApplyPredictorRowStep(namespace=predictor_namespace,
                                      predictor=predictor,
                                      row_dict=row_dict))
        project_step = self.plan_project(select, predictor_step.result)
        return predictor_step, project_step
예제 #15
0
    def plan_timeseries_predictor(self, query, table, predictor_namespace,
                                  predictor):
        """Plan data fetching and prediction for a time-series predictor.

        Reads the predictor's ``order_by_column``, ``group_by_columns`` and
        ``window`` from ``self.predictor_metadata``, validates the query and
        builds one or two SELECTs against ``table`` depending on the time
        filter found in ``query.where``:

        * BETWEEN, or ``>`` / ``>=`` against a value: a window-limited
          select over rows before the boundary plus an unlimited select
          with the original filter.
        * ``> Latest()``: one window-limited select with the time filter
          removed.
        * anything else: one unlimited select.

        Every select is ordered by the time column descending and excludes
        rows whose time column is NULL. With group-by columns, the selects
        get ``<column> = '$var[<column>]'`` conditions and are wrapped in a
        ``MapReduceStep`` over the fetched partitions.

        Returns:
            dict with keys ``'predictor'`` (the prediction step), ``'data'``
            (the data step) and ``'saved_limit'`` (the query's LIMIT).

        Raises:
            PlanningException: if the query has ORDER BY, GROUP BY, HAVING
                or OFFSET.
        """
        predictor_name = predictor.to_string(alias=False).lower()
        # to original case
        predictor_name = self.predictor_names[predictor_name]

        metadata = self.predictor_metadata[predictor_name]
        predictor_time_column_name = metadata['order_by_column']
        predictor_group_by_names = metadata['group_by_columns']
        if predictor_group_by_names is None:
            predictor_group_by_names = []
        predictor_window = metadata['window']

        if query.order_by:
            raise PlanningException(
                f'Can\'t provide ORDER BY to time series predictor, it will be taken from predictor settings. Found: {query.order_by}'
            )

        saved_limit = query.limit

        if query.group_by or query.having or query.offset:
            raise PlanningException(
                f'Unsupported query to timeseries predictor: {str(query)}')

        allowed_columns = [predictor_time_column_name.lower()]
        allowed_columns += [i.lower() for i in predictor_group_by_names]
        validate_ts_where_condition(query.where,
                                    allowed_columns=allowed_columns)

        time_filter = find_time_filter(
            query.where, time_column_name=predictor_time_column_name)

        # Shared by every integration select built below.
        order_by = [
            OrderBy(Identifier(parts=[predictor_time_column_name]),
                    direction='DESC')
        ]

        # add {order_by_field} is not null
        def add_order_not_null(condition):
            order_field_not_null = BinaryOperation(
                op='is not',
                args=[
                    Identifier(parts=[predictor_time_column_name]),
                    NullConstant()
                ])
            if condition is None:
                return order_field_not_null
            return BinaryOperation(op='and',
                                   args=[condition, order_field_not_null])

        # Two independent copies of the WHERE: `preparation_where2` is the
        # raw copy whose time filter may be replaced; `preparation_where`
        # keeps the original filter plus the NOT NULL check.
        preparation_where = copy.deepcopy(query.where)
        preparation_where2 = copy.deepcopy(preparation_where)
        preparation_where = add_order_not_null(preparation_where)

        def window_select(where):
            # Select limited to the predictor window.
            return Select(targets=[Star()],
                          from_table=table,
                          where=where,
                          order_by=order_by,
                          limit=Constant(predictor_window))

        def unlimited_select(where):
            # Select without LIMIT.
            return Select(targets=[Star()],
                          from_table=table,
                          where=where,
                          order_by=order_by)

        def history_and_requested_selects(history_time_filter):
            # Window select with the time filter swapped for
            # `history_time_filter`, plus a select with the original filter.
            history_where = replace_time_filter(preparation_where2,
                                                time_filter,
                                                history_time_filter)
            return [
                window_select(add_order_not_null(history_where)),
                unlimited_select(preparation_where),
            ]

        # Obtain integration selects
        if isinstance(time_filter, BetweenOperation):
            between_from = time_filter.args[1]
            integration_selects = history_and_requested_selects(
                BinaryOperation(
                    '<',
                    args=[
                        Identifier(predictor_time_column_name), between_from
                    ]))
        elif isinstance(
                time_filter, BinaryOperation
        ) and time_filter.op == '>' and time_filter.args[1] == Latest():
            integration_select = window_select(preparation_where)
            integration_select.where = find_and_remove_time_filter(
                integration_select.where, time_filter)
            integration_selects = [integration_select]
        elif isinstance(time_filter,
                        BinaryOperation) and time_filter.op in ('>', '>='):
            time_filter_date = time_filter.args[1]
            # Complement of the requested comparison.
            preparation_time_filter_op = {'>': '<=', '>=': '<'}[time_filter.op]
            integration_selects = history_and_requested_selects(
                BinaryOperation(
                    preparation_time_filter_op,
                    args=[
                        Identifier(predictor_time_column_name),
                        time_filter_date
                    ]))
        else:
            integration_selects = [unlimited_select(preparation_where)]

        has_group_by = len(predictor_group_by_names) > 0

        if has_group_by:
            # inject $var to queries
            for integration_select in integration_selects:
                condition = integration_select.where
                for column in predictor_group_by_names:
                    cond = BinaryOperation(
                        '=',
                        args=[Identifier(column),
                              Constant(f'$var[{column}]')])

                    # join to main condition
                    if condition is None:
                        condition = cond
                    else:
                        condition = BinaryOperation('and',
                                                    args=[condition, cond])

                integration_select.where = condition

        # one or multistep
        if len(integration_selects) == 1:
            select_partition_step = self.get_integration_select_step(
                integration_selects[0])
        else:
            select_partition_step = MultipleSteps(steps=[
                self.get_integration_select_step(s)
                for s in integration_selects
            ],
                                                  reduce='union')

        if not has_group_by:
            # ts query without grouping: fetch data step
            data_step = self.plan.add_step(select_partition_step)
        else:
            # get grouping values
            no_time_filter_query = copy.deepcopy(query)
            no_time_filter_query.where = find_and_remove_time_filter(
                no_time_filter_query.where, time_filter)
            select_partitions_step = self.plan_fetch_timeseries_partitions(
                no_time_filter_query, table, predictor_group_by_names)

            # sub-query by every grouping value
            data_step = self.plan.add_step(
                MapReduceStep(values=select_partitions_step.result,
                              reduce='union',
                              step=select_partition_step))

        predictor_step = self.plan.add_step(
            ApplyTimeseriesPredictorStep(
                output_time_filter=time_filter,
                namespace=predictor_namespace,
                dataframe=data_step.result,
                predictor=predictor,
            ))

        return {
            'predictor': predictor_step,
            'data': data_step,
            'saved_limit': saved_limit,
        }