Ejemplo n.º 1
0
    def test_set(self, dialect):
        """SET statements: a variable-assignment tuple and SET NAMES with COLLATE."""

        def check(query, expected):
            # Parse and compare both the tree form and the rendered SQL.
            result = parse_sql(query, dialect=dialect)
            assert result.to_tree() == expected.to_tree()
            assert str(result) == str(expected)

        check(
            "set var1 = NULL, var2 = 10",
            Set(arg=Tuple(items=[
                BinaryOperation('=', args=[Identifier('var1'), NullConstant()]),
                BinaryOperation('=', args=[Identifier('var2'), Constant(10)]),
            ])),
        )
        check(
            "SET NAMES some_name collate default",
            Set(category="names",
                arg=Identifier('some_name'),
                params={'COLLATE': 'DEFAULT'}),
        )
        check(
            "SET NAMES some_name collate 'utf8mb4_general_ci'",
            Set(category="names",
                arg=Identifier('some_name'),
                params={'COLLATE': Constant('utf8mb4_general_ci')}),
        )
Ejemplo n.º 2
0
    def test_window_function(self, dialect):
        """Window functions: OVER () with every combination of PARTITION BY / ORDER BY."""
        cases = [
            # (query, extra WindowFunction kwargs beyond the aggregate itself)
            ("select SUM(col0) OVER (PARTITION BY col1 order by col2) as al from table1 ",
             dict(partition=[Identifier('col1')],
                  order_by=[OrderBy(field=Identifier('col2'))],
                  alias=Identifier('al'))),
            # no partition
            ("select SUM(col0) OVER (order by col2) from table1 ",
             dict(order_by=[OrderBy(field=Identifier('col2'))])),
            # no order by
            ("select SUM(col0) OVER (PARTITION BY col1) from table1 ",
             dict(partition=[Identifier('col1')])),
            # just over()
            ("select SUM(col0) OVER () from table1 ",
             dict()),
        ]
        for query, extra in cases:
            expected_ast = Select(
                targets=[
                    WindowFunction(
                        function=Function(op='sum', args=[Identifier('col0')]),
                        **extra,
                    )
                ],
                from_table=Identifier('table1'),
            )
            ast = parse_sql(query, dialect=dialect)
            assert str(ast) == str(expected_ast)
            assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 3
0
    def test_select_group_by(self, dialect):
        """GROUP BY: single and multiple grouping columns parse and round-trip."""
        # Plain literals: the originals were f-strings with no placeholders.
        sql = 'SELECT column FROM tab WHERE column != 1 GROUP BY column1'
        ast = parse_sql(sql, dialect=dialect)
        # The statement must round-trip (case-insensitively).
        assert str(ast).lower() == sql.lower()

        sql = 'SELECT column FROM tab WHERE column != 1 GROUP BY column1, column2'
        ast = parse_sql(sql, dialect=dialect)

        # Single selected column.
        assert isinstance(ast, Select)
        assert len(ast.targets) == 1
        assert isinstance(ast.targets[0], Identifier)
        assert ast.targets[0].parts[0] == 'column'

        # FROM clause.
        assert isinstance(ast.from_table, Identifier)
        assert ast.from_table.parts[0] == 'tab'

        # WHERE clause keeps the != operator.
        assert isinstance(ast.where, BinaryOperation)
        assert ast.where.op == '!='

        # Both grouping columns, in source order.
        assert isinstance(ast.group_by, list)
        assert isinstance(ast.group_by[0], Identifier)
        assert ast.group_by[0].parts[0] == 'column1'
        assert isinstance(ast.group_by[1], Identifier)
        assert ast.group_by[1].parts[0] == 'column2'

        assert str(ast).lower() == sql.lower()
Ejemplo n.º 4
0
    def test_select_order_by(self, dialect):
        """ORDER BY: implicit 'default' direction plus explicit ASC/DESC keys."""
        # Plain literals: the originals were f-strings with no placeholders.
        sql = 'SELECT column1 FROM tab ORDER BY column2'
        ast = parse_sql(sql, dialect=dialect)
        assert str(ast).lower() == sql.lower()

        assert len(ast.order_by) == 1
        assert isinstance(ast.order_by[0], OrderBy)
        assert isinstance(ast.order_by[0].field, Identifier)
        assert ast.order_by[0].field.parts[0] == 'column2'
        # No explicit direction parses as the string 'default'.
        assert ast.order_by[0].direction == 'default'

        sql = 'SELECT column1 FROM tab ORDER BY column2, column3 ASC, column4 DESC'
        ast = parse_sql(sql, dialect=dialect)
        assert str(ast).lower() == sql.lower()

        assert len(ast.order_by) == 3

        # Each (field, direction) pair, in source order.
        for i, (col, direction) in enumerate([('column2', 'default'),
                                              ('column3', 'ASC'),
                                              ('column4', 'DESC')]):
            assert isinstance(ast.order_by[i], OrderBy)
            assert isinstance(ast.order_by[i].field, Identifier)
            assert ast.order_by[i].field.parts[0] == col
            assert ast.order_by[i].direction == direction
Ejemplo n.º 5
0
    def test_create_predictor_quotes(self):
        """CREATE PREDICTOR with a backtick-quoted integration, plus OR REPLACE."""
        expected_ast = CreatePredictor(
            name=Identifier('xxx'),
            integration_name=Identifier('yyy'),
            query_str="SELECT * FROM zzz",
            datasource_name=Identifier('x'),
            targets=[Identifier('sss')],
        )

        sql = """CREATE PREDICTOR xxx 
                 FROM `yyy` 
                  (SELECT * FROM zzz)
                  AS x 
                  PREDICT sss
                """
        ast = parse_sql(sql, dialect='mindsdb')
        assert to_single_line(str(ast)) == to_single_line(str(expected_ast))
        assert ast.to_tree() == expected_ast.to_tree()

        # The OR REPLACE variant differs only in the is_replace flag.
        sql = """CREATE or REPLACE PREDICTOR xxx 
                         FROM `yyy` 
                          (SELECT * FROM zzz)
                          AS x 
                          PREDICT sss
                        """
        ast = parse_sql(sql, dialect='mindsdb')
        expected_ast.is_replace = True
        assert to_single_line(str(ast)) == to_single_line(str(expected_ast))
        assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 6
0
    def test_drop_database(self, dialect):
        """DROP DATABASE with/without IF EXISTS; DROP SCHEMA as a synonym."""
        for sql, if_exists in (("DROP DATABASE IF EXISTS dbname", True),
                               ("DROP DATABASE dbname", False)):
            ast = parse_sql(sql, dialect=dialect)
            expected_ast = DropDatabase(name=Identifier('dbname'),
                                        if_exists=if_exists)
            assert str(ast).lower() == sql.lower()
            assert ast.to_tree() == expected_ast.to_tree()

        # DROP SCHEMA is a synonym for DROP DATABASE and renders back as such.
        ast = parse_sql("DROP SCHEMA dbname", dialect=dialect)
        expected_ast = DropDatabase(name=Identifier('dbname'))
        assert str(ast).lower() == 'DROP DATABASE dbname'.lower()
        assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 7
0
    def test_select_limit_offset_raises_nonint(self, dialect):
        """LIMIT/OFFSET must be integer literals; anything else is a parse error."""
        # Non-integer OFFSET (float literal). Plain literal: the original
        # f-string had no placeholders.
        sql = 'SELECT column FROM tab OFFSET 3.0'
        with pytest.raises(ParsingException):
            parse_sql(sql, dialect=dialect)

        # Non-integer LIMIT (string literal); the escapes in the original
        # were unnecessary inside a double-quoted string.
        sql = "SELECT column FROM tab LIMIT 'string'"
        with pytest.raises(ParsingException):
            parse_sql(sql, dialect=dialect)
Ejemplo n.º 8
0
    def test_where_and_or_precedence(self, dialect):
        """AND binds tighter than OR: `a AND b OR c` parses as `(a AND b) OR c`."""
        sql = "SELECT col1 FROM tab WHERE col1 AND col2 OR col3"
        ast = parse_sql(sql, dialect=dialect)

        and_node = BinaryOperation(
            op='and',
            args=(Identifier.from_path_str('col1'),
                  Identifier.from_path_str('col2')))
        expected_ast = Select(
            targets=[Identifier.from_path_str('col1')],
            from_table=Identifier.from_path_str('tab'),
            where=BinaryOperation(
                op='or',
                args=(and_node, Identifier.from_path_str('col3'))))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()

        # Same precedence check with comparison expressions as operands.
        sql = "SELECT col1 FROM tab WHERE col1 = 1 AND col2 = 1 OR col3 = 1"
        ast = parse_sql(sql, dialect=dialect)

        def eq_one(col):
            # Build the `col = 1` comparison node.
            return BinaryOperation(
                op='=',
                args=(Identifier.from_path_str(col), Constant(1)))

        expected_ast = Select(
            targets=[Identifier.from_path_str('col1')],
            from_table=Identifier.from_path_str('tab'),
            where=BinaryOperation(
                op='or',
                args=(BinaryOperation(op='and',
                                      args=(eq_one('col1'), eq_one('col2'))),
                      eq_one('col3'))))

        assert str(ast).lower() == sql.lower()
        assert str(ast) == str(expected_ast)
        assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 9
0
    def test_select_group_by_elaborate(self, dialect):
        """GROUP BY with an aliased aggregate target round-trips exactly."""
        # NOTE(review): the `dialect` fixture is accepted but was never passed
        # to parse_sql here; the default-dialect call is preserved to avoid a
        # behavior change — confirm whether `dialect=dialect` was intended.
        query = """SELECT column1, column2, sum(column3) AS total FROM t1 GROUP BY column1, column2"""

        # Parse once instead of twice (the original re-parsed for each assert).
        ast = parse_sql(query)

        assert str(ast) == query
        assert str(ast) == str(Select(
            targets=[Identifier(parts=["column1"]),
                     Identifier(parts=["column2"]),
                     Function(op="sum",
                              args=[Identifier(parts=["column3"])],
                              alias=Identifier('total'))],
            from_table=Identifier(parts=['t1']),
            group_by=[Identifier(parts=["column1"]),
                      Identifier(parts=["column2"])]))
Ejemplo n.º 10
0
    def test_type_convert(self, dialect):
        """CONVERT(expr, type) and CONVERT(expr USING type) both map to TypeCast."""
        # Plain literals: the originals were f-strings with no placeholders.
        sql = """SELECT CONVERT(column1, float)"""
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[TypeCast(type_name='float', arg=Identifier(parts=['column1']))])
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)

        # USING form with a parenthesized expression argument.
        sql = """SELECT CONVERT((column1 + column2) USING float)"""
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[TypeCast(type_name='float', arg=BinaryOperation(op='+', parentheses=True, args=[
            Identifier(parts=['column1']), Identifier(parts=['column2'])]))])
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
Ejemplo n.º 11
0
    def test_select_from_elaborate(self, dialect):
        """Star, plain, aliased, and arithmetic targets all round-trip."""
        # NOTE(review): `dialect` is accepted but unused in the original; the
        # default-dialect call is preserved — confirm intent.
        query = """SELECT *, column1, column1 AS aliased, column1 + column2 FROM t1"""

        # Parse once instead of twice (the original re-parsed for each assert).
        ast = parse_sql(query)
        assert str(ast) == query
        assert str(ast) == str(Select(
            targets=[Star(),
                     Identifier(parts=["column1"]),
                     Identifier(parts=["column1"], alias=Identifier('aliased')),
                     BinaryOperation(op="+",
                                     args=(Identifier(parts=['column1']),
                                           Identifier(parts=['column2'])))],
            from_table=Identifier(parts=['t1'])))
Ejemplo n.º 12
0
    def test_drop_predictor_table_syntax_ok(self, dialect):
        """DROP TABLE, with and without IF EXISTS, maps onto DropTables."""
        cases = [
            ("DROP TABLE mindsdb.tbl", {}),
            ("DROP TABLE if exists mindsdb.tbl", {'if_exists': True}),
        ]
        for sql, extra in cases:
            ast = parse_sql(sql, dialect=dialect)
            expected_ast = DropTables(tables=[Identifier('mindsdb.tbl')],
                                      **extra)
            assert str(ast) == str(expected_ast)
            assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 13
0
    def test_create_predictor_invalid_json(self):
        """A USING clause whose payload is not valid JSON must fail to parse."""
        sql = """CREATE PREDICTOR pred
                FROM integration_name 
                (select * FROM table)
                AS ds_name
                PREDICT f1 as f1_alias, f2
                ORDER BY f_order_1 ASC, f_order_2, f_order_3 DESC
                GROUP BY f_group_1, f_group_2
                WINDOW 100
                HORIZON 7
                USING 'not_really_json'"""

        # The bare string after USING is not a JSON object, so the mindsdb
        # dialect parser is expected to reject the whole statement.
        with pytest.raises(ParsingException):
            parse_sql(sql, dialect='mindsdb')
Ejemplo n.º 14
0
    def test_select_variable(self):
        """User (@name) and system (@@name) variables parse into Variable nodes."""
        query = 'SELECT @version'
        parsed = parse_sql(query, dialect='mysql')
        expected = Select(targets=[Variable('version')])
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed).lower() == query.lower()
        assert str(parsed) == str(expected)

        # A double @@ marks a system variable.
        query = 'SELECT @@version'
        parsed = parse_sql(query, dialect='mysql')
        expected = Select(targets=[Variable('version', is_system_var=True)])
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed).lower() == query.lower()
        assert str(parsed) == str(expected)
Ejemplo n.º 15
0
    def test_select_from_predictor_get_columns(self):
        """A predictor query with WHERE 1 = 0 plans as get-columns + projection."""
        sql = 'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0'
        query = parse_sql(sql, dialect='mindsdb')

        expected_query = Select(
            targets=[Identifier('GDP_per_capita_USD')],
            from_table=Identifier('hdi_predictor_external'),
            where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]))
        assert query.to_tree() == expected_query.to_tree()

        # The planner should emit a column-fetch step followed by a projection.
        get_columns = GetPredictorColumns(
            namespace='mindsdb',
            predictor=Identifier('hdi_predictor_external'))
        project = ProjectStep(dataframe=Result(0),
                              columns=[Identifier('GDP_per_capita_USD')])
        expected_plan = QueryPlan(
            predictor_namespace='mindsdb',
            default_namespace='mindsdb',
            steps=[get_columns, project],
        )

        plan = plan_query(query,
                          predictor_namespace='mindsdb',
                          default_namespace='mindsdb',
                          predictor_metadata={'hdi_predictor_external': {}})

        assert plan.steps == expected_plan.steps
Ejemplo n.º 16
0
 def test_drop_predictor_if_exists(self):
     """DROP PREDICTOR IF EXISTS sets the if_exists flag on DropPredictor."""
     query = "DROP PREDICTOR IF EXISTS mindsdb.pred"
     parsed = parse_sql(query, dialect='mindsdb')
     expected = DropPredictor(name=Identifier('mindsdb.pred'), if_exists=True)
     assert str(parsed) == str(expected)
     assert parsed.to_tree() == expected.to_tree()
Ejemplo n.º 17
0
 def test_drop_predictor_ok(self):
     """Plain DROP PREDICTOR round-trips and matches the expected AST."""
     query = "DROP PREDICTOR mindsdb.pred"
     parsed = parse_sql(query, dialect='mindsdb')
     expected = DropPredictor(name=Identifier('mindsdb.pred'))
     assert str(parsed).lower() == query.lower()
     assert str(parsed) == str(expected)
     assert parsed.to_tree() == expected.to_tree()
Ejemplo n.º 18
0
    def test_select_limit_offset(self, dialect):
        """LIMIT and OFFSET parse into Constant nodes on the Select."""
        # Plain literal: the original f-string had no placeholders.
        sql = 'SELECT column FROM tab LIMIT 5 OFFSET 3'
        ast = parse_sql(sql, dialect=dialect)
        assert str(ast).lower() == sql.lower()

        assert ast.limit == Constant(value=5)
        assert ast.offset == Constant(value=3)
Ejemplo n.º 19
0
    def test_alter_table_keys(self, dialect):
        """ALTER TABLE ... DISABLE/ENABLE KEYS maps to an AlterTable arg string."""
        for action in ('disable', 'enable'):
            sql = f"alter table some_table {action} keys"
            ast = parse_sql(sql, dialect=dialect)
            expected_ast = AlterTable(target=Identifier('some_table'),
                                      arg=f'{action} keys')
            assert ast.to_tree() == expected_ast.to_tree()
            assert str(ast) == str(expected_ast)
Ejemplo n.º 20
0
    def test_rollback(self, dialect):
        """A bare ROLLBACK parses into a RollbackTransaction node."""
        query = "rollback"
        parsed = parse_sql(query, dialect=dialect)
        expected = RollbackTransaction()
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed) == str(expected)
Ejemplo n.º 21
0
    def test_start_transaction(self, dialect):
        """START TRANSACTION parses into a StartTransaction node."""
        query = "start transaction"
        parsed = parse_sql(query, dialect=dialect)
        expected = StartTransaction()
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed) == str(expected)
Ejemplo n.º 22
0
    def test_select_from_inner_join(self, dialect):
        """INNER JOIN with a compound (AND-ed) ON condition."""
        sql = """SELECT * FROM t1 INNER JOIN t2 ON t1.x1 = t2.x2 and t1.x2 = t2.x2"""

        # The two equality legs of the join condition.
        first_eq = BinaryOperation(
            op='=',
            args=(Identifier(parts=['t1', 'x1']),
                  Identifier(parts=['t2', 'x2'])))
        second_eq = BinaryOperation(
            op='=',
            args=(Identifier(parts=['t1', 'x2']),
                  Identifier(parts=['t2', 'x2'])))

        expected_ast = Select(
            targets=[Star()],
            from_table=Join(
                join_type=JoinType.INNER_JOIN,
                left=Identifier(parts=['t1']),
                right=Identifier(parts=['t2']),
                condition=BinaryOperation(op='and',
                                          args=[first_eq, second_eq])))
        ast = parse_sql(sql, dialect=dialect)

        assert ast == expected_ast
Ejemplo n.º 23
0
 def test_select_from_engines(self, dialect):
     """`select * from engines` parses as a plain single-table select."""
     query = 'select * from engines'
     parsed = parse_sql(query, dialect=dialect)
     expected = Select(targets=[Star()],
                       from_table=Identifier.from_path_str('engines'))
     assert parsed.to_tree() == expected.to_tree()
     assert str(parsed) == str(expected)
Ejemplo n.º 24
0
    def test_select_subquery_target(self, dialect):
        """A parenthesized subselect as a target, with and without an alias."""
        # Plain literals: the originals were f-strings with no placeholders.
        sql = """SELECT *, (SELECT 1) FROM t1"""
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[Star(), Select(targets=[Constant(1)], parentheses=True)],
                              from_table=Identifier(parts=['t1']))
        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)

        # Same subquery target, now aliased.
        sql = """SELECT *, (SELECT 1) AS ones FROM t1"""
        ast = parse_sql(sql, dialect=dialect)
        expected_ast = Select(targets=[Star(), Select(targets=[Constant(1)], alias=Identifier('ones'), parentheses=True)],
                              from_table=Identifier(parts=['t1']))
        assert str(ast).lower() == sql.lower()
        assert ast.to_tree() == expected_ast.to_tree()
        assert str(ast) == str(expected_ast)
Ejemplo n.º 25
0
    def test_common_like_double_where_from_in_modes(self, dialect):
        """SHOW with modes, category, FROM/IN tables, LIKE and WHERE combined."""
        categories = ['COLUMNS', 'FIELDS', 'INDEX', 'INDEXES', 'KEYS']
        modes = [['EXTENDED'], ['FULL'], ['EXTENDED', 'FULL']]

        for cat in categories:
            for mode in modes:
                mode_str = ' '.join(mode)
                sql = (f"SHOW {mode_str} {cat} from tab1 from db1 "
                       f"in tab2 in db2 like 'pattern' where a=1")
                ast = parse_sql(sql, dialect=dialect)

                where_cond = BinaryOperation(op='=',
                                             args=[Identifier('a'),
                                                   Constant(1)])
                expected_ast = Show(category=cat,
                                    like='pattern',
                                    from_table=Identifier('db1.tab1'),
                                    in_table=Identifier('db2.tab2'),
                                    modes=mode,
                                    where=where_cond)

                assert str(ast) == str(expected_ast)
                assert ast.to_tree() == expected_ast.to_tree()
Ejemplo n.º 26
0
    def test_commit(self, dialect):
        """A bare COMMIT parses into a CommitTransaction node."""
        query = "commit"
        parsed = parse_sql(query, dialect=dialect)
        expected = CommitTransaction()
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed) == str(expected)
Ejemplo n.º 27
0
def query_df(df, query):
    """Perform a simple query ('select' from one table, without subqueries
    and joins) on a DataFrame.

    Args:
        df (pandas.DataFrame): data
        query (mindsdb_sql.parser.ast.Select | str): select query

    Returns:
        pandas.DataFrame

    Raises:
        Exception: if the query is not a plain SELECT from a single table.
    """

    # Re-parse to normalize: accepts either an AST node or raw SQL text.
    query = parse_sql(str(query), dialect='mysql')
    if not isinstance(query, Select) or not isinstance(query.from_table,
                                                       Identifier):
        raise Exception(
            "Only 'SELECT from TABLE' statements supported for internal query")

    # Rewrite the query to address the DataFrame registered as 'df_table',
    # stripping table qualifiers from target and ORDER BY column references.
    query.from_table.parts = ['df_table']
    for identifier in query.targets:
        if isinstance(identifier, Identifier):
            identifier.parts = [identifier.parts[-1]]
    if isinstance(query.order_by, list):
        for orderby in query.order_by:
            if isinstance(orderby, OrderBy) and isinstance(
                    orderby.field, Identifier):
                orderby.field.parts = [orderby.field.parts[-1]]
    _remove_table_name(query.where)

    # FIXME https://github.com/mindsdb/mindsdb_sql/issues/130
    # we need a way to dump the query in the postgres dialect
    sql_query = str(query).replace('`', '')
    res = duckdb.query_df(df, 'df_table', sql_query)
    result_df = res.df()
    # Normalize duckdb's NaN missing-value markers to None.
    result_df = result_df.where(pd.notnull(result_df), None)
    return result_df
Ejemplo n.º 28
0
    def test_explain(self, dialect):
        """EXPLAIN <table> parses into an Explain node targeting the table."""
        query = "explain some_table"
        parsed = parse_sql(query, dialect=dialect)
        expected = Explain(target=Identifier('some_table'))
        assert parsed.to_tree() == expected.to_tree()
        assert str(parsed) == str(expected)
Ejemplo n.º 29
0
 def test_drop_datasource(self):
     """DROP DATASOURCE parses into a DropDatasource node and round-trips."""
     query = "DROP DATASOURCE dsname"
     parsed = parse_sql(query, dialect='mindsdb')
     expected = DropDatasource(name=Identifier('dsname'))
     assert str(parsed).lower() == query.lower()
     assert str(parsed) == str(expected)
     assert parsed.to_tree() == expected.to_tree()
Ejemplo n.º 30
0
    def df(self):
        """Materialize this query source as a pandas DataFrame.

        Only the 'view_query' source type is handled: the view's stored query
        is registered as a temporary datasource in the data store, its
        DataFrame is read, and the datasource is deleted again.

        Returns:
            pandas.DataFrame

        Raises:
            Exception: if self.source_type is anything other than 'view_query'.
        """
        # Controllers wrapped so every call carries this company's id.
        view_interface = WithKWArgsWrapper(ViewController(),
                                           company_id=self.company_id)

        integration_controller = WithKWArgsWrapper(IntegrationController(),
                                                   company_id=self.company_id)

        data_store = WithKWArgsWrapper(DataStore(), company_id=self.company_id)

        query = self.query
        if self.source_type == 'view_query':
            # Accept either raw SQL text or an already-parsed AST.
            if isinstance(query, str):
                query = parse_sql(query, dialect='mysql')
            # NOTE(review): query_str is computed but never used below —
            # confirm whether it is needed or is dead code.
            query_str = str(query)

            # The view is looked up by the last part of the FROM identifier.
            table = query.from_table.parts[-1]
            view_metadata = view_interface.get(name=table)

            integration = integration_controller.get_by_id(
                view_metadata['integration_id'])
            integration_name = integration['name']

            # Register the view's query under a fresh datasource name, read
            # its DataFrame, and always clean the datasource up afterwards.
            dataset_name = data_store.get_vacant_name(table)
            data_store.save_datasource(dataset_name, integration_name,
                                       {'query': view_metadata['query']})
            try:
                dataset_object = data_store.get_datasource_obj(dataset_name)
                data_df = dataset_object.df
            finally:
                data_store.delete_datasource(dataset_name)
        else:
            raise Exception(f'Unknown source_type: {self.source_type}')
        return data_df