def test_ast_with_projection_join_filter(self):
    """A filter applied below a join must be hoisted: the compiled select
    should reference the root tables directly, carry the filter in its
    WHERE clause, and rewrite the join predicate against the root table.

    Note: removed a dead nested ``def foo()`` that was defined but never
    called (leftover scratch code).
    """
    table = self.con.table('test1')
    table2 = self.con.table('test2')

    filter_pred = table['f'] > 0
    table3 = table[filter_pred]
    join_pred = table3['g'] == table2['key']

    joined = table2.inner_join(table3, [join_pred])
    result = joined[[table3, table2['value']]]

    ast = build_ast(result)
    stmt = ast.queries[0]

    assert len(stmt.select_set) == 2
    assert len(stmt.where) == 1
    assert stmt.where[0] is filter_pred

    # Check that the join has been rebuilt to only include the root tables
    tbl = stmt.table_set
    tbl_node = tbl.op()
    assert isinstance(tbl_node, ops.InnerJoin)
    assert tbl_node.left is table2
    assert tbl_node.right is table

    # table expression substitution has been made in the predicate
    assert tbl_node.predicates[0].equals(table['g'] == table2['key'])
def test_multiple_limits(self):
    """When limit() is chained, the last-applied (innermost-in-SQL) limit
    wins in the compiled statement."""
    t = self.con.table('functional_alltypes')

    limited = t.limit(20).limit(10)
    stmt = build_ast(limited).queries[0]

    assert stmt.limit['n'] == 10
def insert(self, table_name, expr, database=None, overwrite=False,
           validate=True):
    """
    Insert into existing table

    Parameters
    ----------
    table_name : string
    expr : TableExpr
    database : string, default None
    overwrite : boolean, default False
      If True, will replace existing contents of table
    validate : boolean, default True
      If True, do more rigorous validation that schema of table being
      inserted is compatible with the existing table

    Examples
    --------
    con.insert('my_table', table_expr)

    # Completely overwrite contents
    con.insert('my_table', table_expr, overwrite=True)
    """
    if validate:
        # Cheap structural equality first; fall back to the more
        # rigorous compatibility check only on mismatch
        existing_schema = self.get_schema(table_name, database=database)
        insert_schema = expr.schema()
        if not insert_schema.equals(existing_schema):
            _validate_compatible(insert_schema, existing_schema)

    select_stmt = sql.build_ast(expr).queries[0]
    insert_stmt = ddl.InsertSelect(table_name, select_stmt,
                                   database=database,
                                   overwrite=overwrite)
    self._execute(insert_stmt)
def insert(self, table_name, expr, database=None, overwrite=False):
    """
    Insert into existing table

    Parameters
    ----------
    table_name : string
    expr : TableExpr
    database : string, default None
    overwrite : boolean, default False
      If True, will replace existing contents of table

    Examples
    --------
    con.insert('my_table', table_expr)

    # Completely overwrite contents
    con.insert('my_table', table_expr, overwrite=True)
    """
    # Compile the expression and wrap the first query as an INSERT
    select_stmt = sql.build_ast(expr).queries[0]
    insert_stmt = ddl.InsertSelect(table_name, select_stmt,
                                   database=database,
                                   overwrite=overwrite)
    self._execute(insert_stmt)
def explain(self, expr):
    """
    Query for and return the query plan associated with the indicated
    expression or SQL query.

    Returns
    -------
    plan : string
    """
    if not isinstance(expr, ir.Expr):
        # Assume a raw SQL string was passed
        query = expr
    else:
        query_ast = sql.build_ast(expr)
        if len(query_ast.queries) > 1:
            raise Exception('Multi-query expression')
        query = query_ast.queries[0].compile()

    statement = 'EXPLAIN {0}'.format(query)
    with self._execute(statement, results=True) as cur:
        plan_lines = self._get_list(cur)

    return 'Query:\n{0}\n\n{1}'.format(util.indent(query, 2),
                                       '\n'.join(plan_lines))
def _create_table(table_name, expr, database=None, can_exist=False,
                  format='parquet'):
    """Build (but do not execute) a CTAS DDL statement that materializes
    *expr* as *table_name*."""
    select = build_ast(expr).queries[0]
    return ddl.CTAS(table_name, select,
                    database=database,
                    format=format,
                    can_exist=can_exist)
def create_table(self, table_name, expr=None, schema=None, database=None,
                 format='parquet', overwrite=False, external=False,
                 path=None):
    """
    Create a new table in Impala using an Ibis table expression

    Parameters
    ----------
    table_name : string
    expr : TableExpr, optional
      If passed, creates table from select statement results
    schema : ibis.Schema, optional
      Mutually exclusive with expr, creates an empty table with a
      particular schema
    database : string, default None (optional)
    format : {'parquet'}
    overwrite : boolean, default False
      Do not create table if table with indicated name already exists
    external : boolean, default False
      Create an external table; Impala will not delete the underlying
      data when the table is dropped
    path : string, default None
      Specify the path where Impala reads and writes files for the table

    Examples
    --------
    con.create_table('new_table_name', table_expr)
    """
    # Exactly one of expr / schema must be provided
    if expr is None and schema is None:
        raise com.IbisError('Must pass expr or schema')

    if expr is not None:
        select = sql.build_ast(expr).queries[0]
        statement = ddl.CTAS(table_name, select,
                             database=database,
                             overwrite=overwrite,
                             format=format,
                             external=external,
                             path=path)
    else:
        statement = ddl.CreateTableWithSchema(
            table_name, schema, ddl.NoFormat(),
            database=database,
            format=format,
            overwrite=overwrite,
            external=external,
            path=path)

    self._execute(statement)
def create_view(self, name, expr, database=None):
    """
    Create an Impala view from a table expression

    Parameters
    ----------
    name : string
    expr : ibis TableExpr
    database : string, default None
    """
    # Compile the expression and wrap its first query in CREATE VIEW DDL
    select = sql.build_ast(expr).queries[0]
    stmt = ddl.CreateView(name, select, database=database)
    self._execute(stmt)
def create_table(self, table_name, expr=None, schema=None, database=None,
                 format='parquet', overwrite=False, external=False,
                 path=None):
    """
    Create a new table in Impala using an Ibis table expression

    Parameters
    ----------
    table_name : string
    expr : TableExpr, optional
      If passed, creates table from select statement results
    schema : ibis.Schema, optional
      Mutually exclusive with expr, creates an empty table with a
      particular schema
    database : string, default None (optional)
    format : {'parquet'}
    overwrite : boolean, default False
      Do not create table if table with indicated name already exists
    external : boolean, default False
      Create an external table; Impala will not delete the underlying
      data when the table is dropped
    path : string, default None
      Specify the path where Impala reads and writes files for the table

    Examples
    --------
    con.create_table('new_table_name', table_expr)
    """
    # One of expr / schema is required
    if expr is None and schema is None:
        raise com.IbisError('Must pass expr or schema')

    if expr is not None:
        # create-table-as-select path
        select = sql.build_ast(expr).queries[0]
        statement = ddl.CTAS(table_name, select,
                             database=database,
                             overwrite=overwrite,
                             format=format,
                             external=external,
                             path=path)
    else:
        # empty table with an explicit schema
        statement = ddl.CreateTableWithSchema(
            table_name, schema, ddl.NoFormat(),
            database=database, format=format,
            overwrite=overwrite, external=external, path=path)

    self._execute(statement)
def _build_ast_ensure_limit(self, expr, limit): ast = sql.build_ast(expr) # note: limit can still be None at this point, if the global # default_limit is None for query in reversed(ast.queries): if (isinstance(query, ddl.Select) and not isinstance(expr, ir.ScalarExpr) and query.table_set is not None): if query.limit is None: query_limit = limit or options.sql.default_limit if query_limit: query.limit = {'n': query_limit, 'offset': 0} elif limit is not None: query.limit = {'n': limit, 'offset': query.limit['offset']} return ast
def test_create_external_table_as(self):
    """External CTAS compiles with STORED AS / LOCATION clauses."""
    path = "/path/to/table"
    select = build_ast(self.con.table("test1")).queries[0]
    statement = ddl.CTAS(
        "another_table",
        select,
        external=True,
        can_exist=False,
        path=path,
        database="foo",
    )
    expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
STORED AS PARQUET
LOCATION '{0}'
AS
SELECT *
FROM test1""".format(
        path
    )
    assert statement.compile() == expected
def _build_ast_ensure_limit(self, expr, limit):
    # Compile *expr* and attach a row limit to the outermost materializing
    # SELECT when it does not already carry one.
    ast = sql.build_ast(expr)
    if not limit:
        # No explicit limit: use the configured global default
        limit = options.sql.default_limit

    # note: limit can still be None at this point, if the global
    # default_limit is None
    if limit is not None:
        for query in reversed(ast.queries):
            # Skip non-SELECT statements, scalar results, and selects
            # with no table set — none of these need a limit
            if (isinstance(query, ddl.Select) and
                    not isinstance(expr, ir.ScalarExpr) and
                    query.table_set is not None):
                if query.limit is None:
                    query.limit = {'n': limit, 'offset': 0}
                # Only the outermost qualifying SELECT is considered
                break
    return ast
def test_create_external_table_as(self):
    """External CTAS compiles with STORED AS / LOCATION clauses."""
    location = '/path/to/table'
    select = build_ast(self.con.table('test1')).queries[0]
    statement = ddl.CTAS('another_table',
                         select,
                         external=True,
                         can_exist=False,
                         path=location,
                         database='foo')
    expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
STORED AS PARQUET
LOCATION '{0}'
AS
SELECT *
FROM test1""".format(location)
    assert statement.compile() == expected
def test_simple_scalar_aggregates(self):
    """A scalar aggregate compiles to a one-column SELECT and its result
    handler unboxes the single value from the returned frame."""
    from pandas import DataFrame

    # Things like table.column.{sum, mean, ...}()
    table = self.con.table('alltypes')
    expr = table[table.c > 0].f.sum()

    query = build_ast(expr).queries[0]
    expected = """SELECT sum(`f`) AS `tmp`
FROM alltypes
WHERE `c` > 0"""
    assert query.compile() == expected

    # Maybe the result handler should act on the cursor. Not sure.
    frame = DataFrame({'tmp': [5]})
    assert query.result_handler(frame) == 5
def test_table_column_unbox(self):
    """Selecting one column of an aggregation: the full aggregate query is
    compiled and the handler unboxes that column from the frame."""
    from pandas import DataFrame

    table = self.table
    total = table.f.sum().name('total')
    agged = table[table.c > 0].group_by('g').aggregate([total])

    query = build_ast(agged.g).queries[0]
    expected = """SELECT `g`, sum(`f`) AS `total`
FROM alltypes
WHERE `c` > 0
GROUP BY 1"""
    assert query.compile() == expected

    # Maybe the result handler should act on the cursor. Not sure.
    frame = DataFrame({'g': ['foo', 'bar', 'baz']})
    assert (query.result_handler(frame) == frame['g']).all()
def test_ast_with_aggregation_join_filter(self):
    # Aggregating over a filtered join: the filter, join predicate,
    # metrics, and group-by keys must all be hoisted to reference the
    # root tables in the compiled select.
    table = self.con.table('test1')
    table2 = self.con.table('test2')

    filter_pred = table['f'] > 0
    table3 = table[filter_pred]
    join_pred = table3['g'] == table2['key']

    joined = table2.inner_join(table3, [join_pred])
    met1 = (table3['f'] - table2['value']).mean().name('foo')
    result = joined.aggregate([met1, table3['f'].sum().name('bar')],
                              by=[table3['g'], table2['key']])

    ast = build_ast(result)
    stmt = ast.queries[0]

    # hoisted metrics
    ex_metrics = [(table['f'] - table2['value']).mean().name('foo'),
                  table['f'].sum().name('bar')]
    ex_by = [table['g'], table2['key']]

    # hoisted join and aggregate
    expected_table_set = \
        table2.inner_join(table, [table['g'] == table2['key']])
    assert stmt.table_set.equals(expected_table_set)

    # Check various exprs: select list is group-by keys then metrics
    for res, ex in zip(stmt.select_set, ex_by + ex_metrics):
        assert res.equals(ex)

    # group_by entries are positional indices into select_set
    for res, ex in zip(stmt.group_by, ex_by):
        assert stmt.select_set[res].equals(ex)

    # Check we got the filter
    assert len(stmt.where) == 1
    assert stmt.where[0].equals(filter_pred)
def _get_query(expr):
    """Compile *expr* and return the first query of the resulting AST."""
    return build_ast(expr).queries[0]
def _get_select(expr):
    """Compile *expr*; return (first query, compilation context)."""
    query_ast = build_ast(expr)
    return query_ast.queries[0], query_ast.context
def _create_table(table_name, expr, database=None, can_exist=False,
                  format="parquet"):
    """Build (but do not execute) a CTAS DDL statement that materializes
    *expr* as *table_name*."""
    select = build_ast(expr).queries[0]
    return ddl.CTAS(table_name, select,
                    database=database,
                    format=format,
                    can_exist=can_exist)
def create_table(self, table_name, expr=None, schema=None, database=None,
                 format='parquet', force=False, external=False,
                 path=None, partition=None, like_parquet=None):
    """
    Create a new table in Impala using an Ibis table expression

    Parameters
    ----------
    table_name : string
    expr : TableExpr, optional
      If passed, creates table from select statement results
    schema : ibis.Schema, optional
      Mutually exclusive with expr, creates an empty table with a
      particular schema
    database : string, default None (optional)
    format : {'parquet'}
    force : boolean, default False
      Do not create table if table with indicated name already exists
    external : boolean, default False
      Create an external table; Impala will not delete the underlying
      data when the table is dropped
    path : string, default None
      Specify the path where Impala reads and writes files for the table
    partition : list of strings
      Must pass a schema to use this. Cannot partition from an
      expression (create-table-as-select)
    like_parquet : string (HDFS path), optional
      Can specify in lieu of a schema

    Examples
    --------
    con.create_table('new_table_name', table_expr)
    """
    if like_parquet is not None:
        raise NotImplementedError

    if expr is not None:
        # create-table-as-select path
        select = sql.build_ast(expr).queries[0]

        if partition is not None:
            # Fairly certain this is currently the case
            raise ValueError('partition not supported with '
                             'create-table-as-select')

        statement = ddl.CTAS(table_name, select,
                             database=database,
                             can_exist=force,
                             format=format,
                             external=external,
                             path=path)
    elif schema is not None:
        # empty table with an explicit schema (optionally partitioned)
        statement = ddl.CreateTableWithSchema(
            table_name, schema, ddl.NoFormat(),
            database=database, format=format, can_exist=force,
            external=external, path=path, partition=partition)
    else:
        raise com.IbisError('Must pass expr or schema')

    self._execute(statement)
def create_table(self, table_name, expr=None, schema=None, database=None,
                 format='parquet', overwrite=False, external=False,
                 path=None, partition=None, like_parquet=None):
    """
    Create a new table in Impala using an Ibis table expression

    Parameters
    ----------
    table_name : string
    expr : TableExpr, optional
      If passed, creates table from select statement results
    schema : ibis.Schema, optional
      Mutually exclusive with expr, creates an empty table with a
      particular schema
    database : string, default None (optional)
    format : {'parquet'}
    overwrite : boolean, default False
      Do not create table if table with indicated name already exists
    external : boolean, default False
      Create an external table; Impala will not delete the underlying
      data when the table is dropped
    path : string, default None
      Specify the path where Impala reads and writes files for the table
    partition : list of strings
      Must pass a schema to use this. Cannot partition from an
      expression (create-table-as-select)
    like_parquet : string (HDFS path), optional
      Can specify in lieu of a schema

    Examples
    --------
    con.create_table('new_table_name', table_expr)
    """
    if like_parquet is not None:
        raise NotImplementedError

    if expr is not None:
        # create-table-as-select path
        select = sql.build_ast(expr).queries[0]

        if partition is not None:
            # Fairly certain this is currently the case
            raise ValueError('partition not supported with '
                             'create-table-as-select')

        statement = ddl.CTAS(table_name, select,
                             database=database,
                             overwrite=overwrite,
                             format=format,
                             external=external,
                             path=path)
    elif schema is not None:
        # empty table with an explicit schema (optionally partitioned)
        statement = ddl.CreateTableWithSchema(
            table_name, schema, ddl.NoFormat(),
            database=database, format=format, overwrite=overwrite,
            external=external, path=path, partition=partition)
    else:
        raise com.IbisError('Must pass expr or schema')

    self._execute(statement)