def test_select_basics(t):
    name = 'testing123456'

    expr = t.limit(10)
    ast = build_ast(expr, SparkDialect.make_context())
    select = ast.queries[0]

    stmt = ddl.InsertSelect(name, select, database='foo')
    result = stmt.compile()

    expected = """\
INSERT INTO foo.`testing123456`
SELECT *
FROM functional_alltypes
LIMIT 10"""
    assert result == expected

    stmt = ddl.InsertSelect(name, select, database='foo', overwrite=True)
    result = stmt.compile()

    expected = """\
INSERT OVERWRITE TABLE foo.`testing123456`
SELECT *
FROM functional_alltypes
LIMIT 10"""
    assert result == expected
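import pytest


# A minimal sketch of the fixture the test above assumes: `t` is an Ibis
# table expression bound to `functional_alltypes`. The `client` fixture
# name is illustrative, not necessarily this repo's actual conftest.
@pytest.fixture
def t(client):
    return client.table('functional_alltypes')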
def create_view(
    self,
    name,
    expr,
    database=None,
    can_exist=False,
    temporary=False,
):
    """
    Create a Spark view from a table expression.

    Parameters
    ----------
    name : string
    expr : ibis TableExpr
    database : string, default None
    can_exist : boolean, default False
        Replace an existing view of the same name if it exists
    temporary : boolean, default False
        If True, create a temporary rather than a persistent view
    """
    ast = self._build_ast(expr, SparkDialect.make_context())
    select = ast.queries[0]
    statement = ddl.CreateView(
        name,
        select,
        database=database,
        can_exist=can_exist,
        temporary=temporary,
    )
    return self._execute(statement.compile())
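# Hedged usage sketch, assuming a connected Spark client `con` and a table
# expression `t = con.table('functional_alltypes')`; the call compiles to
# a CREATE [TEMPORARY] VIEW ... AS SELECT ... statement and executes it.
#
# con.create_view('recent_rows', t.limit(10), temporary=True)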
def _create_table(
    table_name, expr, database=None, can_exist=False, format='parquet'
):
    ast = build_ast(expr, SparkDialect.make_context())
    select = ast.queries[0]
    statement = ddl.CTAS(
        table_name,
        select,
        database=database,
        format=format,
        can_exist=can_exist,
    )
    return statement
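# Hedged sketch of how the helper above might be exercised in a test; the
# exact layout of the compiled SQL depends on the Spark DDL compiler, so
# the checks below are deliberately loose rather than exact-match.
def test_ctas_compiles(t):
    statement = _create_table('tname', t.limit(10), database='foo')
    result = statement.compile()
    assert 'CREATE TABLE' in result
    assert 'LIMIT 10' in result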
def insert(
    self,
    obj=None,
    overwrite=False,
    values=None,
    validate=True,
):
    """
    Insert into a Spark table.

    Parameters
    ----------
    obj : TableExpr or pandas DataFrame
    overwrite : boolean, default False
        If True, will replace existing contents of table
    values : optional
        Not yet implemented
    validate : boolean, default True
        If True, do more rigorous validation that the schema of the data
        being inserted is compatible with the existing table

    Examples
    --------
    >>> t.insert(table_expr)  # doctest: +SKIP

    # Completely overwrite contents
    >>> t.insert(table_expr, overwrite=True)  # doctest: +SKIP
    """
    if isinstance(obj, pd.DataFrame):
        # pandas input bypasses the DDL compiler: convert to a Spark
        # DataFrame and insert through the DataFrameWriter API
        spark_df = self._session.createDataFrame(obj)
        spark_df.write.insertInto(self.name, overwrite=overwrite)
        return

    expr = obj

    if values is not None:
        raise NotImplementedError

    if validate:
        existing_schema = self.schema()
        insert_schema = expr.schema()
        if not insert_schema.equals(existing_schema):
            _validate_compatible(insert_schema, existing_schema)

    ast = build_ast(expr, SparkDialect.make_context())
    select = ast.queries[0]
    statement = ddl.InsertSelect(
        self._qualified_name,
        select,
        overwrite=overwrite,
    )
    return self._execute(statement.compile())
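# Hedged usage sketch (names assumed): a pandas DataFrame goes straight
# through the Spark session, while an Ibis expression compiles to an
# INSERT [OVERWRITE TABLE] ... SELECT statement.
#
# import pandas as pd
# t.insert(pd.DataFrame({'a': [1, 2]}))        # via createDataFrame
# t.insert(other_table_expr, overwrite=True)   # INSERT OVERWRITE TABLE ...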
def create_table(
    self,
    table_name,
    obj=None,
    schema=None,
    database=None,
    force=False,
    # HDFS options
    format='parquet',
):
    """
    Create a new table in Spark using an Ibis table expression.

    Parameters
    ----------
    table_name : string
    obj : TableExpr or pandas.DataFrame, optional
        If passed, creates table from select statement results
    schema : ibis.Schema, optional
        Mutually exclusive with obj, creates an empty table with a
        particular schema
    database : string, default None (optional)
    force : boolean, default False
        If True, do not fail if a table with the indicated name
        already exists
    format : {'parquet'}

    Examples
    --------
    >>> con.create_table('new_table_name', table_expr)  # doctest: +SKIP
    """
    if obj is not None:
        if isinstance(obj, pd.DataFrame):
            spark_df = self._session.createDataFrame(obj)
            mode = 'error'
            if force:
                mode = 'overwrite'
            spark_df.write.saveAsTable(
                table_name,
                format=format,
                mode=mode,
            )
            return

        ast = self._build_ast(obj, SparkDialect.make_context())
        select = ast.queries[0]
        statement = ddl.CTAS(
            table_name,
            select,
            database=database,
            can_exist=force,
            format=format,
        )
    elif schema is not None:
        statement = ddl.CreateTableWithSchema(
            table_name,
            schema,
            database=database,
            format=format,
            can_exist=force,
        )
    else:
        raise com.IbisError('Must pass expr or schema')

    return self._execute(statement.compile())
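# Hedged usage sketch (assumes a connected client `con` and a table
# expression `t`): each path above ends in a single DDL statement, so a
# schema compiles to a CREATE TABLE and an expression compiles to a CTAS.
#
# import ibis
# sch = ibis.schema([('a', 'int64'), ('b', 'string')])
# con.create_table('empty_t', schema=sch)                  # CreateTableWithSchema
# con.create_table('t_copy', obj=t.limit(10), force=True)  # CTAS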