def get_batch_data_and_markers(
    self, batch_spec: BatchSpec
) -> Tuple[Any, BatchMarkers]:
    """Build the batch data and load-time markers described by ``batch_spec``.

    Args:
        batch_spec: Either a ``SqlAlchemyDatasourceBatchSpec`` or a
            ``RuntimeQueryBatchSpec``.

    Returns:
        A ``(batch_data, batch_markers)`` tuple. ``batch_markers`` holds a
        single ``ge_load_time`` entry: the current UTC time formatted as
        ``%Y%m%dT%H%M%S.%fZ``.

    Raises:
        InvalidBatchSpecError: If ``batch_spec`` is of any other type.

    Note:
        For a ``RuntimeQueryBatchSpec`` the ``query`` attribute of the spec
        is overwritten with the placeholder ``"SQLQuery"`` once it has been
        read.
    """
    accepted_spec_types = (SqlAlchemyDatasourceBatchSpec, RuntimeQueryBatchSpec)
    if not isinstance(batch_spec, accepted_spec_types):
        raise InvalidBatchSpecError(
            "SqlAlchemyExecutionEngine accepts batch_spec only of type "
            "SqlAlchemyDatasourceBatchSpec or RuntimeQueryBatchSpec "
            f'(illegal type "{str(type(batch_spec))}" was received). '
        )

    load_time = datetime.datetime.now(datetime.timezone.utc)
    batch_markers: BatchMarkers = BatchMarkers(
        {"ge_load_time": load_time.strftime("%Y%m%dT%H%M%S.%fZ")}
    )

    # Keyword arguments that both kinds of batch spec hand to SqlAlchemyBatchData.
    shared_kwargs = {
        "execution_engine": self,
        "temp_table_name": batch_spec.get("bigquery_temp_table"),
        "create_temp_table": batch_spec.get(
            "create_temp_table", self._create_temp_table
        ),
        "source_table_name": batch_spec.get("table_name", None),
        "source_schema_name": batch_spec.get("schema_name", None),
    }

    if isinstance(batch_spec, RuntimeQueryBatchSpec):
        # Only the query-based path forwards a temp-table schema name.
        temp_table_schema_name: Optional[str] = batch_spec.get(
            "temp_table_schema_name"
        )
        # query != None is already checked when RuntimeQueryBatchSpec is instantiated
        raw_query: str = batch_spec.query
        batch_spec.query = "SQLQuery"
        batch_data = SqlAlchemyBatchData(
            query=raw_query,
            temp_table_schema_name=temp_table_schema_name,
            **shared_kwargs,
        )
    else:
        # The guard above leaves SqlAlchemyDatasourceBatchSpec as the only
        # remaining possibility. The previous annotations indicate the helper
        # yields a plain string on Oracle and a SQLAlchemy Selectable
        # otherwise; the call itself is the same for every dialect.
        selectable = self._build_selectable_from_batch_spec(batch_spec=batch_spec)
        batch_data = SqlAlchemyBatchData(selectable=selectable, **shared_kwargs)

    return batch_data, batch_markers
def get_batch_data_and_markers(
    self, batch_spec: BatchSpec
) -> Tuple[Any, BatchMarkers]:
    """Create batch data from the selectable built for ``batch_spec``.

    Args:
        batch_spec: Mapping-like batch spec. The optional keys
            ``bigquery_temp_table``, ``table_name``, ``schema_name`` and
            ``create_temp_table`` are read from it.

    Returns:
        A ``(batch_data, batch_markers)`` tuple. ``batch_markers`` holds a
        single ``ge_load_time`` entry: the current UTC time formatted as
        ``%Y%m%dT%H%M%S.%fZ``.
    """
    batch_data = SqlAlchemyBatchData(
        execution_engine=self,
        selectable=self._build_selectable_from_batch_spec(batch_spec=batch_spec),
        # ``get`` already yields None when the key is absent.
        temp_table_name=batch_spec.get("bigquery_temp_table"),
        # Fall back to the engine-wide setting when the spec is silent.
        create_temp_table=batch_spec.get(
            "create_temp_table", self._create_temp_table
        ),
        source_table_name=batch_spec.get("table_name", None),
        source_schema_name=batch_spec.get("schema_name", None),
    )

    # The timestamp is taken after the batch data has been constructed.
    load_time = datetime.datetime.now(datetime.timezone.utc)
    batch_markers = BatchMarkers(
        {"ge_load_time": load_time.strftime("%Y%m%dT%H%M%S.%fZ")}
    )
    return batch_data, batch_markers
def test_table_column_introspection(sa):
    """Check that a table-backed batch exposes a reflectable ``sa.Table``."""
    table_name = "table_partitioned_by_date_column__A"
    expected_column_names = [
        "index",
        "id",
        "date",
        "event_type",
        "favorite_color",
    ]

    db_file = file_relative_path(
        __file__,
        "../../test_sets/test_cases_for_sql_data_connector.db",
    )
    sqlite_engine = sa.create_engine(f"sqlite:///{db_file}")
    execution_engine = SqlAlchemyExecutionEngine(engine=sqlite_engine)

    batch_data = SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name=table_name,
    )
    execution_engine.load_batch_data("__", batch_data)

    # The batch must wrap the named table, with no schema attached.
    assert isinstance(batch_data.selectable, sa.Table)
    assert batch_data.selectable.name == table_name
    assert batch_data.selectable.schema is None

    # Reflect the table through SQLAlchemy and compare its column names.
    inspector = reflection.Inspector.from_engine(sqlite_engine)
    reflected_columns = inspector.get_columns(
        batch_data.selectable.name,
        schema=batch_data.selectable.schema,
    )
    column_names = [column["name"] for column in reflected_columns]
    assert column_names == expected_column_names
def test_instantiation_with_unknown_dialect(sqlite_view_engine):
    """An unrecognised dialect name must map to ``GESqlDialect.OTHER``."""
    engine_wrapper: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )
    # Overwrite the dialect name with one the tested code should not recognise.
    engine_wrapper.engine.dialect.name = "not_a_supported_dialect"

    table_batch: SqlAlchemyBatchData = SqlAlchemyBatchData(
        execution_engine=engine_wrapper,
        table_name="test_table",
    )

    assert table_batch.dialect == GESqlDialect.OTHER
def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):
    """Track how many temp tables exist as batches are built in different ways."""

    def temp_table_count():
        # Number of temp tables currently reported for the fixture engine.
        return len(get_sqlite_temp_table_names(sqlite_view_engine))

    print(get_sqlite_temp_table_names(sqlite_view_engine))
    assert temp_table_count() == 1
    assert get_sqlite_temp_table_names(sqlite_view_engine) == {"test_temp_view"}

    execution_engine = SqlAlchemyExecutionEngine(engine=sqlite_view_engine)

    # A table-based batch never creates a temp table, even when
    # create_temp_table=True is requested.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert temp_table_count() == 1

    select_all = sa.select("*").select_from(sa.text("test_table"))

    # create_temp_table=False: no new temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=select_all,
        create_temp_table=False,
    )
    assert temp_table_count() == 1

    # create_temp_table=True: one new temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=select_all,
        create_temp_table=True,
    )
    assert temp_table_count() == 2

    # create_temp_table omitted: it defaults to True, so one more temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=select_all,
    )
    assert temp_table_count() == 3
def test_instantiation_with_query(sqlite_view_engine, test_df):
    """A query-based batch with ``create_temp_table=False`` adds no temp table."""
    test_df.to_sql("test_table_0", con=sqlite_view_engine)

    # SqlAlchemyBatchData takes an execution engine, not a raw SQLAlchemy
    # engine; wrap the fixture the same way the sibling tests do.
    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )

    query: str = "SELECT * FROM test_table_0"

    # If create_temp_table=False, a new temp table should NOT be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        query=query,
        create_temp_table=False,
    )

    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1
def test_instantiation_with_table_name(sqlite_view_engine):
    """Check the default attributes of a batch built from a table name."""
    engine = SqlAlchemyExecutionEngine(engine=sqlite_view_engine)
    batch_data = SqlAlchemyBatchData(
        execution_engine=engine,
        table_name="test_table",
    )

    # This is a very hacky type check.
    # A better way would be to figure out the proper parent class for dialects within SQLAlchemy
    # (`startswith` replaces the former hard-coded slice length of 28.)
    dialect_type_repr = str(type(batch_data.sql_engine_dialect))
    assert dialect_type_repr.startswith("<class 'sqlalchemy.dialects.")

    assert isinstance(batch_data.selectable, sqlalchemy.Table)

    assert isinstance(batch_data.record_set_name, str)
    assert batch_data.record_set_name == "great_expectations_sub_selection"

    # Identity check against the singleton instead of `== False`.
    assert batch_data.use_quoted_name is False
def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):
    """Track temp-table creation across table, selectable and schema-qualified batches."""

    def temp_table_count():
        # Number of temp tables currently reported for the fixture engine.
        return len(get_sqlite_temp_table_names(sqlite_view_engine))

    print(get_sqlite_temp_table_names(sqlite_view_engine))
    assert temp_table_count() == 1
    assert get_sqlite_temp_table_names(sqlite_view_engine) == {"test_temp_view"}

    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )

    # A table-based batch never creates a temp table, even when
    # create_temp_table=True is requested.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert temp_table_count() == 1

    text_selectable = sa.select("*").select_from(sa.text("main.test_table"))

    # create_temp_table=False: no new temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=False,
    )
    assert temp_table_count() == 1

    # create_temp_table=True: one new temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=True,
    )
    assert temp_table_count() == 2

    # create_temp_table omitted: it defaults to True, so one more temp table.
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
    )
    assert temp_table_count() == 3

    # testing whether schema is supported
    schema_selectable = sa.select("*").select_from(
        sa.table(name="test_table", schema="main")
    )
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=schema_selectable,
        # create_temp_table defaults to True
    )
    assert temp_table_count() == 4

    # test schema with execution engine
    # TODO : Will20210222 Add tests for specifying schema with non-sqlite backend that actually supports new schema creation
    schema_batch_spec = SqlAlchemyDatasourceBatchSpec(
        table_name="test_table",
        batch_identifiers={},
        schema_name="main",
    )
    data_and_markers = execution_engine.get_batch_data_and_markers(
        batch_spec=schema_batch_spec
    )
    assert len(data_and_markers) == 2
def get_sqlalchemy_runtime_validator_postgresql(
    df, schemas=None, caching=True, table_name=None
):
    """Load ``df`` into a PostgreSQL test table and return a ``Validator`` over it.

    Args:
        df: pandas DataFrame to upload. It is modified in place: columns named
            in the PostgreSQL schema are coerced, and API-level infinity
            values are replaced with the PostgreSQL-level ones.
        schemas: Optional mapping of backend name to ``{column: type_name}``.
            Only the ``"postgresql"`` entry is used.
        caching: Forwarded to ``SqlAlchemyExecutionEngine``.
        table_name: Target table name. When ``None``, a name of the form
            ``test_data_<8 random alphanumerics>`` is generated.

    Returns:
        A ``Validator`` bound to a single batch over the uploaded table, or
        ``None`` when obtaining the engine raises
        ``sqlalchemy.exc.OperationalError``.
    """
    sa_engine_name = "postgresql"
    db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
    try:
        engine = connection_manager.get_engine(
            f"postgresql://postgres@{db_hostname}/test_ci"
        )
    except sqlalchemy.exc.OperationalError:
        return None

    sql_dtypes = {}

    if (
        schemas
        and sa_engine_name in schemas
        and isinstance(engine.dialect, postgresqltypes.dialect)
    ):
        schema = schemas[sa_engine_name]
        sql_dtypes = {col: POSTGRESQL_TYPES[dtype] for (col, dtype) in schema.items()}

        for col, type_ in schema.items():
            if type_ in ("INTEGER", "SMALLINT", "BIGINT"):
                df[col] = pd.to_numeric(df[col], downcast="signed")
            elif type_ in ("FLOAT", "DOUBLE", "DOUBLE_PRECISION"):
                df[col] = pd.to_numeric(df[col])
                # Swap the API-level +/- infinity values for the values the
                # PostgreSQL dialect uses.
                min_value_dbms = get_sql_dialect_floating_point_infinity_value(
                    schema=sa_engine_name, negative=True
                )
                max_value_dbms = get_sql_dialect_floating_point_infinity_value(
                    schema=sa_engine_name, negative=False
                )
                for api_schema_type in ("api_np", "api_cast"):
                    min_value_api = get_sql_dialect_floating_point_infinity_value(
                        schema=api_schema_type, negative=True
                    )
                    max_value_api = get_sql_dialect_floating_point_infinity_value(
                        schema=api_schema_type, negative=False
                    )
                    df.replace(
                        to_replace=[min_value_api, max_value_api],
                        value=[min_value_dbms, max_value_dbms],
                        inplace=True,
                    )
            elif type_ in ("DATETIME", "TIMESTAMP"):
                df[col] = pd.to_datetime(df[col])

    if table_name is None:
        alphabet = string.ascii_letters + string.digits
        table_name = "test_data_" + "".join(random.choice(alphabet) for _ in range(8))

    df.to_sql(
        name=table_name,
        con=engine,
        index=False,
        dtype=sql_dtypes,
        if_exists="replace",
    )

    # Build the batch once, against the execution engine. An earlier duplicate
    # built from the raw SQLAlchemy engine was discarded unused and is gone.
    execution_engine = SqlAlchemyExecutionEngine(caching=caching, engine=engine)
    batch_data = SqlAlchemyBatchData(
        execution_engine=execution_engine, table_name=table_name
    )
    batch = Batch(data=batch_data)

    return Validator(execution_engine=execution_engine, batches=(batch,))
def get_batch_data_and_markers(
    self, batch_spec: BatchSpec
) -> Tuple[Any, BatchMarkers]:
    """Build the batch data and load-time markers described by ``batch_spec``.

    Args:
        batch_spec: Either a ``SqlAlchemyDatasourceBatchSpec`` or a
            ``RuntimeQueryBatchSpec``.

    Returns:
        A ``(batch_data, batch_markers)`` tuple. ``batch_markers`` holds a
        single ``ge_load_time`` entry: the current UTC time formatted as
        ``%Y%m%dT%H%M%S.%fZ``.

    Raises:
        InvalidBatchSpecError: If ``batch_spec`` is of any other type.

    Warns:
        DeprecationWarning: If the spec carries a truthy
            ``bigquery_temp_table`` value. The value itself is not used.

    Note:
        For a ``RuntimeQueryBatchSpec`` the ``query`` attribute of the spec
        is overwritten with the placeholder ``"SQLQuery"`` once it has been
        read.
    """
    if not isinstance(
        batch_spec, (SqlAlchemyDatasourceBatchSpec, RuntimeQueryBatchSpec)
    ):
        raise InvalidBatchSpecError(
            "SqlAlchemyExecutionEngine accepts batch_spec only of type "
            "SqlAlchemyDatasourceBatchSpec or RuntimeQueryBatchSpec "
            f'(illegal type "{str(type(batch_spec))}" was received). '
        )

    batch_data: Optional[SqlAlchemyBatchData] = None
    batch_markers: BatchMarkers = BatchMarkers(
        {
            "ge_load_time": datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y%m%dT%H%M%S.%fZ"
            )
        }
    )

    # Both keys are optional in the spec, hence Optional[str].
    source_schema_name: Optional[str] = batch_spec.get("schema_name", None)
    source_table_name: Optional[str] = batch_spec.get("table_name", None)

    temp_table_schema_name: Optional[str] = batch_spec.get("temp_table_schema_name")

    if batch_spec.get("bigquery_temp_table"):
        # deprecated-v0.15.3
        # Each fragment ends with a space so the adjacent literals join into
        # readable sentences.
        warnings.warn(
            "BigQuery tables that are created as the result of a query are no longer created as "
            "permanent tables. Thus, a named permanent table through the `bigquery_temp_table` "
            "parameter is not required. The `bigquery_temp_table` parameter is deprecated as of "
            "v0.15.3 and will be removed in v0.18.",
            DeprecationWarning,
        )

    create_temp_table: bool = batch_spec.get(
        "create_temp_table", self._create_temp_table
    )

    if isinstance(batch_spec, RuntimeQueryBatchSpec):
        # query != None is already checked when RuntimeQueryBatchSpec is instantiated
        query: str = batch_spec.query

        batch_spec.query = "SQLQuery"
        batch_data = SqlAlchemyBatchData(
            execution_engine=self,
            query=query,
            temp_table_schema_name=temp_table_schema_name,
            create_temp_table=create_temp_table,
            source_table_name=source_table_name,
            source_schema_name=source_schema_name,
        )
    elif isinstance(batch_spec, SqlAlchemyDatasourceBatchSpec):
        # The previous annotations indicate the helper yields a plain string
        # on Oracle and a SQLAlchemy Selectable otherwise. The call was
        # identical in both dialect branches, so a single call suffices.
        selectable = self._build_selectable_from_batch_spec(batch_spec=batch_spec)
        batch_data = SqlAlchemyBatchData(
            execution_engine=self,
            selectable=selectable,
            create_temp_table=create_temp_table,
            source_table_name=source_table_name,
            source_schema_name=source_schema_name,
        )

    return batch_data, batch_markers