Example no. 1
0
    def get_batch_data_and_markers(
            self, batch_spec: BatchSpec) -> Tuple[Any, BatchMarkers]:
        if not isinstance(
                batch_spec,
            (SqlAlchemyDatasourceBatchSpec, RuntimeQueryBatchSpec)):
            raise InvalidBatchSpecError(
                f"""SqlAlchemyExecutionEngine accepts batch_spec only of type SqlAlchemyDatasourceBatchSpec or
        RuntimeQueryBatchSpec (illegal type "{str(type(batch_spec))}" was received).
                        """)

        batch_data: Optional[SqlAlchemyBatchData] = None
        batch_markers: BatchMarkers = BatchMarkers({
            "ge_load_time":
            datetime.datetime.now(
                datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
        })

        source_schema_name: Optional[str] = batch_spec.get("schema_name", None)
        source_table_name: Optional[str] = batch_spec.get("table_name", None)

        temp_table_schema_name: Optional[str] = batch_spec.get(
            "temp_table_schema_name")
        temp_table_name: Optional[str] = batch_spec.get("bigquery_temp_table")

        create_temp_table: bool = batch_spec.get("create_temp_table",
                                                 self._create_temp_table)

        if isinstance(batch_spec, RuntimeQueryBatchSpec):
            # query != None is already checked when RuntimeQueryBatchSpec is instantiated
            query: str = batch_spec.query

            batch_spec.query = "SQLQuery"
            batch_data = SqlAlchemyBatchData(
                execution_engine=self,
                query=query,
                temp_table_schema_name=temp_table_schema_name,
                temp_table_name=temp_table_name,
                create_temp_table=create_temp_table,
                source_table_name=source_table_name,
                source_schema_name=source_schema_name,
            )
        elif isinstance(batch_spec, SqlAlchemyDatasourceBatchSpec):
            if self.engine.dialect.name.lower() == "oracle":
                selectable: str = self._build_selectable_from_batch_spec(
                    batch_spec=batch_spec)
            else:
                selectable: Selectable = self._build_selectable_from_batch_spec(
                    batch_spec=batch_spec)

            batch_data = SqlAlchemyBatchData(
                execution_engine=self,
                selectable=selectable,
                temp_table_name=temp_table_name,
                create_temp_table=create_temp_table,
                source_table_name=source_table_name,
                source_schema_name=source_schema_name,
            )

        return batch_data, batch_markers

    def get_batch_data_and_markers(
            self, batch_spec: BatchSpec) -> Tuple[Any, BatchMarkers]:
        selectable = self._build_selectable_from_batch_spec(
            batch_spec=batch_spec)

        if "bigquery_temp_table" in batch_spec:
            temp_table_name = batch_spec.get("bigquery_temp_table")
        else:
            temp_table_name = None

        source_table_name = batch_spec.get("table_name", None)
        source_schema_name = batch_spec.get("schema_name", None)

        batch_data = SqlAlchemyBatchData(
            execution_engine=self,
            selectable=selectable,
            temp_table_name=temp_table_name,
            create_temp_table=batch_spec.get("create_temp_table",
                                             self._create_temp_table),
            source_table_name=source_table_name,
            source_schema_name=source_schema_name,
        )
        batch_markers = BatchMarkers({
            "ge_load_time":
            datetime.datetime.now(
                datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
        })

        return batch_data, batch_markers

def test_table_column_introspection(sa):
    db_file = file_relative_path(
        __file__,
        "../../test_sets/test_cases_for_sql_data_connector.db",
    )
    eng = sa.create_engine(f"sqlite:///{db_file}")
    engine = SqlAlchemyExecutionEngine(engine=eng)
    batch_data = SqlAlchemyBatchData(
        execution_engine=engine,
        table_name="table_partitioned_by_date_column__A")
    engine.load_batch_data("__", batch_data)
    assert isinstance(batch_data.selectable, sa.Table)
    assert batch_data.selectable.name == "table_partitioned_by_date_column__A"
    assert batch_data.selectable.schema is None

    insp = reflection.Inspector.from_engine(eng)
    columns = insp.get_columns(batch_data.selectable.name,
                               schema=batch_data.selectable.schema)
    assert [x["name"] for x in columns] == [
        "index",
        "id",
        "date",
        "event_type",
        "favorite_color",
    ]
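A hedged sketch of how `get_batch_data_and_markers` might be driven end to end follows; the connection string, query, and variable names are illustrative assumptions rather than values taken from the examples.

from great_expectations.core.batch_spec import RuntimeQueryBatchSpec
from great_expectations.execution_engine import SqlAlchemyExecutionEngine

# Hypothetical SQLite database; any SQLAlchemy-compatible URL would do.
execution_engine = SqlAlchemyExecutionEngine(connection_string="sqlite:///example.db")
batch_spec = RuntimeQueryBatchSpec(query="SELECT 1 AS a")
batch_data, batch_markers = execution_engine.get_batch_data_and_markers(batch_spec=batch_spec)
# "ge_load_time" is the marker key populated inside the method shown above.
print(batch_markers.get("ge_load_time"))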
Example no. 4
0
def test_instantiation_with_unknown_dialect(sqlite_view_engine):
    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine)
    execution_engine.engine.dialect.name = "not_a_supported_dialect"
    batch_data: SqlAlchemyBatchData = SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
    )

    assert batch_data.dialect == GESqlDialect.OTHER
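For context on the assertion above: `GESqlDialect.OTHER` serves as the fallback for dialect names the enum does not recognize. Below is a minimal sketch of that fallback pattern using a hypothetical stand-in enum, not the real `GESqlDialect`.

from enum import Enum


class DialectSketch(Enum):  # hypothetical stand-in for GESqlDialect
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"
    ORACLE = "oracle"
    OTHER = "other"

    @classmethod
    def from_name(cls, dialect_name: str) -> "DialectSketch":
        try:
            return cls(dialect_name.lower())
        except ValueError:
            # Unknown names fall back to OTHER, mirroring what the test expects
            # for "not_a_supported_dialect".
            return cls.OTHER


assert DialectSketch.from_name("not_a_supported_dialect") is DialectSketch.OTHER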
Example no. 5
0
def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):

    print(get_sqlite_temp_table_names(sqlite_view_engine))
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1
    assert get_sqlite_temp_table_names(sqlite_view_engine) == {
        "test_temp_view"
    }

    engine = SqlAlchemyExecutionEngine(engine=sqlite_view_engine)
    # When the SqlAlchemyBatchData object is based on a table, a new temp table is NOT created, even if create_temp_table=True
    SqlAlchemyBatchData(
        execution_engine=engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1

    selectable = sa.select("*").select_from(sa.text("test_table"))

    # If create_temp_table=False, a new temp table should NOT be created
    SqlAlchemyBatchData(
        execution_engine=engine,
        selectable=selectable,
        create_temp_table=False,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=engine,
        selectable=selectable,
        create_temp_table=True,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 2

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=engine,
        selectable=selectable,
        # create_temp_table defaults to True
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 3

def test_instantiation_with_query(sqlite_view_engine, test_df):
    test_df.to_sql("test_table_0", con=sqlite_view_engine)

    query: str = "SELECT * FROM test_table_0"
    # If create_temp_table=False, a new temp table should NOT be created
    # noinspection PyUnusedLocal
    batch_data: SqlAlchemyBatchData = SqlAlchemyBatchData(
        execution_engine=sqlite_view_engine,
        query=query,
        create_temp_table=False,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1
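The temp-table counts in the tests above come from the `get_sqlite_temp_table_names` helper. A hedged sketch of how such a helper can be written, assuming SQLite's `sqlite_temp_master` catalog (which also lists temporary views such as `test_temp_view`) and an engine that reuses a single connection, since SQLite temporary objects are connection-scoped:

import sqlalchemy as sa


def sqlite_temp_table_names_sketch(engine) -> set:
    # sqlite_temp_master lists the temporary tables and views visible to the
    # current connection; the fixtures used here keep one connection alive.
    with engine.connect() as conn:
        rows = conn.execute(sa.text("SELECT name FROM sqlite_temp_master"))
        return {row[0] for row in rows}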
Example no. 7
0
def test_instantiation_with_table_name(sqlite_view_engine):
    engine = SqlAlchemyExecutionEngine(engine=sqlite_view_engine)
    batch_data = SqlAlchemyBatchData(
        execution_engine=engine,
        table_name="test_table",
    )

    # This is a very hacky type check.
    # A better way would be to figure out the proper parent class for dialects within SQLAlchemy
    assert (str(type(
        batch_data.sql_engine_dialect))[:28] == "<class 'sqlalchemy.dialects.")

    assert isinstance(batch_data.selectable, sqlalchemy.Table)

    assert type(batch_data.record_set_name) == str
    assert batch_data.record_set_name == "great_expectations_sub_selection"

    assert batch_data.use_quoted_name is False

def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):
    print(get_sqlite_temp_table_names(sqlite_view_engine))
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1
    assert get_sqlite_temp_table_names(sqlite_view_engine) == {
        "test_temp_view"
    }

    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine)
    # When the SqlAlchemyBatchData object is based on a table, a new temp table is NOT created, even if create_temp_table=True
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1

    selectable = sa.select("*").select_from(sa.text("main.test_table"))

    # If create_temp_table=False, a new temp table should NOT be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=selectable,
        create_temp_table=False,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 1

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=selectable,
        create_temp_table=True,
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 2

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=selectable,
        # create_temp_table defaults to True
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 3

    # testing whether schema is supported
    selectable = sa.select("*").select_from(
        sa.table(name="test_table", schema="main"))
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=selectable,
        # create_temp_table defaults to True
    )
    assert len(get_sqlite_temp_table_names(sqlite_view_engine)) == 4

    # test schema with execution engine
    # TODO : Will20210222 Add tests for specifying schema with non-sqlite backend that actually supports new schema creation
    my_batch_spec = SqlAlchemyDatasourceBatchSpec(
        **{
            "table_name": "test_table",
            "batch_identifiers": {},
            "schema_name": "main",
        })
    res = execution_engine.get_batch_data_and_markers(batch_spec=my_batch_spec)
    assert len(res) == 2
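    # Illustrative follow-up sketch: the returned pair can be unpacked directly;
    # "ge_load_time" is the marker key set by the engine code shown earlier.
    batch_data, batch_markers = res
    assert isinstance(batch_data, SqlAlchemyBatchData)
    assert batch_markers.get("ge_load_time") is not None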
Example no. 9
0
def get_sqlalchemy_runtime_validator_postgresql(df,
                                                schemas=None,
                                                caching=True,
                                                table_name=None):
    sa_engine_name = "postgresql"
    db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
    try:
        engine = connection_manager.get_engine(
            f"postgresql://postgres@{db_hostname}/test_ci")
    except sqlalchemy.exc.OperationalError:
        return None

    sql_dtypes = {}

    if (schemas and sa_engine_name in schemas
            and isinstance(engine.dialect, postgresqltypes.dialect)):
        schema = schemas[sa_engine_name]
        sql_dtypes = {
            col: POSTGRESQL_TYPES[dtype]
            for (col, dtype) in schema.items()
        }

        for col in schema:
            type_ = schema[col]
            if type_ in ["INTEGER", "SMALLINT", "BIGINT"]:
                df[col] = pd.to_numeric(df[col], downcast="signed")
            elif type_ in ["FLOAT", "DOUBLE", "DOUBLE_PRECISION"]:
                df[col] = pd.to_numeric(df[col])
                min_value_dbms = get_sql_dialect_floating_point_infinity_value(
                    schema=sa_engine_name, negative=True)
                max_value_dbms = get_sql_dialect_floating_point_infinity_value(
                    schema=sa_engine_name, negative=False)
                for api_schema_type in ["api_np", "api_cast"]:
                    min_value_api = get_sql_dialect_floating_point_infinity_value(
                        schema=api_schema_type, negative=True)
                    max_value_api = get_sql_dialect_floating_point_infinity_value(
                        schema=api_schema_type, negative=False)
                    df.replace(
                        to_replace=[min_value_api, max_value_api],
                        value=[min_value_dbms, max_value_dbms],
                        inplace=True,
                    )
            elif type_ in ["DATETIME", "TIMESTAMP"]:
                df[col] = pd.to_datetime(df[col])

    if table_name is None:
        table_name = "test_data_" + "".join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(8)
        ])
    df.to_sql(
        name=table_name,
        con=engine,
        index=False,
        dtype=sql_dtypes,
        if_exists="replace",
    )
    execution_engine = SqlAlchemyExecutionEngine(caching=caching,
                                                 engine=engine)
    batch_data = SqlAlchemyBatchData(execution_engine=execution_engine,
                                     table_name=table_name)
    batch = Batch(data=batch_data)

    return Validator(execution_engine=execution_engine, batches=(batch, ))
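A hedged usage sketch for the helper above; the DataFrame contents, column names, and the chosen expectation are illustrative assumptions (`expect_column_values_to_not_be_null` is a standard `Validator` method).

import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "value": [0.1, None, 0.3]})
validator = get_sqlalchemy_runtime_validator_postgresql(df, table_name="demo_table")
if validator is not None:  # None signals that the local test Postgres instance was unreachable
    result = validator.expect_column_values_to_not_be_null(column="id")
    print(result.success)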
Example no. 10
0
    def get_batch_data_and_markers(
        self, batch_spec: BatchSpec
    ) -> Tuple[Any, BatchMarkers]:
        if not isinstance(
            batch_spec, (SqlAlchemyDatasourceBatchSpec, RuntimeQueryBatchSpec)
        ):
            raise InvalidBatchSpecError(
                f"""SqlAlchemyExecutionEngine accepts batch_spec only of type SqlAlchemyDatasourceBatchSpec or
        RuntimeQueryBatchSpec (illegal type "{str(type(batch_spec))}" was received).
                        """
            )

        batch_data: Optional[SqlAlchemyBatchData] = None
        batch_markers: BatchMarkers = BatchMarkers(
            {
                "ge_load_time": datetime.datetime.now(datetime.timezone.utc).strftime(
                    "%Y%m%dT%H%M%S.%fZ"
                )
            }
        )

        source_schema_name: Optional[str] = batch_spec.get("schema_name", None)
        source_table_name: Optional[str] = batch_spec.get("table_name", None)

        temp_table_schema_name: Optional[str] = batch_spec.get("temp_table_schema_name")

        if batch_spec.get("bigquery_temp_table"):
            # deprecated-v0.15.3
            warnings.warn(
                "BigQuery tables that are created as the result of a query are no longer created as "
                "permanent tables. Thus, a named permanent table through the `bigquery_temp_table`"
                "parameter is not required. The `bigquery_temp_table` parameter is deprecated as of"
                "v0.15.3 and will be removed in v0.18.",
                DeprecationWarning,
            )

        create_temp_table: bool = batch_spec.get(
            "create_temp_table", self._create_temp_table
        )

        if isinstance(batch_spec, RuntimeQueryBatchSpec):
            # query != None is already checked when RuntimeQueryBatchSpec is instantiated
            query: str = batch_spec.query

            batch_spec.query = "SQLQuery"
            batch_data = SqlAlchemyBatchData(
                execution_engine=self,
                query=query,
                temp_table_schema_name=temp_table_schema_name,
                create_temp_table=create_temp_table,
                source_table_name=source_table_name,
                source_schema_name=source_schema_name,
            )
        elif isinstance(batch_spec, SqlAlchemyDatasourceBatchSpec):
            if self.engine.dialect.name.lower() == "oracle":
                selectable: str = self._build_selectable_from_batch_spec(
                    batch_spec=batch_spec
                )
            else:
                selectable: Selectable = self._build_selectable_from_batch_spec(
                    batch_spec=batch_spec
                )

            batch_data = SqlAlchemyBatchData(
                execution_engine=self,
                selectable=selectable,
                create_temp_table=create_temp_table,
                source_table_name=source_table_name,
                source_schema_name=source_schema_name,
            )

        return batch_data, batch_markers
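To exercise the deprecation branch above, a test might look like the sketch below; it assumes `execution_engine` is an instantiated `SqlAlchemyExecutionEngine` and that `RuntimeQueryBatchSpec` tolerates the extra `bigquery_temp_table` key (batch specs are dict-based).

import warnings

batch_spec = RuntimeQueryBatchSpec(
    query="SELECT 1 AS a",
    bigquery_temp_table="ge_tmp_demo",  # deprecated key; triggers the warning above
)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    execution_engine.get_batch_data_and_markers(batch_spec=batch_spec)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)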