Example #1
def test_sqlalchemy_datasource_processes_dataset_options(
        test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string})
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {
            "caching": False
        },
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
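Example #1 assumes a pytest fixture named test_db_connection_string that supplies a database URL. A minimal sketch of such a fixture, assuming a throwaway on-disk SQLite database rather than the project's actual test database:

import pytest

@pytest.fixture
def test_db_connection_string(tmp_path):
    # Hypothetical stand-in: the real fixture would also create and
    # populate table_1, which the queries above select from.
    return f"sqlite:///{tmp_path / 'test.db'}"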
Example #2
def test_ExpectationsStore_with_DatabaseStoreBackend():
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs
    })

    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1,
                 ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning")

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))

    my_store.set(ns_2,
                 ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure")

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
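For orientation, a sketch of the key round-trip these store tests rely on, assuming ExpectationSuiteIdentifier splits suite names on dots in to_tuple and re-joins them in from_tuple:

from great_expectations.data_context.types.resource_identifiers import (
    ExpectationSuiteIdentifier,
)

key = ExpectationSuiteIdentifier(expectation_suite_name="a.b.c.warning")
assert key.to_tuple() == ("a", "b", "c", "warning")
assert ExpectationSuiteIdentifier.from_tuple(key.to_tuple()) == key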
Example #3
def test_expectation_summary_in_ExpectationSuitePageRenderer_render_expectation_suite_notes():
    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(expectation_suite_name="test",
                         meta={},
                         expectations=None))
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        'This Expectation suite currently contains 0 total Expectations across 0 columns.'
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": {
                "format": "markdown",
                "content": ["hi"]
            }}))
    # print(RenderedContent.rendered_content_list_to_json(result.text))

    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            'This Expectation suite currently contains 0 total Expectations across 0 columns.',
            {
                'content_block_type': 'markdown',
                'styling': {
                    'parent': {}
                },
                'markdown': 'hi'
            }
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            'This Expectation suite currently contains 0 total Expectations across 0 columns.',
            'hi',
        ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={},
            expectations=[
                ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_be_between",
                    kwargs={
                        "min_value": 0,
                        "max_value": None
                    }),
                ExpectationConfiguration(
                    expectation_type="expect_column_to_exist",
                    kwargs={"column": "x"}),
                ExpectationConfiguration(
                    expectation_type="expect_column_to_exist",
                    kwargs={"column": "y"})
            ]))
    # print(RenderedContent.rendered_content_list_to_json(result.text)[0])
    assert RenderedContent.rendered_content_list_to_json(
        result.text
    )[0] == 'This Expectation suite currently contains 3 total Expectations across 2 columns.'
Example #4
    def validate_expectations(cls, df_ge: PandasDataset,
                              specs: SchemaParserResult) -> defaultdict[list]:
        """
        Validates the dynamic expectations from the schema via the
        great expectations library.
        """
        invalid_elements = defaultdict(list)
        suite = ExpectationSuite(
            expectation_suite_name="custom_specifications")
        for column in specs.expectation_definitions.keys():
            for expectation in specs.expectation_definitions[column]:
                kwargs_extended = dict(expectation['kwargs'])
                kwargs_extended['column'] = column
                suite.append_expectation(
                    ExpectationConfiguration(
                        expectation_type=expectation['expectation_type'],
                        kwargs=kwargs_extended))
        # noinspection PyTypeChecker
        result = df_ge.validate(expectation_suite=suite, result_format="BASIC")
        for expectation_result in result.results:
            if expectation_result.exception_info['raised_exception']:
                continue
            column_name = expectation_result.expectation_config.kwargs[
                "column"]
            n_invalid = expectation_result.result['unexpected_count']
            invalid_elements[column_name].append(n_invalid)

        return invalid_elements
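A hedged usage sketch for validate_expectations above; OwningClass stands in for whichever class hosts the classmethod, and SimpleNamespace mimics only the expectation_definitions attribute of SchemaParserResult:

import pandas as pd
from types import SimpleNamespace
from great_expectations.dataset import PandasDataset

df_ge = PandasDataset(pd.DataFrame({"age": [10, 25, 130]}))
specs = SimpleNamespace(expectation_definitions={
    "age": [{"expectation_type": "expect_column_values_to_be_between",
             "kwargs": {"min_value": 0, "max_value": 120}}],
})
# One value (130) falls outside [0, 120], so this should yield {"age": [1]}.
invalid = OwningClass.validate_expectations(df_ge, specs)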
Example #5
    def _profile(self,
                 schema: dict,
                 suite_name: str = None) -> ExpectationSuite:
        if not suite_name:
            raise ValueError(
                "Please provide a suite name when using this profiler.")
        expectations = []
        # TODO add recursion to allow creation of suites for nested schema files
        if schema["type"] == JsonSchemaTypes.OBJECT.value:
            for key, details in schema["properties"].items():
                expectations.append(
                    self._create_existence_expectation(key, details))

                type_expectation = self._create_type_expectation(key, details)
                if type_expectation:
                    expectations.append(type_expectation)

                range_expectation = self._create_range_expectation(
                    key, details)
                if range_expectation:
                    expectations.append(range_expectation)

                boolean_expectation = self._create_boolean_expectation(
                    key, details)
                if boolean_expectation:
                    expectations.append(boolean_expectation)

                set_expectation = self._create_set_expectation(key, details)
                if set_expectation:
                    expectations.append(set_expectation)

                string_len_expectation = self._create_string_length_expectation(
                    key, details)
                if string_len_expectation:
                    expectations.append(string_len_expectation)

                null_or_not_null_expectation = self._create_null_or_not_null_column_expectation(
                    key, details)
                if null_or_not_null_expectation:
                    expectations.append(null_or_not_null_expectation)
        description = schema.get("description", None)
        meta = None
        if description:
            meta = {
                "notes": {
                    "format": "markdown",
                    "content": [f"### Description:\n{description}"],
                }
            }
        suite = ExpectationSuite(suite_name,
                                 expectations=expectations,
                                 meta=meta)
        suite.add_citation(
            comment=f"This suite was built by the {self.__class__.__name__}", )
        return suite
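For reference, a minimal schema document this profiler can consume, sketched with standard JSON Schema keys; profiler stands in for an instance of the class above:

schema = {
    "type": "object",
    "description": "A user record.",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0, "maximum": 130},
    },
}
suite = profiler._profile(schema, suite_name="user_record.warning")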
Example #6
def test_ge_cloud_validator_updates_self_suite_with_ge_cloud_ids_on_save(
    mock_emit,
    mock_context_get_suite,
    mock_context_save_suite,
    multi_batch_taxi_validator_ge_cloud_mode,
    empty_data_context_stats_enabled,
):
    """
    This checks that a Validator in ge_cloud_mode properly updates the underlying Expectation Suite on save.
    The multi_batch_taxi_validator_ge_cloud_mode fixture has a suite with a single expectation.
    :param mock_context_get_suite: under normal circumstances, this would be the ExpectationSuite object returned from GE Cloud
    :param mock_context_save_suite: under normal circumstances, this would trigger a POST or PATCH request to GE Cloud
    """
    context: DataContext = empty_data_context_stats_enabled
    mock_suite = ExpectationSuite(
        expectation_suite_name="validating_taxi_data",
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": "passenger_count",
                    "min_value": 0,
                    "max_value": 99
                },
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774"),
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": "trip_distance",
                    "min_value": 11,
                    "max_value": 22
                },
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("3e8eee33-b425-4b36-a831-6e9dd31ad5af"),
            ),
        ],
        data_context=context,
        meta={"notes": "This is an expectation suite."},
    )
    mock_context_save_suite.return_value = True
    mock_context_get_suite.return_value = mock_suite
    multi_batch_taxi_validator_ge_cloud_mode.expect_column_values_to_be_between(
        column="trip_distance", min_value=11, max_value=22)
    multi_batch_taxi_validator_ge_cloud_mode.save_expectation_suite()
    assert (multi_batch_taxi_validator_ge_cloud_mode.get_expectation_suite().
            to_json_dict() == mock_suite.to_json_dict())

    # add_expectation() will not send usage_statistics event when called from a Validator
    assert mock_emit.call_count == 0
    assert mock_emit.call_args_list == []
Example #7
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({
        "col_1": [1, 2, 3, 4, 5],
        "col_2": ["a", "b", "c", "d", "e"]
    })
    df2 = pd.DataFrame({
        "col_1": [0, 1, 2, 3, 4],
        "col_2": ["b", "c", "d", "e", "f"]
    })
    df1.to_sql("table_1", con=sqlitedb_engine, index=True)
    df2.to_sql("table_2", con=sqlitedb_engine, index=True, schema="main")
    datasource = SqlAlchemyDatasource("SqlAlchemy", engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({
        "table": "table_1",
        "limit": 1,
        "offset": 2
    })
    assert isinstance(limited_batch, Batch)
    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_tmp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)["col_1"][0] == 3  # offset should have been applied
Example #8
def save_expectation_suite_usage_statistics(
    data_context: "DataContext",  # noqa: F821
    expectation_suite: ExpectationSuite,
    expectation_suite_name: Optional[str] = None,
    **kwargs,
) -> dict:
    try:
        data_context_id = data_context.data_context_id
    except AttributeError:
        data_context_id = None
    anonymizer = _anonymizers.get(data_context_id, None)
    if anonymizer is None:
        anonymizer = Anonymizer(data_context_id)
        _anonymizers[data_context_id] = anonymizer
    payload = {}

    if expectation_suite_name is None:
        if isinstance(expectation_suite, ExpectationSuite):
            expectation_suite_name = expectation_suite.expectation_suite_name
        elif isinstance(expectation_suite, dict):
            expectation_suite_name = expectation_suite.get("expectation_suite_name")

    # noinspection PyBroadException
    try:
        payload["anonymized_expectation_suite_name"] = anonymizer.anonymize(
            obj=expectation_suite_name
        )
    except Exception as e:
        logger.debug(
            f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, save_expectation_suite_usage_statistics: Unable to create anonymized_expectation_suite_name payload field"
        )

    return payload
Example #9
    def save_expectation_suite(
        self,
        expectation_suite: ExpectationSuite,
        expectation_suite_name: Optional[str] = None,
        overwrite_existing: bool = True,
        **kwargs: Dict[str, Any],
    ):
        """Save the provided expectation suite into the DataContext.

        Args:
            expectation_suite: the suite to save
            expectation_suite_name: the name of this expectation suite. If no name is provided the name will \
                be read from the suite

            overwrite_existing: bool setting whether to overwrite existing ExpectationSuite

        Returns:
            None
        """
        if expectation_suite_name is None:
            key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
                expectation_suite_name=expectation_suite.expectation_suite_name
            )
        else:
            expectation_suite.expectation_suite_name = expectation_suite_name
            key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
                expectation_suite_name=expectation_suite_name)
        if self.expectations_store.has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                "expectation_suite with name {} already exists. If you would like to overwrite this "
                "expectation_suite, set overwrite_existing=True.".format(
                    expectation_suite_name))
        self._evaluation_parameter_dependencies_compiled = False
        return self.expectations_store.set(key, expectation_suite, **kwargs)
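A usage sketch for the method above; the context and suite names are illustrative:

import great_expectations.exceptions as ge_exceptions

suite = context.create_expectation_suite("users.warning")
# With no explicit name, the key is derived from the suite's own name:
context.save_expectation_suite(suite)
# Passing a name renames the suite before storing it under that key:
context.save_expectation_suite(suite, expectation_suite_name="users.failure")
# With overwrite_existing=False, an existing key raises DataContextError:
try:
    context.save_expectation_suite(suite,
                                   expectation_suite_name="users.failure",
                                   overwrite_existing=False)
except ge_exceptions.DataContextError:
    pass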
Example #10
def suite_with_table_and_column_expectations(
    exp1, exp2, exp3, exp4, column_pair_expectation, table_exp1, table_exp2, table_exp3
):
    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            exp1,
            exp2,
            exp3,
            exp4,
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
    )
    assert suite.expectations == [
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite
Example #12
def test_golden_path_sql_datasource_configuration(
        sa, empty_data_context, test_connectable_postgresql_db):
    """Tests the golden path for setting up a StreamlinedSQLDatasource using test_yaml_config"""
    context = empty_data_context

    os.chdir(context.root_directory)

    # Everything below this line (except for asserts) is what we expect users to run as part of the golden path.
    import great_expectations as ge

    context = ge.get_context()

    db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
    yaml_config = f"""
class_name: SimpleSqlalchemyDatasource
credentials:
    drivername: postgresql
    username: postgres
    password: ""
    host: {db_hostname}
    port: 5432
    database: test_ci

introspection:
    whole_table_with_limits:
        sampling_method: _sample_using_limit
        sampling_kwargs:
            n: 10
"""
    # noinspection PyUnusedLocal
    report_object = context.test_yaml_config(
        name="my_datasource",
        yaml_config=yaml_config,
        return_mode="report_object",
    )
    print(json.dumps(report_object, indent=2))
    print(context.datasources)

    my_batch = context.get_batch(
        "my_datasource",
        "whole_table_with_limits",
        "test_df",
    )
    # assert len(my_batch.data.fetchall()) == 10

    with pytest.raises(KeyError):
        my_batch = context.get_batch(
            "my_datasource",
            "whole_table_with_limits",
            "DOES_NOT_EXIST",
        )

    my_validator = context.get_validator(
        datasource_name="my_datasource",
        data_connector_name="whole_table_with_limits",
        data_asset_name="test_df",
        expectation_suite=ExpectationSuite("my_expectation_suite"),
    )
    my_evr = my_validator.expect_table_columns_to_match_set(column_set=[])
    print(my_evr)
Example #13
    def get_expectation_suite(
        self,
        expectation_suite_name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> ExpectationSuite:
        """Get an Expectation Suite by name or GE Cloud ID
        Args:
            expectation_suite_name (str): the name for the Expectation Suite
            ge_cloud_id (str): the GE Cloud ID for the Expectation Suite

        Returns:
            expectation_suite
        """
        key = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id,
        )
        if self.expectations_store.has_key(key):
            expectations_schema_dict: dict = cast(
                dict, self.expectations_store.get(key))
            # create the ExpectationSuite from constructor
            return ExpectationSuite(**expectations_schema_dict,
                                    data_context=self)

        else:
            raise ge_exceptions.DataContextError(
                f"expectation_suite {expectation_suite_name} not found")
Example #14
    def create_expectation_suite(
        self,
        expectation_suite_name: str,
        overwrite_existing: bool = False,
        ge_cloud_id: Optional[str] = None,
        **kwargs: Optional[dict],
    ) -> ExpectationSuite:
        """Build a new expectation suite and save it into the data_context expectation store.

        Args:
            expectation_suite_name: The name of the expectation_suite to create
            overwrite_existing (boolean): Whether to overwrite expectation suite if expectation suite with given name
                already exists.

        Returns:
            A new (empty) expectation suite.
        """
        if not isinstance(overwrite_existing, bool):
            raise ValueError(
                "Parameter overwrite_existing must be of type BOOL")

        expectation_suite: ExpectationSuite = ExpectationSuite(
            expectation_suite_name=expectation_suite_name, data_context=self)
        key = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id,
        )
        if self.expectations_store.has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                "expectation_suite with GE Cloud ID {} already exists. If you would like to overwrite this "
                "expectation_suite, set overwrite_existing=True.".format(
                    ge_cloud_id))
        self.expectations_store.set(key, expectation_suite, **kwargs)
        return expectation_suite
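In this GE Cloud variant both methods key the store on ge_cloud_id; expectation_suite_name only names the suite and appears in error messages. A call sketch with an illustrative UUID:

suite = context.create_expectation_suite(
    expectation_suite_name="taxi.demo",
    ge_cloud_id="3e8eee33-b425-4b36-a831-6e9dd31ad5af",  # illustrative
)
same_suite = context.get_expectation_suite(
    ge_cloud_id="3e8eee33-b425-4b36-a831-6e9dd31ad5af")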
Example #15
def test_ExpectationsStore_with_DatabaseStoreBackend_postgres(caplog):
    connection_kwargs = {
        "drivername": "postgresql",
        "username": "******",
        "password": "",
        "host": "localhost",
        "port": "5432",
        "database": "test_ci",
    }

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs,
    })
    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c"))
    # initial set and check if first suite exists
    my_store.set(ns_1, default_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    my_store.set(ns_1, updated_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
Example #16
def empty_suite():
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[],
        meta={
            "notes": "This is an expectation suite."
        }
    )
Example #17
def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        out = asset.validate(expectation_suite=ExpectationSuite(
            expectations=[], expectation_suite_name="test", meta={}))
    assert w[0].message.args[
        0] == "WARNING: No great_expectations version found in configuration object."

    with pytest.warns(UserWarning) as w:
        out = asset.validate(expectation_suite=ExpectationSuite(
            expectations=[],
            expectation_suite_name="test",
            meta={"great_expectations.__version__": "0.0.0"}))
    assert w[0].message.args[0] == \
            "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "\
            "being validated by version %s." % ge.__version__
Example #18
def equivalent_suite(exp1, exp3):
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes":
            "This is another expectation suite, with a different name and meta"
        })
Example #19
    def render(self, expectations):
        if isinstance(expectations, dict):
            expectations = ExpectationSuite(**expectations, data_context=None)
        (
            columns,
            ordered_columns,
        ) = expectations.get_grouped_and_ordered_expectations_by_column()
        expectation_suite_name = expectations.expectation_suite_name

        overview_content_blocks = [
            self._render_expectation_suite_header(),
            self._render_expectation_suite_info(expectations),
        ]

        table_level_expectations_content_block = self._render_table_level_expectations(
            columns)
        if table_level_expectations_content_block is not None:
            overview_content_blocks.append(
                table_level_expectations_content_block)

        asset_notes_content_block = self._render_expectation_suite_notes(
            expectations)
        if asset_notes_content_block is not None:
            overview_content_blocks.append(asset_notes_content_block)

        sections = [
            RenderedSectionContent(
                **{
                    "section_name": "Overview",
                    "content_blocks": overview_content_blocks,
                })
        ]

        sections += [
            self._column_section_renderer.render(expectations=columns[column])
            for column in ordered_columns if column != "_nocolumn"
        ]
        return RenderedDocumentContent(
            **{
                "renderer_type": "ExpectationSuitePageRenderer",
                "page_title": f"Expectations / {str(expectation_suite_name)}",
                "expectation_suite_name": expectation_suite_name,
                "utm_medium": "expectation-suite-page",
                "sections": sections,
            })
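A sketch of driving the renderer above end to end, assuming a default-constructed ExpectationSuitePageRenderer and an empty suite (as in the notes test earlier):

renderer = ExpectationSuitePageRenderer()
suite = ExpectationSuite(expectation_suite_name="users.warning")
# Returns a RenderedDocumentContent titled "Expectations / users.warning".
document = renderer.render(suite)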
Example #20
    def self_check(self, pretty_print):
        return_obj = {}

        if pretty_print:
            print("Checking for existing keys...")

        return_obj["keys"] = self.list_keys()
        return_obj["len_keys"] = len(return_obj["keys"])
        len_keys = return_obj["len_keys"]

        if pretty_print:
            if return_obj["len_keys"] == 0:
                print(f"\t{len_keys} keys found")
            else:
                print(f"\t{len_keys} keys found:")
                for key in return_obj["keys"][:10]:
                    print("\t\t" + str(key))
            if len_keys > 10:
                print("\t\t...")
            print()

        test_key_name = "test-key-" + "".join(
            [random.choice(list("0123456789ABCDEF")) for i in range(20)]
        )
        test_key = self._key_class(test_key_name)
        test_value = ExpectationSuite(test_key_name)

        if pretty_print:
            print(f"Attempting to add a new test key: {test_key}...")
        self.set(key=test_key, value=test_value)
        if pretty_print:
            print("\tTest key successfully added.")
            print()

        if pretty_print:
            print(
                f"Attempting to retrieve the test value associated with key: {test_key}..."
            )
        test_value = self.get(
            key=test_key,
        )
        if pretty_print:
            print("\tTest value successfully retrieved.")
            print()

        if pretty_print:
            print(f"Cleaning up test key and value: {test_key}...")

        test_value = self.remove_key(
            # key=self.key_to_tuple(test_key),
            key=self.key_to_tuple(test_key),
        )
        if pretty_print:
            print("\tTest key and value successfully removed.")
            print()

        return return_obj
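A sketch of exercising self_check on an ExpectationsStore with its default in-memory backend; per the code above, the report lists the pre-existing keys and their count, and the probe key is removed again before returning:

store = ExpectationsStore()
report = store.self_check(pretty_print=False)
assert report["len_keys"] == len(report["keys"])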
Example #21
def titanic_profiled_expectations_1(empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    with open(
            file_relative_path(
                __file__, "./fixtures/BasicDatasetProfiler_expectations.json"),
    ) as infile:
        expectation_suite_dict: dict = expectationSuiteSchema.load(
            json.load(infile))
        return ExpectationSuite(**expectation_suite_dict, data_context=context)
Example #22
def titanic_dataset_profiler_expectations(empty_data_context_module_scoped):
    context: DataContext = empty_data_context_module_scoped
    with open(
            file_relative_path(
                __file__, "./fixtures/BasicDatasetProfiler_expectations.json"),
    ) as infile:
        expectations_dict: dict = expectationSuiteSchema.load(
            json.load(fp=infile, object_pairs_hook=OrderedDict))
        return ExpectationSuite(**expectations_dict, data_context=context)
Example #23
def get_or_create_expectation_suite(
    data_context: "BaseDataContext",  # noqa: F821
    expectation_suite: Optional["ExpectationSuite"] = None,  # noqa: F821
    expectation_suite_name: Optional[str] = None,
    component_name: Optional[str] = None,
    persist: bool = False,
) -> "ExpectationSuite":  # noqa: F821
    """
    Use "expectation_suite" if provided.  If not, then if "expectation_suite_name" is specified, then create
    "ExpectationSuite" from it.  Otherwise, generate temporary "expectation_suite_name" using supplied "component_name".
    """
    generate_temp_expectation_suite_name: bool
    create_expectation_suite: bool

    if expectation_suite is not None and expectation_suite_name is not None:
        if expectation_suite.expectation_suite_name != expectation_suite_name:
            raise ValueError(
                'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.'
            )

        return expectation_suite
    elif expectation_suite is None and expectation_suite_name is not None:
        generate_temp_expectation_suite_name = False
        create_expectation_suite = True
    elif expectation_suite is not None and expectation_suite_name is None:
        generate_temp_expectation_suite_name = False
        create_expectation_suite = False
    else:
        generate_temp_expectation_suite_name = True
        create_expectation_suite = True

    if generate_temp_expectation_suite_name:
        if not component_name:
            component_name = "test"

        expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}.{str(uuid.uuid4())[:8]}"

    if create_expectation_suite:
        if persist:
            try:
                # noinspection PyUnusedLocal
                expectation_suite = data_context.get_expectation_suite(
                    expectation_suite_name=expectation_suite_name)
            except ge_exceptions.DataContextError:
                expectation_suite = data_context.create_expectation_suite(
                    expectation_suite_name=expectation_suite_name)
                logger.info(
                    f'Created ExpectationSuite "{expectation_suite.expectation_suite_name}".'
                )
        else:
            expectation_suite = ExpectationSuite(
                expectation_suite_name=expectation_suite_name,
                data_context=data_context,
            )

    return expectation_suite
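The helper above distinguishes four argument combinations; a sketch of the most common call (the context and suite name are illustrative):

# suite + name -> returned as-is (names must match, else ValueError)
# name only    -> a suite is built in memory, or fetched/persisted when persist=True
# suite only   -> returned as-is
# neither      -> a temporary name is generated from the prefix/stem constants
#                 plus a short uuid, then a suite is created
suite = get_or_create_expectation_suite(
    data_context=context,
    expectation_suite_name="profiling.users",
    persist=False,
)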
Example #24
def parameterized_expectation_suite(empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    fixture_path = file_relative_path(
        __file__,
        "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
    )
    with open(fixture_path) as suite:
        expectation_suite_dict: dict = expectationSuiteSchema.load(
            json.load(suite))
        return ExpectationSuite(**expectation_suite_dict, data_context=context)
Example #25
def test_data_asset_expectation_suite():
    asset = DataAsset()
    default_suite = ExpectationSuite(
        expectation_suite_name="default",
        data_asset_type="DataAsset",
        meta={"great_expectations.__version__": ge_version},
        expectations=[])

    # We should have a default-initialized suite stored internally and available for getting
    assert asset._expectation_suite == default_suite
    assert asset.get_expectation_suite() == default_suite
Example #26
def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(expectations=[],
                                 expectation_suite_name="test")
        # mangle the metadata
        suite.meta = {"foo": "bar"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0] ==
        "WARNING: No great_expectations version found in configuration object."
    )

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(
            expectations=[],
            expectation_suite_name="test",
            meta={"great_expectations_version": "0.0.0"},
        )
        # mangle the metadata
        suite.meta = {"great_expectations_version": "0.0.0"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0] ==
        "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "
        "being validated by version %s." % ge.__version__)
Example #27
def test_expectations_store():
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1,
                 ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning")

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(ns_2,
                 ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure")

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #28
def test_sqlalchemy_source_templating(sqlitedb_engine):
    datasource = SqlAlchemyDatasource(engine=sqlitedb_engine, generators={
        "foo": {
            "class_name": "QueryBatchKwargsGenerator"
        }
    })
    generator = datasource.get_generator("foo")
    generator.add_query("test", "select 'cat' as ${col_name};")
    batch = datasource.get_batch(generator.build_batch_kwargs("test", query_parameters={'col_name': "animal_name"}))
    dataset = Validator(batch, expectation_suite=ExpectationSuite("test"), expectation_engine=SqlAlchemyDataset).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set('animal_name', ['cat'])
    assert res.success is True
Example #29
def test_pandas_datasource_processes_dataset_options(test_folder_connection_path):
    datasource = SparkDFDatasource('PandasCSV', generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path
            }
        }
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader", name="test")
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False
Example #30
    def get_batch_kwargs(self, suite: ExpectationSuite, batch_kwargs: Union[dict, BatchKwargs]):
        if isinstance(batch_kwargs, dict):
            return self._fix_path_in_batch_kwargs(batch_kwargs)

        citations = suite.meta.get("citations")
        if not citations:
            return self._fix_path_in_batch_kwargs(batch_kwargs)

        citations = suite.get_citations(require_batch_kwargs=True)
        if not citations:
            return None

        citation = citations[-1]
        batch_kwargs = citation.get("batch_kwargs")
        return self._fix_path_in_batch_kwargs(batch_kwargs)
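Finally, a sketch of the fallback order implemented above, with toolkit standing in for the object that owns the method: an explicit dict wins; otherwise the most recent citation carrying batch_kwargs is used.

suite = ExpectationSuite(expectation_suite_name="users.warning")
suite.add_citation(comment="profiled batch",
                   batch_kwargs={"path": "/data/users.csv"})
# batch_kwargs is not a dict here, so the citation's batch_kwargs win:
resolved = toolkit.get_batch_kwargs(suite, batch_kwargs=None)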