Example #1
def test_ValidationsStore_with_InMemoryStoreBackend():
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        })

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-100"))
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier.from_tuple(
        ("a", "b", "c", "quarantine", "prod-200"))
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
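
The examples on this page are shown without their imports. A plausible preamble, inferred from the names used in the snippets (exact module paths vary across Great Expectations versions and are an assumption here), is:

# Hypothetical imports for the store-focused snippets; adjust to your GE version.
import datetime

import pytest

from great_expectations.core import ExpectationSuiteValidationResult
from great_expectations.data_context.store import ValidationsStore
from great_expectations.data_context.types.resource_identifiers import (
    ExpectationSuiteIdentifier,
    ValidationResultIdentifier,
)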
Example #2
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir")
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier.from_tuple((
        "asset",
        "quarantine",
        "prod-20",
        datetime.datetime.now(datetime.timezone.utc),
        "batch_id",
    ))
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert (gen_directory_tree_str(path) == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
""")
Example #3
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    """
    What does this test and why?
    A Store should be able to report its store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists and can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
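
The test_utils.validate_uuid4 helper used above (and in later examples) is not shown on this page. A minimal sketch of such a check, assuming it simply round-trips the value through uuid.UUID, could look like this:

import uuid

def validate_uuid4(uuid_to_test) -> bool:
    # Hypothetical helper: True only if the value parses as a canonical
    # version-4 UUID and round-trips unchanged.
    try:
        uuid_obj = uuid.UUID(str(uuid_to_test), version=4)
    except ValueError:
        return False
    return str(uuid_obj) == str(uuid_to_test)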
Example #4
def test_ValidationsStore_with_TupleS3StoreBackend():
    bucket = "test_validation_store_bucket"
    prefix = "test/prefix"

    # create a bucket in Moto's mock AWS environment
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)

    # First, demonstrate that we pick up default configuration, including from a TupleS3StoreBackend
    my_store = ValidationsStore(store_backend={
        "class_name": "TupleS3StoreBackend",
        "bucket": bucket,
        "prefix": prefix,
    })

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[])

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine", ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[])

    # Verify that internals are working as expected, including the default filepath
    assert {
        s3_object_info["Key"]
        for s3_object_info in boto3.client("s3").list_objects_v2(
            Bucket=bucket, Prefix=prefix)["Contents"]
    } == {
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_100/20190926T134241.000000Z/batch_id.json",
        "test/prefix/asset/quarantine/20191007T151224.1234Z_prod_200/20190926T134241.000000Z/batch_id.json",
    }

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
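
The comment in this test notes that the bucket is created in Moto's mock AWS environment, but the mock itself is not part of the snippet. One plausible way to activate it (an assumption, not taken from the original test) is an autouse fixture:

import pytest
from moto import mock_s3

@pytest.fixture(autouse=True)
def mocked_s3():
    # Hypothetical fixture: routes every boto3 S3 call in the test to moto's
    # in-memory implementation instead of real AWS.
    with mock_s3():
        yield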
Example #5
def test_ValidationsStore_with_InMemoryStoreBackend():
    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-100",
        )
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "a",
            "b",
            "c",
            "quarantine",
            datetime.datetime.now(datetime.timezone.utc),
            "prod-200",
        )
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
    """
    What does this test and why?
    A Store should be able to report its store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists and can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)
Example #6
def test_StoreAction():
    fake_in_memory_store = ValidationsStore(
        store_backend={"class_name": "InMemoryStoreBackend",}
    )
    stores = {"fake_in_memory_store": fake_in_memory_store}

    class Object:
        pass

    data_context = Object()
    data_context.stores = stores

    action = StoreValidationResultAction(
        data_context=data_context, target_store_name="fake_in_memory_store",
    )
    assert fake_in_memory_store.list_keys() == []

    action.run(
        validation_result_suite_identifier=ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id="prod_20190801",
            batch_identifier="1234",
        ),
        validation_result_suite=ExpectationSuiteValidationResult(
            success=False, results=[]
        ),
        data_asset=None,
    )

    expected_run_id = RunIdentifier(
        run_name="prod_20190801", run_time="20190926T134241.000000Z"
    )

    assert len(fake_in_memory_store.list_keys()) == 1
    stored_identifier = fake_in_memory_store.list_keys()[0]
    assert stored_identifier.batch_identifier == "1234"
    assert (
        stored_identifier.expectation_suite_identifier.expectation_suite_name
        == "default_expectations"
    )
    assert stored_identifier.run_id == expected_run_id

    assert fake_in_memory_store.get(
        ValidationResultIdentifier(
            expectation_suite_identifier=ExpectationSuiteIdentifier(
                expectation_suite_name="default_expectations"
            ),
            run_id=expected_run_id,
            batch_identifier="1234",
        )
    ) == ExpectationSuiteValidationResult(success=False, results=[])
Example #7
def test_SlackNotificationAction(data_context):
    renderer = {
        "module_name": "great_expectations.render.renderer.slack_renderer",
        "class_name": "SlackRenderer",
    }
    slack_webhook = "https://hooks.slack.com/services/test/slack/webhook"
    notify_on = "all"

    slack_action = SlackNotificationAction(
        data_context=data_context,
        renderer=renderer,
        slack_webhook=slack_webhook,
        notify_on=notify_on
    )

    validation_result_suite = ExpectationSuiteValidationResult(results=[], success=True,
                                                               statistics={'evaluated_expectations': 0,
                                                                           'successful_expectations': 0,
                                                                           'unsuccessful_expectations': 0,
                                                                           'success_percent': None},
                                                               meta={
                                                                   'great_expectations.__version__': 'v0.8.0__develop',
                                                                   'expectation_suite_name': 'asset.default',
                                                                   'run_id': 'test_100'})

    validation_result_suite_id = ValidationResultIdentifier(expectation_suite_identifier=ExpectationSuiteIdentifier(
        "asset.default"), run_id="test_100", batch_identifier="1234")

    # TODO: improve this test - currently it is verifying a failed call to Slack
    assert slack_action.run(
        validation_result_suite_identifier=validation_result_suite_id,
        validation_result_suite=validation_result_suite,
        data_asset=None
    ) is None
Example #8
def test_StoreMetricsAction_column_metric(basic_in_memory_data_context_for_validation_operator):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": ["expect_column_values_to_be_unique.result.unexpected_count"]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations"
            ]
        },
        target_store_name="metrics_store"
    )

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo",
            "run_id": "bar"
        },
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": []
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={
                        "column": "provider_id",
                        "result_format": "BASIC"
                    }
                ),
                exception_info=None
            )
        ],
        statistics={
            "evaluated_expectations": 5,
            "successful_expectations": 3
        }
    )

    action.run(validation_result, ValidationResultIdentifier.from_object(validation_result), data_asset=None)

    assert basic_in_memory_data_context_for_validation_operator.stores["metrics_store"].get(ValidationMetricIdentifier(
        run_id="bar",
        expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
        metric_name="expect_column_values_to_be_unique.result.unexpected_count",
        metric_kwargs_id="column=provider_id"
    )) == 7
Example #9
def test_SlackRenderer():
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            'evaluated_expectations': 0,
            'successful_expectations': 0,
            'unsuccessful_expectations': 0,
            'success_percent': None
        },
        meta={
            'great_expectations.__version__': 'v0.8.0__develop',
            'data_asset_name': {
                'datasource': 'x',
                'generator': 'y',
                'generator_asset': 'z'
            },
            'expectation_suite_name': 'default',
            'run_id': '2019-09-25T060538.829112Z'
        })

    rendered_output = SlackRenderer().render(validation_result_suite)
    print(rendered_output)

    expected_renderer_output = {
        'blocks': [{
            'type': 'section',
            'text': {
                'type':
                'mrkdwn',
                'text':
                '*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Timestamp*: `09/24/2019 23:18:36`\n*Summary*: *0* of *0* expectations were met'
            }
        }, {
            'type': 'divider'
        }, {
            'type':
            'context',
            'elements': [{
                'type':
                'mrkdwn',
                'text':
                'Learn how to review validation results: https://docs.greatexpectations.io/en/latest/features/validation.html#reviewing-validation-results'
            }]
        }],
        'text':
        'default: Success :tada:'
    }

    # We're okay with system variation in locales (OS X likes 24 hour, but not Travis)
    expected_renderer_output['blocks'][0]['text']['text'] = \
        expected_renderer_output['blocks'][0]['text']['text'].replace('09/24/2019 11:18:36 PM', 'LOCALEDATE')
    expected_renderer_output['blocks'][0]['text']['text'] = \
        expected_renderer_output['blocks'][0]['text']['text'].replace('09/24/2019 23:18:36', 'LOCALEDATE')
    rendered_output['blocks'][0]['text']['text'] = \
        rendered_output['blocks'][0]['text']['text'].replace('09/24/2019 11:18:36 PM', 'LOCALEDATE')
    rendered_output['blocks'][0]['text']['text'] = \
        rendered_output['blocks'][0]['text']['text'].replace('09/24/2019 23:18:36', 'LOCALEDATE')

    assert rendered_output == expected_renderer_output
Example #10
def test_OpsgenieRenderer_validation_results_success():

    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.12.2__develop",
            "batch_kwargs": {
                "data_asset_name": "x/y/z"
            },
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2021-01-01T000000.000000Z",
        },
    )

    rendered_output = OpsgenieRenderer().render(validation_result_suite)

    expected_output = "Batch Validation Status: Success 🎉\nExpectation suite name: default\nData asset name: x/y/z\nRun ID: 2021-01-01T000000.000000Z\nBatch ID: data_asset_name=x/y/z\nSummary: 0 of 0 expectations were met"

    assert rendered_output == expected_output
Example #11
def test_render_DefaultJinjaPageView_meta_info():
    validation_results = ExpectationSuiteValidationResult(
        **{
            "results": [],
            "statistics": {
                "evaluated_expectations": 156,
                "successful_expectations": 139,
                "unsuccessful_expectations": 17,
                "success_percent": 89.1025641025641,
            },
            "meta": {
                "great_expectations.__version__": "0.7.0-beta",
                "data_asset_name": "datasource/generator/tetanusvaricella",
                "expectation_suite_name": "my_suite",
                "run_id": "2019-06-25T14:58:09.960521",
                "batch_kwargs": {
                    "path":
                    "/Users/user/project_data/public_healthcare_datasets/tetanusvaricella/tetvardata.csv",
                    "timestamp": 1561474688.693565,
                },
            },
        })

    document = ProfilingResultsPageRenderer().render(validation_results)
    html = DefaultJinjaPageView().render(document)
    with open(
            file_relative_path(
                __file__,
                "./output/test_render_DefaultJinjaPageView_meta_info.html)"),
            "w",
    ) as outfile:
        outfile.write(html)
Example #12
    def _run(
        self,
        validation_result_suite: ExpectationSuiteValidationResult,
        validation_result_suite_identifier: ValidationResultIdentifier,
        expectation_suite_identifier=None,
        checkpoint_identifier=None,
        data_asset=None,
        **kwargs,
    ) -> str:
        logger.debug("SNSNotificationAction.run")

        if validation_result_suite is None:
            logger.warning(
                f"No validation_result_suite was passed to {type(self).__name__} action. Skipping action. "
            )

        if self.sns_message_subject is None:
            logger.warning(
                f"No message subject was passed checking for expectation_suite_name"
            )
            if expectation_suite_identifier is None:
                subject = validation_result_suite_identifier.run_id
                logger.warning(
                    f"No expectation_suite_identifier was passed. Defaulting to validation run_id: {subject}."
                )
            else:
                subject = expectation_suite_identifier.expectation_suite_name
                logger.info(f"Using expectation_suite_name: {subject}")
        else:
            subject = self.sns_message_subject

        return send_sns_notification(self.sns_topic_arn, subject,
                                     validation_result_suite.__str__(),
                                     **kwargs)
Example #13
def test_ProfilingResultsOverviewSectionRenderer_empty_type_list():
    # This rather specific test is a reaction to the error documented in #679
    validation = ExpectationSuiteValidationResult(
        results=[
            ExpectationValidationResult(
                success=True,
                result={
                    'observed_value': "VARIANT",  # Note this is NOT a recognized type by many backends
                },
                exception_info={
                    'raised_exception': False, 'exception_message': None, 'exception_traceback': None
                },
                expectation_config=ExpectationConfiguration(
                    expectation_type='expect_column_values_to_be_in_type_list',
                    kwargs={
                        'column': 'live', 'type_list': None, 'result_format': 'SUMMARY'
                    },
                    meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
                )
            )
        ]
    )

    result = ProfilingResultsOverviewSectionRenderer().render(validation)

    # Find the variable types content block:
    types_table = [
        block.table for block in result.content_blocks
        if block.content_block_type == "table" and block.header.string_template["template"] == "Variable types"
    ][0]
    assert ["unknown", "1"] in types_table
Example #14
def test_ValidationsStore_with_DatabaseStoreBackend(sa):
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ValidationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name="asset.quarantine",
        ),
        run_id="20191007T151224.1234Z_prod_200",
        batch_identifier="batch_id",
    )

    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #15
def validation_result_suite():
    return ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )
Example #16
def test_send_sns_notification(sns, aws_credentials):
    results = {
        "success": True,
        "results": {
            "observed_value": 5.0,
            "element_count": 5,
            "missing_count": None,
            "missing_percent": None,
        },
    }
    result = ExpectationSuiteValidationResult(**results)
    topic = "test"
    created = sns.create_topic(Name=topic)
    response = send_sns_notification(created.get("TopicArn"),
                                     str(result.success), str(result.results))

    assert response.startswith("Successfully")
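
The sns and aws_credentials fixtures used by this test are not shown here. A minimal sketch, assuming they wrap Moto's SNS mock and dummy credentials, might be:

import os

import boto3
import pytest
from moto import mock_sns

@pytest.fixture
def aws_credentials():
    # Hypothetical fixture: dummy credentials so boto3 never reaches real AWS.
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"

@pytest.fixture
def sns(aws_credentials):
    # Hypothetical fixture: an SNS client backed by moto's in-memory service.
    with mock_sns():
        yield boto3.client("sns", region_name="us-east-1")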
Example #17
def test_SlackNotificationAction(data_context_parameterized_expectation_suite):
    renderer = {
        "module_name": "great_expectations.render.renderer.slack_renderer",
        "class_name": "SlackRenderer",
    }
    slack_webhook = "https://hooks.slack.com/services/test/slack/webhook"
    notify_on = "all"

    slack_action = SlackNotificationAction(
        data_context=data_context_parameterized_expectation_suite,
        renderer=renderer,
        slack_webhook=slack_webhook,
        notify_on=notify_on,
    )

    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )

    validation_result_suite_id = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id="test_100",
        batch_identifier="1234",
    )

    # TODO: improve this test - currently it is verifying a failed call to Slack. It returns an "empty" payload
    assert slack_action.run(
        validation_result_suite_identifier=validation_result_suite_id,
        validation_result_suite=validation_result_suite,
        data_asset=None,
    ) == {
        "slack_notification_result": None
    }
Example #18
def test_MicrosoftTeams_validation_results_with_datadocs():
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "expectation_suite_name": "asset.default",
            "run_id": "test_100",
        },
    )

    validation_result_suite_identifier = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            "asset.default"),
        run_id=RunIdentifier(run_name="test_100",
                             run_time="Tue May 08 15:14:45 +0800 2012"),
        batch_identifier=BatchIdentifier(batch_identifier="1234",
                                         data_asset_name="asset"),
    )

    data_docs_pages = {"local_site": "file:///localsite/index.html"}

    rendered_output = MicrosoftTeamsRenderer().render(
        validation_result_suite, validation_result_suite_identifier,
        data_docs_pages)

    expected_output = {
        "attachments": [{
            "content": {
                "$schema":
                "http://adaptivecards.io/schemas/adaptive-card.json",
                "actions": [{
                    "title": "Open data docs",
                    "type": "Action.OpenUrl",
                    "url": "file:///localsite/index.html",
                }],
                "body": [
                    {
                        "height":
                        "auto",
                        "items": [{
                            "columns": [{
                                "items": [
                                    {
                                        "size": "large",
                                        "text": "Validation "
                                        "results",
                                        "type": "TextBlock",
                                        "weight": "bolder",
                                        "wrap": True,
                                    },
                                    {
                                        "isSubtle": True,
                                        "spacing": "none",
                                        "text": "May "
                                        "08 "
                                        "2012 "
                                        "07:14:45",
                                        "type": "TextBlock",
                                        "wrap": True,
                                    },
                                ],
                                "type":
                                "Column",
                                "width":
                                "stretch",
                            }],
                            "type":
                            "ColumnSet",
                        }],
                        "separator":
                        True,
                        "type":
                        "Container",
                    },
                    {
                        "height":
                        "auto",
                        "items": [
                            {
                                "color": "good",
                                "horizontalAlignment": "left",
                                "text": "**Batch validation "
                                "status:** Success "
                                "!!!",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Data asset "
                                "name:** asset",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Expectation "
                                "suite name:** "
                                "asset.default",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Run name:** "
                                "test_100",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment": "left",
                                "text": "**Batch ID:** 1234",
                                "type": "TextBlock",
                            },
                            {
                                "horizontalAlignment":
                                "left",
                                "text":
                                "**Summary:** *0* "
                                "of *0* "
                                "expectations were "
                                "met",
                                "type":
                                "TextBlock",
                            },
                        ],
                        "separator":
                        True,
                        "type":
                        "Container",
                    },
                ],
                "type":
                "AdaptiveCard",
                "version":
                "1.0",
            },
            "contentType":
            "application/vnd.microsoft.card.adaptive",
        }],
        "type":
        "message",
    }

    assert rendered_output == expected_output
Example #19
def test_SlackRenderer_validation_results_with_datadocs():

    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations_version": "v0.8.0__develop",
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2019-09-25T060538.829112Z",
        },
    )

    rendered_output = SlackRenderer().render(validation_result_suite)

    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }
    assert rendered_output == expected_output

    data_docs_pages = {"local_site": "file:///localsite/index.html"}
    notify_with = ["local_site"]
    rendered_output = SlackRenderer().render(
        validation_result_suite, data_docs_pages, notify_with
    )

    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*DataDocs* can be found here: `file:///localsite/index.html` \n (Please copy and paste link into a browser to view)\n",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }
    assert rendered_output == expected_output

    # not configured
    notify_with = ["fake_site"]
    rendered_output = SlackRenderer().render(
        validation_result_suite, data_docs_pages, notify_with
    )

    expected_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "*ERROR*: Slack is trying to provide a link to the following DataDocs: `fake_site`, but it is not configured under `data_docs_sites` in the `great_expectations.yml`\n",
                },
            },
            {"type": "divider"},
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html",
                    }
                ],
            },
        ],
        "text": "default: Success :tada:",
    }

    assert rendered_output == expected_output
Example #20
def test_StoreMetricsAction(
        basic_in_memory_data_context_for_validation_operator):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )

    run_id = RunIdentifier(run_name="bar")

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo",
            "run_id": run_id
        },
        statistics={
            "evaluated_expectations": 5,
            "successful_expectations": 3
        },
    )

    # Run the action and store our metrics
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo.warning",
            "run_id": run_id
        },
        statistics={
            "evaluated_expectations": 8,
            "successful_expectations": 4
        },
    )

    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    assert (
        basic_in_memory_data_context_for_validation_operator.
        stores["metrics_store"].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="statistics.evaluated_expectations",
                metric_kwargs_id=None,
            )) == 5)

    assert (
        basic_in_memory_data_context_for_validation_operator.
        stores["metrics_store"].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="statistics.successful_expectations",
                metric_kwargs_id=None,
            )) == 3)

    assert (basic_in_memory_data_context_for_validation_operator.
            stores["metrics_store"].get(
                ValidationMetricIdentifier(
                    run_id=run_id,
                    data_asset_name=None,
                    expectation_suite_identifier=ExpectationSuiteIdentifier(
                        "foo.warning"),
                    metric_name="statistics.evaluated_expectations",
                    metric_kwargs_id=None,
                )) == 8)

    assert (basic_in_memory_data_context_for_validation_operator.
            stores["metrics_store"].get(
                ValidationMetricIdentifier(
                    run_id=run_id,
                    data_asset_name=None,
                    expectation_suite_identifier=ExpectationSuiteIdentifier(
                        "foo.warning"),
                    metric_name="statistics.successful_expectations",
                    metric_kwargs_id=None,
                )) == 4)
Example #21
def test_evaluation_parameter_store_methods(data_context):
    run_id = "20191125T000000.000000Z"
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_patient_data_results)

    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value':
        1024
    }
    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type=
                    "expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_diabetes_data_results)
    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value':
        1024,
        'urn:great_expectations:validations:source_diabetes_data.default'
        ':expect_column_unique_value_count_to_be_between.result.observed_value:column=patient_nbr':
        2048
    }
Example #22
TABLE_NAME = "test_data"

# Common validation results
table_result = ExpectationValidationResult(success=True,
                                           expectation_config=ExpectationConfiguration(
                                               expectation_type='expect_table_row_count_to_equal',
                                               kwargs={'value': 10}),
                                           result={"observed_value": 10})
column_result = ExpectationValidationResult(success=True,
                                            expectation_config=ExpectationConfiguration(
                                                expectation_type='expect_column_sum_to_be_between',
                                                kwargs={'column': 'size', 'min_value': 0,
                                                        'max_value': 100}
                                            ),
                                            result={'observed_value': 60})
result_suite = ExpectationSuiteValidationResult(success=True, meta={'batch_kwargs': {}},
                                                results=[table_result, column_result])


@pytest.fixture(scope='session')
def test_db_file():
    fd, file = tempfile.mkstemp()
    conn = sqlite3.connect(file)
    cursor = conn.cursor()
    cursor.execute(
        f'CREATE TABLE {TABLE_NAME} (name text, birthdate text, address text, size integer)')
    yield file
    os.remove(file)


def test_dataset_from_sql_source(test_db_file, tmpdir):
    connection_url = f'sqlite:///{test_db_file}'
Example #23
def test_validate():

    with open(
            file_relative_path(__file__,
                               "./test_sets/titanic_expectations.json")) as f:
        my_expectation_suite = expectationSuiteSchema.loads(f.read())

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(file_relative_path(__file__,
                                               "./test_sets/Titanic.csv"),
                            expectation_suite=my_expectation_suite)
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        results = my_df.validate(catch_exceptions=False)

    with open(
            file_relative_path(
                __file__,
                './test_sets/titanic_expected_data_asset_validate_results.json'
            )) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(
            f.read())

    del results.meta["great_expectations.__version__"]

    assert expected_results == results

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert expected_results != results

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        validation_results = my_df.validate(only_return_failures=True)
        del validation_results.meta["great_expectations.__version__"]

    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": "19551105T000000.000000Z",
            "batch_kwargs": {
                "ge_batch_id": "1234"
            },
            "batch_markers": {},
            "batch_parameters": {}
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={
                        "column": "PClass",
                        "value_set": ["1st", "2nd", "3rd"]
                    }),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{
                        "count": 1,
                        "value": "*"
                    }],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456]
                })
        ],
        success=expected_results.success,  # unaffected
        statistics=expected_results["statistics"]  # unaffected
    )
    assert expected_results == validation_results
Example #24
def test_SlackRenderer():
    validation_result_suite = ExpectationSuiteValidationResult(
        results=[],
        success=True,
        statistics={
            "evaluated_expectations": 0,
            "successful_expectations": 0,
            "unsuccessful_expectations": 0,
            "success_percent": None,
        },
        meta={
            "great_expectations.__version__": "v0.8.0__develop",
            "data_asset_name": {
                "datasource": "x",
                "generator": "y",
                "generator_asset": "z",
            },
            "expectation_suite_name": "default",
            "run_id": "2019-09-25T060538.829112Z",
        },
    )

    rendered_output = SlackRenderer().render(validation_result_suite)
    print(rendered_output)

    expected_renderer_output = {
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type":
                    "mrkdwn",
                    "text":
                    "*Batch Validation Status*: Success :tada:\n*Expectation suite name*: `default`\n*Run ID*: `2019-09-25T060538.829112Z`\n*Batch ID*: `None`\n*Timestamp*: `09/24/2019 23:18:36`\n*Summary*: *0* of *0* expectations were met",
                },
            },
            {
                "type": "divider"
            },
            {
                "type":
                "context",
                "elements": [{
                    "type":
                    "mrkdwn",
                    "text":
                    "Learn how to review validation results in Data Docs: https://docs.greatexpectations.io/en/latest/tutorials/getting_started/set_up_data_docs.html#_getting_started__set_up_data_docs",
                }],
            },
        ],
        "text":
        "default: Success :tada:",
    }

    # We're okay with system variation in locales (OS X likes 24 hour, but not Travis)
    expected_renderer_output["blocks"][0]["text"][
        "text"] = expected_renderer_output["blocks"][0]["text"][
            "text"].replace("09/24/2019 11:18:36 PM", "LOCALEDATE")
    expected_renderer_output["blocks"][0]["text"][
        "text"] = expected_renderer_output["blocks"][0]["text"][
            "text"].replace("09/24/2019 23:18:36", "LOCALEDATE")
    rendered_output["blocks"][0]["text"]["text"] = rendered_output["blocks"][
        0]["text"]["text"].replace("09/24/2019 11:18:36 PM UTC", "LOCALEDATE")
    rendered_output["blocks"][0]["text"]["text"] = rendered_output["blocks"][
        0]["text"]["text"].replace("09/24/2019 23:18:36 UTC", "LOCALEDATE")

    assert rendered_output == expected_renderer_output
Example #25
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert (
        gen_directory_tree_str(path)
        == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        .ge_store_backend_id
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    )

    """
    What does this test and why?
    A Store should be able to report its store_backend_id
    which is set when the StoreBackend is instantiated.
    """
    # Check that store_backend_id exists and can be read
    assert my_store.store_backend_id is not None
    # Check that store_backend_id is a valid UUID
    assert test_utils.validate_uuid4(my_store.store_backend_id)

    # Check that another store with the same configuration shares the same store_backend_id
    my_store_duplicate = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )
    assert my_store.store_backend_id == my_store_duplicate.store_backend_id