Esempio n. 1
0
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Round-trip two validation results through an in-memory ValidationsStore."""
    backend_config = {
        "module_name": "great_expectations.data_context.store",
        "class_name": "InMemoryStoreBackend",
    }
    store = ValidationsStore(store_backend=backend_config)

    # A plain string is not an acceptable key and must raise TypeError.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    # Store one successful and one failed result, reading each back right
    # after it has been written.
    written_keys = []
    for last_element, outcome in (("prod-100", True), ("prod-200", False)):
        key = ValidationResultIdentifier.from_tuple(
            ("a", "b", "c", "quarantine", last_element)
        )
        store.set(key, ExpectationSuiteValidationResult(success=outcome))
        fetched = store.get(key)
        assert fetched == ExpectationSuiteValidationResult(
            success=outcome, statistics={}, results=[]
        )
        written_keys.append(key)

    # The store must list exactly the keys that were written.
    assert set(store.list_keys()) == set(written_keys)
def test_ValidationsStore_with_InMemoryStoreBackend():
    """Exercise get/set/list_keys and store_backend_id on an in-memory ValidationsStore."""
    store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
        }
    )

    # A plain string is not an acceptable key and must raise TypeError.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    def make_key(last_element):
        # The timestamp is taken at call time, so each key gets its own.
        current_time = datetime.datetime.now(datetime.timezone.utc)
        return ValidationResultIdentifier.from_tuple(
            ("a", "b", "c", "quarantine", current_time, last_element)
        )

    first_key = make_key("prod-100")
    store.set(first_key, ExpectationSuiteValidationResult(success=True))
    first_fetched = store.get(first_key)
    assert first_fetched == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    second_key = make_key("prod-200")
    store.set(second_key, ExpectationSuiteValidationResult(success=False))
    second_fetched = store.get(second_key)
    assert second_fetched == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    listed_keys = set(store.list_keys())
    assert listed_keys == {first_key, second_key}

    # What does this test and why?
    # A Store should be able to report its store_backend_id,
    # which is set when the StoreBackend is instantiated.

    # The store_backend_id must exist and be readable ...
    assert store.store_backend_id is not None
    # ... and must be a valid UUID.
    assert test_utils.validate_uuid4(store.store_backend_id)
Esempio n. 3
0
    def _build_validation_and_profiling_result_site_keys(
            self,
            skip_and_clean_missing: bool) -> List[ValidationResultIdentifier]:
        """Collect the validation/profiling result keys held by the target store.

        Returns an empty list when neither the "validations" nor the
        "profiling" section is enabled in ``site_section_builders_config``.
        Otherwise returns the keys listed by the target store's
        ValidationResultIdentifier backend. When ``skip_and_clean_missing``
        is true, keys that are absent from the source store are removed from
        that backend and left out of the returned list.
        """
        section_configs = self.site_section_builders_config
        validations = section_configs.get("validations", "None")
        profiling = section_configs.get("profiling", "None")

        def _enabled(section_value):
            # Truthy and not one of the YAML "falsey" strings.
            return bool(section_value) and section_value not in FALSEY_YAML_STRINGS

        use_validations = _enabled(validations)
        if not (use_validations or _enabled(profiling)):
            return []

        # The validations section takes precedence as the source of truth.
        source_store = "validations" if use_validations else "profiling"
        source_store_name = section_configs[source_store].get("source_store_name")
        source_keys = set(self.data_context.stores[source_store_name].list_keys())

        site_backend = self.target_store.store_backends[ValidationResultIdentifier]
        site_keys = [
            ValidationResultIdentifier.from_tuple(key_tuple)
            for key_tuple in site_backend.list_keys()
        ]
        if not skip_and_clean_missing:
            return site_keys

        # Drop site pages whose result no longer exists in the source store.
        surviving_keys = []
        for site_key in site_keys:
            if site_key in source_keys:
                surviving_keys.append(site_key)
            else:
                site_backend.remove_key(site_key)
        return surviving_keys
Esempio n. 4
0
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Store two validation results on disk and check keys and directory layout."""
    root_dir = tmp_path_factory.mktemp(
        "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
    )
    path = str(root_dir)
    # Created but not used further in this test.
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    backend_config = {
        "module_name": "great_expectations.data_context.store",
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "my_store/",
    }
    store = ValidationsStore(
        store_backend=backend_config,
        runtime_environment={"root_directory": path},
    )

    # A plain string is not an acceptable key and must raise TypeError.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    # First key: built through the constructor.
    suite_identifier = ExpectationSuiteIdentifier("asset.quarantine")
    first_key = ValidationResultIdentifier(
        expectation_suite_identifier=suite_identifier,
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    store.set(first_key, ExpectationSuiteValidationResult(success=True))
    first_fetched = store.get(first_key)
    assert first_fetched == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    # Second key: built from its tuple form.
    second_tuple = (
        "asset",
        "quarantine",
        "prod-20",
        datetime.datetime.now(datetime.timezone.utc),
        "batch_id",
    )
    second_key = ValidationResultIdentifier.from_tuple(second_tuple)
    store.set(second_key, ExpectationSuiteValidationResult(success=False))
    second_fetched = store.get(second_key)
    assert second_fetched == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(store.list_keys())
    assert set(store.list_keys()) == {first_key, second_key}

    expected_tree = """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    print(gen_directory_tree_str(path))
    assert gen_directory_tree_str(path) == expected_tree
def test_HtmlSiteStore_filesystem_backend(tmp_path_factory):

    path = str(tmp_path_factory.mktemp('test_HtmlSiteStore_with_TupleFileSystemStoreBackend__dir'))

    my_store = HtmlSiteStore(
        store_backend={
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store"
        },
        runtime_environment={
            "root_directory": path
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    with pytest.raises(ValidationError):
        my_store.get(validationResultIdentifierSchema.load({}).data)
    
    ns_1 = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=ValidationResultIdentifier.from_tuple(('a', 'b', 'c', 'quarantine', 'prod-100'))
    )
    my_store.set(ns_1, "aaa")
    # assert my_store.get(ns_1) == "aaa"

    ns_2 = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=ValidationResultIdentifier.from_tuple(('a', 'b', 'c', 'quarantine', 'prod-20'))
    )
    my_store.set(ns_2, "bbb")
    # assert my_store.get(ns_2) == {"B": "bbb"}

    print(my_store.list_keys())
    # WARNING: OBSERVE THAT SITE_SECTION_NAME IS LOST IN THE CALL TO LIST_KEYS
    assert set(my_store.list_keys()) == {
        ns_1.resource_identifier,
        ns_2.resource_identifier,
    }

    print(gen_directory_tree_str(path))
    assert gen_directory_tree_str(path) == """\
    def build(self):
        # Loop over sections in the HtmlStore
        logger.debug("DefaultSiteIndexBuilder.build")

        expectation_suite_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
            for expectation_suite_tuple in self.target_store.store_backends[
                ExpectationSuiteIdentifier
            ].list_keys()
        ]
        validation_and_profiling_result_keys = [
            ValidationResultIdentifier.from_tuple(validation_result_tuple)
            for validation_result_tuple in self.target_store.store_backends[
                ValidationResultIdentifier
            ].list_keys()
        ]
        profiling_result_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id.run_name == "profiling"
        ]
        validation_result_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id.run_name != "profiling"
        ]
        validation_result_keys = sorted(
            validation_result_keys, key=lambda x: x.run_id.run_time, reverse=True
        )
        if self.validation_results_limit:
            validation_result_keys = validation_result_keys[
                : self.validation_results_limit
            ]

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_how_to_buttons:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        for expectation_suite_key in expectation_suite_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.expectation_suite_name,
                section_name="expectations",
            )

        for profiling_result_key in profiling_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=profiling_result_key.batch_identifier,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=profiling_result_key.run_id,
                    validations_store_name=self.source_stores.get("profiling"),
                )

                batch_kwargs = validation.meta.get("batch_kwargs", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="profiling",
                    batch_identifier=profiling_result_key.batch_identifier,
                    run_id=profiling_result_key.run_id,
                    run_time=profiling_result_key.run_id.run_time,
                    run_name=profiling_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                )
            except Exception:
                error_msg = "Profiling result not found: {0:s} - skipping".format(
                    str(profiling_result_key.to_tuple())
                )
                logger.warning(error_msg)

        for validation_result_key in validation_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=validation_result_key.batch_identifier,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=validation_result_key.run_id,
                    validations_store_name=self.source_stores.get("validations"),
                )

                validation_success = validation.success
                batch_kwargs = validation.meta.get("batch_kwargs", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="validations",
                    batch_identifier=validation_result_key.batch_identifier,
                    run_id=validation_result_key.run_id,
                    validation_success=validation_success,
                    run_time=validation_result_key.run_id.run_time,
                    run_name=validation_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                )
            except Exception:
                error_msg = "Validation result not found: {0:s} - skipping".format(
                    str(validation_result_key.to_tuple())
                )
                logger.warning(error_msg)

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(
                rendered_content,
                data_context_id=self.data_context_id,
                show_how_to_buttons=self.show_how_to_buttons,
            )
        except Exception as e:
            exception_message = f"""\
An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to \
diagnose and repair the underlying issue.  Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
            )
            logger.error(exception_message, e, exc_info=True)

        return (self.target_store.write_index_page(viewable_content), index_links_dict)
    def build(self, skip_and_clean_missing=True, build_index: bool = True):
        """
        :param skip_and_clean_missing: if True, target html store keys without corresponding source store keys will
        be skipped and removed from the target store
        :param build_index: a flag if False, skips building the index page
        :return: tuple(index_page_url, index_links_dict)
        """

        # Loop over sections in the HtmlStore
        logger.debug("DefaultSiteIndexBuilder.build")
        if not build_index:
            logger.debug("Skipping index rendering")
            return None, None

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_how_to_buttons:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("expectations", "None")
                and self.site_section_builders_config.get(
                    "expectations", "None") not in FALSEY_YAML_STRINGS):
            expectation_suite_source_keys = self.data_context.stores[
                self.site_section_builders_config["expectations"].get(
                    "source_store_name")].list_keys()
            expectation_suite_site_keys = [
                ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
                for expectation_suite_tuple in self.target_store.
                store_backends[ExpectationSuiteIdentifier].list_keys()
            ]
            if skip_and_clean_missing:
                cleaned_keys = []
                for expectation_suite_site_key in expectation_suite_site_keys:
                    if expectation_suite_site_key not in expectation_suite_source_keys:
                        self.target_store.store_backends[
                            ExpectationSuiteIdentifier].remove_key(
                                expectation_suite_site_key)
                    else:
                        cleaned_keys.append(expectation_suite_site_key)
                expectation_suite_site_keys = cleaned_keys

            for expectation_suite_key in expectation_suite_site_keys:
                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=expectation_suite_key.
                    expectation_suite_name,
                    section_name="expectations",
                )

        validation_and_profiling_result_site_keys = []
        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS
                or self.site_section_builders_config.get("profiling", "None")
                and self.site_section_builders_config.get(
                    "profiling", "None") not in FALSEY_YAML_STRINGS):
            source_store = (
                "validations"
                # TODO why is this duplicated?
                if
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS else
                "profiling")
            validation_and_profiling_result_source_keys = self.data_context.stores[
                self.site_section_builders_config[source_store].get(
                    "source_store_name")].list_keys()
            validation_and_profiling_result_site_keys = [
                ValidationResultIdentifier.from_tuple(validation_result_tuple)
                for validation_result_tuple in self.target_store.
                store_backends[ValidationResultIdentifier].list_keys()
            ]
            if skip_and_clean_missing:
                cleaned_keys = []
                for (validation_result_site_key
                     ) in validation_and_profiling_result_site_keys:
                    if (validation_result_site_key not in
                            validation_and_profiling_result_source_keys):
                        self.target_store.store_backends[
                            ValidationResultIdentifier].remove_key(
                                validation_result_site_key)
                    else:
                        cleaned_keys.append(validation_result_site_key)
                validation_and_profiling_result_site_keys = cleaned_keys

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("profiling", "None")
                and self.site_section_builders_config.get(
                    "profiling", "None") not in FALSEY_YAML_STRINGS):
            profiling_run_name_filter = self.site_section_builders_config[
                "profiling"]["run_name_filter"]
            profiling_result_site_keys = [
                validation_result_key for validation_result_key in
                validation_and_profiling_result_site_keys
                if resource_key_passes_run_name_filter(
                    validation_result_key, profiling_run_name_filter)
            ]
            for profiling_result_key in profiling_result_site_keys:
                try:
                    validation = self.data_context.get_validation_result(
                        batch_identifier=profiling_result_key.batch_identifier,
                        expectation_suite_name=profiling_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        run_id=profiling_result_key.run_id,
                        validations_store_name=self.source_stores.get(
                            "profiling"),
                    )

                    batch_kwargs = validation.meta.get("batch_kwargs", {})
                    batch_spec = validation.meta.get("batch_spec", {})

                    self.add_resource_info_to_index_links_dict(
                        index_links_dict=index_links_dict,
                        expectation_suite_name=profiling_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        section_name="profiling",
                        batch_identifier=profiling_result_key.batch_identifier,
                        run_id=profiling_result_key.run_id,
                        run_time=profiling_result_key.run_id.run_time,
                        run_name=profiling_result_key.run_id.run_name,
                        asset_name=batch_kwargs.get("data_asset_name")
                        or batch_spec.get("data_asset_name"),
                        batch_kwargs=batch_kwargs,
                        batch_spec=batch_spec,
                    )
                except Exception:
                    error_msg = "Profiling result not found: {:s} - skipping".format(
                        str(profiling_result_key.to_tuple()))
                    logger.warning(error_msg)

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS):
            validations_run_name_filter = self.site_section_builders_config[
                "validations"]["run_name_filter"]
            validation_result_site_keys = [
                validation_result_key for validation_result_key in
                validation_and_profiling_result_site_keys
                if resource_key_passes_run_name_filter(
                    validation_result_key, validations_run_name_filter)
            ]
            validation_result_site_keys = sorted(
                validation_result_site_keys,
                key=lambda x: x.run_id.run_time,
                reverse=True,
            )
            if self.validation_results_limit:
                validation_result_site_keys = validation_result_site_keys[:self
                                                                          .
                                                                          validation_results_limit]
            for validation_result_key in validation_result_site_keys:
                try:
                    validation = self.data_context.get_validation_result(
                        batch_identifier=validation_result_key.
                        batch_identifier,
                        expectation_suite_name=validation_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        run_id=validation_result_key.run_id,
                        validations_store_name=self.source_stores.get(
                            "validations"),
                    )

                    validation_success = validation.success
                    batch_kwargs = validation.meta.get("batch_kwargs", {})
                    batch_spec = validation.meta.get("batch_spec", {})

                    self.add_resource_info_to_index_links_dict(
                        index_links_dict=index_links_dict,
                        expectation_suite_name=validation_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        section_name="validations",
                        batch_identifier=validation_result_key.
                        batch_identifier,
                        run_id=validation_result_key.run_id,
                        validation_success=validation_success,
                        run_time=validation_result_key.run_id.run_time,
                        run_name=validation_result_key.run_id.run_name,
                        asset_name=batch_kwargs.get("data_asset_name")
                        or batch_spec.get("data_asset_name"),
                        batch_kwargs=batch_kwargs,
                        batch_spec=batch_spec,
                    )
                except Exception:
                    error_msg = "Validation result not found: {:s} - skipping".format(
                        str(validation_result_key.to_tuple()))
                    logger.warning(error_msg)

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(
                rendered_content,
                data_context_id=self.data_context_id,
                show_how_to_buttons=self.show_how_to_buttons,
            )
        except Exception as e:
            exception_message = f"""\
An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to \
diagnose and repair the underlying issue.  Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
            )
            logger.error(exception_message)

        return (self.target_store.write_index_page(viewable_content),
                index_links_dict)
Esempio n. 8
0
def test_HtmlSiteStore_filesystem_backend(tmp_path_factory):
    """Write two validation pages to an HtmlSiteStore and check keys and layout."""
    root_dir = tmp_path_factory.mktemp(
        "test_HtmlSiteStore_with_TupleFileSystemStoreBackend__dir"
    )
    path = str(root_dir)

    backend_config = {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "my_store",
    }
    store = HtmlSiteStore(
        store_backend=backend_config,
        runtime_environment={"root_directory": path},
    )

    # A plain string is not an acceptable key and must raise TypeError.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    # An empty payload must be rejected with a ValidationError.
    with pytest.raises(ValidationError):
        store.get(validationResultIdentifierSchema.load({}))

    def validations_section_key(last_element):
        # The timestamp is taken at call time, so each key gets its own.
        current_time = datetime.datetime.now(datetime.timezone.utc)
        resource = ValidationResultIdentifier.from_tuple(
            ("a", "b", "c", "quarantine", current_time, last_element)
        )
        return SiteSectionIdentifier(
            site_section_name="validations",
            resource_identifier=resource,
        )

    first_key = validations_section_key("prod-100")
    store.set(first_key, "aaa")

    second_key = validations_section_key("prod-20")
    store.set(second_key, "bbb")

    print(store.list_keys())
    # WARNING: OBSERVE THAT SITE_SECTION_NAME IS LOST IN THE CALL TO LIST_KEYS
    expected_keys = {
        first_key.resource_identifier,
        second_key.resource_identifier,
    }
    assert set(store.list_keys()) == expected_keys

    expected_tree = """\
test_HtmlSiteStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        validations/
            a/
                b/
                    c/
                        quarantine/
                            20190926T134241.000000Z/
                                prod-100.html
                                prod-20.html
"""
    print(gen_directory_tree_str(path))
    assert gen_directory_tree_str(path) == expected_tree
def test_configuration_driven_site_builder_skip_and_clean_missing(
    site_builder_data_context_with_html_store_titanic_random,
):
    """Check the auto-cleaning behaviour of DefaultSiteIndexBuilder.

    When the index page is built, an HTML page that has no corresponding
    suite or validation result should be removed and should not appear on
    the index page.
    """
    context = site_builder_data_context_with_html_store_titanic_random

    store_validation_result_action = {
        "name": "store_validation_result",
        "action": {
            "class_name": "StoreValidationResultAction",
            "target_store_name": "validations_store",
        },
    }
    store_eval_parameters_action = {
        "name": "extract_and_store_eval_parameters",
        "action": {
            "class_name": "StoreEvaluationParametersAction",
            "target_store_name": "evaluation_parameter_store",
        },
    }
    context.add_validation_operator(
        "validate_and_store",
        {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                store_validation_result_action,
                store_eval_parameters_action,
            ],
        },
    )

    # Profiling the Titanic datasource generates one expectation suite and
    # one validation that is a profiling result.
    datasource_name = "titanic"
    data_asset_name = "Titanic"
    profiler_name = "BasicDatasetProfiler"
    generator_name = "subdir_reader"
    context.profile_datasource(datasource_name)

    # Create another validation result using the profiler's suite (no need
    # for a new expectation suite here). Having two validation results - one
    # with run id "profiling" - lets us test the run_name_filter logic that
    # decides which results go to the profiling and validation sections.
    batch_kwargs = context.build_batch_kwargs(
        datasource=datasource_name,
        batch_kwargs_generator=generator_name,
        data_asset_name=data_asset_name,
    )

    expectation_suite_name = ".".join(
        (datasource_name, generator_name, data_asset_name, profiler_name)
    )

    batch = context.get_batch(
        batch_kwargs=batch_kwargs,
        expectation_suite_name=expectation_suite_name,
    )
    context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=RunIdentifier(run_name="test_run_id_12345"),
        validation_operator_name="validate_and_store",
    )

    local_site_config = context._project_config.data_docs_sites["local_site"]

    validations_set = set(context.stores["validations_store"].list_keys())
    assert len(validations_set) == 6

    expectation_suite_set = set(context.stores["expectations_store"].list_keys())
    assert len(expectation_suite_set) == 5

    site_builder = SiteBuilder(
        data_context=context,
        runtime_environment={"root_directory": context.root_directory},
        **local_site_config,
    )

    def html_page_keys(identifier_class):
        # Keys of the HTML pages currently held by the site's target store
        # for the given identifier class.
        backend = site_builder.target_store.store_backends[identifier_class]
        return {
            identifier_class.from_tuple(key_tuple)
            for key_tuple in backend.list_keys()
        }

    site_builder.build()

    # --- expectation suite pages ---
    # Suites in the expectations store should match the HTML pages.
    assert expectation_suite_set == html_page_keys(ExpectationSuiteIdentifier)

    # Remove two suites from the expectations store.
    for suite_key in list(expectation_suite_set)[:2]:
        context.stores["expectations_store"].remove_key(suite_key)

    # Re-building the data docs should remove the suite HTML pages that no
    # longer have a corresponding suite in the expectations store.
    site_builder.build()

    expectation_suite_set = set(context.stores["expectations_store"].list_keys())
    assert expectation_suite_set == html_page_keys(ExpectationSuiteIdentifier)

    # --- validation result pages ---
    # Validations in the store should match the HTML pages.
    assert validations_set == html_page_keys(ValidationResultIdentifier)

    # Remove two validations from the store.
    for validation_key in list(validations_set)[:2]:
        context.stores["validations_store"].store_backend.remove_key(validation_key)

    # Re-building the data docs should remove the validation HTML pages that
    # no longer have a corresponding validation in the validations store.
    site_builder.build()

    validations_set = set(context.stores["validations_store"].list_keys())
    assert validations_set == html_page_keys(ValidationResultIdentifier)
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    """Exercise a ValidationsStore configured with TupleFilesystemStoreBackend.

    Checks, in order: rejection of a non-identifier key on ``get``, a set/get
    round trip for two validation results, the listed keys, the directory
    tree written under the store root, and the ``store_backend_id`` reported
    by the store (including that a second, identically configured store
    reports the same id).
    """
    root_dir = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    # A second temp directory is created here but not used by the rest of
    # the test.
    tmp_path_factory.mktemp("my_dir")

    def make_store():
        # Build fresh config dicts on every call so the two stores created
        # in this test never share a dict object.
        return ValidationsStore(
            store_backend={
                "module_name": "great_expectations.data_context.store",
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "my_store/",
            },
            runtime_environment={"root_directory": root_dir},
        )

    store = make_store()

    # Keys must be ValidationResultIdentifier instances; a plain string is rejected.
    with pytest.raises(TypeError):
        store.get("not_a_ValidationResultIdentifier")

    # First result: key built through the constructor.
    passing_key = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    store.set(passing_key, ExpectationSuiteValidationResult(success=True))
    fetched_passing = store.get(passing_key)
    assert fetched_passing == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    # Second result: key built from its tuple form.
    failing_key_parts = (
        "asset",
        "quarantine",
        "prod-20",
        datetime.datetime.now(datetime.timezone.utc),
        "batch_id",
    )
    failing_key = ValidationResultIdentifier.from_tuple(failing_key_parts)
    store.set(failing_key, ExpectationSuiteValidationResult(success=False))
    fetched_failing = store.get(failing_key)
    assert fetched_failing == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(store.list_keys())
    assert set(store.list_keys()) == {passing_key, failing_key}

    # NOTE(review): the timestamps in the expected tree are fixed values;
    # presumably the clock is frozen by a decorator or fixture that is not
    # visible in this block -- confirm.
    expected_tree = """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        .ge_store_backend_id
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    print(gen_directory_tree_str(root_dir))
    assert gen_directory_tree_str(root_dir) == expected_tree

    # What does the remainder test and why?
    # A Store should be able to report its store_backend_id, which is set
    # when the StoreBackend is instantiated.

    # The id exists and can be read ...
    assert store.store_backend_id is not None
    # ... and it is a valid UUID4.
    assert test_utils.validate_uuid4(store.store_backend_id)

    # Another store with the same configuration shares the same id.
    twin_store = make_store()
    assert store.store_backend_id == twin_store.store_backend_id
Esempio n. 11
0
    def build(self):
        """Build and write the site index page.

        Collects the expectation suite and validation result keys present in
        ``self.target_store``, splits validation results into profiling runs
        (``run_id == "profiling"``) and regular validation runs, and records a
        link entry for each through ``add_resource_info_to_index_links_dict``.
        Regular validation runs are ordered by ``run_id`` descending and, when
        ``self.validation_results_limit`` is truthy, truncated to that many
        entries.

        Returns:
            A tuple ``(write_result, index_links_dict)`` where ``write_result``
            is whatever ``self.target_store.write_index_page`` returns, or
            ``None`` if rendering the index raised an exception.
        """
        logger.debug("DefaultSiteIndexBuilder.build")

        store_backends = self.target_store.store_backends

        expectation_suite_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
            for expectation_suite_tuple in store_backends[ExpectationSuiteIdentifier].list_keys()
        ]
        validation_and_profiling_result_keys = [
            ValidationResultIdentifier.from_tuple(validation_result_tuple)
            for validation_result_tuple in store_backends[ValidationResultIdentifier].list_keys()
        ]
        profiling_result_keys = [
            result_key for result_key in validation_and_profiling_result_keys
            if result_key.run_id == "profiling"
        ]
        validation_result_keys = sorted(
            (
                result_key for result_key in validation_and_profiling_result_keys
                if result_key.run_id != "profiling"
            ),
            key=lambda x: x.run_id,
            reverse=True,
        )
        if self.validation_results_limit:
            validation_result_keys = validation_result_keys[:self.validation_results_limit]

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_cta_footer:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        for expectation_suite_key in expectation_suite_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.expectation_suite_name,
                section_name="expectations"
            )

        # Profiling and validation results are handled identically apart from
        # the section they are filed under and the label used in the warning.
        result_sections = (
            ("profiling", "Profiling", profiling_result_keys),
            ("validations", "Validation", validation_result_keys),
        )
        for section_name, label, result_keys in result_sections:
            for result_key in result_keys:
                try:
                    suite_name = result_key.expectation_suite_identifier.expectation_suite_name
                    validation = self.data_context.get_validation_result(
                        batch_identifier=result_key.batch_identifier,
                        expectation_suite_name=suite_name,
                        run_id=result_key.run_id
                    )

                    self.add_resource_info_to_index_links_dict(
                        index_links_dict=index_links_dict,
                        expectation_suite_name=suite_name,
                        section_name=section_name,
                        batch_identifier=result_key.batch_identifier,
                        run_id=result_key.run_id,
                        validation_success=validation.success
                    )
                except Exception:
                    # Best effort: a result that cannot be loaded or indexed is
                    # skipped so the rest of the index can still be built.
                    logger.warning(
                        "%s result not found: %s - skipping",
                        label,
                        str(result_key.to_tuple()),
                    )

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(rendered_content)
        except Exception as e:
            # The message needs a placeholder for ``e``; passing it as a bare
            # extra argument makes the logging module fail while formatting
            # the record instead of emitting this message.
            logger.error("Exception occurred during data docs rendering: %s", e, exc_info=True)
            return None

        return (
            self.target_store.write_index_page(viewable_content),
            index_links_dict
        )