Example #1
0
    def __init__(self,
                 data_context,
                 requested_metrics,
                 target_store_name="metrics_store"):
        """Set up the action and verify that its target metrics store is usable.

        Args:
            data_context: Data Context
            requested_metrics: dictionary of metrics to store. Dictionary should have the following structure:

                expectation_suite_name:
                    metric_name:
                        - metric_kwargs_id

                You may use "*" to denote that any expectation suite should match.
            target_store_name: the name of the store in the Data Context which
                should be used to store the metrics

        Raises:
            DataContextError: if ``data_context.stores`` has no entry named
                ``target_store_name``, or if that entry is not a MetricStore.
        """
        super().__init__(data_context)
        self._requested_metrics = requested_metrics
        self._target_store_name = target_store_name

        # Look the store up eagerly so that a misconfiguration surfaces at
        # construction time.
        try:
            target_store = data_context.stores[target_store_name]
        except KeyError:
            not_found_message = (
                "Unable to find store {} in your DataContext configuration.".
                format(target_store_name))
            raise DataContextError(not_found_message)

        if isinstance(target_store, MetricStore):
            return
        raise DataContextError(
            "StoreMetricsAction must have a valid MetricsStore for its target store."
        )
Example #2
0
 def __init__(self,
              store_backend=None,
              runtime_environment=None,
              store_name="no_store_name"):
     """
     Instantiate the store backend described by ``store_backend``.

     Runtime environment may be necessary to instantiate store backend elements.
     Args:
         store_backend: backend configuration; when None, a configuration
             naming "InMemoryStoreBackend" is used instead.
         runtime_environment: passed to the backend factory; a falsy value
             is replaced by an empty dict.
         store_name: store name given in the DataContextConfig (via either in-code or yaml configuration)
     Raises:
         ClassInstantiationError: if the factory returns a falsy object. The
             whole backend configuration is reported as ``class_name``.
         DataContextError: if the created object is not a StoreBackend.
     """
     backend_config = ({"class_name": "InMemoryStoreBackend"}
                       if store_backend is None else store_backend)
     self._store_name = store_name
     logger.debug("Building store_backend.")

     default_module = "great_expectations.data_context.store"
     backend_defaults = {
         "module_name": default_module,
         "store_name": self._store_name,
     }
     backend = instantiate_class_from_config(
         config=backend_config,
         runtime_environment=runtime_environment or {},
         config_defaults=backend_defaults,
     )
     self._store_backend = backend

     if not backend:
         raise ClassInstantiationError(module_name=default_module,
                                       package_name=None,
                                       class_name=backend_config)
     if not isinstance(backend, StoreBackend):
         raise DataContextError(
             "Invalid StoreBackend configuration: expected a StoreBackend instance."
         )
     # Remember the backend's key mode on the store itself.
     self._use_fixed_length_key = backend.fixed_length_key
    def get_ge_cloud_config(
        self,
        ge_cloud_base_url: Optional[str] = None,
        ge_cloud_account_id: Optional[str] = None,
        ge_cloud_access_token: Optional[str] = None,
        ge_cloud_organization_id: Optional[str] = None,
    ) -> GeCloudConfig:
        """
        Build a GeCloudConfig object. Config attributes are collected from any combination of args passed in at
        runtime, environment variables, or a global great_expectations.conf file (in order of precedence)

        The values are resolved by ``self._get_ge_cloud_config_dict``; every
        entry of the resulting dictionary must be truthy.

        Raises:
            DataContextError: if any resolved value is falsy. The message
                lists the missing keys and the global config paths.
        """
        resolved_config = self._get_ge_cloud_config_dict(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_account_id=ge_cloud_account_id,
            ge_cloud_access_token=ge_cloud_access_token,
            ge_cloud_organization_id=ge_cloud_organization_id,
        )

        # Keys whose value is falsy count as "not provided".
        unset_keys = [name for name, value in resolved_config.items() if not value]
        if unset_keys:
            quoted_keys = [f'"{name}"' for name in unset_keys]
            quoted_paths = [
                f'"{path}"' for path in super().GLOBAL_CONFIG_PATHS
            ]
            raise DataContextError(
                f"{(', ').join(quoted_keys)} arg(s) required for ge_cloud_mode but neither provided nor found in "
                f"environment or in global configs ({(', ').join(quoted_paths)})."
            )

        return GeCloudConfig(**resolved_config)
 def from_object(cls, validation_result):
     """Build an instance of ``cls`` from a validation result's ``meta`` mapping.

     The batch identifier comes from ``meta["batch_kwargs"]`` (an empty dict
     when absent): an IDDict is used as-is, a plain dict is wrapped in an
     IDDict first, and ``to_id()`` is called on the result. The suite name is
     read from ``meta["expectation_suite_name"]`` and the run id from
     ``meta.get("run_id")``.

     Raises:
         DataContextError: if ``batch_kwargs`` is neither an IDDict nor a dict.
     """
     batch_kwargs = validation_result.meta.get("batch_kwargs", {})
     if not isinstance(batch_kwargs, (IDDict, dict)):
         raise DataContextError("Unable to construct ValidationResultIdentifier from provided object.")

     # Test for IDDict first so an existing IDDict is not re-wrapped.
     if isinstance(batch_kwargs, IDDict):
         id_source = batch_kwargs
     else:
         id_source = IDDict(batch_kwargs)
     batch_identifier = id_source.to_id()

     suite_identifier = ExpectationSuiteIdentifier(validation_result.meta["expectation_suite_name"])
     return cls(
         expectation_suite_identifier=suite_identifier,
         run_id=validation_result.meta.get("run_id"),
         batch_identifier=batch_identifier
     )
Example #5
0
    def add_expectation(
        self,
        expectation_configuration: ExpectationConfiguration,
        match_type: str = "domain",
        overwrite_existing: bool = True,
    ) -> ExpectationConfiguration:
        """Add an expectation to the suite, or replace the single one it matches.

        Args:
            expectation_configuration: The ExpectationConfiguration to add or update
            match_type: The criteria used to determine whether the Suite already has an ExpectationConfiguration
                and so whether we should add or replace.
            overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
                False.
        Returns:
            The ExpectationConfiguration that was passed in.
        Raises:
            ValueError: More than one match
            DataContextError: One match if overwrite_existing = False
        """
        matching_indexes = self.find_expectation_indexes(
            expectation_configuration, match_type
        )
        match_count = len(matching_indexes)

        # Ambiguous: refuse to guess which expectation should be replaced.
        if match_count > 1:
            raise ValueError(
                "More than one matching expectation was found. Please be more specific with your search "
                "criteria"
            )

        # Nothing matched: this is a plain append.
        if match_count != 1:
            self.append_expectation(expectation_configuration)
            return expectation_configuration

        # Exactly one match: the existing entry is replaced wholesale (it is
        # not patched), and only when the caller allows overwriting.
        if not overwrite_existing:
            raise DataContextError(
                "A matching ExpectationConfiguration already exists. If you would like to overwrite this "
                "ExpectationConfiguration, set overwrite_existing=True"
            )
        self.expectations[matching_indexes[0]] = expectation_configuration
        return expectation_configuration
Example #6
0
 def __init__(self, data_context, site_names=None, target_site_names=None):
     """
     Store the site names, accepting the deprecated ``target_site_names`` key.

     :param data_context: Data Context
     :param site_names: *optional* List of site names for building data docs
     :param target_site_names: *deprecated* legacy spelling of ``site_names``;
         a truthy value emits a DeprecationWarning and is used as the site names
     :raises DataContextError: if both ``target_site_names`` and ``site_names``
         are truthy
     """
     super().__init__(data_context)

     # Common case: the legacy key is not in use.
     if not target_site_names:
         self._site_names = site_names
         return

     warnings.warn(
         "target_site_names is deprecated. Please use site_names instead.",
         DeprecationWarning,
     )
     if site_names:
         raise DataContextError(
             "Invalid configuration: legacy key target_site_names and site_names key are "
             "both present in UpdateDataDocsAction configuration")
     self._site_names = target_site_names
Example #7
0
    def __init__(
        self,
        data_context,
        api_key,
        routing_key,
        notify_on="failure",
    ):
        """Construct a PagerdutyAlertAction

        Args:
            data_context:
            api_key: Events API v2 key for pagerduty.
            routing_key: The 32 character Integration Key for an integration on a service or on a global ruleset.
            notify_on: "all", "failure", "success" - specifies validation status that will trigger notification

        Raises:
            DataContextError: if the ``pypd`` module is not available.
            AssertionError: if ``api_key`` or ``routing_key`` is falsy.
        """
        super().__init__(data_context)
        if not pypd:
            raise DataContextError("ModuleNotFoundError: No module named 'pypd'")
        # The required keys are checked with explicit raises rather than
        # `assert`, because assert statements are removed under `python -O`
        # and the check would silently disappear. AssertionError is kept as
        # the exception type so existing callers see the same error.
        self.api_key = api_key
        if not api_key:
            raise AssertionError("No Pagerduty api_key found in action config.")
        self.routing_key = routing_key
        if not routing_key:
            raise AssertionError(
                "No Pagerduty routing_key found in action config.")
        self.notify_on = notify_on
    def __init__(self, store_backend=None, runtime_environment=None):
        """Create one store backend per kind of rendered asset.

        Four backends are built from the same ``store_backend`` configuration,
        differing only in their filepath defaults: expectation suite pages,
        validation result pages, the index page and static assets.

        Args:
            store_backend: backend configuration dictionary. It is required
                in practice: despite the ``None`` default, ``.get`` is called
                on it unconditionally. ``module_name`` and ``class_name``
                fall back to "great_expectations.data_context.store" and
                "TupleFilesystemStoreBackend" when the class is loaded.
            runtime_environment: passed through to every backend instantiation.

        Raises:
            DataContextError: if the configured class is not a subclass of
                TupleStoreBackend.
            ClassInstantiationError: if any backend instantiation returns a
                falsy object.
        """
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store")
        store_backend_class_name = store_backend.get(
            "class_name", "TupleFilesystemStoreBackend")
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_class = load_class(store_backend_class_name,
                                 store_backend_module_name)

        # Store Class was loaded successfully; verify that it is of a correct subclass.
        if not issubclass(store_class, TupleStoreBackend):
            raise DataContextError(
                "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend"
            )
        if "filepath_template" in store_backend or (
                "fixed_length_key" in store_backend
                and store_backend["fixed_length_key"] is True):
            logger.warning(
                "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
                "filepaths will be selected based on the type of asset rendered."
            )

        module_name = 'great_expectations.data_context.store'

        def _build_backend(**filepath_defaults):
            """Instantiate one backend with the given filepath defaults."""
            # A fresh defaults dict is built for every call so the four
            # instantiations never share a mutable argument.
            backend = instantiate_class_from_config(
                config=store_backend,
                runtime_environment=runtime_environment,
                config_defaults={
                    "module_name": module_name,
                    **filepath_defaults,
                })
            if not backend:
                # Report the names resolved above. Indexing
                # store_backend['class_name'] here would raise KeyError when
                # the key is omitted and hide the real error.
                raise ClassInstantiationError(
                    module_name=store_backend_module_name,
                    package_name=None,
                    class_name=store_backend_class_name)
            return backend

        # One thing to watch for is reversibility of keys.
        # If several types are being written to overlapping directories, we could get collisions.
        filepath_suffix = '.html'
        expectation_suite_identifier_obj = _build_backend(
            filepath_prefix='expectations', filepath_suffix=filepath_suffix)
        validation_result_identifier_obj = _build_backend(
            filepath_prefix='validations', filepath_suffix=filepath_suffix)
        index_page_obj = _build_backend(filepath_template='index.html')
        static_assets_obj = _build_backend(filepath_template=None)

        self.store_backends = {
            ExpectationSuiteIdentifier: expectation_suite_identifier_obj,
            ValidationResultIdentifier: validation_result_identifier_obj,
            "index_page": index_page_obj,
            "static_assets": static_assets_obj,
        }

        # NOTE: Instead of using the filesystem as the source of record for keys,
        # this class tracks keys separately in an internal set.
        # This means that keys are stored for a specific session, but can't be fetched after the original
        # HtmlSiteStore instance leaves scope.
        # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
        # backends that write to the same directory structure.
        # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
        # can't necessarily set and list_keys like most other Stores.
        self.keys = set()
    def _add_expectation(
        self,
        expectation_configuration: ExpectationConfiguration,
        send_usage_event: bool,
        match_type: str = "domain",
        overwrite_existing: bool = True,
    ) -> ExpectationConfiguration:
        """
        Private variant of add_expectation that lets usage events be suppressed when Expectations are added
        through internal processing (ie. while building profilers, rendering or validation).

        Args:
            expectation_configuration: The ExpectationConfiguration to add or update
            send_usage_event: Whether to send a usage_statistics event. When called through ExpectationSuite class'
                public add_expectation() method, this is set to `True`.
            match_type: The criteria used to determine whether the Suite already has an ExpectationConfiguration
                and so whether we should add or replace.
            overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
                False.
        Returns:
            The ExpectationConfiguration that was passed in. When it replaces an existing expectation whose
            ge_cloud_id is not None, that id is copied onto it first.
        Raises:
            ValueError: More than one match
            DataContextError: One match if overwrite_existing = False
        """
        matching_indexes = self.find_expectation_indexes(
            expectation_configuration, match_type)
        match_count = len(matching_indexes)

        # Ambiguous match: report the failure (if requested) and bail out.
        if match_count > 1:
            if send_usage_event:
                self.send_usage_event(success=False)
            raise ValueError(
                "More than one matching expectation was found. Please be more specific with your search "
                "criteria")

        if match_count == 1:
            if not overwrite_existing:
                if send_usage_event:
                    self.send_usage_event(success=False)
                raise DataContextError(
                    "A matching ExpectationConfiguration already exists. If you would like to overwrite this "
                    "ExpectationConfiguration, set overwrite_existing=True")

            # The existing entry is replaced wholesale rather than patched;
            # only its ge_cloud_id (when set) is carried over.
            target_index = matching_indexes[0]
            previous_ge_cloud_id = self.expectations[target_index].ge_cloud_id
            if previous_ge_cloud_id is not None:
                expectation_configuration.ge_cloud_id = previous_ge_cloud_id
            self.expectations[target_index] = expectation_configuration
        else:
            self.append_expectation(expectation_configuration)

        if send_usage_event:
            self.send_usage_event(success=True)
        return expectation_configuration
Example #10
0
    def __init__(self,
                 root_directory,
                 serialization_type=None,
                 store_backend=None):
        """Create one fixed-length-key backend per kind of rendered page.

        Args:
            root_directory: passed to every backend instantiation as
                ``runtime_config["root_directory"]`` and kept on the instance.
            serialization_type: stored on the instance unchanged.
            store_backend: backend configuration dictionary. It is required
                in practice: despite the ``None`` default, ``.get`` is called
                on it unconditionally.

        Raises:
            DataContextError: if the configured class is not a subclass of
                FixedLengthTupleStoreBackend.
        """
        self.key_class = SiteSectionIdentifier

        backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store")
        backend_class_name = store_backend.get(
            "class_name", "FixedLengthTupleFilesystemStoreBackend")
        backend_class = load_class(backend_class_name, backend_module_name)

        if not issubclass(backend_class, FixedLengthTupleStoreBackend):
            raise DataContextError(
                "Invalid configuration: HtmlSiteStore needs a FixedLengthTupleStoreBackend"
            )

        # These options are chosen per asset type below, so warn when the
        # caller tries to set them.
        unsupported_options = ("filepath_template", "key_length")
        if any(option in store_backend for option in unsupported_options):
            logger.warning(
                "Configuring a filepath_template or key_length is not supported in SiteBuilder: "
                "filepaths will be selected based on the type of asset rendered."
            )

        def make_backend(key_length, filepath_template):
            """Instantiate a backend for one asset type."""
            return instantiate_class_from_config(
                config=store_backend,
                runtime_config={"root_directory": root_directory},
                config_defaults={
                    "module_name": "great_expectations.data_context.store",
                    "key_length": key_length,
                    "filepath_template": filepath_template,
                })

        # Each key type gets its own backend.
        # One thing to watch for is reversibility of keys.
        # If several types are being written to overlapping directories, we could get collisions.
        self.store_backends = {
            ExpectationSuiteIdentifier:
            make_backend(4, 'expectations/{0}/{1}/{2}/{3}.html'),
            ValidationResultIdentifier:
            make_backend(5, 'validations/{4}/{0}/{1}/{2}/{3}.html'),
            "index_page":
            make_backend(0, 'index.html'),
        }

        self.root_directory = root_directory
        self.serialization_type = serialization_type

        # NOTE: Instead of using the filesystem as the source of record for keys,
        # this class tracks keys separately in an internal set.
        # This means that keys are stored for a specific session, but can't be fetched after the original
        # HtmlSiteStore instance leaves scope.
        # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
        # backends that write to the same directory structure.
        # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
        # can't necessarily set and list_keys like most other Stores.
        self.keys = set()
Example #11
0
    def __init__(self, store_backend=None, runtime_environment=None):
        """Create one store backend per kind of rendered asset.

        Four backends are built from the same ``store_backend`` configuration:
        expectation suite pages, validation result pages, the index page and
        static assets. For a "GeCloudStoreBackend" no filepath defaults are
        supplied; for any other class each backend gets its own filepath
        prefix/suffix or template.

        Args:
            store_backend: backend configuration dictionary. It is required
                in practice: despite the ``None`` default, ``.get`` is called
                on it unconditionally. ``module_name`` and ``class_name``
                fall back to "great_expectations.data_context.store" and
                "TupleFilesystemStoreBackend" when the class is loaded.
            runtime_environment: passed through to every backend instantiation.

        Raises:
            DataContextError: if the configured class is neither a
                TupleStoreBackend nor a GeCloudStoreBackend subclass.
            ClassInstantiationError: if any backend instantiation returns a
                falsy object.
        """
        store_backend_module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store")
        store_backend_class_name = store_backend.get(
            "class_name", "TupleFilesystemStoreBackend")
        verify_dynamic_loading_support(module_name=store_backend_module_name)
        store_class = load_class(store_backend_class_name,
                                 store_backend_module_name)

        # Store Class was loaded successfully; verify that it is of a correct subclass.
        if not issubclass(store_class,
                          (TupleStoreBackend, GeCloudStoreBackend)):
            raise DataContextError(
                "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend or GeCloudStoreBackend"
            )
        if "filepath_template" in store_backend or (
                "fixed_length_key" in store_backend
                and store_backend["fixed_length_key"] is True):
            logger.warning(
                "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
                "filepaths will be selected based on the type of asset rendered."
            )

        module_name = "great_expectations.data_context.store"
        filepath_suffix = ".html"
        # Compare against the resolved class name. Indexing
        # store_backend["class_name"] would raise KeyError when the key is
        # omitted, even though a default was already resolved above.
        is_ge_cloud_store = store_backend_class_name == "GeCloudStoreBackend"

        def _build_backend(**filepath_defaults):
            """Instantiate one backend; filepath defaults are skipped for GE Cloud."""
            # A fresh defaults dict is built for every call so the four
            # instantiations never share a mutable argument.
            config_defaults = {"module_name": module_name}
            if not is_ge_cloud_store:
                config_defaults.update(filepath_defaults)
            config_defaults["suppress_store_backend_id"] = True

            backend = instantiate_class_from_config(
                config=store_backend,
                runtime_environment=runtime_environment,
                config_defaults=config_defaults,
            )
            if not backend:
                raise ClassInstantiationError(
                    module_name=store_backend_module_name,
                    package_name=None,
                    class_name=store_backend_class_name,
                )
            return backend

        # One thing to watch for is reversibility of keys.
        # If several types are being written to overlapping directories, we could get collisions.
        expectation_suite_identifier_obj = _build_backend(
            filepath_prefix="expectations", filepath_suffix=filepath_suffix)
        validation_result_identifier_obj = _build_backend(
            filepath_prefix="validations", filepath_suffix=filepath_suffix)
        index_page_obj = _build_backend(filepath_template="index.html")
        static_assets_obj = _build_backend(filepath_template=None)

        self.store_backends = {
            ExpectationSuiteIdentifier: expectation_suite_identifier_obj,
            ValidationResultIdentifier: validation_result_identifier_obj,
            "index_page": index_page_obj,
            "static_assets": static_assets_obj,
        }

        # NOTE: Instead of using the filesystem as the source of record for keys,
        # this class tracks keys separately in an internal set.
        # This means that keys are stored for a specific session, but can't be fetched after the original
        # HtmlSiteStore instance leaves scope.
        # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
        # backends that write to the same directory structure.
        # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
        # can't necessarily set and list_keys like most other Stores.
        self.keys = set()

        # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
        # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
        self._config = {
            "store_backend": store_backend,
            "runtime_environment": runtime_environment,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        filter_properties_dict(properties=self._config,
                               clean_falsy=True,
                               inplace=True)