def __init__(self, data_context, requested_metrics, target_store_name="metrics_store"):
    """Record the requested metrics and check that the target store is usable.

    Args:
        data_context: Data Context
        requested_metrics: dictionary of metrics to store. Dictionary should
            have the following structure:

                expectation_suite_name:
                    metric_name:
                        - metric_kwargs_id

            You may use "*" to denote that any expectation suite should match.
        target_store_name: the name of the store in the Data Context which
            should be used to store the metrics

    Raises:
        DataContextError: if looking up ``target_store_name`` in
            ``data_context.stores`` raises KeyError, or if the store found
            there is not a ``MetricStore`` instance.
    """
    super().__init__(data_context)
    self._requested_metrics = requested_metrics
    self._target_store_name = target_store_name

    try:
        target_store = data_context.stores[target_store_name]
    except KeyError:
        message = "Unable to find store {} in your DataContext configuration.".format(
            target_store_name
        )
        raise DataContextError(message)

    # Nothing more to do once the store is known to be a metrics store.
    if isinstance(target_store, MetricStore):
        return
    raise DataContextError(
        "StoreMetricsAction must have a valid MetricsStore for its target store."
    )
def __init__(self, store_backend=None, runtime_environment=None, store_name="no_store_name"):
    """Build the store's backend from its configuration.

    Runtime environment may be necessary to instantiate store backend elements.

    Args:
        store_backend: configuration dictionary for the backend. When None,
            ``{"class_name": "InMemoryStoreBackend"}`` is used.
        runtime_environment: forwarded to ``instantiate_class_from_config``;
            an empty dict is substituted when the value is falsy.
        store_name: store name given in the DataContextConfig (via either
            in-code or yaml configuration)

    Raises:
        ClassInstantiationError: if ``instantiate_class_from_config`` returns
            a falsy value.
        DataContextError: if the instantiated object is not a ``StoreBackend``.
    """
    if store_backend is None:
        store_backend = {"class_name": "InMemoryStoreBackend"}
    self._store_name = store_name

    logger.debug("Building store_backend.")
    module_name = "great_expectations.data_context.store"
    self._store_backend = instantiate_class_from_config(
        config=store_backend,
        runtime_environment=runtime_environment or {},
        config_defaults={
            "module_name": module_name,
            "store_name": self._store_name,
        },
    )

    if not self._store_backend:
        raise ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            # Report the configured class name rather than the entire
            # configuration dictionary, so the error names the class that
            # could not be instantiated.
            class_name=store_backend.get("class_name"),
        )
    if not isinstance(self._store_backend, StoreBackend):
        raise DataContextError(
            "Invalid StoreBackend configuration: expected a StoreBackend instance."
        )

    self._use_fixed_length_key = self._store_backend.fixed_length_key
def get_ge_cloud_config(
    self,
    ge_cloud_base_url: Optional[str] = None,
    ge_cloud_account_id: Optional[str] = None,
    ge_cloud_access_token: Optional[str] = None,
    ge_cloud_organization_id: Optional[str] = None,
) -> GeCloudConfig:
    """Build a GeCloudConfig object.

    Config attributes are collected from any combination of args passed in at
    runtime, environment variables, or a global great_expectations.conf file
    (in order of precedence).

    Args:
        ge_cloud_base_url: forwarded to ``_get_ge_cloud_config_dict``.
        ge_cloud_account_id: forwarded to ``_get_ge_cloud_config_dict``.
        ge_cloud_access_token: forwarded to ``_get_ge_cloud_config_dict``.
        ge_cloud_organization_id: forwarded to ``_get_ge_cloud_config_dict``.

    Returns:
        A GeCloudConfig built from the resolved configuration dictionary.

    Raises:
        DataContextError: if any entry of the resolved dictionary is falsy.
    """
    resolved = self._get_ge_cloud_config_dict(
        ge_cloud_base_url=ge_cloud_base_url,
        ge_cloud_account_id=ge_cloud_account_id,
        ge_cloud_access_token=ge_cloud_access_token,
        ge_cloud_organization_id=ge_cloud_organization_id,
    )

    unresolved = [name for name, value in resolved.items() if not value]
    if not unresolved:
        return GeCloudConfig(**resolved)

    # Quote each missing key and each global config path for the message.
    quoted_keys = ", ".join(f'"{name}"' for name in unresolved)
    global_paths = super().GLOBAL_CONFIG_PATHS
    quoted_paths = ", ".join([f'"{path}"' for path in global_paths])
    raise DataContextError(
        f"{quoted_keys} arg(s) required for ge_cloud_mode but neither provided nor found in "
        f"environment or in global configs ({quoted_paths})."
    )
def from_object(cls, validation_result):
    """Construct an identifier from a validation result's ``meta`` mapping.

    The batch identifier is derived from ``meta["batch_kwargs"]`` (an empty
    dict when the key is absent): an ``IDDict`` is used directly, a plain
    dict is wrapped in ``IDDict`` first, and ``to_id()`` is called on it.

    Args:
        validation_result: object whose ``meta`` provides
            ``"expectation_suite_name"`` and optionally ``"batch_kwargs"``
            and ``"run_id"``.

    Returns:
        An instance of ``cls``.

    Raises:
        DataContextError: if ``batch_kwargs`` is neither an ``IDDict`` nor a
            ``dict``.
    """
    batch_kwargs = validation_result.meta.get("batch_kwargs", {})

    if isinstance(batch_kwargs, IDDict):
        id_source = batch_kwargs
    elif isinstance(batch_kwargs, dict):
        id_source = IDDict(batch_kwargs)
    else:
        raise DataContextError("Unable to construct ValidationResultIdentifier from provided object.")
    batch_identifier = id_source.to_id()

    suite_identifier = ExpectationSuiteIdentifier(
        validation_result.meta["expectation_suite_name"]
    )
    run_id = validation_result.meta.get("run_id")
    return cls(
        expectation_suite_identifier=suite_identifier,
        run_id=run_id,
        batch_identifier=batch_identifier,
    )
def add_expectation(
    self,
    expectation_configuration: ExpectationConfiguration,
    match_type: str = "domain",
    overwrite_existing: bool = True,
) -> ExpectationConfiguration:
    """Add an expectation to the suite, or replace the one it matches.

    Args:
        expectation_configuration: The ExpectationConfiguration to add or update
        match_type: The criteria used to determine whether the Suite already
            has an ExpectationConfiguration and so whether we should add or
            replace.
        overwrite_existing: If the expectation already exists, this will
            overwrite if True and raise an error if False.

    Returns:
        The ExpectationConfiguration that was added or used as replacement.

    Raises:
        ValueError: more than one existing expectation matches.
        DataContextError: exactly one matches and ``overwrite_existing`` is
            False.
    """
    matching_indexes = self.find_expectation_indexes(
        expectation_configuration, match_type
    )
    match_count = len(matching_indexes)

    if match_count > 1:
        raise ValueError(
            "More than one matching expectation was found. Please be more specific with your search "
            "criteria"
        )

    if match_count != 1:
        # No match: this is a brand new expectation for the suite.
        self.append_expectation(expectation_configuration)
        return expectation_configuration

    if not overwrite_existing:
        raise DataContextError(
            "A matching ExpectationConfiguration already exists. If you would like to overwrite this "
            "ExpectationConfiguration, set overwrite_existing=True"
        )

    # The matched configuration is replaced wholesale. Patching its kwargs
    # in place (e.g. with jsonpatch) would be an alternative, but how to
    # handle meta in that case is still undecided.
    self.expectations[matching_indexes[0]] = expectation_configuration
    return expectation_configuration
def __init__(self, data_context, site_names=None, target_site_names=None):
    """Store the list of sites to build, accepting the legacy key as well.

    :param data_context: Data Context
    :param site_names: *optional* List of site names for building data docs
    :param target_site_names: deprecated alias for ``site_names``; a
        DeprecationWarning is emitted when it is truthy
    :raises DataContextError: if both ``target_site_names`` and
        ``site_names`` are truthy
    """
    super().__init__(data_context)

    if not target_site_names:
        self._site_names = site_names
        return

    # Legacy configuration path: warn first, then refuse ambiguous input.
    warnings.warn(
        "target_site_names is deprecated. Please use site_names instead.",
        DeprecationWarning,
    )
    if site_names:
        raise DataContextError(
            "Invalid configuration: legacy key target_site_names and site_names key are "
            "both present in UpdateDataDocsAction configuration"
        )
    self._site_names = target_site_names
def __init__(
    self,
    data_context,
    api_key,
    routing_key,
    notify_on="failure",
):
    """Construct a PagerdutyAlertAction

    Args:
        data_context: passed to the parent initializer.
        api_key: Events API v2 key for pagerduty.
        routing_key: The 32 character Integration Key for an integration on a
            service or on a global ruleset.
        notify_on: "all", "failure", "success" - specifies validation status
            that will trigger notification

    Raises:
        DataContextError: if the module-level ``pypd`` name is falsy.
        AssertionError: if ``api_key`` or ``routing_key`` is falsy.
    """
    super().__init__(data_context)
    if not pypd:
        raise DataContextError("ModuleNotFoundError: No module named 'pypd'")

    # Explicit raises instead of `assert` statements: asserts are removed
    # when Python runs with -O, which would silently skip this validation.
    # AssertionError is kept so existing callers see the same exception type.
    self.api_key = api_key
    if not api_key:
        raise AssertionError("No Pagerduty api_key found in action config.")
    self.routing_key = routing_key
    if not routing_key:
        raise AssertionError("No Pagerduty routing_key found in action config.")
    self.notify_on = notify_on
def __init__(self, store_backend=None, runtime_environment=None):
    """Create one backend per kind of rendered asset from a single config.

    The same ``store_backend`` configuration is instantiated four times with
    different path defaults: expectation suite pages, validation result
    pages, the index page and static assets.

    Args:
        store_backend: backend configuration dictionary. ``.get`` is called
            on it unconditionally, so a dictionary must be supplied even
            though the parameter defaults to None. ``module_name`` defaults
            to ``"great_expectations.data_context.store"`` and ``class_name``
            to ``"TupleFilesystemStoreBackend"`` when loading the class for
            the subclass check.
        runtime_environment: forwarded to ``instantiate_class_from_config``.

    Raises:
        DataContextError: if the configured class is not a subclass of
            ``TupleStoreBackend``.
        ClassInstantiationError: if any of the backends instantiates to a
            falsy value.
    """
    store_backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    store_backend_class_name = store_backend.get(
        "class_name", "TupleFilesystemStoreBackend"
    )
    verify_dynamic_loading_support(module_name=store_backend_module_name)
    store_class = load_class(store_backend_class_name, store_backend_module_name)

    # Store Class was loaded successfully; verify that it is of a correct subclass.
    if not issubclass(store_class, TupleStoreBackend):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend"
        )
    if "filepath_template" in store_backend or (
        "fixed_length_key" in store_backend
        and store_backend["fixed_length_key"] is True
    ):
        logger.warning(
            "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # One thing to watch for is reversibility of keys.
    # If several types are being written to overlapping directories, we could get collisions.
    module_name = "great_expectations.data_context.store"

    def _build_backend(**path_defaults):
        # Instantiate the configured backend with asset-specific defaults.
        backend = instantiate_class_from_config(
            config=store_backend,
            runtime_environment=runtime_environment,
            config_defaults={"module_name": module_name, **path_defaults},
        )
        if not backend:
            # Use the resolved class name: indexing store_backend directly
            # would raise KeyError when "class_name" was left to its default.
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_backend_class_name,
            )
        return backend

    filepath_suffix = ".html"
    self.store_backends = {
        ExpectationSuiteIdentifier: _build_backend(
            filepath_prefix="expectations", filepath_suffix=filepath_suffix
        ),
        ValidationResultIdentifier: _build_backend(
            filepath_prefix="validations", filepath_suffix=filepath_suffix
        ),
        "index_page": _build_backend(filepath_template="index.html"),
        "static_assets": _build_backend(filepath_template=None),
    }

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()
def _add_expectation(
    self,
    expectation_configuration: ExpectationConfiguration,
    send_usage_event: bool,
    match_type: str = "domain",
    overwrite_existing: bool = True,
) -> ExpectationConfiguration:
    """Add or replace an expectation, optionally reporting a usage event.

    This is a private method for adding expectations that allows for
    usage_events to be suppressed when Expectations are added through
    internal processing (ie. while building profilers, rendering or
    validation).

    Args:
        expectation_configuration: The ExpectationConfiguration to add or update
        send_usage_event: Whether to send a usage_statistics event. When
            called through ExpectationSuite class' public add_expectation()
            method, this is set to `True`.
        match_type: The criteria used to determine whether the Suite already
            has an ExpectationConfiguration and so whether we should add or
            replace.
        overwrite_existing: If the expectation already exists, this will
            overwrite if True and raise an error if False.

    Returns:
        The ExpectationConfiguration that was added or used as replacement.

    Raises:
        ValueError: more than one existing expectation matches.
        DataContextError: exactly one matches and ``overwrite_existing`` is
            False.
    """
    matching_indexes = self.find_expectation_indexes(
        expectation_configuration, match_type
    )

    if len(matching_indexes) > 1:
        if send_usage_event:
            self.send_usage_event(success=False)
        raise ValueError(
            "More than one matching expectation was found. Please be more specific with your search "
            "criteria"
        )

    if len(matching_indexes) == 1:
        if not overwrite_existing:
            if send_usage_event:
                self.send_usage_event(success=False)
            raise DataContextError(
                "A matching ExpectationConfiguration already exists. If you would like to overwrite this "
                "ExpectationConfiguration, set overwrite_existing=True"
            )
        # The matched configuration is replaced wholesale (patching kwargs in
        # place would be an alternative, but meta handling is undecided).
        # If the existing Expectation has a ge_cloud_id, carry it over to the
        # new Expectation Configuration before swapping it in.
        previous_ge_cloud_id = self.expectations[matching_indexes[0]].ge_cloud_id
        if previous_ge_cloud_id is not None:
            expectation_configuration.ge_cloud_id = previous_ge_cloud_id
        self.expectations[matching_indexes[0]] = expectation_configuration
    else:
        self.append_expectation(expectation_configuration)

    if send_usage_event:
        self.send_usage_event(success=True)
    return expectation_configuration
def __init__(self, root_directory, serialization_type=None, store_backend=None):
    """Create the per-asset backends used to write a rendered HTML site.

    Args:
        root_directory: passed to every backend as
            ``runtime_config={"root_directory": ...}`` and kept on the
            instance.
        serialization_type: stored on the instance unchanged.
        store_backend: backend configuration dictionary. ``.get`` is called
            on it unconditionally, so a dictionary must be supplied even
            though the parameter defaults to None.

    Raises:
        DataContextError: if the configured class is not a subclass of
            ``FixedLengthTupleStoreBackend``.
    """
    self.key_class = SiteSectionIdentifier

    backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    backend_class_name = store_backend.get(
        "class_name", "FixedLengthTupleFilesystemStoreBackend"
    )
    backend_class = load_class(backend_class_name, backend_module_name)
    if not issubclass(backend_class, FixedLengthTupleStoreBackend):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a FixedLengthTupleStoreBackend"
        )

    unsupported_options = ("filepath_template", "key_length")
    if any(option in store_backend for option in unsupported_options):
        logger.warning(
            "Configuring a filepath_template or key_length is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # Each key type gets its own backend, all created from the same
    # configuration; only the key length and the filepath template differ.
    # One thing to watch for is reversibility of keys: if several types are
    # being written to overlapping directories, we could get collisions.
    # (An earlier approach, deriving per-type "base_directory"/"prefix"
    # values from deep copies of the config, is no longer used.)
    backend_layouts = (
        (ExpectationSuiteIdentifier, 4, 'expectations/{0}/{1}/{2}/{3}.html'),
        (ValidationResultIdentifier, 5, 'validations/{4}/{0}/{1}/{2}/{3}.html'),
        ("index_page", 0, 'index.html'),
    )
    self.store_backends = {
        backend_key: instantiate_class_from_config(
            config=store_backend,
            runtime_config={"root_directory": root_directory},
            config_defaults={
                "module_name": "great_expectations.data_context.store",
                "key_length": key_length,
                "filepath_template": filepath_template,
            },
        )
        for backend_key, key_length, filepath_template in backend_layouts
    }

    self.root_directory = root_directory
    self.serialization_type = serialization_type

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()
def __init__(self, store_backend=None, runtime_environment=None):
    """Create one backend per kind of rendered asset from a single config.

    The same ``store_backend`` configuration is instantiated four times:
    expectation suite pages, validation result pages, the index page and
    static assets. For a ``GeCloudStoreBackend`` no filepath defaults are
    supplied; for any other class each backend gets its own filepath
    prefix/suffix or template default.

    Args:
        store_backend: backend configuration dictionary. ``.get`` is called
            on it unconditionally, so a dictionary must be supplied even
            though the parameter defaults to None. ``module_name`` defaults
            to ``"great_expectations.data_context.store"`` and ``class_name``
            to ``"TupleFilesystemStoreBackend"``.
        runtime_environment: forwarded to ``instantiate_class_from_config``.

    Raises:
        DataContextError: if the configured class is a subclass of neither
            ``TupleStoreBackend`` nor ``GeCloudStoreBackend``.
        ClassInstantiationError: if any of the backends instantiates to a
            falsy value.
    """
    store_backend_module_name = store_backend.get(
        "module_name", "great_expectations.data_context.store"
    )
    store_backend_class_name = store_backend.get(
        "class_name", "TupleFilesystemStoreBackend"
    )
    verify_dynamic_loading_support(module_name=store_backend_module_name)
    store_class = load_class(store_backend_class_name, store_backend_module_name)

    # Store Class was loaded successfully; verify that it is of a correct subclass.
    if not issubclass(store_class, (TupleStoreBackend, GeCloudStoreBackend)):
        raise DataContextError(
            "Invalid configuration: HtmlSiteStore needs a TupleStoreBackend or GeCloudStoreBackend"
        )
    if "filepath_template" in store_backend or (
        "fixed_length_key" in store_backend
        and store_backend["fixed_length_key"] is True
    ):
        logger.warning(
            "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: "
            "filepaths will be selected based on the type of asset rendered."
        )

    # One thing to watch for is reversibility of keys.
    # If several types are being written to overlapping directories, we could get collisions.
    module_name = "great_expectations.data_context.store"
    # Compare against the resolved class name: indexing store_backend
    # directly would raise KeyError when "class_name" was left to its default.
    is_ge_cloud_store = store_backend_class_name == "GeCloudStoreBackend"

    def _build_backend(**filepath_defaults):
        # Instantiate the configured backend; the filepath defaults only
        # apply to non-cloud backends.
        config_defaults = {"module_name": module_name}
        if not is_ge_cloud_store:
            config_defaults.update(filepath_defaults)
        config_defaults["suppress_store_backend_id"] = True

        backend = instantiate_class_from_config(
            config=store_backend,
            runtime_environment=runtime_environment,
            config_defaults=config_defaults,
        )
        if not backend:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=store_backend_class_name,
            )
        return backend

    filepath_suffix = ".html"
    self.store_backends = {
        ExpectationSuiteIdentifier: _build_backend(
            filepath_prefix="expectations", filepath_suffix=filepath_suffix
        ),
        ValidationResultIdentifier: _build_backend(
            filepath_prefix="validations", filepath_suffix=filepath_suffix
        ),
        "index_page": _build_backend(filepath_template="index.html"),
        "static_assets": _build_backend(filepath_template=None),
    }

    # NOTE: Instead of using the filesystem as the source of record for keys,
    # this class tracks keys separately in an internal set.
    # This means that keys are stored for a specific session, but can't be fetched after the original
    # HtmlSiteStore instance leaves scope.
    # Doing it this way allows us to prevent namespace collisions among keys while still having multiple
    # backends that write to the same directory structure.
    # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it
    # can't necessarily set and list_keys like most other Stores.
    self.keys = set()

    # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter
    # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary.
    self._config = {
        "store_backend": store_backend,
        "runtime_environment": runtime_environment,
        "module_name": self.__class__.__module__,
        "class_name": self.__class__.__name__,
    }
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)