def _is_parent_class_recognized(
    classes_to_check,
    object_=None,
    object_class=None,
    object_config=None,
) -> Optional[str]:
    """Find the first candidate class that the target class derives from.

    Private helper meant to back a public `is_parent_class_recognized()`
    method on anonymizers, which supply the core GE `classes_to_check`.

    The target class is taken from `object_class` when given, otherwise from
    the class of `object_`, otherwise it is loaded from the "class_name" and
    "module_name" entries of `object_config`.

    Returns:
        The name of the first matching class in `classes_to_check`, or None
        when nothing matches or an AttributeError occurs while resolving.

    Raises:
        AssertionError: if none of object_, object_class, object_config is truthy.
    """
    assert (
        object_ or object_class or object_config
    ), "Must pass either object_ or object_class or object_config."

    try:
        if object_class is None:
            if object_ is not None:
                object_class = object_.__class__
            elif object_config is not None:
                configured_class_name = object_config.get("class_name")
                configured_module_name = object_config.get("module_name")
                object_class = load_class(
                    configured_class_name, configured_module_name
                )

        # First candidate the target is a subclass of wins; None if no match.
        return next(
            (
                candidate.__name__
                for candidate in classes_to_check
                if issubclass(object_class, candidate)
            ),
            None,
        )
    except AttributeError:
        return None
def __init__(self, store_backend=None):
    """Set up the store, pre-filling defaults for database-backed backends.

    Args:
        store_backend: Optional backend configuration mapping. Its
            "module_name" and "class_name" entries (with built-in fallbacks)
            select the backend class. When that class derives from
            DatabaseStoreBackend, missing "table_name" and "key_columns"
            entries are written into this same mapping.
    """
    if store_backend is not None:
        module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=module_name)
        backend_class = load_class(class_name, module_name)

        if issubclass(backend_class, DatabaseStoreBackend):
            # Provide defaults for this common case; existing values are kept.
            database_defaults = (
                ("table_name", "ge_metrics"),
                (
                    "key_columns",
                    [
                        "run_id",
                        "expectation_suite_identifier",
                        "metric_name",
                        "metric_kwargs_id",
                    ],
                ),
            )
            for option, fallback in database_defaults:
                store_backend[option] = store_backend.get(option, fallback)

    super().__init__(store_backend=store_backend)
def __init__(self, store_backend=None, store_name=None):
    """Set up the store and record its effective configuration.

    Args:
        store_backend: Optional backend configuration mapping. When the class
            it names derives from DatabaseStoreBackend, a missing
            "table_name" entry is written into this same mapping.
        store_name: Optional name forwarded to the parent initializer.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=backend_module)
        backend_class = load_class(backend_class_name, backend_module)

        # The class loaded successfully; now check whether it is a database backend.
        if issubclass(backend_class, DatabaseStoreBackend):
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_evaluation_parameters"
            )

    super().__init__(store_backend=store_backend, store_name=store_name)

    # Capture this call's arguments plus the concrete class location, then
    # strip the falsy entries in place so "_config" holds only meaningful values.
    self._config = dict(
        store_backend=store_backend,
        store_name=store_name,
        module_name=self.__class__.__module__,
        class_name=self.__class__.__name__,
    )
    filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
def anonymize_object_info(
    self,
    anonymized_info_dict,
    ge_classes,
    object_=None,
    object_class=None,
    object_config=None,
):
    """Populate `anonymized_info_dict` with parent-class information.

    The target class is taken from `object_class` when given, otherwise from
    the class of `object_`, otherwise it is loaded from the "class_name" and
    "module_name" entries of `object_config`.

    Args:
        anonymized_info_dict: Dictionary to update in place and return.
        ge_classes: Candidate parent classes, checked in order.
        object_: An instance whose class should be analysed.
        object_class: The class to analyse.
        object_config: Configuration mapping naming the class to analyse.

    Returns:
        `anonymized_info_dict`. "parent_class" is the name of the first
        matching candidate, or "__not_recognized__". "anonymized_class" is
        added unless the target is exactly the matched candidate.

    Raises:
        AssertionError: if none of object_, object_class, object_config is truthy.
    """
    assert (
        object_ or object_class or object_config
    ), "Must pass either object_ or object_class or object_config."

    # Bound up front so the AttributeError handler can always reference it,
    # even when resolution fails before any class name has been determined.
    object_class_name = None
    try:
        if object_class is None and object_ is not None:
            object_class = object_.__class__
        elif object_class is None and object_config is not None:
            object_class_name = object_config.get("class_name")
            object_module_name = object_config.get("module_name")
            object_class = load_class(object_class_name, object_module_name)
        object_class_name = object_class.__name__

        for ge_class in ge_classes:
            if issubclass(object_class, ge_class):
                anonymized_info_dict["parent_class"] = ge_class.__name__
                # Only custom subclasses get an anonymized class entry.
                if object_class != ge_class:
                    anonymized_info_dict["anonymized_class"] = self.anonymize(
                        object_class_name
                    )
                break

        if not anonymized_info_dict.get("parent_class"):
            anonymized_info_dict["parent_class"] = "__not_recognized__"
            anonymized_info_dict["anonymized_class"] = self.anonymize(
                object_class_name
            )
    except AttributeError:
        anonymized_info_dict["parent_class"] = "__not_recognized__"
        anonymized_info_dict["anonymized_class"] = self.anonymize(object_class_name)

    return anonymized_info_dict
def __init__(self, bullet_list_renderer=None):
    """Load the content-block renderer class used for bullet lists.

    Args:
        bullet_list_renderer: Optional mapping with a "class_name" entry and
            an optional "module_name" entry. When omitted,
            ExpectationSuiteBulletListContentBlockRenderer is used.
    """
    renderer_config = bullet_list_renderer
    if renderer_config is None:
        renderer_config = {
            "class_name": "ExpectationSuiteBulletListContentBlockRenderer"
        }

    renderer_class_name = renderer_config.get("class_name")
    renderer_module_name = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    self._bullet_list_renderer = load_class(
        class_name=renderer_class_name,
        module_name=renderer_module_name,
    )
def __init__(self, table_renderer=None):
    """Load the content-block renderer class used for result tables.

    Args:
        table_renderer: Optional mapping with a "class_name" entry and an
            optional "module_name" entry. When omitted,
            ValidationResultsTableContentBlockRenderer is used.
    """
    renderer_config = (
        {"class_name": "ValidationResultsTableContentBlockRenderer"}
        if table_renderer is None
        else table_renderer
    )

    renderer_class_name = renderer_config.get("class_name")
    renderer_module_name = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    self._table_renderer = load_class(
        class_name=renderer_class_name,
        module_name=renderer_module_name,
    )
def __init__(self, overview_table_renderer=None):
    """Load the content-block renderer class used for the overview table.

    Args:
        overview_table_renderer: Optional mapping with a "class_name" entry
            and an optional "module_name" entry. When omitted,
            ProfilingOverviewTableContentBlockRenderer is used.
    """
    default_module = "great_expectations.render.renderer.content_block"

    renderer_config = overview_table_renderer
    if renderer_config is None:
        renderer_config = {"class_name": "ProfilingOverviewTableContentBlockRenderer"}

    renderer_class_name = renderer_config.get("class_name")
    renderer_module_name = renderer_config.get("module_name", default_module)
    self._overview_table_renderer = load_class(
        class_name=renderer_class_name,
        module_name=renderer_module_name,
    )
def get_parent_class(
    classes_to_check: Optional[List[type]] = None,
    object_: Optional[object] = None,
    object_class: Optional[type] = None,
    object_config: Optional[dict] = None,
) -> Optional[str]:
    """Find the name of a core GE parent class for the given target.

    The target class is taken from `object_class` when given, otherwise from
    the class of `object_`, otherwise it is loaded from the "class_name" and
    "module_name" entries of `object_config`.

    Args:
        classes_to_check: An optional list of candidate parent classes. When
            non-empty, only these candidates are considered; otherwise the
            target's direct base classes are inspected.
        object_: The specific object to analyze.
        object_class: The class of the specific object to analyze.
        object_config: The dictionary configuration of the specific object to analyze.

    Returns:
        The name of the parent class found, or None if no parent class was
        found or an AttributeError occurred while resolving it.

    Raises:
        AssertionError if no object_, object_class, or object_config is provided.
    """
    assert (
        object_ or object_class or object_config
    ), "Must pass either object_ or object_class or object_config."

    try:
        if object_class is None:
            if object_ is not None:
                object_class = object_.__class__
            elif object_config is not None:
                configured_class_name = object_config.get("class_name")
                configured_module_name = object_config.get("module_name")
                object_class = load_class(
                    configured_class_name, configured_module_name
                )

        # An explicit candidate list takes precedence over hierarchy traversal.
        if classes_to_check:
            return next(
                (
                    candidate.__name__
                    for candidate in classes_to_check
                    if issubclass(object_class, candidate)
                ),
                None,
            )

        # No candidates supplied: look at the direct bases of the target class.
        for base in object_class.__bases__:
            if BaseAnonymizer._is_core_great_expectations_class(base.__module__):
                return base.__name__
    except AttributeError:
        pass

    return None
def __init__(self, bullet_list_renderer=None):
    """Initialise the parent, then load the bullet-list renderer class.

    Args:
        bullet_list_renderer: Optional mapping with a "class_name" entry and
            an optional "module_name" entry. When omitted,
            ExpectationSuiteBulletListContentBlockRenderer is used.
    """
    super().__init__()

    renderer_config = (
        {"class_name": "ExpectationSuiteBulletListContentBlockRenderer"}
        if bullet_list_renderer is None
        else bullet_list_renderer
    )

    renderer_module = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    # Check the module can be loaded dynamically before attempting the load.
    verify_dynamic_loading_support(module_name=renderer_module)
    self._bullet_list_renderer = load_class(
        class_name=renderer_config.get("class_name"),
        module_name=renderer_module,
    )
def __init__(self, table_renderer=None):
    """Initialise the parent, then load the results-table renderer class.

    Args:
        table_renderer: Optional mapping with a "class_name" entry and an
            optional "module_name" entry. When omitted,
            ValidationResultsTableContentBlockRenderer is used.
    """
    super().__init__()

    renderer_config = table_renderer
    if renderer_config is None:
        renderer_config = {"class_name": "ValidationResultsTableContentBlockRenderer"}

    renderer_module = renderer_config.get(
        "module_name", "great_expectations.render.renderer.content_block"
    )
    # Check the module can be loaded dynamically before attempting the load.
    verify_dynamic_loading_support(module_name=renderer_module)
    renderer_class_name = renderer_config.get("class_name")
    self._table_renderer = load_class(
        class_name=renderer_class_name,
        module_name=renderer_module,
    )
def __init__(self, store_backend=None):
    """Set up the store, defaulting the table name for database backends.

    Args:
        store_backend: Optional backend configuration mapping. When the class
            it names derives from DatabaseStoreBackend, a missing
            "table_name" entry is written into this same mapping.
    """
    if store_backend is not None:
        module_name = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        verify_dynamic_loading_support(module_name=module_name)
        loaded_backend = load_class(class_name, module_name)

        # The class loaded successfully; now check whether it is a database backend.
        is_database_backend = issubclass(loaded_backend, DatabaseStoreBackend)
        if is_database_backend:
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_evaluation_parameters"
            )

    super().__init__(store_backend=store_backend)
def __init__(self, store_backend=None):
    """Set up the store, defaulting the table name for database backends.

    Args:
        store_backend: Optional backend configuration mapping. When the class
            it names derives from DatabaseStoreBackend, a missing
            "table_name" entry is written into this same mapping.
    """
    if store_backend is not None:
        backend_module = store_backend.get(
            "module_name", "great_expectations.data_context.store"
        )
        backend_class_name = store_backend.get("class_name", "InMemoryStoreBackend")
        backend_class = load_class(backend_class_name, backend_module)

        uses_database = issubclass(backend_class, DatabaseStoreBackend)
        if uses_database:
            # Provide defaults for this common case
            store_backend["table_name"] = store_backend.get(
                "table_name", "ge_evaluation_parameters"
            )

    super(EvaluationParameterStore, self).__init__(store_backend=store_backend)
def __init__(self, batch, expectation_suite, expectation_engine=None, **kwargs):
    """Store the batch and suite, and resolve the expectation engine class.

    Args:
        batch: Batch whose `data` attribute is inspected when the engine has
            to be guessed.
        expectation_suite: Stored unchanged on the instance.
        expectation_engine: A class, a ClassConfig, a dict of ClassConfig
            keyword arguments, or None to guess from `batch.data`.
        **kwargs: Kept on the instance as `init_kwargs`.

    Raises:
        ValueError: if no engine was given and none could be guessed.
    """
    self.batch = batch
    self.expectation_suite = expectation_suite

    engine = expectation_engine
    if isinstance(engine, dict):
        engine = ClassConfig(**engine)

    if isinstance(engine, ClassConfig):
        engine_module = engine.module_name or "great_expectations.dataset"
        verify_dynamic_loading_support(module_name=engine_module)
        engine = load_class(class_name=engine.class_name, module_name=engine_module)

    self.expectation_engine = engine

    # No engine supplied: guess from the batch data, trying pandas first,
    # then SQLAlchemy, then Spark. A missing optional package is skipped.
    if self.expectation_engine is None:
        try:
            import pandas as pd

            if isinstance(batch.data, pd.DataFrame):
                self.expectation_engine = PandasDataset
        except ImportError:
            pass

    if self.expectation_engine is None and isinstance(
        batch.data, SqlAlchemyBatchReference
    ):
        self.expectation_engine = SqlAlchemyDataset

    if self.expectation_engine is None:
        try:
            import pyspark

            if isinstance(batch.data, pyspark.sql.DataFrame):
                self.expectation_engine = SparkDFDataset
        except ImportError:
            pass

    if self.expectation_engine is None:
        raise ValueError(
            "Unable to identify expectation_engine. It must be a subclass of DataAsset."
        )

    self.init_kwargs = kwargs
def test_load_class_raises_error_when_module_name_is_not_string():
    """A non-string module name passed to `load_class` must raise TypeError."""
    for bad_input in [1, 1.3, ["a"], {"foo": "bar"}]:
        with pytest.raises(TypeError):
            # The invalid value must go in the module_name slot. Passing it
            # first would test class_name instead, which is not what this
            # test's name promises.
            load_class(class_name="foo", module_name=bad_input)
def test_load_class_raises_error_when_module_name_is_None():
    """Passing None as the module name must raise TypeError."""
    missing_module_name = None
    with pytest.raises(TypeError):
        load_class(class_name="foo", module_name=missing_module_name)
def test_load_class_raises_error_when_class_name_is_None():
    """Passing None as the class name must raise TypeError."""
    missing_class_name = None
    with pytest.raises(TypeError):
        load_class(
            class_name=missing_class_name,
            module_name="great_expectations.datasource",
        )
def test_load_class_raises_error_when_class_not_found():
    """An unknown class name in a real module must raise PluginClassNotFoundError."""
    expected_error = gee.PluginClassNotFoundError
    with pytest.raises(expected_error):
        load_class(
            class_name="TotallyNotARealClass",
            module_name="great_expectations.datasource",
        )
def test_load_class_raises_error_when_module_not_found():
    """A module name that cannot be found must raise PluginModuleNotFoundError."""
    expected_error = gee.PluginModuleNotFoundError
    with pytest.raises(expected_error):
        load_class(class_name="foo", module_name="bar")
def instantiate_class_from_config(config, runtime_environment, config_defaults=None):
    """Build a GE class from configuration dictionaries.

    "module_name" and "class_name" are taken from `config` when present,
    otherwise from `config_defaults`. They select the class to load and are
    not forwarded to it. The remaining entries of `config_defaults`,
    overridden by `config`, become the constructor's keyword arguments.

    Args:
        config: Configuration for the instance. It is deep-copied.
        runtime_environment: Optional mapping of extra values. Any
            constructor argument not already configured is filled from it
            when available. If the constructor declares a
            "runtime_environment" argument that is not configured, the whole
            mapping is passed under that name.
        config_defaults: Optional fallback values. It is copied, so the
            caller's dictionary is left untouched.

    Returns:
        The instantiated object.

    Raises:
        KeyError: if neither mapping provides "module_name" or "class_name".
        TypeError: if the class cannot be instantiated with the assembled
            keyword arguments.
    """
    # Work on private copies: keys are popped below, and popping from the
    # caller's defaults would break any later call that reuses the same dict.
    config_defaults = {} if config_defaults is None else dict(config_defaults)
    config = copy.deepcopy(config)

    module_name = config.pop("module_name", None)
    if module_name is None:
        try:
            module_name = config_defaults.pop("module_name")
        except KeyError as err:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a module_name key.".format(
                    config,
                    config_defaults,
                )
            ) from err
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("module_name", None)

    verify_dynamic_loading_support(module_name=module_name)

    class_name = config.pop("class_name", None)
    if class_name is None:
        logger.warning(
            "Instantiating class from config without an explicit class_name is dangerous. Consider adding "
            "an explicit class_name for %s",
            config.get("name"),
        )
        try:
            class_name = config_defaults.pop("class_name")
        except KeyError as err:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a class_name key.".format(
                    config,
                    config_defaults,
                )
            ) from err
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("class_name", None)

    class_ = load_class(class_name=class_name, module_name=module_name)

    # Explicit config wins over defaults.
    config_with_defaults = copy.deepcopy(config_defaults)
    config_with_defaults.update(config)

    if runtime_environment is not None:
        # If there are additional kwargs available in the runtime_environment requested by a
        # class to be instantiated, provide them
        argspec = inspect.getfullargspec(class_.__init__).args[1:]  # drop "self"
        missing_args = set(argspec) - set(config_with_defaults)
        config_with_defaults.update(
            {
                missing_arg: runtime_environment[missing_arg]
                for missing_arg in missing_args
                if missing_arg in runtime_environment
            }
        )
        # Add the entire runtime_environment as well if it's requested
        if "runtime_environment" in missing_args:
            config_with_defaults.update({"runtime_environment": runtime_environment})

    try:
        class_instance = class_(**config_with_defaults)
    except TypeError as e:
        raise TypeError(
            "Couldn't instantiate class : {} with config : \n\t{}\n \n".format(
                class_name, format_dict_for_error_message(config_with_defaults)
            )
            + str(e)
        ) from e

    return class_instance
def _anonymize_object_info(
    self,
    anonymized_info_dict: dict,
    object_: Optional[object] = None,
    object_class: Optional[type] = None,
    object_config: Optional[dict] = None,
    runtime_environment: Optional[dict] = None,
) -> dict:
    """Fill `anonymized_info_dict` with parent-class details for an object.

    The target class is taken from `object_class` when given, otherwise from
    the class of `object_`, otherwise it is loaded using the "class_name" and
    "module_name" entries of `object_config`. If the config has no module
    name, the one in `runtime_environment` is used.

    Args:
        anonymized_info_dict: The payload object to hydrate with anonymized values.
        object_: The specific object to anonymize.
        object_class: The class of the specific object to anonymize.
        object_config: The dictionary configuration of the specific object to anonymize.
        runtime_environment: A dictionary containing relevant runtime
            information (like class_name and module_name).

    Returns:
        The anonymized_info_dict that's been populated with anonymized values.

    Raises:
        AssertionError if no object_, object_class, or object_config is provided.
    """
    assert (
        object_ or object_class or object_config
    ), "Must pass either object_ or object_class or object_config."

    if runtime_environment is None:
        runtime_environment = {}

    # Bound before the try block so the AttributeError handler can use them.
    object_class_name: Optional[str] = None
    object_module_name: Optional[str] = None

    try:
        if object_class is None:
            if object_ is not None:
                object_class = object_.__class__
            elif object_config is not None:
                object_class_name = object_config.get("class_name")
                object_module_name = object_config.get(
                    "module_name"
                ) or runtime_environment.get("module_name")
                object_class = load_class(object_class_name, object_module_name)

        object_class_name = object_class.__name__
        object_module_name = object_class.__module__
        base_classes = object_class.__bases__

        if self._is_core_great_expectations_class(object_module_name):
            anonymized_info_dict["parent_class"] = object_class_name
        else:
            # The class itself is not a core class, so inspect its direct
            # parents. Multiple inheritance is rare among core GE objects but
            # possible, so every core parent is collected and the names are
            # reported as a comma-delimited list.
            core_parent_names = [
                base.__name__
                for base in base_classes
                if BaseAnonymizer._is_core_great_expectations_class(base.__module__)
            ]
            if core_parent_names:
                anonymized_info_dict["parent_class"] = ",".join(core_parent_names)
                anonymized_info_dict["anonymized_class"] = self._anonymize_string(
                    object_class_name
                )

        # Catch-all to prevent edge cases from slipping past
        if not anonymized_info_dict.get("parent_class"):
            anonymized_info_dict["parent_class"] = "__not_recognized__"
            anonymized_info_dict["anonymized_class"] = self._anonymize_string(
                object_class_name
            )
    except AttributeError:
        anonymized_info_dict["parent_class"] = "__not_recognized__"
        anonymized_info_dict["anonymized_class"] = self._anonymize_string(
            object_class_name
        )

    return anonymized_info_dict