Exemple #1
0
    def _is_parent_class_recognized(
        classes_to_check,
        object_=None,
        object_class=None,
        object_config=None,
    ) -> Optional[str]:
        """
        Report which of the candidate classes the target class derives from.

        The target class is ``object_class`` when given. Otherwise it is
        ``object_.__class__``, or, failing that, the class loaded from the
        ``class_name`` / ``module_name`` entries of ``object_config``.

        Args:
            classes_to_check: Candidate classes, tested in iteration order.
            object_: An instance whose class should be examined.
            object_class: The class to examine directly.
            object_config: Mapping with ``class_name`` and ``module_name`` entries.

        Returns:
            ``__name__`` of the first candidate the target is a subclass of,
            or None when nothing matches or an AttributeError occurs on the way.

        Raises:
            AssertionError: if object_, object_class and object_config are all falsy.
        """
        assert (object_ or object_class or object_config
                ), "Must pass either object_ or object_class or object_config."
        try:
            target = object_class
            if target is None:
                if object_ is not None:
                    target = object_.__class__
                elif object_config is not None:
                    configured_class_name = object_config.get("class_name")
                    configured_module_name = object_config.get("module_name")
                    target = load_class(configured_class_name,
                                        configured_module_name)

            # Lazily scan the candidates; the first hit wins.
            return next(
                (candidate.__name__
                 for candidate in classes_to_check
                 if issubclass(target, candidate)),
                None,
            )
        except AttributeError:
            return None
    def __init__(self, store_backend=None):
        """Initialize the store, pre-filling database-backend defaults.

        Args:
            store_backend: Optional backend configuration mapping. Its
                ``module_name`` and ``class_name`` entries (defaulting to
                ``great_expectations.data_context.store`` and
                ``InMemoryStoreBackend``) select the backend class. When that
                class derives from ``DatabaseStoreBackend``, missing
                ``table_name`` and ``key_columns`` entries are written into
                this mapping in place before it is handed to the parent
                initializer.
        """
        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            verify_dynamic_loading_support(module_name=backend_module)
            backend_class = load_class(backend_class_name, backend_module)

            if issubclass(backend_class, DatabaseStoreBackend):
                # Provide defaults for this common case; values already
                # present in the configuration are kept as they are.
                database_defaults = (
                    ("table_name", "ge_metrics"),
                    (
                        "key_columns",
                        [
                            "run_id",
                            "expectation_suite_identifier",
                            "metric_name",
                            "metric_kwargs_id",
                        ],
                    ),
                )
                for option, fallback in database_defaults:
                    store_backend[option] = store_backend.get(option, fallback)

        super().__init__(store_backend=store_backend)
Exemple #3
0
    def __init__(self, store_backend=None, store_name=None):
        """Initialize the store and record its own configuration.

        Args:
            store_backend: Optional backend configuration mapping. Its
                ``module_name`` / ``class_name`` entries (defaulting to
                ``great_expectations.data_context.store`` /
                ``InMemoryStoreBackend``) select the backend class. When that
                class derives from ``DatabaseStoreBackend`` and no
                ``table_name`` is present, ``ge_evaluation_parameters`` is
                written into this mapping in place.
            store_name: Optional name forwarded to the parent initializer.
        """
        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store"
            )
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend"
            )
            verify_dynamic_loading_support(module_name=backend_module)
            backend_class = load_class(backend_class_name, backend_module)

            # The backend class loaded; database backends get a default table.
            if issubclass(backend_class, DatabaseStoreBackend):
                table_name = store_backend.get(
                    "table_name", "ge_evaluation_parameters"
                )
                store_backend["table_name"] = table_name
        super().__init__(store_backend=store_backend, store_name=store_name)

        # Snapshot the call arguments together with this class's module and
        # name, then let filter_properties_dict clean the dictionary in place
        # (clean_falsy=True).
        own_class = self.__class__
        self._config = {
            "store_backend": store_backend,
            "store_name": store_name,
            "module_name": own_class.__module__,
            "class_name": own_class.__name__,
        }
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)
    def anonymize_object_info(
            self,
            anonymized_info_dict,
            ge_classes,
            object_=None,
            object_class=None,
            object_config=None
    ):
        """Fill ``anonymized_info_dict`` with class information about an object.

        The class to describe is ``object_class`` when given; otherwise it is
        ``object_.__class__``, or the class loaded from the ``class_name`` /
        ``module_name`` entries of ``object_config``.

        Args:
            anonymized_info_dict: Dictionary to populate; mutated in place.
            ge_classes: Candidate parent classes, tested in iteration order.
            object_: An instance whose class should be described.
            object_class: The class to describe directly.
            object_config: Mapping with ``class_name`` and ``module_name`` entries.

        Returns:
            The same ``anonymized_info_dict``. ``parent_class`` holds the name
            of the first matching candidate, or ``"__not_recognized__"``.
            ``anonymized_class`` holds ``self.anonymize(<class name>)`` and is
            set whenever the class is not exactly the matched candidate.

        Raises:
            AssertionError: if object_, object_class and object_config are all falsy.
        """
        assert object_ or object_class or object_config, "Must pass either object_ or object_class or object_config."
        # Bind the name before the try block: the AttributeError handler reads
        # it, and the failure may happen before any class name is known.
        # NOTE(review): self.anonymize then receives None - confirm it accepts that.
        object_class_name = None
        try:
            if object_class is None and object_ is not None:
                object_class = object_.__class__
            elif object_class is None and object_config is not None:
                object_class_name = object_config.get("class_name")
                object_module_name = object_config.get("module_name")
                object_class = load_class(object_class_name, object_module_name)
            object_class_name = object_class.__name__

            for ge_class in ge_classes:
                if issubclass(object_class, ge_class):
                    anonymized_info_dict["parent_class"] = ge_class.__name__
                    # Only a proper subclass gets its own name anonymized.
                    if object_class != ge_class:
                        anonymized_info_dict["anonymized_class"] = self.anonymize(object_class_name)
                    break

            if not anonymized_info_dict.get("parent_class"):
                anonymized_info_dict["parent_class"] = "__not_recognized__"
                anonymized_info_dict["anonymized_class"] = self.anonymize(object_class_name)
        except AttributeError:
            anonymized_info_dict["parent_class"] = "__not_recognized__"
            anonymized_info_dict["anonymized_class"] = self.anonymize(object_class_name)

        return anonymized_info_dict
Exemple #5
0
 def __init__(self, bullet_list_renderer=None):
     """Resolve and store the class used to render bullet lists.

     Args:
         bullet_list_renderer: Optional mapping with ``class_name`` and
             ``module_name`` entries. When omitted, the class name
             ``ExpectationSuiteBulletListContentBlockRenderer`` is used; the
             module falls back to
             ``great_expectations.render.renderer.content_block``.
     """
     renderer_config = bullet_list_renderer
     if renderer_config is None:
         renderer_config = {
             "class_name": "ExpectationSuiteBulletListContentBlockRenderer"
         }
     renderer_class_name = renderer_config.get("class_name")
     renderer_module_name = renderer_config.get(
         "module_name", "great_expectations.render.renderer.content_block")
     self._bullet_list_renderer = load_class(
         class_name=renderer_class_name,
         module_name=renderer_module_name,
     )
Exemple #6
0
 def __init__(self, table_renderer=None):
     """Resolve and store the class used to render tables.

     Args:
         table_renderer: Optional mapping with ``class_name`` and
             ``module_name`` entries. When omitted, the class name
             ``ValidationResultsTableContentBlockRenderer`` is used; the
             module falls back to
             ``great_expectations.render.renderer.content_block``.
     """
     renderer_config = table_renderer
     if renderer_config is None:
         renderer_config = {
             "class_name": "ValidationResultsTableContentBlockRenderer"
         }
     renderer_class_name = renderer_config.get("class_name")
     renderer_module_name = renderer_config.get(
         "module_name", "great_expectations.render.renderer.content_block")
     self._table_renderer = load_class(
         class_name=renderer_class_name,
         module_name=renderer_module_name,
     )
Exemple #7
0
 def __init__(self, overview_table_renderer=None):
     """Resolve and store the class used to render the overview table.

     Args:
         overview_table_renderer: Optional mapping with ``class_name`` and
             ``module_name`` entries. When omitted, the class name
             ``ProfilingOverviewTableContentBlockRenderer`` is used; the
             module falls back to
             ``great_expectations.render.renderer.content_block``.
     """
     renderer_config = overview_table_renderer
     if renderer_config is None:
         renderer_config = {
             "class_name": "ProfilingOverviewTableContentBlockRenderer"
         }
     renderer_class_name = renderer_config.get("class_name")
     renderer_module_name = renderer_config.get(
         "module_name", "great_expectations.render.renderer.content_block")
     self._overview_table_renderer = load_class(
         class_name=renderer_class_name,
         module_name=renderer_module_name,
     )
Exemple #8
0
    def get_parent_class(
        classes_to_check: Optional[List[type]] = None,
        object_: Optional[object] = None,
        object_class: Optional[type] = None,
        object_config: Optional[dict] = None,
    ) -> Optional[str]:
        """Find the name of a recognized parent class for the given target.

        The target class is ``object_class`` when given; otherwise it is
        ``object_.__class__``, or the class loaded from the ``class_name`` /
        ``module_name`` entries of ``object_config``. With a non-empty
        ``classes_to_check`` only those candidates are consulted; without one,
        the target's direct base classes are inspected instead.

        Args:
            classes_to_check: An optional list of candidate parent classes to iterate through.
            object_: The specific object to analyze.
            object_class: The class of the specific object to analyze.
            object_config: The dictionary configuration of the specific object to analyze.

        Returns:
            The name of the first matching parent class, or None if there is
            no match or an AttributeError occurs during the lookup.

        Raises:
            AssertionError if no object_, object_class, or object_config is provided.
        """
        assert (object_ or object_class or object_config
                ), "Must pass either object_ or object_class or object_config."
        try:
            target = object_class
            if target is None:
                if object_ is not None:
                    target = object_.__class__
                elif object_config is not None:
                    configured_class_name = object_config.get("class_name")
                    configured_module_name = object_config.get("module_name")
                    target = load_class(configured_class_name,
                                        configured_module_name)

            # An explicit candidate list takes precedence and is authoritative.
            if classes_to_check:
                return next(
                    (candidate.__name__
                     for candidate in classes_to_check
                     if issubclass(target, candidate)),
                    None,
                )

            # No candidates supplied: look at the direct bases and keep the
            # first one whose module is reported as a core GE module.
            base: type
            for base in target.__bases__:
                if BaseAnonymizer._is_core_great_expectations_class(
                        base.__module__):
                    return base.__name__

        except AttributeError:
            pass

        return None
 def __init__(self, bullet_list_renderer=None):
     """Initialize the parent, then resolve the bullet-list renderer class.

     Args:
         bullet_list_renderer: Optional mapping with ``class_name`` and
             ``module_name`` entries. When omitted, the class name
             ``ExpectationSuiteBulletListContentBlockRenderer`` is used; the
             module falls back to
             ``great_expectations.render.renderer.content_block``.
     """
     super().__init__()
     renderer_config = bullet_list_renderer
     if renderer_config is None:
         renderer_config = {
             "class_name": "ExpectationSuiteBulletListContentBlockRenderer"
         }
     renderer_module = renderer_config.get(
         "module_name", "great_expectations.render.renderer.content_block")
     # Check the module can be loaded dynamically before resolving the class.
     verify_dynamic_loading_support(module_name=renderer_module)
     renderer_class_name = renderer_config.get("class_name")
     self._bullet_list_renderer = load_class(
         class_name=renderer_class_name,
         module_name=renderer_module,
     )
 def __init__(self, table_renderer=None):
     """Initialize the parent, then resolve the table renderer class.

     Args:
         table_renderer: Optional mapping with ``class_name`` and
             ``module_name`` entries. When omitted, the class name
             ``ValidationResultsTableContentBlockRenderer`` is used; the
             module falls back to
             ``great_expectations.render.renderer.content_block``.
     """
     super().__init__()
     renderer_config = table_renderer
     if renderer_config is None:
         renderer_config = {
             "class_name": "ValidationResultsTableContentBlockRenderer"
         }
     renderer_module = renderer_config.get(
         "module_name", "great_expectations.render.renderer.content_block")
     # Check the module can be loaded dynamically before resolving the class.
     verify_dynamic_loading_support(module_name=renderer_module)
     renderer_class_name = renderer_config.get("class_name")
     self._table_renderer = load_class(
         class_name=renderer_class_name,
         module_name=renderer_module,
     )
    def __init__(self, store_backend=None):
        """Initialize the store, defaulting the table name for database backends.

        Args:
            store_backend: Optional backend configuration mapping. Its
                ``module_name`` / ``class_name`` entries (defaulting to
                ``great_expectations.data_context.store`` /
                ``InMemoryStoreBackend``) select the backend class. When that
                class derives from ``DatabaseStoreBackend`` and no
                ``table_name`` is present, ``ge_evaluation_parameters`` is
                written into this mapping in place.
        """
        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            verify_dynamic_loading_support(module_name=backend_module)
            backend_class = load_class(backend_class_name, backend_module)

            # The backend class loaded; database backends get a default table.
            if issubclass(backend_class, DatabaseStoreBackend):
                table_name = store_backend.get(
                    "table_name", "ge_evaluation_parameters")
                store_backend["table_name"] = table_name
        super().__init__(store_backend=store_backend)
    def __init__(self, store_backend=None):
        """Initialize the store, defaulting the table name for database backends.

        Args:
            store_backend: Optional backend configuration mapping. Its
                ``module_name`` / ``class_name`` entries (defaulting to
                ``great_expectations.data_context.store`` /
                ``InMemoryStoreBackend``) select the backend class. When that
                class derives from ``DatabaseStoreBackend`` and no
                ``table_name`` is present, ``ge_evaluation_parameters`` is
                written into this mapping in place.
        """
        if store_backend is not None:
            backend_module = store_backend.get(
                "module_name", "great_expectations.data_context.store")
            backend_class_name = store_backend.get(
                "class_name", "InMemoryStoreBackend")
            backend_class = load_class(backend_class_name, backend_module)

            if issubclass(backend_class, DatabaseStoreBackend):
                # Provide defaults for this common case
                table_name = store_backend.get(
                    "table_name", "ge_evaluation_parameters")
                store_backend["table_name"] = table_name
        # Explicit two-argument super() is kept exactly as the original wrote it.
        parent = super(EvaluationParameterStore, self)
        parent.__init__(store_backend=store_backend)
Exemple #13
0
    def __init__(self,
                 batch,
                 expectation_suite,
                 expectation_engine=None,
                 **kwargs):
        """Store the batch and suite, and settle on an expectation engine.

        Args:
            batch: Object whose ``data`` attribute is inspected when the
                engine has to be guessed.
            expectation_suite: Stored unchanged on the instance.
            expectation_engine: Either the engine itself, a ``ClassConfig``,
                or a dict of ``ClassConfig`` keyword arguments. A config is
                resolved with ``load_class``, using the module
                ``great_expectations.dataset`` when the config names none.
                When None, the engine is guessed from ``batch.data``.
            **kwargs: Kept on the instance as ``init_kwargs``.

        Raises:
            ValueError: if no engine was given and none could be guessed.
        """
        self.batch = batch
        self.expectation_suite = expectation_suite

        engine = expectation_engine
        if isinstance(engine, dict):
            engine = ClassConfig(**engine)

        if isinstance(engine, ClassConfig):
            engine_module = engine.module_name or "great_expectations.dataset"
            verify_dynamic_loading_support(module_name=engine_module)
            engine = load_class(class_name=engine.class_name,
                                module_name=engine_module)

        self.expectation_engine = engine

        # Guess the engine from the batch data; the probes run in the order
        # pandas, SQLAlchemy, Spark, and the first match wins.
        if self.expectation_engine is None:
            try:
                import pandas as pd

                if isinstance(batch.data, pd.DataFrame):
                    self.expectation_engine = PandasDataset
            except ImportError:
                # pandas is not installed; move on to the next probe.
                pass

        if (self.expectation_engine is None
                and isinstance(batch.data, SqlAlchemyBatchReference)):
            self.expectation_engine = SqlAlchemyDataset

        if self.expectation_engine is None:
            try:
                import pyspark

                if isinstance(batch.data, pyspark.sql.DataFrame):
                    self.expectation_engine = SparkDFDataset
            except ImportError:
                # pyspark is not installed; nothing else left to try.
                pass

        if self.expectation_engine is None:
            raise ValueError(
                "Unable to identify expectation_engine. It must be a subclass of DataAsset."
            )

        self.init_kwargs = kwargs
Exemple #14
0
def test_load_class_raises_error_when_module_name_is_not_string():
    """load_class must raise TypeError for each non-string value tried.

    NOTE(review): the non-string value is supplied as ``class_name`` (the
    first positional parameter at other call sites in this file), while the
    test name mentions ``module_name`` - confirm which one was intended.
    """
    non_string_values = (1, 1.3, ["a"], {"foo": "bar"})
    for non_string in non_string_values:
        with pytest.raises(TypeError):
            load_class(class_name=non_string,
                       module_name="great_expectations.datasource")
Exemple #15
0
def test_load_class_raises_error_when_module_name_is_None():
    """load_class must raise TypeError when the module name is None."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        load_class(class_name="foo", module_name=None)
Exemple #16
0
def test_load_class_raises_error_when_class_name_is_None():
    """load_class must raise TypeError when the class name is None."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        load_class(class_name=None,
                   module_name="great_expectations.datasource")
Exemple #17
0
def test_load_class_raises_error_when_class_not_found():
    """A class name missing from the module must raise PluginClassNotFoundError."""
    expected_error = gee.PluginClassNotFoundError
    with pytest.raises(expected_error):
        load_class(class_name="TotallyNotARealClass",
                   module_name="great_expectations.datasource")
Exemple #18
0
def test_load_class_raises_error_when_module_not_found():
    """A module name that cannot be found must raise PluginModuleNotFoundError."""
    expected_error = gee.PluginModuleNotFoundError
    with pytest.raises(expected_error):
        load_class(class_name="foo", module_name="bar")
Exemple #19
0
def instantiate_class_from_config(config,
                                  runtime_environment,
                                  config_defaults=None):
    """Build a GE class from configuration dictionaries.

    ``module_name`` and ``class_name`` are taken from ``config`` and, when
    absent there, from ``config_defaults``. The remaining entries of both
    dictionaries (``config`` overriding ``config_defaults``) become keyword
    arguments of the loaded class.

    Neither ``config`` nor ``config_defaults`` is modified; the function
    works on private copies.

    Args:
        config: Configuration dictionary for the instance.
        runtime_environment: Optional dictionary. Any ``__init__`` argument
            of the loaded class that is not yet configured is filled from it
            when it has an entry of that name. If the class accepts a
            ``runtime_environment`` argument, the whole dictionary is passed.
        config_defaults: Optional dictionary of fallback values.

    Returns:
        The instance created by calling the loaded class.

    Raises:
        KeyError: if neither dictionary supplies ``module_name`` or
            ``class_name``.
        TypeError: if the class rejects the assembled keyword arguments.
    """

    # The pops below must not leak into the caller's dictionaries. A shallow
    # copy is enough here because only top-level keys are removed; the values
    # are deep-copied further down before they reach the class.
    if config_defaults is None:
        config_defaults = {}
    else:
        config_defaults = copy.copy(config_defaults)

    config = copy.deepcopy(config)

    module_name = config.pop("module_name", None)
    if module_name is None:
        try:
            module_name = config_defaults.pop("module_name")
        except KeyError:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a module_name key."
                .format(
                    config,
                    config_defaults,
                ))
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("module_name", None)

    verify_dynamic_loading_support(module_name=module_name)

    class_name = config.pop("class_name", None)
    if class_name is None:
        logger.warning(
            "Instantiating class from config without an explicit class_name is dangerous. Consider adding "
            "an explicit class_name for %s", config.get("name"))
        try:
            class_name = config_defaults.pop("class_name")
        except KeyError:
            raise KeyError(
                "Neither config : {} nor config_defaults : {} contains a class_name key."
                .format(
                    config,
                    config_defaults,
                ))
    else:
        # Pop the value without using it, to avoid sending an unwanted value to the config_class
        config_defaults.pop("class_name", None)

    class_ = load_class(class_name=class_name, module_name=module_name)

    config_with_defaults = copy.deepcopy(config_defaults)
    config_with_defaults.update(config)
    if runtime_environment is not None:
        # If there are additional kwargs available in the runtime_environment requested by a
        # class to be instantiated, provide them
        init_arg_names = inspect.getfullargspec(class_.__init__).args[1:]

        missing_args = set(init_arg_names) - set(config_with_defaults)
        config_with_defaults.update({
            missing_arg: runtime_environment[missing_arg]
            for missing_arg in missing_args
            if missing_arg in runtime_environment
        })
        # Add the entire runtime_environment as well if it's requested
        if "runtime_environment" in missing_args:
            config_with_defaults["runtime_environment"] = runtime_environment

    try:
        class_instance = class_(**config_with_defaults)
    except TypeError as e:
        # Keep the original error as the explicit cause for easier debugging.
        raise TypeError(
            "Couldn't instantiate class : {} with config : \n\t{}\n \n".format(
                class_name, format_dict_for_error_message(
                    config_with_defaults)) + str(e)) from e

    return class_instance
Exemple #20
0
    def _anonymize_object_info(
        self,
        anonymized_info_dict: dict,
        object_: Optional[object] = None,
        object_class: Optional[type] = None,
        object_config: Optional[dict] = None,
        runtime_environment: Optional[dict] = None,
    ) -> dict:
        """Populate a payload dictionary with anonymized class information.

        The class to describe is ``object_class`` when given; otherwise it is
        ``object_.__class__``, or the class loaded from ``object_config``
        (its ``module_name`` falling back to the one in ``runtime_environment``).

        Args:
            anonymized_info_dict: The payload object to hydrate with anonymized values.
            object_: The specific object to anonymize.
            object_class: The class of the specific object to anonymize.
            object_config: The dictionary configuration of the specific object to anonymize.
            runtime_environment: A dictionary containing relevant runtime information (like class_name and module_name)

        Returns:
            The same anonymized_info_dict. ``parent_class`` is the class's own
            name when it is a core class, a comma-delimited list of its core
            direct parents otherwise, or ``"__not_recognized__"``. In the
            latter two cases ``anonymized_class`` is set as well.

        Raises:
            AssertionError if no object_, object_class, or object_config is provided.
        """
        assert (object_ or object_class or object_config
                ), "Must pass either object_ or object_class or object_config."

        if runtime_environment is None:
            runtime_environment = {}

        object_class_name: Optional[str] = None
        object_module_name: Optional[str] = None

        def _flag_as_unrecognized() -> None:
            # Shared fallback: mark the parent as unknown and anonymize
            # whatever class name is known at the time of the call.
            anonymized_info_dict["parent_class"] = "__not_recognized__"
            anonymized_info_dict["anonymized_class"] = self._anonymize_string(
                object_class_name)

        try:
            if object_class is None:
                if object_ is not None:
                    object_class = object_.__class__
                elif object_config is not None:
                    object_class_name = object_config.get("class_name")
                    object_module_name = (
                        object_config.get("module_name")
                        or runtime_environment.get("module_name"))
                    object_class = load_class(object_class_name,
                                              object_module_name)

            object_class_name = object_class.__name__
            object_module_name = object_class.__module__
            direct_bases: Tuple[type, ...] = object_class.__bases__

            if self._is_core_great_expectations_class(object_module_name):
                anonymized_info_dict["parent_class"] = object_class_name
            else:
                # The class itself is not a core class, so look at its direct
                # parents. Multiple inheritance is possible, hence every core
                # parent is collected and reported as a comma-delimited list.
                core_parent_names: List[str] = [
                    base.__name__
                    for base in direct_bases
                    if BaseAnonymizer._is_core_great_expectations_class(
                        base.__module__)
                ]

                if core_parent_names:
                    anonymized_info_dict["parent_class"] = ",".join(
                        core_parent_names)
                    anonymized_info_dict[
                        "anonymized_class"] = self._anonymize_string(
                            object_class_name)

            # Catch-all to prevent edge cases from slipping past
            if not anonymized_info_dict.get("parent_class"):
                _flag_as_unrecognized()

        except AttributeError:
            _flag_as_unrecognized()

        return anonymized_info_dict