def to_json_dict(self) -> dict:
    """Serialize this object to a JSON-compatible dictionary.

    The nested "batch_request" entry is scrubbed of empty/None properties
    (in place) before the dictionary is returned.
    """
    serialized: dict = self.to_dict()
    deep_filter_properties_iterable(
        properties=serialized["batch_request"],
        inplace=True,
    )
    return serialized
def __init__(
    self,
    context: DataContext,
    expectation_suite_name: str,
    profiler_name: str,
    batch_request: Union[str, Dict[str, Union[str, int, Dict[str, Any]]]],
) -> None:
    """Initialize with a data context, suite/profiler names, and a batch_request.

    The batch_request is normalized (None -> {}, empty properties removed,
    display ordering standardized) before a Validator is obtained from the context.
    """
    super().__init__(context=context)

    # Normalize the supplied batch_request before use.
    effective_batch_request = {} if batch_request is None else batch_request
    deep_filter_properties_iterable(
        properties=effective_batch_request,
        inplace=True,
    )
    effective_batch_request = standardize_batch_request_display_ordering(
        batch_request=effective_batch_request
    )
    self._batch_request = effective_batch_request

    self._validator = context.get_validator(
        batch_request=BatchRequest(**effective_batch_request),
        expectation_suite_name=expectation_suite_name,
    )

    self._profiler_name = profiler_name
    # The validator is the source of truth for the effective suite name.
    self._expectation_suite_name = self._validator.expectation_suite_name
def __eq__(self, other):
    """Compare against a serializable peer, a plain dict, or a string rendering.

    Equality holds when any of the following matches: both JSON dicts are equal,
    the falsy-cleaned JSON dict equals the falsy-cleaned other dict, or the
    string forms are equal.
    """
    if other is None:
        return False

    if hasattr(other, "to_json_dict") and self.to_json_dict() == other.to_json_dict():
        return True

    if isinstance(other, dict):
        cleaned_self = deep_filter_properties_iterable(
            properties=self.to_json_dict(), clean_falsy=True
        )
        cleaned_other = deep_filter_properties_iterable(
            properties=other, clean_falsy=True
        )
        if cleaned_self == cleaned_other:
            return True

    return self.__str__() == str(other)
def __repr__(self) -> str:
    """
    # TODO: <Alex>2/4/2022</Alex>
    This implementation of a custom "__repr__()" occurs frequently and should ideally
    serve as the reference implementation in the "SerializableDictDot" class. However,
    the circular import dependencies, due to the location of the
    "great_expectations/types/__init__.py" and "great_expectations/core/util.py"
    modules make this refactoring infeasible at the present time.
    """
    serializable: dict = self.to_json_dict()
    deep_filter_properties_iterable(
        properties=serializable,
        inplace=True,
    )
    return json.dumps(serializable, indent=2)
def to_json_dict(self) -> dict:
    """Serialize this Domain to a JSON-compatible dict with falsy entries removed."""
    details: dict = {}

    key: str
    value: Any
    for key, value in self["details"].items():
        if not value:
            continue

        if key == INFERRED_SEMANTIC_TYPE_KEY:
            # Normalize semantic types per column: strings are coerced through
            # the SemanticDomainTypes enum; enum members yield their value.
            column_name: str
            semantic_type: Union[str, SemanticDomainTypes]
            value = {
                column_name: (
                    SemanticDomainTypes(semantic_type.lower()).value
                    if isinstance(semantic_type, str)
                    else semantic_type.value
                )
                for column_name, semantic_type in value.items()
            }

        details[key] = convert_to_json_serializable(data=value)

    serialized: dict = convert_to_json_serializable(
        data={
            "domain_type": self["domain_type"].value,
            "domain_kwargs": self["domain_kwargs"].to_json_dict(),
            "details": details,
            "rule_name": self["rule_name"],
        }
    )
    return deep_filter_properties_iterable(properties=serialized, clean_falsy=True)
def get_column_pair_expectations(self) -> List[ExpectationConfiguration]:
    """Return a list of column_pair map expectations."""
    column_pair_configurations: List[ExpectationConfiguration] = [
        configuration
        for configuration in self.expectations
        if configuration.get_domain_type() == MetricDomainTypes.COLUMN_PAIR
    ]

    configuration: ExpectationConfiguration
    for configuration in column_pair_configurations:
        # Clean falsy kwargs, then re-key so "column_A"/"column_B" lead.
        cleaned_kwargs: dict = deep_filter_properties_iterable(
            properties=configuration.kwargs, clean_falsy=True
        )
        configuration.kwargs = {
            "column_A": cleaned_kwargs.pop("column_A"),
            "column_B": cleaned_kwargs.pop("column_B"),
            **cleaned_kwargs,
        }

    return column_pair_configurations
def _convert_dictionaries_to_domain_kwargs(
    self, source: Optional[Any] = None
) -> Optional[Union[Any, "Domain"]]:
    """Recursively wrap plain dictionaries in DomainKwargs.

    Non-dict values pass through unchanged; dicts that are not already Domain
    instances are cleaned of empty properties and wrapped, then recursed into.
    """
    if source is None:
        return None

    if not isinstance(source, dict):
        return source

    if not isinstance(source, Domain):
        deep_filter_properties_iterable(properties=source, inplace=True)
        source = DomainKwargs(source)

    key: str
    value: Any
    for key, value in source.items():
        source[key] = self._convert_dictionaries_to_domain_kwargs(source=value)

    return source
def test_deep_filter_properties_iterable_on_batch_request_dict():
    """Empty/None entries must be stripped from a batch_request dict in place."""
    batch_request: dict = {
        "datasource_name": "df78ebde1957385a02d8736cd2c9a6d9",
        "data_connector_name": "123a3221fc4b65014d061cce4a71782e",
        "data_asset_name": "eac128c5824b698c22b441ada61022d4",
        "batch_spec_passthrough": {},
        "data_connector_query": {"batch_filter_parameters": {}},
        "limit": None,
    }

    deep_filter_properties_iterable(
        properties=batch_request,
        clean_nulls=True,
        clean_falsy=True,
        inplace=True,
    )

    expected: dict = {
        "datasource_name": "df78ebde1957385a02d8736cd2c9a6d9",
        "data_connector_name": "123a3221fc4b65014d061cce4a71782e",
        "data_asset_name": "eac128c5824b698c22b441ada61022d4",
    }
    assert batch_request == expected
def test_batch_request_deepcopy():
    """A deepcopy of a RuntimeBatchRequest must serialize identically to the original."""
    test_df: pd.DataFrame = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})

    batch_request: RuntimeBatchRequest = RuntimeBatchRequest(
        **{
            "datasource_name": "my_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "default_data_asset_name",
            "batch_identifiers": {
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            "runtime_parameters": {"batch_data": test_df},
        }
    )

    batch_request_copy: RuntimeBatchRequest = copy.deepcopy(batch_request)

    cleaned_copy = deep_filter_properties_iterable(
        properties=batch_request_copy.to_dict(),
        clean_falsy=True,
    )
    cleaned_original = deep_filter_properties_iterable(
        properties=batch_request.to_dict(),
        clean_falsy=True,
    )
    assert cleaned_copy == cleaned_original
def run(
    self,
    variables: Optional[Dict[str, Any]] = None,
    rules: Optional[Dict[str, Dict[str, Any]]] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    **kwargs: dict,
) -> DataAssistantResult:
    """
    variables: attribute name/value pairs, commonly-used in Builder objects, to modify using "runtime_environment"
    rules: name/(configuration-dictionary) to modify using "runtime_environment"
    kwargs: additional/override directives supplied at runtime

    "kwargs" directives structure:
    {
        "include_column_names": ["column_a", "column_b", "column_c", ...],
        "exclude_column_names": ["column_d", "column_e", "column_f", "column_g", ...],
        ...
    }

    Implementation makes best effort at assigning directives to appropriate "MetricDomainTypes" member.

    Returns:
        DataAssistantResult: The result object for the DataAssistant
    """
    data_assistant_name: str = self._data_assistant_cls.data_assistant_type

    # Obtain a Validator for this run without persisting any expectation suite.
    validator: Validator = get_validator_with_expectation_suite(
        batch_request=batch_request,
        data_context=self._data_context,
        expectation_suite=None,
        expectation_suite_name=None,
        component_name=data_assistant_name,
        persist=False,
    )
    data_assistant: DataAssistant = self._data_assistant_cls(
        name=data_assistant_name,
        validator=validator,
    )

    # Strip empty/None runtime directives, then split into variables-directives
    # and domain-type-directives lists.
    runtime_directives: dict = deep_filter_properties_iterable(properties=kwargs)
    variables_directives_list: List[
        RuntimeEnvironmentVariablesDirectives
    ] = build_variables_directives(**runtime_directives)
    domain_type_directives_list: List[
        RuntimeEnvironmentDomainTypeDirectives
    ] = build_domain_type_directives(**runtime_directives)

    return data_assistant.run(
        variables=variables,
        rules=rules,
        variables_directives_list=variables_directives_list,
        domain_type_directives_list=domain_type_directives_list,
    )
def render_to_disk(self, notebook_file_path: str, **kwargs: dict) -> None:
    """
    Render a notebook to disk from an expectation suite.

    If batch_request dictionary is passed, its properties will override any found in suite citations.
    """
    # noinspection PyTypeChecker
    target_suite: ExpectationSuite = kwargs.get("suite")
    target_batch_request: Optional[Union[str, Dict[str, Any]]] = kwargs.get(
        "batch_request"
    )
    # Drop empty/None properties from the batch_request before rendering.
    deep_filter_properties_iterable(
        properties=target_batch_request,
        inplace=True,
    )
    # noinspection PyTypeChecker
    self.render(
        suite=target_suite,
        batch_request=target_batch_request,
    )
    self.write_notebook_to_disk(
        notebook=self._notebook,
        notebook_file_path=notebook_file_path,
    )
def _anonymize_profiler_run(self, obj: object, **kwargs) -> dict:
    """
    Traverse the entire RuleBasedProfiler configuration structure (as per its formal,
    validated Marshmallow schema) and anonymize every field that can be customized by
    a user (public fields are recorded as their original names).
    """
    assert isinstance(
        obj, RuleBasedProfilerConfig
    ), "ProfilerAnonymizer can only handle objects of type RuleBasedProfilerConfig"
    profiler_config: RuleBasedProfilerConfig = obj

    rules: Dict[str, dict] = profiler_config.rules
    variables: dict = profiler_config.variables or {}

    anonymized_payload: dict = {
        "anonymized_name": self._anonymize_string(profiler_config.name),
        "config_version": profiler_config.config_version,
        "anonymized_rules": self._anonymize_rules(rules=rules),
        "rule_count": len(rules),
        "variable_count": len(variables),
    }
    # Remove falsy entries so the telemetry payload carries only populated fields.
    deep_filter_properties_iterable(
        properties=anonymized_payload,
        clean_falsy=True,
        inplace=True,
    )
    return anonymized_payload
def get_table_expectations(self) -> List[ExpectationConfiguration]:
    """Return a list of table expectations."""
    table_configurations: List[ExpectationConfiguration] = [
        configuration
        for configuration in self.expectations
        if configuration.get_domain_type() == MetricDomainTypes.TABLE
    ]

    configuration: ExpectationConfiguration
    for configuration in table_configurations:
        # Scrub falsy kwargs on each matched configuration.
        configuration.kwargs = deep_filter_properties_iterable(
            properties=configuration.kwargs, clean_falsy=True
        )

    return table_configurations
def render_to_disk(
    self,
    suite: ExpectationSuite,
    notebook_file_path: str,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
) -> None:
    """
    Render a notebook to disk from an expectation suite.

    If batch_request dictionary is passed, its properties will override any found in suite citations.
    """
    # Scrub empty/None properties from the batch_request before rendering.
    deep_filter_properties_iterable(
        properties=batch_request,
        inplace=True,
    )

    self.render(
        suite=suite,
        batch_request=batch_request,
    )

    self.write_notebook_to_disk(
        notebook=self._notebook,
        notebook_file_path=notebook_file_path,
    )
def get_multicolumn_expectations(self) -> List[ExpectationConfiguration]:
    """Return a list of multicolumn map expectations."""
    multicolumn_configurations: List[ExpectationConfiguration] = [
        configuration
        for configuration in self.expectations
        if configuration.get_domain_type() == MetricDomainTypes.MULTICOLUMN
    ]

    configuration: ExpectationConfiguration
    for configuration in multicolumn_configurations:
        # Clean falsy kwargs, then re-key so "column_list" leads.
        cleaned_kwargs: dict = deep_filter_properties_iterable(
            properties=configuration.kwargs, clean_falsy=True
        )
        configuration.kwargs = {
            "column_list": cleaned_kwargs.pop("column_list"),
            **cleaned_kwargs,
        }

    return multicolumn_configurations
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Build a Checkpoint from the given configuration arguments and persist it.

    The assembled config is cleaned of falsy entries, instantiated via
    ``instantiate_class_from_config``, and saved to ``checkpoint_store`` under
    either a GeCloudIdentifier (cloud mode) or a ConfigurationIdentifier key.

    Raises:
        ge_exceptions.InvalidConfigError: if batch_data (e.g. a DataFrame) is
            present in ``batch_request`` or in any validation's batch_request,
            since such objects cannot be serialized into the CheckpointStore.

    Returns:
        The newly instantiated checkpoint object.
    """
    checkpoint_config: Union[CheckpointConfig, dict]

    # These checks protect against typed objects (BatchRequest and/or RuntimeBatchRequest) encountered in arguments.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    checkpoint_config = {
        "name": name,
        "config_version": config_version,
        "template_name": template_name,
        "module_name": module_name,
        "class_name": class_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        # Next two fields are for LegacyCheckpoint configuration
        "validation_operator_name": validation_operator_name,
        "batches": batches,
        # the following four keys are used by SimpleCheckpoint
        "site_names": site_names,
        "slack_webhook": slack_webhook,
        "notify_on": notify_on,
        "notify_with": notify_with,
        "ge_cloud_id": ge_cloud_id,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }

    # Drop unset/falsy keys so only explicitly provided configuration survives.
    checkpoint_config = deep_filter_properties_iterable(
        properties=checkpoint_config,
        clean_falsy=True,
    )
    new_checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=checkpoint_config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    # Choose the store key by deployment mode (cloud resource id vs. config name).
    if ge_cloud_mode:
        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type="contract", ge_cloud_id=ge_cloud_id
        )
    else:
        key: ConfigurationIdentifier = ConfigurationIdentifier(
            configuration_key=name,
        )

    checkpoint_config = new_checkpoint.get_config()

    checkpoint_ref = checkpoint_store.set(key=key, value=checkpoint_config)
    # In cloud mode the store returns a ref carrying the server-assigned id;
    # propagate it back onto the checkpoint object.
    if isinstance(checkpoint_ref, GeCloudIdAwareRef):
        ge_cloud_id = checkpoint_ref.ge_cloud_id
        new_checkpoint.ge_cloud_id = uuid.UUID(ge_cloud_id)

    return new_checkpoint
def test_checkpoint_config_repr_after_substitution(checkpoint):
    """The resolved checkpoint config must serialize to the expected JSON repr.

    Resolves runtime kwargs against the ``checkpoint`` fixture, cleans and sorts
    the resulting dict, and compares ``json.dumps(..., indent=2)`` output against
    a golden string.
    """
    df: pd.DataFrame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    batch_request_param: dict = {
        "runtime_parameters": {"batch_data": df},
        "batch_identifiers": {"default_identifier_name": "my_simple_df"},
    }

    result_format_param: dict = {"result_format": "SUMMARY"}

    kwargs: dict = {
        "batch_request": batch_request_param,
        "result_format": result_format_param,
    }

    # Matching how this is called in usage_statistics.py (parameter style)
    resolved_runtime_kwargs: dict = (
        CheckpointConfig.resolve_config_using_acceptable_arguments(
            *(checkpoint,), **kwargs
        )
    )

    json_dict: dict = convert_to_json_serializable(data=resolved_runtime_kwargs)
    deep_filter_properties_iterable(
        properties=json_dict,
        inplace=True,
    )

    # Sort top-level keys so the repr is deterministic.
    keys: List[str] = sorted(list(json_dict.keys()))

    key: str
    sorted_json_dict: dict = {key: json_dict[key] for key in keys}

    checkpoint_config_repr: str = json.dumps(sorted_json_dict, indent=2)

    assert (
        checkpoint_config_repr
        == """{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {
    "runtime_parameters": {
      "batch_data": [
        {
          "a": 1,
          "b": 3
        },
        {
          "a": 2,
          "b": 4
        }
      ]
    },
    "batch_identifiers": {
      "default_identifier_name": "my_simple_df"
    }
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "my_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "example_datasource",
        "data_connector_name": "default_runtime_data_connector_name",
        "data_asset_name": "my_data_asset",
        "runtime_parameters": {
          "batch_data": "<class \'pandas.core.frame.DataFrame\'>"
        },
        "batch_identifiers": {
          "default_identifier_name": "my_simple_df"
        }
      },
      "expectation_suite_name": "test_suite",
      "action_list": [
        {
          "name": "store_validation_result",
          "action": {
            "class_name": "StoreValidationResultAction"
          }
        },
        {
          "name": "store_evaluation_params",
          "action": {
            "class_name": "StoreEvaluationParametersAction"
          }
        },
        {
          "name": "update_data_docs",
          "action": {
            "class_name": "UpdateDataDocsAction",
            "site_names": []
          }
        }
      ]
    }
  ]
}"""
    )
def test_checkpoint_config_deepcopy(
    titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates,
    monkeypatch,
):
    """A deepcopy of a fully-substituted checkpoint config must compare equal to the original after falsy cleanup."""
    # Environment variables referenced by "$VAR"-style substitutions in the templates.
    monkeypatch.setenv("GE_ENVIRONMENT", "my_ge_environment")
    monkeypatch.setenv("VAR", "test")
    monkeypatch.setenv("MY_PARAM", "1")
    monkeypatch.setenv("OLD_PARAM", "2")

    context: DataContext = titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates

    test_df: pd.DataFrame = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})

    # Runtime batch request carrying an in-memory DataFrame as batch data.
    runtime_batch_request: RuntimeBatchRequest = RuntimeBatchRequest(
        **{
            "datasource_name": "my_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "default_data_asset_name",
            "batch_identifiers": {
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            "runtime_parameters": {"batch_data": test_df},
        }
    )

    # Base checkpoint config referencing a template plus two validations.
    nested_checkpoint_config: CheckpointConfig = CheckpointConfig(
        name="my_nested_checkpoint",
        config_version=1,
        template_name="my_nested_checkpoint_template_2",
        expectation_suite_name="users.delivery",
        validations=[
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_special_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -1},
                }
            },
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_other_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -2},
                }
            },
        ],
    )
    nested_checkpoint: Checkpoint = Checkpoint(
        data_context=context,
        **filter_properties_dict(
            properties=nested_checkpoint_config.to_json_dict(),
            delete_fields={"class_name", "module_name"},
            clean_falsy=True,
        ),
    )
    # Substitute template values and runtime kwargs into the effective config.
    substituted_config_template_and_runtime_kwargs: dict = nested_checkpoint.get_substituted_config(
        runtime_kwargs={
            "batch_request": runtime_batch_request,
            "expectation_suite_name": "runtime_suite_name",
            "template_name": "my_nested_checkpoint_template_3",
            "validations": [
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_2_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -3},
                    }
                },
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_3_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -4},
                    }
                },
            ],
            "run_name_template": "runtime_run_template",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "MyCustomRuntimeStoreEvaluationParametersAction",
                    },
                },
                {
                    "name": "update_data_docs",
                    "action": None,
                },
                {
                    "name": "update_data_docs_deluxe_runtime",
                    "action": {
                        "class_name": "UpdateDataDocsAction",
                    },
                },
            ],
            "evaluation_parameters": {
                "environment": "runtime-$GE_ENVIRONMENT",
                "tolerance": 1.0e-2,
                "aux_param_0": "runtime-$MY_PARAM",
                "aux_param_1": "1 + $MY_PARAM",
                "new_runtime_eval_param": "bloopy!",
            },
            "runtime_configuration": {
                "result_format": "BASIC",
                "partial_unexpected_count": 999,
                "new_runtime_config_key": "bleepy!",
            },
        }
    )

    checkpoint_config_copy: dict = copy.deepcopy(
        substituted_config_template_and_runtime_kwargs
    )
    # Cleaned copy and cleaned original must be identical.
    assert deep_filter_properties_iterable(
        properties=checkpoint_config_copy,
        clean_falsy=True,
    ) == deep_filter_properties_iterable(
        properties=substituted_config_template_and_runtime_kwargs,
        clean_falsy=True,
    )
def _anonymize_checkpoint_run(self, obj: object, **kwargs) -> dict:
    """
    Traverse the entire Checkpoint configuration structure (as per its formal,
    validated Marshmallow schema) and anonymize every field that can be customized
    by a user (public fields are recorded as their original names).

    Returns:
        A dictionary of anonymized checkpoint-run properties with falsy entries
        removed.
    """
    attribute_name: str
    attribute_value: Optional[Union[str, dict]]
    validation_obj: dict

    checkpoint_optional_top_level_keys: List[str] = []

    name: Optional[str] = kwargs.get("name")
    anonymized_name: Optional[str] = self._anonymize_string(name)

    config_version: Optional[Union[Number, str]] = kwargs.get("config_version")
    if config_version is None:
        config_version = 1.0

    template_name: Optional[str] = kwargs.get("template_name")
    anonymized_template_name: Optional[str] = self._anonymize_string(template_name)

    run_name_template: Optional[str] = kwargs.get("run_name_template")
    anonymized_run_name_template: Optional[str] = self._anonymize_string(
        run_name_template
    )

    expectation_suite_name: Optional[str] = kwargs.get("expectation_suite_name")
    anonymized_expectation_suite_name: Optional[str] = self._anonymize_string(
        expectation_suite_name
    )

    batch_request: Optional[
        Union[BatchRequest, RuntimeBatchRequest, dict]
    ] = kwargs.get("batch_request")
    if batch_request is None:
        batch_request = {}

    anonymized_batch_request: Optional[
        Dict[str, List[str]]
    ] = self._aggregate_anonymizer.anonymize(*(), **batch_request)

    action_list: Optional[List[dict]] = kwargs.get("action_list")
    anonymized_action_list: Optional[List[dict]] = None
    if action_list:
        # Best-effort: any failure while anonymizing actions is logged, not raised.
        # noinspection PyBroadException
        try:
            anonymized_action_list = [
                self._aggregate_anonymizer.anonymize(
                    action_name=action_config_dict["name"],
                    action_config=action_config_dict["action"],
                )
                for action_config_dict in action_list
            ]
        except Exception:
            logger.debug(
                "anonymize_checkpoint_run: Unable to create anonymized_action_list payload field"
            )

    validations: Optional[List[dict]] = kwargs.get("validations")
    anonymized_validations: Optional[List[dict]] = []
    if validations:
        for validation_obj in validations:
            validation_batch_request: Optional[
                Union[BatchRequest, RuntimeBatchRequest, dict]
            ] = validation_obj.get("batch_request")
            if validation_batch_request is None:
                validation_batch_request = {}

            # Normalize typed batch_request objects into plain dictionaries.
            validation_batch_request = get_batch_request_as_dict(
                batch_request=validation_batch_request
            )

            anonymized_validation_batch_request: Optional[
                Dict[str, List[str]]
            ] = self._aggregate_anonymizer.anonymize(*(), **validation_batch_request)

            validation_expectation_suite_name: Optional[str] = validation_obj.get(
                "expectation_suite_name"
            )
            anonymized_validation_expectation_suite_name: Optional[
                str
            ] = self._anonymize_string(validation_expectation_suite_name)

            validation_action_list: Optional[List[dict]] = validation_obj.get(
                "action_list"
            )
            anonymized_validation_action_list: Optional[List[dict]] = None
            if validation_action_list:
                # noinspection PyBroadException
                try:
                    anonymized_validation_action_list = [
                        self._aggregate_anonymizer.anonymize(
                            action_name=action_config_dict["name"],
                            action_config=action_config_dict["action"],
                        )
                        for action_config_dict in validation_action_list
                    ]
                except Exception:
                    logger.debug(
                        "anonymize_checkpoint_run: Unable to create anonymized_validation_action_list payload field"
                    )

            # BUG FIX (dead code removal): the previous implementation first built
            # a filtered dictionary containing only truthy fields and then
            # unconditionally overwrote it with this full dictionary, making the
            # filtered construction unreachable. Only the unconditional assignment
            # was observable, so it alone is kept; falsy entries are removed later
            # by deep_filter_properties_iterable on the whole payload.
            anonymized_validation: Dict[str, Dict[str, Any]] = {
                "anonymized_batch_request": anonymized_validation_batch_request,
                "anonymized_expectation_suite_name": anonymized_validation_expectation_suite_name,
                "anonymized_action_list": anonymized_validation_action_list,
            }

            anonymized_validations.append(anonymized_validation)

    run_id: Optional[Union[str, RunIdentifier]] = kwargs.get("run_id")
    anonymized_run_id: Optional[Union[str, RunIdentifier]]
    if run_id is None:
        anonymized_run_id = None
    else:
        anonymized_run_id = self._anonymize_string(str(run_id))

    run_name: Optional[str] = kwargs.get("run_name")
    anonymized_run_name: Optional[str]
    if run_name is None:
        anonymized_run_name = None
    else:
        anonymized_run_name = self._anonymize_string(run_name)

    run_time: Optional[Union[str, datetime.datetime]] = kwargs.get("run_time")
    anonymized_run_time: Optional[str]
    if run_time is None:
        anonymized_run_time = None
    else:
        anonymized_run_time = self._anonymize_string(str(run_time))

    expectation_suite_ge_cloud_id: Optional[str] = kwargs.get(
        "expectation_suite_ge_cloud_id"
    )
    anonymized_expectation_suite_ge_cloud_id: Optional[str]
    if expectation_suite_ge_cloud_id is None:
        anonymized_expectation_suite_ge_cloud_id = None
    else:
        anonymized_expectation_suite_ge_cloud_id = self._anonymize_string(
            str(expectation_suite_ge_cloud_id)
        )

    # Record which optional top-level keys were actually supplied (names only).
    for attribute_name in sorted(CHECKPOINT_OPTIONAL_TOP_LEVEL_KEYS):
        attribute_value = kwargs.get(attribute_name)
        if attribute_value:
            checkpoint_optional_top_level_keys.append(attribute_name)

    anonymized_checkpoint_run_properties_dict: Dict[str, List[str]] = {
        "anonymized_name": anonymized_name,
        "config_version": config_version,
        "anonymized_template_name": anonymized_template_name,
        "anonymized_run_name_template": anonymized_run_name_template,
        "anonymized_expectation_suite_name": anonymized_expectation_suite_name,
        "anonymized_batch_request": anonymized_batch_request,
        "anonymized_action_list": anonymized_action_list,
        "anonymized_validations": anonymized_validations,
        "anonymized_run_id": anonymized_run_id,
        "anonymized_run_name": anonymized_run_name,
        "anonymized_run_time": anonymized_run_time,
        "anonymized_expectation_suite_ge_cloud_id": anonymized_expectation_suite_ge_cloud_id,
        "checkpoint_optional_top_level_keys": checkpoint_optional_top_level_keys,
    }

    deep_filter_properties_iterable(
        properties=anonymized_checkpoint_run_properties_dict,
        clean_falsy=True,
        inplace=True,
    )

    return anonymized_checkpoint_run_properties_dict
def test_deep_filter_properties_iterable():
    """deep_filter_properties_iterable drops nulls/empty containers at all depths;
    keep_falsy_numerics controls whether numeric zero survives the cleanup."""
    source_dict: dict = {
        "integer_zero": 0,
        "null": None,
        "string": "xyz_0",
        "integer_one": 1,
        "scientific_notation_floating_point_number": 9.8e1,
        "empty_top_level_dictionary": {},
        "empty_top_level_list": [],
        "empty_top_level_set": set(),
        "non_empty_top_level_set": {
            0,
            1,
            2,
            "a",
            "b",
            "c",
        },
        "non_empty_top_level_dictionary": {
            "empty_1st_level_list": [],
            "empty_1st_level_set": set(),
            # NOTE(review): despite "set" in its key name, this literal has
            # key/value pairs and is therefore a dict — presumably intentional;
            # confirm against the utility's docs/usage.
            "non_empty_1st_level_set": {
                "empty_2nd_level_list": [],
                "non_empty_2nd_level_list": [
                    0,
                    1,
                    2,
                    "a",
                    "b",
                    "c",
                ],
                "non_empty_2nd_level_dictionary": {
                    "integer_zero": 0,
                    "null": None,
                    "string": "xyz_0",
                    "integer_one": 1,
                    "scientific_notation_floating_point_number": 9.8e1,
                },
                "empty_2nd_level_dictionary": {},
            },
        },
    }

    # Case 0: in-place cleanup with default keep_falsy_numerics (zeros retained).
    d0_begin: dict = copy.deepcopy(source_dict)
    deep_filter_properties_iterable(
        properties=d0_begin,
        clean_falsy=True,
        inplace=True,
    )
    d0_end: dict = d0_begin

    d0_end_expected: dict = {
        "integer_zero": 0,
        "string": "xyz_0",
        "integer_one": 1,
        "scientific_notation_floating_point_number": 98.0,
        "non_empty_top_level_set": {
            0,
            1,
            2,
            "a",
            "b",
            "c",
        },
        "non_empty_top_level_dictionary": {
            "non_empty_1st_level_set": {
                "non_empty_2nd_level_list": [0, 1, 2, "a", "b", "c"],
                "non_empty_2nd_level_dictionary": {
                    "integer_zero": 0,
                    "string": "xyz_0",
                    "integer_one": 1,
                    "scientific_notation_floating_point_number": 98.0,
                },
            }
        },
    }
    assert d0_end == d0_end_expected

    # Case 1: returned (non-inplace) cleanup with keep_falsy_numerics=False
    # (zeros dropped along with other falsy values).
    d1_begin: dict = copy.deepcopy(source_dict)
    d1_end: dict = deep_filter_properties_iterable(
        properties=d1_begin,
        clean_falsy=True,
        keep_falsy_numerics=False,
    )

    d1_end_expected: dict = {
        "string": "xyz_0",
        "integer_one": 1,
        "scientific_notation_floating_point_number": 98.0,
        "non_empty_top_level_set": {
            0,
            1,
            2,
            "a",
            "b",
            "c",
        },
        "non_empty_top_level_dictionary": {
            "non_empty_1st_level_set": {
                "non_empty_2nd_level_list": [0, 1, 2, "a", "b", "c"],
                "non_empty_2nd_level_dictionary": {
                    "string": "xyz_0",
                    "integer_one": 1,
                    "scientific_notation_floating_point_number": 98.0,
                },
            }
        },
    }
    assert d1_end == d1_end_expected
def anonymize(self, obj: Optional[object] = None, **kwargs) -> Any:
    """Anonymize a batch_request assembled from **kwargs.

    Best-effort: returns a dictionary of anonymized batch_request properties, or
    None if construction/anonymization failed (all errors are swallowed and
    logged at debug level).
    """
    anonymized_batch_request_properties_dict: Optional[Dict[str, List[str]]] = None
    # noinspection PyBroadException
    try:
        # Local import — presumably to avoid a circular import with
        # great_expectations.core.batch; confirm before relocating to module level.
        from great_expectations.core.batch import (
            BatchRequest,
            get_batch_request_from_acceptable_arguments,
            standardize_batch_request_display_ordering,
        )

        batch_request: BatchRequest = get_batch_request_from_acceptable_arguments(
            **kwargs
        )
        batch_request_dict: dict = batch_request.to_json_dict()

        # Anonymize the raw properties, then normalize key ordering and drop
        # falsy entries.
        anonymized_batch_request_dict: Optional[
            Union[str, dict]
        ] = self._anonymize_batch_request_properties(source=batch_request_dict)
        anonymized_batch_request_dict = standardize_batch_request_display_ordering(
            batch_request=anonymized_batch_request_dict
        )
        deep_filter_properties_iterable(
            properties=anonymized_batch_request_dict,
            clean_falsy=True,
            inplace=True,
        )

        # Containers populated (by reference) by _build_anonymized_batch_request.
        anonymized_batch_request_required_top_level_properties: dict = {}
        batch_request_optional_top_level_keys: List[str] = []
        batch_spec_passthrough_keys: List[str] = []
        data_connector_query_keys: List[str] = []
        runtime_parameters_keys: List[str] = []

        anonymized_batch_request_properties_dict = {
            "anonymized_batch_request_required_top_level_properties": (
                anonymized_batch_request_required_top_level_properties
            ),
            "batch_request_optional_top_level_keys": batch_request_optional_top_level_keys,
            "batch_spec_passthrough_keys": batch_spec_passthrough_keys,
            "runtime_parameters_keys": runtime_parameters_keys,
            "data_connector_query_keys": data_connector_query_keys,
        }
        self._build_anonymized_batch_request(
            destination=anonymized_batch_request_properties_dict,
            source=anonymized_batch_request_dict,
        )
        deep_filter_properties_iterable(
            properties=anonymized_batch_request_properties_dict,
            clean_falsy=True,
            inplace=True,
        )
        # Sort in place: these lists are shared by reference with the payload
        # dict (when they survived the falsy cleanup above).
        batch_request_optional_top_level_keys.sort()
        batch_spec_passthrough_keys.sort()
        data_connector_query_keys.sort()
        runtime_parameters_keys.sort()
    except Exception:
        logger.debug(
            "anonymize_batch_request: Unable to create anonymized_batch_request payload field"
        )

    return anonymized_batch_request_properties_dict
def run(
    self,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    **kwargs: dict,
) -> DataAssistantResult:
    """Execute the DataAssistant against the supplied batch_request.

    Args:
        batch_request: Explicit batch_request used to supply data at runtime
            (required; an exception is raised when it is missing).
        kwargs: additional/override directives supplied at runtime (via
            "runtime_environment"), e.g. "DomainBuilder" parameters such as
            include_column_names/exclude_column_names, and per-Rule "variables"
            settings (numeric_columns_rule, datetime_columns_rule,
            categorical_columns_rule, ...). Implementation makes best effort at
            assigning directives to the appropriate "MetricDomainTypes" member.

    Raises:
        ge_exceptions.DataAssistantExecutionError: when "batch_request" is None.

    Returns:
        DataAssistantResult: The result object for the DataAssistant
    """
    if batch_request is None:
        data_assistant_name: str = self._data_assistant_cls.data_assistant_type
        raise ge_exceptions.DataAssistantExecutionError(
            message=f"""Utilizing "{data_assistant_name}.run()" requires valid "batch_request" to be \
specified (empty or missing "batch_request" detected)."""
        )

    data_assistant: DataAssistant = self._build_data_assistant(
        batch_request=batch_request
    )

    # Strip empty/None runtime directives, then split them into the two
    # directive lists the assistant consumes.
    runtime_directives: dict = deep_filter_properties_iterable(properties=kwargs)
    variables_directives_list: List[
        RuntimeEnvironmentVariablesDirectives
    ] = build_variables_directives(**runtime_directives)
    domain_type_directives_list: List[
        RuntimeEnvironmentDomainTypeDirectives
    ] = build_domain_type_directives(**runtime_directives)

    return data_assistant.run(
        variables_directives_list=variables_directives_list,
        domain_type_directives_list=domain_type_directives_list,
    )
def test_reconcile_profiler_rules_new_rule_override(
    profiler_with_placeholder_args,
):
    """Reconciling with an override that introduces a brand-new rule ("rule_0").

    Expectation: the reconciled result contains both the new "rule_0" (with builder
    defaults filled in by reconciliation) and the fixture profiler's pre-existing
    "rule_1", unchanged.
    """
    # Runtime override: a rule name ("rule_0") not present in the fixture profiler's
    # configuration, with fully-specified builder class/module paths.
    rules: Dict[str, Dict[str, Any]] = {
        "rule_0": {
            "domain_builder": {
                "class_name": "ColumnDomainBuilder",
                "module_name": "great_expectations.rule_based_profiler.domain_builder",
            },
            "parameter_builders": [
                {
                    "class_name": "MetricMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                },
                {
                    "class_name": "NumericMetricRangeMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_other_parameter",
                    "metric_name": "my_other_metric",
                },
            ],
            "expectation_configuration_builders": [
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_one_arg": "$parameter.my_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_min_to_be_between",
                    "column": "$domain.domain_kwargs.column",
                    "my_another_arg": "$parameter.my_other_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_other_parameter_estimator": "$parameter.my_other_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
    }

    # Expected post-reconciliation configs: "rule_0" carries builder defaults
    # (e.g. "enforce_numeric_metric", "sampling_method", "false_positive_rate")
    # injected by reconciliation; "rule_1" is the fixture's original rule.
    expected_rules: List[dict] = [
        {
            "name": "rule_0",
            "domain_builder": {},
            "parameter_builders": [
                {
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                    "enforce_numeric_metric": False,
                    "replace_nan_with_zero": False,
                    "reduce_scalar_metric": True,
                },
                {
                    "name": "my_other_parameter",
                    "metric_name": "my_other_metric",
                    "sampling_method": "bootstrap",
                    "enforce_numeric_metric": True,
                    "replace_nan_with_zero": True,
                    "reduce_scalar_metric": True,
                    "false_positive_rate": 0.05,
                    "truncate_values": {},
                },
            ],
            "expectation_configuration_builders": [
                {
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_one_arg": "$parameter.my_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
                {
                    "expectation_type": "expect_column_min_to_be_between",
                    "column": "$domain.domain_kwargs.column",
                    "my_another_arg": "$parameter.my_other_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_other_parameter_estimator": "$parameter.my_other_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
        {
            "name": "rule_1",
            "domain_builder": {},
            "parameter_builders": [
                {
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                    "enforce_numeric_metric": False,
                    "replace_nan_with_zero": False,
                    "reduce_scalar_metric": True,
                },
            ],
            "expectation_configuration_builders": [
                {
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_arg": "$parameter.my_parameter.value[0]",
                    "my_other_arg": "$parameter.my_parameter.value[1]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
    ]

    effective_rules: List[
        Rule
    ] = profiler_with_placeholder_args.reconcile_profiler_rules(rules=rules)

    # Compare as {rule_name: json_config} maps; deep-filter drops empty/None entries
    # so the actual configs line up with the hand-written expected literals.
    rule: Rule
    effective_rule_configs_actual: dict = {
        rule.name: rule.to_json_dict() for rule in effective_rules
    }
    deep_filter_properties_iterable(effective_rule_configs_actual, inplace=True)

    rule_config: dict
    effective_rule_configs_expected: dict = {
        rule_config["name"]: rule_config for rule_config in expected_rules
    }

    assert effective_rule_configs_actual == effective_rule_configs_expected
def get_batch_request_from_acceptable_arguments(
    datasource_name: Optional[str] = None,
    data_connector_name: Optional[str] = None,
    data_asset_name: Optional[str] = None,
    *,
    batch_request: Optional[BatchRequestBase] = None,
    batch_data: Optional[Any] = None,
    data_connector_query: Optional[dict] = None,
    batch_identifiers: Optional[dict] = None,
    limit: Optional[int] = None,
    index: Optional[Union[int, list, tuple, slice, str]] = None,
    custom_filter_function: Optional[Callable] = None,
    batch_spec_passthrough: Optional[dict] = None,
    sampling_method: Optional[str] = None,
    sampling_kwargs: Optional[dict] = None,
    splitter_method: Optional[str] = None,
    splitter_kwargs: Optional[dict] = None,
    runtime_parameters: Optional[dict] = None,
    query: Optional[str] = None,
    path: Optional[str] = None,
    batch_filter_parameters: Optional[dict] = None,
    **kwargs,
) -> Union[BatchRequest, RuntimeBatchRequest]:
    """Obtain formal BatchRequest typed object from allowed attributes (supplied as arguments).

    This method applies only to the new (V3) Datasource schema.

    If any of "batch_data", "query", "path", or "runtime_parameters" is supplied, a
    RuntimeBatchRequest is built; otherwise a standard BatchRequest is built from the
    datasource/data-connector/data-asset triple plus a data_connector_query. At most one
    of "batch_data", "query", and "path" may be given, and none may also appear as a key
    inside "runtime_parameters".

    Args:
        datasource_name: Name of the datasource; must be a str (validated below).
        data_connector_name: Name of the data connector on the datasource.
        data_asset_name: Name of the data asset.
        batch_request: Pre-built BatchRequest/RuntimeBatchRequest; if given, it is
            returned as-is after type validation and every other argument is ignored.
        batch_data: In-memory data placed into runtime_parameters["batch_data"].
        query: Query string placed into runtime_parameters["query"].
        path: Path placed into runtime_parameters["path"].
        runtime_parameters: Explicit runtime parameters; must not duplicate the
            batch_data/query/path keys supplied as direct arguments.
        data_connector_query: Pre-assembled query dict; when given, the
            batch_filter_parameters/limit/index/custom_filter_function arguments are
            not used to build one.
        batch_identifiers: Runtime batch identifiers (runtime case), or a deprecated
            alias for "batch_filter_parameters" (non-runtime case; logs a warning).
        batch_filter_parameters: Filter parameters placed into data_connector_query.
        limit: Placed into data_connector_query (presumably caps returned batches --
            semantics live in the data connector; verify there).
        index: Placed into data_connector_query.
        custom_filter_function: Placed into data_connector_query.
        sampling_method: Merged into batch_spec_passthrough as "sampling_method".
        sampling_kwargs: Merged into batch_spec_passthrough alongside sampling_method.
        splitter_method: Merged into batch_spec_passthrough as "splitter_method".
        splitter_kwargs: Merged into batch_spec_passthrough alongside splitter_method.
        batch_spec_passthrough: Dict passed through on the resulting batch request.
        **kwargs: Fallback source for batch_identifiers (runtime case) or
            batch_filter_parameters (non-runtime case) when neither is given explicitly.

    Raises:
        TypeError: If "batch_request" is not a (Runtime)BatchRequest instance.
        GreatExpectationsTypeError: If "datasource_name" is not a str.
        ValueError: If more than one of batch_data/query/path is given, or one of them
            also appears in runtime_parameters, or both batch_filter_parameters and
            batch_identifiers are given in the non-runtime case.

    Returns:
        (BatchRequest or RuntimeBatchRequest) The formal BatchRequest or RuntimeBatchRequest object
    """
    if batch_request:
        if not isinstance(batch_request, (BatchRequest, RuntimeBatchRequest)):
            raise TypeError(
                f"""batch_request must be an instance of BatchRequest or RuntimeBatchRequest object, not \
{type(batch_request)}"""
            )
        # Adopt the datasource_name from the formal request so the str check below passes.
        datasource_name = batch_request.datasource_name

    # ensure that the first parameter is datasource_name, which should be a str. This check prevents users
    # from passing in batch_request as an unnamed parameter.
    if not isinstance(datasource_name, str):
        raise ge_exceptions.GreatExpectationsTypeError(
            f"the first parameter, datasource_name, must be a str, not {type(datasource_name)}"
        )

    # batch_data/query/path are mutually exclusive ways of supplying runtime data.
    if len([arg for arg in [batch_data, query, path] if arg is not None]) > 1:
        raise ValueError("Must provide only one of batch_data, query, or path.")

    # Reject the same runtime key being supplied both directly and via runtime_parameters.
    if any(
        [
            batch_data is not None
            and runtime_parameters
            and "batch_data" in runtime_parameters,
            query and runtime_parameters and "query" in runtime_parameters,
            path and runtime_parameters and "path" in runtime_parameters,
        ]
    ):
        raise ValueError(
            "If batch_data, query, or path arguments are provided, the same keys cannot appear in the "
            "runtime_parameters argument."
        )

    if batch_request:
        # TODO: Raise a warning if any parameters besides batch_requests are specified
        return batch_request

    batch_request_class: type
    batch_request_as_dict: dict

    # Any runtime-data signal selects the RuntimeBatchRequest path.
    if any([batch_data is not None, query, path, runtime_parameters]):
        batch_request_class = RuntimeBatchRequest

        runtime_parameters = runtime_parameters or {}
        if batch_data is not None:
            runtime_parameters["batch_data"] = batch_data
        elif query is not None:
            runtime_parameters["query"] = query
        elif path is not None:
            runtime_parameters["path"] = path

        # Leftover **kwargs double as batch_identifiers when none were given explicitly.
        if batch_identifiers is None:
            batch_identifiers = kwargs
        else:
            # Raise a warning if kwargs exist
            pass

        batch_request_as_dict = {
            "datasource_name": datasource_name,
            "data_connector_name": data_connector_name,
            "data_asset_name": data_asset_name,
            "runtime_parameters": runtime_parameters,
            "batch_identifiers": batch_identifiers,
            "batch_spec_passthrough": batch_spec_passthrough,
        }
    else:
        batch_request_class = BatchRequest

        if data_connector_query is None:
            if batch_filter_parameters is not None and batch_identifiers is not None:
                raise ValueError(
                    'Must provide either "batch_filter_parameters" or "batch_identifiers", not both.'
                )

            # Back-compat: "batch_identifiers" is the old name for "batch_filter_parameters".
            if batch_filter_parameters is None and batch_identifiers is not None:
                logger.warning(
                    'Attempting to build data_connector_query but "batch_identifiers" was provided '
                    'instead of "batch_filter_parameters". The "batch_identifiers" key on '
                    'data_connector_query has been renamed to "batch_filter_parameters". Please update '
                    'your code. Falling back on provided "batch_identifiers".'
                )
                batch_filter_parameters = batch_identifiers
            elif batch_filter_parameters is None and batch_identifiers is None:
                # Leftover **kwargs double as batch_filter_parameters.
                batch_filter_parameters = kwargs
            else:
                # Raise a warning if kwargs exist
                pass

            data_connector_query_params: dict = {
                "batch_filter_parameters": batch_filter_parameters,
                "limit": limit,
                "index": index,
                "custom_filter_function": custom_filter_function,
            }
            data_connector_query = IDDict(data_connector_query_params)
        else:
            # Raise a warning if batch_filter_parameters or kwargs exist
            data_connector_query = IDDict(data_connector_query)

        if batch_spec_passthrough is None:
            batch_spec_passthrough = {}

        # Fold sampling/splitter directives into batch_spec_passthrough.
        if sampling_method is not None:
            sampling_params: dict = {
                "sampling_method": sampling_method,
            }
            if sampling_kwargs is not None:
                sampling_params["sampling_kwargs"] = sampling_kwargs
            batch_spec_passthrough.update(sampling_params)

        if splitter_method is not None:
            splitter_params: dict = {
                "splitter_method": splitter_method,
            }
            if splitter_kwargs is not None:
                splitter_params["splitter_kwargs"] = splitter_kwargs
            batch_spec_passthrough.update(splitter_params)

        batch_request_as_dict: dict = {
            "datasource_name": datasource_name,
            "data_connector_name": data_connector_name,
            "data_asset_name": data_asset_name,
            "data_connector_query": data_connector_query,
            "batch_spec_passthrough": batch_spec_passthrough,
        }

    # Drop empty/None entries before constructing the formal request object.
    deep_filter_properties_iterable(
        properties=batch_request_as_dict,
        inplace=True,
    )

    batch_request = batch_request_class(**batch_request_as_dict)

    return batch_request
def test_reconcile_profiler_rules_existing_rule_domain_builder_override(
    profiler_with_placeholder_args,
):
    """Overriding only the "domain_builder" of the existing "rule_1".

    Expectation: reconciliation swaps in the new domain builder settings while the
    rule's parameter builders and expectation configuration builders stay intact.
    """
    rule_overrides: Dict[str, Dict[str, Any]] = {
        "rule_1": {
            "domain_builder": {
                "class_name": "SimpleColumnSuffixDomainBuilder",
                "module_name": "great_expectations.rule_based_profiler.domain_builder",
                "column_name_suffixes": [
                    "_ts",
                ],
            },
        },
    }

    expected_rules: List[dict] = [
        {
            "name": "rule_1",
            "domain_builder": {
                "column_name_suffixes": [
                    "_ts",
                ],
            },
            "parameter_builders": [
                {
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                    "enforce_numeric_metric": False,
                    "replace_nan_with_zero": False,
                    "reduce_scalar_metric": True,
                },
            ],
            "expectation_configuration_builders": [
                {
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_arg": "$parameter.my_parameter.value[0]",
                    "my_other_arg": "$parameter.my_parameter.value[1]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
    ]

    reconciled_rules: List[
        Rule
    ] = profiler_with_placeholder_args.reconcile_profiler_rules(rules=rule_overrides)

    # Serialize each reconciled rule and strip empty/None entries so the result is
    # directly comparable with the hand-written expected literals above.
    actual_rule_configs: dict = {}
    reconciled_rule: Rule
    for reconciled_rule in reconciled_rules:
        actual_rule_configs[reconciled_rule.name] = reconciled_rule.to_json_dict()
    deep_filter_properties_iterable(actual_rule_configs, inplace=True)

    expected_rule_configs: dict = {
        expected_config["name"]: expected_config for expected_config in expected_rules
    }

    assert actual_rule_configs == expected_rule_configs