Beispiel #1
0
    def add_domain(
        self,
        domain: Domain,
        allow_duplicates: bool = False,
    ) -> None:
        domain_cursor: Domain
        if not allow_duplicates and domain.id in [
                domain_cursor.id for domain_cursor in self.domains
        ]:
            raise ge_exceptions.ProfilerConfigurationError(
                f"""Error: Domain\n{domain}\nalready exists.  In order to add it, either pass "allow_duplicates=True" \
or call "RuleState.remove_domain_if_exists()" with Domain having ID equal to "{domain.id}" as argument first.
""")
        self.domains.append(domain)
def _validate_builder_override_config(builder_config: dict):
    """
    In order to ensure successful instantiation of custom builder classes using "instantiate_class_from_config()",
    candidate builder override configurations are required to supply both "class_name" and "module_name" attributes.

    The checks are evaluated with short-circuiting, so an argument that is not a dictionary (e.g., None or a number)
    is reported as a configuration error instead of failing with a TypeError on the key membership tests.

    :param builder_config: candidate builder override configuration
    :raises: ProfilerConfigurationError
    """
    # "isinstance" must be tested first: the "in" checks are only meaningful (and safe) for a dictionary.
    is_valid: bool = (
        isinstance(builder_config, dict)
        and "class_name" in builder_config
        and "module_name" in builder_config
    )
    if not is_valid:
        raise ge_exceptions.ProfilerConfigurationError(
            'Both "class_name" and "module_name" must be specified.')
Beispiel #3
0
    def initialize_parameter_container_for_domain(
        self,
        domain: Domain,
        overwrite: bool = True,
    ) -> None:
        if not overwrite and domain.id in self.parameters:
            raise ge_exceptions.ProfilerConfigurationError(
                f"""Error: ParameterContainer for Domain\n{domain}\nalready exists.  In order to overwrite it, either \
pass "overwrite=True" or call "RuleState.remove_parameter_container_from_domain()" with Domain having ID equal to \
"{domain.id}" as argument first.
""")

        parameter_container: ParameterContainer = ParameterContainer(
            parameter_nodes=None)
        self._parameters[domain.id] = parameter_container
Beispiel #4
0
    def set_batch_data(
        self,
        batch_list: Optional[List[Batch]] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    ) -> None:
        """
        Record the batch specification on this object, accepting at most one of the two arguments.

        If "batch_list" is given, it is assigned to "self.batch_list"; otherwise "batch_request" (which may be
        None) is assigned to "self.batch_request".

        :param batch_list: explicit list of batches
        :param batch_request: batch request (object or dictionary)
        :raises: ProfilerConfigurationError if both arguments are supplied (i.e., neither is None)
        """
        candidate: Any
        supplied: list = [
            candidate for candidate in (batch_list, batch_request)
            if candidate is not None
        ]
        num_supplied_batch_specification_args: int = len(supplied)
        if num_supplied_batch_specification_args > 1:
            raise ge_exceptions.ProfilerConfigurationError(
                f'Please pass at most one of "batch_list" and "batch_request" arguments (you passed {num_supplied_batch_specification_args} arguments).'
            )

        if batch_list is not None:
            self.batch_list = batch_list
            return

        self.batch_request = batch_request
    def _init_rule(
        self,
        rule_name: str,
        rule_config: Dict[str, Any],
    ) -> Rule:
        """
        Build a Rule object from a single rule configuration.

        :param rule_name: name given to the resulting Rule (also used in error messages)
        :param rule_config: rule configuration; must contain the "domain_builder" and
            "expectation_configuration_builders" keys, and may contain "parameter_builders"
        :return: Rule assembled from the instantiated builders
        :raises: ProfilerConfigurationError if a mandatory key is absent from "rule_config"
        """
        # Config is validated through schema, but do a sanity check: report the first mandatory key that is missing.
        mandatory_keys = ("domain_builder", "expectation_configuration_builders")
        attr: Optional[str] = next(
            (key for key in mandatory_keys if key not in rule_config),
            None,
        )
        if attr is not None:
            raise ge_exceptions.ProfilerConfigurationError(
                message=f'Invalid rule "{rule_name}": missing mandatory {attr}.'
            )

        # Instantiate each group of builders through the corresponding RuleBasedProfiler helper.
        rule_domain_builder: DomainBuilder = RuleBasedProfiler._init_rule_domain_builder(
            domain_builder_config=rule_config["domain_builder"],
            data_context=self._data_context,
        )

        # "parameter_builders" is optional, hence ".get()" (None is passed through when the key is absent).
        rule_parameter_builders: Optional[List[ParameterBuilder]]
        rule_parameter_builders = RuleBasedProfiler._init_rule_parameter_builders(
            parameter_builder_configs=rule_config.get("parameter_builders"),
            data_context=self._data_context,
        )

        rule_expectation_builders: List[ExpectationConfigurationBuilder]
        rule_expectation_builders = RuleBasedProfiler._init_rule_expectation_configuration_builders(
            expectation_configuration_builder_configs=rule_config[
                "expectation_configuration_builders"
            ]
        )

        # Package the instantiated builders into the Rule object.
        return Rule(
            name=rule_name,
            domain_builder=rule_domain_builder,
            parameter_builders=rule_parameter_builders,
            expectation_configuration_builders=rule_expectation_builders,
        )
Beispiel #6
0
    def __init__(
        self,
        *,
        profiler_config: Optional[Dict[str, Dict[str, Dict[str, Any]]]] = None,
        data_context: Optional[DataContext] = None,
    ):
        """
        Create a new Profiler using configured rules.
        For a rule or an item in a rule configuration, instantiates the following if
        available: a domain builder, a parameter builder, and a configuration builder.
        These will be used to define profiler computation patterns.

        Args:
            profiler_config: Variables and Rules configuration as a dictionary; None (the default) is treated
                as an empty configuration, which yields a profiler without rules
            data_context: DataContext object that defines a full runtime environment (data access, etc.)

        Raises:
            ProfilerConfigurationError: if a rule configuration does not contain a "domain_builder" entry
        """
        # The signature advertises None as the default value; normalize it to an empty configuration so that
        # the ".get()" lookups below do not fail with AttributeError.
        if profiler_config is None:
            profiler_config = {}

        self._profiler_config = profiler_config
        self._data_context = data_context
        self._rules = []

        # "or {}" also tolerates configurations that spell out "rules: null" / "variables: null".
        rules_configs: Dict[str, Dict[str, Any]] = (
            self._profiler_config.get("rules") or {})
        # The variables configuration does not depend on the rule, so it is looked up once.
        variables_configs: Dict[str, Dict] = (
            self._profiler_config.get("variables") or {})

        rule_name: str
        rule_config: Dict[str, Any]

        for rule_name, rule_config in rules_configs.items():
            domain_builder_config: Optional[dict] = rule_config.get(
                "domain_builder")

            if domain_builder_config is None:
                raise ge_exceptions.ProfilerConfigurationError(
                    message=
                    f'Invalid rule "{rule_name}": no domain_builder found.')

            domain_builder: DomainBuilder = instantiate_class_from_config(
                config=domain_builder_config,
                runtime_environment={"data_context": data_context},
                config_defaults={
                    "module_name":
                    "great_expectations.rule_based_profiler.domain_builder"
                },
            )

            # Parameter builders are optional; a missing or empty entry results in an empty list.
            parameter_builders: List[ParameterBuilder] = []

            parameter_builder_configs: Optional[List[dict]] = rule_config.get(
                "parameter_builders")

            if parameter_builder_configs:
                parameter_builder_config: dict
                for parameter_builder_config in parameter_builder_configs:
                    parameter_builders.append(
                        instantiate_class_from_config(
                            config=parameter_builder_config,
                            runtime_environment={"data_context": data_context},
                            config_defaults={
                                "module_name":
                                "great_expectations.rule_based_profiler.parameter_builder"
                            },
                        ))

            # Expectation configuration builders are optional as well and are instantiated without a
            # data context (empty runtime environment).
            expectation_configuration_builders: List[
                ExpectationConfigurationBuilder] = []

            expectation_configuration_builder_configs: Optional[
                List[dict]] = rule_config.get(
                    "expectation_configuration_builders")

            if expectation_configuration_builder_configs:
                expectation_configuration_builder_config: dict
                for (expectation_configuration_builder_config
                     ) in expectation_configuration_builder_configs:
                    expectation_configuration_builders.append(
                        instantiate_class_from_config(
                            config=expectation_configuration_builder_config,
                            runtime_environment={},
                            config_defaults={
                                "class_name":
                                "DefaultExpectationConfigurationBuilder",
                                "module_name":
                                "great_expectations.rule_based_profiler.expectation_configuration_builder",
                            },
                        ))

            # Built inside the loop on purpose: every Rule receives its own container object rather than
            # sharing a single instance across rules.
            variables: Optional[ParameterContainer] = None

            if variables_configs:
                variables = build_parameter_container_for_variables(
                    variables_configs=variables_configs)

            self._rules.append(
                Rule(
                    name=rule_name,
                    domain_builder=domain_builder,
                    parameter_builders=parameter_builders,
                    expectation_configuration_builders=
                    expectation_configuration_builders,
                    variables=variables,
                ))