コード例 #1
0
    def __init__(self, serialization_type=None, root_directory=None):
        """Create the store with an always-in-memory backend.

        Args:
            serialization_type: optional serialization format identifier.
            root_directory: optional project root, forwarded to the backend.
        """
        self.serialization_type = serialization_type
        self.root_directory = root_directory

        # The backend for this store is fixed to the in-memory implementation.
        backend_config = {
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
            "separator": ".",
        }
        self.store_backend = instantiate_class_from_config(
            config=backend_config,
            runtime_config={"root_directory": root_directory},
            config_defaults={},
        )
コード例 #2
0
 def __init__(self, column_section_renderer=None):
     """Build the column section renderer.

     Defaults to ValidationResultsColumnSectionRenderer when no config is given.
     """
     # Fall back to the default section renderer when none is configured.
     renderer_config = (
         column_section_renderer
         if column_section_renderer is not None
         else {"class_name": "ValidationResultsColumnSectionRenderer"}
     )
     default_module = "great_expectations.render.renderer.column_section_renderer"
     self._column_section_renderer = instantiate_class_from_config(
         config=renderer_config,
         runtime_config={},
         config_defaults={
             "module_name": renderer_config.get("module_name", default_module)
         })
コード例 #3
0
 def _build_generator_from_config(self, **kwargs):
     """Build a generator from a config dict and return it.

     Translates the deprecated "type" key into the module_name/class_name
     form before instantiation.
     """
     if "type" in kwargs:
         # BUG FIX: the two implicitly-concatenated string literals lacked a
         # separating space, producing "...class_nameinstead." in the warning.
         warnings.warn(
             "Using type to configure generators is now deprecated. Please use module_name and class_name "
             "instead.")
         type_ = kwargs.pop("type")
         generator_class = self._get_generator_class_from_type(type_)
         kwargs.update({"class_name": generator_class.__name__})
     generator = instantiate_class_from_config(
         config=kwargs,
         runtime_config={"datasource": self},
         config_defaults={
             "module_name": "great_expectations.datasource.generator"
         })
     return generator
def test_site_builder_with_custom_site_section_builders_config(
        tmp_path_factory):
    """Test that site builder can handle partially specified custom site_section_builders config"""
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    # fixture config swaps site section builder source stores and specifies custom run_name_filters
    shutil.copy(
        file_relative_path(
            __file__,
            "../test_fixtures/great_expectations_custom_local_site_config.yml"
        ),
        str(os.path.join(project_dir, "great_expectations.yml")),
    )
    context = DataContext(context_root_dir=project_dir)
    local_site_config = context._project_config.data_docs_sites.get(
        "local_site")

    module_name = "great_expectations.render.renderer.site_builder"
    site_builder = instantiate_class_from_config(
        config=local_site_config,
        runtime_environment={
            "data_context": context,
            "root_directory": context.root_directory,
            "site_name": "local_site",
        },
        config_defaults={"module_name": module_name},
    )
    site_section_builders = site_builder.site_section_builders

    expectations_site_section_builder = site_section_builders["expectations"]
    assert isinstance(expectations_site_section_builder.source_store,
                      ValidationsStore)

    validations_site_section_builder = site_section_builders["validations"]
    assert isinstance(validations_site_section_builder.source_store,
                      ExpectationsStore)
    assert validations_site_section_builder.run_name_filter == {
        "ne": "custom_validations_filter"
    }

    profiling_site_section_builder = site_section_builders["profiling"]
    # BUG FIX: this previously re-asserted on validations_site_section_builder
    # (copy-paste); the intent is clearly to check the profiling builder.
    # NOTE(review): ExpectationsStore retained from the original assertion —
    # confirm against the swapped-stores fixture.
    assert isinstance(profiling_site_section_builder.source_store,
                      ExpectationsStore)
    assert profiling_site_section_builder.run_name_filter == {
        "eq": "custom_profiling_filter"
    }
def test_site_builder_usage_statistics_disabled(
    site_builder_data_context_with_html_store_titanic_random,
):
    """Built data-docs pages must not embed the data_context_id when usage statistics are disabled."""
    context = site_builder_data_context_with_html_store_titanic_random
    context._project_config.anonymous_usage_statistics = {
        "enabled": False,
        "data_context_id": "f43d4897-385f-4366-82b0-1a8eda2bf79c",
    }
    data_context_id = context.anonymous_usage_statistics["data_context_id"]

    # `context` is the fixture object, so read the configured sites off it.
    sites = context.project_config_with_variables_substituted.data_docs_sites
    site_builder = instantiate_class_from_config(
        config=sites["local_site"],
        runtime_environment={
            "data_context": context,
            "root_directory": context.root_directory,
            "site_name": "local_site",
        },
        config_defaults={
            "module_name": "great_expectations.render.renderer.site_builder"
        },
    )
    build_result = site_builder.build()
    index_page_path = build_result[0]
    links_dict = build_result[1]
    expectation_suite_pages = [
        file_relative_path(index_page_path, link["filepath"])
        for link in links_dict["expectations_links"]
    ]
    profiling_results_pages = [
        file_relative_path(index_page_path, link["filepath"])
        for link in links_dict["profiling_links"]
    ]

    expected_logo_url = "https://great-expectations-web-assets.s3.us-east-2.amazonaws.com/logo-long.png?d=20190924T231836.000000Z"

    all_pages = [index_page_path] + expectation_suite_pages + profiling_results_pages
    for page_path in all_pages:
        # Strip the leading "file://" scheme (7 characters) to obtain a
        # filesystem path — assumes paths are file:// URLs; TODO confirm.
        with open(page_path[7:]) as f:
            page_contents = f.read()
            assert expected_logo_url in page_contents
            assert data_context_id not in page_contents
コード例 #6
0
def in_memory_param_store(request, test_backends):
    """Build a store from the request param; skip when postgres is required but not selected."""
    backend_config = request.param.get("store_backend", None)
    if backend_config:
        drivername = backend_config.get("credentials", {}).get("drivername", None)
        # Postgres-backed configurations only make sense when that backend is available.
        if drivername == "postgresql" and "postgresql" not in test_backends:
            pytest.skip("skipping fixture because postgresql not selected")

    return instantiate_class_from_config(
        config=request.param,
        config_defaults={
            "module_name": "great_expectations.data_context.store",
        },
        runtime_environment={},
    )
 def _build_asset_from_config(self, config: dict):
     """Normalize the asset config through the schema and instantiate the Asset."""
     # Round-trip through the schema to validate and normalize the raw config.
     normalized = assetConfigSchema.dump(assetConfigSchema.load(config))
     asset: Asset = instantiate_class_from_config(
         config=normalized,
         runtime_environment={"data_connector": self},
         config_defaults={},
     )
     if asset:
         return asset
     # Instantiation produced nothing usable — raise with full context.
     raise ge_exceptions.ClassInstantiationError(
         module_name="great_expectations.datasource.data_connector.asset",
         package_name=None,
         class_name=normalized["class_name"],
     )
コード例 #8
0
    def from_data_context(data_context):
        """Build a renderer from the context's suite_edit notebook config, or the default renderer."""
        notebooks = data_context.notebooks
        suite_edit_notebook_config: Optional[NotebookConfig] = None
        if notebooks and notebooks.get("suite_edit"):
            suite_edit_notebook_config = notebookConfigSchema.load(
                notebooks.get("suite_edit"))

        # With no custom config, fall back to the stock SuiteEditNotebookRenderer.
        if suite_edit_notebook_config:
            renderer_config = suite_edit_notebook_config.__dict__
        else:
            renderer_config = {
                "module_name":
                "great_expectations.render.renderer.suite_edit_notebook_renderer",
                "class_name": "SuiteEditNotebookRenderer",
            }
        return instantiate_class_from_config(
            config=renderer_config,
            runtime_environment={},
            config_defaults={},
        )
コード例 #9
0
def test_get_definition_list_from_batch_request_with_empty_args_raises_error(
        mock_gcs_conn, mock_list_keys, mock_emit,
        empty_data_context_stats_enabled):
    """Calling get_batch_definition_list_from_batch_request with no arguments must raise TypeError."""
    connector_config = yaml.load(
        f"""
           class_name: ConfiguredAssetGCSDataConnector
           datasource_name: test_environment
           bucket_or_name: my_bucket
           prefix: ""
           assets:
               TestFiles:
           default_regex:
               pattern: (.+)_(.+)_(.+)\\.csv
               group_names:
                   - name
                   - timestamp
                   - price
       """, )

    listed_keys = [
        "alex_20200809_1000.csv",
        "eugene_20200809_1500.csv",
        "james_20200811_1009.csv",
        "abe_20200809_1040.csv",
        "will_20200809_1002.csv",
        "james_20200713_1567.csv",
        "eugene_20201129_1900.csv",
        "will_20200810_1001.csv",
        "james_20200810_1003.csv",
        "alex_20200819_1300.csv",
    ]
    mock_list_keys.return_value = (listed_keys, )

    my_data_connector: ConfiguredAssetGCSDataConnector = instantiate_class_from_config(
        config=connector_config,
        runtime_environment={
            "name": "general_gcs_data_connector",
            "execution_engine": PandasExecutionEngine(),
        },
        config_defaults={
            "module_name": "great_expectations.datasource.data_connector"
        },
    )

    # The call is missing its required `batch_request` argument.
    with pytest.raises(TypeError):
        # noinspection PyArgumentList
        my_data_connector.get_batch_definition_list_from_batch_request()
コード例 #10
0
    def __init__(
        self,
        data_context,
        renderer,
        slack_webhook=None,
        slack_token=None,
        slack_channel=None,
        notify_on="all",
        notify_with=None,
    ):
        """Construct a SlackNotificationAction

        Args:
            data_context:
            renderer: dictionary specifying the renderer used to generate a query consumable by Slack API, for example:
                {
                   "module_name": "great_expectations.render.renderer.slack_renderer",
                   "class_name": "SlackRenderer",
               }
            slack_webhook: incoming Slack webhook to which to send notification
            slack_token: Slack token; used together with slack_channel as an
                alternative to slack_webhook (exactly one mechanism is allowed)
            slack_channel: channel to notify when using slack_token
            notify_on: "all", "failure", "success" - specifies validation status that will trigger notification
            notify_with: stored as-is; presumably selects which data-docs
                sites to reference in the notification — TODO confirm
        """
        super().__init__(data_context)
        self.renderer = instantiate_class_from_config(
            config=renderer,
            runtime_environment={},
            config_defaults={},
        )
        # NOTE(review): raises KeyError when the renderer config omits
        # "module_name" (no default is supplied above) — confirm intended.
        module_name = renderer["module_name"]
        if not self.renderer:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=renderer["class_name"],
            )
        # Enforce exactly one delivery mechanism: a webhook, or a
        # token+channel pair, but never both.
        # NOTE(review): these are `assert`s, which are stripped under
        # `python -O`; consider raising ValueError instead.
        if not slack_token and slack_channel:
            assert slack_webhook
        if not slack_webhook:
            assert slack_token and slack_channel
        assert not (slack_webhook and slack_channel and slack_token)

        self.slack_webhook = slack_webhook
        self.slack_token = slack_token
        self.slack_channel = slack_channel
        self.notify_on = notify_on
        self.notify_with = notify_with
コード例 #11
0
def clean_up_tables_with_prefix(connection_string: str, table_prefix: str) -> List[str]:
    """Drop all tables starting with the provided table_prefix.
    Note: Uses private method InferredAssetSqlDataConnector._introspect_db()
    to get the table names to not duplicate code, but should be refactored in the
    future to not use a private method.

    Args:
        connection_string: To connect to the database.
        table_prefix: First characters of the tables you want to remove.

    Returns:
        List of deleted tables.
    """
    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        connection_string=connection_string
    )
    data_connector = instantiate_class_from_config(
        config={
            "class_name": "InferredAssetSqlDataConnector",
            "name": "temp_data_connector",
        },
        runtime_environment={
            "execution_engine": execution_engine,
            "datasource_name": "temp_datasource",
        },
        config_defaults={"module_name": "great_expectations.datasource.data_connector"},
    )
    introspection_output = data_connector._introspect_db()

    tables_to_drop: List[str] = [
        table["table_name"]
        for table in introspection_output
        if table["table_name"].startswith(table_prefix)
    ]
    tables_dropped: List[str] = []

    # BUG FIX: the connection was previously never closed (resource leak);
    # ensure it is released even if a DROP statement raises.
    connection = execution_engine.engine.connect()
    try:
        for table_name in tables_to_drop:
            print(f"Dropping table {table_name}")
            # Table names come from DB introspection, not user input, so
            # string interpolation is tolerable here (DDL cannot be parameterized).
            connection.execute(f"DROP TABLE IF EXISTS {table_name}")
            tables_dropped.append(table_name)
    finally:
        connection.close()

    tables_skipped: List[str] = list(set(tables_to_drop) - set(tables_dropped))
    if len(tables_skipped) > 0:
        warnings.warn(f"Warning: Tables skipped: {tables_skipped}")

    return tables_dropped
コード例 #12
0
    def __init__(self,
                 name="default",
                 datasource=None,
                 query_store_backend=None,
                 queries=None):
        """Set up the query store backend and register any provided queries.

        Raises:
            ClassInstantiationError: if the configured backend cannot be built.
        """
        super().__init__(name=name, datasource=datasource)
        # Resolve the DataContext root directory, if any is configured.
        if (datasource and datasource.data_context
                and datasource.data_context.root_directory):
            root_directory = datasource.data_context.root_directory
        else:
            root_directory = None

        if query_store_backend is None:
            # Prefer a filesystem-backed (Tuple) store when a DataContext
            # root exists; otherwise keep queries in memory.
            if root_directory:
                base_directory = os.path.join(
                    datasource.data_context.root_directory,
                    "datasources",
                    datasource.name,
                    "generators",
                    name,
                )
                query_store_backend = {
                    "class_name": "TupleFilesystemStoreBackend",
                    "base_directory": base_directory,
                    "filepath_suffix": ".sql",
                }
            else:
                query_store_backend = {"class_name": "InMemoryStoreBackend"}

        module_name = "great_expectations.data_context.store"
        self._store_backend = instantiate_class_from_config(
            config=query_store_backend,
            runtime_environment={"root_directory": root_directory},
            config_defaults={"module_name": module_name},
        )
        if not self._store_backend:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=query_store_backend["class_name"],
            )
        if queries is not None:
            # Register each named query with the generator.
            for query_name, query in queries.items():
                self.add_query(data_asset_name=query_name, query=query)
コード例 #13
0
    def __init__(
        self,
        data_context,
        action_list,
        name,
        result_format=None,
    ):
        """Build the operator's actions from action_list.

        Args:
            data_context: the DataContext the actions run against.
            action_list: list of dicts, each with exactly the keys
                "name" and "action".
            name: operator name.
            result_format: validation result format; defaults to
                {"result_format": "SUMMARY"}.

        Raises:
            KeyError: if an action config has keys other than name/action.
            ClassInstantiationError: if an action cannot be instantiated.
        """
        super().__init__()
        self.data_context = data_context
        self.name = name

        # BUG FIX: the default was previously a mutable dict literal in the
        # signature, shared (and potentially mutated) across all instances.
        if result_format is None:
            result_format = {"result_format": "SUMMARY"}
        result_format = parse_result_format(result_format)
        assert result_format["result_format"] in [
            "BOOLEAN_ONLY",
            "BASIC",
            "SUMMARY",
            "COMPLETE",
        ]
        self.result_format = result_format

        self.action_list = action_list
        self.actions = OrderedDict()
        for action_config in action_list:
            assert isinstance(action_config, dict)
            # NOTE: Eugene: 2019-09-23: need a better way to validate an action config:
            if not set(action_config.keys()) == {"name", "action"}:
                raise KeyError(
                    'Action config keys must be ("name", "action"). Instead got {}'.format(
                        action_config.keys()
                    )
                )

            config = action_config["action"]
            module_name = "great_expectations.validation_operators"
            new_action = instantiate_class_from_config(
                config=config,
                runtime_environment={"data_context": self.data_context},
                config_defaults={"module_name": module_name},
            )
            if not new_action:
                raise ClassInstantiationError(
                    module_name=module_name,
                    package_name=None,
                    class_name=config["class_name"],
                )
            self.actions[action_config["name"]] = new_action
コード例 #14
0
    def _build_batch_kwargs_generator(self, **kwargs):
        """Build a BatchKwargGenerator using the provided configuration and return the newly-built generator."""
        module_name = "great_expectations.datasource.batch_kwargs_generator"
        generator = instantiate_class_from_config(
            config=kwargs,
            runtime_environment={"datasource": self},
            config_defaults={"module_name": module_name},
        )
        if generator:
            return generator
        # Instantiation failed — surface a descriptive error for the caller.
        raise ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            class_name=kwargs["class_name"],
        )
コード例 #15
0
def basic_pandas_datasource_v013(tmp_path_factory):
    """Fixture: Pandas Datasource with a runtime connector and a configured-asset filesystem connector."""
    base_directory: str = str(
        tmp_path_factory.mktemp(
            "basic_pandas_datasource_v013_filesystem_data_connector"
        )
    )

    # YAML config for the datasource; {base_directory} points at the temp dir.
    datasource_config: str = f"""
class_name: Datasource

execution_engine:
    class_name: PandasExecutionEngine

data_connectors:
    test_runtime_data_connector:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        runtime_keys:
            - pipeline_stage_name
            - airflow_run_id

    my_filesystem_data_connector:
        class_name: ConfiguredAssetFilesystemDataConnector
        base_directory: {base_directory}
        # TODO: <Alex>Investigate: this potentially breaks the data_reference centric design.</Alex>
        glob_directive: "*.csv"
        # glob_directive: "*"

        assets:
            Titanic: {{}}

        default_regex:
            # TODO: <Alex>Investigate: this potentially breaks the data_reference centric design.</Alex>
            pattern: (.+)_(\\d+)\\.csv
            # pattern: (.+)_(\\d+)\\.[a-z][a-z][a-z]
            group_names:
            - letter
            - number
    """

    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(datasource_config),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource
コード例 #16
0
def init_parameter_builder(
        parameter_builder_config: Union["ParameterBuilderConfig",
                                        dict],  # noqa: F821
        data_context: Optional["BaseDataContext"] = None,  # noqa: F821
) -> "ParameterBuilder":  # noqa: F821
    """Instantiate a ParameterBuilder from its config (object or plain dict)."""
    # Normalize config objects to dicts before instantiation.
    config = (parameter_builder_config
              if isinstance(parameter_builder_config, dict)
              else parameter_builder_config.to_dict())

    return instantiate_class_from_config(  # noqa: F821
        config=config,
        runtime_environment={"data_context": data_context},
        config_defaults={
            "module_name":
            "great_expectations.rule_based_profiler.parameter_builder"
        },
    )
コード例 #17
0
 def __init__(self, column_section_renderer=None):
     """Build the column section renderer, defaulting to ExpectationSuiteColumnSectionRenderer.

     Raises:
         ClassInstantiationError: if the configured renderer cannot be built.
     """
     if column_section_renderer is None:
         column_section_renderer = {
             "class_name": "ExpectationSuiteColumnSectionRenderer"
         }
     default_module_name = 'great_expectations.render.renderer.column_section_renderer'
     # Resolve the module once so instantiation and error reporting agree.
     module_name = column_section_renderer.get("module_name", default_module_name)
     self._column_section_renderer = instantiate_class_from_config(
         config=column_section_renderer,
         runtime_environment={},
         config_defaults={"module_name": module_name})
     if not self._column_section_renderer:
         # BUG FIX: previously the whole config dict was passed as
         # module_name, producing a garbled error message.
         raise ClassInstantiationError(
             module_name=module_name,
             package_name=None,
             class_name=column_section_renderer['class_name'])
コード例 #18
0
def _build_asset_from_config(runtime_environment: "DataConnector",
                             config: dict) -> Asset:
    """Build Asset from configuration and return asset. Used by both ConfiguredAssetDataConnector and RuntimeDataConnector"""
    # Validate and normalize the config via a schema round trip, then instantiate.
    normalized_config = assetConfigSchema.dump(assetConfigSchema.load(config))
    asset: Asset = instantiate_class_from_config(
        config=normalized_config,
        runtime_environment={"data_connector": runtime_environment},
        config_defaults={},
    )
    if asset:
        return asset
    # Nothing usable was instantiated — raise with full context.
    raise ge_exceptions.ClassInstantiationError(
        module_name="great_expectations.datasource.data_connector.asset",
        package_name=None,
        class_name=normalized_config["class_name"],
    )
 def _build_asset_from_config(self, name: str, config: dict):
     """Build an Asset using the provided configuration and return the newly-built Asset."""
     asset: Asset = instantiate_class_from_config(
         config=config,
         runtime_environment={"name": name, "data_connector": self},
         config_defaults={
             "module_name": "great_expectations.datasource.data_connector.asset",
             "class_name": "Asset",
         },
     )
     if asset:
         return asset
     # The configured class could not be instantiated — report it.
     raise ge_exceptions.ClassInstantiationError(
         module_name="great_expectations.datasource.data_connector.asset",
         package_name=None,
         class_name=config["class_name"],
     )
コード例 #20
0
    def get_profiler(
        data_context: "DataContext",  # noqa: F821
        profiler_store: ProfilerStore,
        name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> "RuleBasedProfiler":
        """Retrieve a RuleBasedProfiler from the store by name or GE Cloud id.

        Args:
            data_context: context passed to the instantiated profiler.
            profiler_store: store holding profiler configurations.
            name: profiler name (mutually exclusive with ge_cloud_id).
            ge_cloud_id: GE Cloud resource id (mutually exclusive with name).

        Raises:
            ProfilerNotFoundError: if no configuration exists for the key.
        """
        assert bool(name) ^ bool(
            ge_cloud_id
        ), "Must provide either name or ge_cloud_id (but not both)"

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name)
        try:
            profiler_config: RuleBasedProfilerConfig = profiler_store.get(
                key=key)
        except ge_exceptions.InvalidKeyError as exc_ik:
            id_ = (key.configuration_key if isinstance(
                key, ConfigurationIdentifier) else key)
            # BUG FIX: chain the original exception so the full traceback
            # (including the InvalidKeyError) is preserved.
            raise ge_exceptions.ProfilerNotFoundError(
                message=
                f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
            ) from exc_ik

        config = profiler_config.to_json_dict()
        if name:
            config.update({"name": name})
        # Drop empty/falsy entries before instantiation.
        config = filter_properties_dict(properties=config, clean_falsy=True)

        profiler = instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": data_context,
            },
            config_defaults={
                "module_name": "great_expectations.rule_based_profiler",
                "class_name": "RuleBasedProfiler",
            },
        )

        return profiler
コード例 #21
0
 def __init__(self, store_backend=None, runtime_environment=None):
     """Runtime environment may be necessary to instantiate store backend elements.

     Raises:
         ClassInstantiationError: if the backend cannot be instantiated.
         DataContextError: if the result is not a StoreBackend instance.
     """
     if store_backend is None:
         store_backend = {"class_name": "InMemoryStoreBackend"}
     logger.debug("Building store_backend.")
     module_name = 'great_expectations.data_context.store'
     self._store_backend = instantiate_class_from_config(
         config=store_backend,
         runtime_environment=runtime_environment or {},
         config_defaults={"module_name": module_name})
     if not self._store_backend:
         # BUG FIX: previously passed the whole config dict as class_name;
         # report the configured class name instead (matches sibling stores).
         raise ClassInstantiationError(module_name=module_name,
                                       package_name=None,
                                       class_name=store_backend["class_name"])
     if not isinstance(self._store_backend, StoreBackend):
         raise DataContextError(
             "Invalid StoreBackend configuration: expected a StoreBackend instance."
         )
     self._use_fixed_length_key = self._store_backend.fixed_length_key
def test_basic_instantiation_with_bigquery_creds_failure_pkey(
        sa, empty_data_context):
    """Instantiating a BigQuery datasource with an invalid private key must fail."""
    context = empty_data_context
    try:
        my_data_source = instantiate_class_from_config(
            # private key is valid but useless
            config={
                "connection_string": "bigquery://project-1353/dataset",
                "credentials_info": {
                    "type":
                    "service_account",
                    "project_id":
                    "project-1353",
                    "private_key_id":
                    "df87033061fd7c27dcc953e235fe099a7017f9c4",
                    "private_key":
                    "bad_pkey",
                    "client_email":
                    "*****@*****.**",
                    "client_id":
                    "100945395817716260007",
                    "auth_uri":
                    "https://accounts.google.com/o/oauth2/auth",
                    "token_uri":
                    "https://oauth2.googleapis.com/token",
                    "auth_provider_x509_cert_url":
                    "https://www.googleapis.com/oauth2/v1/certs",
                    "client_x509_cert_url":
                    "https://www.googleapis.com/robot/v1/metadata/x509/testme%40project-1353.iam.gserviceaccount.com",
                },
            },
            config_defaults={
                "module_name": "great_expectations.datasource",
                "class_name": "SimpleSqlalchemyDatasource",
            },
            runtime_environment={"name": "my_sql_datasource"},
        )
    # BUG FIX: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; narrow to Exception. Any exception here means the
    # bad key was correctly rejected.
    except Exception:
        return

    # BUG FIX: an unreachable `print(my_data_source)` after this raise was
    # removed; reaching this point means instantiation wrongly succeeded.
    raise Exception("BigQuery incorrectly passed with invalid private key")
コード例 #23
0
    def __init__(
        self, foo, fake_configurable, x, y=None, z=None,
    ):
        """Store the coordinates and build the configurable child object.

        Args:
            foo: must be an int; stored as-is.
            fake_configurable: config dict for the child object.
            x, y, z: coordinates passed through to the child's runtime environment.
        """
        assert isinstance(foo, int)
        self.foo = foo

        self.x, self.y, self.z = x, y, z

        print(fake_configurable)

        # This code allows us to specify defaults for the child class
        self.fake_configurable_object = instantiate_class_from_config(
            config=fake_configurable,
            runtime_environment={"x": self.x, "y": self.y, "z": self.z},
            config_defaults={"a": "default_value_for_a"},
        )
コード例 #24
0
    def _init_store_backend(self, store_backend_config, runtime_config):
        """Initialize and return the expectation-suite store backend."""
        self.key_class = ExpectationSuiteIdentifier

        # All backends share the store module default.
        config_defaults = {
            "module_name": "great_expectations.data_context.store",
        }
        # The fixed-length filesystem backend additionally needs key-length
        # and filepath-template defaults.
        if (store_backend_config["class_name"]
                == "FixedLengthTupleFilesystemStoreBackend"):
            config_defaults.update({
                "key_length": 4,
                "filepath_template": "{0}/{1}/{2}/{3}.json",
            })

        return instantiate_class_from_config(
            config=store_backend_config,
            runtime_config=runtime_config,
            config_defaults=config_defaults,
        )
コード例 #25
0
def init_expectation_configuration_builder(
    expectation_configuration_builder_config: Union[
        "ExpectationConfigurationBuilder", dict  # noqa: F821
    ],
    data_context: Optional["BaseDataContext"] = None,  # noqa: F821
) -> "ExpectationConfigurationBuilder":  # noqa: F821
    """Instantiate an ExpectationConfigurationBuilder from config (object or dict)."""
    config = expectation_configuration_builder_config
    if not isinstance(config, dict):
        # Config objects carry their own dict serialization.
        config = config.to_dict()

    return instantiate_class_from_config(  # noqa: F821
        config=config,
        runtime_environment={"data_context": data_context},
        config_defaults={
            "class_name": "DefaultExpectationConfigurationBuilder",
            "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
        },
    )
コード例 #26
0
    def _build_data_connector_from_config(
        self, name: str, config: Dict[str, Any],
    ) -> DataConnector:
        """Build a DataConnector using the provided configuration and return the newly-built DataConnector."""
        runtime_environment = {
            "name": name,
            "datasource_name": self.name,
            "execution_engine": self.execution_engine,
        }
        new_data_connector: DataConnector = instantiate_class_from_config(
            config=config,
            runtime_environment=runtime_environment,
            config_defaults={
                "module_name": "great_expectations.datasource.data_connector"
            },
        )
        # Hand the data context root directory to the new connector.
        new_data_connector.data_context_root_directory = (
            self._data_context_root_directory
        )

        # Register under the given name for later lookup.
        self.data_connectors[name] = new_data_connector
        return new_data_connector
コード例 #27
0
    def __init__(self, data_context, action_list):
        """Instantiate and register the named actions declared in action_list.

        Raises:
            KeyError: if an action config has keys other than name/action.
        """
        self.data_context = data_context

        self.action_list = action_list
        self.actions = {}
        for action_config in action_list:
            assert isinstance(action_config, dict)
            #NOTE: Eugene: 2019-09-23: need a better way to validate an action config:
            if set(action_config.keys()) != {"name", "action"}:
                raise KeyError('Action config keys must be ("name", "action"). Instead got {}'.format(action_config.keys()))

            self.actions[action_config["name"]] = instantiate_class_from_config(
                config=action_config["action"],
                runtime_environment={
                    "data_context": self.data_context,
                },
                config_defaults={
                    "module_name": "great_expectations.validation_operators"
                }
            )
コード例 #28
0
 def __init__(self, column_section_renderer=None):
     """Create the renderer, defaulting to ValidationResultsColumnSectionRenderer.

     Raises:
         ClassInstantiationError: if the configured renderer cannot be built.
     """
     super().__init__()
     if column_section_renderer is None:
         column_section_renderer = {
             "class_name": "ValidationResultsColumnSectionRenderer"
         }
     module_name = "great_expectations.render.renderer.column_section_renderer"
     self._column_section_renderer = instantiate_class_from_config(
         config=column_section_renderer,
         runtime_environment={},
         config_defaults={
             "module_name":
             column_section_renderer.get("module_name", module_name)
         },
     )
     if self._column_section_renderer:
         return
     # Instantiation produced nothing usable — raise with full context.
     raise ClassInstantiationError(
         module_name=module_name,
         package_name=None,
         class_name=column_section_renderer["class_name"],
     )
コード例 #29
0
def basic_spark_datasource(tmp_path_factory, spark_session):
    """Fixture: Spark Datasource with a runtime connector and an inferred-asset filesystem connector."""
    base_directory: str = str(
        tmp_path_factory.mktemp("basic_spark_datasource_v013_filesystem_data_connector")
    )

    # YAML config for the datasource; {base_directory} points at the temp dir.
    datasource_config: str = f"""
class_name: Datasource

execution_engine:
    class_name: SparkDFExecutionEngine
    spark_config:
        spark.master: local[*]
        spark.executor.memory: 6g
        spark.driver.memory: 6g
        spark.ui.showConsoleProgress: false
        spark.sql.shuffle.partitions: 2
        spark.default.parallelism: 4
data_connectors:
    test_runtime_data_connector:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        runtime_keys:
            - pipeline_stage_name
            - airflow_run_id
    simple_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        base_directory: {base_directory}
        glob_directive: '*'
        default_regex:
            pattern: (.+)\\.csv
            group_names:
            - data_asset_name
    """

    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(datasource_config),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource
def basic_datasource_with_runtime_data_connector():
    """Fixture: Pandas Datasource exposing only a RuntimeDataConnector."""
    # YAML config for the datasource (no filesystem connectors).
    datasource_config: str = f"""
    class_name: Datasource

    execution_engine:
        class_name: PandasExecutionEngine

    data_connectors:
        test_runtime_data_connector:
            module_name: great_expectations.datasource.data_connector
            class_name: RuntimeDataConnector
            batch_identifiers:
                - pipeline_stage_name
                - airflow_run_id
                - custom_key_0
        """

    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(datasource_config),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource