Example 1

def test_suite_notebook_renderer_render_user_configurable_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that UserConfigurableProfiler is necessary
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.profile.user_configurable_profiler import (
    UserConfigurableProfiler,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError""",
        # Batch request
        """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}""",
        # Profiler instantiation/usage
        """profiler = UserConfigurableProfiler(
    profile_dataset=validator,
    excluded_expectations=None,
    ignored_columns=ignored_columns,
    not_null_only=False,
    primary_or_compound_key=False,
    semantic_types_dict=None,
    table_expectations_only=False,
    value_set_threshold="MANY",
)
suite = profiler.build_suite()""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"
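
The tests in this example take a mock_data_context argument (supplied by a fixture or a mock.patch decorator not shown here) and use a find_code_in_notebook helper that is also not shown. A minimal stand-in sketch, assuming pytest and nbformat (these are assumptions, not the actual Great Expectations test utilities):

import pytest
from unittest import mock

from nbformat import NotebookNode


@pytest.fixture
def mock_data_context() -> mock.MagicMock:
    # Assumed stand-in: a bare MagicMock playing the role of a DataContext.
    return mock.MagicMock()


def find_code_in_notebook(notebook: NotebookNode, snippet: str) -> bool:
    # Assumed stand-in: True if the snippet text appears in any code cell of the rendered notebook.
    return any(
        snippet in cell["source"]
        for cell in notebook["cells"]
        if cell["cell_type"] == "code"
    )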


def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        # A non-empty name signals that a Rule-Based Profiler (RBP) is loaded from the context's profiler store
        profiler_name="my_profiler",
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """\
import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError
""",
        # Batch request
        """\
batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
""",
        # Profiler instantiation/usage
        """\
result = context.run_profiler_with_dynamic_arguments(
    name="my_profiler",
    batch_request=batch_request,
)
validator.expectation_suite = result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"


def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        # A non-empty name signals that a Rule-Based Profiler (RBP) is loaded from the context's profiler store
        profiler_name="my_profiler",
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = SNIPPETS_RULE_BASED_PROFILER

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"


def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that OnboardingDataAssistant is invoked
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = SNIPPETS_USER_CONFIGURABLE_PROFILER

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"

Example 5

def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    # The suppress_usage_message flag covers the case where _suite_edit_workflow() is called by
    # _suite_new_workflow(): that caller sets it to True; otherwise it defaults to False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )

    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            # Resolve the batch_request in priority order: an explicit, valid batch_request wins;
            # otherwise fall back to the batch_request recorded in the suite's citations;
            # otherwise build one from the named datasource via the toolkit helper.
            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = f"edit_{expectation_suite_name}.ipynb"
        notebook_path: str = _get_notebook_path(context, notebook_name)

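        # With profile=True, render a profiling notebook that builds the suite for the user;
        # otherwise render the standard suite-edit notebook for manual editing.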
        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name,
                )

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context
            ).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name,
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)

    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(string=f"<red>{e}</red>")
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                success=False,
            )
        sys.exit(1)

    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                success=False,
            )
        raise e
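
A hypothetical invocation sketch (not part of the original source) showing how a CLI command might call _suite_edit_workflow; the context, suite name, usage_event value, and batch request path are placeholders:

import great_expectations as ge

context = ge.data_context.DataContext()  # loads the project's great_expectations.yml
_suite_edit_workflow(
    context=context,
    expectation_suite_name="my_expectation_suite",
    profile=True,                   # render a profiling notebook via SuiteProfileNotebookRenderer
    usage_event="cli.suite.edit",   # placeholder usage-statistics event name
    interactive=True,
    no_jupyter=True,                # only write the notebook to disk; do not launch Jupyter
    assume_yes=True,                # skip the confirmation prompt before profiling
    batch_request="batch_request.json",  # a JSON file path is parsed by toolkit.get_batch_request_from_json_file
)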


def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that OnboardingDataAssistant is invoked
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """\
import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.rule_based_profiler.types.data_assistant_result import (
    DataAssistantResult,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError""",
        # Batch request
        """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
""",
        # OnboardingDataAssistant instantiation/usage
        """\
data_assistant_result: DataAssistantResult = context.assistants.onboarding.run(
    batch_request=batch_request,
    # include_column_names=include_column_names,
    exclude_column_names=exclude_column_names,
    # include_column_name_suffixes=include_column_name_suffixes,
    # exclude_column_name_suffixes=exclude_column_name_suffixes,
    # semantic_type_filter_module_name=semantic_type_filter_module_name,
    # semantic_type_filter_class_name=semantic_type_filter_class_name,
    # include_semantic_types=include_semantic_types,
    # exclude_semantic_types=exclude_semantic_types,
    # allowed_semantic_types_passthrough=allowed_semantic_types_passthrough,
    cardinality_limit_mode="rel_100",  # case-insenstive (see documentaiton for other options)
    # max_unique_values=max_unique_values,
    # max_proportion_unique=max_proportion_unique,
    # column_value_uniqueness_rule={
    #     "success_ratio": 0.8,
    # },
    # column_value_nullity_rule={
    # },
    # column_value_nonnullity_rule={
    # },
    # numeric_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "random_seed": 43792,
    # },
    # datetime_columns_rule={
    #     "truncate_values": {
    #         "lower_bound": 0,
    #         "upper_bound": 4481049600,  # Friday, January 1, 2112 0:00:00
    #     },
    #     "round_decimals": 0,
    # },
    # text_columns_rule={
    #     "strict_min": True,
    #     "strict_max": True,
    #     "success_ratio": 0.8,
    # },
    # categorical_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "round_decimals": 3,
    # },
)
validator.expectation_suite = data_assistant_result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"