Ejemplo n.º 1
0
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    """Create a new expectation suite and render its scaffold notebook.

    Exits with status 1 (after sending a failed usage message) when a suite
    with the requested name already exists or when no datasource is selected.

    Args:
        suite: Name of the expectation suite to create.
        directory: Passed to ``load_data_context_with_error_handling`` to
            obtain the data context.
        jupyter: When true, launch the rendered notebook; otherwise print the
            command the user can run to open it.
    """
    usage_event = "cli.suite.scaffold"
    suite_name = suite
    context = load_data_context_with_error_handling(directory)
    notebook_path = _get_notebook_path(context, f"scaffold_{suite_name}.ipynb")

    def _abort() -> None:
        # Shared failure exit: record the failed event, then stop the CLI.
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    suite_already_exists = suite_name in context.list_expectation_suite_names()
    if suite_already_exists:
        toolkit.tell_user_suite_exists(suite_name)
        # Only point at the notebook when one is actually on disk.
        if os.path.isfile(notebook_path):
            cli_message(
                f"  - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        _abort()

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        _abort()

    new_suite = context.create_expectation_suite(suite_name)
    # Only the batch kwargs (fourth element) are needed here.
    _name, _generator, _asset, batch_kwargs = get_batch_kwargs(
        context, datasource_name=datasource.name
    )
    SuiteScaffoldNotebookRenderer(context, new_suite, batch_kwargs).render_to_disk(
        notebook_path
    )

    if not jupyter:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )
    else:
        toolkit.launch_jupyter_notebook(notebook_path)

    send_usage_message(data_context=context, event=usage_event, success=True)
Ejemplo n.º 2
0
def test_launch_jupyter_notebook_env_set_in_env(mock_subprocess):
    """Check the subprocess call made when GE_JUPYTER_CMD is set in the environment."""
    patched_env = {"GE_JUPYTER_CMD": "jupyter notebook --test-args"}
    expected_command = "jupyter notebook --test-args test_path"

    # clear=True makes the patched mapping the only content of os.environ
    # for the duration of the block.
    with mock.patch.dict(os.environ, patched_env, clear=True):
        toolkit.launch_jupyter_notebook("test_path")
        mock_subprocess.assert_called_once_with(expected_command, shell=True)
Ejemplo n.º 3
0
def _datasource_new_flow(
    context: DataContext,
    usage_event_end: str,
    datasource_name: Optional[str] = None,
    jupyter: bool = True,
) -> None:
    """Interactively create a new Datasource notebook and optionally open it.

    Prompts the user to choose between a filesystem and a SQL backend, builds
    the matching helper, lets the helper prompt for its settings and create a
    notebook, then either prints the command to open the notebook or launches
    Jupyter.

    Args:
        context: Data context the new Datasource belongs to.
        usage_event_end: Usage-statistics event name sent on completion.
        datasource_name: Optional name forwarded to the helper.
        jupyter: When ``False``, print the notebook command instead of
            launching Jupyter.

    Returns:
        None. Returns early (without sending ``usage_event_end``) when the
        SQLAlchemy modules or the helper's libraries are not available, or
        when no helper could be selected.
    """
    files_or_sql_selection = click.prompt(
        """
What data would you like Great Expectations to connect to?
    1. Files on a filesystem (for processing with Pandas or Spark)
    2. Relational database (SQL)
""",
        type=click.Choice(["1", "2"]),
        show_choices=False,
    )

    helper = None
    if files_or_sql_selection == "1":
        selected_files_backend = _prompt_for_execution_engine()
        helper = _get_files_helper(
            selected_files_backend,
            context_root_dir=context.root_directory,
            datasource_name=datasource_name,
        )
    elif files_or_sql_selection == "2":
        if not _verify_sqlalchemy_dependent_modules():
            return None
        selected_database = _prompt_user_for_database_backend()
        helper = _get_sql_yaml_helper_class(selected_database, datasource_name)

    # click.Choice should restrict the answer to "1" or "2", but never
    # dereference a missing helper: that would surface as an opaque
    # AttributeError on None instead of a clean exit.
    if helper is None:
        return None

    helper.send_backend_choice_usage_message(context)
    if not helper.verify_libraries_installed():
        return None
    helper.prompt()
    notebook_path = helper.create_notebook(context)

    # Identity check kept on purpose: only an explicit False disables Jupyter.
    if jupyter is False:
        cli_message(
            f"To continue editing this Datasource, run <green>jupyter notebook {notebook_path}</green>"
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        return None

    if notebook_path:
        cli_message(
            """<green>Because you requested to create a new Datasource, we'll open a notebook for you now to complete it!</green>\n\n"""
        )
        # Report success before handing control to Jupyter.
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        toolkit.launch_jupyter_notebook(notebook_path)
Ejemplo n.º 4
0
def test_launch_jupyter_notebook_env_none(mock_subprocess):
    """Check the subprocess call made when GE_JUPYTER_CMD is not set.

    The variable is removed for the duration of the test and always restored
    afterwards, whether the assertions pass or fail, so the test does not
    leak environment changes into the rest of the session.
    """
    # pop() with a default both reads and removes the value, and is safe when
    # the variable is not defined at all.
    saved_cmd = os.environ.pop("GE_JUPYTER_CMD", None)
    try:
        toolkit.launch_jupyter_notebook("test_path")
        mock_subprocess.assert_called_once_with(["jupyter", "notebook", "test_path"])
    finally:
        if saved_cmd is not None:
            os.environ["GE_JUPYTER_CMD"] = saved_cmd
Ejemplo n.º 5
0
def _checkpoint_new(ctx, checkpoint_name, jupyter):
    """Render the notebook used to create a new Checkpoint and optionally open it.

    Args:
        ctx: CLI context; ``ctx.obj`` supplies ``data_context`` and
            ``usage_event_end``.
        checkpoint_name: Name of the Checkpoint to create.
        jupyter: When true, launch the notebook; otherwise print the command
            to open it.

    Any exception raised along the way is reported through
    ``toolkit.exit_with_failure_message_and_stats``.
    """
    cli_obj = ctx.obj
    context: DataContext = cli_obj.data_context
    usage_event_end: str = cli_obj.usage_event_end

    try:
        _verify_checkpoint_does_not_exist(context, checkpoint_name, usage_event_end)

        # Write the notebook to disk.
        notebook_file_path = _get_notebook_path(
            context, f"edit_checkpoint_{checkpoint_name}.ipynb"
        )
        CheckpointNewNotebookRenderer(
            context=context, checkpoint_name=checkpoint_name
        ).render_to_disk(notebook_file_path=notebook_file_path)

        if not jupyter:
            cli_message(
                f"To continue editing this Checkpoint, run <green>jupyter notebook {notebook_file_path}</green>"
            )

        # Success is reported before Jupyter is launched.
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )

        if not jupyter:
            return

        cli_message(
            """<green>Because you requested to create a new Checkpoint, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
        )
        toolkit.launch_jupyter_notebook(notebook_file_path)

    except Exception as error:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{error}</red>",
        )
Ejemplo n.º 6
0
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    """Render the edit notebook for an existing expectation suite.

    Batch kwargs are resolved in this order: the JSON string passed in
    ``batch_kwargs``, else the ``batch_kwargs`` entry of the suite's last
    citation, else an interactive selection via ``get_batch_kwargs``.

    Args:
        suite: Identifier passed to ``load_expectation_suite``.
        datasource: Optional datasource name; when given together with JSON
            batch kwargs it is stored under their ``"datasource"`` key.
        directory: Passed to ``load_data_context_with_error_handling``.
        jupyter: When true, launch the notebook; otherwise print the command
            to open it.
        batch_kwargs: Optional JSON string describing the batch to load.
        usage_event: Usage-statistics event name.

    User-facing failures print a message, send a failed usage message and exit
    with status 1. Any other exception sends a failed usage message and is
    re-raised.
    """
    context = load_data_context_with_error_handling(directory)
    resolved_batch_kwargs = None

    def _abort(message=None):
        # Shared failure exit: optional message, failed usage stat, exit code 1.
        if message is not None:
            cli_message(message)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    try:
        expectation_suite = load_expectation_suite(context, suite)
        citations = expectation_suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs:
            try:
                resolved_batch_kwargs = json.loads(batch_kwargs)
                if datasource:
                    resolved_batch_kwargs["datasource"] = datasource
                # Loading the batch is the check; its result is not used.
                toolkit.load_batch(context, expectation_suite, resolved_batch_kwargs)
            except json_parse_exception as parse_error:
                _abort(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        parse_error
                    )
                )
            except ge_exceptions.DataContextError:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as value_error:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        value_error
                    )
                )
        elif citations:
            # The last citation in the list wins.
            resolved_batch_kwargs = citations[-1].get("batch_kwargs")

        if not resolved_batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as value_error:
                _abort("<red>{}</red>".format(value_error))

            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Only prompt when nothing was resolved at all; an empty but
            # non-None value is passed through to the renderer unchanged.
            if resolved_batch_kwargs is None:
                (
                    _datasource_name,
                    _batch_kwargs_generator,
                    _data_asset,
                    resolved_batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=None,
                )

        notebook_path = _get_notebook_path(
            context, "edit_{}.ipynb".format(expectation_suite.expectation_suite_name)
        )
        SuiteEditNotebookRenderer().render_to_disk(
            expectation_suite, notebook_path, resolved_batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=expectation_suite.expectation_suite_name,
        )
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as error:
        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the deliberate exits above are not caught here.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise error
Ejemplo n.º 7
0
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[Union[str, Dict[str, Union[str, int,
                                                       Dict[str,
                                                            Any]]]]] = None,
    additional_batch_request_args: Optional[Dict[str,
                                                 Union[str, int,
                                                       Dict[str,
                                                            Any]]]] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    """Render the edit (or profile) notebook for an expectation suite.

    Loads the suite, resolves a batch request when running interactively or
    profiling, renders the matching notebook to disk, reports usage
    statistics, and launches Jupyter unless ``no_jupyter`` is set.

    Args:
        context: Data context that owns the suite.
        expectation_suite_name: Name of the suite to edit.
        profile: When true, render the profiling notebook instead of the
            plain edit notebook.
        usage_event: Usage-statistics event name; replaced by ``None`` when
            ``suppress_usage_message`` is set.
        interactive: When true (or when ``profile`` is true), a batch request
            is resolved before rendering.
        no_jupyter: When true, print the command to open the notebook instead
            of launching Jupyter.
        create_if_not_exist: Forwarded to ``toolkit.load_expectation_suite``.
        datasource_name: Forwarded to
            ``toolkit.get_batch_request_using_datasource_name`` when a batch
            request has to be built.
        batch_request: Either a dict, or a string that is handed to
            ``toolkit.get_batch_request_from_json_file`` as a file path.
        additional_batch_request_args: Forwarded to
            ``toolkit.get_batch_request_using_datasource_name``.
        suppress_usage_message: When true, this function sends no usage
            messages itself.
        assume_yes: When true, skip the confirmation prompt shown before
            profiling.

    The listed data-context, profiler, value, OS and SQLAlchemy errors are
    printed and end the process with exit status 1. Any other exception is
    re-raised after a failed usage message is sent (unless suppressed).
    """
    # suppress_usage_message is set to True when this function is called by
    # _suite_new_workflow(); otherwise it keeps its default of False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )

    try:
        # A batch request is only resolved for interactive or profiling runs;
        # otherwise the caller's value is passed to the renderer untouched.
        if interactive or profile:
            # Flipped to False whenever the batch request finally used differs
            # from the one recorded in the suite's citations.
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[Union[str, Dict[str, Union[
                str, Dict[str,
                          Any]]]]] = toolkit.get_batch_request_from_citations(
                              expectation_suite=suite)

            # A string batch_request is treated as the path of a JSON file.
            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            # No usable dict batch request yet: prefer the cited one, and only
            # build a new one from the datasource when that is unusable too.
            # NOTE(review): BatchRequest(**...) is evaluated only for its
            # truthiness; presumably it also serves as validation of the dict
            # -- confirm.
            if not (batch_request and isinstance(batch_request, dict)
                    and BatchRequest(**batch_request)):
                if (batch_request_from_citation
                        and isinstance(batch_request_from_citation, dict)
                        and BatchRequest(**batch_request_from_citation)):
                    # Deep copy so later changes do not touch the cited dict.
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    # NOTE(review): suppress_usage_message is hard-coded to
                    # False here, while the JSON-file path above forwards the
                    # caller's flag -- confirm this is intentional.
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=
                        additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            # Record the new batch request on the suite when it changed.
            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            # Ask for confirmation before profiling unless the caller opted out.
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name)

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context).render_to_disk(
                    suite=suite,
                    notebook_file_path=notebook_path,
                    batch_request=batch_request,
                )

        # Tell the user what happens next before any usage message is sent.
        if no_jupyter:
            cli_message(
                string=
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string=
                """<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name)

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        # Jupyter is launched last, after success has been reported.
        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)

    # These errors are shown to the user and end the process with exit code 1.
    except (
            ge_exceptions.DataContextError,
            ge_exceptions.ProfilerError,
            ValueError,
            OSError,
            SQLAlchemyError,
    ) as e:
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(data_context=context,
                                       event=usage_event,
                                       success=False)
        sys.exit(1)

    # Anything else is reported as a failure and propagated to the caller.
    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(data_context=context,
                                       event=usage_event,
                                       success=False)
        raise e