def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    usage_event = "cli.suite.scaffold"
    suite_name = suite
    context = load_data_context_with_error_handling(directory)
    notebook_filename = f"scaffold_{suite_name}.ipynb"
    notebook_path = _get_notebook_path(context, notebook_filename)

    if suite_name in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite_name)
        if os.path.isfile(notebook_path):
            cli_message(
                f" - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    _suite = context.create_expectation_suite(suite_name)
    _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=datasource.name)
    renderer = SuiteScaffoldNotebookRenderer(context, _suite, batch_kwargs)
    renderer.render_to_disk(notebook_path)

    if jupyter:
        toolkit.launch_jupyter_notebook(notebook_path)
    else:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )

    send_usage_message(data_context=context, event=usage_event, success=True)
def test_launch_jupyter_notebook_env_set_in_env(mock_subprocess):
    with mock.patch.dict(
        os.environ, {"GE_JUPYTER_CMD": "jupyter notebook --test-args"}, clear=True
    ):
        toolkit.launch_jupyter_notebook("test_path")
        mock_subprocess.assert_called_once_with(
            "jupyter notebook --test-args test_path", shell=True
        )
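# NOTE (assumption): `mock_subprocess` is not defined in this excerpt. In the source test
# suite it is presumably supplied by patching `subprocess.call`; a hypothetical pytest
# fixture providing an equivalent mock could look like this sketch:
from unittest import mock

import pytest


@pytest.fixture
def mock_subprocess():
    # Patch subprocess.call so launching jupyter in tests is a no-op we can assert on.
    with mock.patch("subprocess.call", return_value=True) as patched_call:
        yield patched_call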
def _datasource_new_flow(
    context: DataContext,
    usage_event_end: str,
    datasource_name: Optional[str] = None,
    jupyter: bool = True,
) -> None:
    files_or_sql_selection = click.prompt(
        """
What data would you like Great Expectations to connect to?
    1. Files on a filesystem (for processing with Pandas or Spark)
    2. Relational database (SQL)
""",
        type=click.Choice(["1", "2"]),
        show_choices=False,
    )
    if files_or_sql_selection == "1":
        selected_files_backend = _prompt_for_execution_engine()
        helper = _get_files_helper(
            selected_files_backend,
            context_root_dir=context.root_directory,
            datasource_name=datasource_name,
        )
    elif files_or_sql_selection == "2":
        if not _verify_sqlalchemy_dependent_modules():
            return None
        selected_database = _prompt_user_for_database_backend()
        helper = _get_sql_yaml_helper_class(selected_database, datasource_name)
    else:
        helper = None

    helper.send_backend_choice_usage_message(context)
    if not helper.verify_libraries_installed():
        return None

    helper.prompt()
    notebook_path = helper.create_notebook(context)

    if jupyter is False:
        cli_message(
            f"To continue editing this Datasource, run <green>jupyter notebook {notebook_path}</green>"
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        return None

    if notebook_path:
        cli_message(
            """<green>Because you requested to create a new Datasource, we'll open a notebook for you now to complete it!</green>\n\n"""
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        toolkit.launch_jupyter_notebook(notebook_path)
def test_launch_jupyter_notebook_env_none(mock_subprocess):
    temp = None
    try:
        if os.environ.get("GE_JUPYTER_CMD"):
            # Stash and clear any override so the default command path is exercised.
            temp = os.environ.get("GE_JUPYTER_CMD")
            del os.environ["GE_JUPYTER_CMD"]
        toolkit.launch_jupyter_notebook("test_path")
        mock_subprocess.assert_called_once_with(["jupyter", "notebook", "test_path"])
    except Exception:
        if temp:
            os.environ["GE_JUPYTER_CMD"] = temp
        raise
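# A minimal sketch (an assumption inferred from the two tests above, not copied from the
# toolkit implementation) of the behavior under test: launch_jupyter_notebook honors the
# GE_JUPYTER_CMD environment variable as a shell command override, and otherwise falls
# back to a plain `jupyter notebook <path>` invocation.
import os
import subprocess


def launch_jupyter_notebook_sketch(notebook_path: str) -> None:
    jupyter_command_override = os.environ.get("GE_JUPYTER_CMD")
    if jupyter_command_override:
        # e.g. GE_JUPYTER_CMD="jupyter notebook --test-args" -> "jupyter notebook --test-args <path>"
        subprocess.call(f"{jupyter_command_override} {notebook_path}", shell=True)
    else:
        subprocess.call(["jupyter", "notebook", notebook_path])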
def _checkpoint_new(ctx, checkpoint_name, jupyter):
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        _verify_checkpoint_does_not_exist(context, checkpoint_name, usage_event_end)

        # Create notebook on disk
        notebook_name = f"edit_checkpoint_{checkpoint_name}.ipynb"
        notebook_file_path = _get_notebook_path(context, notebook_name)
        checkpoint_new_notebook_renderer = CheckpointNewNotebookRenderer(
            context=context, checkpoint_name=checkpoint_name
        )
        checkpoint_new_notebook_renderer.render_to_disk(
            notebook_file_path=notebook_file_path
        )

        if not jupyter:
            cli_message(
                f"To continue editing this Checkpoint, run <green>jupyter notebook {notebook_file_path}</green>"
            )

        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )

        if jupyter:
            cli_message(
                """<green>Because you requested to create a new Checkpoint, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )
            toolkit.launch_jupyter_notebook(notebook_file_path)
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = load_data_context_with_error_handling(directory)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)
    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    # suppress_usage_message flag is for the situation where _suite_edit_workflow is called
    # by _suite_new_workflow(). When called by _suite_new_workflow(), the flag will be set
    # to True; otherwise it will default to False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )

    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name,
                )

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context
            ).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        sys.exit(1)
    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        raise e