def store_list(ctx):
    """Print every active Store configured in the Data Context."""
    data_context = ctx.obj.data_context
    end_event: str = ctx.obj.usage_event_end
    try:
        active_stores = data_context.list_active_stores()
        cli_message(f"{len(active_stores)} active Stores found:")
        for store_config in active_stores:
            cli_message("")
            cli_message_dict(store_config)
        # Record a successful run for usage statistics.
        send_usage_message(
            data_context=data_context,
            event=end_event,
            success=True,
        )
    except Exception as e:
        # Any failure is reported (and counted as a failed event) before exiting.
        toolkit.exit_with_failure_message_and_stats(
            context=data_context,
            usage_event=end_event,
            message=f"<red>{e}</red>",
        )
        return
def docs_list(ctx):
    """Display the Data Docs sites known to this project."""
    context = ctx.obj.data_context
    site_url_dicts = context.get_docs_sites_urls()
    if not site_url_dicts:
        cli_message("No Data Docs sites found")
    else:
        site_strings = []
        for site in site_url_dicts:
            site_name = site["site_name"]
            site_url = site.get("site_url")
            if not site_url:
                # Site is declared in config but its docs were never built.
                site_url = (
                    "site configured but does not exist. Run the following command to build site: great_expectations "
                    f"docs build --site-name {site_name}"
                )
            site_strings.append(f" - <cyan>{site_name}</cyan>: {site_url}")
        intro = _build_intro_string(site_strings)
        cli_message_list(site_strings, intro)
    toolkit.send_usage_message(
        data_context=context, event="cli.docs.list", success=True
    )
def select_datasource(
    context: DataContext, datasource_name: str = None
) -> BaseDatasource:
    """Resolve a datasource, prompting the user to pick one when none was named.

    Returns None (after printing an error) when the context has no datasources.
    """
    # TODO consolidate all the myriad CLI tests into this
    selected: Optional[BaseDatasource] = None
    if datasource_name is None:
        # Stable ordering: shortest name first, alphabetical within equal lengths.
        available: List[BaseDatasource] = cast(
            List[BaseDatasource],
            sorted(context.datasources.values(), key=lambda d: (len(d.name), d.name)),
        )
        if not available:
            cli_message(
                string="<red>No datasources found in the context. To add a datasource, run `great_expectations datasource new`</red>"
            )
        elif len(available) == 1:
            datasource_name = available[0].name
        else:
            menu = "\n".join(
                f" {index}. {source.name}"
                for index, source in enumerate(available, 1)
            )
            valid_options = [str(index) for index in range(1, len(available) + 1)]
            chosen: str = click.prompt(
                "Select a datasource" + "\n" + menu + "\n",
                type=click.Choice(valid_options),
                show_choices=False,
            )
            datasource_name = available[int(chosen) - 1].name
    if datasource_name is not None:
        selected = context.get_datasource(datasource_name=datasource_name)
    return selected
def checkpoint_run(ctx: click.Context, checkpoint: str) -> None:
    """Run a Checkpoint.

    Exits with status 0 when validation succeeds and 1 when it fails; in
    both cases a success usage-stats event is sent and result details are
    printed. Unexpected errors are routed through
    toolkit.exit_with_failure_message_and_stats instead.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        result: CheckpointResult = toolkit.run_checkpoint(
            context=context,
            checkpoint_name=checkpoint,
            usage_event=usage_event_end,
        )
    except Exception as e:
        # NOTE(review): sibling commands in this file pass `context=` to
        # exit_with_failure_message_and_stats while this one passes
        # `data_context=` — confirm which keyword the toolkit function accepts.
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return
    if not result["success"]:
        cli_message(string="Validation failed!")
        # Usage event reports success=True: the CLI command itself completed,
        # even though the validation result was a failure.
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        print_validation_operator_results_details(result=result)
        sys.exit(1)
    cli_message("Validation succeeded!")
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
    print_validation_operator_results_details(result=result)
    sys.exit(0)
def _checkpoint_new(ctx, checkpoint_name, jupyter):
    """Author a new Checkpoint by rendering an edit notebook, optionally opening it."""
    usage_event: str = "cli.checkpoint.new"
    context = ctx.obj.data_context
    try:
        _verify_checkpoint_does_not_exist(context, checkpoint_name, usage_event)

        # Create notebook on disk
        notebook_path = _get_notebook_path(
            context, f"edit_checkpoint_{checkpoint_name}.ipynb"
        )
        renderer = CheckpointNewNotebookRenderer(
            context=context, checkpoint_name=checkpoint_name
        )
        renderer.render_to_disk(notebook_file_path=notebook_path)

        if not jupyter:
            cli_message(
                f"To continue editing this Checkpoint, run <green>jupyter notebook {notebook_path}</green>"
            )

        toolkit.send_usage_message(context, event=usage_event, success=True)

        if jupyter:
            cli_message(
                """<green>Because you requested to create a new Checkpoint, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )
            toolkit.launch_jupyter_notebook(notebook_path)
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            context=context,
            usage_event=usage_event,
            message=f"<red>{e}</red>",
        )
        return
def _suite_new_process_profile_and_batch_request_flags(
    interactive_mode: CLISuiteInteractiveFlagCombinations,
    profile: bool,
    batch_request: Optional[str],
) -> CLISuiteInteractiveFlagCombinations:
    """Reconcile --profile / --batch-request with the interactive/manual choice.

    Passing either flag implies interactive mode; the returned combination
    records which flags the user actually supplied.
    """
    # `interactive_flag` is tri-state: None means the user supplied neither
    # --interactive nor --manual, so explicit `is True` / `is False` checks
    # are required rather than plain truthiness.
    interactive_flag = interactive_mode.value["interactive_flag"]
    combos = CLISuiteInteractiveFlagCombinations
    notice: Optional[str] = None

    if profile:
        # Assume batch needed if user passes --profile
        if interactive_flag is None:
            notice = "<green>Entering interactive mode since you passed the --profile flag</green>"
            interactive_mode = (
                combos.UNPROMPTED_OVERRIDE_INTERACTIVE_FALSE_MANUAL_FALSE_PROFILE_TRUE
            )
        elif interactive_flag is True:
            interactive_mode = (
                combos.UNPROMPTED_OVERRIDE_INTERACTIVE_TRUE_MANUAL_FALSE_PROFILE_TRUE
            )
        elif interactive_flag is False:
            notice = "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --profile flag</yellow>"
            interactive_mode = (
                combos.UNPROMPTED_OVERRIDE_INTERACTIVE_FALSE_MANUAL_TRUE_PROFILE_TRUE
            )
    elif batch_request is not None:
        if interactive_flag is None:
            notice = "<green>Entering interactive mode since you passed the --batch-request flag</green>"
            interactive_mode = (
                combos.UNPROMPTED_OVERRIDE_INTERACTIVE_FALSE_MANUAL_FALSE_BATCH_REQUEST_SPECIFIED
            )
        elif interactive_flag is False:
            notice = "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --batch-request flag</yellow>"
            interactive_mode = (
                combos.UNPROMPTED_OVERRIDE_INTERACTIVE_FALSE_MANUAL_TRUE_BATCH_REQUEST_SPECIFIED
            )

    if notice is not None:
        cli_message(notice)
    return interactive_mode
def run_checkpoint(
    context: DataContext,
    checkpoint_name: str,
    usage_event: str,
) -> CheckpointResult:
    """Run a Checkpoint or raise helpful errors.

    The Checkpoint is validated first; a CheckpointError during the run is
    reported and terminates the process via exit_with_failure_message_and_stats.
    """
    failure_message: str = "Exception occurred while running Checkpoint."
    # Fail fast with a helpful message if the Checkpoint is misconfigured.
    validate_checkpoint(
        context=context,
        checkpoint_name=checkpoint_name,
        usage_event=usage_event,
        failure_message=failure_message,
    )
    try:
        return context.run_checkpoint(checkpoint_name=checkpoint_name)
    except ge_exceptions.CheckpointError as e:
        cli_message(string=failure_message)
        exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event,
            message=f"<red>{e}.</red>",
        )
def checkpoint_delete(ctx, checkpoint):
    """Delete a Checkpoint.

    Exits 0 after a successful delete. Any error is reported (with a failed
    usage-stats event) by exit_with_failure_message_and_stats, which exits
    non-zero.
    """
    usage_event: str = "cli.checkpoint.delete"
    context: DataContext = ctx.obj.data_context
    try:
        toolkit.delete_checkpoint(
            context=context,
            checkpoint_name=checkpoint,
            usage_event=usage_event,
        )
        # Reuse `usage_event` rather than repeating the literal string so the
        # two usage-stats call sites cannot drift apart.
        toolkit.send_usage_message(context, event=usage_event, success=True)
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            context=context,
            usage_event=usage_event,
            message=f"<red>{e}</red>",
        )
        return
    cli_message(f'Checkpoint "{checkpoint}" deleted.')
    sys.exit(0)
def project_check_config(ctx):
    """Check a config for validity and help with migrations."""
    cli_message("Checking your config files for validity...\n")
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    is_config_ok, error_message, context = do_config_check(
        config_location.get("directory")
    )
    # A context is only returned when the config could at least be loaded.
    if context:
        toolkit.send_usage_message(
            data_context=context, event="cli.project.check_config", success=True
        )
    if not is_config_ok:
        cli_message("Unfortunately, your config appears to be invalid:\n")
        cli_message(f"<red>{error_message}</red>")
        sys.exit(1)
    cli_message("<green>Your config file appears valid!</green>")
def delete_datasource(ctx: click.Context, datasource: str) -> None:
    """Delete the datasource specified as an argument.

    Prompts for confirmation unless --assume-yes was given. Exits 0 on a
    verified delete, 1 when the datasource is missing or the delete did not
    take effect.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    if not ctx.obj.assume_yes:
        toolkit.confirm_proceed_or_exit(
            confirm_prompt=f"""\nAre you sure you want to delete the Datasource "{datasource}" (this action is irreversible)?" """,
            continuation_message=f"Datasource `{datasource}` was not deleted.",
            exit_on_no=True,
            data_context=context,
            usage_stats_event=usage_event_end,
        )
    try:
        context.delete_datasource(datasource)
    except ValueError:
        cli_message(f"<red>Datasource {datasource} could not be found.</red>")
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=False,
        )
        sys.exit(1)
    # Verify the deletion: get_datasource raising ValueError is the success signal.
    try:
        context.get_datasource(datasource)
    except ValueError:
        cli_message("<green>{}</green>".format("Datasource deleted successfully."))
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        sys.exit(0)
    else:
        # BUG FIX: previously, if the datasource could still be retrieved after
        # the delete, the command fell off the end and exited 0 with no output.
        # Report the failure explicitly and exit non-zero.
        cli_message("<red>{}</red>".format("Datasource not deleted."))
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=False,
        )
        sys.exit(1)
def datasource_list(ctx):
    """List known Datasources."""
    context = ctx.obj.data_context
    end_event: str = ctx.obj.usage_event_end
    try:
        datasources = context.list_datasources()
        cli_message(_build_datasource_intro_string(datasources))
        for source in datasources:
            cli_message("")
            # Only surface the identifying fields, not the full config.
            summary = {
                "name": source["name"],
                "class_name": source["class_name"],
            }
            cli_message_dict(summary)
        toolkit.send_usage_message(
            data_context=context, event=end_event, success=True
        )
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            context=context,
            usage_event=end_event,
            message=f"<red>{e}</red>",
        )
        return
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    # load_data_context_with_error_handling performs the upgrade itself and
    # handles (and exits on) any errors; a truthy return means we are current.
    upgraded_context = load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=True,
    )
    if upgraded_context:
        message = "Your project is up-to-date - no further upgrade is necessary.\n"
        cli_message(f"<green>{message}</green>")
        sys.exit(0)
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    context = load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=True,
    )
    # Guard clause: a falsy return means the upgrade could not complete.
    if not context:
        failure_message = "Error: Your project could not be upgraded.\n"
        cli_message(f"<red>{failure_message}</red>")
        sys.exit(1)
    sys.exit(0)
def _profile_to_create_a_suite(
    additional_batch_kwargs,
    batch_kwargs,
    batch_kwargs_generator_name,
    context,
    datasource_name,
    expectation_suite_name,
    data_asset_name,
    profiler_configuration,
):
    """Profile a batch of data to seed an example Expectation Suite.

    Tells the user where the suite will be stored, asks for confirmation,
    then runs BasicSuiteBuilderProfiler over the named data asset. Exits via
    _raise_profiling_errors if profiling fails; otherwise returns the
    profiling results dict.
    """
    cli_message(
        """
Great Expectations will choose a couple of columns and generate expectations about them
to demonstrate some examples of assertions you can make about your data.

Great Expectations will store these expectations in a new Expectation Suite '{:s}' here:

  {:s}
""".format(
            expectation_suite_name,
            # Resolve the storage URL of the suite-to-be from the configured
            # expectations store backend.
            context.stores[
                context.expectations_store_name
            ].store_backend.get_url_for_key(
                ExpectationSuiteIdentifier(
                    expectation_suite_name=expectation_suite_name
                ).to_tuple()
            ),
        )
    )

    confirm_proceed_or_exit()

    # TODO this may not apply
    cli_message("\nGenerating example Expectation Suite...")
    # UTC timestamp run_id keeps repeated profiling runs distinguishable.
    run_id = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y%m%dT%H%M%S.%fZ"
    )
    profiling_results = context.profile_data_asset(
        datasource_name,
        batch_kwargs_generator_name=batch_kwargs_generator_name,
        data_asset_name=data_asset_name,
        batch_kwargs=batch_kwargs,
        profiler=BasicSuiteBuilderProfiler,
        profiler_configuration=profiler_configuration,
        expectation_suite_name=expectation_suite_name,
        run_id=run_id,
        additional_batch_kwargs=additional_batch_kwargs,
    )
    if not profiling_results["success"]:
        # Helper reports the specific profiling error and exits.
        _raise_profiling_errors(profiling_results)

    cli_message("\nDone generating example Expectation Suite")
    return profiling_results
def delete_datasource(ctx, datasource):
    """Delete the datasource specified as an argument.

    Exits 0 when the deletion is verified, 1 when the datasource is missing
    or the delete did not take effect.
    """
    context = ctx.obj.data_context
    try:
        context.delete_datasource(datasource)
    except ValueError:
        cli_message(
            "<red>{}</red>".format(
                "Datasource {} could not be found.".format(datasource)
            )
        )
        sys.exit(1)
    # Verify the deletion: get_datasource raising ValueError is the success
    # signal here (the datasource is really gone).
    try:
        context.get_datasource(datasource)
    except ValueError:
        cli_message("<green>{}</green>".format("Datasource deleted successfully."))
        # BUG FIX: the success path previously exited with status 1, making a
        # successful delete indistinguishable from a failure to scripts.
        sys.exit(0)
    else:
        cli_message("<red>{}</red>".format("Datasource not deleted."))
        sys.exit(1)
def load_data_context_with_error_handling(
    directory: str, from_cli_upgrade_command: bool = False
) -> DataContext:
    """Return a DataContext with good error handling and exit codes.

    When invoked from the `project upgrade` command
    (``from_cli_upgrade_command=True``) this also emits upgrade begin/end
    usage-stats events and attempts a one-version config upgrade for projects
    whose config_version is behind CURRENT_GE_CONFIG_VERSION. Every
    recognized configuration error prints a red message and terminates the
    process with exit status 1.
    """
    try:
        context: DataContext = DataContext(context_root_dir=directory)
        if from_cli_upgrade_command:
            try:
                send_usage_message(
                    data_context=context,
                    event="cli.project.upgrade.begin",
                    success=True,
                )
            except Exception:
                # Don't fail for usage stats
                pass
        ge_config_version: int = context.get_config().config_version
        if (
            from_cli_upgrade_command
            and int(ge_config_version) < CURRENT_GE_CONFIG_VERSION
        ):
            directory = directory or context.root_directory
            (
                increment_version,
                exception_occurred,
            ) = upgrade_project_one_version_increment(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
            if not exception_occurred and increment_version:
                # Reload so the returned context reflects the upgraded config.
                context = DataContext(context_root_dir=directory)
                if from_cli_upgrade_command:
                    send_usage_message(
                        data_context=context,
                        event="cli.project.upgrade.end",
                        success=True,
                    )
        return context
    except ge_exceptions.UnsupportedConfigVersionError as err:
        # Config version is too old to load: try a guided upgrade if a helper
        # exists for that version, otherwise surface the error and exit.
        directory = directory or DataContext.find_context_root_dir()
        ge_config_version = DataContext.get_ge_config_version(
            context_root_dir=directory
        )
        upgrade_helper_class = (
            GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
            if ge_config_version
            else None
        )
        if upgrade_helper_class and ge_config_version < CURRENT_GE_CONFIG_VERSION:
            upgrade_project(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
        else:
            cli_message(string="<red>{}</red>".format(err.message))
            sys.exit(1)
    except (
        ge_exceptions.ConfigNotFoundError,
        ge_exceptions.InvalidConfigError,
    ) as err:
        cli_message(string="<red>{}</red>".format(err.message))
        sys.exit(1)
    except ge_exceptions.PluginModuleNotFoundError as err:
        cli_message(string=err.cli_colored_message)
        sys.exit(1)
    except ge_exceptions.PluginClassNotFoundError as err:
        cli_message(string=err.cli_colored_message)
        sys.exit(1)
    except ge_exceptions.InvalidConfigurationYamlError as err:
        cli_message(string=f"<red>{str(err)}</red>")
        sys.exit(1)
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite.

    Interactively selects a datasource and batch when the corresponding
    arguments are missing, prompts for a suite name when none is given, and
    either creates an empty suite or profiles the batch to generate an
    example suite. Exits the process when the datasource cannot be resolved
    or the suite name is already taken.

    WARNING: the flow and name of this method and its interaction with _profile_to_create_a_suite
    require a serious revisiting.
    :return: a tuple: (success, suite name, profiling_results)
    """
    if generator_asset:
        # Legacy argument name; mirror it onto data_asset_name.
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    data_source = select_datasource(context, datasource_name=datasource_name)
    if data_source is None:
        # select_datasource takes care of displaying an error message, so all is left here is to exit.
        sys.exit(1)

    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    if (
        batch_kwargs_generator_name is None
        or data_asset_name is None
        or batch_kwargs is None
    ):
        # Not enough information to build a batch: prompt the user for the rest.
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        default_expectation_suite_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        # Keep prompting until the user supplies a name not already in use.
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=default_expectation_suite_name,
            )
            if expectation_suite_name in context.list_expectation_suite_names():
                tell_user_suite_exists(expectation_suite_name)
            else:
                break

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)

    if open_docs:
        attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Print *message*, record a failed usage-stats event, then exit(1)."""
    cli_message(message)
    send_usage_message(context, event=usage_event, success=False)
    sys.exit(1)
def tell_user_suite_exists(suite_name: str) -> None:
    """Warn that *suite_name* is already taken and point the user at `suite edit`."""
    warning = f"""<red>An expectation suite named `{suite_name}` already exists.</red>
  - If you intend to edit the suite please use `great_expectations suite edit {suite_name}`."""
    cli_message(warning)
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    """Implementation of `suite new`: create a suite, then hand off to `suite edit`.

    Creates the suite (optionally empty) via toolkit.create_expectation_suite
    and, on success, invokes _suite_edit with usage messaging suppressed so
    only this command's own usage event is recorded.
    """
    context = toolkit.load_data_context_with_error_handling(directory)

    # These are resolved interactively inside create_expectation_suite.
    datasource_name = None
    generator_name = None
    data_asset_name = None

    try:
        if batch_kwargs is not None:
            # batch_kwargs arrives as a JSON string from the CLI flag.
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name, profiling_results = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            data_asset_name=data_asset_name,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
                    )
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            _suite_edit(
                suite_name,
                datasource_name,
                directory,
                jupyter=jupyter,
                batch_kwargs=batch_kwargs,
                usage_event="cli.suite.edit",  # or else we will be sending `cli.suite.new` which is incorrect
                suppress_usage_message=True,  # dont want actually send usage_message since the function call is not the result of actual usage
            )
        else:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        OSError,
        SQLAlchemyError,
    ) as e:
        # Expected/known failure modes: report, count as failed, exit non-zero.
        cli_message("<red>{}</red>".format(e))
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        sys.exit(1)
    except Exception as e:
        # Unknown failure: still record the failed event, then re-raise.
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise e
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    """Render (and optionally open) the notebook used to edit an Expectation Suite.

    In interactive/profile mode a batch request is resolved from, in order:
    an explicit JSON file path, the suite's stored citation, or an
    interactive prompt keyed off ``datasource_name``; the suite citation is
    refreshed when the resolved request differs from the stored one.
    """
    # suppress_usage_message flag is for the situation where _suite_edit_workflow is called by _suite_new_workflow().
    # when called by _suite_new_workflow(), the flag will be set to True, otherwise it will default to False
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )
    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                # A string batch_request is a path to a JSON file; load it.
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            # Validate the candidate batch_request by constructing a
            # BatchRequest from it; fall back to the citation or a prompt.
            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                # Persist the freshly resolved batch request on the suite.
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name,
                )
            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context
            ).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name,
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        # Known failure modes: report, count as failed (unless suppressed), exit.
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        sys.exit(1)
    except Exception as e:
        # Unknown failure: record the failed event (unless suppressed), re-raise.
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        raise e
def _process_suite_edit_flags_and_prompt(
    context: DataContext,
    usage_event_end: str,
    interactive_flag: bool,
    manual_flag: bool,
    datasource_name: Optional[str] = None,
    batch_request: Optional[str] = None,
) -> bool:
    """
    Process various optional suite edit flags and prompt if there is not enough information from the flags.

    Args:
        context: Data Context for use in sending error messages if any
        usage_event_end: event name for ending usage stats message
        interactive_flag: --interactive from the `suite edit` CLI command
        manual_flag: --manual from the `suite edit` CLI command
        datasource_name: --datasource-name from the `suite edit` CLI command
        batch_request: --batch-request from the `suite edit` CLI command

    Returns:
        boolean of whether to enter interactive mode
    """
    # NOTE: docstring previously said "`suite new` CLI command" for every flag —
    # copy/paste error; these flags come from `suite edit`.
    error_message: Optional[str] = None

    # Convert interactive / no-interactive flags to interactive
    interactive: Optional[bool] = None
    if interactive_flag is True and manual_flag is True:
        error_message = """Please choose either --interactive or --manual, you may not choose both."""
    elif interactive_flag is False and manual_flag is False:
        # Neither flag supplied: None means "ask the user".
        interactive = None
    elif interactive_flag is True and manual_flag is False:
        interactive = True
    elif interactive_flag is False and manual_flag is True:
        interactive = False

    if (datasource_name is not None) and (batch_request is not None):
        error_message = """Only one of --datasource-name DATASOURCE_NAME and --batch-request <path to JSON file> \
options can be used.
"""

    if error_message is not None:
        cli_message(string=f"<red>{error_message}</red>")
        toolkit.send_usage_message(
            data_context=context, event=usage_event_end, success=False
        )
        sys.exit(1)

    user_provided_any_flag_skip_prompt: bool = any(
        (
            (interactive is not None),
            (datasource_name is not None),
            (batch_request is not None),
        )
    )
    # Note - explicit check for boolean or None for `interactive: Optional[bool]` is necessary because None indicates
    # that a user did not supply either flag.
    if user_provided_any_flag_skip_prompt:
        # --datasource-name or --batch-request imply interactive mode.
        if datasource_name is not None:
            if interactive is None:
                cli_message(
                    "<green>Entering interactive mode since you passed the --datasource-name flag</green>"
                )
            elif interactive is False:
                cli_message(
                    "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --datasource-name flag</yellow>"
                )
            interactive = True
        elif batch_request is not None:
            if interactive is None:
                cli_message(
                    "<green>Entering interactive mode since you passed the --batch-request flag</green>"
                )
            elif interactive is False:
                cli_message(
                    "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --batch-request flag</yellow>"
                )
            interactive = True
    else:
        suite_edit_method: str = click.prompt(
            """\
How would you like to edit your Expectation Suite?
    1. Manually, without interacting with a sample batch of data (default)
    2. Interactively, with a sample batch of data
""",
            type=click.Choice(["1", "2"]),
            show_choices=False,
            default="1",
            show_default=False,
        )
        # Dead code removed: click.prompt with default="1" and
        # Choice(["1", "2"]) can only return "1" or "2", so the previous
        # `if suite_edit_method == "":` branch was unreachable.
        interactive = suite_edit_method == "2"

    return interactive
def _suite_new_workflow(
    context: DataContext,
    expectation_suite_name: str,
    interactive: bool,
    profile: bool,
    no_jupyter: bool,
    usage_event: str,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
):
    """Create a new Expectation Suite and chain into the edit workflow.

    In interactive mode a batch request is resolved (from a JSON file path or
    an interactive datasource prompt) before the suite is created; the edit
    workflow is then invoked with usage messaging suppressed so only this
    command's events are recorded.
    """
    try:
        datasource_name: Optional[str] = None
        data_asset_name: Optional[str] = None

        additional_batch_request_args: Optional[
            Dict[str, Union[str, int, Dict[str, Any]]]
        ] = {"limit": 1000}

        if interactive:
            if batch_request is not None and isinstance(batch_request, str):
                # A string batch_request is a path to a JSON file; load it.
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=False,
                )
            if not batch_request:
                batch_request = toolkit.get_batch_request_using_datasource_name(
                    data_context=context,
                    datasource_name=datasource_name,
                    usage_event=usage_event,
                    suppress_usage_message=False,
                    additional_batch_request_args=additional_batch_request_args,
                )
                # In this case, we have "consumed" the additional_batch_request_args
                additional_batch_request_args = {}
            data_asset_name = batch_request.get("data_asset_name")
        else:
            batch_request = None

        suite: ExpectationSuite = toolkit.get_or_create_expectation_suite(
            expectation_suite_name=expectation_suite_name,
            data_context=context,
            data_asset_name=data_asset_name,
            usage_event=usage_event,
            suppress_usage_message=False,
            batch_request=batch_request,
            create_if_not_exist=True,
        )
        expectation_suite_name = suite.expectation_suite_name

        toolkit.add_citation_with_batch_request(
            data_context=context,
            expectation_suite=suite,
            batch_request=batch_request,
        )

        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=True
        )

        if batch_request:
            datasource_name = batch_request.get("datasource_name")

        # This usage event is suppressed via suppress_usage_message but here because usage_event is not optional
        usage_event = "cli.suite.edit.begin"  # or else we will be sending `cli.suite.new` which is incorrect
        # do not want to actually send usage_message, since the function call is not the result of actual usage
        _suite_edit_workflow(
            context=context,
            expectation_suite_name=expectation_suite_name,
            profile=profile,
            usage_event=usage_event,
            interactive=interactive,
            no_jupyter=no_jupyter,
            create_if_not_exist=True,
            datasource_name=datasource_name,
            batch_request=batch_request,
            additional_batch_request_args=additional_batch_request_args,
            suppress_usage_message=True,
            assume_yes=False,
        )
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        # Known failure modes: report, count as failed, exit non-zero.
        cli_message(string="<red>{}</red>".format(e))
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        sys.exit(1)
    except Exception as e:
        # Unknown failure: record the failed event, then re-raise.
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise e
def _process_suite_new_flags_and_prompt(
    context: DataContext,
    usage_event_end: str,
    interactive_flag: bool,
    manual_flag: bool,
    profile: bool,
    batch_request: Optional[str] = None,
) -> Dict[str, Optional[bool]]:
    """
    Process various optional suite new flags and prompt if there is not enough information from the flags.

    Args:
        context: Data Context for use in sending error messages if any
        usage_event_end: event name for ending usage stats message
        interactive_flag: --interactive from the `suite new` CLI command
        manual_flag: --manual from the `suite new` CLI command
        profile: --profile from the `suite new` CLI command
        batch_request: --batch-request from the `suite new` CLI command

    Returns:
        Dictionary with keys of processed parameters and boolean values e.g.
        {"interactive": True, "profile": False}
    """
    error_message: Optional[str] = None

    # Convert interactive / no-interactive flags to interactive
    interactive: Optional[bool] = None
    if interactive_flag is True and manual_flag is True:
        error_message = """Please choose either --interactive or --manual, you may not choose both."""
    elif interactive_flag is False and manual_flag is False:
        # Neither flag supplied: None means "ask the user".
        interactive = None
    elif interactive_flag is True and manual_flag is False:
        interactive = True
    elif interactive_flag is False and manual_flag is True:
        interactive = False

    if error_message is not None:
        cli_message(string=f"<red>{error_message}</red>")
        toolkit.send_usage_message(
            data_context=context, event=usage_event_end, success=False
        )
        sys.exit(1)

    user_provided_any_flag_skip_prompt: bool = any(
        ((interactive is not None), (profile is True), (batch_request is not None))
    )
    # Note - explicit check for boolean or None for `interactive: Optional[bool]` is necessary because None indicates
    # that a user did not supply either flag.
    if user_provided_any_flag_skip_prompt:
        # Assume batch needed if user passes --profile
        if profile and interactive is None:
            cli_message(
                "<green>Entering interactive mode since you passed the --profile flag</green>"
            )
            interactive = True
        elif profile and interactive is False:
            cli_message(
                "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --profile flag</yellow>"
            )
            interactive = True
        # Assume batch needed if user passes --batch-request
        elif (batch_request is not None) and (interactive is None):
            cli_message(
                "<green>Entering interactive mode since you passed the --batch-request flag</green>"
            )
            interactive = True
        elif (batch_request is not None) and (interactive is False):
            cli_message(
                "<yellow>Warning: Ignoring the --manual flag and entering interactive mode since you passed the --batch-request flag</yellow>"
            )
            interactive = True
    else:
        suite_create_method: str = click.prompt(
            """\
How would you like to create your Expectation Suite?
    1. Manually, without interacting with a sample batch of data (default)
    2. Interactively, with a sample batch of data
    3. Automatically, using a profiler
""",
            type=click.Choice(["1", "2", "3"]),
            show_choices=False,
            default="1",
            show_default=False,
        )
        # Dead code removed: click.prompt with default="1" and
        # Choice(["1", "2", "3"]) can never return "", so the previous
        # `if suite_create_method == "":` branch was unreachable.
        if suite_create_method == "1":
            interactive = False
            profile = False
        elif suite_create_method == "2":
            interactive = True
            profile = False
        elif suite_create_method == "3":
            interactive = True
            profile = True

    return {"interactive": interactive, "profile": profile}
def upgrade_project_zero_versions_increment(
    directory: str,
    context: DataContext,
    ge_config_version: float,
    from_cli_upgrade_command: bool = False,
) -> Optional[DataContext]:
    """Run the upgrade helper for the current config version without bumping it.

    Args:
        directory: project root directory (context_root_dir).
        context: Data Context used for sending usage-stats messages.
        ge_config_version: config version currently recorded in the project.
        from_cli_upgrade_command: True when invoked from the CLI upgrade command.

    Returns:
        A freshly loaded DataContext when no automatic version increment is
        needed, or None when an exception occurred or an increment is still
        required (the caller must handle that case).
    """
    # Look up the helper registered for this major config version; a falsy
    # ge_config_version short-circuits to None and triggers the error exit below.
    upgrade_helper_class = (
        GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
        if ge_config_version
        else None
    )
    if upgrade_helper_class:
        # update_version=False: run the helper without changing config_version.
        upgrade_helper = upgrade_helper_class(
            context_root_dir=directory, update_version=False
        )
    else:
        error_message: str = (
            f"The upgrade utility for version {ge_config_version} could not be found."
        )
        cli_message(string=f"<red>{error_message}</red>")
        sys.exit(1)
    manual_steps_required = upgrade_helper.manual_steps_required()
    if manual_steps_required:
        # NOTE(review): the "begin" usage event is only sent when manual steps
        # are required, while the "end" event below is sent whenever the project
        # is up-to-date -- confirm this asymmetry is intended.
        # noinspection PyBroadException
        try:
            send_usage_message(
                data_context=context,
                event="cli.project.upgrade.begin",
                success=True,
            )
        except Exception:
            # Don't fail for usage stats
            pass
    (
        increment_version,
        exception_occurred,
    ) = upgrade_project_up_to_one_version_increment(
        context_root_dir=directory,
        ge_config_version=ge_config_version,
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
        update_version=False,
        from_cli_upgrade_command=from_cli_upgrade_command,
    )
    if exception_occurred or increment_version:
        # Signal the caller that the upgrade is unfinished (or failed).
        context = None
    else:
        if manual_steps_required:
            upgrade_message = "Your project requires manual upgrade steps in order to be up-to-date.\n"
            cli_message(f"<yellow>{upgrade_message}</yellow>")
        else:
            upgrade_message = (
                "Your project is up-to-date - no further upgrade is necessary.\n"
            )
            cli_message(f"<green>{upgrade_message}</green>")
        # Reload the context so it reflects any changes made by the helper.
        context = DataContext(context_root_dir=directory)
        # noinspection PyBroadException
        try:
            send_usage_message(
                data_context=context,
                event="cli.project.upgrade.end",
                success=True,
            )
        except Exception:
            # Don't fail for usage stats
            pass
    return context
def init(ctx, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # NOTE(review): this call appears to abort immediately, which would make all
    # of the code below unreachable -- confirm before relying on this command.
    display_not_implemented_message_and_exit()
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    if directory is None:
        # Fall back to the current working directory when no --config was given.
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)
        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            exit(5)
    else:
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                # TODO ensure this is covered by a test
                exit(0)
        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            toolkit.send_usage_message(
                data_context=context, event="cli.init.create", success=True
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))

    # Skip the rest of setup if --assume-yes flag is passed
    if ctx.obj.assume_yes:
        cli_message(SECTION_SEPARATOR)
        cli_message(SETUP_SUCCESS)
        sys.exit(0)

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                # No datasource yet: offer to configure one interactively.
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to configure a Datasource?", default=True
                ):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False
                )
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                # With exactly one datasource, walk the user through profiling,
                # building docs, and opening the results.
                datasource_name = datasources[0]["name"]
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)
                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to build Data Docs?", default=True
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)
                build_docs(context, view=False)

                if not click.confirm(
                    "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results
                )
                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
def upgrade_project(
    context_root_dir, ge_config_version, from_cli_upgrade_command=False
):
    """Drive the interactive project upgrade loop until the config is current.

    Prompts for confirmation, applies one-version-increment upgrades until
    ge_config_version reaches CURRENT_GE_CONFIG_VERSION (or a step fails),
    reports the outcome, sends a usage-stats event, and always exits the
    process via sys.exit(0).
    """
    if from_cli_upgrade_command:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.</red>"
        )
    else:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.\nIn order to proceed, "
            f"your project must be upgraded.</red>"
        )
    cli_message(string=message)
    upgrade_prompt = (
        "\nWould you like to run the Upgrade Helper to bring your project up-to-date?"
    )
    # This loading of DataContext is optional and just to track if someone exits here
    try:
        data_context = DataContext(context_root_dir)
    except Exception:
        # Do not raise error for usage stats
        data_context = None
    # Exits the process (recording the usage event) if the user declines.
    confirm_proceed_or_exit(
        confirm_prompt=upgrade_prompt,
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
        data_context=data_context,
        usage_stats_event="cli.project.upgrade.end",
    )
    cli_message(string=SECTION_SEPARATOR)

    # use loop in case multiple upgrades need to take place
    while ge_config_version < CURRENT_GE_CONFIG_VERSION:
        increment_version, exception_occurred = upgrade_project_one_version_increment(
            context_root_dir=context_root_dir,
            ge_config_version=ge_config_version,
            continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        # Stop on failure or when the helper declined to bump the version.
        if exception_occurred or not increment_version:
            break
        ge_config_version += 1

    cli_message(string=SECTION_SEPARATOR)
    upgrade_success_message = "<green>Upgrade complete. Exiting...</green>\n"
    upgrade_incomplete_message = f"""\
<red>The Upgrade Helper was unable to perform a complete project upgrade.
Next steps:</red>

    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above
    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to <cyan>{
    ge_config_version + 1}</cyan>\n
To learn more about the upgrade process, visit \
<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html</cyan>
"""

    if ge_config_version < CURRENT_GE_CONFIG_VERSION:
        # Loop stopped short of the current version: report failure.
        cli_message(string=upgrade_incomplete_message)
        # noinspection PyBroadException
        try:
            context: DataContext = DataContext(context_root_dir=context_root_dir)
            send_usage_message(
                data_context=context, event="cli.project.upgrade.end", success=False
            )
        except Exception:
            # Do not raise error for usage stats
            pass
    else:
        cli_message(upgrade_success_message)
        # noinspection PyBroadException
        try:
            context: DataContext = DataContext(context_root_dir)
            send_usage_message(
                data_context=context, event="cli.project.upgrade.end", success=True
            )
        except Exception:
            # Do not raise error for usage stats
            pass
    sys.exit(0)
def init(ctx: click.Context, usage_stats: bool) -> None:
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, creates a project file, and appends to a
    `.gitignore` file.
    """
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    if directory is None:
        # Fall back to the current working directory when no --config was given.
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        message = (
            f"""Warning. An existing `{DataContext.GE_YML}` was found here: {ge_dir}."""
        )
        warnings.warn(message)
        try:
            # A project is complete when the yml, uncommitted directories, and
            # config_variables.yml all exist on disk.
            project_file_structure_exists = (
                DataContext.does_config_exist_on_disk(ge_dir)
                and DataContext.all_uncommitted_directories_exist(ge_dir)
                and DataContext.config_variables_yml_exist(ge_dir)
            )
            if project_file_structure_exists:
                cli_message(PROJECT_IS_COMPLETE)
                sys.exit(0)
            else:
                # Prompt to modify the project to add missing files
                if not ctx.obj.assume_yes:
                    if not click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True):
                        cli_message(RUN_INIT_AGAIN)
                        # NOTE(review): bare exit() relies on the site builtin;
                        # sys.exit is used elsewhere in this function -- confirm
                        # the inconsistency is harmless.
                        exit(0)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)
        try:
            # DataContext.create fills in any missing scaffolding in place.
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            exit(5)
    else:
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                exit(0)
        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            send_usage_message(
                data_context=context,
                event=UsageStatsEvents.CLI_INIT_CREATE.value,
                success=True,
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message(f"<red>{e}</red>")

    cli_message(SECTION_SEPARATOR)
    cli_message(READY_FOR_CUSTOMIZATION)
    cli_message(HOW_TO_CUSTOMIZE)
    sys.exit(0)
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> [bool, bool]:  # Returns increment_version, exception_occurred
    """Attempt to upgrade the project by exactly one config version.

    Args:
        context_root_dir: project root directory.
        ge_config_version: config version currently recorded in the project.
        continuation_message: message shown when the user declines the upgrade.
        from_cli_upgrade_command: True forces a confirmation prompt.

    Returns:
        Tuple of (increment_version, exception_occurred). Declining the
        confirmation prompt does not return: it prints continuation_message
        and exits the process.

    NOTE(review): the annotation ``[bool, bool]`` is not a valid type hint
    (it evaluates to a list); it should be ``Tuple[bool, bool]``. Left
    unchanged here pending a check of this module's typing imports.
    """
    upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
    if not upgrade_helper_class:
        # No helper registered for this major version: nothing to do, no error.
        return False, False
    target_ge_config_version = int(ge_config_version) + 1
    # set version temporarily to CURRENT_GE_CONFIG_VERSION to get functional DataContext
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION,
        context_root_dir=context_root_dir,
    )
    upgrade_helper = upgrade_helper_class(context_root_dir=context_root_dir)
    upgrade_overview, confirmation_required = upgrade_helper.get_upgrade_overview()

    # Prompt unless the helper says no confirmation is needed and we were not
    # invoked from the explicit CLI upgrade command.
    if confirmation_required or from_cli_upgrade_command:
        upgrade_confirmed = confirm_proceed_or_exit(
            confirm_prompt=upgrade_overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )
    else:
        upgrade_confirmed = True

    if upgrade_confirmed:
        cli_message(string="\nUpgrading project...")
        cli_message(string=SECTION_SEPARATOR)

        # run upgrade and get report of what was done, if version number should be incremented
        (
            upgrade_report,
            increment_version,
            exception_occurred,
        ) = upgrade_helper.upgrade_project()
        # display report to user
        cli_message(string=upgrade_report)
        if exception_occurred:
            # restore version number to current number
            DataContext.set_ge_config_version(
                ge_config_version, context_root_dir, validate_config_version=False
            )
            # display report to user
            return False, True
        # set config version to target version
        if increment_version:
            DataContext.set_ge_config_version(
                target_ge_config_version,
                context_root_dir,
                validate_config_version=False,
            )
            return True, False
        # restore version number to current number
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )
        return False, False

    # User declined: restore version number to current number, then exit.
    DataContext.set_ge_config_version(
        ge_config_version, context_root_dir, validate_config_version=False
    )
    cli_message(string=continuation_message)
    sys.exit(0)
def _suite_edit(
    suite,
    datasource,
    directory,
    jupyter,
    batch_kwargs,
    usage_event,
    suppress_usage_message=False,
):
    """Render an edit notebook for an existing Expectation Suite.

    Resolves a batch of data (from --batch-kwargs JSON, the suite's most
    recent citation, or an interactive datasource prompt), writes an
    `edit_<suite>.ipynb` notebook to disk, and optionally launches Jupyter.
    Sends a usage-stats event on both success and failure paths.
    """
    # suppress_usage_message is for the situation where _suite_edit is called by
    # _suite_new(), so that only one usage event is emitted for the combined flow.
    # NOTE(review): the original comment claimed the flag defaults to True, but
    # the signature defaults it to False -- the code below is authoritative.
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        # Citations record batch_kwargs used previously with this suite.
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            # Explicit --batch-kwargs wins; validate it by loading a batch now.
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                if not suppress_usage_message:
                    toolkit.send_usage_message(
                        data_context=context, event=usage_event, success=False
                    )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                if not suppress_usage_message:
                    toolkit.send_usage_message(
                        data_context=context, event=usage_event, success=False
                    )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                if not suppress_usage_message:
                    toolkit.send_usage_message(
                        data_context=context, event=usage_event, success=False
                    )
                sys.exit(1)
        elif citations:
            # Fall back to the batch_kwargs from the most recent citation.
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            # Nothing usable so far: prompt the user for a datasource/batch.
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = toolkit.select_datasource(
                    context, datasource_name=datasource
                )
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                toolkit.send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                if not suppress_usage_message:
                    toolkit.send_usage_message(
                        data_context=context, event=usage_event, success=False
                    )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        # Render the edit notebook next to the project's other notebooks.
        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer.from_data_context(context).render_to_disk(
            suite, notebook_path, batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        # Catch-all: record the failure in usage stats, then re-raise for the CLI.
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise e