def docs_list(directory) -> None:
    """List known Data Docs Sites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    # Build one display line per configured site; sites without a built URL
    # get a hint about how to build them.
    site_entries = []
    for site in context.get_docs_sites_urls():
        url_or_hint = site.get("site_url") or (
            f"site configured but does not exist. Run the following command to build site: great_expectations "
            f'docs build --site-name {site["site_name"]}'
        )
        site_entries.append(
            " - <cyan>{}</cyan>: {}".format(site["site_name"], url_or_hint)
        )
    if not site_entries:
        cli_message("No Data Docs sites found")
    else:
        cli_message_list(site_entries, _build_intro_string(site_entries))
    send_usage_message(
        data_context=context,
        event="cli.docs.list",
        api_version="v2",
        success=True,
    )
def clean_data_docs(directory, site_name=None, all=None) -> None:
    """Delete data docs"""
    # NOTE(review): the parameter name `all` shadows the builtin `all`; it is
    # part of the CLI option interface, so it is left unchanged here.
    context = toolkit.load_data_context_with_error_handling(directory)
    failed = True
    if site_name is None and all is None:
        # Neither --site_name nor --all was given: refuse to guess which
        # sites to delete and exit with an error.
        cli_message(
            "<red>{}</red>".format(
                "Please specify --all to remove all sites or specify a specific site using "
                "--site_name"
            )
        )
        sys.exit(1)
    context.clean_data_docs(site_name=site_name)
    failed = False
    # NOTE(review): clean_data_docs is not wrapped in try/except, so if it
    # raises, the exception propagates and the `failed` branch below never
    # runs; as written, `failed` is always False when execution reaches here.
    if not failed and context is not None:
        send_usage_message(
            data_context=context,
            event="cli.docs.clean",
            api_version="v2",
            success=True,
        )
        cli_message("<green>Cleaned data docs</green>")
    if failed and context is not None:
        # Effectively unreachable given the current control flow (see NOTE above).
        send_usage_message(
            data_context=context,
            event="cli.docs.clean",
            api_version="v2",
            success=False,
        )
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    usage_event = "cli.checkpoint.run"
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False
    )
    # Resolve the named checkpoint; load_checkpoint handles lookup errors.
    checkpoint: Checkpoint = toolkit.load_checkpoint(
        context,
        checkpoint,
        usage_event,
    )
    try:
        run_results = checkpoint.run()
    except Exception as err:
        # Records the failed usage event and exits with status 1.
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{err}</red>"
        )
    validation_passed = run_results["success"]
    # Side-effect order matches in both outcomes: message, usage stats,
    # result details, exit. success=True refers to the command completing,
    # not to the validation outcome.
    cli_message("Validation succeeded!" if validation_passed else "Validation failed!")
    toolkit.send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(run_results)
    sys.exit(0 if validation_passed else 1)
def checkpoint_list(directory) -> None:
    """List configured checkpoints. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    names = context.list_checkpoints()
    if not names:
        cli_message(
            "No checkpoints found.\n"
            " - Use the command `great_expectations checkpoint new` to create one."
        )
        send_usage_message(
            data_context=context,
            event="cli.checkpoint.list",
            api_version="v2",
            success=True,
        )
        sys.exit(0)
    count = len(names)
    suffix = "s" if count > 1 else ""
    intro = f"Found {count} checkpoint{suffix}."
    cli_message_list(
        [f" - <cyan>{name}</cyan>" for name in names],
        list_intro_string=intro,
    )
    send_usage_message(
        data_context=context,
        event="cli.checkpoint.list",
        api_version="v2",
        success=True,
    )
def confirm_proceed_or_exit(
    confirm_prompt: str = "Would you like to proceed?",
    continuation_message: str = "Ok, exiting now. You can always read more at https://docs.greatexpectations.io/ !",
    exit_on_no: bool = True,
    exit_code: int = 0,
) -> Optional[bool]:
    """Ask the user to confirm before a lengthy or destructive CLI action.

    Every CLI command that starts a potentially lengthy (>1 sec) computation
    or modifies resources (config file edits, store additions) should first
    explain what will be created/modified/deleted and then call this helper,
    so users get a consistent confirmation experience across commands.

    Returns True when the user confirms. When the user declines: exits the
    process with `exit_code` if `exit_on_no` is True, otherwise returns False
    so the caller can perform cleanup before exiting itself.
    """
    prompt_colorized = cli_colorize_string(confirm_prompt)
    message_colorized = cli_colorize_string(continuation_message)
    if click.confirm(prompt_colorized, default=True):
        return True
    # User declined.
    if not exit_on_no:
        return False
    cli_message(message_colorized)
    sys.exit(exit_code)
def suite_delete(suite, directory):
    """
    Delete an expectation suite from the expectation store.

    Exits (via exit_with_failure_message_and_stats) when the project has no
    suites or the named suite does not exist; otherwise deletes the suite and
    records a successful usage event.
    """
    usage_event = "cli.suite.delete"
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        # BUG FIX: the message previously opened with a closing tag "</red>"
        # instead of "<red>", producing mismatched markup.
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            "<red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found."
        )
    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )
def checkpoint_new(checkpoint, suite, directory, datasource):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    # NOTE(review): a second `checkpoint_new` definition appears later in this
    # file; if both live in the same module, the later definition wins at
    # import time — TODO confirm whether these belong to different modules.
    suite_name = suite
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)
    # Fail fast (recording usage stats) if the checkpoint name is taken.
    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    suite: ExpectationSuite = toolkit.load_expectation_suite(
        context, suite_name, usage_event
    )
    datasource = toolkit.select_datasource(context, datasource_name=datasource)
    if datasource is None:
        # select_datasource already printed an error; record failure and exit.
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)
    # Interactively choose the batch this checkpoint will validate.
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)
    # Persist a legacy-style checkpoint binding the batch to the suite.
    _ = context.add_checkpoint(
        name=checkpoint,
        **{
            "class_name": "LegacyCheckpoint",
            "batches": [
                {
                    "batch_kwargs": dict(batch_kwargs),
                    "expectation_suite_names": [suite.expectation_suite_name],
                }
            ],
        },
    )
    cli_message(
        f"""<green>A Checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this Checkpoint, run `great_expectations checkpoint run {checkpoint}`"""
    )
    toolkit.send_usage_message(context, usage_event, success=True)
def checkpoint_script(checkpoint, directory):
    """
    Create a python script to run a checkpoint. (Experimental)

    Checkpoints can be run directly without this script using the
    `great_expectations checkpoint run` command.

    This script is provided for those who wish to run checkpoints via python.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.checkpoint.script"

    # Verify the checkpoint exists before generating anything; this call
    # handles the error reporting for a missing checkpoint.
    _ = toolkit.load_checkpoint(context, checkpoint, usage_event)

    script_name = f"run_{checkpoint}.py"
    script_path = os.path.join(
        context.root_directory, context.GE_UNCOMMITTED_DIR, script_name
    )

    # Never clobber an existing script — bail out with a warning instead.
    if os.path.isfile(script_path):
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            f"""<red>Warning! A script named {script_name} already exists and this command will not overwrite it.</red>
  - Existing file path: {script_path}""",
        )

    _write_checkpoint_script_to_disk(context.root_directory, checkpoint, script_path)
    cli_message(
        f"""<green>A python script was created that runs the checkpoint named: `{checkpoint}`</green>
  - The script is located in `great_expectations/uncommitted/run_{checkpoint}.py`
  - The script can be run with `python great_expectations/uncommitted/run_{checkpoint}.py`"""
    )
    toolkit.send_usage_message(context, event=usage_event, success=True)
def suite_list(directory):
    """Lists available Expectation Suites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        formatted = [
            f" - <cyan>{name}</cyan>"
            for name in context.list_expectation_suite_names()
        ]
        count = len(formatted)
        if count == 0:
            cli_message("No Expectation Suites found")
            toolkit.send_usage_message(
                data_context=context, event="cli.suite.list", success=True
            )
            return
        if count == 1:
            intro = "1 Expectation Suite found:"
        else:
            intro = f"{count} Expectation Suites found:"
        cli_message_list(formatted, intro)
        toolkit.send_usage_message(
            data_context=context, event="cli.suite.list", success=True
        )
    except Exception as e:
        # Record the failure before letting the exception propagate.
        toolkit.send_usage_message(
            data_context=context, event="cli.suite.list", success=False
        )
        raise e
def select_datasource(context: DataContext, datasource_name: str = None) -> Datasource: """Select a datasource interactively.""" # TODO consolidate all the myriad CLI tests into this data_source = None if datasource_name is None: data_sources = sorted(context.list_datasources(), key=lambda x: x["name"]) if len(data_sources) == 0: cli_message( "<red>No datasources found in the context. To add a datasource, run `great_expectations datasource new`</red>" ) elif len(data_sources) == 1: datasource_name = data_sources[0]["name"] else: choices = "\n".join( [ " {}. {}".format(i, data_source["name"]) for i, data_source in enumerate(data_sources, 1) ] ) option_selection = click.prompt( "Select a datasource" + "\n" + choices + "\n", type=click.Choice( [str(i) for i, data_source in enumerate(data_sources, 1)] ), show_choices=False, ) datasource_name = data_sources[int(option_selection) - 1]["name"] if datasource_name is not None: data_source = context.get_datasource(datasource_name) return data_source
def load_data_context_with_error_handling(
    directory: str, from_cli_upgrade_command: bool = False
) -> DataContext:
    """Return a DataContext with good error handling and exit codes.

    When invoked from the `project upgrade` CLI command and the config is
    out of date, attempts a one-version upgrade before returning. On
    unrecoverable configuration errors, prints a message and exits 1.
    """
    try:
        context: DataContext = DataContext(context_root_dir=directory)
        # NOTE(review): config_version is passed through int() below and
        # compared numerically — it may actually be a float; the `int`
        # annotation here is an assumption to confirm.
        ge_config_version: int = context.get_config().config_version
        if (
            from_cli_upgrade_command
            and int(ge_config_version) < CURRENT_GE_CONFIG_VERSION
        ):
            directory = directory or context.root_directory
            (
                increment_version,
                exception_occurred,
            ) = upgrade_project_one_version_increment(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
            if not exception_occurred and increment_version:
                # Reload so the returned context reflects the upgraded config.
                context = DataContext(context_root_dir=directory)
        return context
    except ge_exceptions.UnsupportedConfigVersionError as err:
        # Config version too old/new for this library version: try to find an
        # upgrade helper for it, otherwise surface the error and exit.
        directory = directory or DataContext.find_context_root_dir()
        ge_config_version = DataContext.get_ge_config_version(
            context_root_dir=directory
        )
        upgrade_helper_class = (
            GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
            if ge_config_version
            else None
        )
        if upgrade_helper_class and ge_config_version < CURRENT_GE_CONFIG_VERSION:
            upgrade_project(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
        else:
            cli_message(f"<red>{err.message}</red>")
            sys.exit(1)
    except (
        ge_exceptions.ConfigNotFoundError,
        ge_exceptions.InvalidConfigError,
    ) as err:
        cli_message(f"<red>{err.message}</red>")
        sys.exit(1)
    except ge_exceptions.PluginModuleNotFoundError as err:
        cli_message(err.cli.v012_colored_message)
        sys.exit(1)
    except ge_exceptions.PluginClassNotFoundError as err:
        cli_message(err.cli.v012_colored_message)
        sys.exit(1)
    except ge_exceptions.InvalidConfigurationYamlError as err:
        cli_message(f"<red>{str(err)}</red>")
        sys.exit(1)
def upgrade_project(
    context_root_dir, ge_config_version, from_cli_upgrade_command=False
) -> None:
    """Interactively upgrade a project config toward CURRENT_GE_CONFIG_VERSION.

    Applies one-version increments in a loop, then prints a success or
    incomplete-upgrade message and exits the process with status 0.
    """
    if from_cli_upgrade_command:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.</red>"
        )
    else:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.\nIn order to proceed, "
            f"your project must be upgraded.</red>"
        )
    cli_message(message)
    upgrade_prompt = (
        "\nWould you like to run the Upgrade Helper to bring your project up-to-date?"
    )
    # Exits the process if the user declines.
    confirm_proceed_or_exit(
        confirm_prompt=upgrade_prompt,
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
    )
    cli_message(SECTION_SEPARATOR)

    # use loop in case multiple upgrades need to take place
    # NOTE(review): this calls `upgrade_project_up_to_one_version_increment`,
    # while a similarly named `upgrade_project_one_version_increment` exists
    # in this file — presumably a distinct helper defined elsewhere; verify.
    while ge_config_version < CURRENT_GE_CONFIG_VERSION:
        (
            increment_version,
            exception_occurred,
        ) = upgrade_project_up_to_one_version_increment(
            context_root_dir=context_root_dir,
            ge_config_version=ge_config_version,
            continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        if exception_occurred or not increment_version:
            # Stop early: either the helper failed or no increment was applied.
            break
        ge_config_version += 1

    cli_message(SECTION_SEPARATOR)
    upgrade_success_message = "<green>Upgrade complete. Exiting...</green>\n"
    upgrade_incomplete_message = f"""\
<red>The Upgrade Helper was unable to perform a complete project upgrade. Next steps:</red>

    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above
    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to <cyan>{
    ge_config_version + 1}</cyan>\n
To learn more about the upgrade process, visit \
<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html</cyan>
"""
    if ge_config_version < CURRENT_GE_CONFIG_VERSION:
        cli_message(upgrade_incomplete_message)
    else:
        cli_message(upgrade_success_message)
    sys.exit(0)
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Print `message`, record a failed usage event, and exit with status 1.

    Args:
        context: the active DataContext (used for usage statistics).
        usage_event: the usage-stats event name to record as failed.
        message: a (possibly markup-colored) message printed to the user.
    """
    cli_message(message)
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=False,
    )
    sys.exit(1)
def validation_operator_list(directory):
    """List known Validation Operators."""
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"<red>{err.message}</red>")
        return

    try:
        operators = context.list_validation_operators()
        count = len(operators)
        if count == 0:
            # Note: no usage event is sent on the empty case (matches the
            # early return behavior of the rest of this command).
            cli_message("No Validation Operators found")
            return
        if count == 1:
            intro = "1 Validation Operator found:"
        else:
            intro = "{} Validation Operators found:".format(count)
        cli_message(intro)
        for operator in operators:
            cli_message("")
            cli_message_dict(operator)
        toolkit.send_usage_message(
            data_context=context, event="cli.validation_operator.list", success=True
        )
    except Exception as e:
        # Record the failure before re-raising.
        toolkit.send_usage_message(
            data_context=context, event="cli.validation_operator.list", success=False
        )
        raise e
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    """Render a scaffold notebook for a new suite; optionally launch jupyter.

    Exits 1 (recording a failed usage event) if the suite already exists or
    no datasource can be selected.
    """
    usage_event = "cli.suite.scaffold"
    suite_name = suite
    context = toolkit.load_data_context_with_error_handling(directory)
    notebook_filename = f"scaffold_{suite_name}.ipynb"
    notebook_path = _get_notebook_path(context, notebook_filename)
    if suite_name in context.list_expectation_suite_names():
        # Refuse to scaffold over an existing suite; point at the previous
        # scaffold notebook if one is still on disk.
        toolkit.tell_user_suite_exists(suite_name)
        if os.path.isfile(notebook_path):
            cli_message(
                f" - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        sys.exit(1)
    datasource = toolkit.select_datasource(context)
    if datasource is None:
        # select_datasource already printed the error.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        sys.exit(1)
    # Create the (empty) suite, pick a batch, and render the scaffold notebook.
    _suite = context.create_expectation_suite(suite_name)
    _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=datasource.name)
    renderer = SuiteScaffoldNotebookRenderer(context, _suite, batch_kwargs)
    renderer.render_to_disk(notebook_path)
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )
    if jupyter:
        toolkit.launch_jupyter_notebook(notebook_path)
    else:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    # NOTE(review): this redefines the earlier `checkpoint_new` in this file;
    # if both live in the same module, this later definition wins — TODO
    # confirm these belong to different modules/chunks.
    if legacy:
        suite_name = suite
        usage_event = "cli.checkpoint.new"
        context = toolkit.load_data_context_with_error_handling(directory)
        ge_config_version = context.get_config().config_version
        if ge_config_version >= 3:
            # The legacy flow does not support v3 configs.
            cli_message(
                f"""<red>The `checkpoint new` CLI command is not yet implemented for Great Expectations config versions >= 3.</red>"""
            )
            toolkit.send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
        suite: ExpectationSuite = toolkit.load_expectation_suite(
            context, suite_name, usage_event
        )
        datasource = toolkit.select_datasource(context, datasource_name=datasource)
        if datasource is None:
            # select_datasource already printed an error.
            toolkit.send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)
        # Persist a legacy-style checkpoint wired to the default
        # action_list_operator validation operator.
        _ = context.add_checkpoint(
            name=checkpoint,
            **{
                "class_name": "LegacyCheckpoint",
                "validation_operator_name": "action_list_operator",
                "batches": [
                    {
                        "batch_kwargs": dict(batch_kwargs),
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            },
        )
        cli_message(
            f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
        )
        toolkit.send_usage_message(context, usage_event, success=True)
        # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    else:
        # Non-legacy (new-style) checkpoint creation is not implemented yet.
        pass
def create_empty_suite(
    context: DataContext, expectation_suite_name: str, batch_kwargs
) -> None:
    """Create a new empty Expectation Suite, cite its batch, and persist it."""
    # Resolve where the expectations store will keep this suite, for display.
    store_backend = context.stores[context.expectations_store_name].store_backend
    suite_key = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    suite_url = store_backend.get_url_for_key(suite_key)
    cli_message(
        """
Great Expectations will create a new Expectation Suite '{:s}' and store it here:

  {:s}
""".format(expectation_suite_name, suite_url)
    )
    new_suite = context.create_expectation_suite(expectation_suite_name)
    # Record how the suite was created and which batch it was based on.
    new_suite.add_citation(comment="New suite added via CLI", batch_kwargs=batch_kwargs)
    context.save_expectation_suite(new_suite, expectation_suite_name)
def _slack_setup(context):
    """Interactively configure a Slack webhook for validation notifications.

    Returns the context unchanged if the user opts out; otherwise saves the
    validated webhook URL as a config variable and returns the context.
    """
    cli_message(SLACK_SETUP_INTRO)
    if not click.confirm(SLACK_SETUP_PROMPT, default=True):
        # User opted out of Slack setup for now.
        cli_message(SLACK_LATER)
        return context

    # Keep prompting until the webhook URL looks plausible, offering an
    # opt-out at every retry.
    while True:
        webhook_url = click.prompt(SLACK_WEBHOOK_PROMPT, default="")
        if is_sane_slack_webhook(webhook_url):
            break
        cli_message("That URL was not valid.\n")
        if not click.confirm(SLACK_SETUP_PROMPT, default=True):
            cli_message(SLACK_LATER)
            return context

    context.save_config_variable("validation_notification_slack_webhook", webhook_url)
    cli_message(SLACK_SETUP_COMPLETE)
    return context
def project_check_config(directory):
    """Check a config for validity and help with migrations."""
    cli_message("Checking your config files for validity...\n")
    is_config_ok, error_message, context = do_config_check(directory)
    if context:
        # NOTE(review): the usage event reports success=True even when the
        # config turns out to be invalid below — TODO confirm this is meant
        # to track "command ran" rather than "config valid".
        toolkit.send_usage_message(
            data_context=context, event="cli.project.check_config", success=True
        )
    if not is_config_ok:
        cli_message("Unfortunately, your config appears to be invalid:\n")
        cli_message(f"<red>{error_message}</red>")
        sys.exit(1)
    cli_message("<green>Your config file appears valid!</green>")
def store_list(directory):
    """List known Stores."""
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        stores = context.list_stores()
        count = len(stores)
        if count == 0:
            cli_message("No Stores found")
            toolkit.send_usage_message(
                data_context=context, event="cli.store.list", success=True
            )
            return
        if count == 1:
            intro = "1 Store found:"
        else:
            intro = "{} Stores found:".format(count)
        cli_message(intro)
        for store in stores:
            cli_message("")
            cli_message_dict(store)
        toolkit.send_usage_message(
            data_context=context, event="cli.store.list", success=True
        )
    except Exception as e:
        # Record the failure before re-raising.
        toolkit.send_usage_message(
            data_context=context, event="cli.store.list", success=False
        )
        raise e
def build_docs(context, site_name=None, view=True, assume_yes=False) -> None:
    """Build documentation in a context.

    Args:
        context: the DataContext whose Data Docs sites are built.
        site_name: build only this site; None builds all configured sites.
        view: open the built docs in a browser when True.
        assume_yes: skip the confirmation prompt when True.
    """
    logger.debug("Starting cli.datasource.build_docs")

    if site_name is not None:
        site_names = [site_name]
    else:
        site_names = None

    # Dry run first to discover which sites would be built, for confirmation.
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    msg = "\nThe following Data Docs sites will be built:\n\n"
    # BUG FIX: the loop variable previously reused the `site_name` parameter,
    # so when building all sites (site_name=None) the open_data_docs call
    # below received the last-iterated site instead of None (all sites).
    for name, index_page_locator_info in index_page_locator_infos.items():
        msg += f" - <cyan>{name}:</cyan> "
        msg += f"{index_page_locator_info}\n"

    cli_message(msg)
    if not assume_yes:
        toolkit.confirm_proceed_or_exit()

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)
    cli_message("Done building Data Docs")

    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
def project_upgrade(directory):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    # load_data_context_with_error_handling performs the upgrade itself when
    # needed (from_cli_upgrade_command=True) and exits on failure.
    context = load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    )
    if context:
        up_to_date_message = (
            "Your project is up-to-date - no further upgrade is necessary.\n"
        )
        cli_message(f"<green>{up_to_date_message}</green>")
        sys.exit(0)
def _profile_to_create_a_suite(
    additional_batch_kwargs,
    batch_kwargs,
    batch_kwargs_generator_name,
    context,
    datasource_name,
    expectation_suite_name,
    data_asset_name,
    profiler_configuration,
):
    """Profile one data asset to generate a demo Expectation Suite.

    Returns the profiling results from ``context.profile_data_asset``; calls
    ``_raise_profiling_errors`` when profiling reports failure.
    """
    cli_message(
        """
Great Expectations will choose a couple of columns and generate expectations about them
to demonstrate some examples of assertions you can make about your data.

Great Expectations will store these expectations in a new Expectation Suite '{:s}' here:

  {:s}
""".format(
            expectation_suite_name,
            context.stores[
                context.expectations_store_name
            ].store_backend.get_url_for_key(
                ExpectationSuiteIdentifier(
                    expectation_suite_name=expectation_suite_name
                ).to_tuple()
            ),
        )
    )

    confirm_proceed_or_exit()  # TODO this may not apply

    cli_message("\nGenerating example Expectation Suite...")
    # UTC timestamp used as the profiling run id.
    run_id = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
    profiling_results = context.profile_data_asset(
        datasource_name,
        batch_kwargs_generator_name=batch_kwargs_generator_name,
        data_asset_name=data_asset_name,
        batch_kwargs=batch_kwargs,
        profiler=BasicSuiteBuilderProfiler,
        profiler_configuration=profiler_configuration,
        expectation_suite_name=expectation_suite_name,
        run_id=run_id,
        additional_batch_kwargs=additional_batch_kwargs,
    )
    if not profiling_results["success"]:
        _raise_profiling_errors(profiling_results)

    cli_message("\nDone generating example Expectation Suite")
    return profiling_results
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    # NOTE(review): this redefines the earlier `checkpoint_run` in this file;
    # if both live in the same module, this later definition wins — TODO
    # confirm these belong to different modules/chunks.
    usage_event = "cli.checkpoint.run"
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False
    )
    ge_config_version = context.get_config().config_version
    if ge_config_version >= 3:
        # This (v2) command path does not support v3 configs.
        cli_message(
            f"""<red>The `checkpoint run` CLI command is not yet implemented for Great Expectations config versions >= 3.</red>"""
        )
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)
    checkpoint: Checkpoint = toolkit.load_checkpoint(
        context,
        checkpoint,
        usage_event,
    )
    try:
        results = checkpoint.run()
    except Exception as e:
        # Records the failed usage event and exits with status 1.
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{e}</red>"
        )
    if not results["success"]:
        cli_message("Validation failed!")
        # success=True refers to the command completing, not the validation outcome.
        toolkit.send_usage_message(context, event=usage_event, success=True)
        print_validation_operator_results_details(results)
        sys.exit(1)
    cli_message("Validation succeeded!")
    toolkit.send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0)
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite.

    WARNING: the flow and name of this method and its interaction with _profile_to_create_a_suite
    require a serious revisiting.
    :return: a tuple: (success, suite name, profiling_results)
    """
    if generator_asset:
        # Legacy alias: fold generator_asset into data_asset_name with a warning.
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    data_source = select_datasource(context, datasource_name=datasource_name)
    if data_source is None:
        # select_datasource takes care of displaying an error message, so all is left here is to exit.
        sys.exit(1)

    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    if (
        batch_kwargs_generator_name is None
        or data_asset_name is None
        or batch_kwargs is None
    ):
        # Batch details are incomplete: prompt the user interactively.
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        default_expectation_suite_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        # Keep prompting until a non-colliding suite name is chosen.
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=default_expectation_suite_name,
            )
            if expectation_suite_name in context.list_expectation_suite_names():
                tell_user_suite_exists(expectation_suite_name)
            else:
                break

    if empty_suite:
        # Empty-suite path: no profiling results to return.
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)

    if open_docs:
        attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> "Tuple[bool, bool]":  # Returns increment_version, exception_occurred
    """Attempt a single-version config upgrade via the matching UpgradeHelper.

    Temporarily bumps the config version so a functional DataContext can be
    constructed, runs the helper (after optional user confirmation), and
    restores/sets the on-disk config version according to the outcome.
    Exits the process if the user declines the upgrade.

    NOTE(review): the original return annotation was the list literal
    ``[bool, bool]``; corrected here to a (string) tuple annotation — no
    typing import is required since it is never evaluated.
    """
    upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
    if not upgrade_helper_class:
        # No helper registered for this major config version: nothing to do.
        return False, False
    target_ge_config_version = int(ge_config_version) + 1
    # set version temporarily to CURRENT_GE_CONFIG_VERSION to get functional DataContext
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION,
        context_root_dir=context_root_dir,
    )
    upgrade_helper = upgrade_helper_class(context_root_dir=context_root_dir)
    upgrade_overview, confirmation_required = upgrade_helper.get_upgrade_overview()

    if confirmation_required or from_cli_upgrade_command:
        upgrade_confirmed = confirm_proceed_or_exit(
            confirm_prompt=upgrade_overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )
    else:
        upgrade_confirmed = True

    if upgrade_confirmed:
        cli_message("\nUpgrading project...")
        cli_message(SECTION_SEPARATOR)

        # run upgrade and get report of what was done, if version number should be incremented
        (
            upgrade_report,
            increment_version,
            exception_occurred,
        ) = upgrade_helper.upgrade_project()
        # display report to user
        cli_message(upgrade_report)

        if exception_occurred:
            # restore version number to current number
            DataContext.set_ge_config_version(
                ge_config_version, context_root_dir, validate_config_version=False
            )
            # display report to user
            return False, True

        # set config version to target version
        if increment_version:
            DataContext.set_ge_config_version(
                target_ge_config_version,
                context_root_dir,
                validate_config_version=False,
            )
            return True, False

        # restore version number to current number
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )
        return False, False

    # User declined the upgrade:
    # restore version number to current number
    DataContext.set_ge_config_version(
        ge_config_version, context_root_dir, validate_config_version=False
    )
    cli_message(continuation_message)
    sys.exit(0)
def _suite_edit(
    suite,
    datasource,
    directory,
    jupyter,
    batch_kwargs,
    usage_event,
    suppress_usage_message=False,
):
    """Render an edit notebook for an existing suite; optionally open jupyter.

    Resolves batch_kwargs in priority order: the explicit --batch-kwargs JSON
    argument, then the suite's most recent citation, then an interactive
    datasource/batch selection. Exits 1 on any resolution failure.

    suppress_usage_message is set by callers (e.g. _suite_new) that report
    their own usage event, to avoid double-counting; it defaults to False.
    """
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Verify the kwargs actually load a batch before proceeding.
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                if not suppress_usage_message:
                    # BUG FIX: this error branch previously reported
                    # success=True; all sibling error branches report False.
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
        elif citations:
            # Fall back to the batch recorded with the suite's latest citation.
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            # No usable batch yet: select a datasource and batch interactively.
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = toolkit.select_datasource(
                    context, datasource_name=datasource
                )
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                send_usage_message(
                    data_context=context,
                    event=usage_event,
                    api_version="v2",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        notebook_name = f"edit_{suite.expectation_suite_name}.ipynb"
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer.from_data_context(context).render_to_disk(
            suite, notebook_path, batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                api_version="v2",
                success=True,
            )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)
    except Exception as e:
        # Record the failure before re-raising.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        raise e
def validation_operator_run(name, run_name, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development): Specify the name of the validation operator using the --name argument and the name of the expectation suite using the --suite argument.

    The cli will help you specify the batch of data that you want to validate interactively.


    2. Non-interactive (good for production): Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

    Learn how to create a validation config file here:

    https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

    This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.


    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"Failed to process <red>{err.message}</red>")
        sys.exit(1)

    try:
        if validation_config_file is not None:
            # Non-interactive mode: the JSON file fully describes what to validate.
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

        else:
            # Interactive mode: --suite and --name are both required; the batch is
            # selected with the user's help and assembled into a validation config.
            if suite is None:
                cli_message(
                    """
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
"""
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                # BUG FIX: previously exited 0, contradicting the documented contract
                # ("Otherwise, the command exits with 1") and the --name branch below.
                sys.exit(1)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run"
            )

            if name is None:
                cli_message(
                    """
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            else:
                if name not in context.list_validation_operator_names():
                    cli_message(
                        f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
                    )
                    toolkit.send_usage_message(
                        data_context=context,
                        event="cli.validation_operator.run",
                        success=False,
                    )
                    sys.exit(1)

            batch_kwargs = None

            cli_message(
                """
Let us help you specify the batch of data you want the validation operator to validate."""
            )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=None,
                )

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            }

        try:
            validation_operator_name = validation_config["validation_operator_name"]
            batches_to_validate = []
            # Each configured batch may be validated against several suites; load a
            # batch object per (batch_kwargs, suite) pair.
            for entry in validation_config["batches"]:
                for expectation_suite_name in entry["expectation_suite_names"]:
                    batch = context.get_batch(
                        entry["batch_kwargs"], expectation_suite_name
                    )
                    batches_to_validate.append(batch)

            if run_name is None:
                # Default run name: UTC timestamp, e.g. 20200131T235959.123456Z.
                run_name = datetime.datetime.now(datetime.timezone.utc).strftime(
                    "%Y%m%dT%H%M%S.%fZ"
                )

            run_id = RunIdentifier(run_name=run_name)

            # Pass evaluation parameters only when a suite supplies them (non-interactive
            # mode leaves `suite` as None); the two no-parameter branches were identical
            # and have been merged.
            if suite is None or suite.evaluation_parameters is None:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                )
            else:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                    evaluation_parameters=suite.evaluation_parameters,
                )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message(f"<red>{e}</red>")
            toolkit.send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=False
            )
            sys.exit(1)

        if not results["success"]:
            cli_message("Validation failed!")
            # The CLI ran to completion, so the usage event is a success even though
            # the validation itself failed; the exit code carries the failure.
            toolkit.send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=True
            )
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            toolkit.send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=True
            )
            sys.exit(0)
    except Exception:
        toolkit.send_usage_message(
            data_context=context, event="cli.validation_operator.run", success=False
        )
        # Bare raise preserves the original traceback.
        raise
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Show *message*, record a failed usage event, and terminate with exit code 1.

    Single funnel for fatal CLI errors so usage statistics are always sent
    before the process exits.
    """
    # Display the (color-markup) message to the user first.
    cli_message(message)
    # Record the failure before exiting so the event is not lost.
    send_usage_message(data_context=context, event=usage_event, success=False)
    sys.exit(1)
def tell_user_suite_exists(suite_name: str) -> None:
    """Warn that an expectation suite with this name already exists and point the user at `suite edit`."""
    existing_suite_message = (
        f"<red>An expectation suite named `{suite_name}` already exists.</red>\n"
        f"  - If you intend to edit the suite please use `great_expectations suite edit {suite_name}`."
    )
    cli_message(existing_suite_message)