def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite, prompting for whatever was not supplied.

    WARNING: the flow and name of this method and its interaction with
    _profile_to_create_a_suite require a serious revisiting.

    The process exits with status 1 when no datasource could be selected or
    when ``expectation_suite_name`` already exists in the context.

    :param generator_asset: deprecated spelling of ``data_asset_name``; when
        truthy it emits a DeprecationWarning and overrides ``data_asset_name``
    :param empty_suite: create an empty suite and skip profiling
    :param flag_build_docs: build data docs after profiling
    :param open_docs: after building docs, try to open the validation results
    :return: a tuple: (success, suite name, profiling_results);
        profiling_results is None when an empty suite was requested
    """
    if generator_asset:
        deprecation_text = (
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly."
        )
        warnings.warn(deprecation_text, DeprecationWarning)
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    selected_datasource = select_datasource(context, datasource_name=datasource_name)
    if selected_datasource is None:
        # select_datasource already displayed an error message; all that is left is to exit.
        sys.exit(1)
    datasource_name = selected_datasource.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    batch_inputs = (batch_kwargs_generator_name, data_asset_name, batch_kwargs)
    if any(value is None for value in batch_inputs):
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = resolved
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        # Keep asking until the user picks a name that is not taken yet.
        while True:
            candidate = click.prompt(
                "\nName the new Expectation Suite", default=suggested_name,
            )
            expectation_suite_name = candidate
            if candidate not in context.list_expectation_suite_names():
                break
            tell_user_suite_exists(candidate)

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)
        if open_docs:
            attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    """
    Render an "edit" notebook for an existing suite and optionally launch Jupyter.

    The batch used as a sample is taken, in order of preference, from the
    ``batch_kwargs`` JSON string, from the last suite citation that carries
    batch_kwargs, or from an interactive prompt.

    :param suite: name of the expectation suite to edit
    :param datasource: optional datasource name; written into the parsed
        batch_kwargs and used to pre-select the datasource when prompting
    :param directory: passed to load_data_context_with_error_handling
    :param jupyter: when truthy launch the notebook, otherwise print the command
    :param batch_kwargs: JSON text describing the batch, or a falsy value
    :param usage_event: event name reported through send_usage_message
    """
    raw_batch_kwargs = batch_kwargs
    batch_kwargs = None
    context = load_data_context_with_error_handling(directory)

    def _abort(message):
        # Show the message, record the failed usage event, and stop the CLI.
        cli_message(message)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(require_batch_kwargs=True)

        if raw_batch_kwargs:
            try:
                batch_kwargs = json.loads(raw_batch_kwargs)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Loaded only to prove the batch_kwargs are usable; the batch itself is discarded.
                toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                _abort(
                    f"<red>Please check that your batch_kwargs are valid JSON.\n{je}</red>"
                )
            except ge_exceptions.DataContextError:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as ve:
                _abort(
                    f"<red>Please check that your batch_kwargs are able to load a batch.\n{ve}</red>"
                )
        elif citations:
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """ A batch of data is required to edit the suite - let's help you to specify it."""
            )
            additional_batch_kwargs = None
            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                _abort(f"<red>{ve}</red>")
            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Only prompt when nothing at all was found; an empty-but-not-None value is kept.
            if batch_kwargs is None:
                _, _, _, batch_kwargs = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        suite_name = suite.expectation_suite_name
        notebook_path = _get_notebook_path(context, "edit_{}.ipynb".format(suite_name))
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        # SystemExit from _abort is not an Exception, so only unexpected errors land here.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """
    Print ``message``, report ``usage_event`` as failed, and exit with status 1.

    :param context: data context the usage message is sent for
    :param usage_event: name of the usage-statistics event to report
    :param message: text shown to the user before exiting
    """
    cli_message(message)
    failure_report = {"event": usage_event, "success": False}
    send_usage_message(context, **failure_report)
    sys.exit(1)
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> [bool, bool]:
    """
    Try to upgrade the project by exactly one config version.

    The config version on disk is temporarily set to CURRENT_GE_CONFIG_VERSION
    so the upgrade helper can work with a functional DataContext. It is then
    either advanced to the next integer version or put back to
    ``ge_config_version``.

    If the user declines the confirmation, the original version is restored,
    ``continuation_message`` is printed and the process exits with status 0.

    :param context_root_dir: project root handed to DataContext and the helper
    :param ge_config_version: version currently recorded for the project
    :param continuation_message: text shown when the user declines
    :param from_cli_upgrade_command: force the confirmation prompt
    :return: (increment_version, exception_occurred)
    """
    current_version = int(ge_config_version)
    helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(current_version)
    if not helper_class:
        # No helper registered for this version: nothing to do.
        return False, False

    def _restore_original_version():
        # Put back the version number the project had on entry.
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )

    # set version temporarily to CURRENT_GE_CONFIG_VERSION to get functional DataContext
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION, context_root_dir=context_root_dir,
    )

    helper = helper_class(context_root_dir=context_root_dir)
    overview, needs_confirmation = helper.get_upgrade_overview()

    proceed = True
    if needs_confirmation or from_cli_upgrade_command:
        proceed = confirm_proceed_or_exit(
            confirm_prompt=overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )

    if proceed:
        cli_message("\nUpgrading project...")
        cli_message(SECTION_SEPARATOR)

        # run upgrade and get report of what was done, if version number should be incremented
        report, should_increment, had_exception = helper.upgrade_project()
        cli_message(report)

        if had_exception:
            _restore_original_version()
            return False, True

        if should_increment:
            DataContext.set_ge_config_version(
                current_version + 1, context_root_dir, validate_config_version=False,
            )
            return True, False

        _restore_original_version()
        return False, False

    # The user declined: undo the temporary version bump and leave.
    _restore_original_version()
    cli_message(continuation_message)
    sys.exit(0)
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """
    Render a notebook for editing an existing suite; optionally start Jupyter.

    Usage statistics are reported under the fixed event name "cli.suite.edit".
    When no project configuration is found the error is printed and the
    function returns without exiting.

    :param suite: name of the expectation suite to edit
    :param datasource: optional datasource name; written into the parsed
        batch_kwargs and used to pre-select the datasource when prompting
    :param directory: project directory handed to DataContext
    :param jupyter: when truthy run ``jupyter notebook`` on the rendered file
    :param batch_kwargs: JSON text describing the batch, or a falsy value
    """
    usage_event = "cli.suite.edit"
    supplied_json = batch_kwargs
    batch_kwargs = None

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"<red>{err.message}</red>")
        return

    def _report_failure_and_exit(text):
        # Common tail of every user-facing failure below.
        cli_message(text)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(sort=True, require_batch_kwargs=True)

        if supplied_json:
            try:
                batch_kwargs = json.loads(supplied_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
                assert isinstance(_batch, DataAsset)
            except json_parse_exception as je:
                _report_failure_and_exit(
                    f"<red>Please check that your batch_kwargs are valid JSON.\n{je}</red>"
                )
            except ge_exceptions.DataContextError:
                _report_failure_and_exit(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as ve:
                _report_failure_and_exit(
                    f"<red>Please check that your batch_kwargs are able to load a batch.\n{ve}</red>"
                )
        elif citations:
            # Fall back to the batch recorded by the last citation.
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """ A batch of data is required to edit the suite - let's help you to specify it."""
            )
            additional_batch_kwargs = None
            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                _report_failure_and_exit(f"<red>{ve}</red>")
            if not data_source:
                _report_failure_and_exit(
                    "<red>No datasources found in the context.</red>"
                )

            if batch_kwargs is None:
                prompted = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    generator_asset=None,
                    additional_batch_kwargs=additional_batch_kwargs,
                )
                batch_kwargs = prompted[3]

        notebook_path = os.path.join(
            context.root_directory,
            context.GE_EDIT_NOTEBOOK_DIR,
            "{}.ipynb".format(suite.expectation_suite_name),
        )
        NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            subprocess.call(["jupyter", "notebook", notebook_path])

    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
def tell_user_suite_exists(suite_name: str) -> None:
    """
    Tell the user that ``suite_name`` is already taken and how to edit it.

    :param suite_name: name of the expectation suite that already exists
    """
    already_exists_notice = f"""<red>An expectation suite named `{suite_name}` already exists.</red> - If you intend to edit the suite please use `great_expectations suite edit {suite_name}`."""
    cli_message(already_exists_notice)
def suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """
    Generate a Jupyter notebook for editing an existing expectation suite.

    The SUITE argument is required. This is the name you gave to the suite
    when you created it.

    A batch of data is required to edit the suite, which is used as a sample.

    The edit command will help you specify a batch interactively. Or you can
    specify them manually by providing --batch-kwargs in valid JSON format.

    Read more about specifying batches of data in the documentation: https://docs.greatexpectations.io/
    """
    # Maintainer notes (kept out of the docstring, which reads like CLI help text):
    #   - returns without exiting when the project config is missing or outdated
    #   - exits with status 1 on unusable batch_kwargs or when no datasource is available
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` was never bound because DataContext(...) is what raised, so
        # reading context.root_directory here failed with UnboundLocalError.
        # NOTE(review): `directory` is the closest value available; confirm
        # that _offer_to_install_new_template copes with it being None.
        _offer_to_install_new_template(err, directory)
        return

    suite = _load_suite(context, suite)
    citations = suite.get_citations(sort=True, require_batch_kwargs=True)

    if batch_kwargs_json:
        try:
            batch_kwargs = json.loads(batch_kwargs_json)
            if datasource:
                batch_kwargs["datasource"] = datasource
            # Load the batch once to verify the batch_kwargs actually work.
            _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
            assert isinstance(_batch, DataAsset)
        except json_parse_exception as je:
            cli_message(
                "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                    je
                )
            )
            sys.exit(1)
        except ge_exceptions.DataContextError:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.</red>"
            )
            sys.exit(1)
        except ValueError as ve:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                    ve
                )
            )
            sys.exit(1)
    elif citations:
        # Reuse the batch recorded by the last citation.
        citation = citations[-1]
        batch_kwargs = citation.get("batch_kwargs")

    if not batch_kwargs:
        cli_message(
            """ A batch of data is required to edit the suite - let's help you to specify it."""
        )
        additional_batch_kwargs = None
        try:
            data_source = select_datasource(context, datasource_name=datasource)
        except ValueError as ve:
            cli_message("<red>{}</red>".format(ve))
            sys.exit(1)
        if not data_source:
            cli_message("<red>No datasources found in the context.</red>")
            sys.exit(1)

        if batch_kwargs is None:
            # Only the batch_kwargs element of the returned 4-tuple is needed.
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                generator_name=None,
                generator_asset=None,
                additional_batch_kwargs=additional_batch_kwargs,
            )

    notebook_name = "{}.ipynb".format(suite.expectation_suite_name)
    notebook_path = os.path.join(
        context.root_directory, context.GE_EDIT_NOTEBOOK_DIR, notebook_name
    )
    NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

    cli_message(
        "To continue editing this suite, run <green>jupyter notebook {}</green>".format(
            notebook_path
        )
    )

    if jupyter:
        subprocess.call(["jupyter", "notebook", notebook_path])
def init(target_directory, view):
    """
    Create a new project and help with onboarding.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # Exit statuses raised below:
    #   5 - a complete-looking project exists but its DataContext cannot be built
    #   0 - the user declined to start a new project
    # SystemExit is raised directly rather than calling exit(): that builtin is
    # injected by the `site` module and is not guaranteed to exist.
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    ge_yml = os.path.join(ge_dir, DataContext.GE_YML)

    cli_message(GREETING)

    # TODO this should be a property
    if os.path.isfile(ge_yml):
        if DataContext.all_uncommitted_directories_exist(
            ge_dir
        ) and DataContext.config_variables_yml_exist(ge_dir):
            # Ensure the context can be instantiated
            try:
                _ = DataContext(ge_dir)
                cli_message(PROJECT_IS_COMPLETE)
            except ge_exceptions.DataContextError as e:
                cli_message("<red>{}</red>".format(e))
                raise SystemExit(5)
        else:
            _complete_onboarding(target_directory)

        try:
            # if expectations exist, offer to build docs
            context = DataContext(ge_dir)
            if context.list_expectation_suite_keys() and click.confirm(
                BUILD_DOCS_PROMPT, default=True
            ):
                context.build_data_docs()
                context.open_data_docs()
        except ge_exceptions.DataContextError as e:
            cli_message("<red>{}</red>".format(e))
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            raise SystemExit(0)

        # The third element (datasource type) is not needed here.
        context, data_source_name, _ = _create_new_project(target_directory)
        if not data_source_name:
            # no datasource was created
            return

        profile_datasource(
            context,
            data_source_name,
            open_docs=view,
            additional_batch_kwargs={"limit": 1000},
        )
        cli_message(
            """\n<cyan>Great Expectations is now set up in your project!</cyan>"""
        )
def _complete_onboarding(target_dir):
    """
    Create the project scaffolding in ``target_dir`` and report completion.

    :param target_dir: directory handed to DataContext.create
    :return: always True
    """
    # The created context is not needed here; only the call's effect is.
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
    return True
def init(target_directory, view):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # Exit statuses used below:
    #   0 - user declined to begin, or setup finished for a single datasource
    #   1 - project state check failed, no datasource was created, or docs/profiling failed
    #   5 - onboarding of an existing project failed
    # sys.exit is used throughout; the bare exit() builtin comes from the
    # `site` module and is not guaranteed to be available.
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            DataContext.create(target_directory)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            DataContext.create(target_directory)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            # Deliberately not fatal here: loading the context below fails and exits with 1.
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            datasources = context.list_datasources()
            if not datasources:
                datasource_name, _ = add_datasource_impl(
                    context, choose_one_data_asset=True
                )
                if not datasource_name:
                    # no datasource was created
                    sys.exit(1)

            # Re-read: a datasource may just have been added.
            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                success, suite_name = create_expectation_suite_impl(
                    context,
                    datasource_name=datasource_name,
                    show_intro_message=False,
                    additional_batch_kwargs={"limit": 1000},
                    open_docs=view,
                )
                if success:
                    cli_message(
                        "A new Expectation suite '{}' was added to your project".format(
                            suite_name
                        )
                    )

                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    """
    Create a new expectation suite and, for an empty suite, open it for editing.

    :param suite: name for the new suite (None lets the user be prompted)
    :param directory: passed to load_data_context_with_error_handling
    :param empty: create an empty suite instead of a profiled one
    :param jupyter: forwarded to _suite_edit when an empty suite is created
    :param view: forwarded as ``open_docs`` to the suite creation helper
    :param batch_kwargs: JSON text describing the batch, or None
    :param usage_event: event name reported through send_usage_message
    """
    # TODO break this up into demo and new
    context = load_data_context_with_error_handling(directory)

    datasource_name = None
    generator_name = None
    generator_asset = None

    # _suite_edit parses its batch_kwargs argument with json.loads, so it must
    # receive the raw JSON text. Handing it the parsed dict raised TypeError.
    batch_kwargs_json = batch_kwargs

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name = create_expectation_suite_impl(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            generator_asset=generator_asset,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            cli_message(
                "A new Expectation suite '{}' was added to your project".format(
                    suite_name
                )
            )
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
                    )
                _suite_edit(
                    suite_name,
                    datasource_name,
                    directory,
                    jupyter=jupyter,
                    batch_kwargs=batch_kwargs_json,
                    usage_event=usage_event,
                )
            send_usage_message(data_context=context, event=usage_event, success=True)
        else:
            send_usage_message(data_context=context, event=usage_event, success=False)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        # Known failure modes: show the message and exit instead of a traceback.
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)
    except Exception:
        # Anything unexpected is still reported as a failed event, then re-raised.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise
def upgrade_project(context_root_dir, ge_config_version, from_cli_upgrade_command=False):
    """
    Interactively upgrade a project until it reaches MINIMUM_SUPPORTED_CONFIG_VERSION.

    After an initial confirmation, one upgrade helper is run per config
    version. The loop stops early when no helper is registered for the current
    version or when a helper reports that the version should not be
    incremented. The function always ends the process with status 0.

    :param context_root_dir: project root passed to DataContext and the helpers
    :param ge_config_version: config version currently recorded for the project
    :param from_cli_upgrade_command: selects the shorter intro message, which
        omits the "must be upgraded" sentence
    """
    # Shown whenever the user declines a prompt.
    continuation_message = (
        "\nOk, exiting now. To upgrade at a later time, use the following command: "
        "<cyan>great_expectations project upgrade</cyan>\n\nTo learn more about the upgrade "
        "process, visit "
        "<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html"
        "</cyan>.\n"
    )
    if from_cli_upgrade_command:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {MINIMUM_SUPPORTED_CONFIG_VERSION}.</red>"
        )
    else:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {MINIMUM_SUPPORTED_CONFIG_VERSION}.\nIn order to proceed, "
            f"your project must be upgraded.</red>"
        )
    cli_message(message)
    upgrade_prompt = (
        "\nWould you like to run the Upgrade Helper to bring your project up-to-date?"
    )
    # NOTE(review): presumably exits on "no" here, since exit_on_no is not
    # overridden and the return value is ignored - confirm against the helper.
    confirm_proceed_or_exit(confirm_prompt=upgrade_prompt,
                            continuation_message=continuation_message)
    cli_message(SECTION_SEPARATOR)

    # use loop in case multiple upgrades need to take place
    while ge_config_version < MINIMUM_SUPPORTED_CONFIG_VERSION:
        upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(
            int(ge_config_version))
        if not upgrade_helper_class:
            # No helper for this version: leave the loop with the version unchanged.
            break
        target_ge_config_version = int(ge_config_version) + 1
        # set version temporarily to MINIMUM_SUPPORTED_CONFIG_VERSION to get functional DataContext
        DataContext.set_ge_config_version(
            config_version=MINIMUM_SUPPORTED_CONFIG_VERSION,
            context_root_dir=context_root_dir,
        )
        upgrade_helper = upgrade_helper_class(
            context_root_dir=context_root_dir)
        upgrade_overview, confirmation_required = upgrade_helper.get_upgrade_overview()

        if confirmation_required:
            upgrade_confirmed = confirm_proceed_or_exit(
                confirm_prompt=upgrade_overview,
                continuation_message=continuation_message,
                exit_on_no=False,
            )
        else:
            upgrade_confirmed = True

        if upgrade_confirmed:
            cli_message("\nUpgrading project...")
            cli_message(SECTION_SEPARATOR)
            # run upgrade and get report of what was done, if version number should be incremented
            upgrade_report, increment_version = upgrade_helper.upgrade_project()
            # display report to user
            cli_message(upgrade_report)
            # set config version to target version
            if increment_version:
                DataContext.set_ge_config_version(
                    target_ge_config_version,
                    context_root_dir,
                    validate_config_version=False,
                )
                ge_config_version += 1
            else:
                # restore version number to current number
                DataContext.set_ge_config_version(
                    ge_config_version,
                    context_root_dir,
                    validate_config_version=False)
                break
        else:
            # restore version number to current number
            DataContext.set_ge_config_version(ge_config_version,
                                              context_root_dir,
                                              validate_config_version=False)
            cli_message(continuation_message)
            sys.exit(0)

    cli_message(SECTION_SEPARATOR)
    upgrade_success_message = "<green>Upgrade complete. Exiting...</green>\n"
    upgrade_incomplete_message = f"""\
<red>The Upgrade Helper was unable to perform a complete project upgrade. Next steps:</red>
    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above
    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to <cyan>{ ge_config_version + 1}</cyan>\n
To learn more about the upgrade process, visit \
<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html</cyan>
"""

    # Still below the minimum means the loop stopped early.
    if ge_config_version < MINIMUM_SUPPORTED_CONFIG_VERSION:
        cli_message(upgrade_incomplete_message)
    else:
        cli_message(upgrade_success_message)
    sys.exit(0)
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    """
    Create a new expectation suite and then hand it to the edit flow.

    :param suite: name for the new suite (None lets the user be prompted)
    :param directory: passed to toolkit.load_data_context_with_error_handling
    :param empty: create an empty suite instead of a profiled one
    :param jupyter: forwarded to _suite_edit
    :param view: forwarded as ``open_docs`` to toolkit.create_expectation_suite
    :param batch_kwargs: JSON text describing the batch, or None
    :param usage_event: event name reported for the "new" command itself
    """
    # TODO break this up into demo and new
    context = toolkit.load_data_context_with_error_handling(directory)

    datasource_name = None
    generator_name = None
    data_asset_name = None

    # Keep the raw JSON text for _suite_edit.
    # NOTE(review): assumes _suite_edit parses its batch_kwargs argument with
    # json.loads, as the other _suite_edit variants do - confirm. If so, the
    # parsed dict that used to be passed made it raise TypeError.
    batch_kwargs_json = batch_kwargs

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        # profiling results are not used by this command
        success, suite_name, _ = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            data_asset_name=data_asset_name,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            if empty and jupyter:
                cli_message(
                    """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
                )
            send_usage_message(data_context=context, event=usage_event, success=True)

            _suite_edit(
                suite_name,
                datasource_name,
                directory,
                jupyter=jupyter,
                batch_kwargs=batch_kwargs_json,
                # or else we will be sending `cli.suite.new` which is incorrect
                usage_event="cli.suite.edit",
                # dont want actually send usage_message since the function call
                # is not the result of actual usage
                suppress_usage_message=True,
            )
        else:
            send_usage_message(data_context=context, event=usage_event, success=False)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        # Known failure modes: show the message and exit instead of a traceback.
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)
    except Exception:
        # Anything unexpected is still reported as a failed event, then re-raised.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise
def _validate_tap_filename(tap_filename):
    """
    Exit with status 1 unless ``tap_filename`` ends in ``.py``.

    :param tap_filename: file name supplied by the user
    :return: None when the name is acceptable
    :raises SystemExit: with code 1 after printing an error when it is not
    """
    if tap_filename.endswith(".py"):
        return
    cli_message(
        "<red>Tap filename must end in .py. Please correct and re-run</red>"
    )
    # Raise SystemExit directly: the bare exit() builtin is injected by the
    # `site` module and is not guaranteed to exist.
    raise SystemExit(1)
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    open_docs=False,
    profiler_configuration="demo",
):
    """
    Create a new expectation suite, prompting for whatever was not supplied.

    The process exits with status 1 when no datasource could be selected or
    when ``expectation_suite_name`` already exists in the context.

    :param empty_suite: create an empty suite and skip profiling and docs
    :param open_docs: after building docs, try to open the validation results
    :return: a tuple: (success, suite name)
    """
    wants_intro = show_intro_message and not empty_suite
    if wants_intro:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    chosen_datasource = select_datasource(context, datasource_name=datasource_name)
    if chosen_datasource is None:
        # select_datasource already displayed an error message; all that is left is to exit.
        sys.exit(1)
    datasource_name = chosen_datasource.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    fully_specified = (
        batch_kwargs_generator_name is not None
        and generator_asset is not None
        and batch_kwargs is not None
    )
    if not fully_specified:
        (
            datasource_name,
            batch_kwargs_generator_name,
            generator_asset,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            generator_asset=generator_asset,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        fallback_name = _get_default_expectation_suite_name(
            batch_kwargs, generator_asset
        )
        # Prompt repeatedly until the chosen name is free.
        name_is_taken = True
        while name_is_taken:
            expectation_suite_name = click.prompt(
                "\nName the new expectation suite",
                default=fallback_name,
                show_default=True,
            )
            name_is_taken = (
                expectation_suite_name in context.list_expectation_suite_names()
            )
            if name_is_taken:
                tell_user_suite_exists(expectation_suite_name)

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        generator_asset,
        profiler_configuration,
    )

    build_docs(context, view=False)
    if open_docs:
        _attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name
def _get_datasource(context, datasource):
    """
    Select a datasource by name, exiting with status 1 when none is returned.

    :param context: data context handed to toolkit.select_datasource
    :param datasource: datasource name forwarded as ``datasource_name``
    :return: whatever toolkit.select_datasource returned (truthy)
    """
    selected = toolkit.select_datasource(context, datasource_name=datasource)
    if not selected:
        cli_message("<red>No datasources found in the context.</red>")
        sys.exit(1)
    return selected
def validation_operator_run(name, run_id, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to validate interactively.

    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here: https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here: https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    usage_event = "cli.validation_operator.run"

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    def _abort(message):
        # Print the problem, record the failed usage event, and exit with 1.
        # Every failure exits with 1, as the help text above promises. A
        # missing --suite used to exit with 0.
        cli_message(message)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    # Only the interactive mode loads a suite object. Its evaluation parameters
    # are tracked separately so the non-interactive mode never touches `suite`,
    # which is still a plain string (or None) there.
    evaluation_parameters = None

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (IOError, json_parse_exception) as e:
                _abort(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                _abort(
                    "<red>The validation config in {0:s} is misconfigured: {1:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )
        else:
            if suite is None:
                _abort(
                    """ Please use --suite argument to specify the name of the expectation suite. Call `great_expectation suite list` command to list the expectation suites in your project. """
                )

            suite = load_expectation_suite(context, suite)
            evaluation_parameters = suite.evaluation_parameters

            if name is None:
                _abort(
                    """ Please use --name argument to specify the name of the validation operator. Call `great_expectation validation-operator list` command to list the operators in your project. """
                )
            elif name not in context.list_validation_operator_names():
                _abort(
                    f""" Could not find a validation operator {name}. Call `great_expectation validation-operator list` command to list the operators in your project. 
"""
                )

            cli_message(
                """ Let's help you specify the batch of data your want the validation operator to validate."""
            )

            try:
                data_source = select_datasource(context)
            except ValueError as ve:
                _abort("<red>{}</red>".format(ve))

            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Only the batch_kwargs element of the returned 4-tuple is needed.
            _, _, _, batch_kwargs = get_batch_kwargs(
                context, datasource_name=data_source.name
            )

            # Same shape as the JSON file accepted in non-interactive mode.
            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            }

        try:
            validation_operator_name = validation_config["validation_operator_name"]
            # One batch per (batch_kwargs, suite name) pair listed in the config.
            batches_to_validate = [
                context.get_batch(entry["batch_kwargs"], expectation_suite_name)
                for entry in validation_config["batches"]
                for expectation_suite_name in entry["expectation_suite_names"]
            ]

            if run_id is None:
                run_id = datetime.utcnow().strftime("%Y%m%dT%H%M%S.%fZ")

            run_kwargs = {
                "assets_to_validate": batches_to_validate,
                "run_id": run_id,
            }
            # Pass evaluation parameters only when the loaded suite has some.
            if evaluation_parameters is not None:
                run_kwargs["evaluation_parameters"] = evaluation_parameters
            results = context.run_validation_operator(
                validation_operator_name, **run_kwargs
            )
        except (ge_exceptions.DataContextError, IOError, SQLAlchemyError) as e:
            _abort("<red>{}</red>".format(e))

        # The usage event is a success in both cases because the operator ran;
        # only the exit status reflects the validation outcome.
        if not results["success"]:
            cli_message("Validation Failed!")
            send_usage_message(data_context=context, event=usage_event, success=True)
            sys.exit(1)
        else:
            cli_message("Validation Succeeded!")
            send_usage_message(data_context=context, event=usage_event, success=True)
            sys.exit(0)
    except Exception:
        # SystemExit is not an Exception, so only unexpected errors land here.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise
def _complete_onboarding(target_dir):
    """
    Ask whether to finish onboarding and, if confirmed, create the project files.

    :param target_dir: directory handed to DataContext.create
    """
    if not click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True):
        # Declined: tell the user how to come back later.
        cli_message(RUN_INIT_AGAIN)
        return
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
def suite_new(suite, directory, empty, jupyter, view, batch_kwargs):
    """
    Create a new Expectation Suite.

    Great Expectations will choose a couple of columns and generate expectations about them
    to demonstrate some examples of assertions you can make about your data.

    If you wish to skip the examples, add the `--empty` flag.
    """
    usage_event = "cli.suite.new"

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    datasource_name = None
    generator_name = None
    generator_asset = None

    # _suite_edit parses its batch_kwargs argument with json.loads, so it must
    # receive the raw JSON text. Handing it the parsed dict raised TypeError.
    batch_kwargs_json = batch_kwargs

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name = create_expectation_suite_impl(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            generator_asset=generator_asset,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            cli_message(
                "A new Expectation suite '{}' was added to your project".format(
                    suite_name
                )
            )
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
                    )
                _suite_edit(
                    suite_name,
                    datasource_name,
                    directory,
                    jupyter=jupyter,
                    batch_kwargs=batch_kwargs_json,
                )
            send_usage_message(data_context=context, event=usage_event, success=True)
        else:
            send_usage_message(data_context=context, event=usage_event, success=False)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        # Known failure modes: show the message and exit instead of a traceback.
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)
    except Exception:
        # Anything unexpected is still reported as a failed event, then re-raised.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise