def checkpoint_new(checkpoint, suite, directory, datasource):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    suite_name = suite
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)
    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    suite: ExpectationSuite = toolkit.load_expectation_suite(
        context, suite_name, usage_event
    )

    datasource = toolkit.select_datasource(context, datasource_name=datasource)
    if datasource is None:
        # Selection was aborted/failed; record the failed invocation and stop.
        send_usage_message(context, usage_event, success=False)
        sys.exit(1)
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)

    template = _load_checkpoint_yml_template()
    # This picky update helps template comments stay in place
    template["batches"][0]["batch_kwargs"] = dict(batch_kwargs)
    template["batches"][0]["expectation_suite_names"] = [suite.expectation_suite_name]

    checkpoint_file = _write_checkpoint_to_disk(context, template, checkpoint)
    cli_message(
        f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To edit this checkpoint edit the checkpoint file: {checkpoint_file}
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
    )
    send_usage_message(context, usage_event, success=True)
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)

    Loads the named checkpoint, executes it, reports results, and exits
    with status 0 when validation succeeded, 1 when it failed.
    """
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False
    )
    usage_event = "cli.checkpoint.run"
    checkpoint: Checkpoint = toolkit.load_checkpoint(
        context,
        checkpoint,
        usage_event,
    )

    try:
        results = checkpoint.run()
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{e}</red>"
        )

    # The two branches previously duplicated the usage-message / details /
    # exit sequence; consolidated here. Note a failed validation is still a
    # successful CLI invocation, hence success=True either way.
    validation_succeeded = results["success"]
    if validation_succeeded:
        cli_message("Validation succeeded!")
    else:
        cli_message("Validation failed!")
    send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0 if validation_succeeded else 1)
def suite_delete(ctx, suite):
    """
    Delete an expectation suite from the expectation store.
    """
    # NOTE(review): this command is currently disabled — the call below
    # prints a "not implemented" message and exits, so everything after it
    # is unreachable (kept as the intended future implementation). The
    # leading "</red>" in the no-suites message also looks like it should
    # be an opening "<red>" tag — confirm before enabling.
    display_not_implemented_message_and_exit()
    usage_event = "cli.suite.delete"
    # Resolve the project directory from the CLI config file location.
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            "</red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found.")
    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    toolkit.send_usage_message(data_context=context,
                               event=usage_event,
                               success=True)
def suite_list(ctx):
    """Lists available Expectation Suites."""
    # NOTE(review): this command is currently disabled — the call below
    # prints a "not implemented" message and exits; the code after it is
    # unreachable and kept as the intended future implementation.
    display_not_implemented_message_and_exit()
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        # Render each suite name as a colorized bullet line.
        suite_names = [
            " - <cyan>{}</cyan>".format(suite_name)
            for suite_name in context.list_expectation_suite_names()
        ]
        if len(suite_names) == 0:
            cli_message("No Expectation Suites found")
            toolkit.send_usage_message(data_context=context,
                                       event="cli.suite.list",
                                       success=True)
            return
        elif len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = "{} Expectation Suites found:".format(
                len(suite_names))
        cli_message_list(suite_names, list_intro_string)
        toolkit.send_usage_message(data_context=context,
                                   event="cli.suite.list",
                                   success=True)
    except Exception as e:
        # Record the failure in usage statistics, then propagate.
        toolkit.send_usage_message(data_context=context,
                                   event="cli.suite.list",
                                   success=False)
        raise e
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    """Create an expectation suite and render its scaffold notebook.

    Aborts (exit 1) when the suite already exists or when no datasource can
    be selected; otherwise writes the notebook and optionally launches
    jupyter.
    """
    usage_event = "cli.suite.scaffold"
    context = toolkit.load_data_context_with_error_handling(directory)
    nb_path = _get_notebook_path(context, f"scaffold_{suite}.ipynb")

    # Bail out rather than clobber an existing suite.
    if suite in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite)
        if os.path.isfile(nb_path):
            cli_message(
                f" - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{nb_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    new_suite = context.create_expectation_suite(suite)
    _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=datasource.name)
    SuiteScaffoldNotebookRenderer(context, new_suite, batch_kwargs).render_to_disk(
        nb_path
    )

    if jupyter:
        toolkit.launch_jupyter_notebook(nb_path)
    else:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {nb_path}`"
        )
    send_usage_message(data_context=context, event=usage_event, success=True)
def docs_build(directory, site_name, view=True, assume_yes=False):
    """ Build Data Docs for a project."""
    # Load the project context, build the docs, then record the usage event.
    context = toolkit.load_data_context_with_error_handling(directory)
    build_docs(context, site_name=site_name, view=view, assume_yes=assume_yes)
    send_usage_message(
        data_context=context, event="cli.docs.build", success=True
    )
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more
    Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one
    Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different
    datasources paired with one or more Expectation Suites each.
    """
    directory: str = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=directory,
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context

    # Emit the `.begin` usage event for the invoked subcommand, and stash
    # the matching `.end` event name for the subcommand to send later.
    event_prefix = f"cli.checkpoint.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        data_context=context,
        event=f"{event_prefix}.begin",
        success=True,
    )
    ctx.obj.usage_event_end = f"{event_prefix}.end"
def suite_list(directory):
    """Lists available Expectation Suites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        # Render each suite name as a colorized bullet line.
        suite_names = [
            " - <cyan>{}</cyan>".format(suite_name)
            for suite_name in context.list_expectation_suite_names()
        ]
        if len(suite_names) == 0:
            cli_message("No Expectation Suites found")
            send_usage_message(
                data_context=context, event="cli.suite.list", success=True
            )
            return
        elif len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = "{} Expectation Suites found:".format(len(suite_names))
        cli_message_list(suite_names, list_intro_string)
        send_usage_message(data_context=context, event="cli.suite.list", success=True)
    except Exception:
        # Record the failure, then re-raise with a bare `raise` (idiomatic;
        # `raise e` re-raised the bound name unnecessarily).
        send_usage_message(data_context=context, event="cli.suite.list", success=False)
        raise
def _tap_new(suite, tap_filename, directory, usage_event, datasource=None):
    """Generate a new tap script that validates a batch against a suite.

    Validates the requested filename, resolves the datasource and suite,
    collects batch kwargs, writes the tap file, and reports the outcome to
    usage statistics. Exceptions propagate after the failure is recorded.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        _validate_tap_filename(tap_filename)
        context_directory = context.root_directory
        datasource = _get_datasource(context, datasource)
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        _, _, _, batch_kwargs = get_batch_kwargs(context, datasource.name)
        tap_filename = _write_tap_file_to_disk(
            batch_kwargs, context_directory, suite, tap_filename
        )
        cli_message(
            f"""\
<green>A new tap has been generated!</green>
To run this tap, run: <green>python {tap_filename}</green>
You can edit this script or place this code snippet in your pipeline."""
        )
        send_usage_message(data_context=context, event=usage_event, success=True)
    except Exception:
        # Record the failure, then re-raise with a bare `raise` so the
        # original traceback is preserved (was `raise e`).
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise
def checkpoint_script(checkpoint, directory):
    """
    Create a python script to run a checkpoint. (Experimental)

    Checkpoints can be run directly without this script using the
    `great_expectations checkpoint run` command.

    This script is provided for those who wish to run checkpoints via python.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.checkpoint.script"

    # Loading the checkpoint up front surfaces configuration errors early.
    _ = toolkit.load_checkpoint(context, checkpoint, usage_event)

    script_name = f"run_{checkpoint}.py"
    script_path = os.path.join(
        context.root_directory, context.GE_UNCOMMITTED_DIR, script_name
    )

    # Never clobber an existing script of the same name.
    if os.path.isfile(script_path):
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            f"""<red>Warning! A script named {script_name} already exists and this command will not overwrite it.</red>
  - Existing file path: {script_path}""",
        )

    _write_checkpoint_script_to_disk(context.root_directory, checkpoint, script_path)
    cli_message(
        f"""<green>A python script was created that runs the checkpoint named: `{checkpoint}`</green>
  - The script is located in `great_expectations/uncommitted/run_{checkpoint}.py`
  - The script can be run with `python great_expectations/uncommitted/run_{checkpoint}.py`"""
    )
    send_usage_message(context, event=usage_event, success=True)
def store_list(directory):
    """List known Stores."""
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        stores = context.list_stores()
        if len(stores) == 0:
            cli_message("No Stores found")
            send_usage_message(
                data_context=context, event="cli.store.list", success=True
            )
            return
        elif len(stores) == 1:
            list_intro_string = "1 Store found:"
        else:
            list_intro_string = "{} Stores found:".format(len(stores))
        cli_message(list_intro_string)
        # Print each store config as its own dict block, separated by blanks.
        for store in stores:
            cli_message("")
            cli_message_dict(store)
        send_usage_message(data_context=context, event="cli.store.list", success=True)
    except Exception:
        # Record the failure, then re-raise with a bare `raise` so the
        # original traceback is preserved (was `raise e`).
        send_usage_message(data_context=context, event="cli.store.list", success=False)
        raise
def get_data_context_from_config_file(self) -> DataContext:
    """Load the DataContext for the directory named in the CLI config file."""
    project_dir: str = toolkit.parse_cli_config_file_location(
        config_file_location=self.config_file_location
    ).get("directory")
    return toolkit.load_data_context_with_error_handling(
        directory=project_dir,
        from_cli_upgrade_command=False,
    )
def init(target_directory, usage_stats):
    """
    Create a new Great Expectations project configuration and fill in the Datasources and Suites based on the kedro catalog
    """
    # Removed the unused `from kedro.framework.context import load_context`
    # import: the code below uses session.load_context(), not that function.
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)

    if not DataContext.does_config_exist_on_disk(ge_dir):
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            exit(0)
        try:
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(SETUP_SUCCESS)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            exit(5)

    if click.confirm("Generate Datasources based on Kedro Context?", default=True):
        package_name = Path(os.getcwd()).resolve().name
        with KedroSession.create(package_name) as session:
            kedro_context = session.load_context()
            ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
            new_datasources = generate_datasources(kedro_context, ge_context)
            if new_datasources:
                cli_message("Added {} New datasources to your project.".format(
                    len(new_datasources)))

    if click.confirm(
            "Generate Basic Validation Suites based on Kedro Context?",
            default=True):
        package_name = Path(os.getcwd()).resolve().name
        with KedroSession.create(package_name) as session:
            kedro_context = session.load_context()
            ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
            new_suites = generate_basic_suites(kedro_context, ge_context)
            if new_suites:
                # Fixed: this previously reported "New datasources" even
                # though generate_basic_suites creates validation suites.
                cli_message("Added {} New validation suites to your project.".format(
                    len(new_suites)))
def project_upgrade(directory):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    # A truthy return presumably means the project needed no upgrade —
    # report that and stop; otherwise fall through silently.
    if load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    ):
        cli_message(
            "<green>Your project is up-to-date - no upgrade is necessary.\n</green>"
        )
        sys.exit(0)
def store(ctx):
    """Store operations"""
    project_dir: str = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = toolkit.load_data_context_with_error_handling(
        directory=project_dir,
        from_cli_upgrade_command=False,
    )
def suite_new(directory, empty, replace, batch_kwargs):
    """
    Create Great Expectation Suites based on the kedro catalog using the BasicSuiteBuilderProfiler.

    If you wish to create suites without using the BasicSuiteBuilderProfiler, add the `--empty` flag.
    """
    # Load the kedro project context first, then the GE context, and hand
    # both to the suite generator.
    kedro_context = load_context(Path.cwd())
    ge_context = toolkit.load_data_context_with_error_handling(directory)
    generate_basic_suites(kedro_context, ge_context, empty, replace, batch_kwargs)
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    # A truthy return presumably means the project needed no upgrade.
    if load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    ):
        cli_message(
            "<green>Your project is up-to-date - no further upgrade is necessary.\n</green>"
        )
        sys.exit(0)
def _suite_new(suite: str, directory: str, empty: bool, jupyter: bool,
               view: bool, batch_kwargs, usage_event: str) -> None:
    """Create a new expectation suite, optionally opening it for editing.

    Delegates suite creation to ``toolkit.create_expectation_suite``; when
    ``empty`` is True the suite is created without profiling and then handed
    to ``_suite_edit`` (optionally launching jupyter). A usage-statistics
    event is sent reflecting success or failure; expected errors exit with
    status 1, unexpected ones are re-raised after being recorded.
    """
    # TODO break this up into demo and new
    context = toolkit.load_data_context_with_error_handling(directory)
    # Datasource/generator selection is deferred: create_expectation_suite
    # presumably prompts when these are None — confirm against its docs.
    datasource_name = None
    generator_name = None
    generator_asset = None
    try:
        if batch_kwargs is not None:
            # batch_kwargs arrives as a JSON string from the CLI option.
            batch_kwargs = json.loads(batch_kwargs)
        success, suite_name = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            generator_asset=generator_asset,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            open_docs=view)
        if success:
            cli_message(
                "A new Expectation suite '{}' was added to your project".format(
                    suite_name
                )
            )
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
                    )
                # Hand off to the edit flow so the empty suite can be filled in.
                _suite_edit(
                    suite_name,
                    datasource_name,
                    directory,
                    jupyter=jupyter,
                    batch_kwargs=batch_kwargs,
                    usage_event=usage_event,
                )
            send_usage_message(data_context=context, event=usage_event, success=True)
        else:
            send_usage_message(data_context=context, event=usage_event, success=False)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        # Expected/user-facing failures: show the error and exit cleanly.
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)
    except Exception as e:
        # Unexpected failures: record, then propagate.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.checkpoint.run"
    checkpoint_config = toolkit.load_checkpoint(context, checkpoint, usage_event)
    checkpoint_file = f"great_expectations/checkpoints/{checkpoint}.yml"

    # TODO loading batches will move into DataContext eventually
    batches_to_validate = []
    # Fixed: the outer loop variable was also named `batch` and was
    # reassigned by the inner loop (shadowing); renamed to `batch_config`.
    for batch_config in checkpoint_config["batches"]:
        _validate_at_least_one_suite_is_listed(context, batch_config, checkpoint_file)
        batch_kwargs = batch_config["batch_kwargs"]
        for suite_name in batch_config["expectation_suite_names"]:
            suite = toolkit.load_expectation_suite(context, suite_name, usage_event)
            try:
                batch = toolkit.load_batch(context, suite, batch_kwargs)
            except (FileNotFoundError, SQLAlchemyError, OSError, DataContextError) as e:
                toolkit.exit_with_failure_message_and_stats(
                    context,
                    usage_event,
                    f"""<red>There was a problem loading a batch:
  - Batch: {batch_kwargs}
  - {e}
  - Please verify these batch kwargs in the checkpoint file: `{checkpoint_file}`</red>""",
                )
            batches_to_validate.append(batch)

    try:
        results = context.run_validation_operator(
            checkpoint_config["validation_operator_name"],
            assets_to_validate=batches_to_validate,
            # TODO prepare for new RunID - checkpoint name and timestamp
            # run_id=RunID(checkpoint)
        )
    except DataContextError as e:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{e}</red>"
        )

    if not results["success"]:
        cli_message("Validation failed!")
        send_usage_message(context, event=usage_event, success=True)
        print_validation_operator_results_details(results)
        sys.exit(1)

    cli_message("Validation succeeded!")
    send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0)
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    upgraded = load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    )
    # Truthy result: upgrade succeeded or was unnecessary; exit cleanly.
    if not upgraded:
        cli_message("<red>Error: Your project could not be upgraded.\n</red>")
        sys.exit(1)
    sys.exit(0)
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    if legacy:
        suite_name = suite
        usage_event = "cli.checkpoint.new"
        context = toolkit.load_data_context_with_error_handling(directory)
        ge_config_version = context.get_config().config_version
        # Legacy checkpoint creation only supports pre-v3 project configs.
        if ge_config_version >= 3:
            cli_message(
                f"""<red>The `checkpoint new` CLI command is not yet implemented for GE config versions >= 3.</red>"""
            )
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)

        _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
        suite: ExpectationSuite = toolkit.load_expectation_suite(
            context, suite_name, usage_event)
        datasource = toolkit.select_datasource(context, datasource_name=datasource)
        if datasource is None:
            # Datasource selection failed or was cancelled.
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _, _, _, batch_kwargs = toolkit.get_batch_kwargs(
            context, datasource.name)

        # Register a LegacyCheckpoint pairing the chosen batch with the suite.
        _ = context.add_checkpoint(
            name=checkpoint,
            **{
                "class_name": "LegacyCheckpoint",
                "validation_operator_name": "action_list_operator",
                "batches": [{
                    "batch_kwargs": dict(batch_kwargs),
                    "expectation_suite_names": [suite.expectation_suite_name],
                }],
            },
        )
        cli_message(
            f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
        )
        send_usage_message(context, usage_event, success=True)
    # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    else:
        pass
def suite_delete(suite, directory):
    """Delete an expectation suite from the expectation store."""
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if len(suite_names) == 0:
        cli_message("No expectation suites found")
        return

    # Fixed: this previously built an ExpectationSuiteIdentifier and tested
    # its truthiness (`if key:`), which is always True — the named suite's
    # existence was never actually checked. Test membership instead.
    if suite in suite_names:
        expectation_suite = ExpectationSuite(expectation_suite_name=suite)
        context.delete_expectation_suite(expectation_suite)
        cli_message(f"Deleted the expectation suite named: {suite}")
    else:
        cli_message("No matching expectation suites found")
        sys.exit(1)
def datasource_new(directory):
    """
    Create Great Expectation Datasources based on the kedro catalog.

    Will create one Datasource each dataset in the catalog.
    Only supports Spark and Pandas type datasets.
    """
    from kedro.framework.context import load_context

    ge_context = toolkit.load_data_context_with_error_handling(directory)
    kedro_context = load_context(Path.cwd())
    new_datasources = generate_datasources(kedro_context, ge_context)

    if not new_datasources:
        # no datasource was created
        sys.exit(1)
    cli_message(
        "Added {} New datasources to your project.".format(len(new_datasources))
    )
def datasource(ctx):
    """Datasource operations"""
    project_dir: str = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=project_dir,
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context

    # Emit the `.begin` usage event for the invoked subcommand, and stash
    # the matching `.end` event name for the subcommand to send later.
    event_prefix = f"cli.datasource.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        data_context=context,
        event=f"{event_prefix}.begin",
        success=True,
    )
    ctx.obj.usage_event_end = f"{event_prefix}.end"
def checkpoint_list(directory):
    """List configured checkpoints. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    checkpoints = context.list_checkpoints()
    if not checkpoints:
        cli_message(
            "No checkpoints found.\n"
            " - Use the command `great_expectations checkpoint new` to create one."
        )
        send_usage_message(context, event="cli.checkpoint.list", success=True)
        sys.exit(0)

    count = len(checkpoints)
    plural = "s" if count > 1 else ""
    cli_message_list(
        [f" - <cyan>{cp}</cyan>" for cp in checkpoints],
        list_intro_string=f"Found {count} checkpoint{plural}.",
    )
    send_usage_message(context, event="cli.checkpoint.list", success=True)
def clean_data_docs(directory, site_name=None, all=None):
    """Delete data docs"""
    # NOTE: the parameter name `all` shadows the builtin but is kept — it is
    # part of the CLI interface (--all) and renaming would break callers.
    context = toolkit.load_data_context_with_error_handling(directory)
    if site_name is None and all is None:
        cli_message(
            "<red>{}</red>".format(
                "Please specify --all y to remove all sites or specify specific site using site_name"
            )
        )
        sys.exit(1)

    context.clean_data_docs(site_name=site_name)
    # The old `failed` flag and `failed == False` comparison are gone: any
    # exception from clean_data_docs propagates before the flag was ever
    # re-checked, so the failure branch was unreachable. Reaching this point
    # means the clean completed.
    send_usage_message(data_context=context, event="cli.docs.clean", success=True)
    cli_message("<green>{}</green>".format("Cleaned data docs"))
def docs_list(directory):
    """List known Data Docs Sites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    site_dicts = context.get_docs_sites_urls()
    site_lines = [
        " - <cyan>{}</cyan>: {}".format(d["site_name"], d["site_url"])
        for d in site_dicts
    ]
    if not site_lines:
        cli_message("No Data Docs sites found")
    else:
        cli_message_list(site_lines, _build_intro_string(site_lines))
    send_usage_message(data_context=context, event="cli.docs.list", success=True)
def suite_delete(suite, directory):
    """
    Delete an expectation suite from the expectation store.
    """
    usage_event = "cli.suite.delete"
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            # Fixed: the opening tag was previously written as "</red>".
            "<red>No expectation suites found in the project.</red>")
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found.")
    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    send_usage_message(data_context=context, event=usage_event, success=True)
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more
    Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one
    Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different
    datasources paired with one or more Expectation Suites each.
    """
    project_dir: str = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = toolkit.load_data_context_with_error_handling(
        directory=project_dir,
        from_cli_upgrade_command=False,
    )
def docs_list(directory):
    """List known Data Docs Sites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    site_dicts = context.get_docs_sites_urls()

    site_lines = []
    for d in site_dicts:
        # A missing/empty site_url means the site is configured but not built.
        url = d.get("site_url") or (
            f"site configured but does not exist. Run the following command to build site: great_expectations "
            f'docs build --site-name {d["site_name"]}'
        )
        site_lines.append(" - <cyan>{}</cyan>: {}".format(d["site_name"], url))

    if not site_lines:
        cli_message("No Data Docs sites found")
    else:
        cli_message_list(site_lines, _build_intro_string(site_lines))
    send_usage_message(data_context=context, event="cli.docs.list", success=True)