예제 #1
0
def suite_delete(suite, directory):
    """
    Delete an expectation suite from the expectation store.
    """
    usage_event = "cli.suite.delete"
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            "</red>No expectation suites found in the project.</red>",
        )

    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found.")

    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    send_usage_message(data_context=context, event=usage_event, success=True)
예제 #2
0
def clean_data_docs(directory, site_name=None, all=None):
    """Delete data docs"""
    context = toolkit.load_data_context_with_error_handling(directory)
    failed = True
    if site_name is None and all is None:
        cli_message("<red>{}</red>".format(
            "Please specify --all to remove all sites or specify a specific site using "
            "--site_name"))
        sys.exit(1)
    context.clean_data_docs(site_name=site_name)
    failed = False
    if not failed and context is not None:
        send_usage_message(data_context=context,
                           event="cli.docs.clean",
                           success=True)
        cli_message("<green>{}</green>".format("Cleaned data docs"))

    if failed and context is not None:
        send_usage_message(data_context=context,
                           event="cli.docs.clean",
                           success=False)
예제 #3
0
def store_list(directory):
    """List known Stores."""
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        stores = context.list_stores()

        if len(stores) == 0:
            cli_message("No Stores found")
            send_usage_message(data_context=context,
                               event="cli.store.list",
                               success=True)
            return
        elif len(stores) == 1:
            list_intro_string = "1 Store found:"
        else:
            list_intro_string = "{} Stores found:".format(len(stores))

        cli_message(list_intro_string)

        for store in stores:
            cli_message("")
            cli_message_dict(store)

        send_usage_message(data_context=context,
                           event="cli.store.list",
                           success=True)
    except Exception as e:
        send_usage_message(data_context=context,
                           event="cli.store.list",
                           success=False)
        raise e
예제 #4
0
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    usage_event = "cli.suite.scaffold"
    suite_name = suite
    context = load_data_context_with_error_handling(directory)
    notebook_filename = f"scaffold_{suite_name}.ipynb"
    notebook_path = _get_notebook_path(context, notebook_filename)

    if suite_name in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite_name)
        if os.path.isfile(notebook_path):
            cli_message(
                f"  - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    _suite = context.create_expectation_suite(suite_name)
    _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=datasource.name)
    renderer = SuiteScaffoldNotebookRenderer(context, _suite, batch_kwargs)
    renderer.render_to_disk(notebook_path)

    if jupyter:
        toolkit.launch_jupyter_notebook(notebook_path)
    else:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )

    send_usage_message(data_context=context, event=usage_event, success=True)
예제 #5
0
def suite_list(directory):
    """Lists available Expectation Suites."""
    context = load_data_context_with_error_handling(directory)

    try:
        suite_names = [
            " - <cyan>{}</cyan>".format(suite_name)
            for suite_name in context.list_expectation_suite_names()
        ]
        if len(suite_names) == 0:
            cli_message("No Expectation Suites found")
            send_usage_message(
                data_context=context, event="cli.suite.list", success=True
            )
            return
        elif len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = "{} Expectation Suites found:".format(len(suite_names))

        cli_message_list(suite_names, list_intro_string)
        send_usage_message(data_context=context, event="cli.suite.list", success=True)
    except Exception as e:
        send_usage_message(data_context=context, event="cli.suite.list", success=False)
        raise e
예제 #6
0
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    if legacy:
        suite_name = suite
        usage_event = "cli.checkpoint.new"
        context = toolkit.load_data_context_with_error_handling(directory)
        _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
        suite: ExpectationSuite = toolkit.load_expectation_suite(
            context, suite_name, usage_event)
        datasource = toolkit.select_datasource(context,
                                               datasource_name=datasource)
        if datasource is None:
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _, _, _, batch_kwargs = toolkit.get_batch_kwargs(
            context, datasource.name)

        _ = context.add_checkpoint(
            name=checkpoint,
            **{
                "class_name":
                "LegacyCheckpoint",
                "validation_operator_name":
                "action_list_operator",
                "batches": [{
                    "batch_kwargs":
                    dict(batch_kwargs),
                    "expectation_suite_names": [suite.expectation_suite_name],
                }],
            },
        )

        cli_message(
            f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
      - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
        )
        send_usage_message(context, usage_event, success=True)
    # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    else:
        pass
예제 #7
0
def docs_list(directory):
    """List known Data Docs Sites."""
    context = toolkit.load_data_context_with_error_handling(directory)

    docs_sites_url_dicts = context.get_docs_sites_urls()
    docs_sites_strings = [
        " - <cyan>{}</cyan>: {}".format(
            docs_site_dict["site_name"],
            docs_site_dict.get("site_url")
            or f"site configured but does not exist. Run the following command to build site: great_expectations "
            f'docs build --site-name {docs_site_dict["site_name"]}',
        )
        for docs_site_dict in docs_sites_url_dicts
    ]

    if len(docs_sites_strings) == 0:
        cli_message("No Data Docs sites found")
    else:
        list_intro_string = _build_intro_string(docs_sites_strings)
        cli_message_list(docs_sites_strings, list_intro_string)

    send_usage_message(data_context=context, event="cli.docs.list", success=True)
예제 #8
0
def _tap_new(suite, tap_filename, directory, usage_event, datasource=None):
    context = load_data_context_with_error_handling(directory)
    try:
        _validate_tap_filename(tap_filename)
        context_directory = context.root_directory
        datasource = _get_datasource(context, datasource)
        suite = load_expectation_suite(context, suite)
        _, _, _, batch_kwargs = get_batch_kwargs(context, datasource.name)

        tap_filename = _write_tap_file_to_disk(batch_kwargs, context_directory,
                                               suite, tap_filename)
        cli_message(f"""\
<green>A new tap has been generated!</green>
To run this tap, run: <green>python {tap_filename}</green>
You can edit this script or place this code snippet in your pipeline.""")
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=True)
    except Exception as e:
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=False)
        raise e
예제 #9
0
def suite_list(directory):
    """Lists available Expectation Suites."""
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    try:
        suite_names = [
            " - <cyan>{}</cyan>".format(suite_name)
            for suite_name in context.list_expectation_suite_names()
        ]
        if len(suite_names) == 0:
            cli_message("No Expectation Suites found")
            send_usage_message(
                data_context=context,
                event="cli.suite.list",
                success=True
            )
            return

        if len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"

        if len(suite_names) > 1:
            list_intro_string = "{} Expectation Suites found:".format(len(suite_names))

        cli_message_list(suite_names, list_intro_string)
        send_usage_message(
            data_context=context,
            event="cli.suite.list",
            success=True
        )
    except Exception as e:
        send_usage_message(
            data_context=context,
            event="cli.suite.list",
            success=False
        )
        raise e
예제 #10
0
def docs_list(directory):
    """List known Data Docs Sites."""
    context = None
    try:
        failed = True
        context = DataContext(directory)
        docs_sites_url_dicts = context.get_docs_sites_urls()
        docs_sites_strings = [
            " - <cyan>{}</cyan>: {}".format(docs_site_dict["site_name"], docs_site_dict["site_url"])\
            for docs_site_dict in docs_sites_url_dicts
        ]

        if len(docs_sites_strings) == 0:
            cli_message("No Data Docs sites found")
            failed = False
            send_usage_message(data_context=context,
                               event="cli.docs.list",
                               success=True)
            return

        if len(docs_sites_strings) == 1:
            list_intro_string = "1 Data Docs site found:"

        if len(docs_sites_strings) > 1:
            list_intro_string = "{} Data Docs sites found:".format(
                len(docs_sites_strings))
        cli_message_list(docs_sites_strings, list_intro_string)
        failed = False
        send_usage_message(data_context=context,
                           event="cli.docs.list",
                           success=True)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
    finally:
        if failed and context is not None:
            send_usage_message(data_context=context,
                               event="cli.docs.list",
                               success=False)
예제 #11
0
def store_list(directory):
    """List known Stores."""
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    try:
        stores = context.list_stores()

        if len(stores) == 0:
            cli_message("No Stores found")
            send_usage_message(data_context=context,
                               event="cli.store.list",
                               success=True)
            return

        if len(stores) == 1:
            list_intro_string = "1 Store found:"

        if len(stores) > 1:
            list_intro_string = "{} Stores found:".format(len(stores))

        cli_message(list_intro_string)

        for store in stores:
            cli_message("")
            cli_message_dict(store)

        send_usage_message(data_context=context,
                           event="cli.store.list",
                           success=True)
    except Exception as e:
        send_usage_message(data_context=context,
                           event="cli.store.list",
                           success=False)
        raise e
예제 #12
0
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    usage_event = "cli.checkpoint.run"
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False)

    ge_config_version = context.get_config().config_version
    if ge_config_version >= 3:
        cli_message(
            f"""<red>The `checkpoint run` CLI command is not yet implemented for GE config versions >= 3.</red>"""
        )
        send_usage_message(context, usage_event, success=False)
        sys.exit(1)

    checkpoint: Checkpoint = toolkit.load_checkpoint(
        context,
        checkpoint,
        usage_event,
    )

    try:
        results = checkpoint.run()
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(context, usage_event,
                                                    f"<red>{e}</red>")

    if not results["success"]:
        cli_message("Validation failed!")
        send_usage_message(context, event=usage_event, success=True)
        print_validation_operator_results_details(results)
        sys.exit(1)

    cli_message("Validation succeeded!")
    send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0)
def validation_operator_run(name, run_name, validation_config_file, suite,
                            directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config)
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>"
                    .format(validation_config_file,
                            validation_config_error_message))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

        else:
            if suite is None:
                cli_message("""
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(0)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run")

            if name is None:
                cli_message("""
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            else:
                if name not in context.list_validation_operator_names():
                    cli_message(f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                    send_usage_message(
                        data_context=context,
                        event="cli.validation_operator.run",
                        success=False,
                    )
                    sys.exit(1)

            batch_kwargs = None

            cli_message("""
Let us help you specify the batch of data your want the validation operator to validate."""
                        )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=None,
                )

            validation_config = {
                "validation_operator_name":
                name,
                "batches": [{
                    "batch_kwargs":
                    batch_kwargs,
                    "expectation_suite_names": [suite.expectation_suite_name],
                }],
            }

        try:
            validation_operator_name = validation_config[
                "validation_operator_name"]
            batches_to_validate = []
            for entry in validation_config["batches"]:
                for expectation_suite_name in entry["expectation_suite_names"]:
                    batch = context.get_batch(entry["batch_kwargs"],
                                              expectation_suite_name)
                    batches_to_validate.append(batch)

            if run_name is None:
                run_name = datetime.datetime.now(
                    datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")

            run_id = RunIdentifier(run_name=run_name)

            if suite is None:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                )
            else:
                if suite.evaluation_parameters is None:
                    results = context.run_validation_operator(
                        validation_operator_name,
                        assets_to_validate=batches_to_validate,
                        run_id=run_id,
                    )
                else:
                    results = context.run_validation_operator(
                        validation_operator_name,
                        assets_to_validate=batches_to_validate,
                        run_id=run_id,
                        evaluation_parameters=suite.evaluation_parameters,
                    )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message("<red>{}</red>".format(e))
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=False)
            sys.exit(1)

        if not results["success"]:
            cli_message("Validation failed!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(0)
    except Exception as e:
        send_usage_message(data_context=context,
                           event="cli.validation_operator.run",
                           success=False)
        raise e
예제 #14
0
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            send_usage_message(data_context=context,
                               event="cli.init.create",
                               success=True)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=True)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                success, suite_name = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    open_docs=view)
                if success:
                    cli_message(
                        "A new Expectation suite '{}' was added to your project"
                        .format(suite_name))

                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (DataContextError, ge_exceptions.ProfilerError, IOError,
            SQLAlchemyError) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
예제 #15
0
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            send_usage_message(data_context=context,
                               event="cli.init.create",
                               success=True)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to configure a Datasource?",
                        default=True):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm("Would you like to build Data Docs?",
                                     default=True):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                build_docs(context, view=False)

                if not click.confirm(
                        "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results)

                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
            DataContextError,
            ge_exceptions.ProfilerError,
            OSError,
            SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
예제 #16
0
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(sort=True, require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
                assert isinstance(_batch, DataAsset)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                send_usage_message(
                    data_context=context,
                    event="cli.suite.edit",
                    success=False
                )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                send_usage_message(
                    data_context=context,
                    event="cli.suite.edit",
                    success=False
                )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                send_usage_message(
                    data_context=context,
                    event="cli.suite.edit",
                    success=False
                )
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
            """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context,
                    event="cli.suite.edit",
                    success=False
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context,
                    event="cli.suite.edit",
                    success=False
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    generator_asset=None,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        notebook_name = "{}.ipynb".format(suite.expectation_suite_name)

        notebook_path = os.path.join(
            context.root_directory, context.GE_EDIT_NOTEBOOK_DIR, notebook_name
        )
        NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message("To continue editing this suite, run <green>jupyter "
                        f"notebook {notebook_path}</green>")

        payload = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name
        )

        send_usage_message(
            data_context=context,
            event="cli.suite.edit",
            event_payload=payload,
            success=True
        )

        if jupyter:
            subprocess.call(["jupyter", "notebook", notebook_path])

    except Exception as e:
        send_usage_message(
            data_context=context,
            event="cli.suite.edit",
            success=False
        )
        raise e
예제 #17
0
def suite_new(suite, directory, empty, jupyter, view, batch_kwargs):
    """
    Create a new Expectation Suite.

    Great Expectations will choose a couple of columns and generate expectations about them
    to demonstrate some examples of assertions you can make about your data.

    If you wish to skip the examples, add the `--empty` flag.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    datasource_name = None
    generator_name = None
    generator_asset = None

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name = create_expectation_suite_impl(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            generator_asset=generator_asset,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            cli_message(
                "A new Expectation suite '{}' was added to your project".format(
                    suite_name
                )
            )
            if empty:
                if jupyter:
                    cli_message("""<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")
                _suite_edit(suite_name, datasource_name, directory, jupyter=jupyter, batch_kwargs=batch_kwargs)
            send_usage_message(
                data_context=context,
                event="cli.suite.new",
                success=True
            )
        else:
            send_usage_message(
                data_context=context,
                event="cli.suite.new",
                success=False
            )
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        send_usage_message(
            data_context=context,
            event="cli.suite.new",
            success=False
        )
        sys.exit(1)
    except Exception as e:
        send_usage_message(
            data_context=context,
            event="cli.suite.new",
            success=False
        )
        raise e
예제 #18
0
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = load_data_context_with_error_handling(directory)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(context, datasource_name=data_source.name,
                                     additional_batch_kwargs=additional_batch_kwargs)

        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
예제 #19
0
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    # TODO break this up into demo and new
    context = toolkit.load_data_context_with_error_handling(directory)

    datasource_name = None
    generator_name = None
    generator_asset = None

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            generator_asset=generator_asset,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            open_docs=view,
        )
        if success:
            cli_message(
                "A new Expectation suite '{}' was added to your project".
                format(suite_name))
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")
            _suite_edit(
                suite_name,
                datasource_name,
                directory,
                jupyter=jupyter,
                batch_kwargs=batch_kwargs,
                usage_event=usage_event,
            )
            send_usage_message(data_context=context,
                               event=usage_event,
                               success=True)
        else:
            send_usage_message(data_context=context,
                               event=usage_event,
                               success=False)
    except (
            ge_exceptions.DataContextError,
            ge_exceptions.ProfilerError,
            IOError,
            SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=False)
        sys.exit(1)
    except Exception as e:
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=False)
        raise e
예제 #20
0
def docs_build(directory, site_name, view=True):
    """ Build Data Docs for a project."""
    context = toolkit.load_data_context_with_error_handling(directory)
    build_docs(context, site_name=site_name, view=view)
    send_usage_message(data_context=context, event="cli.docs.build", success=True)
예제 #21
0
def exit_with_failure_message_and_stats(context: DataContext, usage_event: str,
                                        message: str) -> None:
    cli_message(message)
    send_usage_message(context, event=usage_event, success=False)
    sys.exit(1)
예제 #22
0
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    # TODO break this up into demo and new
    context = toolkit.load_data_context_with_error_handling(directory)

    datasource_name = None
    generator_name = None
    data_asset_name = None

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name, profiling_results = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=generator_name,
            data_asset_name=data_asset_name,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if success:
            if empty:
                if jupyter:
                    cli_message(
                        """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")
            send_usage_message(data_context=context,
                               event=usage_event,
                               success=True)

            _suite_edit(
                suite_name,
                datasource_name,
                directory,
                jupyter=jupyter,
                batch_kwargs=batch_kwargs,
                usage_event=
                "cli.suite.edit",  # or else we will be sending `cli.suite.new` which is incorrect
                suppress_usage_message=
                True,  # dont want actually send usage_message since the function call is not the result of actual usage
            )
        else:
            send_usage_message(data_context=context,
                               event=usage_event,
                               success=False)
    except (
            ge_exceptions.DataContextError,
            ge_exceptions.ProfilerError,
            OSError,
            SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=False)
        sys.exit(1)
    except Exception as e:
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=False)
        raise e