def test_parse_cli_config_file_location_posix_paths_existing_files_with_no_extension(
    tmp_path_factory,
):
    """
    Verify parse_cli_config_file_location on extensionless POSIX paths.

    First asserts that nonexistent paths raise ConfigNotFoundError, then
    creates the files under a temp root and asserts the parsed
    directory/filename split matches expectations.
    """
    filename_no_extension_fixtures = [
        {
            "input_path": "relative/path/to/file/no_extension",
            "expected": {
                "directory": "relative/path/to/file",
                "filename": "no_extension",
            },
        },
        {
            "input_path": "/absolute/path/to/file/no_extension",
            "expected": {
                "directory": "/absolute/path/to/file",
                "filename": "no_extension",
            },
        },
        {
            "input_path": "no_extension",
            "expected": {
                "directory": None,
                "filename": "no_extension",
            },
        },
    ]

    # None of these paths exist yet, so parsing must fail.
    for fixture in filename_no_extension_fixtures:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(fixture["input_path"])

    # Create files and re-run assertions
    root_dir = str(tmp_path_factory.mktemp("posix"))
    for fixture in filename_no_extension_fixtures:
        expected_dir = fixture["expected"]["directory"]

        # Strip the leading "/" so absolute fixtures nest under root_dir.
        if expected_dir is not None and expected_dir.startswith("/"):
            expected_dir = expected_dir[1:]

        expected_filename = fixture["expected"]["filename"]
        if not expected_dir:
            continue
        test_directory = os.path.join(root_dir, expected_dir)
        os.makedirs(test_directory, exist_ok=True)
        if not expected_filename:
            continue
        expected_filepath = os.path.join(test_directory, expected_filename)
        # Create an empty file; no handle is needed beyond creation.
        with open(expected_filepath, "w"):
            pass

        output = toolkit.parse_cli_config_file_location(expected_filepath)

        assert output == {
            "directory": test_directory,
            "filename": expected_filename,
        }
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more
    Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one
    Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different
    datasources paired with one or more Expectation Suites each.
    """
    # Resolve the project directory from the CLI-supplied config location.
    parsed = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location)
    directory: str = parsed.get("directory")
    context: DataContext = toolkit.load_data_context_with_error_handling(
        from_cli_upgrade_command=False,
        directory=directory,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context

    # Record the begin event now; the end event name is stashed for later.
    event_prefix = f"cli.checkpoint.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        success=True,
        data_context=context,
        event=f"{event_prefix}.begin",
    )
    ctx.obj.usage_event_end = f"{event_prefix}.end"
Beispiel #3
0
def suite_list(ctx):
    """Lists available Expectation Suites."""
    display_not_implemented_message_and_exit()

    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite_names = [
            f" - <cyan>{suite_name}</cyan>"
            for suite_name in context.list_expectation_suite_names()
        ]
        if not suite_names:
            cli_message("No Expectation Suites found")
            toolkit.send_usage_message(data_context=context,
                                       event="cli.suite.list",
                                       success=True)
            return
        if len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = f"{len(suite_names)} Expectation Suites found:"

        cli_message_list(suite_names, list_intro_string)
        toolkit.send_usage_message(data_context=context,
                                   event="cli.suite.list",
                                   success=True)
    except Exception:
        # Record the failure for usage stats, then re-raise with the
        # original traceback intact (bare raise, not `raise e`).
        toolkit.send_usage_message(data_context=context,
                                   event="cli.suite.list",
                                   success=False)
        raise
Beispiel #4
0
def suite_delete(ctx, suite):
    """
    Delete an expectation suite from the expectation store.
    """
    display_not_implemented_message_and_exit()
    usage_event = "cli.suite.delete"
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            # Fixed markup: the opening tag was previously written as a
            # closing tag ("</red>").
            "<red>No expectation suites found in the project.</red>",
        )

    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found.")

    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    toolkit.send_usage_message(data_context=context,
                               event=usage_event,
                               success=True)
Beispiel #5
0
def test_parse_cli_config_file_location_empty_paths():
    """None and empty-string inputs parse to no directory and no filename."""
    expected = {
        "directory": None,
        "filename": None,
    }

    for input_path in (None, ""):
        result = toolkit.parse_cli_config_file_location(input_path)
        assert result == expected
Beispiel #6
0
def suite_edit(ctx, suite, datasource, jupyter, batch_kwargs):
    """
    Generate a Jupyter notebook for editing an existing Expectation Suite.

    The SUITE argument is required. This is the name you gave to the suite
    when you created it.

    A batch of data is required to edit the suite, which is used as a sample.

    The edit command will help you specify a batch interactively. Or you can
    specify them manually by providing --batch-kwargs in valid JSON format.

    Read more about specifying batches of data in the documentation: https://docs.greatexpectations.io/
    """
    display_not_implemented_message_and_exit()

    # Resolve the project directory, then delegate to the shared helper.
    config_location = ctx.obj.config_file_location
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=config_location).get("directory")
    _suite_edit(
        suite,
        datasource,
        directory,
        jupyter,
        batch_kwargs,
        usage_event="cli.suite.edit",
    )
Beispiel #7
0
 def get_data_context_from_config_file(self) -> DataContext:
     """Build a DataContext from this object's configured file location."""
     parsed = toolkit.parse_cli_config_file_location(
         config_file_location=self.config_file_location)
     directory: str = parsed.get("directory")
     return toolkit.load_data_context_with_error_handling(
         directory=directory,
         from_cli_upgrade_command=False,
     )
Beispiel #8
0
def store(ctx):
    """Store operations"""
    location = ctx.obj.config_file_location
    directory: str = toolkit.parse_cli_config_file_location(
        config_file_location=location).get("directory")
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = toolkit.load_data_context_with_error_handling(
        from_cli_upgrade_command=False,
        directory=directory,
    )
Beispiel #9
0
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    # A truthy result means the project required no further upgrade.
    upgrade_ok = load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True)
    if upgrade_ok:
        up_to_date_message = (
            "Your project is up-to-date - no further upgrade is necessary.\n")
        cli_message(f"<green>{up_to_date_message}</green>")
        sys.exit(0)
Beispiel #10
0
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")

    # A truthy context means the upgrade succeeded (or was unnecessary).
    upgrade_ok = load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True)
    if not upgrade_ok:
        failure_message = "Error: Your project could not be upgraded.\n"
        cli_message(f"<red>{failure_message}</red>")
        sys.exit(1)
    sys.exit(0)
Beispiel #11
0
def project_check_config(ctx):
    """Check a config for validity and help with migrations."""
    cli_message("Checking your config files for validity...\n")
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    is_config_ok, error_message, context = do_config_check(directory)
    # Usage stats are sent whenever a context could be constructed,
    # regardless of whether the config validated.
    if context:
        toolkit.send_usage_message(data_context=context,
                                   event="cli.project.check_config",
                                   success=True)
    if is_config_ok:
        cli_message("<green>Your config file appears valid!</green>")
        return
    cli_message("Unfortunately, your config appears to be invalid:\n")
    cli_message(f"<red>{error_message}</red>")
    sys.exit(1)
def datasource(ctx):
    """Datasource operations"""
    # Resolve the project directory and load the context for subcommands.
    parsed = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location)
    directory: str = parsed.get("directory")
    context: DataContext = toolkit.load_data_context_with_error_handling(
        from_cli_upgrade_command=False,
        directory=directory,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context
    event_prefix = f"cli.datasource.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        success=True,
        data_context=context,
        event=f"{event_prefix}.begin",
    )
    ctx.obj.usage_event_end = f"{event_prefix}.end"
Beispiel #13
0
def suite_new(ctx, suite, jupyter, batch_kwargs):
    """
    Create a new empty Expectation Suite.

    Edit in jupyter notebooks, or skip with the --no-jupyter flag
    """
    display_not_implemented_message_and_exit()

    # Resolve the project directory and delegate to the shared helper.
    config_location = ctx.obj.config_file_location
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=config_location).get("directory")
    _suite_new(
        usage_event="cli.suite.new",
        suite=suite,
        directory=directory,
        empty=True,
        jupyter=jupyter,
        view=False,
        batch_kwargs=batch_kwargs,
    )
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more
    Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one
    Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different
    datasources paired with one or more Expectation Suites each.
    """
    # Resolve the project directory from the CLI-supplied config location.
    parsed = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location)
    directory: str = parsed.get("directory")
    context: DataContext = toolkit.load_data_context_with_error_handling(
        from_cli_upgrade_command=False,
        directory=directory,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context
Beispiel #15
0
def test_parse_cli_config_file_location_posix_paths(tmp_path_factory):
    """
    What does this test and why?
    We want to parse posix paths into their directory and filename parts
    so that we can pass the directory to our data context constructor.
    We need to be able to do that with all versions of path that can be input.
    This tests for posix paths for files/dirs that don't exist and files/dirs that do.
    Other tests handle testing for windows support.
    """

    filename_fixtures = [
        {
            "input_path": "just_a_file.yml",
            "expected": {
                "directory": "",
                "filename": "just_a_file.yml",
            },
        },
    ]
    absolute_path_fixtures = [
        {
            "input_path": "/path/to/file/filename.yml",
            "expected": {
                "directory": "/path/to/file",
                "filename": "filename.yml",
            },
        },
        {
            "input_path": "/absolute/directory/ending/slash/",
            "expected": {
                "directory": "/absolute/directory/ending/slash/",
                "filename": None,
            },
        },
        {
            "input_path": "/absolute/directory/ending/no/slash",
            "expected": {
                "directory": "/absolute/directory/ending/no/slash",
                "filename": None,
            },
        },
    ]
    relative_path_fixtures = [
        {
            "input_path": "relative/path/to/file.yml",
            "expected": {
                "directory": "relative/path/to",
                "filename": "file.yml",
            },
        },
        {
            "input_path": "relative/path/to/directory/slash/",
            "expected": {
                "directory": "relative/path/to/directory/slash/",
                "filename": None,
            },
        },
        {
            "input_path": "relative/path/to/directory/no_slash",
            "expected": {
                "directory": "relative/path/to/directory/no_slash",
                "filename": None,
            },
        },
    ]

    fixtures = filename_fixtures + absolute_path_fixtures + relative_path_fixtures

    # None of these paths exist yet, so parsing must raise.
    for fixture in fixtures:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(fixture["input_path"])

    # Create files and re-run assertions
    root_dir = str(tmp_path_factory.mktemp("posix"))
    for fixture in fixtures:
        expected_dir = fixture["expected"]["directory"]

        # Make non-absolute path so fixtures nest under root_dir.
        if expected_dir is not None and expected_dir.startswith("/"):
            expected_dir = expected_dir[1:]

        expected_filename = fixture["expected"]["filename"]
        if not expected_dir:
            continue
        test_directory = os.path.join(root_dir, expected_dir)
        os.makedirs(test_directory, exist_ok=True)
        if not expected_filename:
            continue
        expected_filepath = os.path.join(test_directory, expected_filename)
        # Create an empty file; no handle is needed beyond creation.
        with open(expected_filepath, "w"):
            pass

        output = toolkit.parse_cli_config_file_location(expected_filepath)

        assert output == {
            "directory": test_directory,
            "filename": expected_filename,
        }
Beispiel #16
0
def test_parse_cli_config_file_location_windows_paths(tmp_path_factory):
    """
    What does this test and why?
    Since we are unable to test windows paths on our unix CI, this just
    tests that if a file doesn't exist we raise an error.
    Args:
        tmp_path_factory:
    Returns:
    """

    filename_fixtures = [
        {
            "input_path": "just_a_file.yml",
            "expected": {
                "directory": "",
                "filename": "just_a_file.yml",
            },
        },
    ]
    absolute_path_fixtures = [
        {
            "input_path": r"C:\absolute\windows\path\to\file.yml",
            "expected": {
                "directory": r"C:\absolute\windows\path\to",
                "filename": "file.yml",
            },
        },
        {
            "input_path": r"C:\absolute\windows\directory\ending\slash\\",
            "expected": {
                "directory": r"C:\absolute\windows\directory\ending\slash\\",
                "filename": None,
            },
        },
        {
            "input_path": r"C:\absolute\windows\directory\ending\no_slash",
            "expected": {
                "directory": r"C:\absolute\windows\directory\ending\no_slash",
                "filename": None,
            },
        },
    ]
    relative_path_fixtures = [
        {
            "input_path": r"relative\windows\path\to\file.yml",
            "expected": {
                "directory": r"relative\windows\path\to",
                "filename": "file.yml",
            },
        },
        # Double slash at end of raw string to escape slash
        {
            "input_path": r"relative\windows\path\to\directory\slash\\",
            "expected": {
                "directory": r"relative\windows\path\to\directory\slash\\",
                "filename": None,
            },
        },
        {
            "input_path": r"relative\windows\path\to\directory\no_slash",
            "expected": {
                "directory": r"relative\windows\path\to\directory\no_slash",
                "filename": None,
            },
        },
    ]
    all_fixtures = [
        *filename_fixtures,
        *absolute_path_fixtures,
        *relative_path_fixtures,
    ]

    # None of these paths exist on the CI filesystem, so each must raise.
    for case in all_fixtures:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(case["input_path"])
Beispiel #17
0
def suite_scaffold(ctx, suite, jupyter):
    """Scaffold a new Expectation Suite."""
    display_not_implemented_message_and_exit()
    # Resolve the project directory, then hand off to the scaffold helper.
    config_location = ctx.obj.config_file_location
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=config_location).get("directory")
    _suite_scaffold(suite, directory, jupyter)
Beispiel #18
0
def init(ctx: click.Context, usage_stats: bool) -> None:
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, creates a project file, and
    appends to a `.gitignore` file.
    """
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    if directory is None:
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        message = (
            f"""Warning. An existing `{DataContext.GE_YML}` was found here: {ge_dir}."""
        )
        warnings.warn(message)
        try:
            # A project is complete only when the config, uncommitted
            # directories, and config variables file are all present.
            project_file_structure_exists = (
                DataContext.does_config_exist_on_disk(ge_dir)
                and DataContext.all_uncommitted_directories_exist(ge_dir)
                and DataContext.config_variables_yml_exist(ge_dir))
            if project_file_structure_exists:
                cli_message(PROJECT_IS_COMPLETE)
                sys.exit(0)
            else:
                # Prompt to modify the project to add missing files
                if not ctx.obj.assume_yes:
                    if not click.confirm(COMPLETE_ONBOARDING_PROMPT,
                                         default=True):
                        cli_message(RUN_INIT_AGAIN)
                        # sys.exit rather than the site-module builtin exit(),
                        # which is not guaranteed to exist in all interpreters.
                        sys.exit(0)

        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)

        try:
            DataContext.create(target_directory,
                               usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)

        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                sys.exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            send_usage_message(
                data_context=context,
                event=UsageStatsEvents.CLI_INIT_CREATE.value,
                success=True,
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message(f"<red>{e}</red>")

    cli_message(SECTION_SEPARATOR)
    cli_message(READY_FOR_CUSTOMIZATION)
    cli_message(HOW_TO_CUSTOMIZE)
    sys.exit(0)
Beispiel #19
0
def init(ctx, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    display_not_implemented_message_and_exit()
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location).get("directory")
    if directory is None:
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            # sys.exit rather than the site-module builtin exit().
            sys.exit(5)
    else:
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                # TODO ensure this is covered by a test
                sys.exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            toolkit.send_usage_message(data_context=context,
                                       event="cli.init.create",
                                       success=True)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message(f"<red>{e}</red>")

    # Skip the rest of setup if --assume-yes flag is passed
    if ctx.obj.assume_yes:
        cli_message(SECTION_SEPARATOR)
        cli_message(SETUP_SUCCESS)
        sys.exit(0)

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to configure a Datasource?",
                        default=True):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm("Would you like to build Data Docs?",
                                     default=True):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                build_docs(context, view=False)

                if not click.confirm(
                        "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results)

                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
            DataContextError,
            ge_exceptions.ProfilerError,
            OSError,
            SQLAlchemyError,
    ) as e:
        cli_message(f"<red>{e}</red>")
        sys.exit(1)