Example #1
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided process walks the user through setting up a new project and also
    onboards a new developer into an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            send_usage_message(data_context=context,
                               event="cli.init.create",
                               success=True)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to configure a Datasource?",
                        default=True):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                        "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )

                cli_message(SECTION_SEPARATOR)
                if not click.confirm("Would you like to build Data Docs?",
                                     default=True):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)

                build_docs(context, view=False)

                if not click.confirm(
                        "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                        default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results)

                cli_message(SECTION_SEPARATOR)
                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (
            DataContextError,
            ge_exceptions.ProfilerError,
            OSError,
            SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
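
The docstring above describes a guided, interactive flow driven from the command line. Below is a minimal sketch of how such a flow might be wired into a Click command; the command name, option names, and defaults are assumptions for illustration, not the library's actual CLI definition, and init() is assumed to be in scope as defined above.

import click


@click.command(name="init")
@click.option("--target-directory", "-d", default="./",
              help="Directory in which to initialize the project.")
@click.option("--view/--no-view", default=True,
              help="Open Data Docs in a browser after building them.")
@click.option("--usage-stats/--no-usage-stats", default=True,
              help="Enable anonymous usage statistics for the new project.")
def init_command(target_directory, view, usage_stats):
    """Hypothetical CLI shim that delegates to the guided init() flow above."""
    init(target_directory, view, usage_stats)


if __name__ == "__main__":
    init_command()
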
Example #2
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    open_docs=False,
    profiler_configuration="demo",
):
    """
    Create a new expectation suite.

    :return: a tuple: (success, suite name)
    """
    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    data_source = select_datasource(context, datasource_name=datasource_name)
    if data_source is None:
        # select_datasource takes care of displaying an error message, so all that is left to do here is exit.
        sys.exit(1)

    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    if (batch_kwargs_generator_name is None or generator_asset is None
            or batch_kwargs is None):
        (
            datasource_name,
            batch_kwargs_generator_name,
            generator_asset,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            generator_asset=generator_asset,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        default_expectation_suite_name = _get_default_expectation_suite_name(
            batch_kwargs, generator_asset)
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new expectation suite",
                default=default_expectation_suite_name,
            )
            if expectation_suite_name in context.list_expectation_suite_names(
            ):
                tell_user_suite_exists(expectation_suite_name)
            else:
                break

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        generator_asset,
        profiler_configuration,
    )

    build_docs(context, view=False)
    if open_docs:
        _attempt_to_open_validation_results_in_data_docs(
            context, profiling_results)

    return True, expectation_suite_name
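
The function above returns a (success, suite name) tuple and prompts interactively for whatever it is not given. A minimal call sketch follows, assuming an already-initialized project on disk and that create_expectation_suite() is in scope as defined above; the DataContext import path and the datasource and suite names are assumptions for illustration.

from great_expectations.data_context import DataContext  # import path is an assumption

context = DataContext("./great_expectations")      # assumes an initialized project directory
success, suite_name = create_expectation_suite(
    context,
    datasource_name="my_datasource",                # hypothetical datasource name
    expectation_suite_name="my_asset.warning",      # preset name skips the interactive prompt
    additional_batch_kwargs={"limit": 1000},
)
if success:
    print("Created expectation suite: {}".format(suite_name))
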
Example #3
def init(target_directory, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided process walks the user through setting up a new project and also
    onboards a new developer into an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(target_directory,
                                         usage_statistics_enabled=usage_stats)
            send_usage_message(data_context=context,
                               event="cli.init.create",
                               success=True)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            datasources = context.list_datasources()
            if len(datasources) == 0:
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=True)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                success, suite_name = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    open_docs=view)
                if success:
                    cli_message(
                        "A new Expectation suite '{}' was added to your project"
                        .format(suite_name))

                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (DataContextError, ge_exceptions.ProfilerError, IOError,
            SQLAlchemyError) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
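
Both init() variants above report their outcome through sys.exit() rather than a return value, and several of those exits are marked "TODO ensure this is covered by a test". A minimal sketch of driving the flow programmatically (for example, from a test) follows; it assumes init() is in scope as defined above, and the target directory path is hypothetical.

import sys

try:
    init(target_directory="./my_project", view=False, usage_stats=False)
except SystemExit as e:
    # Exit code 0 (or None) means the flow completed or the user declined
    # cleanly; nonzero codes mark an aborted prompt or a failure path
    # (see the sys.exit() calls in the function bodies above).
    if e.code not in (0, None):
        print("init stopped with exit code {}".format(e.code), file=sys.stderr)
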
Example #4
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite.

    WARNING: the flow and naming of this method, and its interaction with
    _profile_to_create_a_suite, require serious revisiting.
    :return: a tuple: (success, suite name, profiling_results)
    """
    if generator_asset:
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    data_source = select_datasource(context, datasource_name=datasource_name)

    if data_source is None:
        # select_datasource takes care of displaying an error message, so all that is left to do here is exit.
        sys.exit(1)

    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    if (
        batch_kwargs_generator_name is None
        or data_asset_name is None
        or batch_kwargs is None
    ):
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        default_expectation_suite_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=default_expectation_suite_name,
            )
            if expectation_suite_name in context.list_expectation_suite_names():
                tell_user_suite_exists(expectation_suite_name)
            else:
                break

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)
        if open_docs:
            attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
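
A minimal call sketch for the newer signature above: pass data_asset_name instead of the deprecated generator_asset and unpack the three-element return value. The DataContext import path and the datasource, asset, and suite names below are assumptions for illustration.

from great_expectations.data_context import DataContext  # import path is an assumption

context = DataContext("./great_expectations")        # assumes an initialized project directory
success, suite_name, profiling_results = create_expectation_suite(
    context,
    datasource_name="my_datasource",                  # hypothetical datasource name
    data_asset_name="my_table",                       # preferred over the deprecated generator_asset
    expectation_suite_name="my_table.warning",        # preset name skips the interactive prompt
    additional_batch_kwargs={"limit": 1000},
    flag_build_docs=False,                            # defer building Data Docs to the caller
    open_docs=False,
)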