コード例 #1
0
def build_docs(
    context: DataContext,
    usage_stats_event: str,
    site_names: Optional[List[str]] = None,
    view: Optional[bool] = True,
    assume_yes: Optional[bool] = False,
):
    """Preview, confirm, build and optionally open Data Docs sites.

    A dry run of ``context.build_data_docs`` supplies the list of sites that
    is shown to the user. Unless ``assume_yes`` is truthy,
    ``toolkit.confirm_proceed_or_exit`` is called before the real build.

    Args:
        context: Data context whose Data Docs are built.
        usage_stats_event: Event name forwarded to the confirmation helper.
        site_names: Site names forwarded unchanged to ``build_data_docs``.
        view: When truthy, every entry of ``site_names`` is opened afterwards.
        assume_yes: When truthy, the confirmation step is skipped.
    """
    logger.debug("Starting cli.datasource.build_docs")

    planned_sites: Dict[str, str] = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    # One " - <site>: <locator>" line per site reported by the dry run.
    listing: str = "".join(
        f" - <cyan>{name}:</cyan> {locator}\n"
        for name, locator in planned_sites.items()
    )
    cli_message("\nThe following Data Docs sites will be built:\n\n" + listing)

    if not assume_yes:
        toolkit.confirm_proceed_or_exit(
            data_context=context, usage_stats_event=usage_stats_event
        )

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)
    cli_message("Done building Data Docs")

    # Nothing is opened when viewing is disabled or no site names were given.
    if not (view and site_names):
        return
    for site_to_open in site_names:
        context.open_data_docs(site_name=site_to_open, only_if_exists=True)
コード例 #2
0
def build_docs(context, site_name=None, view=True, assume_yes=False):
    """Preview, confirm, build and optionally open Data Docs for a context.

    Args:
        context: Data context whose Data Docs are built.
        site_name: Optional single site to build. When ``None``, ``None`` is
            passed as ``site_names`` to ``context.build_data_docs``.
        view: When truthy, ``context.open_data_docs`` is called after the build
            with the caller-supplied ``site_name``.
        assume_yes: When truthy, the confirmation step is skipped.
    """
    logger.debug("Starting cli.datasource.build_docs")

    site_names = None if site_name is None else [site_name]
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    msg = "\nThe following Data Docs sites will be built:\n\n"
    # The loop variable is deliberately NOT called ``site_name``: reusing the
    # parameter name would overwrite it, and the open_data_docs call below
    # would then receive the last listed site instead of the caller's value.
    for listed_site_name, index_page_locator_info in index_page_locator_infos.items():
        msg += " - <cyan>{}:</cyan> ".format(listed_site_name)
        msg += "{}\n".format(index_page_locator_info)

    cli_message(msg)
    if not assume_yes:
        toolkit.confirm_proceed_or_exit()

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)

    cli_message("Done building Data Docs")

    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
コード例 #3
0
def delete_datasource(ctx, datasource):
    """Delete the datasource specified as an argument.

    Unless ``ctx.obj.assume_yes`` is set, the user is asked to confirm first.
    After the delete call, the datasource is looked up again to verify that it
    is really gone. Every outcome reports a usage event and terminates the
    process through ``sys.exit``:

    * exit code 1 when ``context.delete_datasource`` raises ``ValueError``;
    * exit code 0 when the follow-up lookup raises ``ValueError`` (deleted);
    * exit code 1 when the datasource can still be retrieved after deletion.

    Args:
        ctx: Click-style context; ``ctx.obj`` provides ``data_context``,
            ``usage_event_end`` and ``assume_yes``.
        datasource: Name of the datasource to delete.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end

    if not ctx.obj.assume_yes:
        toolkit.confirm_proceed_or_exit(
            confirm_prompt=f"""\nAre you sure you want to delete the Datasource "{datasource}" (this action is irreversible)?" """,
            continuation_message=f"Datasource `{datasource}` was not deleted.",
            exit_on_no=True,
            data_context=context,
            usage_stats_event=usage_event_end,
        )

    try:
        context.delete_datasource(datasource)
    except ValueError:
        cli_message(f"<red>Datasource {datasource} could not be found.</red>")
        toolkit.send_usage_message(context, event=usage_event_end, success=False)
        sys.exit(1)

    # Verify the deletion: a ValueError from the lookup means it is gone.
    try:
        context.get_datasource(datasource)
    except ValueError:
        cli_message("<green>{}</green>".format("Datasource deleted successfully."))
        toolkit.send_usage_message(context, event=usage_event_end, success=True)
        sys.exit(0)

    # The datasource is still retrievable, so the delete did not take effect.
    # Report it instead of returning silently with an implicit success status.
    cli_message(f"<red>Datasource {datasource} could not be deleted.</red>")
    toolkit.send_usage_message(context, event=usage_event_end, success=False)
    sys.exit(1)
コード例 #4
0
def build_docs(context, site_name=None, view=True):
    """Preview (with a reachability check), confirm, build and open Data Docs.

    For every site reported by a dry run of ``context.build_data_docs`` the
    index page locator is checked: ``file...`` locators are tested with
    ``os.path.isfile``; any other locator is fetched with ``requests.get``
    and counts as reachable only on HTTP status 200. Unreachable sites are
    listed with a hint instead of their plain locator.

    Args:
        context: Data context whose Data Docs are built.
        site_name: Optional single site to build. When ``None``, ``None`` is
            passed as ``site_names`` to ``context.build_data_docs``.
        view: When truthy, ``context.open_data_docs`` is called after the build
            with the caller-supplied ``site_name``.
    """
    logger.debug("Starting cli.datasource.build_docs")

    site_names = None if site_name is None else [site_name]
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    msg = "\nThe following Data Docs sites will be built:\n\n"
    # The loop variable must not be named ``site_name``; that would overwrite
    # the parameter that is passed to open_data_docs at the end.
    for listed_site_name, index_page_locator_info in index_page_locator_infos.items():
        if index_page_locator_info.startswith("file"):
            # Slice kept from the original; presumably strips a "file:/"
            # prefix, leaving a path with leading slashes -- TODO confirm.
            site_reachable = os.path.isfile(index_page_locator_info[6:])
        else:
            try:
                response = requests.get(index_page_locator_info, stream=True)
            except requests.exceptions.RequestException:
                # A connection failure means the site is inaccessible; it must
                # not abort the preview.
                site_reachable = False
            else:
                # Compare the status code, not the Response object itself
                # (``response == 200`` is always False).
                site_reachable = response.status_code == 200
                # stream=True keeps the connection open until closed.
                response.close()

        msg += " - <cyan>{}:</cyan> ".format(listed_site_name)
        if site_reachable:
            msg += "{}\n".format(index_page_locator_info)
        else:
            msg += "{}\n".format(
                "Site doesn’t exist or is inaccessible at "
                + index_page_locator_info
                + ". If you just built data docs, please check permissions."
            )

    cli_message(msg)
    toolkit.confirm_proceed_or_exit()

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)

    cli_message("Done building Data Docs")

    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
コード例 #5
0
def library_install_load_check(
    python_import_name: str, pip_library_name: str
) -> Union[int, None]:
    """
    Dynamically load a module from strings, attempt a pip install or raise a helpful error.

    Args:
        python_import_name (str): a module to import to verify installation
        pip_library_name: name of the package to install with pip

    Returns:
        ``None`` if the library was already loadable and nothing was installed;
        ``0`` if ``pip install`` returned 0 and the library is loadable
        afterwards; ``1`` if the library is still not loadable after the
        install attempt; otherwise the (non-zero) status code of the pip
        command.
    """
    if is_library_loadable(library_name=python_import_name):
        return None

    confirm_prompt: str = f"""Great Expectations relies on the library `{python_import_name}` to connect to your data, \
but the package `{pip_library_name}` containing this library is not installed.
    Would you like Great Expectations to try to execute `pip install {pip_library_name}` for you?"""
    continuation_message: str = f"""\nOK, exiting now.
    - Please execute `pip install {pip_library_name}` before trying again."""
    pip_install_confirmed = toolkit.confirm_proceed_or_exit(
        confirm_prompt=confirm_prompt,
        continuation_message=continuation_message,
        exit_on_no=True,
        exit_code=1,
    )

    # Safety net in case the confirmation helper returns a falsy value
    # instead of exiting on "no".
    if not pip_install_confirmed:
        cli_message(continuation_message)
        sys.exit(1)

    status_code: int = execute_shell_command_with_progress_polling(
        f"pip install {pip_library_name}"
    )

    working_set: WorkingSet = pkg_resources.working_set
    # get_distribution raises DistributionNotFound when the install did not
    # succeed; that case must reach the error message / ``return 1`` path
    # below instead of escaping as an unhandled exception.
    # noinspection SpellCheckingInspection
    try:
        distr: Optional[Distribution] = pkg_resources.get_distribution(
            dist=pip_library_name
        )
    except pkg_resources.DistributionNotFound:
        distr = None
    if distr is not None:
        pkg_resources.WorkingSet.add_entry(self=working_set, entry=distr.key)

    library_loadable: bool = is_library_loadable(library_name=python_import_name)

    if status_code == 0 and library_loadable:
        return 0

    if not library_loadable:
        cli_message(
            f"""<red>ERROR: Great Expectations relies on the library `{pip_library_name}` to connect to your data.</red>
        - Please execute `pip install {pip_library_name}` before trying again."""
        )
        return 1

    return status_code
コード例 #6
0
ファイル: suite.py プロジェクト: rpatil524/great_expectations
def suite_delete(ctx: click.Context, suite: str) -> None:
    """
    Delete an Expectation Suite from the Expectation Store.

    The suite must be among the names returned by
    ``context.list_expectation_suite_names()``. Unless ``ctx.obj.assume_yes``
    is set, the user is asked to confirm before the suite is deleted.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end

    def _fail(message: str) -> None:
        # Shared reporting for the "nothing to delete" situations below.
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            suppress_usage_message=False,
            message=message,
        )

    try:
        suite_names: List[str] = context.list_expectation_suite_names()
    except Exception:
        # Record the failed command before letting the error propagate.
        send_usage_message(
            data_context=context, event=usage_event_end, success=False
        )
        raise

    if not suite_names:
        _fail("<red>No expectation suites found in the project.</red>")

    if suite not in suite_names:
        _fail(f"<red>No expectation suite named {suite} found.</red>")

    # The prompt is only shown when --assume-yes style confirmation is absent.
    confirmed = ctx.obj.assume_yes or toolkit.confirm_proceed_or_exit(
        exit_on_no=False, data_context=context, usage_stats_event=usage_event_end
    )
    if not confirmed:
        cli_message(string=f"Suite `{suite}` was not deleted.")
        sys.exit(0)

    context.delete_expectation_suite(suite)
    cli_message(string=f"Deleted the expectation suite named: {suite}")
    send_usage_message(data_context=context, event=usage_event_end, success=True)
コード例 #7
0
def _add_spark_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    """Interactively configure a Spark datasource and add it to ``context``.

    Args:
        context: Data context that receives the new datasource.
        passthrough_generator_only: When truthy, a default configuration named
            ``files_spark_datasource`` is used. Otherwise the user is prompted
            for a base directory and a ``subdir_reader`` batch kwargs
            generator is configured for it.
        prompt_for_datasource_name: In the directory branch, whether to prompt
            for the datasource name (default: ``<directory name>__dir``).

    Returns:
        The name of the added datasource, or ``None`` when
        ``_verify_pyspark_dependent_modules()`` reports a problem.

    Raises:
        ge_exceptions.GreatExpectationsError: If the directory-based
            configuration fails ``DatasourceConfigSchema`` validation.
    """
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "spark"},
        success=True,
    )

    if not _verify_pyspark_dependent_modules():
        return None

    if passthrough_generator_only:
        datasource_name = "files_spark_datasource"
        configuration = SparkDFDatasource.build_configuration()

    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        ).strip()
        if path.startswith("./"):
            path = path[2:]

        # Drop one trailing slash so that basename() yields the directory name.
        basenamepath = path[:-1] if path.endswith("/") else path

        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )

        configuration = SparkDFDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": os.path.join("..", path),
                }
            }
        )
        configuration["class_name"] = "SparkDFDatasource"
        configuration["module_name"] = "great_expectations.datasource"
        errors = DatasourceConfigSchema().validate(configuration)
        if len(errors) != 0:
            # Plain "{}" is required here: the "s" format spec is rejected by
            # dict/list objects and would raise TypeError instead of the
            # intended GreatExpectationsError.
            raise ge_exceptions.GreatExpectationsError(
                "Invalid Datasource configuration: {}".format(errors)
            )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), "  "),
        )
    )
    toolkit.confirm_proceed_or_exit()

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
コード例 #8
0
def _add_sqlalchemy_datasource(context, prompt_for_datasource_name=True):
    """Interactively configure a SqlAlchemy datasource and add it to ``context``.

    The user picks a database from ``SupportedDatabases``, optionally names
    the datasource, and enters credentials. The credentials are stored with
    ``context.save_config_variable`` under the datasource name and referenced
    from the datasource configuration as ``${<datasource name>}``. On a
    ``DatasourceInitializationError`` the user may re-enter the credentials.

    Args:
        context: Data context that receives the new datasource.
        prompt_for_datasource_name: Whether to prompt for the datasource name
            instead of using the generated default.

    Returns:
        The name of the added datasource, or ``None`` when a dependency check
        fails, a ``ModuleNotFoundError`` occurs while adding the datasource,
        or the user declines to re-enter credentials after a connection
        failure (in which case the datasource is still added with
        ``initialize=False``).

    Raises:
        ge_exceptions.GreatExpectationsError: If the configuration fails
            ``DatasourceConfigSchema`` validation.
    """

    msg_success_database = (
        "\n<green>Great Expectations connected to your database!</green>"
    )

    if not _verify_sqlalchemy_dependent_modules():
        return None

    db_choices = [str(x) for x in range(1, 1 + len(SupportedDatabases))]
    selected_database = (
        int(
            click.prompt(
                msg_prompt_choose_database,
                type=click.Choice(db_choices),
                show_choices=False,
            )
        )
        - 1
    )  # don't show user a zero index list :)

    selected_database = list(SupportedDatabases)[selected_database]

    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "sqlalchemy", "db": selected_database.name},
        success=True,
    )

    datasource_name = "my_{}_db".format(selected_database.value.lower())
    if selected_database == SupportedDatabases.OTHER:
        datasource_name = "my_database"
    if prompt_for_datasource_name:
        datasource_name = click.prompt(
            msg_prompt_datasource_name, default=datasource_name
        )

    credentials = {}
    # Since we don't want to save the database credentials in the config file that will be
    # committed in the repo, we will use our Variable Substitution feature to store the credentials
    # in the credentials file (that will not be committed, since it is in the uncommitted directory)
    # with the datasource's name as the variable name.
    # The value of the datasource's "credentials" key in the config file (great_expectations.yml) will
    # be ${datasource name}.
    # Great Expectations will replace the ${datasource name} with the value from the credentials file in runtime.

    # Each pass collects credentials (pre-filled with the previous attempt)
    # and tries to add the datasource; the loop ends on success or on return.
    while True:
        cli_message(msg_db_config.format(datasource_name))

        if selected_database == SupportedDatabases.MYSQL:
            if not _verify_mysql_dependent_modules():
                return None

            credentials = _collect_mysql_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.POSTGRES:
            if not _verify_postgresql_dependent_modules():
                return None

            credentials = _collect_postgres_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.REDSHIFT:
            if not _verify_redshift_dependent_modules():
                return None

            credentials = _collect_redshift_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.SNOWFLAKE:
            if not _verify_snowflake_dependent_modules():
                return None

            credentials = _collect_snowflake_credentials(
                default_credentials=credentials
            )
        elif selected_database == SupportedDatabases.BIGQUERY:
            if not _verify_bigquery_dependent_modules():
                return None

            credentials = _collect_bigquery_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.OTHER:
            sqlalchemy_url = click.prompt(
                """What is the url/connection string for the sqlalchemy connection?
(reference: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls)
""",
                show_default=False,
            ).strip()
            credentials = {"url": sqlalchemy_url}

        context.save_config_variable(datasource_name, credentials)

        message = """
<red>Cannot connect to the database.</red>
  - Please check your environment and the configuration you provided.
  - Database Error: {0:s}"""
        try:
            cli_message(
                "<cyan>Attempting to connect to your database. This may take a moment...</cyan>"
            )

            configuration = SqlAlchemyDatasource.build_configuration(
                credentials="${" + datasource_name + "}"
            )

            configuration["class_name"] = "SqlAlchemyDatasource"
            configuration["module_name"] = "great_expectations.datasource"
            errors = DatasourceConfigSchema().validate(configuration)
            if len(errors) != 0:
                # Plain "{}" is required here: the "s" format spec is rejected
                # by dict/list objects and would raise TypeError instead of
                # the intended GreatExpectationsError.
                raise ge_exceptions.GreatExpectationsError(
                    "Invalid Datasource configuration: {}".format(errors)
                )

            cli_message(
                """
Great Expectations will now add a new Datasource '{0:s}' to your deployment, by adding this entry to your great_expectations.yml:

{1:s}
The credentials will be saved in uncommitted/config_variables.yml under the key '{0:s}'
""".format(
                    datasource_name,
                    textwrap.indent(
                        toolkit.yaml.dump({datasource_name: configuration}), "  "
                    ),
                )
            )

            toolkit.confirm_proceed_or_exit()
            context.add_datasource(name=datasource_name, **configuration)
            cli_message(msg_success_database)
            break
        except ModuleNotFoundError as de:
            cli_message(message.format(str(de)))
            return None

        except DatasourceInitializationError as de:
            cli_message(message.format(str(de)))
            if not click.confirm("Enter the credentials again?", default=True):
                # Keep the (uninitialized) datasource entry so the user can
                # fix the configuration files by hand.
                context.add_datasource(
                    datasource_name,
                    initialize=False,
                    module_name="great_expectations.datasource",
                    class_name="SqlAlchemyDatasource",
                    data_asset_type={"class_name": "SqlAlchemyDataset"},
                    credentials="${" + datasource_name + "}",
                )
                # TODO this message about continuing may not be accurate
                cli_message(
                    """
We saved datasource {:s} in {:s} and the credentials you entered in {:s}.
Since we could not connect to the database, you can complete troubleshooting in the configuration files documented in the how-to guides here:
<blue>https://docs.greatexpectations.io/en/latest/guides/how_to_guides/configuring_datasources.html?utm_source=cli&utm_medium=init&utm_campaign={:s}#{:s}</blue> .

After you connect to the datasource, run great_expectations init to continue.

""".format(
                        datasource_name,
                        DataContext.GE_YML,
                        context.get_config()["config_variables_file_path"],
                        rtd_url_ge_version,
                        selected_database.value.lower(),
                    )
                )
                return None

    return datasource_name
コード例 #9
0
def _add_pandas_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    """Interactively configure a Pandas datasource and add it to ``context``.

    Args:
        context: Data context that receives the new datasource.
        passthrough_generator_only: When truthy, a default configuration named
            ``files_datasource`` is used. Otherwise the user is prompted for a
            base directory and a ``subdir_reader`` batch kwargs generator is
            configured for it.
        prompt_for_datasource_name: In the directory branch, whether to prompt
            for the datasource name (default: ``<directory name>__dir``).

    Returns:
        The name of the added datasource.

    Raises:
        ge_exceptions.GreatExpectationsError: If the directory-based
            configuration fails ``DatasourceConfigSchema`` validation.
    """
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "pandas"},
        success=True,
    )

    if passthrough_generator_only:
        datasource_name = "files_datasource"
        configuration = PandasDatasource.build_configuration()

    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        )

        if path.startswith("./"):
            path = path[2:]

        # Drop one trailing slash so that basename() yields the directory name.
        basenamepath = path[:-1] if path.endswith("/") else path

        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )

        configuration = PandasDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": os.path.join("..", path),
                }
            }
        )

        configuration["class_name"] = "PandasDatasource"
        configuration["module_name"] = "great_expectations.datasource"
        errors = DatasourceConfigSchema().validate(configuration)
        if len(errors) != 0:
            # Plain "{}" is required here: the "s" format spec is rejected by
            # dict/list objects and would raise TypeError instead of the
            # intended GreatExpectationsError.
            raise ge_exceptions.GreatExpectationsError(
                "Invalid Datasource configuration: {}".format(errors)
            )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), "  "),
        )
    )

    toolkit.confirm_proceed_or_exit(
        continuation_message="Okay, exiting now. To learn more about adding datasources, run great_expectations "
        "datasource --help or visit https://docs.greatexpectations.io/"
    )

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name