def test_opt_out_env_var_overrides_yml(tmp_path_factory, monkeypatch):
    monkeypatch.delenv(
        "GE_USAGE_STATS", raising=False
    )  # Undo the project-wide test default
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    os.makedirs(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")

    shutil.copy(
        os.path.join(
            fixture_dir, "great_expectations_basic_with_usage_stats_enabled.yml"
        ),
        str(os.path.join(context_path, "great_expectations.yml")),
    )

    assert (
        DataContext(
            context_root_dir=context_path
        )._project_config.anonymous_usage_statistics.enabled
        is True
    )

    monkeypatch.setenv("GE_USAGE_STATS", "False")
    context = DataContext(context_root_dir=context_path)
    project_config = context._project_config
    assert project_config.anonymous_usage_statistics.enabled is False
def test_opt_out_etc_overrides_yml(tmp_path_factory, monkeypatch):
    monkeypatch.delenv("GE_USAGE_STATS", raising=False)  # Undo the project-wide test default
    home_config_dir = tmp_path_factory.mktemp("home_dir")
    home_config_dir = str(home_config_dir)
    etc_config_dir = tmp_path_factory.mktemp("etc")
    etc_config_dir = str(etc_config_dir)
    config_dirs = [home_config_dir, etc_config_dir]
    config_dirs = [
        os.path.join(config_dir, "great_expectations.conf") for config_dir in config_dirs
    ]

    disabled_config = configparser.ConfigParser()
    disabled_config["anonymous_usage_statistics"] = {"enabled": "False"}

    with open(os.path.join(etc_config_dir, "great_expectations.conf"), "w") as configfile:
        disabled_config.write(configfile)

    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    os.makedirs(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")

    shutil.copy(
        os.path.join(fixture_dir, "great_expectations_basic_with_usage_stats_enabled.yml"),
        str(os.path.join(context_path, "great_expectations.yml")),
    )

    assert DataContext(context_root_dir=context_path)._project_config.anonymous_usage_statistics.enabled is True

    with mock.patch("great_expectations.data_context.BaseDataContext.GLOBAL_CONFIG_PATHS", config_dirs):
        context = DataContext(context_root_dir=context_path)
        project_config = context._project_config
        assert project_config.anonymous_usage_statistics.enabled is False
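
For reference, the great_expectations.conf written by the configparser block above contains exactly this INI section, which the patched GLOBAL_CONFIG_PATHS lookup then reads:

[anonymous_usage_statistics]
enabled = False
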
def test_existing_local_data_docs_urls_returns_single_url_from_customized_local_site(
        tmp_path_factory):
    empty_directory = str(tmp_path_factory.mktemp("yo_yo"))
    DataContext.create(empty_directory)
    ge_dir = os.path.join(empty_directory, DataContext.GE_DIR)
    context = DataContext(ge_dir)

    context._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/local/path/"
            }
        }
    }

    # TODO Workaround project config programmatic config manipulation
    #  statefulness issues by writing to disk and re-upping a new context
    context._save_project_config()
    context = DataContext(ge_dir)
    context.build_data_docs()

    expected_path = os.path.join(
        ge_dir, "uncommitted/data_docs/some/local/path/index.html")
    assert os.path.isfile(expected_path)

    obs = context.get_docs_sites_urls()
    assert obs == ["file://{}".format(expected_path)]
Example #4
def init(target_directory, view):
    """
    Create a new project and help with onboarding.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    ge_yml = os.path.join(ge_dir, DataContext.GE_YML)

    cli_message(GREETING)

    # TODO this should be a property
    if os.path.isfile(ge_yml):
        if DataContext.all_uncommitted_directories_exist(ge_dir) and \
                DataContext.config_variables_yml_exist(ge_dir):
            # Ensure the context can be instantiated
            try:
                _ = DataContext(ge_dir)
                cli_message(PROJECT_IS_COMPLETE)
            except ge_exceptions.DataContextError as e:
                cli_message("<red>{}</red>".format(e))
                exit(5)
        else:
            _complete_onboarding(target_directory)

        try:
            # if expectations exist, offer to build docs
            context = DataContext(ge_dir)
            if context.list_expectation_suite_keys():
                if click.confirm(BUILD_DOCS_PROMPT, default=True):
                    context.build_data_docs()
                    context.open_data_docs()
        except ge_exceptions.DataContextError as e:
            cli_message("<red>{}</red>".format(e))
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            exit(0)

        context, data_source_name, data_source_type = _create_new_project(
            target_directory)
        if not data_source_name:  # no datasource was created
            return

        profile_datasource(context,
                           data_source_name,
                           open_docs=view,
                           additional_batch_kwargs={"limit": 1000})
        cli_message(
            """\n<cyan>Great Expectations is now set up in your project!</cyan>"""
        )
def test_data_context_ge_cloud_mode_makes_successful_request_to_cloud_api(
    mock_request,
    ge_cloud_runtime_base_url,
    ge_cloud_runtime_organization_id,
    ge_cloud_runtime_access_token,
):
    # Ensure that the request goes through
    mock_request.return_value.status_code = 200
    try:
        DataContext(
            ge_cloud_mode=True,
            ge_cloud_base_url=ge_cloud_runtime_base_url,
            ge_cloud_organization_id=ge_cloud_runtime_organization_id,
            ge_cloud_access_token=ge_cloud_runtime_access_token,
        )
    except Exception:  # Not concerned with constructor output (only evaluating interaction with requests during __init__)
        pass

    called_with_url = f"{ge_cloud_runtime_base_url}/organizations/{ge_cloud_runtime_organization_id}/data-context-configuration"
    called_with_header = {
        "headers": {
            "Content-Type": "application/vnd.api+json",
            "Authorization": f"Bearer {ge_cloud_runtime_access_token}",
        }
    }

    # Only ever called once with the endpoint URL and auth token as args
    mock_request.assert_called_once()
    assert mock_request.call_args[0][0] == called_with_url
    assert mock_request.call_args[1] == called_with_header
Example #6
def profile(datasource_name, data_assets, profile_all_data_assets, directory):
    """Profile datasources from the specified context.


    DATASOURCE_NAME: the datasource to profile, or leave blank to profile all datasources."""

    # `max_data_assets` is not a parameter of this command in this snippet,
    # so bind it up front (None means "no limit"); otherwise the call below
    # raises NameError whenever profile_all_data_assets is falsy.
    max_data_assets = None

    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message(
            "Error: no great_expectations context configuration found in the specified directory."
        )
        return

    if datasource_name is None:
        datasources = [
            datasource["name"] for datasource in context.list_datasources()
        ]
        for datasource_name in datasources:
            profile_datasource(context,
                               datasource_name,
                               max_data_assets=max_data_assets)
    else:
        profile_datasource(context, datasource_name, data_assets=data_assets)
def test_DataContext_raises_error_on_invalid_top_level_type():
    local_dir = file_relative_path(
        __file__, os.path.join(BASE_DIR, "invalid_top_level_value_type"))
    with pytest.raises(ge_exceptions.InvalidDataContextConfigError) as exc:
        DataContext(local_dir)

    assert "data_docs_sites" in exc.value.messages
Example #8
def profile(datasource_name, data_assets, profile_all_data_assets, directory):
    """
    Profile datasources from the specified context.

    If the optional data_assets and profile_all_data_assets arguments are not specified, the profiler will check
    if the number of data assets in the datasource exceeds the internally defined limit. If it does, it will
    prompt the user to either specify the list of data assets to profile or to profile all.
    If the limit is not exceeded, the profiler will profile all data assets in the datasource.

    :param datasource_name: name of the datasource to profile
    :param data_assets: if this comma-separated list of data asset names is provided, only the specified data assets will be profiled
    :param profile_all_data_assets: if provided, all data assets will be profiled
    :param directory: path to the project's great_expectations directory
    :return: None
    """

    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message("Error: no great_expectations context configuration found in the specified directory.")
        return

    if datasource_name is None:
        datasources = [datasource["name"] for datasource in context.list_datasources()]
        if not datasources:
            cli_message("Error: no datasources found in the context.")
            return
        elif len(datasources) > 1:
            cli_message("Error: please specify the datasource to profile. Available datasources: " + ", ".join(datasources))
            return
        else:
            profile_datasource(context, datasources[0], data_assets=data_assets, profile_all_data_assets=profile_all_data_assets)
    else:
        profile_datasource(context, datasource_name, data_assets=data_assets, profile_all_data_assets=profile_all_data_assets)
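
A sketch of calling this command function directly; in the real CLI the arguments arrive via click options (the decorators are not shown in this snippet), and the asset names here are purely illustrative:

profile(
    datasource_name=None,        # None: enumerate the context's datasources
    data_assets="users,orders",  # hypothetical comma-separated asset names
    profile_all_data_assets=False,
    directory="great_expectations",
)
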
def test_DataContext_raises_error_on_old_config_version():
    local_dir = file_relative_path(
        __file__, os.path.join(BASE_DIR, "old_config_version"))
    with pytest.raises(ge_exceptions.InvalidDataContextConfigError) as exc:
        DataContext(local_dir)

    assert "Error while processing DataContextConfig" in exc.value.message
def test_datasource_initialization_error_thrown_in_cloud_mode(
    ge_cloud_data_context_config: DataContextConfig,
    ge_cloud_runtime_base_url,
    ge_cloud_runtime_organization_id,
    ge_cloud_runtime_access_token,
):
    # normally the DataContext swallows exceptions when there is an error raised from get_datasource
    # (which is used during initialization). In cloud mode, we want a DatasourceInitializationError to
    # propagate.

    # normally in cloud mode configuration is retrieved from an endpoint; we're providing it here in-line
    with mock.patch(
        "great_expectations.data_context.DataContext._retrieve_data_context_config_from_ge_cloud",
        return_value=ge_cloud_data_context_config,
    ):
        # DataContext._init_datasources calls get_datasource, which may generate a DatasourceInitializationError
        # that normally gets swallowed.
        with mock.patch(
            "great_expectations.data_context.DataContext.get_datasource"
        ) as get_datasource:
            get_datasource.side_effect = DatasourceInitializationError(
                "mock_datasource", "mock_message"
            )
            with pytest.raises(DatasourceInitializationError):
                DataContext(
                    ge_cloud_mode=True,
                    ge_cloud_base_url=ge_cloud_runtime_base_url,
                    ge_cloud_organization_id=ge_cloud_runtime_organization_id,
                    ge_cloud_access_token=ge_cloud_runtime_access_token,
                )
Example #11
def profile(datasource_name, data_assets, profile_all_data_assets, directory, view, batch_kwargs):
    """
    Profile datasources from the specified context.

    If the optional data_assets and profile_all_data_assets arguments are not specified, the profiler will check
    if the number of data assets in the datasource exceeds the internally defined limit. If it does, it will
    prompt the user to either specify the list of data assets to profile or to profile all.
    If the limit is not exceeded, the profiler will profile all data assets in the datasource.

    :param datasource_name: name of the datasource to profile
    :param data_assets: if this comma-separated list of data asset names is provided, only the specified data assets will be profiled
    :param profile_all_data_assets: if provided, all data assets will be profiled
    :param directory: path to the project's great_expectations directory
    :param view: Open the docs in a browser
    :param batch_kwargs: Additional keyword arguments to be provided to get_batch when loading the data asset.
    :return: None
    """

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` is never bound when the constructor raises, so pass the
        # user-supplied directory rather than context.root_directory.
        _offer_to_install_new_template(err, directory)
        return

    if batch_kwargs is not None:
        batch_kwargs = json.loads(batch_kwargs)

    if datasource_name is None:
        datasources = [datasource["name"] for datasource in context.list_datasources()]
        if not datasources:
            cli_message(NO_DATASOURCES_FOUND)
            sys.exit(-1)
        elif len(datasources) > 1:
            cli_message(
                "<red>Error: please specify the datasource to profile. "\
                "Available datasources: " + ", ".join(datasources) + "</red>"
            )
            sys.exit(-1)
        else:
            profile_datasource(
                context,
                datasources[0],
                data_assets=data_assets,
                profile_all_data_assets=profile_all_data_assets,
                open_docs=view,
                additional_batch_kwargs=batch_kwargs
            )
    else:
        profile_datasource(
            context,
            datasource_name,
            data_assets=data_assets,
            profile_all_data_assets=profile_all_data_assets,
            open_docs=view,
            additional_batch_kwargs=batch_kwargs
        )
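
Because batch_kwargs is decoded with json.loads, the CLI receives it as a JSON string. An illustrative value, echoing the {"limit": 1000} that init above passes as additional_batch_kwargs:

batch_kwargs = '{"limit": 1000}'  # json.loads turns this into {"limit": 1000}
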
Example #12
def validate(data_asset,
             expectation_suite=None,
             data_asset_name=None,
             data_context=None,
             data_asset_type=None,
             *args,
             **kwargs):
    """Validate the provided data asset using the provided expectation suite"""
    if expectation_suite is None and data_context is None:
        raise ValueError(
            "Either an expectation suite or a DataContext is required for validation."
        )

    if expectation_suite is None:
        logger.info("Using expectation suite from DataContext.")
        # Allow data_context to be a string, and try loading it from path in that case
        if isinstance(data_context, string_types):
            data_context = DataContext(data_context)
        expectation_suite = data_context.get_expectation_suite(data_asset_name)
    else:
        # Check for the key itself; the next line reads expectation_suite["data_asset_name"].
        if "data_asset_name" in expectation_suite:
            logger.info("Using expectation suite with name %s" %
                        expectation_suite["data_asset_name"])
        else:
            logger.info("Using expectation suite with no data_asset_name")

    # If the object is already a Dataset type, then this is purely a convenience method
    # and no conversion is needed
    if isinstance(data_asset, dataset.Dataset) and data_asset_type is None:
        return data_asset.validate(expectation_suite=expectation_suite,
                                   data_context=data_context,
                                   *args,
                                   **kwargs)
    elif data_asset_type is None:
        # Guess the GE data_asset_type based on the type of the data_asset
        if isinstance(data_asset, pd.DataFrame):
            data_asset_type = dataset.PandasDataset
        # Add other data_asset_type conditions here as needed

    # Otherwise, we will convert for the user to a subclass of the
    # existing class to enable new expectations, but only for datasets
    if not isinstance(data_asset, (dataset.Dataset, pd.DataFrame)):
        raise ValueError(
            "The validate util method only supports dataset validations, including custom subclasses. For other data asset types, use the object's own validate method."
        )

    if not issubclass(type(data_asset), data_asset_type):
        if isinstance(data_asset, pd.DataFrame) and issubclass(
                data_asset_type, dataset.PandasDataset):
            pass  # This is a special type of allowed coercion
        else:
            raise ValueError(
                "The validate util method only supports validation for subtypes of the provided data_asset_type."
            )

    data_asset_ = _convert_to_dataset_class(data_asset, data_asset_type,
                                            expectation_suite)
    return data_asset_.validate(*args, data_context=data_context, **kwargs)
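
A minimal usage sketch for the helper above, assuming a project directory great_expectations/ and a suite stored under the illustrative name "my_table":

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
# Passing a string path: validate() loads it via DataContext(...) internally.
result = validate(df, data_asset_name="my_table", data_context="great_expectations")
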
Example #13
def test_existing_local_data_docs_urls_returns_multiple_urls_from_customized_local_site(
        tmp_path_factory):
    empty_directory = str(tmp_path_factory.mktemp("yo_yo_ma"))
    DataContext.create(empty_directory)
    ge_dir = os.path.join(empty_directory, DataContext.GE_DIR)
    context = DataContext(ge_dir)

    context._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/path/"
            }
        },
        "another_just_amazing_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/another/path/"
            }
        }
    }

    # TODO Workaround project config programmatic config manipulation
    #  statefulness issues by writing to disk and re-upping a new context
    context._save_project_config()
    context = DataContext(ge_dir)
    context.build_data_docs()
    data_docs_dir = os.path.join(ge_dir, "uncommitted/data_docs/")

    path_1 = os.path.join(data_docs_dir, "some/path/index.html")
    path_2 = os.path.join(data_docs_dir, "another/path/index.html")
    for expected_path in [path_1, path_2]:
        assert os.path.isfile(expected_path)

    obs = context.get_docs_sites_urls()

    assert obs == [{
        'site_name': 'my_rad_site',
        'site_url': "file://{}".format(path_1)
    }, {
        'site_name': 'another_just_amazing_site',
        'site_url': "file://{}".format(path_2)
    }]
def empty_context(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)
    ge_dir = os.path.join(project_path, "great_expectations")
    assert os.path.isdir(ge_dir)
    assert os.path.isfile(os.path.join(ge_dir, DataContext.GE_YML))
    context = DataContext(ge_dir)
    assert isinstance(context, DataContext)
    return context
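
A sketch of a test consuming the fixture above (presumably registered with @pytest.fixture in the original source; pytest injects it by parameter name):

def test_empty_context_has_config(empty_context):
    # root_directory points at the great_expectations/ dir created by the fixture.
    ge_yml = os.path.join(empty_context.root_directory, DataContext.GE_YML)
    assert os.path.isfile(ge_yml)
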
def test_data_context_ge_cloud_mode_with_incomplete_cloud_config_should_throw_error(
    ge_cloud_data_context_config,
    data_context_with_incomplete_global_config_in_dot_dir_only,
):
    # Don't want to make a real request in a unit test so we simply patch the config fixture
    with mock.patch(
        "great_expectations.data_context.DataContext._retrieve_data_context_config_from_ge_cloud",
        return_value=ge_cloud_data_context_config,
    ):
        with pytest.raises(DataContextError):
            DataContext(context_root_dir="/my/context/root/dir", ge_cloud_mode=True)
def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured(tmp_path_factory):
    """
    This test ensures that a url will be returned for a default site even if a
    datasource is not configured, and docs are not built.
    """
    empty_directory = str(tmp_path_factory.mktemp("another_empty_project"))
    DataContext.create(empty_directory)
    context = DataContext(os.path.join(empty_directory, DataContext.GE_DIR))

    obs = context.get_docs_sites_urls()
    assert len(obs) == 1
    assert obs[0].endswith("great_expectations/uncommitted/data_docs/local_site/index.html")
Example #17
def list_datasources(directory):
    """List known datasources."""
    try:
        context = DataContext(directory)
        datasources = context.list_datasources()
        # TODO Pretty up this console output
        cli_message(str([d for d in datasources]))
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` is never bound when the constructor raises; use the
        # user-supplied directory instead of context.root_directory.
        _offer_to_install_new_template(err, directory)
Example #18
def documentation(directory):
    """Build data documentation for a project.
    """
    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message(
            "Error: no great_expectations context configuration found in the specified directory."
        )
        return

    build_documentation(context)
Example #19
def do_config_check(target_directory):
    try:
        DataContext(context_root_dir=target_directory)
        return True, None
    except (ge_exceptions.InvalidConfigurationYamlError,
            ge_exceptions.InvalidTopLevelConfigKeyError,
            ge_exceptions.MissingTopLevelConfigKeyError,
            ge_exceptions.InvalidConfigValueTypeError,
            ge_exceptions.InvalidConfigVersionError,
            ge_exceptions.UnsupportedConfigVersionError,
            ge_exceptions.DataContextError,
            ge_exceptions.PluginClassNotFoundError) as err:
        return False, err.message
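
An illustrative call: the helper returns a (success, message) pair, so a caller can surface the failure reason directly:

is_valid, message = do_config_check("great_expectations")
if not is_valid:
    print(f"Config check failed: {message}")
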
def test_DataContext_raises_error_on_invalid_config_version():
    local_dir = file_relative_path(
        __file__, os.path.join(BASE_DIR, "invalid_config_version")
    )
    with pytest.raises(ge_exceptions.InvalidDataContextConfigError) as exc:
        DataContext(local_dir)

    error_messages = []
    if exc.value.messages and len(exc.value.messages) > 0:
        error_messages.extend(exc.value.messages)
    if exc.value.message:
        error_messages.append(exc.value.message)
    error_messages = " ".join(error_messages)
    assert "config_version" in error_messages
Example #21
def profile(datasource_name, max_data_assets, profile_all_data_assets,
            target_directory):
    """Profile a great expectations object.

    datasource_name: A datasource within this GE context to profile.
    """

    if profile_all_data_assets:
        max_data_assets = None

    # FIXME: By default, this should iterate over all datasources
    context = DataContext(target_directory)
    context.profile_datasource(datasource_name,
                               max_data_assets=max_data_assets)
Example #22
def build_documentation(directory, site_name, data_asset_name):
    """Build data documentation for a project.
    """
    if data_asset_name is not None and site_name is None:
        cli_message("Error: When specifying data_asset_name, must also specify site_name.")
        return

    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message("Error: no great_expectations context configuration found in the specified directory.")
        return

    build_documentation_impl(context, site_name=site_name, data_asset_name=data_asset_name)
def test_opt_out_yml(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    safe_mmkdir(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")

    shutil.copy(
        os.path.join(fixture_dir,
                     "great_expectations_basic_with_usage_stats_disabled.yml"),
        str(os.path.join(context_path, "great_expectations.yml")),
    )

    assert DataContext(
        context_root_dir=context_path
    )._project_config.anonymous_usage_statistics.enabled is False
def test_data_context_ge_cloud_mode_with_bad_request_to_cloud_api_should_throw_error(
    mock_request,
    ge_cloud_runtime_base_url,
    ge_cloud_runtime_organization_id,
    ge_cloud_runtime_access_token,
):
    # Ensure that the request fails
    mock_request.return_value.status_code = 401

    with pytest.raises(GeCloudError):
        DataContext(
            ge_cloud_mode=True,
            ge_cloud_base_url=ge_cloud_runtime_base_url,
            ge_cloud_organization_id=ge_cloud_runtime_organization_id,
            ge_cloud_access_token=ge_cloud_runtime_access_token,
        )
Example #25
def add_datasource(directory, view):
    """Add a new datasource to the data context."""
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` is never bound when the constructor raises; use the
        # user-supplied directory, and return rather than fall through
        # with an undefined `context`.
        _offer_to_install_new_template(err, directory)
        return

    data_source_name, data_source_type = add_datasource_impl(context)

    if not data_source_name:  # no datasource was created
        return

    profile_datasource(context, data_source_name, open_docs=view)
Example #26
def add_datasource(directory):
    """Add a new datasource to the data context
    """
    try:
        context = DataContext(directory)
    except ConfigNotFoundError:
        cli_message("Error: no great_expectations context configuration found in the specified directory.")
        return

    data_source_name = add_datasource_impl(context)

    if not data_source_name: # no datasource was created
        return

    profile_datasource(context, data_source_name)
Example #27
def init(target_directory):
    """
    Create a new project and help with onboarding.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    ge_yml = os.path.join(ge_dir, DataContext.GE_YML)

    cli_message(GREETING)

    # TODO this should be a property
    if os.path.isfile(ge_yml):
        if DataContext.all_uncommitted_directories_exist(ge_dir) and \
                DataContext.config_variables_yml_exist(ge_dir):
            cli_message(PROJECT_IS_COMPLETE)
        else:
            _complete_onboarding(target_directory)

        try:
            # if expectations exist, offer to build docs
            context = DataContext(ge_dir)
            if context.list_expectation_suite_keys():
                if click.confirm(BUILD_DOCS_PROMPT, default=True):
                    context.build_data_docs()
                    _open_data_docs_in_browser(context.root_directory)
        except ge_exceptions.DataContextError as e:
            cli_message("<red>{}</red>".format(e))
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            exit(0)

        context, data_source_name = _create_new_project(target_directory)
        if not data_source_name:  # no datasource was created
            return

        context = _slack_setup(context)

        profile_datasource(context, data_source_name)
        cli_message(MSG_GO_TO_NOTEBOOK)
Example #28
def build_docs(directory, site_name, view=True):
    """Build Data Docs for a project."""
    logger.debug("Starting cli.build_docs")

    try:
        context = DataContext(directory)
        build_documentation_impl(context, site_name=site_name)
        if view:
            context.open_data_docs()
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        sys.exit(1)
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` is never bound when the constructor raises; use the
        # user-supplied directory instead of context.root_directory.
        _offer_to_install_new_template(err, directory)
        return
    except ge_exceptions.PluginModuleNotFoundError as err:
        cli_message(err.cli_colored_message)
        sys.exit(1)
    except ge_exceptions.PluginClassNotFoundError as err:
        cli_message(err.cli_colored_message)
        sys.exit(1)
def get_context():
    from great_expectations.data_context.data_context import DataContext

    return DataContext()
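
A minimal usage sketch, assuming the working directory is inside an initialized project so the no-argument DataContext() can locate the configuration:

context = get_context()
print(context.list_expectation_suite_keys())
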
def validate(
    data_asset,
    expectation_suite=None,
    data_asset_name=None,
    expectation_suite_name=None,
    data_context=None,
    data_asset_class_name=None,
    data_asset_module_name="great_expectations.dataset",
    data_asset_class=None,
    *args,
    **kwargs,
):
    """Validate the provided data asset. Validate can accept an optional data_asset_name to apply, data_context to use
    to fetch an expectation_suite if one is not provided, and data_asset_class_name/data_asset_module_name or
    data_asset_class to use to provide custom expectations.

    Args:
        data_asset: the asset to validate
        expectation_suite: the suite to use, or None to fetch one using a DataContext
        data_asset_name: the name of the data asset to use
        expectation_suite_name: the name of the expectation_suite to use
        data_context: data context to use to fetch an expectation suite, or the path from which to obtain one
        data_asset_class_name: the name of a class to dynamically load a DataAsset class
        data_asset_module_name: the name of the module to dynamically load a DataAsset class
        data_asset_class: a class to use; overrides data_asset_class_name/data_asset_module_name if provided
        *args:
        **kwargs:

    Returns:
        The validation result produced by the data asset's own validate method.
    """
    # Get an expectation suite if not provided
    if expectation_suite is None and data_context is None:
        raise ValueError(
            "Either an expectation suite or a DataContext is required for validation."
        )

    if expectation_suite is None:
        logger.info("Using expectation suite from DataContext.")
        # Allow data_context to be a string, and try loading it from path in that case
        if isinstance(data_context, str):
            from great_expectations.data_context import DataContext

            data_context = DataContext(data_context)
        expectation_suite = data_context.get_expectation_suite(
            expectation_suite_name=expectation_suite_name
        )
    else:
        if isinstance(expectation_suite, dict):
            expectation_suite = expectationSuiteSchema.load(expectation_suite)
        if data_asset_name is not None:
            raise ValueError(
                "When providing an expectation suite, data_asset_name cannot also be provided."
            )
        if expectation_suite_name is not None:
            raise ValueError(
                "When providing an expectation suite, expectation_suite_name cannot also be provided."
            )
        logger.info(
            "Validating data_asset_name %s with expectation_suite_name %s"
            % (data_asset_name, expectation_suite.expectation_suite_name)
        )

    # If the object is already a DataAsset type, then this is purely a convenience method
    # and no conversion is needed; try to run validate on the given object
    if data_asset_class_name is None and data_asset_class is None:
        return data_asset.validate(
            expectation_suite=expectation_suite,
            data_context=data_context,
            *args,
            **kwargs,
        )

    # Otherwise, try to convert and validate the dataset
    if data_asset_class is None:
        verify_dynamic_loading_support(module_name=data_asset_module_name)
        data_asset_class = load_class(data_asset_class_name, data_asset_module_name)

    import pandas as pd

    from great_expectations.dataset import Dataset, PandasDataset

    if data_asset_class is None:
        # Guess the GE data_asset_type based on the type of the data_asset
        if isinstance(data_asset, pd.DataFrame):
            data_asset_class = PandasDataset
        # Add other data_asset_type conditions here as needed

    # Otherwise, we will convert for the user to a subclass of the
    # existing class to enable new expectations, but only for datasets
    if not isinstance(data_asset, (Dataset, pd.DataFrame)):
        raise ValueError(
            "The validate util method only supports dataset validations, including custom subclasses. For other data "
            "asset types, use the object's own validate method."
        )

    if not issubclass(type(data_asset), data_asset_class):
        if isinstance(data_asset, pd.DataFrame) and issubclass(
            data_asset_class, PandasDataset
        ):
            pass  # This is a special type of allowed coercion
        else:
            raise ValueError(
                "The validate util method only supports validation for subtypes of the provided data_asset_type."
            )

    data_asset_ = _convert_to_dataset_class(
        data_asset, dataset_class=data_asset_class, expectation_suite=expectation_suite
    )
    return data_asset_.validate(*args, data_context=data_context, **kwargs)
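
A usage sketch for this newer signature, fetching the suite by name from a context path (the path and suite name are illustrative):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
result = validate(
    df,
    data_context="great_expectations",  # loaded via DataContext(...) internally
    expectation_suite_name="my_suite",  # hypothetical suite name
)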