Example #1
def test_cli_init_spark_without_library_installed_instructs_user(
        caplog, tmp_path_factory):
    basedir = tmp_path_factory.mktemp("test_cli_init_diff")
    basedir = str(basedir)
    os.chdir(basedir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["init", "--no-view"],
                           input="\n\n1\n2\n",
                           catch_exceptions=False)
    stdout = result.output

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert "Great Expectations relies on the library `pyspark`" in stdout
    assert "Please `pip install pyspark` before trying again" in stdout

    assert "Profiling" not in stdout
    assert "Building" not in stdout
    assert "Data Docs" not in stdout
    assert "Great Expectations is now set up" not in stdout

    assert result.exit_code == 1

    assert os.path.isdir(os.path.join(basedir, "great_expectations"))
    config_path = os.path.join(basedir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    assert config["datasources"] == dict()

    obs_tree = gen_directory_tree_str(
        os.path.join(basedir, "great_expectations"))
    assert (obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
""")

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = create_engine("sqlite:///{}".format(database_path))

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{}\n\n\n1\nwarning\n\n\n\n".format(engine.url),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources()[0]["class_name"] == "SqlAlchemyDatasource"
    assert context.list_datasources()[0]["name"] == "titanic"

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "SqlAlchemyDataset"

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", obs_tree
    )
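    # e.g. "0123456789abcdef0123456789abcdef.json" -> "foobarbazguid.json"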
    # print(guid_safe_obs_tree)
    assert (
        guid_safe_obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        20190926T134241.000000Z/
                            20190926T134241.000000Z/
                                foobarbazguid.html
        validations/
            warning/
                20190926T134241.000000Z/
                    20190926T134241.000000Z/
                        foobarbazguid.json
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
Example #3
def _library_not_loaded_test(tmp_path_factory, cli_input, library_name,
                             library_import_name, my_caplog):
    """
    This test requires that a library is NOT installed. It tests that:
    - a helpful error message is returned to install the missing library
    - the expected tree structure is in place
    - the config yml contains an empty dict in its datasources entry
    (see the usage sketch after this function)
    """
    basedir = tmp_path_factory.mktemp("test_cli_init_diff")
    basedir = str(basedir)
    os.chdir(basedir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["init", "--no-view"],
                           input=cli_input,
                           catch_exceptions=False)
    stdout = result.output

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert (
        """Next, we will configure database credentials and store them in the `my_db` section
of this config file: great_expectations/uncommitted/config_variables.yml"""
        in stdout)
    assert ("Great Expectations relies on the library `{}`".format(
        library_import_name) in stdout)
    assert "Please `pip install {}` before trying again".format(
        library_name) in stdout

    assert "Profiling" not in stdout
    assert "Building" not in stdout
    assert "Data Docs" not in stdout
    assert "Great Expectations is now set up" not in stdout

    assert result.exit_code == 1

    assert os.path.isdir(os.path.join(basedir, "great_expectations"))
    config_path = os.path.join(basedir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    assert config["datasources"] == dict()

    obs_tree = gen_directory_tree_str(
        os.path.join(basedir, "great_expectations"))
    assert (obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
""")

    assert_no_logging_messages_or_tracebacks(my_caplog, result)
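# A minimal usage sketch (not from the source) of how a concrete test could
# reuse the helper above. The library name and the exact CLI input string are
# assumptions; the real suite drives the prompts with inputs matching its own
# CLI version. Only the `my_db` datasource name is dictated by the helper's
# assertions.
def test_cli_init_pymssql_not_installed_instructs_user(caplog, tmp_path_factory):
    _library_not_loaded_test(
        tmp_path_factory,
        cli_input="\n\n2\n5\nmy_db\n",  # assumed prompt answers, ending with the name `my_db`
        library_name="pymssql",         # assumed pip package name
        library_import_name="pymssql",  # assumed import name
        my_caplog=caplog,
    )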
def test_cli_init_on_new_project(mock_emit, mock_webbrowser, caplog,
                                 tmp_path_factory, monkeypatch):
    monkeypatch.delenv("GE_USAGE_STATS",
                       raising=False)  # Undo the project-wide test default
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "Titanic.csv")
    fixture_path = file_relative_path(__file__, "../test_sets/Titanic.csv")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n1\n1\n{}\n\n\n\n2\n{}\n\n\n\n".format(
            data_folder_path, data_path),
        catch_exceptions=False,
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/"
        .format(project_dir) in mock_webbrowser.call_args[0][0])

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout)
    assert "Name the new Expectation Suite [Titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout)
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Done generating example Expectation Suite" in stdout
    assert "Great Expectations is now set up" in stdout

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"]
    assert data_source_class == "PandasDataset"

    obs_tree = gen_directory_tree_str(
        os.path.join(project_dir, "great_expectations"))

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(r"[a-z0-9]{32}(?=\.(json|html))",
                                "foobarbazguid", obs_tree)
    # print(guid_safe_obs_tree)
    assert (guid_safe_obs_tree == """great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        Titanic/
            warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    Titanic/
                        warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    Titanic/
                        warning/
                            20190926T134241.000000Z/
                                20190926T134241.000000Z/
                                    foobarbazguid.html
        validations/
            .ge_store_backend_id
            Titanic/
                warning/
                    20190926T134241.000000Z/
                        20190926T134241.000000Z/
                            foobarbazguid.json
""")

    assert mock_emit.call_count == 9
    assert mock_emit.call_args_list[1] == mock.call({
        "event_payload": {},
        "event": "cli.init.create",
        "success": True
    })

    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #5
def test_ValidationsStore_with_TupleFileSystemStoreBackend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir"
        )
    )
    project_path = str(tmp_path_factory.mktemp("my_dir"))

    my_store = ValidationsStore(
        store_backend={
            "module_name": "great_expectations.data_context.store",
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store/",
        },
        runtime_environment={"root_directory": path},
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    ns_1 = ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier("asset.quarantine"),
        run_id="prod-100",
        batch_identifier="batch_id",
    )
    my_store.set(ns_1, ExpectationSuiteValidationResult(success=True))
    assert my_store.get(ns_1) == ExpectationSuiteValidationResult(
        success=True, statistics={}, results=[]
    )

    ns_2 = ValidationResultIdentifier.from_tuple(
        (
            "asset",
            "quarantine",
            "prod-20",
            datetime.datetime.now(datetime.timezone.utc),
            "batch_id",
        )
    )
    my_store.set(ns_2, ExpectationSuiteValidationResult(success=False))
    assert my_store.get(ns_2) == ExpectationSuiteValidationResult(
        success=False, statistics={}, results=[]
    )

    print(my_store.list_keys())
    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

    print(gen_directory_tree_str(path))
    assert (
        gen_directory_tree_str(path)
        == """\
test_ValidationResultStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        asset/
            quarantine/
                prod-100/
                    20190926T134241.000000Z/
                        batch_id.json
                prod-20/
                    20190926T134241.000000Z/
                        batch_id.json
"""
    )
def test_project_upgrade_with_exception(v10_project_directory, caplog):
    # test project upgrade that requires manual steps

    # copy v2 yml
    shutil.copy(
        file_relative_path(
            __file__,
            "../../test_fixtures/upgrade_helper/great_expectations_v1_basic_with_exception.yml",
        ),
        os.path.join(v10_project_directory, "great_expectations.yml"),
    )

    runner: CliRunner = CliRunner(mix_stderr=False)
    result: Result = runner.invoke(
        cli,
        ["-c", v10_project_directory, "--v3-api", "project", "upgrade"],
        input="\n",
        catch_exceptions=False,
    )
    stdout: str = result.stdout

    with open(
            file_relative_path(
                __file__,
                "../../test_fixtures/upgrade_helper/test_project_upgrade_with_exception_expected_stdout.fixture",
            )) as f:
        expected_stdout: str = f.read()
        expected_stdout = expected_stdout.replace("GE_PROJECT_DIR",
                                                  v10_project_directory)
        assert stdout == expected_stdout

    expected_project_tree_str: str = """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
        .gitkeep
    expectations/
        .ge_store_backend_id
        .gitkeep
    notebooks/
        .gitkeep
    plugins/
        custom_store_backends/
            __init__.py
            my_custom_store_backend.py
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                expectations/
                    .gitkeep
                static/
                    .gitkeep
                validations/
                    diabetic_data/
                        warning/
                            20200430T191246.763896Z/
                                20200430T191246.763896Z/
                                    c3b4c5df224fef4b1a056a0f3b93aba5.html
        logs/
            project_upgrades/
                UpgradeHelperV11_20190926T134241.000000Z.json
        validations/
            .ge_store_backend_id
            diabetic_data/
                warning/
                    20200430T191246.763896Z/
                        20200430T191246.763896Z/
                            c3b4c5df224fef4b1a056a0f3b93aba5.json
"""
    obs_project_tree_str: str = gen_directory_tree_str(
        startpath=v10_project_directory)
    assert obs_project_tree_str == expected_project_tree_str
    # make sure config number not incremented
    assert (DataContext.get_ge_config_version(
        context_root_dir=v10_project_directory) == 1.0)

    with open(
            file_relative_path(
                __file__,
                "../../test_fixtures/upgrade_helper/UpgradeHelperV11_basic_upgrade_with_exception_log.json",
            )) as f:
        expected_upgrade_log_dict: dict = json.load(f)
        expected_upgrade_log_str: str = json.dumps(expected_upgrade_log_dict)
        expected_upgrade_log_str = expected_upgrade_log_str.replace(
            "GE_PROJECT_DIR", v10_project_directory)
        expected_upgrade_log_str = expected_upgrade_log_str.replace(
            "GE_PATH",
            os.path.split(great_expectations.__file__)[0])
        expected_upgrade_log_dict = json.loads(expected_upgrade_log_str)

    with open(
            f"{v10_project_directory}/uncommitted/logs/project_upgrades/UpgradeHelperV11_20190926T134241.000000Z.json"
    ) as f:
        obs_upgrade_log_dict: dict = json.load(f)
        obs_upgrade_log_dict["exceptions"][0]["exception_message"] = ""

    assert obs_upgrade_log_dict == expected_upgrade_log_dict
def test_render_full_static_site_from_empty_project(tmp_path_factory, filesystem_csv_3):

    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch

    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy(
        file_relative_path(__file__, "../test_sets/Titanic.csv"),
        str(os.path.join(project_dir, "data/titanic/Titanic.csv"))
    )

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(
        os.path.join(filesystem_csv_3, "f1.csv"),
        str(os.path.join(project_dir, "data/random/f1.csv"))
    )
    shutil.copy(
        os.path.join(filesystem_csv_3, "f2.csv"),
        str(os.path.join(project_dir, "data/random/f2.csv"))
    )

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                             "subdir_reader": {
                                 "class_name": "SubdirReaderBatchKwargsGenerator",
                                 "base_directory": os.path.join(project_dir, "data/titanic/")
                             }
                           }
                           )

    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name": "SubdirReaderBatchKwargsGenerator",
                                   "base_directory": os.path.join(project_dir, "data/random/")
                               }
                           }
                           )

    context.profile_datasource("titanic")

    # Replicate the batch id of the batch that will be profiled in order to generate the file path of the
    # validation result
    titanic_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/titanic/Titanic.csv'),
        'datasource': 'titanic'}
    ).to_id()
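    # to_id() turns the batch kwargs (here the file path and datasource name)
    # into a deterministic fingerprint, which is why the expected validation
    # file name below can be computed up front.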


    tree_str = gen_directory_tree_str(project_dir)
    assert tree_str == """project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        expectations/
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            pandas/
                validation_playground.ipynb
            spark/
                validation_playground.ipynb
            sql/
                validation_playground.ipynb
        plugins/
            custom_data_docs/
                renderers/
                styles/
                    data_docs_custom_styles.css
                views/
        uncommitted/
            config_variables.yml
            data_docs/
            validations/
                titanic/
                    subdir_reader/
                        Titanic/
                            BasicDatasetProfiler/
                                profiling/
                                    {}.json
""".format(titanic_profiled_batch_id)

    context.profile_datasource("random")
    context.build_data_docs()

    f1_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f1.csv'),
        'datasource': 'random'}
    ).to_id()

    f2_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f2.csv'),
        'datasource': 'random'}
    ).to_id()

    data_docs_dir = os.path.join(project_dir, "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.html
        static/
            fonts/
                HKGrotesk/
                    HKGrotesk-Bold.otf
                    HKGrotesk-BoldItalic.otf
                    HKGrotesk-Italic.otf
                    HKGrotesk-Light.otf
                    HKGrotesk-LightItalic.otf
                    HKGrotesk-Medium.otf
                    HKGrotesk-MediumItalic.otf
                    HKGrotesk-Regular.otf
                    HKGrotesk-SemiBold.otf
                    HKGrotesk-SemiBoldItalic.otf
            images/
                favicon.ico
                glossary_scroller.gif
                iterative-dev-loop.png
                logo-long-vector.svg
                logo-long.png
                short-logo-vector.svg
                short-logo.png
                validation_failed_unexpected_values.gif
            styles/
                data_docs_custom_styles_template.css
                data_docs_default_styles.css
        validations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler/
                            profiling/
                                {0:s}.html
                    f2/
                        BasicDatasetProfiler/
                            profiling/
                                {1:s}.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler/
                            profiling/
                                {2:s}.html
""".format(f1_profiled_batch_id, f2_profiled_batch_id, titanic_profiled_batch_id)

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(
        os.path.join(
            ge_directory,
            "uncommitted/data_docs/"
        ),
        "./tests/data_context/output/data_docs"
    )
def test_v3_configuration_store(tmp_path_factory):
    root_directory_path: str = "test_v3_configuration_store"
    root_directory: str = str(tmp_path_factory.mktemp(root_directory_path))
    base_directory: str = str(Path(root_directory) / "some_store_config_dir")

    config_0: SampleConfig = SampleConfig(some_param_0="test_str_0",
                                          some_param_1=65)
    store_name_0: str = "test_config_store_0"
    configuration_name_0: str = "test_config_name_0"

    with pytest.raises(FileNotFoundError):
        save_config_to_filesystem(
            configuration_store_class_name="unknown_class",
            configuration_store_module_name="unknown_module",
            store_name=store_name_0,
            base_directory=base_directory,
            configuration_key=configuration_name_0,
            configuration=config_0,
        )

    save_config_to_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_0,
        base_directory=base_directory,
        configuration_key=configuration_name_0,
        configuration=config_0,
    )

    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """some_store_config_dir/
    .ge_store_backend_id
    test_config_name_0.yml
""")
    assert (len([
        path for path in Path(base_directory).iterdir()
        if str(path).find(".ge_store_backend_id") == (-1)
    ]) == 1)

    stored_file_name_0: Path = Path(base_directory) / f"{configuration_name_0}.yml"
    with open(stored_file_name_0) as f:
        config: CommentedMap = yaml.load(f)
        expected_config: CommentedMap = CommentedMap({
            "some_param_0": "test_str_0",
            "some_param_1": 65
        })
        assert config == expected_config

    with pytest.raises(ValueError):
        # noinspection PyUnusedLocal
        loaded_config: BaseYamlConfig = load_config_from_filesystem(
            configuration_store_class_name="SampleConfigurationStore",
            configuration_store_module_name=SampleConfigurationStore.__module__,
            store_name=store_name_0,
            base_directory="unknown_base_directory",
            configuration_key=configuration_name_0,
        )
    with pytest.raises(ge_exceptions.InvalidKeyError):
        # noinspection PyUnusedLocal
        loaded_config: BaseYamlConfig = load_config_from_filesystem(
            configuration_store_class_name="SampleConfigurationStore",
            configuration_store_module_name=SampleConfigurationStore.__module__,
            store_name=store_name_0,
            base_directory=base_directory,
            configuration_key="unknown_configuration",
        )

    loaded_config: BaseYamlConfig = load_config_from_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_0,
        base_directory=base_directory,
        configuration_key=configuration_name_0,
    )
    assert loaded_config.to_json_dict() == config_0.to_json_dict()

    config_1: SampleConfig = SampleConfig(some_param_0="test_str_1",
                                          some_param_1=26)
    store_name_1: str = "test_config_store_1"
    configuration_name_1: str = "test_config_name_1"
    save_config_to_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_1,
        base_directory=base_directory,
        configuration_key=configuration_name_1,
        configuration=config_1,
    )

    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """some_store_config_dir/
    .ge_store_backend_id
    test_config_name_0.yml
    test_config_name_1.yml
""")
    assert (len([
        path for path in Path(base_directory).iterdir()
        if str(path).find(".ge_store_backend_id") == (-1)
    ]) == 2)

    stored_file_name_1: Path = Path(base_directory) / f"{configuration_name_1}.yml"
    with open(stored_file_name_1) as f:
        config: CommentedMap = yaml.load(f)
        expected_config: CommentedMap = CommentedMap({
            "some_param_0": "test_str_1",
            "some_param_1": 26
        })
        assert config == expected_config

    loaded_config: BaseYamlConfig = load_config_from_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_1,
        base_directory=base_directory,
        configuration_key=configuration_name_1,
    )
    assert loaded_config.to_json_dict() == config_1.to_json_dict()

    delete_config_from_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_0,
        base_directory=base_directory,
        configuration_key=configuration_name_0,
    )
    assert (len([
        path for path in Path(base_directory).iterdir()
        if str(path).find(".ge_store_backend_id") == (-1)
    ]) == 1)

    delete_config_from_filesystem(
        configuration_store_class_name="SampleConfigurationStore",
        configuration_store_module_name=SampleConfigurationStore.__module__,
        store_name=store_name_1,
        base_directory=base_directory,
        configuration_key=configuration_name_1,
    )
    assert (len([
        path for path in Path(base_directory).iterdir()
        if str(path).find(".ge_store_backend_id") == (-1)
    ]) == 0)
Example #9
def test_cli_init_connection_string_non_working_db_connection_instructs_user_and_leaves_entries_in_config_files_for_debugging(
        mock_webbrowser, caplog, monkeypatch, tmp_path_factory, sa):
    root_dir = tmp_path_factory.mktemp("bad_con_string_test")
    root_dir = str(root_dir)
    os.chdir(root_dir)

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(root_dir)
    result = runner.invoke(
        cli,
        ["--v3-api", "init"],
        input="\n\n2\n6\nmy_db\nsqlite:////not_a_real.db\n\nn\n",
        catch_exceptions=False,
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 0

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "Attempting to connect to your database. This may take a moment" in stdout
    assert "Cannot connect to the database" in stdout

    assert "Profiling" not in stdout
    assert "Building" not in stdout
    assert "Data Docs" not in stdout
    assert "Great Expectations is now set up" not in stdout

    assert result.exit_code == 1

    ge_dir = os.path.join(root_dir, DataContext.GE_DIR)
    assert os.path.isdir(ge_dir)
    config_path = os.path.join(ge_dir, DataContext.GE_YML)
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    assert config["datasources"] == {
        "my_db": {
            "data_asset_type": {
                "module_name": None,
                "class_name": "SqlAlchemyDataset",
            },
            "credentials": "${my_db}",
            "class_name": "SqlAlchemyDatasource",
            "module_name": "great_expectations.datasource",
        }
    }

    config_path = os.path.join(ge_dir, DataContext.GE_UNCOMMITTED_DIR,
                               "config_variables.yml")
    with open(config_path) as f:
        config = yaml.load(f)
    assert config["my_db"] == {"url": "sqlite:////not_a_real.db"}

    obs_tree = gen_directory_tree_str(
        os.path.join(root_dir, "great_expectations"))
    assert (obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
            .ge_store_backend_id
""")

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project(tmp_path_factory, filesystem_csv_2):
    curdir = os.path.abspath(os.getcwd())
    try:
        basedir = tmp_path_factory.mktemp("test_cli_init_diff")
        basedir = str(basedir)
        os.makedirs(os.path.join(basedir, "data"))
        shutil.copy("./tests/test_sets/Titanic.csv",
                    str(os.path.join(basedir, "data/Titanic.csv")))

        os.chdir(basedir)

        runner = CliRunner()
        result = runner.invoke(cli, ["init"],
                               input="Y\n1\n%s\n\nn\n\n" %
                               str(os.path.join(basedir, "data")))

        print(result.output)
        print("result.output length:", len(result.output))

        assert len(result.output) < 10000, "CLI output is unreasonably long."
        assert result.output.count("{") < 100, \
            "CLI contains way more '{' than we would reasonably expect."

        assert """Always know what to expect from your data""" in result.output
        assert """Let's add Great Expectations to your project""" in result.output
        assert """open a tutorial notebook""" in result.output

        assert os.path.isdir(os.path.join(basedir, "great_expectations"))
        assert os.path.isfile(
            os.path.join(basedir, "great_expectations/great_expectations.yml"))
        with open(
                os.path.join(basedir,
                             "great_expectations/great_expectations.yml")) as f:
            config = yaml.load(f)
        assert config["datasources"]["data__dir"][
            "class_name"] == "PandasDatasource"

        print(
            gen_directory_tree_str(os.path.join(basedir,
                                                "great_expectations")))
        assert gen_directory_tree_str(
            os.path.join(basedir, "great_expectations")) == """\
great_expectations/
    .gitignore
    great_expectations.yml
    datasources/
    expectations/
        data__dir/
            default/
                Titanic/
                    BasicDatasetProfiler.json
    notebooks/
        create_expectations.ipynb
        integrate_validation_into_pipeline.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    data__dir/
                        default/
                            Titanic/
                                BasicDatasetProfiler.html
                validations/
                    profiling/
                        data__dir/
                            default/
                                Titanic/
                                    BasicDatasetProfiler.html
        samples/
        validations/
            profiling/
                data__dir/
                    default/
                        Titanic/
                            BasicDatasetProfiler.json
"""

        assert os.path.isfile(
            os.path.join(
                basedir,
                "great_expectations/expectations/data__dir/default/Titanic/BasicDatasetProfiler.json"
            ))

        fnames = []
        path = os.path.join(
            basedir,
            "great_expectations/uncommitted/validations/profiling/data__dir/default/Titanic"
        )
        for (dirpath, dirnames, filenames) in os.walk(path):
            for filename in filenames:
                fnames.append(filename)
        assert fnames == ["BasicDatasetProfiler.json"]

        assert os.path.isfile(
            os.path.join(
                basedir,
                "great_expectations/uncommitted/data_docs/local_site/validations/profiling/data__dir/default/Titanic/BasicDatasetProfiler.html"
            ))

        assert os.path.getsize(
            os.path.join(
                basedir,
                "great_expectations/uncommitted/data_docs/local_site/validations/profiling/data__dir/default/Titanic/BasicDatasetProfiler.html"
            )) > 0
        print(result)
    finally:
        os.chdir(curdir)
def test_cli_init_on_existing_ge_yml_with_missing_uncommitted_dirs_and_missing_config_variables_yml(
        tmp_path_factory):
    """
    This test walks through an onboarding experience.

    The user is just missing some uncommitted dirs and the
    config_variables.yml file.
    """
    tmp_dir = str(tmp_path_factory.mktemp("more_stuff"))
    ge_dir = os.path.join(tmp_dir, "great_expectations")
    curdir = os.path.abspath(os.getcwd())
    os.chdir(tmp_dir)
    runner = CliRunner()
    runner.invoke(cli, ["init"], input="Y\n4\n")
    # mangle setup
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    shutil.rmtree(os.path.join(uncommitted_dir, "data_docs"))
    config_var_path = os.path.join(uncommitted_dir, "config_variables.yml")
    os.remove(config_var_path)
    # sanity check
    assert not os.path.isfile(config_var_path)

    try:
        result = runner.invoke(cli, ["init"], input="Y\n")

        # check dir structure
        dir_structure = gen_directory_tree_str(ge_dir)
        assert dir_structure == """\
great_expectations/
    .gitignore
    great_expectations.yml
    datasources/
    expectations/
    notebooks/
        create_expectations.ipynb
        integrate_validation_into_pipeline.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        samples/
        validations/
"""
        # check config_variables.yml
        with open(config_var_path, 'r') as f:
            obs_yml = f.read()
        assert obs_yml == CONFIG_VARIABLES_TEMPLATE

        # Check CLI output
        obs = result.output
        assert "To run locally, we need some files that are not in source control." in obs
        assert "You may see new files in" in obs
        assert "Let's add Great Expectations to your project, by scaffolding" not in obs

        assert "open a tutorial notebook" not in obs
    finally:
        os.chdir(curdir)
def test_cli_init_on_new_project(tmp_path_factory):
    curdir = os.path.abspath(os.getcwd())
    try:
        basedir = tmp_path_factory.mktemp("test_cli_init_diff")
        basedir = str(basedir)
        os.makedirs(os.path.join(basedir, "data"))
        shutil.copy("./tests/test_sets/Titanic.csv",
                    str(os.path.join(basedir, "data/Titanic.csv")))

        os.chdir(basedir)

        runner = CliRunner()
        result = runner.invoke(cli, ["init", "--no-view"],
                               input="Y\n1\n%s\n\nn\n\n" %
                               str(os.path.join(basedir, "data")))

        print(result.output)
        print("result.output length:", len(result.output))

        assert len(result.output) < 10000, "CLI output is unreasonably long."
        assert result.output.count("{") < 100, \
            "CLI contains way more '{' than we would reasonably expect."

        assert """Always know what to expect from your data""" in result.output
        assert """Let's add Great Expectations to your project""" in result.output

        assert os.path.isdir(os.path.join(basedir, "great_expectations"))
        assert os.path.isfile(
            os.path.join(basedir, "great_expectations/great_expectations.yml"))
        with open(
                os.path.join(basedir,
                             "great_expectations/great_expectations.yml")) as f:
            config = yaml.load(f)
        assert config["datasources"]["data__dir"][
            "class_name"] == "PandasDatasource"

        print(
            gen_directory_tree_str(os.path.join(basedir,
                                                "great_expectations")))
        assert gen_directory_tree_str(
            os.path.join(basedir, "great_expectations")) == """\
great_expectations/
    .gitignore
    great_expectations.yml
    datasources/
    expectations/
    notebooks/
        pandas/
            create_expectations.ipynb
            validation_playground.ipynb
        spark/
            create_expectations.ipynb
            validation_playground.ipynb
        sql/
            create_expectations.ipynb
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        samples/
        validations/
"""

    finally:
        os.chdir(curdir)
def test_render_full_static_site_from_empty_project(tmp_path_factory,
                                                    filesystem_csv_3):

    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch

    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))

    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        datasources/
        expectations/
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            create_expectations.ipynb
            integrate_validation_into_pipeline.ipynb
        plugins/
            custom_data_docs/
                renderers/
                styles/
                    data_docs_custom_styles.css
                views/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                profiling/
                    titanic/
                        default/
                            Titanic/
                                BasicDatasetProfiler.json
"""

    context.profile_datasource("random")
    context.build_data_docs()

    data_docs_dir = os.path.join(project_dir,
                                 "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    print(observed)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                default/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.html
        validations/
            profiling/
                random/
                    default/
                        f1/
                            BasicDatasetProfiler.html
                        f2/
                            BasicDatasetProfiler.html
                titanic/
                    default/
                        Titanic/
                            BasicDatasetProfiler.html
"""

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
def test_v2_to_v3_project_upgrade_without_manual_steps(
        v20_project_directory_with_v30_configuration_and_no_checkpoints,
        caplog):
    runner: CliRunner = CliRunner(mix_stderr=False)
    result: Result = runner.invoke(
        cli,
        [
            "-c",
            v20_project_directory_with_v30_configuration_and_no_checkpoints,
            "--v3-api",
            "project",
            "upgrade",
        ],
        input="\n",
        catch_exceptions=False,
    )
    stdout: str = result.stdout

    with open(
            file_relative_path(
                __file__,
                "../../test_fixtures/upgrade_helper/test_v2_to_v3_project_upgrade_without_manual_steps_expected_stdout.fixture",
            )) as f:
        expected_stdout: str = f.read()
        expected_stdout = expected_stdout.replace(
            "GE_PROJECT_DIR",
            v20_project_directory_with_v30_configuration_and_no_checkpoints,
        )
        assert stdout == expected_stdout

    expected_project_tree_str: str = """\
great_expectations/
    .gitignore
    great_expectations.yml
    expectations/
        .ge_store_backend_id
        .gitkeep
    notebooks/
        .gitkeep
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            styles/
                data_docs_custom_styles.css
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                expectations/
                    .gitkeep
                static/
                    .gitkeep
                validations/
                    diabetic_data/
                        warning/
                            20200430T191246.763896Z/
                                c3b4c5df224fef4b1a056a0f3b93aba5.html
        logs/
            project_upgrades/
                UpgradeHelperV13_20210119T132639.000000Z.json
        validations/
            .ge_store_backend_id
            diabetic_data/
                warning/
                    20200430T191246.763896Z/
                        c3b4c5df224fef4b1a056a0f3b93aba5.json
"""
    obs_project_tree_str: str = gen_directory_tree_str(
        startpath=v20_project_directory_with_v30_configuration_and_no_checkpoints
    )
    assert obs_project_tree_str == expected_project_tree_str
    # make sure config number incremented
    assert (
        DataContext.get_ge_config_version(
            context_root_dir=v20_project_directory_with_v30_configuration_and_no_checkpoints
        )
        == 3.0
    )

    with open(
            file_relative_path(
                __file__,
                "../../test_fixtures/upgrade_helper/UpgradeHelperV13_upgrade_without_manual_steps_log.json",
            )) as f:
        expected_upgrade_log_dict: dict = json.load(f)
        expected_upgrade_log_str: str = json.dumps(expected_upgrade_log_dict)
        expected_upgrade_log_str = expected_upgrade_log_str.replace(
            "GE_PROJECT_DIR",
            v20_project_directory_with_v30_configuration_and_no_checkpoints,
        )
        expected_upgrade_log_dict = json.loads(expected_upgrade_log_str)

    with open(
            f"{v20_project_directory_with_v30_configuration_and_no_checkpoints}/uncommitted/logs/project_upgrades/UpgradeHelperV13_20210119T132639.000000Z.json"
    ) as f:
        obs_upgrade_log_dict: dict = json.load(f)

    assert obs_upgrade_log_dict == expected_upgrade_log_dict
def test_cli_init_on_new_project_with_broken_excel_file_try_again_with_different_file(
        mock_webbrowser, caplog, tmp_path_factory):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__,
                                      "../test_sets/broken_excel_file.xls")
    data_path_2 = os.path.join(project_dir, "data", "Titanic.csv")
    fixture_path_2 = file_relative_path(__file__, "../test_sets/Titanic.csv")
    shutil.copy(fixture_path, data_path)
    shutil.copy(fixture_path_2, data_path_2)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n1\n1\n{}\n\n{}\n".format(data_path,
                                           data_path_2,
                                           catch_exceptions=False),
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/"
        .format(project_dir) in mock_webbrowser.call_args[0][0])

    assert len(stdout) < 3000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert "Enter the path (relative or absolute) of a data file" in stdout
    assert "Cannot load file." in stdout
    assert (
        "- Please check the file and try again or select a different data file."
        in stdout)
    assert (
        "- Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'"
        in stdout)
    assert "Try again? [Y/n]:" in stdout
    assert "[{}]:".format(data_path) in stdout

    assert "Name the new expectation suite [Titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout)
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert (
        "A new Expectation suite 'Titanic.warning' was added to your project"
        in stdout)
    assert "Great Expectations is now set up" in stdout

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    data_source_class = config["datasources"]["files_datasource"][
        "data_asset_type"]["class_name"]
    assert data_source_class == "PandasDataset"

    obs_tree = gen_directory_tree_str(
        os.path.join(project_dir, "great_expectations"))

    # Instead of monkey patching datetime, just regex out the time directories
    date_safe_obs_tree = re.sub(r"\d*T\d*\.\d*Z", "9999.9999", obs_tree)
    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(r"[a-z0-9]{32}(?=\.(json|html))",
                                "foobarbazguid", date_safe_obs_tree)
    assert (guid_safe_obs_tree == """great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        Titanic/
            warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    Titanic/
                        warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    Titanic/
                        warning/
                            9999.9999/
                                foobarbazguid.html
        validations/
            Titanic/
                warning/
                    9999.9999/
                        foobarbazguid.json
""")

    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #16
0
def test_HtmlSiteStore_filesystem_backend(tmp_path_factory):
    path = str(
        tmp_path_factory.mktemp(
            "test_HtmlSiteStore_with_TupleFileSystemStoreBackend__dir"))

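    # The relative base_directory is resolved against the root_directory
    # supplied via runtime_environment, so the store lives at <tmp path>/my_store.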
    my_store = HtmlSiteStore(
        store_backend={
            "class_name": "TupleFilesystemStoreBackend",
            "base_directory": "my_store",
        },
        runtime_environment={"root_directory": path},
    )

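    # Keys must be real identifiers: a bare string raises TypeError, and an
    # empty payload fails schema validation.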
    with pytest.raises(TypeError):
        my_store.get("not_a_ValidationResultIdentifier")

    with pytest.raises(ValidationError):
        my_store.get(validationResultIdentifierSchema.load({}))

    # Use a fixed run_time so the directory-tree assertion below is
    # deterministic (datetime.datetime.now() would put the files under an
    # ever-changing timestamp directory and the assert would fail).
    run_time = datetime.datetime(2019, 9, 26, 13, 42, 41,
                                 tzinfo=datetime.timezone.utc)
    ns_1 = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=ValidationResultIdentifier.from_tuple((
            "a",
            "b",
            "c",
            "quarantine",
            run_time,
            "prod-100",
        )),
    )
    my_store.set(ns_1, "aaa")
    # assert my_store.get(ns_1) == "aaa"

    ns_2 = SiteSectionIdentifier(
        site_section_name="validations",
        resource_identifier=ValidationResultIdentifier.from_tuple((
            "a",
            "b",
            "c",
            "quarantine",
            run_time,
            "prod-20",
        )),
    )
    my_store.set(ns_2, "bbb")
    # assert my_store.get(ns_2) == "bbb"

    print(my_store.list_keys())
    # WARNING: OBSERVE THAT SITE_SECTION_NAME IS LOST IN THE CALL TO LIST_KEYS
    assert set(my_store.list_keys()) == {
        ns_1.resource_identifier,
        ns_2.resource_identifier,
    }

    print(gen_directory_tree_str(path))
    assert (gen_directory_tree_str(path) == """\
test_HtmlSiteStore_with_TupleFileSystemStoreBackend__dir0/
    my_store/
        validations/
            a/
                b/
                    c/
                        quarantine/
                            20190926T134241.000000Z/
                                prod-100.html
                                prod-20.html
""")
Example #17
0
def test_profiler_store_integration(
    empty_data_context: DataContext,
    profiler_store_name: str,
    profiler_name: str,
    profiler_config_with_placeholder_args: RuleBasedProfilerConfig,
):
    base_directory: str = str(
        Path(empty_data_context.root_directory) / "profilers")

    profiler_store: ProfilerStore = build_profiler_store_using_filesystem(
        store_name=profiler_store_name,
        base_directory=base_directory,
        overwrite_existing=True,
    )

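    # build_profiler_store_using_filesystem is a test helper that backs the
    # ProfilerStore with a TupleFilesystemStoreBackend rooted at
    # base_directory (see the self_check report asserted below).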
    dir_tree: str

    dir_tree = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """profilers/
    .ge_store_backend_id
""")

    key: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=profiler_name)
    profiler_store.set(key=key, value=profiler_config_with_placeholder_args)

    dir_tree = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """profilers/
    .ge_store_backend_id
    my_first_profiler.yml
""")

    assert len(profiler_store.list_keys()) == 1
    profiler_store.remove_key(key=key)
    assert len(profiler_store.list_keys()) == 0

    data: dict = profiler_store.self_check()
    self_check_report: dict = convert_to_json_serializable(data=data)

    # Drop dynamic value to ensure appropriate assert
    self_check_report["config"]["store_backend"].pop("base_directory")

    assert self_check_report == {
        "config": {
            "class_name": "ProfilerStore",
            "module_name":
            "great_expectations.data_context.store.profiler_store",
            "overwrite_existing": True,
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "filepath_suffix": ".yml",
                "fixed_length_key": False,
                "module_name":
                "great_expectations.data_context.store.tuple_store_backend",
                "platform_specific_separator": True,
                "suppress_store_backend_id": False,
            },
            "store_name": "profiler_store",
        },
        "keys": [],
        "len_keys": 0,
    }
def test_upgrade_helper_intervention_on_cli_command(v10_project_directory,
                                                    caplog, monkeypatch):
    # test if cli detects out of date project and asks to run upgrade helper
    # decline upgrade and ensure config version was not modified
    runner: CliRunner = CliRunner(mix_stderr=False)
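    # chdir into the directory that contains great_expectations/ so the CLI
    # can discover the project from the working directory.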
    monkeypatch.chdir(os.path.dirname(v10_project_directory))
    result: Result = runner.invoke(
        cli,
        [
            "--v3-api",
            "checkpoint",
            "list",
        ],
        input="n\n",
        catch_exceptions=False,
    )
    stdout: str = result.stdout

    assert (
        "Your project appears to have an out-of-date config version (1.0) - the version number must be at least 3."
        in stdout)
    assert "In order to proceed, your project must be upgraded." in stdout
    assert (
        "Would you like to run the Upgrade Helper to bring your project up-to-date? [Y/n]:"
        in stdout)
    assert (
        "Ok, exiting now. To upgrade at a later time, use the following command: great_expectations project "
        "upgrade" in stdout)
    assert (
        "To learn more about the upgrade process, visit "
        "\x1b[36mhttps://docs.greatexpectations.io/docs/guides/miscellaneous/migration_guide#migrating-to-the-batch-request-v3-api"
        in stdout)
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )

    # make sure config version unchanged
    assert (DataContext.get_ge_config_version(
        context_root_dir=v10_project_directory) == 1.0)

    expected_project_tree_str: str = """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
        .gitkeep
    expectations/
        .gitkeep
    notebooks/
        .gitkeep
    plugins/
        custom_store_backends/
            __init__.py
            my_custom_store_backend.py
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                expectations/
                    .gitkeep
                static/
                    .gitkeep
                validations/
                    diabetic_data/
                        warning/
                            20200430T191246.763896Z/
                                c3b4c5df224fef4b1a056a0f3b93aba5.html
        validations/
            diabetic_data/
                warning/
                    20200430T191246.763896Z/
                        c3b4c5df224fef4b1a056a0f3b93aba5.json
"""
    obs_project_tree_str: str = gen_directory_tree_str(
        startpath=v10_project_directory)
    assert obs_project_tree_str == expected_project_tree_str