Code example #1
def test_cli_datasorce_new(caplog, empty_data_context, filesystem_csv_2):
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["datasource", "new", "-d", project_root_dir],
        input="1\n1\n%s\nmynewsource\n" % str(filesystem_csv_2),
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "What data would you like Great Expectations to connect to?" in stdout
    assert "What are you processing your files with?" in stdout
    assert "Give your new Datasource a short name." in stdout
    assert "A new datasource 'mynewsource' was added to your project." in stdout

    assert result.exit_code == 0

    config_path = os.path.join(project_root_dir, DataContext.GE_YML)
    config = yaml.load(open(config_path))
    datasources = config["datasources"]
    assert "mynewsource" in datasources.keys()
    data_source_class = datasources["mynewsource"]["data_asset_type"][
        "class_name"]
    assert data_source_class == "PandasDataset"
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #2
def _load_config_file(config_path):
    assert os.path.isfile(config_path), "Config file is missing. Check path"

    with open(config_path) as f:
        read = f.read()
        config = yaml.load(read)

    assert isinstance(config, dict)
    return config
Code example #3
def test_cli_init_on_new_project_with_broken_excel_file_without_trying_again(
    caplog, tmp_path_factory
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__, "../../test_sets/broken_excel_file.xls")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input=f"\n\n1\n1\n{data_folder_path}\n\n\n\n2\n{data_path}\nn\n",
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Cannot load file." in stdout
    assert (
        "- Please check the file and try again or select a different data file."
        in stdout
    )
    assert (
        "- Error: Excel file format cannot be determined, you must specify an engine manually."
        in stdout
    ) or (
        "Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'"
        in stdout
    )
    assert "Try again? [Y/n]:" in stdout
    assert (
        "We have saved your setup progress. When you are ready, run great_expectations init to continue."
        in stdout
    )

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #4
def test_cli_datasource_new_connection_string(
    empty_data_context, empty_sqlite_db, caplog
):
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["datasource", "new", "-d", project_root_dir],
        input="2\n6\nmynewsource\n{}\n\n".format(str(empty_sqlite_db.url)),
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "What data would you like Great Expectations to connect to?" in stdout
    assert "Give your new Datasource a short name." in stdout
    assert (
        "Next, we will configure database credentials and store them in the `mynewsource` section"
        in stdout
    )
    assert "What is the url/connection string for the sqlalchemy connection?" in stdout
    assert "Attempting to connect to your database. This may take a moment" in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "A new datasource 'mynewsource' was added to your project." in stdout

    assert result.exit_code == 0

    config_path = os.path.join(project_root_dir, DataContext.GE_YML)
    config = yaml.load(open(config_path))
    datasources = config["datasources"]
    assert "mynewsource" in datasources.keys()
    data_source_class = datasources["mynewsource"]["data_asset_type"]["class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #5
def test_cli_init_on_new_project(mock_webbrowser, caplog, tmp_path_factory,
                                 titanic_sqlite_db_file, sa):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine(f"sqlite:///{database_path}", pool_recycle=3600)

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = inspector.get_table_names(schema=default_schema)
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine.url, schema=default_schema, table=default_table
        ),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout)
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout)
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources()[0]["class_name"] == "SqlAlchemyDatasource"
    assert context.list_datasources()[0]["name"] == "titanic"

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    # Profilers are v014+ specific
    os.rmdir(os.path.join(ge_dir, "profilers"))

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(r"[a-z0-9]{32}(?=\.(json|html))",
                                "foobarbazguid", obs_tree)
    # print(guid_safe_obs_tree)
    assert (guid_safe_obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        warning.json
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        20190926T134241.000000Z/
                            20190926T134241.000000Z/
                                foobarbazguid.html
        validations/
            .ge_store_backend_id
            warning/
                20190926T134241.000000Z/
                    20190926T134241.000000Z/
                        foobarbazguid.json
""")

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/"
        .format(project_dir) in mock_webbrowser.call_args[0][0])
Code example #6
def test_cli_init_on_new_project_extra_whitespace_in_url(
        mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine(f"sqlite:///{database_path}", pool_recycle=3600)
    engine_url_with_added_whitespace = "    " + str(engine.url) + "  "

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = inspector.get_table_names(schema=default_schema)
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine_url_with_added_whitespace,
            schema=default_schema,
            table=default_table,
        ),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout)
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout)
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [{
        "class_name": "SqlAlchemyDatasource",
        "name": "titanic",
        "module_name": "great_expectations.datasource",
        "credentials": {
            "url": str(engine.url)
        },
        "data_asset_type": {
            "class_name": "SqlAlchemyDataset",
            "module_name": "great_expectations.dataset",
        },
    }]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir,
                               "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"][
        "class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/"
        .format(project_dir) in mock_webbrowser.call_args[0][0])
Code example #7
def test_cli_init_spark_without_library_installed_instructs_user(
        caplog, tmp_path_factory):
    basedir = tmp_path_factory.mktemp("test_cli_init_diff")
    basedir = str(basedir)

    with set_directory(basedir):
        runner = CliRunner(mix_stderr=False)
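        # Piped input chooses a file-based datasource processed with PySpark,
        # then declines the offer to run `pip install pyspark`.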
        result = runner.invoke(cli, ["init", "--no-view"],
                               input="\n\n1\n2\nn\n",
                               catch_exceptions=False)
        stdout = result.output

        assert "Always know what to expect from your data" in stdout
        assert "What data would you like Great Expectations to connect to" in stdout
        assert "What are you processing your files with" in stdout
        assert (
            "Great Expectations relies on the library `pyspark` to connect to your data"
            in stdout)
        assert (
            "but the package `pyspark` containing this library is not installed."
            in stdout)
        assert (
            "Would you like Great Expectations to try to execute `pip install pyspark` for you?"
            in stdout)
        assert "Please execute `pip install pyspark` before trying again." in stdout
        # assert "Great Expectations relies on the library `pyspark`" in stdout
        # assert "Please `pip install pyspark` before trying again" in stdout

        assert "Profiling" not in stdout
        assert "Building" not in stdout
        assert "Data Docs" not in stdout
        assert "Great Expectations is now set up" not in stdout

        assert result.exit_code == 1

        assert os.path.isdir(os.path.join(basedir, "great_expectations"))
        config_path = os.path.join(
            basedir, "great_expectations/great_expectations.yml")
        assert os.path.isfile(config_path)

        config = yaml.load(open(config_path))
        assert config["datasources"] == {}

        obs_tree = gen_directory_tree_str(
            os.path.join(basedir, "great_expectations"))
        assert (obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    profilers/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
            .ge_store_backend_id
""")

        assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #8
def _library_not_loaded_test(tmp_path_factory, cli_input, library_name,
                             library_import_name, my_caplog):
    """
    This test requires that a library is NOT installed. It tests that:
    - a helpful error message is returned to install the missing library
    - the expected tree structure is in place
    - the config yml contains an empty dict in its datasource entry
    """
    basedir = tmp_path_factory.mktemp("test_cli_init_diff")
    basedir = str(basedir)

    with set_directory(basedir):
        runner = CliRunner(mix_stderr=False)
        result = runner.invoke(cli, ["init", "--no-view"],
                               input=cli_input,
                               catch_exceptions=False)
        stdout = result.output

        assert "Always know what to expect from your data" in stdout
        assert "What data would you like Great Expectations to connect to" in stdout
        assert "Which database backend are you using" in stdout
        assert "Give your new Datasource a short name" in stdout
        assert (
            "Next, we will configure database credentials and store them in the `my_db` section"
            in stdout)
        assert (
            f"Great Expectations relies on the library `{library_import_name}` to connect to your data"
            in stdout)
        assert (
            f"but the package `{library_name}` containing this library is not installed"
            in stdout)
        assert (
            f"Would you like Great Expectations to try to execute `pip install {library_name}` for you?"
            in stdout)
        assert (
            f"Please execute `pip install {library_name}` before trying again."
            in stdout)

        assert "Profiling" not in stdout
        assert "Building" not in stdout
        assert "Data Docs" not in stdout
        assert "Great Expectations is now set up" not in stdout

        assert result.exit_code == 1

        assert os.path.isdir(os.path.join(basedir, "great_expectations"))
        config_path = os.path.join(
            basedir, "great_expectations/great_expectations.yml")
        assert os.path.isfile(config_path)

        config = yaml.load(open(config_path))
        assert config["datasources"] == {}

        obs_tree = gen_directory_tree_str(
            os.path.join(basedir, "great_expectations"))
        assert (obs_tree == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    profilers/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
            .ge_store_backend_id
""")

        assert_no_logging_messages_or_tracebacks(my_caplog, result)
Code example #9
def test_cli_init_on_new_project_with_broken_excel_file_try_again_with_different_file(
    mock_webbrowser, caplog, tmp_path_factory
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__, "../../test_sets/broken_excel_file.xls")
    data_path_2 = os.path.join(project_dir, "data", "Titanic.csv")
    fixture_path_2 = file_relative_path(__file__, "../../test_sets/Titanic.csv")
    shutil.copy(fixture_path, data_path)
    shutil.copy(fixture_path_2, data_path_2)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n1\n1\n{}\n\n\n\n2\n{}\n\n{}\n\n\n\n".format(
            data_folder_path, data_path, data_path_2
        ),
        catch_exceptions=False,
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Cannot load file." in stdout
    assert (
        "- Please check the file and try again or select a different data file."
        in stdout
    )
    assert (
        "- Error: Excel file format cannot be determined, you must specify an engine manually."
        in stdout
    ) or (
        "Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'"
        in stdout
    )
    assert "Try again? [Y/n]:" in stdout
    assert f"[{data_path}]:" in stdout

    assert "Name the new Expectation Suite [Titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    obs_tree = gen_directory_tree_str(os.path.join(project_dir, "great_expectations"))

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", obs_tree
    )
    # print(guid_safe_obs_tree)
    assert (
        guid_safe_obs_tree
        == """great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        Titanic/
            warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    Titanic/
                        warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    Titanic/
                        warning/
                            20190926T134241.000000Z/
                                20190926T134241.000000Z/
                                    foobarbazguid.html
        validations/
            .ge_store_backend_id
            Titanic/
                warning/
                    20190926T134241.000000Z/
                        20190926T134241.000000Z/
                            foobarbazguid.json
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #10
def test_cli_init_on_new_project(
    mock_emit, mock_webbrowser, caplog, tmp_path_factory, monkeypatch
):
    monkeypatch.delenv(
        "GE_USAGE_STATS", raising=False
    )  # Undo the project-wide test default
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "Titanic.csv")
    fixture_path = file_relative_path(__file__, "../../test_sets/Titanic.csv")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input=f"\n\n1\n1\n{data_folder_path}\n\n\n\n2\n{data_path}\n\n\n\n",
        catch_exceptions=False,
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Name the new Expectation Suite [Titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Done generating example Expectation Suite" in stdout
    assert "Great Expectations is now set up" in stdout

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    obs_tree = gen_directory_tree_str(os.path.join(project_dir, "great_expectations"))

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", obs_tree
    )
    # print(guid_safe_obs_tree)
    assert (
        guid_safe_obs_tree
        == """great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        Titanic/
            warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    Titanic/
                        warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    Titanic/
                        warning/
                            20190926T134241.000000Z/
                                20190926T134241.000000Z/
                                    foobarbazguid.html
        validations/
            .ge_store_backend_id
            Titanic/
                warning/
                    20190926T134241.000000Z/
                        20190926T134241.000000Z/
                            foobarbazguid.json
"""
    )

    assert mock_emit.call_count == 9
    assert mock_emit.call_args_list[1] == mock.call(
        {
            "event_payload": {"api_version": "v2"},
            "event": "cli.init.create",
            "success": True,
        }
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code example #11
def test_cli_init_connection_string_non_working_db_connection_instructs_user_and_leaves_entries_in_config_files_for_debugging(
    mock_webbrowser, caplog, tmp_path_factory, sa
):
    root_dir = tmp_path_factory.mktemp("bad_con_string_test")
    root_dir = str(root_dir)
    os.chdir(root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init"],
        input="\n\n2\n6\nmy_db\nsqlite:////not_a_real.db\n\nn\n",
        catch_exceptions=False,
    )
    stdout = result.output
    assert mock_webbrowser.call_count == 0

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "Attempting to connect to your database. This may take a moment" in stdout
    assert "Cannot connect to the database" in stdout

    assert "Profiling" not in stdout
    assert "Building" not in stdout
    assert "Data Docs" not in stdout
    assert "Great Expectations is now set up" not in stdout

    assert result.exit_code == 1

    ge_dir = os.path.join(root_dir, DataContext.GE_DIR)
    assert os.path.isdir(ge_dir)
    config_path = os.path.join(ge_dir, DataContext.GE_YML)
    assert os.path.isfile(config_path)

    config = yaml.load(open(config_path))
    assert config["datasources"] == {
        "my_db": {
            "data_asset_type": {
                "module_name": None,
                "class_name": "SqlAlchemyDataset",
            },
            "credentials": "${my_db}",
            "class_name": "SqlAlchemyDatasource",
            "module_name": "great_expectations.datasource",
        }
    }

    config_path = os.path.join(
        ge_dir, DataContext.GE_UNCOMMITTED_DIR, "config_variables.yml"
    )
    config = yaml.load(open(config_path))
    assert config["my_db"] == {"url": "sqlite:////not_a_real.db"}

    obs_tree = gen_directory_tree_str(os.path.join(root_dir, "great_expectations"))
    assert (
        obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
            .ge_store_backend_id
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)