def test_cli_datasource_profile_with_valid_data_asset_arg(
        caplog, empty_data_context, filesystem_csv_2):
    """Profile one named data asset of a Pandas datasource through the CLI.

    Adds ``my_datasource`` (with a ``subdir_reader`` generator pointed at the
    ``filesystem_csv_2`` fixture) to the context, runs ``datasource profile``
    limited to the ``f1`` data asset, and then checks the CLI output, the
    stored expectation suite, the stored validation result and the log
    messages captured by ``caplog``.
    """
    suite_name = "my_datasource.subdir_reader.f1.BasicDatasetProfiler"
    subdir_reader_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": str(filesystem_csv_2),
    }
    # NOTE(review): the other tests in this file pass this mapping as
    # ``batch_kwargs_generators``; confirm ``generators`` is the keyword the
    # targeted library version expects.
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        generators={"subdir_reader": subdir_reader_config},
    )
    root_dir = empty_data_context.root_directory

    cli_args = [
        "datasource",
        "profile",
        "my_datasource",
        "--data-assets",
        "f1",
        "-d",
        root_dir,
        "--no-view",
    ]
    result = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, catch_exceptions=False)

    assert result.exit_code == 0
    cli_output = result.stdout
    assert "Profiling 'my_datasource'" in cli_output
    assert ("The following Data Docs sites were built:\n- local_site:"
            in cli_output)

    # Build a fresh DataContext from the project directory for the checks below.
    reloaded = DataContext(root_dir)
    assert len(reloaded.list_datasources()) == 1

    suite_keys = reloaded.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    assert suite_keys[0].expectation_suite_name == suite_name

    validations = reloaded.stores["validations_store"]
    result_keys = validations.list_keys()
    assert len(result_keys) == 1

    stored_validation = validations.get(result_keys[0])
    assert stored_validation.meta["expectation_suite_name"] == suite_name
    assert stored_validation.success is False
    assert len(stored_validation.results) == 8

    assert "Preparing column 1 of 1" in caplog.messages[0]
    assert len(caplog.messages) == 1
    assert_no_tracebacks(result)
# Example #2
# 0
def test_cli_datasource_profile_with_skip_prompt_flag(caplog,
                                                      empty_data_context,
                                                      filesystem_csv_2):
    """Profile a Pandas datasource through the CLI with the ``-y`` flag.

    Adds ``my_datasource`` with a ``subdir_reader`` generator, runs
    ``datasource profile`` with ``-y``, and asserts that the confirmation
    question is absent from the output while the profiling and Data Docs
    messages are present. The stored suite, the stored validation result
    and the captured log messages are checked afterwards.
    """
    suite_name = "my_datasource.subdir_reader.f1.BasicDatasetProfiler"
    subdir_reader_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": str(filesystem_csv_2),
    }
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={"subdir_reader": subdir_reader_config},
    )
    root_dir = empty_data_context.root_directory

    cli_args = ["datasource", "profile", "-d", root_dir, "--no-view", "-y"]
    result = CliRunner(mix_stderr=False).invoke(
        cli,
        cli_args,
        input="Y\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    cli_output = result.stdout

    profiling_notice = (
        "Profiling 'my_datasource' will create expectations and documentation."
    )
    assert profiling_notice in cli_output
    assert "Would you like to profile 'my_datasource'" not in cli_output
    docs_notice = (
        "Great Expectations is building Data Docs from the data you just profiled!"
    )
    assert docs_notice in cli_output

    # Build a fresh DataContext from the project directory for the checks below.
    reloaded = DataContext(root_dir)
    assert len(reloaded.list_datasources()) == 1

    suite_keys = reloaded.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    assert suite_keys[0].expectation_suite_name == suite_name

    validations = reloaded.stores["validations_store"]
    result_keys = validations.list_keys()
    assert len(result_keys) == 1

    stored_validation = validations.get(result_keys[0])
    assert stored_validation.meta["expectation_suite_name"] == suite_name
    assert stored_validation.success is False
    assert len(stored_validation.results) == 8

    assert "Preparing column 1 of 1" in caplog.messages[0]
    assert len(caplog.messages) == 1
    assert_no_tracebacks(result)
def test_cli_datasource_profile_with_valid_data_asset_arg(
    empty_data_context, titanic_sqlite_db, caplog
):
    """Profile the ``main.titanic`` data asset of a SQL datasource via the CLI.

    Registers ``wow_a_datasource`` through
    ``_add_datasource_and_credentials_to_context``, runs ``datasource
    profile`` restricted to ``main.titanic``, and checks the CLI output, the
    stored expectation suite, the stored validation result and the captured
    log messages.
    """
    datasource_name = "wow_a_datasource"
    suite_name = "wow_a_datasource.default.main.titanic.BasicDatasetProfiler"

    root_dir = empty_data_context.root_directory
    ctx = DataContext(root_dir)
    ctx = _add_datasource_and_credentials_to_context(
        ctx, datasource_name, titanic_sqlite_db
    )

    cli_args = [
        "datasource",
        "profile",
        datasource_name,
        "--data-assets",
        "main.titanic",
        "-d",
        root_dir,
        "--no-view",
    ]
    result = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, catch_exceptions=False
    )

    cli_output = result.stdout
    assert result.exit_code == 0
    assert "Profiling '{}'".format(datasource_name) in cli_output
    assert "The following Data Docs sites were built:\n" in cli_output
    assert "local_site:" in cli_output

    # Build a fresh DataContext from the project directory for the checks below.
    ctx = DataContext(root_dir)
    assert len(ctx.list_datasources()) == 1

    suite_keys = ctx.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    assert suite_keys[0].expectation_suite_name == suite_name

    validations = ctx.stores["validations_store"]
    result_keys = validations.list_keys()
    assert len(result_keys) == 1

    stored_validation = validations.get(result_keys[0])
    assert stored_validation.meta["expectation_suite_name"] == suite_name
    assert stored_validation.success is False
    assert len(stored_validation.results) == 51

    assert "Preparing column 1 of 7" in caplog.messages[0]
    assert len(caplog.messages) == 10
    assert_no_tracebacks(result)
def test_cli_datasource_profile_with_datasource_arg_and_generator_name_arg(
    empty_data_context, titanic_sqlite_db, caplog
):
    """
    Verify that the ``--generator-name`` argument is passed to the methods
    down the stack.

    The datasource is set up with two generators. This lets us check that the
    name of the expectation suite created by the profiler corresponds to the
    name of the data asset listed by the generator that we told the profiler
    to use.

    The logic of processing this argument is tested in tests/profile.
    """
    datasource_name = "wow_a_datasource"
    second_generator_name = "second_generator"

    root_dir = empty_data_context.root_directory
    ctx = DataContext(root_dir)
    ctx = _add_datasource__with_two_generators_and_credentials_to_context(
        ctx, datasource_name, titanic_sqlite_db
    )

    cli_args = [
        "datasource",
        "profile",
        datasource_name,
        "--generator-name",
        second_generator_name,
        "-d",
        root_dir,
        "--no-view",
    ]
    # "Y\n" is supplied as stdin for the CLI's interactive prompt.
    result = CliRunner().invoke(cli, cli_args, input="Y\n")
    cli_output = result.stdout

    assert result.exit_code == 0
    assert "Profiling '{}'".format(datasource_name) in cli_output

    # Build a fresh DataContext from the project directory for the checks below.
    ctx = DataContext(root_dir)
    assert len(ctx.list_datasources()) == 1

    suite_keys = ctx.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    created_suite_name = suite_keys[0].expectation_suite_name
    assert (
        created_suite_name
        == "wow_a_datasource.second_generator.asset_one.BasicDatasetProfiler"
    )

    assert "Preparing column 1 of 7" in caplog.messages[0]
    assert len(caplog.messages) == 7
    assert_no_tracebacks(result)
# Example #5
# 0
def test_cli_datasource_profile_with_additional_batch_kwargs(
    caplog, empty_data_context, filesystem_csv_2
):
    """Profile a Pandas datasource with ``--additional-batch-kwargs``.

    Runs ``datasource profile`` with a JSON ``reader_options`` payload and,
    besides the usual output / suite / validation checks, asserts that the
    ``reader_options`` recorded in the validation result's ``batch_kwargs``
    metadata match the values given on the command line.
    """
    suite_name = "my_datasource.subdir_reader.f1.BasicDatasetProfiler"
    subdir_reader_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": str(filesystem_csv_2),
    }
    empty_data_context.add_datasource(
        "my_datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={"subdir_reader": subdir_reader_config},
    )
    root_dir = empty_data_context.root_directory

    cli_args = [
        "datasource",
        "profile",
        "-d",
        root_dir,
        "--additional-batch-kwargs",
        '{"reader_options": {"sep": ",", "parse_dates": [0]}}',
        "--no-view",
    ]
    # "Y\n" is supplied as stdin for the CLI's interactive prompt.
    result = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, input="Y\n", catch_exceptions=False
    )
    cli_output = result.output
    assert result.exit_code == 0

    profiling_notice = (
        "Profiling 'my_datasource' will create expectations and documentation."
    )
    assert profiling_notice in cli_output
    assert "Would you like to profile 'my_datasource'" in cli_output
    docs_notice = (
        "Great Expectations is building Data Docs from the data you just profiled!"
    )
    assert docs_notice in cli_output

    # Build a fresh DataContext from the project directory for the checks below.
    reloaded = DataContext(root_dir)
    assert len(reloaded.list_datasources()) == 1

    suite_keys = reloaded.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    assert suite_keys[0].expectation_suite_name == suite_name

    validations = reloaded.stores["validations_store"]
    result_keys = validations.list_keys()
    assert len(result_keys) == 1

    stored_validation = validations.get(result_keys[0])
    assert stored_validation.meta["expectation_suite_name"] == suite_name
    assert stored_validation.success is False
    assert len(stored_validation.results) == 9

    # Look the validation result up again by suite name and batch identifier
    # and inspect the batch kwargs recorded in its metadata.
    evr = reloaded.get_validation_result(
        expectation_suite_name=suite_name,
        batch_identifier=result_keys[0].batch_identifier,
    )
    recorded_options = evr.meta["batch_kwargs"]["reader_options"]
    assert recorded_options["parse_dates"] == [0]
    assert recorded_options["sep"] == ","

    assert "Preparing column 1 of 1" in caplog.messages[0]
    assert len(caplog.messages) == 1
    assert_no_tracebacks(result)
def test_cli_datasource_profile_with_data_asset_and_additional_batch_kwargs_with_limit(
    empty_data_context, titanic_sqlite_db, caplog
):
    """
    Additional batch kwargs (here ``limit``) can be passed to a SQL backend.

    The test passes ``{"limit": 97}`` on the command line and verifies that
    the observed value of the single ``expect_table_row_count_to_be_between``
    result in the stored validation equals that limit.
    """
    datasource_name = "wow_a_datasource"
    suite_name = "wow_a_datasource.default.main.titanic.BasicDatasetProfiler"

    root_dir = empty_data_context.root_directory
    ctx = DataContext(root_dir)
    ctx = _add_datasource_and_credentials_to_context(
        ctx, datasource_name, titanic_sqlite_db
    )

    cli_args = [
        "datasource",
        "profile",
        "-d",
        root_dir,
        "--data-assets",
        "main.titanic",
        "--additional-batch-kwargs",
        '{"limit": 97}',
        "--no-view",
    ]
    result = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, input="Y\n", catch_exceptions=False
    )

    cli_output = result.stdout
    assert result.exit_code == 0
    assert "Profiling '{}'".format(datasource_name) in cli_output
    assert (
        "The following Data Docs sites were built:\n- local_site:" in cli_output
    )

    # Build a fresh DataContext from the project directory for the checks below.
    ctx = DataContext(root_dir)
    assert len(ctx.list_datasources()) == 1

    suite_keys = ctx.stores["expectations_store"].list_keys()
    assert len(suite_keys) == 1
    assert suite_keys[0].expectation_suite_name == suite_name

    validations = ctx.stores["validations_store"]
    result_keys = validations.list_keys()
    assert len(result_keys) == 1

    stored_validation = validations.get(result_keys[0])
    assert stored_validation.meta["expectation_suite_name"] == suite_name
    assert stored_validation.success is False

    # Keep only the table row-count results; exactly one is expected.
    row_count_results = [
        evr
        for evr in stored_validation.results
        if evr.expectation_config.expectation_type
        == "expect_table_row_count_to_be_between"
    ]
    assert len(row_count_results) == 1
    assert row_count_results[0].result["observed_value"] == 97

    assert "Preparing column 1 of 7" in caplog.messages[0]
    assert len(caplog.messages) == 7
    assert_no_tracebacks(result)