def initialized_sqlite_project(mock_webbrowser, caplog, tmp_path_factory,
                               titanic_sqlite_db_file, sa):
    """This is an initialized project through the CLI."""
    project_dir = str(tmp_path_factory.mktemp("my_rad_project"))

    engine = sa.create_engine("sqlite:///{}".format(titanic_sqlite_db_file))

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{}\n\n\n1\nwarning\n\n\n\n".format(
            engine.url),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/"
        .format(project_dir) in mock_webbrowser.call_args[0][0])

    assert_no_logging_messages_or_tracebacks(caplog, result)

    context = DataContext(os.path.join(project_dir, DataContext.GE_DIR))
    assert isinstance(context, DataContext)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [{
        "class_name": "SqlAlchemyDatasource",
        "name": "titanic",
        "module_name": "great_expectations.datasource",
        "credentials": {
            "url": str(engine.url)
        },
        "data_asset_type": {
            "class_name": "SqlAlchemyDataset",
            "module_name": "great_expectations.dataset",
        },
    }]
    return project_dir
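
# The `mock_webbrowser` argument above is not a pytest fixture: in tests like
# these it is usually injected by stacking `mock.patch` under `pytest.fixture`
# so that `init` never opens a real browser. A minimal sketch of that wiring;
# the patch target and names here are assumptions, not taken from this file.
from unittest import mock

import pytest


@pytest.fixture
@mock.patch("webbrowser.open", return_value=True, new_callable=mock.MagicMock)
def initialized_project_sketch(mock_webbrowser, tmp_path_factory):
    # mock.patch supplies mock_webbrowser; pytest supplies the remaining args.
    return str(tmp_path_factory.mktemp("sketch_project"))
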
def test_suite_delete_with_one_suite(mock_emit, caplog,
                                     empty_data_context_stats_enabled):
    project_dir = empty_data_context_stats_enabled.root_directory
    context = DataContext(project_dir)
    suite = context.create_expectation_suite("a.warning")
    context.save_expectation_suite(suite)
    mock_emit.reset_mock()

    suite_dir = os.path.join(project_dir, "expectations", "a")
    suite_path = os.path.join(suite_dir, "warning.json")
    assert os.path.isfile(suite_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite delete a.warning -d {}".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert "Deleted the expectation suite named: a.warning" in result.output

    # assert not os.path.isdir(suite_dir)
    assert not os.path.isfile(suite_path)

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.suite.delete",
            "event_payload": {},
            "success": True
        }),
    ]

    assert_no_logging_messages_or_tracebacks(caplog, result)
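
# `mock_emit` is injected the same way: a `mock.patch` decorator on the test
# replaces the usage-statistics emit method so the test can assert on the exact
# event sequence the CLI sends. A hedged sketch; the patch target below is an
# assumption about where `emit` lives, not something taken from this file.
from unittest import mock


@mock.patch(
    "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit"
)
def test_emit_wiring_sketch(mock_emit):
    # With the handler patched, a CLI invocation is expected to emit a
    # data_context.__init__ event followed by a command event such as
    # cli.suite.delete (see the call_args_list assertions above).
    assert mock_emit.call_count == 0
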
def test_checkpoint_new_on_context_with_no_datasources(caplog,
                                                       empty_data_context):
    """
    We call the "checkpoint new" command on a data context that has no datasources
    configured.

    The command should:
    - exit with a clear error message
    """
    root_dir = empty_data_context.root_directory
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint new not_a_suite checkpoint.py -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 1
    assert "No datasources found in the context" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_new_with_filename_not_ending_in_py_raises_helpful_error(
        caplog, empty_data_context):
    """
    We call the "checkpoint new" command with a bogus filename

    The command should:
    - exit with a clear error message
    """
    context = empty_data_context
    root_dir = context.root_directory
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint new sweet_suite checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 1
    assert "Tap filename must end in .py. Please correct and re-run" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_command_entrance(caplog):
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, catch_exceptions=False)
    assert result.exit_code == 0
    print(result.output)
    assert (result.output == """Usage: cli [OPTIONS] COMMAND [ARGS]...

  Welcome to the great_expectations CLI!

  Most commands follow this format: great_expectations <NOUN> <VERB>

  The nouns are: datasource, docs, project, suite, validation-operator

  Most nouns accept the following verbs: new, list, edit

  In particular, the CLI supports the following special commands:

  - great_expectations init : create a new great_expectations project

  - great_expectations datasource profile : profile a datasource

  - great_expectations docs build : compile documentation from expectations

Options:
  --version      Show the version and exit.
  -v, --verbose  Set great_expectations to use verbose output.
  --help         Show this message and exit.

Commands:
  checkpoint           Checkpoint operations
  datasource           Datasource operations
  docs                 Data Docs operations
  init                 Initialize a new Great Expectations project.
  project              Project operations
  store                Store operations
  suite                Expectation Suite operations
  validation-operator  Validation Operator operations
""")
    assert_no_logging_messages_or_tracebacks(caplog, result)
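
# Pinning the full --help text, as above, breaks on any wording change. A
# looser, hypothetical variant of the same check, reusing the module's
# existing `cli` and `CliRunner` imports:
def test_cli_command_entrance_key_lines_sketch():
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, catch_exceptions=False)
    assert result.exit_code == 0
    for fragment in (
        "Usage: cli [OPTIONS] COMMAND [ARGS]...",
        "great_expectations <NOUN> <VERB>",
        "checkpoint",
        "datasource",
        "suite",
    ):
        assert fragment in result.output
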
def test_store_list_with_four_stores(caplog, empty_data_context, monkeypatch):
    project_dir = empty_data_context.root_directory
    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(project_dir))

    expected_result = """\
4 Stores found:

 - name: expectations_store
   class_name: ExpectationsStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: expectations/

 - name: validations_store
   class_name: ValidationsStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: uncommitted/validations/

 - name: evaluation_parameter_store
   class_name: EvaluationParameterStore

 - name: checkpoint_store
   class_name: CheckpointStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: checkpoints/
     suppress_store_backend_id: True"""

    result = runner.invoke(
        cli,
        f"--v3-api store list",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_list_with_one_validation_operator(
        caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    runner = CliRunner(mix_stderr=False)

    expected_result = """Heads up! This feature is Experimental. It may change. Please give us your feedback!
1 Validation Operator found:

 - name: action_list_operator
   class_name: ActionListValidationOperator
   action_list: store_validation_result (StoreValidationResultAction) => store_evaluation_params (StoreEvaluationParametersAction) => update_data_docs (UpdateDataDocsAction)"""

    result = runner.invoke(
        cli,
        "validation-operator list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    # _capture_ansi_codes_to_file(result)
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_profile_answering_no(caplog, empty_data_context,
                                             filesystem_csv_2):
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )

    not_so_empty_data_context = empty_data_context
    project_root_dir = not_so_empty_data_context.root_directory

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "datasource",
            "profile",
            "my_datasource",
            "-d",
            project_root_dir,
            "--no-view",
        ],
        input="n\n",
        catch_exceptions=False,
    )

    stdout = result.stdout
    print(stdout)
    assert result.exit_code == 0
    assert "Warning - this is a BETA feature." in stdout
    assert "Profiling 'my_datasource'" in stdout
    assert "Skipping profiling for now." in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_noninteractive_golden_path(
        caplog, data_context, filesystem_csv_2):
    """
    Non-interactive mode golden path - use the --validation_config_file argument to pass the path
    to a valid validation config file
    """
    not_so_empty_data_context = data_context
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    csv_path = os.path.join(filesystem_csv_2, "f1.csv")

    validation_config = {
        "validation_operator_name":
        "default",
        "batches": [{
            "batch_kwargs": {
                "path": csv_path,
                "datasource": "mydatasource",
                "reader_method": "read_csv"
            },
            "expectation_suite_names": ["my_dag_node.default"]
        }]
    }
    validation_config_file_path = os.path.join(root_dir, "uncommitted",
                                               "validation_config_1.json")
    with open(validation_config_file_path, 'w') as f:
        json.dump(validation_config, f)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, [
        "validation-operator", "run", "-d", root_dir,
        "--validation_config_file", validation_config_file_path
    ],
                           catch_exceptions=False)
    stdout = result.stdout
    assert "Validation Failed" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_init_on_existing_project_with_datasource_with_no_suite_create_one(
    mock_browser, caplog, initialized_project,
):
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")

    # mangle the setup to remove all traces of any suite
    expectations_dir = os.path.join(ge_dir, "expectations")
    data_docs_dir = os.path.join(uncommitted_dir, "data_docs")
    validations_dir = os.path.join(uncommitted_dir, "validations")

    _delete_and_recreate_dir(expectations_dir)
    _delete_and_recreate_dir(data_docs_dir)
    _delete_and_recreate_dir(validations_dir)

    context = DataContext(ge_dir)
    assert context.list_expectation_suites() == []

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="{}\nsink_me\n\n\n".format(os.path.join(project_dir, "data/Titanic.csv")),
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert mock_browser.call_count == 1

    assert "Error: invalid input" not in stdout
    assert "Always know what to expect from your data" in stdout
    assert "Enter the path (relative or absolute) of a data file" in stdout
    assert "Generating example Expectation Suite..." in stdout
    assert "The following Data Docs sites were built" in stdout
    assert "Great Expectations is now set up" in stdout
    assert "A new Expectation suite 'sink_me' was added to your project" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_init_on_existing_project_with_datasource_with_existing_suite_offer_to_build_docs_answer_yes(
    mock_webbrowser, caplog, initialized_project,
):
    project_dir = initialized_project

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["init", "-d", project_dir], input="n\n", catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 0

    assert "Error: invalid input" not in stdout

    assert "Always know what to expect from your data" in stdout
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_noninteractive_validation_config_file_does_not_exist(
    caplog, data_context, filesystem_csv_2
):
    """
    Non-interactive mode. Use the --validation_config_file argument to pass the path
    to a validation config file that does not exist.
    """
    not_so_empty_data_context = data_context
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    validation_config_file_path = os.path.join(root_dir, "uncommitted", "validation_config_1.json")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["validation-operator", "run", "-d", root_dir, "--validation_config_file", validation_config_file_path],
        catch_exceptions=False
    )
    stdout = result.stdout
    assert "Failed to process the --validation_config_file argument" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_init_on_existing_project_with_multiple_datasources_exist_do_nothing(
    mock_webbrowser,
    caplog,
    initialized_sqlite_project,
    titanic_sqlite_db,
    empty_sqlite_db,
):
    project_dir = initialized_sqlite_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    context = DataContext(ge_dir)
    datasource_name = "wow_a_datasource"
    context = _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db)
    assert len(context.list_datasources()) == 2

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
            UserWarning,
            match="Warning. An existing `great_expectations.yml` was found"):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input="n\n",
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 0

    assert "Error: invalid input" not in stdout

    assert "Always know what to expect from your data" in stdout
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
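
# pytest.warns above asserts that running `init` on an already-initialized
# project emits a UserWarning matching the given pattern, while still letting
# the command complete. A minimal self-contained illustration of the mechanism:
import warnings

import pytest


def test_pytest_warns_sketch():
    with pytest.warns(UserWarning, match="existing `great_expectations.yml`"):
        warnings.warn(
            "Warning. An existing `great_expectations.yml` was found", UserWarning
        )
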
def test_checkpoint_run_on_checkpoint_with_not_found_suite_raises_error_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2_with_checkpoint,
):
    context = titanic_data_context_stats_enabled_config_version_2_with_checkpoint
    root_dir = context.root_directory

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint run my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 1

    assert "expectation_suite suite_one not found" in stdout

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.run",
            "event_payload": {},
            "success": False
        }),
    ]

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_checkpoint_script_raises_error_if_checkpoint_not_found_with_ge_config_v2(
        mock_emit, caplog,
        titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled):
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    assert context.list_checkpoints() == ["my_checkpoint"]
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint script not_a_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Could not find checkpoint `not_a_checkpoint`." in stdout
    assert "Try running" in stdout
    assert result.exit_code == 1

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.script",
            "event_payload": {},
            "success": False
        }),
    ]

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_suite_scaffold_on_context_with_no_datasource_raises_error(
    mock_subprocess, mock_emit, caplog, empty_data_context_stats_enabled
):
    """
    We call the "suite scaffold" command on a context with no datasource

    The command should:
    - exit with a clear error message
    - send a DataContext init success message
    - send a scaffold fail message
    """
    context = empty_data_context_stats_enabled
    root_dir = context.root_directory

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "scaffold", "foop", "-d", root_dir],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert (
        "No datasources found in the context. To add a datasource, run `great_expectations datasource new`"
        in stdout
    )

    assert mock_subprocess.call_count == 0
    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {"event": "cli.suite.scaffold", "event_payload": {}, "success": False}
        ),
    ]

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_new_raises_error_on_existing_checkpoint_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2_with_checkpoint,
):
    context = titanic_data_context_stats_enabled_config_version_2_with_checkpoint
    root_dir = context.root_directory
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint new my_checkpoint suite -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 1
    assert (
        "A checkpoint named `my_checkpoint` already exists. Please choose a new name."
        in stdout)

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.new",
            "event_payload": {},
            "success": False
        }),
    ]

    assert_no_logging_messages_or_tracebacks(
        caplog,
        result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_docs_build_assume_yes(
    caplog, site_builder_data_context_v013_with_html_store_titanic_random
):
    root_dir = (
        site_builder_data_context_v013_with_html_store_titanic_random.root_directory
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["docs", "build", "--no-view", "--assume-yes", "-d", root_dir],
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 0
    assert "Would you like to proceed? [Y/n]:" not in stdout
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_suite_list_with_multiple_suites(caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    context.create_expectation_suite("b.warning")
    context.create_expectation_suite("c.warning")

    runner = CliRunner(mix_stderr=False)

    result = runner.invoke(
        cli,
        "suite list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    output = result.output
    assert result.exit_code == 0
    assert "3 Expectation Suites found:" in output
    assert "a.warning" in output
    assert "b.warning" in output
    assert "c.warning" in output

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_interactive_pass_non_existing_operator_name(
    caplog, data_context, filesystem_csv_2
):
    """
    Interactive mode: pass an existing suite name and a non-existing validation
    operator name, then select an existing file.
    """
    not_so_empty_data_context = data_context
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    runner = CliRunner(mix_stderr=False)
    csv_path = os.path.join(filesystem_csv_2, "f1.csv")
    result = runner.invoke(
        cli,
        ["validation-operator", "run", "-d", root_dir, "--name", "this_val_op_does_not_exist", "--suite", "my_dag_node.default"],
        input=f"{csv_path}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Could not find a validation operator" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasorce_list(caplog, empty_data_context, filesystem_csv_2):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["datasource", "list", "-d", project_root_dir],
                           catch_exceptions=False)

    stdout = result.output.strip()
    assert "[]" in stdout
    assert context.list_datasources() == []

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )

    assert context.list_datasources() == [{
        "name": "wow_a_datasource",
        "class_name": "PandasDatasource"
    }]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["datasource", "list", "-d", project_root_dir],
                           catch_exceptions=False)

    stdout = result.output.strip()
    assert ("[{'name': 'wow_a_datasource', 'class_name': 'PandasDatasource'}]"
            in stdout)
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_on_exsiting_suite_one_datasources_with_batch_kwargs_without_datasource_raises_helpful_error(
    caplog,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - a datasource exists
    - and the user runs this
    great_expectations suite edit foo --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user should see a clear error and the program should halt before notebook compilation.
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {"path": "../data/Titanic.csv"}
    result = runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            project_dir,
            "--batch-kwargs",
            json.dumps(batch_kwargs),
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are able to load a batch." in stdout
    assert "Unable to load datasource `None`" in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_init_on_existing_project_with_multiple_datasources_exist_do_nothing(
    mock_webbrowser, caplog, monkeypatch, initialized_project, filesystem_csv_2
):
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    context = DataContext(ge_dir)
    context.add_datasource(
        "another_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
    )

    assert len(context.list_datasources()) == 2

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(project_dir)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli,
            ["--v3-api", "init"],
            input="n\n",
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 0
    assert "Error: invalid input" not in stdout

    assert "Always know what to expect from your data" in stdout
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_run_happy_path_with_successful_validation(
        mock_emit, caplog,
        titanic_data_context_with_checkpoint_suite_and_stats_enabled):
    context = titanic_data_context_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint run my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert "Validation Succeeded!" in stdout

    assert mock_emit.call_count == 4
    usage_emits = mock_emit.call_args_list
    assert usage_emits[0] == mock.call({
        "event_payload": {},
        "event": "data_context.__init__",
        "success": True
    })
    assert usage_emits[1][0][0]["event"] == "data_asset.validate"
    assert usage_emits[1][0][0]["success"] is True

    assert usage_emits[2][0][0][
        "event"] == "data_context.run_validation_operator"
    assert usage_emits[2][0][0]["success"] is True

    assert usage_emits[3] == mock.call({
        "event": "cli.checkpoint.run",
        "event_payload": {},
        "success": True
    })

    assert_no_logging_messages_or_tracebacks(caplog, result)
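
# Why the usage_emits[i][0][0] indexing above works: each element of
# call_args_list is a mock.call whose first item is the tuple of positional
# arguments. A tiny self-contained illustration (not from the source):
from unittest import mock


def test_call_args_list_indexing_sketch():
    emit = mock.MagicMock()
    emit({"event": "data_asset.validate", "success": True})
    # call_args_list[0] is the first call, [0] selects its positional args,
    # and the final [0] is the single dict payload that was passed in.
    assert emit.call_args_list[0][0][0]["event"] == "data_asset.validate"
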
def test_suite_new_enter_existing_suite_name_as_arg(mock_webbrowser,
                                                    mock_subprocess, caplog,
                                                    data_context):
    """
    We call the "suite new" command with the name of an existing expectation
    suite in the --suite argument

    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """

    not_so_empty_data_context = data_context
    project_root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(project_root_dir, "uncommitted"))

    root_dir = project_root_dir
    os.chdir(root_dir)
    context = DataContext(root_dir)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "suite", "new", "-d", root_dir, "--suite", "my_dag_node.default",
            "--no-view"
        ],
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 1
    assert "already exists. If you intend to edit the suite" in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
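
# Note: the test above changes the working directory with os.chdir and never
# restores it, which can leak into later tests. A sketch of the
# monkeypatch-based form used elsewhere in this collection, which restores the
# original cwd automatically when the test ends:
def test_chdir_isolation_sketch(monkeypatch, tmp_path):
    monkeypatch.chdir(tmp_path)
    # ...invoke the CLI here; no manual cleanup of the working directory needed.
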
def test_docs_build_no_view(
    mock_webbrowser, caplog, site_builder_data_context_with_html_store_titanic_random
):
    root_dir = site_builder_data_context_with_html_store_titanic_random.root_directory

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["docs", "build", "--no-view", "-d", root_dir],
        input="\n\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 0
    assert "Building" in stdout
    assert "The following Data Docs sites will be built:" in stdout
    assert "great_expectations/uncommitted/data_docs/local_site/index.html" in stdout

    context = DataContext(root_dir)
    obs_urls = context.get_docs_sites_urls()

    assert len(obs_urls) == 2
    assert (
        "great_expectations/uncommitted/data_docs/local_site/index.html"
        in obs_urls[0]["site_url"]
    )
    local_site_dir = os.path.join(root_dir, "uncommitted/data_docs/local_site/")

    assert os.path.isdir(os.path.join(local_site_dir))
    assert os.path.isfile(os.path.join(local_site_dir, "index.html"))
    assert os.path.isdir(os.path.join(local_site_dir, "expectations"))
    assert os.path.isdir(os.path.join(local_site_dir, "validations"))
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
def test_cli_datasorce_list(empty_data_context, empty_sqlite_db, caplog):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )

    stdout = result.output.strip()
    assert "[]" in stdout
    assert context.list_datasources() == []

    datasource_name = "wow_a_datasource"
    _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    stdout = result.output.strip()
    if PY2:
        # deal with legacy python dictionary sorting
        assert "'name': 'wow_a_datasource'" in stdout
        assert "'class_name': u'SqlAlchemyDatasource'" in stdout
        assert len(stdout) >= 60 and len(stdout) <= 75
    else:
        assert (
            "[{'name': 'wow_a_datasource', 'class_name': 'SqlAlchemyDatasource'}]"
            in stdout
        )

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project_with_broken_excel_file_without_trying_again(caplog, tmp_path_factory):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__, "../test_sets/broken_excel_file.xls")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n1\n1\n{}\n\n\nn\n".format(data_path, catch_exceptions=False),
    )
    stdout = result.output

    assert len(stdout) < 3000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert "Enter the path (relative or absolute) of a data file" in stdout
    assert "Cannot load file." in stdout
    assert "- Please check the file and try again or select a different data file." in stdout
    assert "- Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'" in stdout
    assert "Try again? [Y/n]:" in stdout
    assert "[{}]:".format(data_path) in stdout
    assert "We have saved your setup progress. When you are ready, run great_expectations init to continue." in stdout

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    data_source_class = config["datasources"]["files_datasource"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_with_invalid_json_batch_kwargs_raises_helpful_error(
        mock_webbrowser, mock_subprocess, caplog, monkeypatch,
        empty_data_context):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        [
            "--v3-api",
            "suite",
            "edit",
            "foo",
            "--batch-kwargs",
            "'{foobar}'",
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are valid JSON." in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
def test_project_upgrade_already_up_to_date(v10_project_directory, caplog):
    # test the `great_expectations project upgrade` command on a project with config_version 2

    # copy v2 yml
    shutil.copy(
        file_relative_path(
            __file__,
            "../../test_fixtures/upgrade_helper/great_expectations_v2.yml"),
        os.path.join(v10_project_directory, "great_expectations.yml"),
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["project", "upgrade", "-d", v10_project_directory],
        input="\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "Checking project..." in stdout
    assert "Your project is up-to-date - no upgrade is necessary." in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)