コード例 #1
0
def generate_basic_suites(
    kedro_context: KedroContext,
    ge_context: DataContext,
    empty=False,
    replace=False,
    batch_kwargs=None,
):
    """Build one basic expectation suite per dataset of the Kedro catalog.

    A dataset is skipped when its suite name is already listed by
    ``ge_context`` and ``replace`` is false, or when its datasource name is
    not among the datasources listed by ``ge_context``.

    Args:
        kedro_context: Kedro context whose ``catalog`` is walked.
        ge_context: Great Expectations context used to list, create and
            profile suites.
        empty: When true, call ``create_empty_suite`` instead of profiling
            the data asset.
        replace: When true, suites whose name already exists are regenerated
            instead of skipped.
        batch_kwargs: Forwarded to the profiler as ``additional_batch_kwargs``;
            ``None`` is treated as an empty dict.
    """
    from great_expectations.profile import BasicSuiteBuilderProfiler

    extra_batch_kwargs = {} if batch_kwargs is None else batch_kwargs
    catalog = kedro_context.catalog
    known_datasources = {
        entry["name"] for entry in ge_context.list_datasources()
    }

    for dataset_name in catalog.list():
        suite_name = generate_basic_suite_name(dataset_name)
        # The suite listing is re-read on every iteration, so suites created
        # earlier in this loop are taken into account.
        already_exists = suite_name in ge_context.list_expectation_suite_names()
        if already_exists and not replace:
            continue

        datasource_name = generate_datasource_name(dataset_name)
        if datasource_name not in known_datasources:
            continue

        dataset = catalog._get_dataset(dataset_name)
        file_path = str(dataset._filepath)
        # Asset name = file name without directory and without extension.
        asset_name = os.path.splitext(os.path.basename(file_path))[0]

        suite_batch_kwargs = {
            "datasource": datasource_name,
            "data_asset_name": asset_name,
            "path": file_path,
            "reader_options": dataset._load_args,
        }

        # UTC timestamp identifying this profiling run.
        utc_now = datetime.datetime.now(datetime.timezone.utc)
        run_id = utc_now.strftime("%Y%m%dT%H%M%S.%fZ")

        if empty:
            create_empty_suite(ge_context, suite_name, suite_batch_kwargs)
            continue

        ge_context.profile_data_asset(
            datasource_name,
            batch_kwargs_generator_name="path",
            data_asset_name=asset_name,
            batch_kwargs=suite_batch_kwargs,
            profiler=BasicSuiteBuilderProfiler,
            profiler_configuration="demo",
            expectation_suite_name=suite_name,
            run_id=run_id,
            additional_batch_kwargs=extra_batch_kwargs,
        )
コード例 #2
0
def test_suite_demo_enter_existing_suite_name_as_arg(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
):
    """
    Invoke "suite demo" with --suite set to the name of a suite that
    already exists.

    Expected outcome:
    - the command exits with code 1 and a clear error message
    - Data Docs is NOT opened
    - jupyter is NOT opened
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    existing_suite_name = "my_dag_node.default"
    # Precondition: the fixture project contains exactly this one suite.
    assert DataContext(root_dir).list_expectation_suite_names() == [
        existing_suite_name
    ]

    cli_args = [
        "suite",
        "demo",
        "-d",
        root_dir,
        "--suite",
        existing_suite_name,
        "--no-view",
    ]
    outcome = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, catch_exceptions=False
    )
    output = outcome.stdout

    assert outcome.exit_code == 1
    expected_messages = (
        f"An expectation suite named `{existing_suite_name}` already exists.",
        f"If you intend to edit the suite please use `great_expectations suite edit {existing_suite_name}`",
    )
    for message in expected_messages:
        assert message in output

    # Neither the browser nor a subprocess may have been launched.
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, outcome)
コード例 #3
0
def suite_list(directory):
    """Print the available Expectation Suites and report a usage event.

    A ``ConfigNotFoundError`` raised while building the ``DataContext`` is
    printed in red and the function returns. Any exception raised while
    listing is reported as a failed ``cli.suite.list`` event and re-raised.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    try:
        formatted_names = [
            " - <cyan>{}</cyan>".format(name)
            for name in context.list_expectation_suite_names()
        ]
        suite_count = len(formatted_names)

        if suite_count == 0:
            cli_message("No Expectation Suites found")
        else:
            if suite_count == 1:
                intro = "1 Expectation Suite found:"
            else:
                intro = "{} Expectation Suites found:".format(suite_count)
            cli_message_list(formatted_names, intro)

        # Both the empty and the non-empty outcome count as a success.
        send_usage_message(
            data_context=context, event="cli.suite.list", success=True
        )
    except Exception as exc:
        send_usage_message(
            data_context=context, event="cli.suite.list", success=False
        )
        raise exc
コード例 #4
0
ファイル: suite.py プロジェクト: cicdw/great_expectations
def suite_list(directory):
    """Print the expectation suites of the DataContext built from ``directory``.

    A ``ConfigNotFoundError`` raised while building the context is printed in
    red and the function returns without listing anything.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    names = context.list_expectation_suite_names()
    count = len(names)
    if count == 0:
        cli_message("No expectation suites found")
        return

    # Header line: singular wording for exactly one suite, plural otherwise.
    if count == 1:
        header = "1 expectation suite found:"
    else:
        header = "{} expectation suites found:".format(count)
    cli_message(header)

    for suite_name in names:
        cli_message("\t{}".format(suite_name))
コード例 #5
0
ファイル: suite.py プロジェクト: rlshuhart/great_expectations
def suite_list(directory):
    """Lists available expectation suites.

    Args:
        directory: Project directory passed to ``DataContext``.

    A ``ConfigNotFoundError`` is printed in red and the function returns.
    A ``ZeroDotSevenConfigVersionError`` is handed to
    ``_offer_to_install_new_template`` and the function returns.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # ``context`` is never bound when the constructor raises, so reading
        # ``context.root_directory`` here would fail with UnboundLocalError
        # and hide the real error. Use the directory we were given instead.
        # NOTE(review): if ``directory`` can be None, confirm that
        # ``_offer_to_install_new_template`` copes with it.
        _offer_to_install_new_template(err, directory)
        return

    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        cli_message("No expectation suites available")
        return

    if len(suite_names) == 1:
        cli_message("1 expectation suite available:")
    else:
        cli_message("{} expectation suites available:".format(len(suite_names)))

    for name in suite_names:
        cli_message("\t{}".format(name))
コード例 #6
0
def test_suite_new_empty_with_no_jupyter(
    mock_webbroser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
    filesystem_csv_2,
):
    """
    "suite new --no-jupyter" is expected to:
    - create an empty suite
    - leave jupyter unopened
    - leave data docs unopened
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    csv = os.path.join(filesystem_csv_2, "f1.csv")

    # TODO this test must be updated to remove the --empty flag in the next major release
    cli_args = [
        "suite",
        "new",
        "-d",
        root_dir,
        "--empty",
        "--suite",
        "foo",
        "--no-jupyter",
    ]
    result = CliRunner(mix_stderr=False).invoke(
        cli, cli_args, input=f"{csv}\n", catch_exceptions=False
    )
    output = result.stdout

    assert result.exit_code == 0

    # Text that must show up in the CLI output.
    for fragment in (
        "Enter the path",
        "Great Expectations will create a new Expectation Suite 'foo' and store it here",
    ):
        assert fragment in output

    # Text belonging to the demo / notebook flows must be absent.
    for fragment in (
        "Name the new expectation suite",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "The following Data Docs sites were built",
        "open a notebook for you now",
    ):
        assert fragment not in output

    # Both the suite file and the edit notebook are written to disk.
    assert os.path.isfile(os.path.join(root_dir, "expectations", "foo.json"))
    assert os.path.isfile(os.path.join(root_dir, "uncommitted", "edit_foo.ipynb"))

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []

    first_citation = suite.get_citations()[0]
    # The citation date varies between runs, so drop it before comparing.
    first_citation.pop("citation_date")
    assert first_citation == {
        "batch_kwargs": {
            "data_asset_name": "f1",
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    assert mock_subprocess.call_count == 0
    assert mock_webbroser.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
コード例 #7
0
def test_suite_demo_answer_suite_name_prompts_with_name_of_existing_suite(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
    filesystem_csv_2,
):
    """
    Invoke "suite demo" without passing a suite name.

    Expected behaviour of the command:

    - it prompts for the name of the expectation suite to create; the first
    answer given is the name of an existing expectation suite.
    - it shows an error message and prompts again until the answer is a
    name that is not "taken".
    - it creates an example suite
    - it does NOT open jupyter
    - it opens DataDocs on the page of the new example suite
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    csv_path = os.path.join(filesystem_csv_2, "f1.csv")
    existing_suite_name = "my_dag_node.default"

    # Precondition: the fixture project contains exactly this one suite.
    assert DataContext(root_dir).list_expectation_suite_names() == [
        existing_suite_name
    ]

    # Answers fed to the prompts: data path, taken name, fresh name, <enter>.
    result = CliRunner(mix_stderr=False).invoke(
        cli,
        ["suite", "demo", "-d", root_dir],
        input=f"{csv_path}\n{existing_suite_name}\nmy_new_suite\n\n",
        catch_exceptions=False,
    )
    output = result.stdout
    assert result.exit_code == 0

    expected_fragments = (
        f"An expectation suite named `{existing_suite_name}` already exists.",
        f"If you intend to edit the suite please use `great_expectations suite edit {existing_suite_name}`",
        "Enter the path",
        "Name the new Expectation Suite [f1.warning]",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "Building",
        "The following Data Docs sites will be built",
        "Great Expectations will store these expectations in a new Expectation Suite 'my_new_suite' here",
    )
    for fragment in expected_fragments:
        assert fragment in output
    assert "open a notebook for you now" not in output

    assert os.path.isfile(
        os.path.join(root_dir, "expectations", "my_new_suite.json")
    )

    assert mock_subprocess.call_count == 0
    assert mock_webbrowser.call_count == 1

    # The single browser call must target the new suite's validation page.
    foo = os.path.join(
        root_dir, "uncommitted/data_docs/local_site/validations/my_new_suite/"
    )
    opened_url = mock_webbrowser.call_args[0][0]
    assert f"file://{foo}" in opened_url

    assert_no_logging_messages_or_tracebacks(caplog, result)
コード例 #8
0
def test_suite_new_empty_suite_creates_empty_suite(mock_webbroser,
                                                   mock_subprocess, caplog,
                                                   data_context,
                                                   filesystem_csv_2):
    """
    "suite new --empty" is expected to:
    - create an empty suite
    - open jupyter
    - leave data docs unopened
    """
    root_dir = data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    os.chdir(root_dir)

    csv = os.path.join(filesystem_csv_2, "f1.csv")
    result = CliRunner(mix_stderr=False).invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--empty", "--suite", "foo"],
        input=f"{csv}\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0

    # Text that must show up in the CLI output.
    for fragment in (
        "Enter the path",
        "A new Expectation suite 'foo' was added to your project",
        "Because you requested an empty suite, we'll open a notebook for you now to edit it!",
    ):
        assert fragment in output

    # Text belonging to the demo flow must be absent.
    for fragment in (
        "Name the new expectation suite",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "The following Data Docs sites were built",
    ):
        assert fragment not in output

    assert os.path.isfile(os.path.join(root_dir, "expectations", "foo.json"))

    expected_notebook = os.path.join(root_dir, "uncommitted", "foo.ipynb")
    assert os.path.isfile(expected_notebook)

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []

    first_citation = suite.get_citations()[0]
    # The citation date varies between runs, so drop it before comparing.
    first_citation.pop("citation_date")
    assert first_citation == {
        "batch_kwargs": {
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    # Exactly one subprocess call: "jupyter notebook <path to the notebook>".
    assert mock_subprocess.call_count == 1
    launched_command = mock_subprocess.call_args[0][0]
    assert launched_command[0] == "jupyter"
    assert launched_command[1] == "notebook"
    assert expected_notebook in launched_command[2]

    assert mock_webbroser.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)