Example #1
    def _ge_validation_fn(context, dataset):
        data_context = context.resources.ge_data_context
        validator_kwargs = {
            "datasource_name": datasource_name,
            "data_connector_name": data_connector_name,
            "data_asset_name": datasource_name or data_asset_name,
            "runtime_parameters": {
                runtime_method_type: dataset
            },
            "batch_identifiers": batch_identifiers,
            "expectation_suite_name": suite_name,
            **extra_kwargs,
        }
        validator = data_context.get_validator(**validator_kwargs)

        run_id = {
            "run_name": datasource_name + " run",
            "run_time": datetime.datetime.utcnow(),
        }
        results = validator.validate(run_id=run_id)

        validation_results_page_renderer = ValidationResultsPageRenderer(
            run_info_at_end=True)
        rendered_document_content_list = validation_results_page_renderer.render(
            validation_results=results)
        md_str = "".join(
            DefaultMarkdownPageView().render(rendered_document_content_list))

        meta_stats = MetadataEntry("Expectation Results",
                                   value=MetadataValue.md(md_str))
        yield ExpectationResult(
            success=bool(results["success"]),
            metadata_entries=[meta_stats],
        )
        yield Output(results.to_json_dict())
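
Note that `_ge_validation_fn` closes over names supplied by an enclosing factory (`datasource_name`, `data_connector_name`, `data_asset_name`, `suite_name`, `batch_identifiers`, `runtime_method_type`, `extra_kwargs`). A minimal sketch of how such a factory might be invoked, assuming dagster-ge's `ge_validation_op_factory_v3`; the exact factory name, import path, and defaults vary across dagster-ge versions, so treat them as assumptions:

# Hedged sketch: keyword names mirror the closure's free variables above;
# the factory name and the argument values are assumptions, not the verified API.
from dagster_ge.factory import ge_validation_op_factory_v3

ge_validation_op = ge_validation_op_factory_v3(
    name="validate_dataset",                                   # hypothetical op name
    datasource_name="my_datasource",                           # hypothetical datasource
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="my_asset",                                # hypothetical asset name
    suite_name="basic.warning",
    batch_identifiers={"default_identifier_name": "default"},
)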
Example #2
def render_multiple_validation_result_pages_markdown(
    validation_operator_result: ValidationOperatorResult,
    run_info_at_end: bool = True,
):
    """
    Loop through and render multiple validation results to markdown.
    Args:
        validation_operator_result: (ValidationOperatorResult) Result of validation operator run
        run_info_at_end: move run info below expectation results

    Returns:
        string containing formatted markdown validation results

    """

    md_str = ""
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=run_info_at_end)
    for validation_result in validation_operator_result.list_validation_results():
        rendered_document_content = validation_results_page_renderer.render(
            validation_result)
        md_str += DefaultMarkdownPageView().render(
            rendered_document_content) + " "

    return md_str
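
A hedged usage sketch: assuming `results` is the `ValidationOperatorResult` returned by `data_context.run_validation_operator(...)` (as in Example #10 below), the helper collapses every validation result into one markdown string. The operator name, `batch`, and `run_id` here are hypothetical:

# Hypothetical call site, following the pattern in Example #10.
results = data_context.run_validation_operator(
    "action_list_operator", assets_to_validate=[batch], run_id=run_id)
md = render_multiple_validation_result_pages_markdown(
    validation_operator_result=results, run_info_at_end=True)
print(md)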
Example #3
def test_smoke_render_validation_results_page_renderer(titanic_profiler_evrs):
    rendered = ValidationResultsPageRenderer().render(titanic_profiler_evrs)
    with open(
            './tests/render/output/test_render_validation_results_page_renderer.json',
            'w') as outfile:
        json.dump(rendered, outfile, indent=2)
    assert len(rendered["sections"]) > 5
Example #4
def test_smoke_render_validation_results_page_renderer(titanic_profiler_evrs):
    rendered = ValidationResultsPageRenderer().render(titanic_profiler_evrs)
    with open(
            file_relative_path(
                __file__,
                "./output/test_render_validation_results_page_renderer.json"),
            "w",
    ) as outfile:
        json.dump(rendered.to_json_dict(), outfile, indent=2)
    assert len(rendered.sections) > 5
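
Example #4 differs from Example #3 mainly in resolving the output file with `file_relative_path`, so the test does not depend on the current working directory. A minimal sketch of that helper, assuming Great Expectations' `great_expectations.data_context.util.file_relative_path`:

from great_expectations.data_context.util import file_relative_path

# Resolves the relative path against the directory containing this module,
# rather than against the process's current working directory.
out_path = file_relative_path(__file__, "./output/rendered.json")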
Example #5
def test_render_validation_results(titanic_profiled_evrs_1):
    rendered_content = ValidationResultsPageRenderer().render(titanic_profiled_evrs_1)
    rendered_page = DefaultJinjaPageView().render(rendered_content)

    with open(file_relative_path(__file__, './output/test_render_validation_results.html'), 'wb') as f:
        f.write(rendered_page.encode("utf-8"))

    assert rendered_page[:15] == "<!DOCTYPE html>"
    assert rendered_page[-7:] == "</html>"
    assert "Table-Level Expectations" in rendered_page
    assert 'Must have more than <span class="badge badge-secondary" >0</span> rows.' in rendered_page
Example #6
def test_render_validation_results(titanic_profiled_evrs_1):
    rendered_json = ValidationResultsPageRenderer().render(titanic_profiled_evrs_1)
    rendered_page = DefaultJinjaPageView().render(rendered_json)

    with open('./tests/render/output/test_render_validation_results.html', 'wb') as f:
        f.write(rendered_page.encode("utf-8"))

    assert rendered_page[:15] == "<!DOCTYPE html>"
    assert rendered_page[-7:] == "</html>"
    assert "Table-Level Expectations" in rendered_page
    assert 'Must have more than <span class="badge badge-secondary" >0</span> rows.' in rendered_page
    assert 'Must have between <span class="badge badge-secondary" >0</span> and <span class="badge badge-secondary" >23</span> columns.' in rendered_page
Example #7
def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_start(
    titanic_profiled_evrs_1,
    ValidationResultsPageRenderer_render_with_run_info_at_start,
):
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=False)
    rendered_validation_results = validation_results_page_renderer.render(
        titanic_profiled_evrs_1).to_json_dict()
    print(rendered_validation_results)
    # with open(file_relative_path(__file__, "./fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start_nc.json"), "w") as f:
    #     json.dump(rendered_validation_results, f, indent=2)

    assert (rendered_validation_results ==
            ValidationResultsPageRenderer_render_with_run_info_at_start)
Example #8
def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_end(
    titanic_profiled_evrs_1,
    ValidationResultsPageRenderer_render_with_run_info_at_end,
):
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=True)
    rendered_validation_results = validation_results_page_renderer.render(
        titanic_profiled_evrs_1).to_json_dict()

    # replace version of vega-lite in res to match snapshot test
    content_block = rendered_validation_results["sections"][5][
        "content_blocks"][1]["table"][10][2]["content_blocks"][1]
    content_block["graph"]["$schema"] = re.sub(
        r"v\d*\.\d*\.\d*", "v4.8.1", content_block["graph"]["$schema"])
    assert (rendered_validation_results ==
            ValidationResultsPageRenderer_render_with_run_info_at_end)
Example #9
def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_end(
    titanic_profiled_evrs_1,
    ValidationResultsPageRenderer_render_with_run_info_at_end,
):
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=True)
    rendered_validation_results = validation_results_page_renderer.render(
        titanic_profiled_evrs_1).to_json_dict()
    import pprint

    pprint.pprint(rendered_validation_results["sections"])
    # with open(file_relative_path(__file__, "./fixtures/ValidationResultsPageRenderer_render_with_run_info_at_end_nc.json"), "w") as f:
    #     json.dump(rendered_validation_results, f, indent=2)
    pprint.pprint(ValidationResultsPageRenderer_render_with_run_info_at_end)
    assert (rendered_validation_results ==
            ValidationResultsPageRenderer_render_with_run_info_at_end)
Example #10
    def _ge_validation_fn(context, dataset):
        data_context = context.resources.ge_data_context
        if validation_operator_name is not None:
            validation_operator = validation_operator_name
        else:
            data_context.add_validation_operator(
                "ephemeral_validation",
                {
                    "class_name": "ActionListValidationOperator",
                    "action_list": []
                },
            )
            validation_operator = "ephemeral_validation"
        suite = data_context.get_expectation_suite(suite_name)
        final_batch_kwargs = batch_kwargs or {"dataset": dataset}
        if batch_kwargs and "datasource" in batch_kwargs:
            context.log.warning(
                "`datasource` field of `batch_kwargs` will be ignored; use the `datasource_name` "
                f"parameter of the {decorator_name} factory instead.")
        final_batch_kwargs["datasource"] = datasource_name
        batch = data_context.get_batch(final_batch_kwargs, suite)
        run_id = {
            "run_name": datasource_name + " run",
            "run_time": datetime.datetime.utcnow(),
        }
        results = data_context.run_validation_operator(
            validation_operator, assets_to_validate=[batch], run_id=run_id)
        res = convert_to_json_serializable(
            results.list_validation_results())[0]
        validation_results_page_renderer = ValidationResultsPageRenderer(
            run_info_at_end=True)
        rendered_document_content_list = (
            validation_results_page_renderer.render_validation_operator_result(
                results))
        md_str = " ".join(
            DefaultMarkdownPageView().render(rendered_document_content_list))

        meta_stats = MetadataEntry("Expectation Results",
                                   value=MetadataValue.md(md_str))
        yield ExpectationResult(
            success=res["success"],
            metadata_entries=[
                meta_stats,
            ],
        )
        yield Output(res)
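
Example #10 is the older batch_kwargs / validation-operator variant of the closure in Example #1. A hedged invocation sketch, assuming dagster-ge's V2-era `ge_validation_solid_factory`; the factory name and defaults are assumptions, while the keyword names mirror the closure's free variables:

# Hedged sketch of the older dagster-ge API (assumption).
from dagster_ge.factory import ge_validation_solid_factory

ge_validation_solid = ge_validation_solid_factory(
    name="validate_dataset",        # hypothetical solid name
    datasource_name="getest",       # hypothetical datasource
    suite_name="basic.warning",
    validation_operator_name=None,  # None -> an ephemeral ActionListValidationOperator, as above
    batch_kwargs=None,              # None -> {"dataset": dataset} at runtime
)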
Example #11
def test_snapshot_ValidationResultsPageRenderer_render_with_run_info_at_start(
    titanic_profiled_evrs_1,
    ValidationResultsPageRenderer_render_with_run_info_at_start,
):
    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=False)
    rendered_validation_results = validation_results_page_renderer.render(
        titanic_profiled_evrs_1).to_json_dict()

    # replace version of vega-lite in res to match snapshot test
    content_block = rendered_validation_results["sections"][5][
        "content_blocks"][1]["table"][10][2]["content_blocks"][1]
    content_block["graph"]["$schema"] = re.sub(
        r"v\d*\.\d*\.\d*", "v4.8.1", content_block["graph"]["$schema"])

    # with open(file_relative_path(__file__, "./fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start_nc.json"), "w") as f:
    #     json.dump(rendered_validation_results, f, indent=2)

    assert (rendered_validation_results ==
            ValidationResultsPageRenderer_render_with_run_info_at_start)
Example #12
def render_multiple_validation_result_pages_markdown(
    validation_operator_result: ValidationOperatorResult,
    run_info_at_end: bool = True,
) -> str:
    """
    Loop through and render multiple validation results to markdown.
    Args:
        validation_operator_result: (ValidationOperatorResult) Result of validation operator run
        run_info_at_end: move run info below expectation results
    Returns:
        string containing formatted markdown validation results
    """

    # deprecated-v0.12.1
    warnings.warn(
        "This 'render_multiple_validation_result_pages_markdown' function is deprecated as of v0.12.1 and will be removed in v0.16."
        "Please use ValidationResultsPageRenderer.render_validation_operator_result() instead."
        "E.g. to replicate the functionality of rendering a ValidationOperatorResult to markdown:"
        "validation_results_page_renderer = ValidationResultsPageRenderer("
        "    run_info_at_end=run_info_at_end"
        ")"
        "rendered_document_content_list = validation_results_page_renderer.render_validation_operator_result("
        "   validation_operator_result=validation_operator_result"
        ")"
        'return " ".join(DefaultMarkdownPageView().render(rendered_document_content_list))',
        DeprecationWarning,
    )

    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=run_info_at_end
    )
    rendered_document_content_list = (
        validation_results_page_renderer.render_validation_operator_result(
            validation_operator_result=validation_operator_result
        )
    )

    return " ".join(DefaultMarkdownPageView().render(rendered_document_content_list))
Example #13
def validate_csv_using_greatexpectations(
        csv_path: InputPath(),
        expectation_suite_path: InputPath(),
        data_doc_path: OutputPath(),
):
    """Validate a CSV dataset against a Great Expectations suite and create
    Data Doc (a validation report). This component fails if validation is not
    successful.

    Annotations:
        authors: Yaroslav Beshta <*****@*****.**>, Anton Kiselev <*****@*****.**>

    Args:
        csv_path: Path to the CSV file with the dataset.
        expectation_suite_path: Path to the Great Expectations expectation suite (in JSON format).
        data_doc_path: Output path for the rendered Data Doc (HTML validation report).
    """
    import json
    import os
    import sys

    import great_expectations as ge
    from great_expectations.render.view import DefaultJinjaPageView
    from great_expectations.render.renderer import ValidationResultsPageRenderer

    with open(expectation_suite_path, 'r') as json_file:
        expectation_suite = json.load(json_file)
    df = ge.read_csv(csv_path, expectation_suite=expectation_suite)
    result = df.validate()

    document_model = ValidationResultsPageRenderer().render(result)
    os.makedirs(os.path.dirname(data_doc_path), exist_ok=True)
    with open(data_doc_path, 'w') as writer:
        writer.write(DefaultJinjaPageView().render(document_model))

    print(f'Saved: {data_doc_path}')

    if not result.success:
        sys.exit(1)
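
The `InputPath()` / `OutputPath()` annotations come from `kfp.components`, so a function shaped like this is meant to be wrapped into a Kubeflow Pipelines component. A hedged sketch using kfp v1's `create_component_from_func`; the base image and package pin are assumptions:

from kfp.components import create_component_from_func

validate_csv_op = create_component_from_func(
    validate_csv_using_greatexpectations,
    base_image="python:3.8",                     # assumption
    packages_to_install=["great_expectations"],  # assumption
)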
Example #14
def test_snapshot_render_section_page_with_fixture_data(
        validation_operator_result):
    """
    Make sure the appropriate markdown rendering is done for the applied fixture.
    Args:
        validation_operator_result: test fixture

    Returns: None

    """

    validation_operator_result = ValidationOperatorResult(
        **validation_operator_result)

    validation_results_page_renderer = ValidationResultsPageRenderer(
        run_info_at_end=True)

    rendered_document_content_list = validation_results_page_renderer.render_validation_operator_result(
        validation_operator_result=validation_operator_result)

    md_str_list = DefaultMarkdownPageView().render(
        rendered_document_content_list)

    md_str = " ".join(md_str_list)

    md_str = md_str.replace(" ", "").replace("\t", "").replace("\n", "")

    print(md_str)

    assert (md_str == """
# Validation Results




## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:**  **Failed**





### Statistics






 |  |  |
 | ------------  | ------------ |
Evaluated Expectations  | 11
Successful Expectations  | 9
Unsuccessful Expectations  | 2
Success Percent  | ≈81.82%





## Table-Level Expectations








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
❌  | Must have greater than or equal to **27000** and less than or equal to **33000** rows.  | 30
✅  | Must have exactly **3** columns.  | 3
✅  | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"**  | ['Team', ' "Payroll (millions)"', ' "Wins"']





##  "Payroll (millions)"








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**.  | 55.24
✅  | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**.  | 197.96
✅  | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**.  | ≈98.019
❌  | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**.  | 85.75
✅  | quantiles must be within the following value ranges.




 | Quantile | Min Value | Max Value |
 | ------------  | ------------  | ------------ |
0.05  | 54.37  | 56.37
Q1  | 74.48  | 76.48
Median  | 82.31  | 84.31
Q3  | 116.62  | 118.62
0.95  | 173.54  | 175.54
  |




 | Quantile | Value |
 | ------------  | ------------ |
0.05  | 55.37
Q1  | 75.48
Median  | 83.31
Q3  | 117.62
0.95  | 174.54






## Team








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | values must always be greater than or equal to **1** characters long.  | 0% unexpected







### Info






 |  |  |
 | ------------  | ------------ |
Great Expectations Version  | 0.11.8+4.g4ab34df3.dirty
Run Name  | getest run
Run Time  | 2020-07-27T17:19:32.959193+00:00





### Batch Markers






 |  |  |
 | ------------  | ------------ |
**ge_load_time**  | **20200727T171932.954810Z**
**pandas_data_fingerprint**  | **8c46fdaf0bd356fd58b7bcd9b2e6012d**





### Batch Kwargs






 |  |  |
 | ------------  | ------------ |
**PandasInMemoryDF**  | **True**
**datasource**  | **getest**
**ge_batch_id**  | **56615f40-d02d-11ea-b6ea-acde48001122**




-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
# Validation Results




## Overview
### **Expectation Suite:** **basic.warning**
**Data asset:** **None**
**Status:**  **Failed**





### Statistics






 |  |  |
 | ------------  | ------------ |
Evaluated Expectations  | 11
Successful Expectations  | 9
Unsuccessful Expectations  | 2
Success Percent  | ≈81.82%





## Table-Level Expectations








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
❌  | Must have greater than or equal to **27000** and less than or equal to **33000** rows.  | 30
✅  | Must have exactly **3** columns.  | 3
✅  | Must have these columns in this order: **Team**, ** "Payroll (millions)"**, ** "Wins"**  | ['Team', ' "Payroll (millions)"', ' "Wins"']





##  "Payroll (millions)"








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | minimum value must be greater than or equal to **54.24** and less than or equal to **56.24**.  | 55.24
✅  | maximum value must be greater than or equal to **196.96** and less than or equal to **198.96**.  | 197.96
✅  | mean must be greater than or equal to **97.01899999999998** and less than or equal to **99.01899999999998**.  | ≈98.019
❌  | median must be greater than or equal to **84000.75** and less than or equal to **86000.75**.  | 85.75
✅  | quantiles must be within the following value ranges.




 | Quantile | Min Value | Max Value |
 | ------------  | ------------  | ------------ |
0.05  | 54.37  | 56.37
Q1  | 74.48  | 76.48
Median  | 82.31  | 84.31
Q3  | 116.62  | 118.62
0.95  | 173.54  | 175.54
  |




 | Quantile | Value |
 | ------------  | ------------ |
0.05  | 55.37
Q1  | 75.48
Median  | 83.31
Q3  | 117.62
0.95  | 174.54






## Team








 | Status | Expectation | Observed Value |
 | ------------  | ------------  | ------------ |
✅  | values must never be null.  | 100% not null
✅  | values must always be greater than or equal to **1** characters long.  | 0% unexpected







### Info






 |  |  |
 | ------------  | ------------ |
Great Expectations Version  | 0.11.8+4.g4ab34df3.dirty
Run Name  | getest run
Run Time  | 2020-07-27T17:19:32.959193+00:00





### Batch Markers






 |  |  |
 | ------------  | ------------ |
**ge_load_time**  | **20200727T171932.954810Z**
**pandas_data_fingerprint**  | **8c46fdaf0bd356fd58b7bcd9b2e6012d**





### Batch Kwargs






 |  |  |
 | ------------  | ------------ |
**PandasInMemoryDF**  | **True**
**datasource**  | **getest**
**ge_batch_id**  | **56615f40-d02d-11ea-b6ea-acde48001122**




-----------------------------------------------------------
Powered by [Great Expectations](https://greatexpectations.io/)
""".replace(" ", "").replace("\t", "").replace("\n", ""))