Beispiel #1
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Check that a missing required ``price`` is reported as a schema error.

    The expected result carries one ERROR message whose details map the
    error text to the URL of item 1. The same error must also be passed to
    the patched ``arche.report.display_markdown``.
    """
    display_markdown_mock = mocker.patch(
        "arche.report.display_markdown", autospec=True
    )
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_details = {"'price' is a required property": {item_url}}
    expected = create_result(
        "JSON Schema Validation",
        {Level.ERROR: [("4 items were checked, 1 error(s)", None, error_details)]},
    )
    price_required_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    arche = Arche("source", schema=price_required_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    display_markdown_mock.assert_any_call(
        f"1 items affected - 'price' is a required property: [1]({item_url})",
        raw=True,
    )
Beispiel #2
0
def test_schema_setter(passed_schema_source, set_schema_source,
                       expected_schema):
    """Check that assigning ``schema`` updates both the source and the raw schema."""
    subject = Arche("source", schema=passed_schema_source)
    # The constructor argument is exposed unchanged as the schema source.
    assert subject.schema_source == passed_schema_source

    subject.schema = set_schema_source

    assert subject.schema_source == set_schema_source
    assert subject.schema.raw == expected_schema
Beispiel #3
0
def test_schema():
    """Check schema attributes both with and without a schema argument."""
    first_dummy = schema_dummies[0]

    with_schema = Arche("source", schema=first_dummy)
    assert with_schema.schema_source == first_dummy
    assert with_schema.schema.raw == first_dummy

    # Without a schema argument both attributes must be falsy.
    without_schema = Arche("source")
    assert not without_schema.schema_source
    assert not without_schema.schema
Beispiel #4
0
def test_run_all_rules_job(mocker, source_key, target_key):
    """Check which ``Arche`` steps ``run_all_rules`` triggers for job items.

    Every step is patched on the class, so only the calls are verified.
    """
    patched = {
        method_name: mocker.patch(f"arche.Arche.{method_name}", autospec=True)
        for method_name in (
            "check_metadata",
            "compare_metadata",
            "run_general_rules",
            "run_comparison_rules",
            "run_schema_rules",
        )
    }

    arche = Arche(source=source_key, target=target_key)
    arche._source_items = get_job_items_mock(mocker, key=source_key)
    arche._target_items = get_job_items_mock(mocker, key=target_key)
    arche.run_all_rules()

    # NOTE(review): only run_schema_rules is asserted with the instance as
    # first argument; presumably the other methods are wrapped so that
    # autospec does not bind ``self`` - confirm against the Arche class.
    patched["check_metadata"].assert_called_once_with(arche.source_items.job)
    patched["compare_metadata"].assert_called_once_with(
        arche.source_items.job, arche.target_items.job
    )
    patched["run_general_rules"].assert_called_once_with()
    patched["run_comparison_rules"].assert_called_once_with()
    patched["run_schema_rules"].assert_called_once_with(arche)
Beispiel #5
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Check that a missing required ``price`` is reported and rendered as HTML.

    The expected result carries one ERROR message whose details map the
    error text to the item key ``112358/13/21/1``. The patched
    ``arche.report.HTML`` must receive a line linking to that item under
    ``SH_URL``.
    """
    mocked_html = mocker.patch("arche.report.HTML", autospec=True)
    # No interpolation is needed here, so this is a plain string literal.
    key = "112358/13/21"
    url_base = f"{SH_URL}/{key}/item"
    res = create_result(
        "JSON Schema Validation",
        {
            Level.ERROR: [
                (
                    "4 items were checked, 1 error(s)",
                    None,
                    {"'price' is a required property": {f"{key}/1"}},
                )
            ]
        },
    )
    schema = {"type": "object", "required": ["price"]}
    a = Arche("source", schema=schema)
    a._source_items = get_job_items
    a.validate_with_json_schema()

    assert len(a.report.results) == 1
    assert a.report.results.get("JSON Schema Validation") == res
    mocked_html.assert_any_call(
        f"1 items affected - 'price' is a required property: <a href='{url_base}/1'>1</a>"
    )
Beispiel #6
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Check that a missing required ``price`` yields a FAILED schema result.

    The expected result has its outcome set to ``Outcome.FAILED``. The HTML
    passed to the patched ``arche.report.display_html`` must contain the
    failed rule title.
    """
    display_html_mock = mocker.patch("arche.report.display_html", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_details = {"'price' is a required property": {item_url}}
    expected = create_result(
        "JSON Schema Validation",
        {Level.ERROR: [("1 (25%) items have 1 errors", None, error_details)]},
    )
    expected.outcome = Outcome.FAILED
    price_required_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    arche = Arche("source", schema=price_required_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    # A recorded mock call unpacks as (name, args, kwargs); the rendered
    # report is the first positional argument of the first call.
    first_call = display_html_mock.mock_calls[0]
    report_html = get_report_from_iframe(first_call[1][0])
    assert "JSON Schema Validation - FAILED" in report_html
Beispiel #7
0
def test_schema(passed_schema_source, set_schema_source, expected_schema):
    """Check schema attributes after construction and after an optional reassignment."""
    subject = Arche("source", schema=passed_schema_source)
    assert subject._schema == passed_schema_source
    assert subject.schema_source == passed_schema_source

    # The setter is only exercised when a replacement source is provided.
    if set_schema_source:
        subject.schema = set_schema_source
        assert subject.schema_source == set_schema_source

    assert subject.schema == expected_schema
Beispiel #8
0
def test_get_items_from_bad_source():
    """Check that ``Arche.get_items`` raises ``ValueError`` for an invalid key."""
    with pytest.raises(ValueError) as excinfo:
        Arche.get_items(
            source="bad_key", count=1, start=1, filters=None, expand=None
        )
    # Plain literal: the expected message needs no interpolation.
    assert str(excinfo.value) == "'bad_key' is not a valid job or collection key"
Beispiel #9
0
def test_get_items_start():
    """Check that a collection source combined with ``start`` raises ``ValueError``."""
    request = {
        "source": "112358/collections/s/pages",
        "count": 1,
        "start": 1,
        "filters": None,
        "expand": None,
    }
    with pytest.raises(ValueError) as excinfo:
        Arche.get_items(**request)

    message = str(excinfo.value)
    assert message == "Collections API does not support 'start' parameter"
Beispiel #10
0
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Check that schema validation stores one result and calls the report with it.

    ``Report.__call__`` is patched, so only the call is verified.
    """
    expected = create_result("JSON Schema Validation", {})
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    report_call_mock.assert_called_once_with(arche.report, expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
Beispiel #11
0
def test_data_quality_report(mocker, get_job_items, get_schema):
    """Check the arguments ``data_quality_report`` passes to ``DataQualityReport``."""
    dqr_mock = mocker.patch.object(
        arche, "DataQualityReport", autospec=True, return_value=None
    )

    subject = Arche("source", schema=get_schema)
    subject._source_items = get_job_items
    subject.report.results = "some_res"
    subject.data_quality_report("s3")

    dqr_mock.assert_called_with(
        subject.source_items, subject.schema, subject.report, "s3"
    )
Beispiel #12
0
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Check that a clean validation stores an INFO result and shows it once.

    ``Result.show`` is patched, so only the call is verified.
    """
    info_messages = {Level.INFO: [("4 items were checked, 0 error(s)", )]}
    expected = create_result("JSON Schema Validation", info_messages)
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    show_mock.assert_called_once_with(expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
Beispiel #13
0
def test_run_all_rules_job(mocker, get_cloud_items):
    """Run all rules on DataFrame inputs and check the exact set of result names."""
    source_frame = pd.DataFrame(get_cloud_items)
    # The target is built from the first two cloud items only.
    target_frame = pd.DataFrame(get_cloud_items[:2])
    arche = Arche(source=source_frame, target=target_frame)
    arche.run_all_rules()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    assert expected_rules == arche.report.results.keys()
Beispiel #14
0
def test_data_quality_report(mocker):
    """Check that ``data_quality_report`` builds the report without validating.

    ``arche.rules.json_schema.validate`` is patched and must not be called;
    ``DataQualityReport`` must receive the items, schema, report and "s3".
    """
    validate_mock = mocker.patch(
        "arche.rules.json_schema.validate", autospec=True, return_value=None
    )
    dqr_mock = mocker.patch.object(
        arche, "DataQualityReport", autospec=True, return_value=None
    )
    draft7_schema = {"$schema": "http://json-schema.org/draft-07/schema"}

    subject = Arche("source", schema=draft7_schema)
    subject._source_items = get_job_items_mock(mocker)
    subject.report.results = "some_res"
    subject.data_quality_report("s3")

    validate_mock.assert_not_called()
    dqr_mock.assert_called_with(
        subject.source_items, subject.schema, subject.report, "s3"
    )
Beispiel #15
0
def test_target_equals_source():
    """Check that identical source and target keys raise ``ValueError``."""
    with pytest.raises(ValueError) as excinfo:
        Arche(source="0/0/1", target="0/0/1")

    message = str(excinfo.value)
    assert (
        message ==
        "'target' is equal to 'source'. Data to compare should have different sources."
    )
Beispiel #16
0
def test_get_items_from_iterable(get_cloud_items):
    """Check that items built from an iterable expose it unchanged as ``raw``."""
    loaded = Arche.get_items(
        get_cloud_items, start=None, count=None, filters=None, expand=True
    )
    assert loaded.raw == get_cloud_items
Beispiel #17
0
def test_compare_with_customized_rules(mocker, get_job_items):
    """Check that each customized comparison rule is called once and saved.

    The four rule functions and ``Arche.save_result`` are all patched, so
    only the call arguments and the number of saved results are verified.
    """
    save_result_mock = mocker.patch("arche.Arche.save_result", autospec=True)
    coverage_mock = mocker.patch(
        "arche.rules.category_coverage.compare_coverage_per_category", autospec=True
    )
    price_url_mock = mocker.patch(
        "arche.rules.price.compare_prices_for_same_urls", autospec=True
    )
    name_url_mock = mocker.patch(
        "arche.rules.price.compare_names_for_same_urls", autospec=True
    )
    price_name_mock = mocker.patch(
        "arche.rules.price.compare_prices_for_same_names", autospec=True
    )

    source = get_job_items_mock(mocker)
    target = get_job_items_mock(mocker)
    Arche("source").compare_with_customized_rules(source, target, {})

    # The coverage rule additionally receives both item keys.
    coverage_mock.assert_called_once_with(
        source.key, target.key, source.df, target.df, {}
    )
    for rule_mock in (price_url_mock, name_url_mock, price_name_mock):
        rule_mock.assert_called_once_with(source.df, target.df, {})

    # One saved result per rule.
    assert save_result_mock.call_count == 4
Beispiel #18
0
def test_report_all(mocker, get_cloud_items):
    """Check the rules reported by ``report_all`` and the single report call.

    ``Report.__call__`` is patched, so only the call is verified.
    """
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    source_frame = pd.DataFrame(get_cloud_items)
    source_frame["b"] = True
    target_frame = pd.DataFrame(get_cloud_items[:2])
    arche = Arche(source=source_frame, target=target_frame)
    arche.report_all()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    assert expected_rules == arche.report.results.keys()
    report_call_mock.assert_called_once_with(arche.report, keys_limit=None)
Beispiel #19
0
def test_compare_with_customized_rules_none_target(mocker, get_job_items):
    """Check that comparison is skipped when no target items are given.

    The method must return a falsy value and must not call the patched
    ``arche.rules.category.get_difference``.
    """
    get_difference_mock = mocker.patch(
        "arche.rules.category.get_difference", autospec=True
    )
    subject = Arche("key")

    outcome = subject.compare_with_customized_rules(
        source_items=get_job_items, target_items=None, tagged_fields={}
    )

    assert not outcome
    get_difference_mock.assert_not_called()
Beispiel #20
0
def test_arche_dataframe(mocker):
    """Run ``report_all`` on DataFrame inputs and check the reported rules.

    Also checks that schema validation of the integer column ``c`` reports
    no errors, and that ``report_all`` returns ``None`` for a second
    instance whose schema types ``c`` as a string.
    """
    integer_schema = {"properties": {"c": {"type": "integer"}}}
    a = Arche(
        source=pd.DataFrame({"c": [0, 1]}),
        schema=integer_schema,
        target=pd.DataFrame({"c": [1, 1]}),
    )
    mocker.patch("arche.report.Report.write_details", autospec=True)
    a.report_all()

    expected_rules = [
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "JSON Schema Validation",
        "Tags",
        "Compare Price Was And Now",
        "Duplicates By **unique** Tag",
        "Duplicates By **name_field, product_url_field** Tags",
        "Coverage For Scraped Categories",
        "Category Coverage Difference",
        "Compare Prices For Same Urls",
        "Compare Names Per Url",
        "Compare Prices For Same Names",
    ]
    # Every listed rule must have produced a truthy result entry.
    for rule_name in expected_rules:
        assert a.report.results.get(rule_name)

    assert a.report.results.get("JSON Schema Validation").errors is None
    first_info = a.report.results.get("JSON Schema Validation").info[0]
    assert first_info.summary == "2 items were checked, 0 error(s)"

    string_schema = {"properties": {"c": {"type": "string"}}}
    keyed_frame = pd.DataFrame({"_key": ["0", "1"], "c": [0, 1]})
    assert Arche(keyed_frame, schema=string_schema).report_all() is None
Beispiel #21
0
def test_report_all(mocker, get_cloud_items):
    """Check the report writes made by ``report_all`` and the reported rules.

    ``Report.write_summaries`` and ``Report.write`` are patched, so only
    their calls are verified.
    """
    write_summaries_mock = mocker.patch(
        "arche.report.Report.write_summaries", autospec=True
    )
    write_mock = mocker.patch("arche.report.Report.write", autospec=True)

    source_frame = pd.DataFrame(get_cloud_items)
    source_frame["b"] = True
    target_frame = pd.DataFrame(get_cloud_items[:2])
    arche = Arche(source=source_frame, target=target_frame)
    arche.report_all()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    write_summaries_mock.assert_called_once_with(arche.report)
    write_mock.assert_called_once_with("\n" * 2)
    assert expected_rules == arche.report.results.keys()
Beispiel #22
0
def test_validate_with_json_schema(mocker):
    """Check how ``validate_with_json_schema`` wires validate, save and show.

    The validator, ``Arche.save_result`` and ``Result.show`` are all patched;
    the validator returns a prepared ``Result`` that must be saved and shown.
    """
    save_result_mock = mocker.patch("arche.Arche.save_result", autospec=True)
    validation_result = Result("fine")
    validate_mock = mocker.patch(
        "arche.rules.json_schema.validate",
        autospec=True,
        return_value=validation_result,
    )
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)
    draft7_schema = {"$schema": "http://json-schema.org/draft-07/schema"}

    subject = Arche("source", schema=draft7_schema)
    subject._source_items = get_job_items_mock(mocker)
    subject.validate_with_json_schema()

    validate_mock.assert_called_once_with(
        subject.schema, subject.source_items.dicts, False
    )
    save_result_mock.assert_called_once_with(subject, validation_result)
    show_mock.assert_called_once_with(validation_result)
Beispiel #23
0
def test_get_items(mocker, get_items, source, start, count, filters, expand):
    """Check that job items keep the arguments passed to ``Arche.get_items``.

    ``JobItems.fetch_data`` is patched to return the ``get_items`` fixture.
    """
    mocker.patch(
        "arche.readers.items.JobItems.fetch_data",
        return_value=get_items,
        autospec=True,
    )
    items = Arche.get_items(
        source=source, start=start, count=count, filters=filters, expand=expand
    )

    expected_attributes = (
        ("key", source),
        ("count", count),
        ("filters", filters),
        ("expand", expand),
        ("start_index", start),
    )
    for attribute, expected in expected_attributes:
        assert getattr(items, attribute) == expected
Beispiel #24
0
def test_run_all_rules_collection(mocker, get_collection_items):
    """Check which ``Arche`` steps ``run_all_rules`` triggers for collection items.

    Metadata steps must be skipped; the rule runners must each be called once.
    """
    def patch_arche(method_name):
        # Patch a single Arche attribute on the class with autospec enabled.
        return mocker.patch(f"arche.Arche.{method_name}", autospec=True)

    check_metadata_mock = patch_arche("check_metadata")
    compare_metadata_mock = patch_arche("compare_metadata")
    general_rules_mock = patch_arche("run_general_rules")
    comparison_rules_mock = patch_arche("run_comparison_rules")
    schema_rules_mock = patch_arche("run_schema_rules")

    subject = Arche(source="collection_key")
    subject._source_items = get_collection_items
    subject.run_all_rules()

    for metadata_mock in (check_metadata_mock, compare_metadata_mock):
        metadata_mock.assert_not_called()
    # NOTE(review): run_comparison_rules is asserted without the instance
    # argument, unlike its siblings; presumably it is wrapped so autospec
    # does not bind ``self`` - confirm against the Arche class.
    general_rules_mock.assert_called_once_with(subject)
    comparison_rules_mock.assert_called_once_with()
    schema_rules_mock.assert_called_once_with(subject)
Beispiel #25
0
def test_get_items_from_collection(mocker, get_items, source, count, filters, expand):
    """Check the attributes of collection items returned by ``Arche.get_items``.

    ``CollectionItems.fetch_data`` is patched to return the ``get_items`` fixture.
    """
    mocker.patch(
        "arche.readers.items.CollectionItems.fetch_data",
        return_value=get_items,
        autospec=True,
    )
    items = Arche.get_items(
        source=source, count=count, start=0, filters=filters, expand=expand
    )

    expected_attributes = (
        ("key", source),
        ("count", 5),
        ("filters", filters),
        ("expand", expand),
    )
    for attribute, expected in expected_attributes:
        assert getattr(items, attribute) == expected
Beispiel #26
0
def test_report_all(mocker):
    """Check the sequence of collaborators ``report_all`` calls.

    ``run_all_rules`` and the three ``Report`` writers are patched, so only
    their call arguments are verified.
    """
    run_all_mock = mocker.patch("arche.Arche.run_all_rules", autospec=True)
    write_summary_mock = mocker.patch(
        "arche.report.Report.write_summary", autospec=True
    )
    write_mock = mocker.patch("arche.report.Report.write", autospec=True)
    write_details_mock = mocker.patch(
        "arche.report.Report.write_details", autospec=True
    )

    subject = Arche("source")
    subject.report_all()

    run_all_mock.assert_called_once_with(subject)
    write_summary_mock.assert_called_once_with(subject.report)
    write_mock.assert_called_once_with(subject.report, "\n" * 2)
    write_details_mock.assert_called_once_with(subject.report, short=True)
Beispiel #27
0
def test_report_all(mocker):
    """Check the sequence of collaborators ``report_all`` calls.

    ``run_all_rules`` and the three ``Report`` writers are patched, so only
    their call arguments are verified.
    """
    run_all_mock = mocker.patch("arche.Arche.run_all_rules", autospec=True)
    write_summaries_mock = mocker.patch(
        "arche.report.Report.write_summaries", autospec=True
    )
    # autospec and classmethod bug https://github.com/python/cpython/pull/11613
    write_mock = mocker.patch("arche.report.Report.write", autospec=False)
    write_details_mock = mocker.patch(
        "arche.report.Report.write_details", autospec=True
    )

    subject = Arche("source")
    subject.report_all()

    run_all_mock.assert_called_once_with(subject)
    write_summaries_mock.assert_called_once_with(subject.report)
    write_mock.assert_called_once_with("\n" * 2)
    write_details_mock.assert_called_once_with(subject.report, short=True)
Beispiel #28
0
def test_get_items(mocker, get_raw_items, source, start, count, filters,
                   expected_start):
    """Check the attributes of job items returned by ``Arche.get_items``.

    ``JobItems.fetch_data``, ``api.get_items_count`` and ``JobItems.job`` are
    patched; the first two return ``get_raw_items`` and its length.
    """
    mocker.patch(
        "arche.readers.items.JobItems.fetch_data",
        return_value=get_raw_items,
        autospec=True,
    )
    mocker.patch(
        "arche.readers.items.api.get_items_count",
        return_value=len(get_raw_items),
        autospec=True,
    )
    mocker.patch("arche.readers.items.JobItems.job", autospec=True)
    items = Arche.get_items(source=source,
                            start=start,
                            count=count,
                            filters=filters)
    assert items.key == source
    # Parenthesised on purpose: ``a == b or c`` parses as ``(a == b) or c``,
    # which passes whenever ``c`` is truthy and never checks ``items.count``.
    assert items.count == (count or len(get_raw_items))
    assert items.filters == filters
    assert items.start_index == expected_start
    assert items.start == f"{source}/{expected_start}"
Beispiel #29
0
def test_data_quality_report_fails(source, expected_message):
    """Check that ``data_quality_report`` raises ``ValueError`` with the expected text."""
    with pytest.raises(ValueError) as excinfo:
        Arche(source).data_quality_report()

    message = str(excinfo.value)
    assert message == expected_message
Beispiel #30
0
def test_arche_df(get_df):
    """Check that DataFrame source and target are exposed unchanged as ``df``."""
    subject = Arche(source=get_df, target=get_df)

    source_frame = subject.source_items.df
    pd.testing.assert_frame_equal(source_frame, get_df)

    target_frame = subject.target_items.df
    pd.testing.assert_frame_equal(target_frame, get_df)