def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect one error.

    The stored result must equal the expected error result, and the affected
    item must be passed to ``display_markdown`` as a markdown link.
    """
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    expected = create_result(
        "JSON Schema Validation",
        {
            Level.ERROR: [
                (
                    "4 items were checked, 1 error(s)",
                    None,
                    {"'price' is a required property": {item_url}},
                )
            ]
        },
    )
    price_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    subject = Arche("source", schema=price_schema)
    subject._source_items = get_job_items
    subject.validate_with_json_schema()

    assert len(subject.report.results) == 1
    assert subject.report.results.get("JSON Schema Validation") == expected
    markdown_mock.assert_any_call(
        f"1 items affected - 'price' is a required property: [1]({item_url})",
        raw=True,
    )
def test_schema_setter(passed_schema_source, set_schema_source, expected_schema):
    """Assigning ``schema`` updates both ``schema_source`` and ``schema.raw``."""
    subject = Arche("source", schema=passed_schema_source)
    assert subject.schema_source == passed_schema_source

    # Re-assign through the setter and check both views of the schema.
    subject.schema = set_schema_source
    assert subject.schema_source == set_schema_source
    assert subject.schema.raw == expected_schema
def test_schema():
    """Check ``schema`` and ``schema_source`` with and without a schema given."""
    with_schema = Arche("source", schema=schema_dummies[0])
    assert with_schema.schema_source == schema_dummies[0]
    assert with_schema.schema.raw == schema_dummies[0]

    # Without a schema argument both attributes must be falsy.
    without_schema = Arche("source")
    assert not without_schema.schema_source
    assert not without_schema.schema
def test_run_all_rules_job(mocker, source_key, target_key):
    """``run_all_rules`` on job items calls every rule group exactly once.

    All rule-group methods of ``Arche`` are patched, so only the dispatch
    performed by ``run_all_rules`` is exercised.
    """
    check_metadata_mock = mocker.patch("arche.Arche.check_metadata", autospec=True)
    compare_metadata_mock = mocker.patch(
        "arche.Arche.compare_metadata", autospec=True
    )
    general_rules_mock = mocker.patch("arche.Arche.run_general_rules", autospec=True)
    comparison_rules_mock = mocker.patch(
        "arche.Arche.run_comparison_rules", autospec=True
    )
    schema_rules_mock = mocker.patch("arche.Arche.run_schema_rules", autospec=True)

    subject = Arche(source=source_key, target=target_key)
    subject._source_items = get_job_items_mock(mocker, key=source_key)
    subject._target_items = get_job_items_mock(mocker, key=target_key)
    subject.run_all_rules()

    check_metadata_mock.assert_called_once_with(subject.source_items.job)
    compare_metadata_mock.assert_called_once_with(
        subject.source_items.job, subject.target_items.job
    )
    general_rules_mock.assert_called_once_with()
    comparison_rules_mock.assert_called_once_with()
    schema_rules_mock.assert_called_once_with(subject)
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect one error.

    The stored result must equal the expected error result (keyed by item key),
    and ``arche.report.HTML`` must be called with an anchor tag pointing at the
    affected item.
    """
    mocked_html = mocker.patch("arche.report.HTML", autospec=True)
    key = "112358/13/21"
    url_base = f"{SH_URL}/{key}/item"
    res = create_result(
        "JSON Schema Validation",
        {
            Level.ERROR: [
                (
                    "4 items were checked, 1 error(s)",
                    None,
                    {"'price' is a required property": {f"{key}/1"}},
                )
            ]
        },
    )
    schema = {"type": "object", "required": ["price"]}

    a = Arche("source", schema=schema)
    a._source_items = get_job_items
    a.validate_with_json_schema()

    assert len(a.report.results) == 1
    assert a.report.results.get("JSON Schema Validation") == res
    mocked_html.assert_any_call(
        f"1 items affected - 'price' is a required property: <a href='{url_base}/1'>1</a>"
    )
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect FAILED.

    The stored result must equal the expected error result with a ``FAILED``
    outcome, and the report extracted from the first ``display_html`` call
    must mention the failed rule.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    expected = create_result(
        "JSON Schema Validation",
        {
            Level.ERROR: [
                (
                    "1 (25%) items have 1 errors",
                    None,
                    {"'price' is a required property": {item_url}},
                )
            ]
        },
    )
    expected.outcome = Outcome.FAILED
    price_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    subject = Arche("source", schema=price_schema)
    subject._source_items = get_job_items
    subject.validate_with_json_schema()

    assert len(subject.report.results) == 1
    assert subject.report.results.get("JSON Schema Validation") == expected
    # First positional argument of the first recorded display call.
    rendered = get_report_from_iframe(display_mock.mock_calls[0][1][0])
    assert "JSON Schema Validation - FAILED" in rendered
def test_schema(passed_schema_source, set_schema_source, expected_schema):
    """Check schema attributes after construction and after an optional set.

    NOTE(review): the source line lost its indentation; the setter call and
    its ``schema_source`` assertion are taken to be conditional, the final
    ``schema`` assertion unconditional - confirm against the original layout.
    """
    subject = Arche("source", schema=passed_schema_source)
    assert subject._schema == passed_schema_source
    assert subject.schema_source == passed_schema_source

    if set_schema_source:
        subject.schema = set_schema_source
        assert subject.schema_source == set_schema_source

    assert subject.schema == expected_schema
def test_get_items_from_bad_source():
    """``Arche.get_items`` raises ``ValueError`` for an unrecognised key."""
    with pytest.raises(ValueError) as excinfo:
        Arche.get_items(source="bad_key", count=1, start=1, filters=None, expand=None)
    # Checked after the context exits, once ``excinfo`` has been populated.
    assert str(excinfo.value) == "'bad_key' is not a valid job or collection key"
def test_get_items_start():
    """``Arche.get_items`` rejects ``start`` for a collection key."""
    collection_kwargs = dict(
        source="112358/collections/s/pages",
        count=1,
        start=1,
        filters=None,
        expand=None,
    )
    with pytest.raises(ValueError) as raised:
        Arche.get_items(**collection_kwargs)

    message = str(raised.value)
    assert message == "Collections API does not support 'start' parameter"
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Successful validation stores an empty result and calls the report once."""
    expected = create_result("JSON Schema Validation", {})
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    subject = Arche("source", schema=get_schema)
    subject._source_items = get_job_items
    subject.validate_with_json_schema()

    report_call_mock.assert_called_once_with(subject.report, expected)
    assert len(subject.report.results) == 1
    assert subject.report.results.get("JSON Schema Validation") == expected
def test_data_quality_report(mocker, get_job_items, get_schema):
    """``data_quality_report`` forwards items, schema, report and bucket."""
    dqr_mock = mocker.patch.object(
        arche, "DataQualityReport", autospec=True, return_value=None
    )

    instance = Arche("source", schema=get_schema)
    instance._source_items = get_job_items
    # Any non-empty placeholder; the test only checks what is forwarded.
    instance.report.results = "some_res"
    instance.data_quality_report("s3")

    dqr_mock.assert_called_with(
        instance.source_items, instance.schema, instance.report, "s3"
    )
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Successful validation stores an INFO result and shows it once."""
    info_messages = [("4 items were checked, 0 error(s)",)]
    expected = create_result("JSON Schema Validation", {Level.INFO: info_messages})
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)

    subject = Arche("source", schema=get_schema)
    subject._source_items = get_job_items
    subject.validate_with_json_schema()

    show_mock.assert_called_once_with(expected)
    assert len(subject.report.results) == 1
    assert subject.report.results.get("JSON Schema Validation") == expected
def test_run_all_rules_job(mocker, get_cloud_items):
    """``run_all_rules`` on DataFrame inputs produces the expected rule keys."""
    source_df = pd.DataFrame(get_cloud_items)
    target_df = pd.DataFrame(get_cloud_items[:2])

    subject = Arche(source=source_df, target=target_df)
    subject.run_all_rules()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    assert expected_rules == subject.report.results.keys()
def test_data_quality_report(mocker):
    """``data_quality_report`` builds the report without calling ``validate``.

    ``report.results`` is pre-populated, and the test asserts that
    ``arche.rules.json_schema.validate`` is never invoked.
    """
    validate_mock = mocker.patch(
        "arche.rules.json_schema.validate", autospec=True, return_value=None
    )
    dqr_mock = mocker.patch.object(
        arche, "DataQualityReport", autospec=True, return_value=None
    )

    draft7_schema = {"$schema": "http://json-schema.org/draft-07/schema"}
    instance = Arche("source", schema=draft7_schema)
    instance._source_items = get_job_items_mock(mocker)
    instance.report.results = "some_res"
    instance.data_quality_report("s3")

    validate_mock.assert_not_called()
    dqr_mock.assert_called_with(
        instance.source_items, instance.schema, instance.report, "s3"
    )
def test_target_equals_source():
    """Constructing ``Arche`` with identical source and target raises."""
    same_key = "0/0/1"
    with pytest.raises(ValueError) as raised:
        Arche(source=same_key, target=same_key)

    expected_message = (
        "'target' is equal to 'source'. Data to compare should have different sources."
    )
    assert str(raised.value) == expected_message
def test_get_items_from_iterable(get_cloud_items):
    """``Arche.get_items`` keeps an iterable source available as ``raw``."""
    fetched = Arche.get_items(
        get_cloud_items,
        start=None,
        count=None,
        filters=None,
        expand=True,
    )
    assert fetched.raw == get_cloud_items
def test_compare_with_customized_rules(mocker, get_job_items):
    """Each customized comparison rule is called once and its result saved.

    The four comparison rules and ``Arche.save_result`` are patched; the test
    checks the arguments passed to each rule and that four results are saved.
    """
    save_result_mock = mocker.patch("arche.Arche.save_result", autospec=True)
    coverage_mock = mocker.patch(
        "arche.rules.category_coverage.compare_coverage_per_category", autospec=True
    )
    prices_by_url_mock = mocker.patch(
        "arche.rules.price.compare_prices_for_same_urls", autospec=True
    )
    names_by_url_mock = mocker.patch(
        "arche.rules.price.compare_names_for_same_urls", autospec=True
    )
    prices_by_name_mock = mocker.patch(
        "arche.rules.price.compare_prices_for_same_names", autospec=True
    )

    source = get_job_items_mock(mocker)
    target = get_job_items_mock(mocker)
    subject = Arche("source")
    subject.compare_with_customized_rules(source, target, {})

    coverage_mock.assert_called_once_with(
        source.key, target.key, source.df, target.df, {}
    )
    prices_by_url_mock.assert_called_once_with(source.df, target.df, {})
    names_by_url_mock.assert_called_once_with(source.df, target.df, {})
    prices_by_name_mock.assert_called_once_with(source.df, target.df, {})
    assert save_result_mock.call_count == 4
def test_report_all(mocker, get_cloud_items):
    """``report_all`` runs the expected rules and calls the report once."""
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    source_df = pd.DataFrame(get_cloud_items)
    source_df["b"] = True
    target_df = pd.DataFrame(get_cloud_items[:2])

    subject = Arche(source=source_df, target=target_df)
    subject.report_all()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    assert expected_rules == subject.report.results.keys()
    report_call_mock.assert_called_once_with(subject.report, keys_limit=None)
def test_compare_with_customized_rules_none_target(mocker, get_job_items):
    """With no target items the comparison returns a falsy value and skips rules."""
    difference_mock = mocker.patch(
        "arche.rules.category.get_difference", autospec=True
    )

    subject = Arche("key")
    outcome = subject.compare_with_customized_rules(
        source_items=get_job_items,
        target_items=None,
        tagged_fields={},
    )

    assert not outcome
    difference_mock.assert_not_called()
def test_arche_dataframe(mocker):
    """Run ``report_all`` on DataFrame inputs and check every expected rule.

    A second, unpatched ``report_all`` call with a mismatching schema type is
    only checked for returning ``None``.
    """
    subject = Arche(
        source=pd.DataFrame({"c": [0, 1]}),
        schema={"properties": {"c": {"type": "integer"}}},
        target=pd.DataFrame({"c": [1, 1]}),
    )
    mocker.patch("arche.report.Report.write_details", autospec=True)
    subject.report_all()

    expected_rules = [
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "JSON Schema Validation",
        "Tags",
        "Compare Price Was And Now",
        "Duplicates By **unique** Tag",
        "Duplicates By **name_field, product_url_field** Tags",
        "Coverage For Scraped Categories",
        "Category Coverage Difference",
        "Compare Prices For Same Urls",
        "Compare Names Per Url",
        "Compare Prices For Same Names",
    ]
    for rule_name in expected_rules:
        assert subject.report.results.get(rule_name)

    assert subject.report.results.get("JSON Schema Validation").errors is None
    assert (
        subject.report.results.get("JSON Schema Validation").info[0].summary
        == "2 items were checked, 0 error(s)"
    )

    string_schema = {"properties": {"c": {"type": "string"}}}
    keyed_source = pd.DataFrame({"_key": ["0", "1"], "c": [0, 1]})
    assert Arche(keyed_source, schema=string_schema).report_all() is None
def test_report_all(mocker, get_cloud_items):
    """``report_all`` writes summaries, a separator, and runs expected rules."""
    write_summaries_mock = mocker.patch(
        "arche.report.Report.write_summaries", autospec=True
    )
    write_mock = mocker.patch("arche.report.Report.write", autospec=True)

    source_df = pd.DataFrame(get_cloud_items)
    source_df["b"] = True
    target_df = pd.DataFrame(get_cloud_items[:2])

    subject = Arche(source=source_df, target=target_df)
    subject.report_all()

    expected_rules = {
        "Garbage Symbols",
        "Fields Coverage",
        "Scraped Fields",
        "Boolean Fields",
        "Categories",
    }
    write_summaries_mock.assert_called_once_with(subject.report)
    write_mock.assert_called_once_with("\n" * 2)
    assert expected_rules == subject.report.results.keys()
def test_validate_with_json_schema(mocker):
    """Validation result is saved and shown; ``validate`` gets schema and dicts."""
    save_result_mock = mocker.patch("arche.Arche.save_result", autospec=True)
    fine_result = Result("fine")
    validate_mock = mocker.patch(
        "arche.rules.json_schema.validate", autospec=True, return_value=fine_result
    )
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)

    draft7_schema = {"$schema": "http://json-schema.org/draft-07/schema"}
    subject = Arche("source", schema=draft7_schema)
    subject._source_items = get_job_items_mock(mocker)
    subject.validate_with_json_schema()

    validate_mock.assert_called_once_with(
        subject.schema, subject.source_items.dicts, False
    )
    save_result_mock.assert_called_once_with(subject, fine_result)
    show_mock.assert_called_once_with(fine_result)
def test_get_items(mocker, get_items, source, start, count, filters, expand):
    """``Arche.get_items`` stores the request parameters on the returned items.

    ``JobItems.fetch_data`` is patched to return the ``get_items`` fixture.
    """
    mocker.patch(
        "arche.readers.items.JobItems.fetch_data",
        return_value=get_items,
        autospec=True,
    )

    fetched = Arche.get_items(
        source=source,
        start=start,
        count=count,
        filters=filters,
        expand=expand,
    )

    assert fetched.key == source
    assert fetched.count == count
    assert fetched.filters == filters
    assert fetched.expand == expand
    assert fetched.start_index == start
def test_run_all_rules_collection(mocker, get_collection_items):
    """``run_all_rules`` on collection items skips both metadata checks.

    All rule-group methods of ``Arche`` are patched, so only the dispatch
    performed by ``run_all_rules`` is exercised.
    """
    check_metadata_mock = mocker.patch("arche.Arche.check_metadata", autospec=True)
    compare_metadata_mock = mocker.patch(
        "arche.Arche.compare_metadata", autospec=True
    )
    general_rules_mock = mocker.patch("arche.Arche.run_general_rules", autospec=True)
    comparison_rules_mock = mocker.patch(
        "arche.Arche.run_comparison_rules", autospec=True
    )
    schema_rules_mock = mocker.patch("arche.Arche.run_schema_rules", autospec=True)

    subject = Arche(source="collection_key")
    subject._source_items = get_collection_items
    subject.run_all_rules()

    check_metadata_mock.assert_not_called()
    compare_metadata_mock.assert_not_called()
    general_rules_mock.assert_called_once_with(subject)
    comparison_rules_mock.assert_called_once_with()
    schema_rules_mock.assert_called_once_with(subject)
def test_get_items_from_collection(mocker, get_items, source, count, filters, expand):
    """``Arche.get_items`` on a collection key keeps the request parameters.

    ``CollectionItems.fetch_data`` is patched to return the ``get_items``
    fixture; the resulting count is asserted to be 5.
    """
    mocker.patch(
        "arche.readers.items.CollectionItems.fetch_data",
        return_value=get_items,
        autospec=True,
    )

    fetched = Arche.get_items(
        source=source,
        count=count,
        start=0,
        filters=filters,
        expand=expand,
    )

    assert fetched.key == source
    assert fetched.count == 5
    assert fetched.filters == filters
    assert fetched.expand == expand
def test_report_all(mocker):
    """``report_all`` runs all rules, then writes summary, separator, details."""
    run_all_mock = mocker.patch("arche.Arche.run_all_rules", autospec=True)
    write_summary_mock = mocker.patch(
        "arche.report.Report.write_summary", autospec=True
    )
    write_mock = mocker.patch("arche.report.Report.write", autospec=True)
    write_details_mock = mocker.patch(
        "arche.report.Report.write_details", autospec=True
    )

    subject = Arche("source")
    subject.report_all()

    run_all_mock.assert_called_once_with(subject)
    write_summary_mock.assert_called_once_with(subject.report)
    write_mock.assert_called_once_with(subject.report, "\n" * 2)
    write_details_mock.assert_called_once_with(subject.report, short=True)
def test_report_all(mocker):
    """``report_all`` runs all rules, then writes summaries, separator, details."""
    run_all_mock = mocker.patch("arche.Arche.run_all_rules", autospec=True)
    write_summaries_mock = mocker.patch(
        "arche.report.Report.write_summaries", autospec=True
    )
    # autospec and classmethod bug https://github.com/python/cpython/pull/11613
    write_mock = mocker.patch("arche.report.Report.write", autospec=False)
    write_details_mock = mocker.patch(
        "arche.report.Report.write_details", autospec=True
    )

    subject = Arche("source")
    subject.report_all()

    run_all_mock.assert_called_once_with(subject)
    write_summaries_mock.assert_called_once_with(subject.report)
    write_mock.assert_called_once_with("\n" * 2)
    write_details_mock.assert_called_once_with(subject.report, short=True)
def test_get_items(mocker, get_raw_items, source, start, count, filters, expected_start):
    """``Arche.get_items`` on a job key stores key, count, filters and start.

    ``JobItems.fetch_data``, ``api.get_items_count`` and ``JobItems.job`` are
    patched. The items count reported by the patched API equals
    ``len(get_raw_items)``.
    """
    mocker.patch(
        "arche.readers.items.JobItems.fetch_data",
        return_value=get_raw_items,
        autospec=True,
    )
    mocker.patch(
        "arche.readers.items.api.get_items_count",
        return_value=len(get_raw_items),
        autospec=True,
    )
    mocker.patch("arche.readers.items.JobItems.job", autospec=True)

    items = Arche.get_items(source=source, start=start, count=count, filters=filters)

    assert items.key == source
    # Parenthesised on purpose: ``==`` binds tighter than ``or``, so without
    # the parentheses the assertion passes for any non-empty raw items.
    assert items.count == (count or len(get_raw_items))
    assert items.filters == filters
    assert items.start_index == expected_start
    assert items.start == f"{source}/{expected_start}"
def test_data_quality_report_fails(source, expected_message):
    """``data_quality_report`` raises ``ValueError`` with the expected text."""
    with pytest.raises(ValueError) as raised:
        Arche(source).data_quality_report()

    actual_message = str(raised.value)
    assert actual_message == expected_message
def test_arche_df(get_df):
    """A DataFrame given as source and target is exposed through ``.df``."""
    subject = Arche(source=get_df, target=get_df)

    pd.testing.assert_frame_equal(subject.source_items.df, get_df)
    pd.testing.assert_frame_equal(subject.target_items.df, get_df)