def test_get_difference(source_stats, target_stats, expected_messages, expected_stats):
    """Check `cov.get_difference` for two jobs against the expected result.

    The jobs are built from `source_stats` and `target_stats` with the keys
    "s" and "t"; the outcome must equal a "Coverage Difference" result.
    """
    source_job = Job(stats=source_stats, key="s")
    target_job = Job(stats=target_stats, key="t")
    actual = cov.get_difference(source_job, target_job)
    expected = create_result(
        "Coverage Difference", expected_messages, stats=expected_stats
    )
    assert actual == expected
def test_check_errors(get_job, error_count, expected_messages):
    """Run `check_errors` on a job whose metadata carries `error_count`.

    The job's `metadata` is set to `{"scrapystats": error_count}` and its key
    to "112358/13/21" before the rule is invoked.
    """
    checked_job = get_job
    checked_job.metadata = {"scrapystats": error_count}
    checked_job.key = "112358/13/21"
    actual = check_errors(checked_job)
    expected = create_result("Job Errors", expected_messages)
    assert actual == expected
def test_compare_prices_for_same_urls(
    source_data, target_data, tagged_fields, expected_messages
):
    """Compare `p.compare_prices_for_same_urls` output with the expected result.

    Both data sets are wrapped in DataFrames before being passed to the rule.
    """
    source_df = pd.DataFrame(source_data)
    target_df = pd.DataFrame(target_data)
    actual = p.compare_prices_for_same_urls(source_df, target_df, tagged_fields)
    expected = create_result("Compare Prices For Same Urls", expected_messages)
    assert actual == expected
def test_show(mocker, capsys, message, stats, outputs):
    """Check that `Result.show()` plots the first figure and emits markdown.

    `plotly.io.show` and `arche.report.display_markdown` are patched; the
    former must be called exactly once with `res.figures[0]`, the latter must
    have been called with each entry of `outputs` as a consecutive sequence.
    """
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    mocked_md = mocker.patch("arche.report.display_markdown", autospec=True)
    res = create_result("test show", message, stats=stats)
    res.show()
    mock_pio_show.assert_called_once_with(res.figures[0])
    # Pass a list rather than a generator: a generator is exhausted while the
    # calls are matched, so a failing assertion could not list what was expected.
    mocked_md.assert_has_calls([mocker.call(o) for o in outputs])
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring `price`; expect a FAILED report.

    `arche.report.display_html` is patched so the rendered report can be read
    back from the first recorded call through `get_report_from_iframe`.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_details = {"'price' is a required property": {item_url}}
    expected = create_result(
        "JSON Schema Validation",
        {Level.ERROR: [("1 (25%) items have 1 errors", None, error_details)]},
    )
    expected.outcome = Outcome.FAILED
    price_required_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    arche = Arche("source", schema=price_required_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    # mock_calls[0][1] holds the positional args of the first display call.
    first_call = display_mock.mock_calls[0]
    rendered_report = get_report_from_iframe(first_call[1][0])
    assert "JSON Schema Validation - FAILED" in rendered_report
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring `price`; expect one error.

    `arche.report.HTML` is patched and must have been called with the
    "items affected" line that links to the failing item's URL.
    """
    mocked_html = mocker.patch("arche.report.HTML", autospec=True)
    key = "112358/13/21"
    url_base = f"{SH_URL}/{key}/item"
    res = create_result(
        "JSON Schema Validation",
        {
            Level.ERROR: [
                (
                    "4 items were checked, 1 error(s)",
                    None,
                    {"'price' is a required property": {f"{key}/1"}},
                )
            ]
        },
    )
    schema = {"type": "object", "required": ["price"]}

    a = Arche("source", schema=schema)
    a._source_items = get_job_items
    a.validate_with_json_schema()

    assert len(a.report.results) == 1
    assert a.report.results.get("JSON Schema Validation") == res
    mocked_html.assert_any_call(
        f"1 items affected - 'price' is a required property: <a href='{url_base}/1'>1</a>"
    )
def test_compare_boolean_fields(
    source_data, target_data, expected_messages, expected_stats
):
    """Check `compare_boolean_fields` on two DataFrames against the expectation."""
    actual = compare_boolean_fields(
        pd.DataFrame(source_data), pd.DataFrame(target_data)
    )
    expected = create_result("Boolean Fields", expected_messages, expected_stats)
    assert actual == expected
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring `price`; expect one error.

    `arche.report.display_markdown` is patched and must have been called with
    the "items affected" markdown line (with `raw=True`) linking to the item.
    """
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_details = {"'price' is a required property": {item_url}}
    expected = create_result(
        "JSON Schema Validation",
        {Level.ERROR: [("4 items were checked, 1 error(s)", None, error_details)]},
    )
    price_required_schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }

    arche = Arche("source", schema=price_required_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    affected_line = (
        f"1 items affected - 'price' is a required property: [1]({item_url})"
    )
    markdown_mock.assert_any_call(affected_line, raw=True)
def test_find_by(data, columns, expected_messages, expected_err_items_count):
    """Check `duplicates.find_by` for the given columns against the expectation.

    The expected "Duplicates" result uses the frame length as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_garbage_symbols(
    raw_items, expected_messages, expected_items_count, expected_err_items_count
):
    """Check `garbage_symbols` on a DataFrame built from `raw_items`."""
    actual = garbage_symbols(pd.DataFrame(raw_items))
    expected = create_result(
        "Garbage Symbols",
        expected_messages,
        items_count=expected_items_count,
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_compare_response_ratio(
    source_stats, source_metadata, target_stats, target_metadata, expected_messages
):
    """Check `compare_response_ratio` for two jobs built from stats and metadata."""
    source = Job(stats=source_stats, metadata=source_metadata)
    target = Job(stats=target_stats, metadata=target_metadata)
    actual = compare_response_ratio(source, target)
    expected = create_result("Compare Responses Per Item Ratio", expected_messages)
    assert actual == expected
def test_garbage_symbols(
    get_job_items, expected_messages, expected_items_count, expected_err_items_count
):
    """Check `garbage_symbols` when given `get_job_items` directly."""
    actual = garbage_symbols(get_job_items)
    expected = create_result(
        "Garbage Symbols",
        expected_messages,
        items_count=expected_items_count,
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_find_by_name_url(data, tagged_fields, expected_messages):
    """Check `duplicates.find_by_name_url` against the expected result.

    The expected result uses the frame length as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by_name_url(frame, tagged_fields)
    expected = create_result(
        "Duplicates By **name_field, product_url_field** Tags",
        expected_messages,
        items_count=len(frame),
    )
    assert actual == expected
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """Check `Report.write_details()` plotting and captured stdout.

    One result per entry of `messages` is saved into a fresh `Report`. After
    writing details, `plotly.io.show` must have last been called with the
    first figure of the last saved result, and stdout must equal
    `expected_details`.
    """
    show_mock = mocker.patch("plotly.io.show", autospec=True)
    report = Report()
    for result_args in messages:
        last_result = create_result(*result_args, stats=[get_df])
        report.save(last_result)
    report.write_details()
    show_mock.assert_called_with(last_result.figures[0])
    captured = capsys.readouterr()
    assert captured.out == expected_details
def test_compare_finish_time(
    get_jobs, source_metadata, target_metadata, expected_messages
):
    """Check `compare_finish_time` for two jobs with the given metadata."""
    source, target = get_jobs
    source.metadata = source_metadata
    target.metadata = target_metadata
    actual = compare_finish_time(source, target)
    expected = create_result("Finish Time", expected_messages)
    assert actual == expected
def test_report_call_arguments(mocker):
    """Check that calling a `Report` with a result renders its details once.

    `arche.report.display_html` is patched; the HTML extracted from its first
    call must contain the detailed message exactly one time.
    """
    info_messages = {Level.INFO: [("summary", "very detailed message")]}
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    rule_result = create_result("rule name here", info_messages)
    report = Report()
    report(rule_result)
    # mock_calls[0][1] holds the positional args of the first display call.
    first_call = display_mock.mock_calls[0]
    rendered_report = get_report_from_iframe(first_call[1][0])
    assert rendered_report.count("very detailed message") == 1
def test_show(mocker, capsys, message, stats, exp_md_output, exp_txt_outputs):
    """Check that `Result.show()` plots, emits markdown and prints text.

    `plotly.io.show`, `arche.report.display_markdown` and `builtins.print` are
    patched. The plot must be shown exactly once with `res.figures[0]`, the
    last markdown call must be `exp_md_output`, and `print` must have been
    called with each entry of `exp_txt_outputs` as a consecutive sequence.
    """
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    mocked_md = mocker.patch("arche.report.display_markdown", autospec=True)
    mocked_print = mocker.patch("builtins.print", autospec=True)
    res = create_result("rule name here", message, stats=stats)
    res.show()
    mock_pio_show.assert_called_once_with(res.figures[0])
    mocked_md.assert_called_with(exp_md_output)
    # Pass a list rather than a generator: a generator is exhausted while the
    # calls are matched, so a failing assertion could not list what was expected.
    mocked_print.assert_has_calls([mocker.call(o) for o in exp_txt_outputs])
def test_find_by_unique(
    data, tagged_fields, expected_messages, expected_err_items_count
):
    """Check `duplicates.find_by_unique` against the expected result.

    The expected result uses the frame length as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by_unique(frame, tagged_fields)
    expected = create_result(
        "Duplicates By **unique** Tag",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_check_uniqueness(
    data, tagged_fields, expected_messages, expected_err_items_count
):
    """Check `check_uniqueness` against the expected "Uniqueness" result.

    The expected result uses the frame length as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = check_uniqueness(frame, tagged_fields)
    expected = create_result(
        "Uniqueness",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_compare_was_now(data, tagged_fields, expected_messages, expected_errors):
    """Check `p.compare_was_now` against the expected result.

    `expected_errors` is forwarded to `create_result` as its third positional
    argument; the frame length is passed as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = p.compare_was_now(frame, tagged_fields)
    expected = create_result(
        "Compare Price Was And Now",
        expected_messages,
        expected_errors,
        items_count=len(frame),
    )
    assert actual == expected
def test_anomalies(
    mocker, jobs_stats: Dict, expected_messages: Dict, stats: pd.DataFrame
):
    """Check `cov.anomalies` with `api.get_jobs` patched to return built jobs.

    Each `jobs_stats` entry unpacks to `(key, counts, input_values)` and becomes
    a `Job`. The last entry's key is passed as the first argument and the keys
    of all preceding entries as the second.
    """
    patched_jobs = []
    for job_key, counts, input_values in jobs_stats:
        job_stats = {"counts": counts, "totals": {"input_values": input_values}}
        patched_jobs.append(Job(key=job_key, stats=job_stats))
    mocker.patch("arche.rules.coverage.api.get_jobs", return_value=patched_jobs)

    last_key = jobs_stats[-1][0]
    earlier_keys = [job_key for job_key, *_ in jobs_stats[:-1]]
    actual = cov.anomalies(last_key, earlier_keys)
    expected = create_result("Anomalies", expected_messages, stats=stats)
    assert actual == expected
def test_check_items(data, tagged_fields, expected_messages, expected_err_items_count):
    """Check `check_items` against the expected "Duplicated Items" result.

    The expected result uses the frame length as `items_count`.
    """
    frame = pd.DataFrame(data)
    actual = check_items(frame, tagged_fields)
    expected = create_result(
        "Duplicated Items",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Validate items against `get_schema` and expect a result with no messages.

    `Report.__call__` is patched and must be called exactly once with the
    report instance and the expected result.
    """
    expected = create_result("JSON Schema Validation", {})
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    report_call_mock.assert_called_once_with(arche.report, expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """Check `Report.write_details()` plotting and markdown output.

    One result per entry of `messages` is saved into a fresh `Report`. After
    writing details, `plotly.io.show` must have last been called with the
    first figure of the last saved result, and `display_markdown` must have
    been called with every entry of `expected_details`, in any order.
    """
    show_mock = mocker.patch("plotly.io.show", autospec=True)
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)
    report = Report()
    for result_args in messages:
        last_result = create_result(*result_args, stats=[get_df])
        report.save(last_result)
    report.write_details()
    show_mock.assert_called_with(last_result.figures[0])
    expected_calls = [mocker.call(detail) for detail in expected_details]
    markdown_mock.assert_has_calls(expected_calls, any_order=True)
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Validate items against `get_schema` and expect a single INFO message.

    `Result.show` is patched and must be called exactly once with the
    expected result.
    """
    info_messages = {Level.INFO: [("4 items were checked, 0 error(s)",)]}
    expected = create_result("JSON Schema Validation", info_messages)
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    show_mock.assert_called_once_with(expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
def test_report_call(mocker, get_df, capsys, messages, expected_details):
    """Check the HTML produced by calling a `Report` holding saved results.

    `arche.report.display_html` is patched; the HTML extracted from its first
    call must contain two plots and each rule's INFO heading twice.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    report = Report()
    for result_args in messages:
        report.save(create_result(*result_args, stats=[get_df]))
    report()
    # mock_calls[0][1] holds the positional args of the first display call.
    first_call = display_mock.mock_calls[0]
    rendered_report = get_report_from_iframe(first_call[1][0])
    assert rendered_report.count("Plotly.newPlot") == 2
    assert rendered_report.count("rule name here - INFO") == 2
    assert rendered_report.count("other result there - INFO") == 2
def test_get_difference(source, target, categories, expected_messages, expected_stats):
    """Check `c.get_difference` for two DataFrames against the expected result.

    The rule is called with the keys "s" and "t", the two frames and the
    given categories.
    """
    source_df = pd.DataFrame(source)
    target_df = pd.DataFrame(target)
    actual = c.get_difference("s", "t", source_df, target_df, categories)
    expected = create_result(
        "Category Coverage Difference", expected_messages, stats=expected_stats
    )
    assert actual == expected
def test_get_coverage_per_category(
    data, cat_names, expected_messages, expected_stats
):
    """Check `c.get_coverage_per_category` against the expected result."""
    frame = pd.DataFrame(data)
    actual = c.get_coverage_per_category(frame, cat_names)
    expected = create_result(
        "Coverage For Scraped Categories", expected_messages, expected_stats
    )
    assert actual == expected
def test_check_outcome(get_job, metadata, expected_messages):
    """Check `check_outcome` on a job whose metadata is set to `metadata`."""
    checked_job = get_job
    checked_job.metadata = metadata
    actual = check_outcome(checked_job)
    expected = create_result("Job Outcome", expected_messages)
    assert actual == expected
def test_check_tags(source_columns, target_columns, tags, expected_messages):
    """Check `check_tags` against the expected "Tags" result.

    Source and target columns are converted to NumPy arrays before the call.
    """
    result = check_tags(np.array(source_columns), np.array(target_columns), tags)
    assert result == create_result("Tags", expected_messages)