Beispiel #1
0
def test_get_difference(source_stats, target_stats, expected_messages,
                        expected_stats):
    """Compare ``cov.get_difference`` output with the expected result.

    Two jobs are built from the given stats, keyed "s" and "t", and the
    outcome must equal a "Coverage Difference" result carrying the expected
    messages and stats.
    """
    source_job = Job(stats=source_stats, key="s")
    target_job = Job(stats=target_stats, key="t")
    actual = cov.get_difference(source_job, target_job)
    expected = create_result(
        "Coverage Difference", expected_messages, stats=expected_stats
    )
    assert actual == expected
Beispiel #2
0
def test_check_errors(get_job, error_count, expected_messages):
    """Run ``check_errors`` on a job whose metadata holds ``error_count``.

    The job fixture gets its ``metadata`` and ``key`` overwritten before the
    rule is called; the outcome must equal a "Job Errors" result.
    """
    # The fixture object itself is configured; no copy is made.
    get_job.metadata = {"scrapystats": error_count}
    get_job.key = "112358/13/21"

    actual = check_errors(get_job)
    expected = create_result("Job Errors", expected_messages)
    assert actual == expected
Beispiel #3
0
def test_compare_prices_for_same_urls(source_data, target_data, tagged_fields,
                                      expected_messages):
    """Check ``p.compare_prices_for_same_urls`` on two DataFrames.

    Source and target data are wrapped in DataFrames and the outcome must
    equal a "Compare Prices For Same Urls" result with the expected messages.
    """
    source_df = pd.DataFrame(source_data)
    target_df = pd.DataFrame(target_data)
    actual = p.compare_prices_for_same_urls(source_df, target_df, tagged_fields)
    expected = create_result("Compare Prices For Same Urls", expected_messages)
    assert actual == expected
Beispiel #4
0
def test_show(mocker, capsys, message, stats, outputs):
    """Check what ``Result.show`` hands to plotly and to ``display_markdown``.

    ``plotly.io.show`` and ``arche.report.display_markdown`` are patched.
    After ``show()`` the first figure must have been shown exactly once and
    the markdown mock must contain one call per entry of ``outputs``.
    """
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    mocked_md = mocker.patch("arche.report.display_markdown", autospec=True)
    res = create_result("test show", message, stats=stats)
    res.show()
    mock_pio_show.assert_called_once_with(res.figures[0])
    # Pass a real list rather than a generator: assert_has_calls iterates the
    # expected calls once to match them and again to build its failure
    # message, so a one-shot generator would render as an empty "Expected".
    expected_calls = [mocker.call(o) for o in outputs]
    mocked_md.assert_has_calls(expected_calls)
Beispiel #5
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect failure.

    ``arche.report.display_html`` is patched. The report must hold a single
    "JSON Schema Validation" result equal to the expected one (outcome set to
    FAILED), and the HTML taken from the first display call must mention the
    failed rule.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_message = (
        "1 (25%) items have 1 errors",
        None,
        {"'price' is a required property": {item_url}},
    )
    expected = create_result(
        "JSON Schema Validation", {Level.ERROR: [error_message]}
    )
    expected.outcome = Outcome.FAILED
    schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }
    arche = Arche("source", schema=schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    # mock_calls[0] is the first recorded call; [1] its positional args.
    first_call_args = display_mock.mock_calls[0][1]
    report_html = get_report_from_iframe(first_call_args[0])
    assert "JSON Schema Validation - FAILED" in report_html
Beispiel #6
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect an error.

    ``arche.report.HTML`` is patched. The report must hold a single
    "JSON Schema Validation" result equal to the expected one, and the HTML
    mock must have been called with the affected-items link line.
    """
    html_mock = mocker.patch("arche.report.HTML", autospec=True)
    job_key = "112358/13/21"
    items_url = f"{SH_URL}/{job_key}/item"
    error_message = (
        "4 items were checked, 1 error(s)",
        None,
        {"'price' is a required property": {f"{job_key}/1"}},
    )
    expected = create_result(
        "JSON Schema Validation", {Level.ERROR: [error_message]}
    )
    arche = Arche("source", schema={"type": "object", "required": ["price"]})
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    affected_line = (
        f"1 items affected - 'price' is a required property: <a href='{items_url}/1'>1</a>"
    )
    html_mock.assert_any_call(affected_line)
Beispiel #7
0
def test_compare_boolean_fields(source_data, target_data, expected_messages,
                                expected_stats):
    """Check ``compare_boolean_fields`` on source and target DataFrames.

    The outcome must equal a "Boolean Fields" result built from the expected
    messages and stats.
    """
    actual = compare_boolean_fields(
        pd.DataFrame(source_data), pd.DataFrame(target_data)
    )
    expected = create_result("Boolean Fields", expected_messages, expected_stats)
    assert actual == expected
Beispiel #8
0
def test_validate_with_json_schema_fails(mocker, get_job_items, get_schema):
    """Validate items against a schema requiring ``price`` and expect an error.

    ``arche.report.display_markdown`` is patched. The report must hold a
    single "JSON Schema Validation" result equal to the expected one, and the
    markdown mock must have been called with the affected-items line and
    ``raw=True``.
    """
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)
    item_url = f"{SH_URL}/112358/13/21/item/1"
    error_message = (
        "4 items were checked, 1 error(s)",
        None,
        {"'price' is a required property": {item_url}},
    )
    expected = create_result(
        "JSON Schema Validation", {Level.ERROR: [error_message]}
    )
    schema = {
        "type": "object",
        "required": ["price"],
        "properties": {"price": {}},
    }
    arche = Arche("source", schema=schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
    affected_line = (
        f"1 items affected - 'price' is a required property: [1]({item_url})"
    )
    markdown_mock.assert_any_call(affected_line, raw=True)
Beispiel #9
0
def test_find_by(data, columns, expected_messages, expected_err_items_count):
    """Check ``duplicates.find_by`` for the given columns.

    The outcome must equal a "Duplicates" result whose item count is the
    DataFrame length and whose error count is the expected one.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #10
0
def test_garbage_symbols(raw_items, expected_messages, expected_items_count,
                         expected_err_items_count):
    """Check ``garbage_symbols`` on a DataFrame built from ``raw_items``.

    The outcome must equal a "Garbage Symbols" result with the expected
    messages and item counts.
    """
    frame = pd.DataFrame(raw_items)
    actual = garbage_symbols(frame)
    expected = create_result(
        "Garbage Symbols",
        expected_messages,
        items_count=expected_items_count,
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #11
0
def test_compare_response_ratio(source_stats, source_metadata, target_stats,
                                target_metadata, expected_messages):
    """Check ``compare_response_ratio`` for a source and a target job.

    Both jobs are built from the given stats and metadata; the outcome must
    equal a "Compare Responses Per Item Ratio" result.
    """
    jobs = (
        Job(stats=source_stats, metadata=source_metadata),
        Job(stats=target_stats, metadata=target_metadata),
    )

    actual = compare_response_ratio(*jobs)
    expected = create_result("Compare Responses Per Item Ratio", expected_messages)
    assert actual == expected
Beispiel #12
0
def test_garbage_symbols(get_job_items, expected_messages,
                         expected_items_count, expected_err_items_count):
    """Check ``garbage_symbols`` on the job items fixture.

    The outcome must equal a "Garbage Symbols" result with the expected
    messages and item counts.
    """
    actual = garbage_symbols(get_job_items)
    expected = create_result(
        "Garbage Symbols",
        expected_messages,
        items_count=expected_items_count,
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #13
0
def test_find_by_name_url(data, tagged_fields, expected_messages):
    """Check ``duplicates.find_by_name_url`` for the given tagged fields.

    The outcome must equal the name/url duplicates result whose item count
    is the DataFrame length.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by_name_url(frame, tagged_fields)
    expected = create_result(
        "Duplicates By **name_field, product_url_field** Tags",
        expected_messages,
        items_count=len(frame),
    )
    assert actual == expected
Beispiel #14
0
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """Check the text printed by ``Report.write_details``.

    One result per entry of ``messages`` is saved into a fresh report, each
    with ``get_df`` as its only stat. ``plotly.io.show`` is patched; its most
    recent call must be for the first figure of the last saved result, and
    captured stdout must equal ``expected_details``.
    """
    show_mock = mocker.patch("plotly.io.show", autospec=True)
    report = Report()
    for result_args in messages:
        last_result = create_result(*result_args, stats=[get_df])
        report.save(last_result)
    report.write_details()
    show_mock.assert_called_with(last_result.figures[0])
    captured = capsys.readouterr()
    assert captured.out == expected_details
Beispiel #15
0
def test_compare_finish_time(get_jobs, source_metadata, target_metadata,
                             expected_messages):
    """Check ``compare_finish_time`` for two jobs with the given metadata.

    The pair from the ``get_jobs`` fixture gets its metadata overwritten
    before the rule runs; the outcome must equal a "Finish Time" result.
    """
    src, tgt = get_jobs
    src.metadata = source_metadata
    tgt.metadata = target_metadata

    actual = compare_finish_time(src, tgt)
    expected = create_result("Finish Time", expected_messages)
    assert actual == expected
Beispiel #16
0
def test_report_call_arguments(mocker):
    """Check that calling a report with a result renders its details once.

    ``arche.report.display_html`` is patched; the HTML extracted from its
    first call must contain the detailed message exactly once.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)
    info_messages = {Level.INFO: [("summary", "very detailed message")]}
    rule_result = create_result("rule name here", info_messages)

    report = Report()
    report(rule_result)
    # mock_calls[0] is the first recorded call; [1] its positional args.
    first_call_args = display_mock.mock_calls[0][1]
    report_html = get_report_from_iframe(first_call_args[0])
    assert report_html.count("very detailed message") == 1
Beispiel #17
0
def test_show(mocker, capsys, message, stats, exp_md_output, exp_txt_outputs):
    """Check what ``Result.show`` sends to plotly, markdown and ``print``.

    ``plotly.io.show``, ``arche.report.display_markdown`` and
    ``builtins.print`` are patched. After ``show()`` the first figure must
    have been shown exactly once, the last markdown call must be
    ``exp_md_output``, and the print mock must contain one call per entry of
    ``exp_txt_outputs``.
    """
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    mocked_md = mocker.patch("arche.report.display_markdown", autospec=True)
    mocked_print = mocker.patch("builtins.print", autospec=True)
    res = create_result("rule name here", message, stats=stats)
    res.show()
    mock_pio_show.assert_called_once_with(res.figures[0])
    mocked_md.assert_called_with(exp_md_output)
    # Pass a real list rather than a generator: assert_has_calls iterates the
    # expected calls once to match them and again to build its failure
    # message, so a one-shot generator would render as an empty "Expected".
    expected_prints = [mocker.call(o) for o in exp_txt_outputs]
    mocked_print.assert_has_calls(expected_prints)
Beispiel #18
0
def test_find_by_unique(data, tagged_fields, expected_messages,
                        expected_err_items_count):
    """Check ``duplicates.find_by_unique`` for the given tagged fields.

    The outcome must equal the unique-tag duplicates result whose item count
    is the DataFrame length and whose error count is the expected one.
    """
    frame = pd.DataFrame(data)
    actual = duplicates.find_by_unique(frame, tagged_fields)
    expected = create_result(
        "Duplicates By **unique** Tag",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #19
0
def test_check_uniqueness(data, tagged_fields, expected_messages,
                          expected_err_items_count):
    """Check ``check_uniqueness`` for the given tagged fields.

    The outcome must equal a "Uniqueness" result whose item count is the
    DataFrame length and whose error count is the expected one.
    """
    frame = pd.DataFrame(data)
    actual = check_uniqueness(frame, tagged_fields)
    expected = create_result(
        "Uniqueness",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #20
0
def test_compare_was_now(data, tagged_fields, expected_messages, expected_errors):
    """Check ``p.compare_was_now`` for the given tagged fields.

    The outcome must equal a "Compare Price Was And Now" result built from
    the expected messages, ``expected_errors`` (passed positionally) and the
    DataFrame length as item count.
    """
    frame = pd.DataFrame(data)
    actual = p.compare_was_now(frame, tagged_fields)
    expected = create_result(
        "Compare Price Was And Now",
        expected_messages,
        expected_errors,
        items_count=len(frame),
    )
    assert actual == expected
Beispiel #21
0
def test_anomalies(
    mocker, jobs_stats: Dict, expected_messages: Dict, stats: pd.DataFrame
):
    """Check ``cov.anomalies`` with ``api.get_jobs`` patched to canned jobs.

    One job is built per ``(key, counts, input_values)`` entry. The key of
    the last entry is passed first and the keys of all earlier entries are
    passed as a list; the outcome must equal an "Anomalies" result.
    """
    # NOTE(review): jobs_stats is annotated as Dict but is unpacked and
    # indexed here like a sequence of 3-item entries - confirm the annotation.
    canned_jobs = []
    for key, counts, input_values in jobs_stats:
        job_stats = {"counts": counts, "totals": {"input_values": input_values}}
        canned_jobs.append(Job(key=key, stats=job_stats))
    mocker.patch("arche.rules.coverage.api.get_jobs", return_value=canned_jobs)

    last_key = jobs_stats[-1][0]
    earlier_keys = [first for first, *_ in jobs_stats[:-1]]
    actual = cov.anomalies(last_key, earlier_keys)
    expected = create_result("Anomalies", expected_messages, stats=stats)
    assert actual == expected
Beispiel #22
0
def test_check_items(data, tagged_fields, expected_messages,
                     expected_err_items_count):
    """Check ``check_items`` for the given tagged fields.

    The outcome must equal a "Duplicated Items" result whose item count is
    the DataFrame length and whose error count is the expected one.
    """
    frame = pd.DataFrame(data)
    actual = check_items(frame, tagged_fields)
    expected = create_result(
        "Duplicated Items",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Beispiel #23
0
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Validate items against the schema fixture and expect an empty result.

    ``Report.__call__`` is patched; it must have been called exactly once
    with the report and the expected result, and the report must hold that
    single "JSON Schema Validation" result.
    """
    expected = create_result("JSON Schema Validation", {})
    report_call_mock = mocker.patch("arche.report.Report.__call__", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    report_call_mock.assert_called_once_with(arche.report, expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
Beispiel #24
0
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """Check the markdown emitted by ``Report.write_details``.

    One result per entry of ``messages`` is saved into a fresh report, each
    with ``get_df`` as its only stat. ``plotly.io.show`` and
    ``arche.report.display_markdown`` are patched; the most recent plotly
    call must be for the first figure of the last saved result, and every
    expected detail must appear among the markdown calls in any order.
    """
    show_mock = mocker.patch("plotly.io.show", autospec=True)
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)

    report = Report()
    for result_args in messages:
        last_result = create_result(*result_args, stats=[get_df])
        report.save(last_result)
    report.write_details()
    show_mock.assert_called_with(last_result.figures[0])
    expected_calls = list(map(mocker.call, expected_details))
    markdown_mock.assert_has_calls(expected_calls, any_order=True)
Beispiel #25
0
def test_validate_with_json_schema(mocker, get_job_items, get_schema):
    """Validate items against the schema fixture and expect an info result.

    ``Result.show`` is patched; it must have been called exactly once with
    the expected result, and the report must hold that single
    "JSON Schema Validation" result.
    """
    info_messages = {Level.INFO: [("4 items were checked, 0 error(s)", )]}
    expected = create_result("JSON Schema Validation", info_messages)
    show_mock = mocker.patch("arche.rules.result.Result.show", autospec=True)

    arche = Arche("source", schema=get_schema)
    arche._source_items = get_job_items
    arche.validate_with_json_schema()

    show_mock.assert_called_once_with(expected)
    assert len(arche.report.results) == 1
    assert arche.report.results.get("JSON Schema Validation") == expected
Beispiel #26
0
def test_report_call(mocker, get_df, capsys, messages, expected_details):
    """Check the HTML produced by calling a report with no arguments.

    One result per entry of ``messages`` is saved into a fresh report, each
    with ``get_df`` as its only stat. ``arche.report.display_html`` is
    patched; the HTML extracted from its first call must contain each of the
    three checked markers exactly twice.
    """
    display_mock = mocker.patch("arche.report.display_html", autospec=True)

    report = Report()
    for result_args in messages:
        report.save(create_result(*result_args, stats=[get_df]))
    report()

    # mock_calls[0] is the first recorded call; [1] its positional args.
    first_call_args = display_mock.mock_calls[0][1]
    report_html = get_report_from_iframe(first_call_args[0])
    assert report_html.count("Plotly.newPlot") == 2
    assert report_html.count("rule name here - INFO") == 2
    assert report_html.count("other result there - INFO") == 2
Beispiel #27
0
def test_get_difference(source, target, categories, expected_messages, expected_stats):
    """Check ``c.get_difference`` between source and target DataFrames.

    The call uses "s" and "t" as its first two arguments; the outcome must
    equal a "Category Coverage Difference" result with the expected messages
    and stats.
    """
    source_df = pd.DataFrame(source)
    target_df = pd.DataFrame(target)
    actual = c.get_difference("s", "t", source_df, target_df, categories)
    expected = create_result(
        "Category Coverage Difference", expected_messages, stats=expected_stats
    )
    assert actual == expected
Beispiel #28
0
def test_get_coverage_per_category(data, cat_names, expected_messages, expected_stats):
    """Check ``c.get_coverage_per_category`` for the given category names.

    The outcome must equal a "Coverage For Scraped Categories" result built
    from the expected messages and stats.
    """
    frame = pd.DataFrame(data)
    actual = c.get_coverage_per_category(frame, cat_names)
    expected = create_result(
        "Coverage For Scraped Categories", expected_messages, expected_stats
    )
    assert actual == expected
Beispiel #29
0
def test_check_outcome(get_job, metadata, expected_messages):
    """Run ``check_outcome`` on a job carrying the given metadata.

    The job fixture gets its ``metadata`` overwritten before the rule is
    called; the outcome must equal a "Job Outcome" result.
    """
    # The fixture object itself is configured; no copy is made.
    get_job.metadata = metadata

    actual = check_outcome(get_job)
    expected = create_result("Job Outcome", expected_messages)
    assert actual == expected
Beispiel #30
0
def test_check_tags(source_columns, target_columns, tags, expected_messages):
    """Check ``check_tags`` on source and target column arrays.

    Both column collections are converted to NumPy arrays; the outcome is
    printed and must equal a "Tags" result with the expected messages.
    """
    source_array = np.array(source_columns)
    target_array = np.array(target_columns)
    actual = check_tags(source_array, target_array, tags)
    # Write the outcome to stdout before asserting on it.
    print(actual)

    expected = create_result("Tags", expected_messages)
    assert actual == expected