Exemple #1
0
def test_process(dataset_id):
    """Check that ``process`` can be called for `dataset_id`."""
    # Redirect the "historical input" path to the directory located next to
    # the installed ``item`` package, i.e. the copy within the repo
    package_dir = Path(item.__file__).parent
    paths["historical input"] = package_dir / "data" / "historical" / "input"

    process(dataset_id)
def test_process(caplog, dataset_id):
    """Check that ``process`` runs for `dataset_id` without a pivot warning."""
    # Redirect the "historical input" path to the directory located next to
    # the installed ``item`` package, i.e. the copy within the repo
    package_dir = Path(item.__file__).parent
    paths["historical input"] = package_dir / "data" / "historical" / "input"

    process(dataset_id)

    # The results must be pivotable to wide format: the message reporting
    # non-unique keys must not appear among the captured log messages
    non_unique_message = "Processing produced non-unique keys; no -wide output"
    assert non_unique_message not in caplog.messages
Exemple #3
0
def run_all(output_path):
    """Run all diagnostics.

    The following files are written into `output_path`, which is created
    (including parents) if it does not exist:

    - one ``.txt`` file per source ID in 0–3, containing the result of
      ``coverage()`` on that source's data;
    - one ``.csv`` file per check in ``QUALITY``;
    - ``data.zip``, an archive of the data files collected while running the
      above (fetched source files and the quality check outputs);
    - ``index.html``, rendered from ``INDEX_TEMPLATE`` with the file names
      grouped by kind.

    :param output_path: directory in which to write the output; anything
        accepted by :class:`pathlib.Path`.
    """
    from zipfile import ZIP_DEFLATED, ZipFile

    from jinja2 import Template

    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    # Paths of all files to be added to the archive
    data_files = []

    # File names listed in the index, grouped by heading
    groups = {"Coverage": [], "Quality": []}

    # Coverage
    for source_id in [0, 1, 2, 3]:
        # Output filename
        filename = f"{source_str(source_id)}.txt"
        groups["Coverage"].append(filename)

        # Read source data
        data_files.append(fetch_source(source_id, use_cache=True))
        data = pd.read_csv(data_files[-1])

        # Generate coverage and write to file
        # TODO this doesn't allow for column names other than the defaults to
        #      coverage(), above; generalize
        (output_path / filename).write_text(coverage(data))

    # Quality
    from item.historical import process

    for check in QUALITY:
        # Output filename: last component of the check's dotted name
        filename = f"{check.__name__.split('.')[-1]}.csv"
        groups["Quality"].append(filename)

        data_files.append(output_path / filename)
        # TODO this is specific to A003; generalize
        check(process(3), process(9)).to_csv(data_files[-1])

    # Archive data files. The context manager guarantees the archive is closed
    # (and thus its central directory written) even if adding a member fails;
    # an unclosed ZipFile in write mode can leave an unreadable archive.
    with ZipFile(
        output_path / "data.zip",
        mode="w",
        compression=ZIP_DEFLATED,
        compresslevel=9,
    ) as zf:
        for path in data_files:
            # Store each file flat, under its base name only
            zf.write(filename=path, arcname=path.name)

    groups["Cached raw source data"] = ["data.zip"]

    # Generate index file
    t = Template(INDEX_TEMPLATE)
    (output_path / "index.html").write_text(t.render(groups=groups))
Exemple #4
0
def test_A003():
    """Exercise ``A003.compute`` on the processed data sets 3 and 9."""
    # Inputs are produced in this order: data set 3 first, then data set 9
    activity_data, stock_data = process(3), process(9)
    computed = A003.compute(activity_data, stock_data)

    # The expected number of values was computed
    assert len(computed) == 929

    # Spot-check one entry: the value for USA in 2015
    usa_2015 = computed.query("`ISO Code` == 'USA' and Year == 2015")
    assert np.isclose(usa_2015["Value"].squeeze(), 0.02098, rtol=1e-3)
def test_diagnostic(id, N, query, expected):
    """Exercise one check from :mod:`.historical.diagnostic`.

    `id` names the submodule to import; it must provide ``ARGS`` and
    ``compute()``. At least `N` values must be computed, and the row(s)
    selected by `query` must give a "VALUE" close to `expected`.
    """
    check_module = import_module(f"item.historical.diagnostic.{id}")

    # Prepare one processed input per entry in the module's ARGS
    prepared = list(map(process, check_module.ARGS))

    # The diagnostic runs on those inputs
    computed = check_module.compute(*prepared)

    # At least N values were computed
    assert len(computed) >= N, computed

    # The selected value matches the expectation within relative tolerance;
    # on failure, show the selected rows
    selected = computed.query(query)
    observed = selected["VALUE"].squeeze()
    assert np.isclose(observed, expected, rtol=1e-3), selected