def test_process(dataset_id):
    """Check that :func:`process` runs through for the given data set."""
    # Redirect the "historical input" path to the copy shipped inside the
    # repository, next to the `item` package itself.
    package_dir = Path(item.__file__).parent
    paths["historical input"] = package_dir.joinpath(
        "data", "historical", "input"
    )

    process(dataset_id)
def test_process(caplog, dataset_id):
    """Check that :func:`process` runs and yields pivotable output."""
    # Redirect the "historical input" path to the copy shipped inside the
    # repository, next to the `item` package itself.
    package_dir = Path(item.__file__).parent
    paths["historical input"] = package_dir.joinpath(
        "data", "historical", "input"
    )

    process(dataset_id)

    # The "non-unique keys" message must not have been logged; its absence
    # means the results could be pivoted to wide format.
    unwanted = "Processing produced non-unique keys; no -wide output"
    assert unwanted not in caplog.messages
def run_all(output_path):
    """Run all diagnostics and write the results to `output_path`.

    The following files are created in `output_path`:

    - One coverage report (``.txt``) per source with ID 0 through 3, named
      after :func:`source_str` of the source ID.
    - One ``.csv`` file per check in ``QUALITY``, named after the last
      component of the check's ``__name__``.
    - ``data.zip``, an archive of the fetched source data files and the
      quality check outputs.
    - ``index.html``, rendered from ``INDEX_TEMPLATE`` with the groups of
      file names generated above.

    Parameters
    ----------
    output_path : str or os.PathLike
        Directory in which to write output. It is created, including any
        missing parents, if it does not exist.
    """
    from zipfile import ZIP_DEFLATED, ZipFile

    from jinja2 import Template

    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    # Paths of all files to be added to the archive
    data_files = []

    # File names to be listed in the index, by group
    groups = {"Coverage": [], "Quality": []}

    # Coverage
    for source_id in [0, 1, 2, 3]:
        # Output filename
        filename = f"{source_str(source_id)}.txt"
        groups["Coverage"].append(filename)

        # Read source data
        # NOTE(review): fetch_source() is assumed to return a pathlib.Path,
        # since `.name` is accessed on it when archiving below — confirm.
        data_files.append(fetch_source(source_id, use_cache=True))
        data = pd.read_csv(data_files[-1])

        # Generate coverage and write to file
        # TODO this doesn't allow for column names other than the defaults to
        #      coverage(), above; generalize
        (output_path / filename).write_text(coverage(data))

    # Quality
    from item.historical import process

    for check in QUALITY:
        # Output filename
        filename = f"{check.__name__.split('.')[-1]}.csv"
        groups["Quality"].append(filename)

        data_files.append(output_path / filename)

        # TODO this is specific to A003; generalize
        check(process(3), process(9)).to_csv(data_files[-1])

    # Archive data files. The context manager closes the archive even if a
    # write fails; without close(), the ZIP central directory is not written
    # and the resulting file cannot be read.
    with ZipFile(
        output_path / "data.zip",
        mode="w",
        compression=ZIP_DEFLATED,
        compresslevel=9,
    ) as zf:
        for path in data_files:
            zf.write(filename=path, arcname=path.name)

    groups["Cached raw source data"] = ["data.zip"]

    # Generate index file
    t = Template(INDEX_TEMPLATE)
    (output_path / "index.html").write_text(t.render(groups=groups))
def test_A003():
    """Exercise ``A003.compute`` from historical.diagnostic."""
    # Inputs are the processed data sets 3 (activity) and 9 (stock)
    result = A003.compute(process(3), process(9))

    # Expected number of computed values
    assert len(result) == 929

    # Spot-check one observation against its known value
    usa_2015 = result.query("`ISO Code` == 'USA' and Year == 2015")
    obs = usa_2015["Value"].squeeze()
    assert np.isclose(obs, 0.02098, rtol=1e-3)
def test_diagnostic(id, N, query, expected):
    """Exercise one check from :mod:`.historical.diagnostic`.

    The module named `id` is imported, its ``compute()`` is called on the
    processed data sets listed in its ``ARGS``, and the result is checked for
    at least `N` rows and for the `expected` value in the row(s) selected by
    `query`.
    """
    module = import_module(f"item.historical.diagnostic.{id}")

    # Process every input data set, then compute the diagnostic from them
    result = module.compute(*map(process, module.ARGS))

    # At least the expected number of values were computed
    assert len(result) >= N, result

    # The selected value is present and close to the expected one
    matched = result.query(query)
    obs = matched["VALUE"].squeeze()
    assert np.isclose(obs, expected, rtol=1e-3), matched