def test_basic_merging_functionality():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
    )
    assert isinstance(result, DatapackageBase)
    assert isinstance(result.fs, MemoryFS)
    assert len(result.resources) == 5

    d, r = result.get_resource("sa-data-vector.data")
    assert r["name"] == "sa-data-vector.data"
    assert r["path"] == "sa-data-vector.data.npy"
    assert r["group"] == "sa-data-vector"
    assert r["nrows"] == 5
    assert np.allclose(d, np.array([0, 2, 4, 6, 8]))

    d, r = result.get_resource("sa-data-array.data")
    assert r["name"] == "sa-data-array.data"
    assert r["path"] == "sa-data-array.data.npy"
    assert r["group"] == "sa-data-array"
    assert r["nrows"] == 5
    assert d.shape == (5, 10)
    assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)

def test_new_metadata():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        metadata={
            "name": "something something",
            "id_": "danger zone",
            "combinatorial": True,
            "sequential": False,
            "seed": 2000,
            "foo bar baz": True,
        },
    )
    assert result.metadata["name"] == "something_something"
    assert result.metadata["id"] == "danger zone"
    assert result.metadata["combinatorial"]
    assert not result.metadata["sequential"]
    assert result.metadata["seed"] == 2000
    assert result.metadata["foo bar baz"]

def test_shape_mismatch_mask():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with pytest.raises(LengthMismatch):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )

def get_datapackage(obj):
    if isinstance(obj, bwp.DatapackageBase):
        return obj
    elif isinstance(obj, FS):
        return bwp.load_datapackage(obj)
    elif isinstance(obj, Path) and obj.suffix.lower() == ".zip":
        # Wrap in str() for compatibility with fs versions that don't accept Path
        return bwp.load_datapackage(ZipFS(str(obj)))
    elif isinstance(obj, Path) and obj.is_dir():
        return bwp.load_datapackage(OSFS(str(obj)))
    else:
        raise TypeError(
            "Unknown input type for loading datapackage: {}: {}".format(type(obj), obj)
        )

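# Usage sketch for get_datapackage(); the path below is hypothetical and this
# helper is never invoked by the test suite -- it only illustrates that all
# accepted input types (DatapackageBase, FS, .zip Path, directory Path)
# converge on the same loaded datapackage.
def _get_datapackage_usage_sketch():
    archive = Path("fixtures") / "example.zip"  # hypothetical .zip datapackage
    dp = get_datapackage(archive)  # dispatched to load_datapackage(ZipFS(...))
    assert get_datapackage(dp) is dp  # already-loaded objects pass through unchanged
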
def test_write_new_datapackage():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with tempfile.TemporaryDirectory() as td:
        temp_fs = OSFS(td)
        result = merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
            output_fs=temp_fs,
        )
        result = load_datapackage(OSFS(td))
        assert isinstance(result, DatapackageBase)
        assert not isinstance(result.fs, MemoryFS)
        assert len(result.resources) == 5
        for suffix in {"indices", "data", "distributions", "flip"}:
            # Not every group has every suffix; check each group independently
            # so a missing vector resource doesn't skip the array checks for
            # the same suffix.
            try:
                d, r = result.get_resource(f"sa-data-vector.{suffix}")
            except KeyError:
                pass
            else:
                assert r["name"] == f"sa-data-vector.{suffix}"
                assert r["path"] == f"sa-data-vector.{suffix}.npy"
                assert r["group"] == "sa-data-vector"
                assert r["nrows"] == 5
                if suffix == "data":
                    assert np.allclose(d, np.array([0, 2, 4, 6, 8]))
            try:
                d, r = result.get_resource(f"sa-data-array.{suffix}")
            except KeyError:
                pass
            else:
                assert r["name"] == f"sa-data-array.{suffix}"
                assert r["path"] == f"sa-data-array.{suffix}.npy"
                assert r["group"] == "sa-data-array"
                assert r["nrows"] == 5
                if suffix == "data":
                    assert d.shape == (5, 10)
                    assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)

def test_save_modifications(tmp_path):
    copy_fixture("tfd", tmp_path)
    dp = load_datapackage(OSFS(str(tmp_path)))
    assert dp.resources[1]["name"] == "sa-data-vector-from-dict.data"
    assert np.allclose(dp.data[1], [3.3, 8.3])

    dp.data[1][:] = 42
    dp._modified = [1]
    dp.write_modified()
    assert np.allclose(dp.data[1], 42)
    assert not dp._modified

    dp = load_datapackage(OSFS(str(tmp_path)))
    assert np.allclose(dp.data[1], 42)

def test_processed_array():
    database = DatabaseChooser("a database")
    database.write(
        {
            ("a database", "2"): {
                "type": "process",
                "exchanges": [
                    {
                        "input": ("a database", "2"),
                        "amount": 42,
                        "uncertainty_type": 7,
                        "type": "production",
                    }
                ],
            }
        }
    )
    package = load_datapackage(ZipFS(database.filepath_processed()))
    print(package.resources)

    array = package.get_resource("a_database_technosphere_matrix.data")[0]
    assert array.shape == (1,)
    assert array[0] == 42

    array = package.get_resource("a_database_technosphere_matrix.distributions")[0]
    assert array.shape == (1,)
    assert array[0]["uncertainty_type"] == 7

def test_ordering():
    dps = [
        load_datapackage(ZipFS(dirpath / "b-second.zip")),
        load_datapackage(ZipFS(dirpath / "a-first.zip")),
    ]
    for dp in dps:
        dp.rehydrate_interface("w-fourth", Interface())
        print(list(dp.groups))
    mm = MappedMatrix(packages=dps, matrix="matrix-a")
    assert [grp.label for grp in mm.groups] == [
        "y-second",
        "w-fourth",
        "y-second",
        "w-fourth",
    ]

def test_process_without_exchanges_still_in_processed_array():
    database = DatabaseChooser("a database")
    database.write({("a database", "foo"): {}})

    package = load_datapackage(ZipFS(database.filepath_processed()))
    array = package.get_resource("a_database_technosphere_matrix.data")[0]
    assert array[0] == 1
    assert array.shape == (1,)

def test_default_metadata():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
    )
    assert result.metadata["name"]
    assert result.metadata["id"]
    assert not result.metadata["combinatorial"]
    assert not result.metadata["sequential"]
    assert not result.metadata["seed"]

def test_database_process_adds_correct_geo(add_biosphere):
    database = Database("food")
    database.write(food)

    package = load_datapackage(ZipFS(database.filepath_processed()))
    data = package.get_resource("food_inventory_geomapping_matrix.indices")[0]
    assert geomapping["CA"] in data["col"].tolist()
    assert geomapping["CH"] in data["col"].tolist()

def test_group_ordering_consistent():
    dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
    assert list(dp.groups) == [
        "sa-data-vector-from-dict",
        "sa-data-vector",
        "sa-data-array",
        "sa-vector-interface",
        "sa-array-interface",
    ]

def test_add_suffix():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip")))
    with pytest.warns(UserWarning):
        result = merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="same",
            second_dp=second,
            second_resource_group_label="same",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )
    assert isinstance(result, DatapackageBase)
    assert len(result.resources) == 5
    for suffix in {"indices", "data", "distributions", "flip"}:
        # Identical group labels are disambiguated with _true/_false suffixes;
        # check each renamed group independently so a missing resource in one
        # doesn't skip the checks for the other.
        try:
            d, r = result.get_resource(f"same_true.{suffix}")
        except KeyError:
            pass
        else:
            assert r["name"] == f"same_true.{suffix}"
            assert r["path"] == f"same_true.{suffix}.npy"
            assert r["group"] == "same_true"
            assert r["nrows"] == 5
            if suffix == "data":
                assert np.allclose(d, np.array([0, 2, 4, 6, 8]))
        try:
            d, r = result.get_resource(f"same_false.{suffix}")
        except KeyError:
            pass
        else:
            assert r["name"] == f"same_false.{suffix}"
            assert r["path"] == f"same_false.{suffix}.npy"
            assert r["group"] == "same_false"
            assert r["nrows"] == 5
            if suffix == "data":
                assert d.shape == (5, 10)
                assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)

def test_metadata_is_the_same_object():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    for k, v in fdp.metadata.items():
        if k != "resources":
            assert id(v) == id(dp.metadata[k])
    for resource in fdp.resources:
        assert any(obj for obj in dp.resources if obj is resource)

def test_data_is_the_same_object_when_not_proxy():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    arr1, _ = dp.get_resource("sa-data-array.data")
    arr2, _ = fdp.get_resource("sa-data-array.data")
    assert np.allclose(arr1, arr2)
    assert arr1 is arr2
    assert np.shares_memory(arr1, arr2)

def test_integration_test_fixture_zipfile():
    loaded = load_datapackage(
        ZipFS(
            str(Path(__file__).parent.resolve() / "fixtures" / "test-fixture.zip"),
            write=False,
        )
    )
    check_metadata(loaded, False)
    check_data(loaded)

def test_database_process_adds_default_geo(add_biosphere):
    database = Database("food")
    new_food = copy.deepcopy(food)
    for v in new_food.values():
        del v["location"]
    database.write(new_food)

    package = load_datapackage(ZipFS(database.filepath_processed()))
    data = package.get_resource("food_inventory_geomapping_matrix.indices")[0]
    assert np.allclose(data["col"], geomapping[config.global_location])

def test_data_is_readable_multiple_times_when_proxy_directory():
    dp = load_datapackage(fs_or_obj=OSFS(str(dirpath / "tfd")), proxy=True)
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    arr1, _ = dp.get_resource("sa-data-array.data")
    arr2, _ = fdp.get_resource("sa-data-array.data")
    assert np.allclose(arr1, arr2)
    assert arr1.base is not arr2
    assert arr2.base is not arr1
    assert not np.shares_memory(arr1, arr2)

def sensitivity_dps():
    class VectorInterface:
        def __next__(self):
            return np.array([1, 2, 3])

    class ArrayInterface:
        @property
        def shape(self):
            return (3, 100)

        def __getitem__(self, args):
            return np.ones((3,)) * args[1]

    dp_1 = bwp.load_datapackage(ZipFS(dirpath / "sa-1.zip"))
    dp_1.rehydrate_interface("a", ArrayInterface())
    dp_2 = bwp.load_datapackage(ZipFS(dirpath / "sa-2.zip"))
    dp_2.rehydrate_interface("d", VectorInterface())
    return dp_1, dp_2

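# The two dummy classes in sensitivity_dps() sketch the protocol that
# rehydrate_interface() expects: a vector interface returns a fresh 1-D sample
# from each __next__ call, while an array interface exposes .shape and returns
# one column per __getitem__ call. A standalone, never-invoked illustration
# (class name hypothetical):
def _vector_interface_sketch():
    class ConstantVector:
        def __next__(self):
            return np.array([1.0, 2.0, 3.0])  # same draw on every iteration

    vec = ConstantVector()
    assert np.allclose(next(vec), [1.0, 2.0, 3.0])
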
def test_fdp_can_load_proxy_first():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")), proxy=True)
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    arr2, _ = fdp.get_resource("sa-data-array.data")
    arr1, _ = dp.get_resource("sa-data-array.data")
    assert np.allclose(arr1, arr2)
    assert arr1.base is not arr2
    assert arr2.base is not arr1
    assert not np.shares_memory(arr1, arr2)

def test_data_current():
    # No filtering: all values in group "x-third" pass through.
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([101, -111, 112])
    mm = MappedMatrix(packages=[dp], matrix="matrix-b")
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    # custom_filter drops the element whose row index is 3.
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([101, -111])
    mm = MappedMatrix(
        packages=[dp], matrix="matrix-b", custom_filter=lambda x: x["row"] < 3
    )
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    # An explicit row_mapper keeps only the rows mapped by the ArrayMapper.
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([-111, 112])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(packages=[dp], matrix="matrix-b", row_mapper=am)
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    # row_mapper and custom_filter compose: only row 2 survives both.
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([-111])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(
        packages=[dp],
        matrix="matrix-b",
        row_mapper=am,
        custom_filter=lambda x: x["row"] < 3,
    )
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

def test_weighting_process(reset):
    weighting = Weighting(("foo",))
    weighting.write([42])

    package = load_datapackage(ZipFS(weighting.filepath_processed()))
    print(package.resources)

    data = package.get_resource("foo_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("foo_matrix_data.indices")[0]
    assert np.allclose(indices["row"], 0)
    assert np.allclose(indices["col"], 0)

def test_wrong_resource_group_name():
    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="wrong",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="wrong",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )

def test_method_process_adds_correct_geo(add_method):
    method = Method(("test method",))

    package = load_datapackage(ZipFS(method.filepath_processed()))
    print(package.resources)
    mapped = {
        row["row"]: row["col"]
        for row in package.get_resource("test_method_matrix_data.indices")[0]
    }
    assert geomapping["foo"] == mapped[get_id(("biosphere", 1))]
    assert geomapping["bar"] == mapped[get_id(("biosphere", 2))]
    assert package.get_resource("test_method_matrix_data.data")[0].shape == (2,)

def test_method_processed_array(reset):
    database = DatabaseChooser("foo")
    database.write({("foo", "bar"): {}})
    method = Method(("a", "method"))
    method.write([[("foo", "bar"), 42]])

    package = load_datapackage(ZipFS(method.filepath_processed()))
    data = package.get_resource("a_method_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("a_method_matrix_data.indices")[0]
    assert np.allclose(indices["row"], get_id(("foo", "bar")))
    assert np.allclose(indices["col"], geomapping[config.global_location])

def test_sqlite_processed_array_order():
    database = DatabaseChooser("testy")
    data = {
        ("testy", "C"): {},
        ("testy", "A"): {},
        ("testy", "B"): {
            "exchanges": [
                {"input": ("testy", "A"), "amount": 1, "type": "technosphere"},
                {"input": ("testy", "A"), "amount": 2, "type": "technosphere"},
                {"input": ("testy", "C"), "amount": 2, "type": "biosphere"},
                {"input": ("testy", "C"), "amount": 3, "type": "biosphere"},
                {"input": ("testy", "B"), "amount": 4, "type": "production"},
                {"input": ("testy", "B"), "amount": 1, "type": "production"},
            ]
        },
    }
    database.write(data)
    lookup = {k: get_id(("testy", k)) for k in "ABC"}
    # Expected ordering is lexicographic by (row id, col id, amount) tuples.
    t = sorted(
        [
            (lookup["A"], lookup["B"], 1),
            (lookup["A"], lookup["B"], 2),
            # Implicit production
            (lookup["C"], lookup["C"], 1),
            (lookup["A"], lookup["A"], 1),
            # Explicit production
            (lookup["B"], lookup["B"], 4),
            (lookup["B"], lookup["B"], 1),
        ]
    )
    b = sorted(
        [
            (lookup["C"], lookup["B"], 2),
            (lookup["C"], lookup["B"], 3),
        ]
    )

    package = load_datapackage(ZipFS(database.filepath_processed()))

    array = package.get_resource("testy_technosphere_matrix.data")[0]
    assert array.shape == (6,)
    assert np.allclose(array, [x[2] for x in t])

    array = package.get_resource("testy_technosphere_matrix.indices")[0]
    assert array.shape == (6,)
    assert np.allclose(array["row"], [x[0] for x in t])
    assert np.allclose(array["col"], [x[1] for x in t])

    array = package.get_resource("testy_biosphere_matrix.data")[0]
    assert array.shape == (2,)
    assert np.allclose(array, [x[2] for x in b])

    array = package.get_resource("testy_biosphere_matrix.indices")[0]
    assert array.shape == (2,)
    assert np.allclose(array["row"], [x[0] for x in b])
    assert np.allclose(array["col"], [x[1] for x in b])

def test_normalization_process_row(reset):
    database = DatabaseChooser("foo")
    database.write({("foo", "bar"): {}})
    norm = Normalization(("foo",))
    norm.write([[("foo", "bar"), 42]])

    package = load_datapackage(ZipFS(norm.filepath_processed()))
    data = package.get_resource("foo_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("foo_matrix_data.indices")[0]
    assert np.allclose(indices["row"], get_id(("foo", "bar")))
    assert np.allclose(indices["col"], get_id(("foo", "bar")))

def test_del_resource_group_filesystem(tmp_path):
    copy_fixture("tfd", tmp_path)
    dp = load_datapackage(OSFS(str(tmp_path)))
    reference_length = len(dp)
    assert "sa-data-vector.indices.npy" in [o.name for o in tmp_path.iterdir()]

    dp.del_resource_group("sa-data-vector")
    assert "sa-data-vector.indices.npy" not in [o.name for o in tmp_path.iterdir()]
    assert len(dp) == reference_length - 3
    assert len(dp.data) == reference_length - 3
    assert len(dp.metadata["resources"]) == reference_length - 3
    assert len(dp.resources) == reference_length - 3

def test_flip_masked():
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([True])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(
        packages=[dp],
        matrix="matrix-b",
        row_mapper=am,
        custom_filter=lambda x: x["row"] < 3,
    )
    for group in mm.groups:
        assert np.allclose(group.flip, expected)

def test_reset_index_modified(fixture):
    assert not fixture._modified
    reset_index(fixture, "vector-csv-rows")
    assert fixture._modified == set([fixture._get_index("vector.indices")])

    fixture = load_datapackage(
        OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing"))
    )
    assert not fixture._modified
    reset_index(fixture, "csv-multiple")
    assert fixture._modified == set(
        [
            fixture._get_index("vector.indices"),
            fixture._get_index("array.indices"),
        ]
    )