Example 1
def test_basic_merging_functionality():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
    )
    assert isinstance(result, DatapackageBase)
    assert isinstance(result.fs, MemoryFS)
    assert len(result.resources) == 5

    d, r = result.get_resource("sa-data-vector.data")

    assert r["name"] == "sa-data-vector.data"
    assert r["path"] == "sa-data-vector.data.npy"
    assert r["group"] == "sa-data-vector"
    assert r["nrows"] == 5

    assert np.allclose(d, np.array([0, 2, 4, 6, 8]))

    d, r = result.get_resource("sa-data-array.data")

    assert r["name"] == "sa-data-array.data"
    assert r["path"] == "sa-data-array.data.npy"
    assert r["group"] == "sa-data-array"
    assert r["nrows"] == 5

    assert d.shape == (5, 10)
    assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)
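The mask semantics these assertions exercise can be sketched in plain NumPy (a minimal sketch, assuming the fixtures hold np.arange(10) in the vector group and np.arange(10) + 10 in the first column of the array group, which is what the assertions above imply): rows where the mask is True come from the first resource group, rows where it is False from the second.

import numpy as np

mask = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool)

vector = np.arange(10)          # stand-in for the "sa-data-vector" fixture data
assert np.allclose(vector[mask], [0, 2, 4, 6, 8])  # rows kept from the first datapackage

first_col = np.arange(10) + 10  # stand-in for column 0 of the "sa-data-array" fixture
assert np.allclose(first_col[~mask], np.array([1, 3, 5, 7, 9]) + 10)  # rows from the second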
Example 2
def test_new_metadata():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        metadata={
            "name": "something something",
            "id_": "danger zone",
            "combinatorial": True,
            "sequential": False,
            "seed": 2000,
            "foo bar baz": True,
        },
    )

    assert result.metadata["name"] == "something_something"
    assert result.metadata["id"] == "danger zone"
    assert result.metadata["combinatorial"]
    assert not result.metadata["sequential"]
    assert result.metadata["seed"] == 2000
    assert result.metadata["foo bar baz"]
Example 3
def test_shape_mismatch_mask():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with pytest.raises(LengthMismatch):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )
Example 4
def get_datapackage(obj):
    if isinstance(obj, bwp.DatapackageBase):
        return obj
    elif isinstance(obj, FS):
        return bwp.load_datapackage(obj)
    elif isinstance(obj, Path) and obj.suffix.lower() == ".zip":
        return bwp.load_datapackage(ZipFS(obj))
    elif isinstance(obj, Path) and obj.is_dir():
        return bwp.load_datapackage(OSFS(obj))
    else:
        raise TypeError(
            "Unknown input type for loading datapackage: {}: {}".format(
                type(obj), obj))
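A hypothetical call pattern for the dispatcher above (the fixture paths are placeholders, not files shipped with the library):

from pathlib import Path

dp = get_datapackage(Path("fixtures/test-fixture.zip"))  # ".zip" suffix routes through ZipFS
dp = get_datapackage(Path("fixtures/tfd"))               # existing directory routes through OSFS
dp = get_datapackage(dp)                                 # a DatapackageBase passes through unchanged
try:
    get_datapackage("not-a-path")                        # any other type raises TypeError
except TypeError:
    pass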
Example 5
def test_write_new_datapackage():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with tempfile.TemporaryDirectory() as td:
        temp_fs = OSFS(td)
        result = merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
            output_fs=temp_fs,
        )
        result = load_datapackage(OSFS(td))

        assert isinstance(result, DatapackageBase)
        assert not isinstance(result.fs, MemoryFS)
        assert len(result.resources) == 5

        for suffix in {"indices", "data", "distributions", "flip"}:
            try:
                d, r = result.get_resource(f"sa-data-vector.{suffix}")
            except KeyError:
                continue

            assert r["name"] == f"sa-data-vector.{suffix}"
            assert r["path"] == f"sa-data-vector.{suffix}.npy"
            assert r["group"] == "sa-data-vector"
            assert r["nrows"] == 5

            if suffix == "data":
                assert np.allclose(d, np.array([0, 2, 4, 6, 8]))

            try:
                d, r = result.get_resource(f"sa-data-array.{suffix}")
            except KeyError:
                continue

            assert r["name"] == f"sa-data-array.{suffix}"
            assert r["path"] == f"sa-data-array.{suffix}.npy"
            assert r["group"] == "sa-data-array"
            assert r["nrows"] == 5

            if suffix == "data":
                assert d.shape == (5, 10)
                assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)
Example 6
def test_save_modifications(tmp_path):
    copy_fixture("tfd", tmp_path)
    dp = load_datapackage(OSFS(str(tmp_path)))

    assert dp.resources[1]["name"] == "sa-data-vector-from-dict.data"
    assert np.allclose(dp.data[1], [3.3, 8.3])

    dp.data[1][:] = 42
    dp._modified = [1]
    dp.write_modified()

    assert np.allclose(dp.data[1], 42)
    assert not dp._modified

    dp = load_datapackage(OSFS(str(tmp_path)))
    assert np.allclose(dp.data[1], 42)
Example 7
def test_processed_array():
    database = DatabaseChooser("a database")
    database.write(
        {
            ("a database", "2"): {
                "type": "process",
                "exchanges": [
                    {
                        "input": ("a database", "2"),
                        "amount": 42,
                        "uncertainty_type": 7,
                        "type": "production",
                    }
                ],
            }
        }
    )
    package = load_datapackage(ZipFS(database.filepath_processed()))
    print(package.resources)
    array = package.get_resource("a_database_technosphere_matrix.data")[0]

    assert array.shape == (1,)
    assert array[0] == 42

    array = package.get_resource("a_database_technosphere_matrix.distributions")[0]
    assert array.shape == (1,)
    assert array[0]["uncertainty_type"] == 7
Example 8
def test_ordering():
    dps = [
        load_datapackage(ZipFS(dirpath / "b-second.zip")),
        load_datapackage(ZipFS(dirpath / "a-first.zip")),
    ]
    for dp in dps:
        dp.rehydrate_interface("w-fourth", Interface())
        print(list(dp.groups))

    mm = MappedMatrix(packages=dps, matrix="matrix-a")
    assert [grp.label for grp in mm.groups] == [
        "y-second",
        "w-fourth",
        "y-second",
        "w-fourth",
    ]
Example 9
def test_process_without_exchanges_still_in_processed_array():
    database = DatabaseChooser("a database")
    database.write({("a database", "foo"): {}})

    package = load_datapackage(ZipFS(database.filepath_processed()))
    array = package.get_resource("a_database_technosphere_matrix.data")[0]
    assert array[0] == 1
    assert array.shape == (1,)
Example 10
def test_default_metadata():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    result = merge_datapackages_with_mask(
        first_dp=first,
        first_resource_group_label="sa-data-vector",
        second_dp=second,
        second_resource_group_label="sa-data-array",
        mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
    )

    assert result.metadata["name"]
    assert result.metadata["id"]
    assert not result.metadata["combinatorial"]
    assert not result.metadata["sequential"]
    assert not result.metadata["seed"]
Example 11
def test_database_process_adds_correct_geo(add_biosphere):
    database = Database("food")
    database.write(food)

    package = load_datapackage(ZipFS(database.filepath_processed()))
    data = package.get_resource("food_inventory_geomapping_matrix.indices")[0]

    assert geomapping["CA"] in data["col"].tolist()
    assert geomapping["CH"] in data["col"].tolist()
Example 12
def test_group_ordering_consistent():
    dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
    assert list(dp.groups) == [
        "sa-data-vector-from-dict",
        "sa-data-vector",
        "sa-data-array",
        "sa-vector-interface",
        "sa-array-interface",
    ]
Example 13
def test_add_suffix():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip")))
    with pytest.warns(UserWarning):
        result = merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="same",
            second_dp=second,
            second_resource_group_label="same",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )

    assert isinstance(result, DatapackageBase)
    assert len(result.resources) == 5

    for suffix in {"indices", "data", "distributions", "flip"}:
        try:
            d, r = result.get_resource(f"same_true.{suffix}")
        except KeyError:
            continue

        assert r["name"] == f"same_true.{suffix}"
        assert r["path"] == f"same_true.{suffix}.npy"
        assert r["group"] == "same_true"
        assert r["nrows"] == 5

        if suffix == "data":
            assert np.allclose(d, np.array([0, 2, 4, 6, 8]))

        try:
            d, r = result.get_resource(f"same_false.{suffix}")
        except KeyError:
            continue

        assert r["name"] == f"same_false.{suffix}"
        assert r["path"] == f"same_false.{suffix}.npy"
        assert r["group"] == "same_false"
        assert r["nrows"] == 5

        if suffix == "data":
            assert d.shape == (5, 10)
            assert np.allclose(d[:, 0], np.array([1, 3, 5, 7, 9]) + 10)
Example 14
def test_metadata_is_the_same_object():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    for k, v in fdp.metadata.items():
        if k != "resources":
            assert id(v) == id(dp.metadata[k])

    for resource in fdp.resources:
        assert any(obj for obj in dp.resources if obj is resource)
Example 15
def test_data_is_the_same_object_when_not_proxy():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    arr1, _ = dp.get_resource("sa-data-array.data")
    arr2, _ = fdp.get_resource("sa-data-array.data")

    assert np.allclose(arr1, arr2)
    assert arr1 is arr2
    assert np.shares_memory(arr1, arr2)
Example 16
def test_integration_test_fixture_zipfile():
    loaded = load_datapackage(
        ZipFS(
            str(
                Path(__file__).parent.resolve() / "fixtures" /
                "test-fixture.zip"),
            write=False,
        ))

    check_metadata(loaded, False)
    check_data(loaded)
Example 17
def test_database_process_adds_default_geo(add_biosphere):
    database = Database("food")
    new_food = copy.deepcopy(food)
    for v in new_food.values():
        del v["location"]
    database.write(new_food)

    package = load_datapackage(ZipFS(database.filepath_processed()))
    data = package.get_resource("food_inventory_geomapping_matrix.indices")[0]

    assert np.allclose(data["col"], geomapping[config.global_location])
Example 18
def test_data_is_readable_multiple_times_when_proxy_directory():
    dp = load_datapackage(fs_or_obj=OSFS(str(dirpath / "tfd")), proxy=True)
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")

    arr1, _ = dp.get_resource("sa-data-array.data")
    arr2, _ = fdp.get_resource("sa-data-array.data")

    assert np.allclose(arr1, arr2)
    assert arr1.base is not arr2
    assert arr2.base is not arr1
    assert not np.shares_memory(arr1, arr2)
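The identity-versus-copy distinction checked here and in Example 15 reduces to plain NumPy semantics (a minimal sketch, independent of bw_processing internals): without proxying, both datapackages hand back the same loaded array object, while each proxied access yields an independent array, which is what the shares_memory assertions verify.

import numpy as np

a = np.arange(5)
b = a            # same object, as in the non-proxied Example 15
assert b is a and np.shares_memory(a, b)

c = a.copy()     # independent array, as with each proxied read here
assert c is not a and not np.shares_memory(a, c)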
Example 19
def sensitivity_dps():
    class VectorInterface:
        def __next__(self):
            return np.array([1, 2, 3])

    class ArrayInterface:
        @property
        def shape(self):
            return (3, 100)

        def __getitem__(self, args):
            return np.ones((3, )) * args[1]

    dp_1 = bwp.load_datapackage(ZipFS(dirpath / "sa-1.zip"))
    dp_1.rehydrate_interface("a", ArrayInterface())

    dp_2 = bwp.load_datapackage(ZipFS(dirpath / "sa-2.zip"))
    dp_2.rehydrate_interface("d", VectorInterface())

    return dp_1, dp_2
Example 20
def test_fdp_can_load_proxy_first():
    dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")),
                          proxy=True)
    fdp = dp.filter_by_attribute("matrix", "sa_matrix")
    arr2, _ = fdp.get_resource("sa-data-array.data")
    arr1, _ = dp.get_resource("sa-data-array.data")

    assert np.allclose(arr1, arr2)
    assert arr1.base is not arr2
    assert arr2.base is not arr1
    assert not np.shares_memory(arr1, arr2)
Example 21
def test_data_current():
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([101, -111, 112])
    mm = MappedMatrix(packages=[dp], matrix="matrix-b")
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([101, -111])
    mm = MappedMatrix(
        packages=[dp], matrix="matrix-b", custom_filter=lambda x: x["row"] < 3
    )
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([-111, 112])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(packages=[dp], matrix="matrix-b", row_mapper=am)
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)

    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )
    expected = np.array([-111])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(
        packages=[dp],
        matrix="matrix-b",
        row_mapper=am,
        custom_filter=lambda x: x["row"] < 3,
    )
    for group in mm.groups:
        assert np.allclose(group.data_current, expected)
Example 22
def test_weighting_process(reset):
    weighting = Weighting(("foo",))
    weighting.write([42])
    package = load_datapackage(ZipFS(weighting.filepath_processed()))
    print(package.resources)

    data = package.get_resource("foo_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("foo_matrix_data.indices")[0]
    assert np.allclose(indices["row"], 0)
    assert np.allclose(indices["col"], 0)
Example 23
def test_wrong_resource_group_name():
    first = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
    second = load_datapackage(
        ZipFS(str(fixture_dir / "merging" / "merging_second.zip")))
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="wrong",
            second_dp=second,
            second_resource_group_label="sa-data-array",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(
            first_dp=first,
            first_resource_group_label="sa-data-vector",
            second_dp=second,
            second_resource_group_label="wrong",
            mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
        )
Example 24
def test_method_process_adds_correct_geo(add_method):
    method = Method(("test method",))
    package = load_datapackage(ZipFS(method.filepath_processed()))
    print(package.resources)

    mapped = {
        row["row"]: row["col"]
        for row in package.get_resource("test_method_matrix_data.indices")[0]
    }
    assert geomapping["foo"] == mapped[get_id(("biosphere", 1))]
    assert geomapping["bar"] == mapped[get_id(("biosphere", 2))]
    assert package.get_resource("test_method_matrix_data.data")[0].shape == (2,)
Example 25
def test_method_processed_array(reset):
    database = DatabaseChooser("foo")
    database.write({("foo", "bar"): {}})

    method = Method(("a", "method"))
    method.write([[("foo", "bar"), 42]])
    package = load_datapackage(ZipFS(method.filepath_processed()))
    data = package.get_resource("a_method_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("a_method_matrix_data.indices")[0]
    assert np.allclose(indices["row"], get_id(("foo", "bar")))
    assert np.allclose(indices["col"], geomapping[config.global_location])
Example 26
def test_sqlite_processed_array_order():
    database = DatabaseChooser("testy")
    data = {
        ("testy", "C"): {},
        ("testy", "A"): {},
        ("testy", "B"): {
            "exchanges": [
                {"input": ("testy", "A"), "amount": 1, "type": "technosphere"},
                {"input": ("testy", "A"), "amount": 2, "type": "technosphere"},
                {"input": ("testy", "C"), "amount": 2, "type": "biosphere"},
                {"input": ("testy", "C"), "amount": 3, "type": "biosphere"},
                {"input": ("testy", "B"), "amount": 4, "type": "production"},
                {"input": ("testy", "B"), "amount": 1, "type": "production"},
            ]
        },
    }
    database.write(data)
    lookup = {k: get_id(("testy", k)) for k in "ABC"}
    t = sorted(
        [
            (lookup["A"], lookup["B"], 1),
            (lookup["A"], lookup["B"], 2),
            # Implicit production
            (lookup["C"], lookup["C"], 1),
            (lookup["A"], lookup["A"], 1),
            # Explicit production
            (lookup["B"], lookup["B"], 4),
            (lookup["B"], lookup["B"], 1),
        ]
    )
    b = sorted([(lookup["C"], lookup["B"], 2), (lookup["C"], lookup["B"], 3)])

    package = load_datapackage(ZipFS(database.filepath_processed()))

    array = package.get_resource("testy_technosphere_matrix.data")[0]
    assert array.shape == (6,)
    assert np.allclose(array, [x[2] for x in t])

    array = package.get_resource("testy_technosphere_matrix.indices")[0]
    assert array.shape == (6,)
    assert np.allclose(array["row"], [x[0] for x in t])
    assert np.allclose(array["col"], [x[1] for x in t])

    array = package.get_resource("testy_biosphere_matrix.data")[0]
    assert array.shape == (2,)
    assert np.allclose(array, [x[2] for x in b])

    array = package.get_resource("testy_biosphere_matrix.indices")[0]
    assert array.shape == (2,)
    assert np.allclose(array["row"], [x[0] for x in b])
    assert np.allclose(array["col"], [x[1] for x in b])
Example 27
def test_normalization_process_row(reset):
    database = DatabaseChooser("foo")
    database.write({("foo", "bar"): {}})

    norm = Normalization(("foo",))
    norm.write([[("foo", "bar"), 42]])
    package = load_datapackage(ZipFS(norm.filepath_processed()))

    data = package.get_resource("foo_matrix_data.data")[0]
    assert np.allclose(data, [42])

    indices = package.get_resource("foo_matrix_data.indices")[0]
    assert np.allclose(indices["row"], get_id(("foo", "bar")))
    assert np.allclose(indices["col"], get_id(("foo", "bar")))
Example 28
def test_del_resource_group_filesystem(tmp_path):
    copy_fixture("tfd", tmp_path)
    dp = load_datapackage(OSFS(str(tmp_path)))

    reference_length = len(dp)
    assert "sa-data-vector.indices.npy" in [o.name for o in tmp_path.iterdir()]
    dp.del_resource_group("sa-data-vector")
    assert "sa-data-vector.indices.npy" not in [
        o.name for o in tmp_path.iterdir()
    ]
    assert len(dp) == reference_length - 3
    assert len(dp.data) == reference_length - 3
    assert len(dp.metadata["resources"]) == reference_length - 3
    assert len(dp.resources) == reference_length - 3
Example 29
def test_flip_masked():
    dp = load_datapackage(ZipFS(dirpath / "a-first.zip")).filter_by_attribute(
        "group", "x-third"
    )

    expected = np.array([True])
    am = ArrayMapper(array=np.array([2, 3]))
    mm = MappedMatrix(
        packages=[dp],
        matrix="matrix-b",
        row_mapper=am,
        custom_filter=lambda x: x["row"] < 3,
    )
    for group in mm.groups:
        assert np.allclose(group.flip, expected)
Example 30
def test_reset_index_modified(fixture):
    assert not fixture._modified

    reset_index(fixture, "vector-csv-rows")
    assert fixture._modified == set([fixture._get_index("vector.indices")])

    fixture = load_datapackage(
        OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing")))
    assert not fixture._modified

    reset_index(fixture, "csv-multiple")
    assert fixture._modified == set([
        fixture._get_index("vector.indices"),
        fixture._get_index("array.indices")
    ])