Example #1
def test_shape_mismatch_data():
    dp1 = create_datapackage()
    data_array = np.arange(10)
    indices_array = np.array([(x, y)
                              for x, y in zip(range(10), range(10, 20))],
                             dtype=INDICES_DTYPE)
    dp1.add_persistent_vector(
        matrix="sa_matrix",
        data_array=data_array,
        name="sa-data-vector",
        indices_array=indices_array,
    )

    dp2 = create_datapackage()
    data_array = np.arange(5)
    indices_array = np.array([(x, y) for x, y in zip(range(5), range(10, 15))],
                             dtype=INDICES_DTYPE)
    dp2.add_persistent_vector(
        matrix="sa_matrix",
        data_array=data_array,
        name="sa-data-vector2",
        indices_array=indices_array,
    )
    with pytest.raises(LengthMismatch):
        merge_datapackages_with_mask(
            first_dp=dp1,
            first_resource_group_label="sa-data-vector",
            second_dp=dp2,
            second_resource_group_label="sa-data-vector2",
            mask_array=np.zeros((5, ), dtype=bool),
        )
Example #2
def test_interface_error():
    dp1 = create_datapackage()
    data_array = np.arange(10)
    indices_array = np.array([(x, y)
                              for x, y in zip(range(10), range(10, 20))],
                             dtype=INDICES_DTYPE)
    dp1.add_persistent_vector(
        matrix="sa_matrix",
        data_array=data_array,
        name="sa-data-vector",
        indices_array=indices_array,
    )

    class Dummy:
        pass

    dp2 = create_datapackage()
    indices_array = np.array([(x, y)
                              for x, y in zip(range(10), range(10, 20))],
                             dtype=INDICES_DTYPE)
    dp2.add_dynamic_vector(
        interface=Dummy(),
        indices_array=indices_array,
        matrix="sa_matrix",
        name="sa-vector-interface",
    )
    with pytest.raises(ValueError):
        merge_datapackages_with_mask(
            first_dp=dp1,
            first_resource_group_label="sa-data-vector",
            second_dp=dp2,
            second_resource_group_label="sa-vector-interface",
            mask_array=np.zeros((10, ), dtype=bool),
        )
Example #3
def generate_local_sa_biosphere_datapackage(cutoff=1e-4, const_factor=10):

    lca = setup_bw_project_archetypes()
    uncertain_biosphere_exchanges = filter_uncertain_biosphere_exchanges(
        lca, cutoff)

    dp = bwp.create_datapackage(
        fs=ZipFS(str(DATA_DIR / "local-sa-biosphere.zip"), write=True),
        name="local sa biosphere",
    )

    amounts = np.array([exc.amount for exc in uncertain_biosphere_exchanges])
    num_samples = len(amounts)
    # Reshape to (num_samples, num_samples) so the element-wise product
    # with the diagonal scaling matrix is defined.
    data_array = np.tile(amounts, num_samples).reshape(num_samples, num_samples) * (
        np.diag(np.ones(num_samples) * const_factor))

    indices_array = np.array(
        [(exc.input.id, exc.output.id)
         for exc in uncertain_biosphere_exchanges],
        dtype=bwp.INDICES_DTYPE,
    )

    # All inputs -> all True
    flip_array = np.ones(len(indices_array), dtype=bool)

    dp.add_persistent_array(
        matrix="biosphere_matrix",
        data_array=data_array,
        name="local sa biosphere",
        indices_array=indices_array,
        flip_array=flip_array,
    )

    dp.finalize_serialization()
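
The data_array above is square, one column per local SA sample, and the element-wise product with the diagonal matrix keeps only the perturbed exchange in each column. A minimal sketch of the arithmetic with made-up amounts (nothing below comes from the project data):

import numpy as np

amounts = np.array([2.0, 5.0, 0.5])  # hypothetical exchange amounts
num_samples = len(amounts)
const_factor = 10

# Same construction as above: tile to a square matrix, then zero the
# off-diagonal entries via the element-wise product with a diagonal matrix.
data_array = np.tile(amounts, num_samples).reshape(num_samples, num_samples) * (
    np.diag(np.ones(num_samples) * const_factor))

# Column j is sample j: only exchange j appears, at const_factor times its amount.
# [[20.  0.  0.]
#  [ 0. 50.  0.]
#  [ 0.  0.  5.]]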
Example #4
def create_dp(vector, array, distributions):
    dp = bwp.create_datapackage()
    if distributions:
        dp.add_persistent_vector(
            matrix="foo",
            name="distributions",
            indices_array=np.array([(0, 0)], dtype=bwp.INDICES_DTYPE),
            distributions_array=np.array(
                [
                    (4, 0.5, np.nan, np.nan, 0.2, 0.8, False),
                ],
                dtype=bwp.UNCERTAINTY_DTYPE,
            ),
        )
    if vector:
        dp.add_persistent_vector(
            matrix="foo",
            name="vector",
            indices_array=np.array(
                [(10, 10), (12, 9), (14, 8), (18, 7)], dtype=bwp.INDICES_DTYPE
            ),
            data_array=np.array([11, 12.3, 14, 125]),
        )
    if array:
        dp.add_persistent_array(
            matrix="foo",
            name="array",
            indices_array=np.array(
                [(1, 0), (2, 1), (5, 1), (8, 1)], dtype=bwp.INDICES_DTYPE
            ),
            data_array=np.array([[1, 2.3, 4, 25]]).T,
        )
    return dp
Example #5
    def process(self, **extra_metadata):
        """Process intermediate data from a Python dictionary to a `stats_arrays <https://pypi.python.org/pypi/stats_arrays/>`_ array, which is a `NumPy <http://numpy.scipy.org/>`_ `Structured <http://docs.scipy.org/doc/numpy/reference/generated/numpy.recarray.html#numpy.recarray>`_ `Array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_. A structured array (also called a record array) is a heterogeneous array where each column has its own label and data type.

        Processed arrays are saved in the ``processed`` directory.

        If the uncertainty type is no uncertainty, undefined, or not specified, the 'amount' value is also used for 'loc'. This is needed for the random number generator.

        Doesn't return anything, but writes a file to disk.
        """
        data = self.load()
        dp = create_datapackage(
            fs=ZipFS(str(self.filepath_processed()), write=True),
            name=self.filename_processed(),
            sum_intra_duplicates=True,
            sum_inter_duplicates=False,
        )
        dp.add_persistent_vector_from_iterator(
            matrix=self.matrix,
            name=clean_datapackage_name(str(self.name) + " matrix data"),
            dict_iterator=(self.process_row(row) for row in data),
            nrows=len(data),
            **extra_metadata)
        dp.finalize_serialization()
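
One rule from the docstring is worth spelling out: rows whose uncertainty type is undefined (0) or "no uncertainty" (1), or that carry no 'loc' at all, reuse 'amount' as 'loc'. A minimal sketch of that rule, using the dict keys seen elsewhere in these examples; process_row itself is not shown in this excerpt, so this helper is hypothetical:

def process_row_sketch(row_id, col_id, amount, uncertainty_type=0, loc=None):
    # Types 0 (undefined) and 1 (no uncertainty) carry no real distribution,
    # so 'amount' doubles as 'loc' for the random number generator.
    if uncertainty_type in (0, 1) or loc is None:
        loc = amount
    return {
        "row": row_id,
        "col": col_id,
        "amount": amount,
        "uncertainty_type": uncertainty_type,
        "loc": loc,
    }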
Example #6
def create_ordering_datapackages():
    dp = create_datapackage(
        fs=ZipFS(str(dirpath / "a-first.zip"), write=True),
        name="test-fixture-a",
        id_="fixture-a",
    )
    add_data(dp)
    dp.finalize_serialization()

    dp = create_datapackage(
        fs=ZipFS(str(dirpath / "b-second.zip"), write=True),
        name="test-fixture-b",
        id_="fixture-b",
    )
    add_data(dp)
    dp.finalize_serialization()
Example #7
def test_integration_test_in_memory():
    dp = create_datapackage(fs=None, name="test-fixture", id_="fixture-42")
    assert isinstance(dp.fs, MemoryFS)
    add_data(dp)

    check_metadata(dp)
    check_data(dp)
Example #8
def erg():
    dp = create_datapackage(fs=None,
                            name="frg-fixture",
                            id_="something something danger zone")

    data_array = np.arange(3)
    indices_array = np.array([(0, 1), (2, 3), (4, 5)], dtype=INDICES_DTYPE)
    flip_array = np.array([1, 0, 1], dtype=bool)
    distributions_array = np.array(
        [
            (5, 1, 2, 3, 4, 5, False),
            (4, 1, 2, 3, 4, 5, False),
            (0, 1, 2, 3, 4, 5, False),
        ],
        dtype=UNCERTAINTY_DTYPE,
    )

    dp.add_persistent_vector(
        matrix="one",
        data_array=data_array,
        name="first",
        indices_array=indices_array,
        distributions_array=distributions_array,
        nrows=3,
        flip_array=flip_array,
    )
    dp.add_persistent_array(
        matrix="two",
        data_array=np.arange(12).reshape((3, 4)),
        indices_array=indices_array,
        name="second",
    )
    return dp
Example #9
def empty_biosphere():
    # Flow 1: The flow
    # Activity 1: The activity

    dp = create_datapackage(
        fs=ZipFS(str(fixture_dir / "empty_biosphere.zip"), write=True),
    )

    data_array = np.array([1, 2, 3])
    indices_array = np.array([(2, 1), (1, 1), (2, 2)], dtype=INDICES_DTYPE)
    flip_array = np.array([1, 0, 0], dtype=bool)
    dp.add_persistent_vector(
        matrix="technosphere_matrix",
        data_array=data_array,
        name="eb-technosphere",
        indices_array=indices_array,
        nrows=3,
        flip_array=flip_array,
    )

    data_array = np.array([1])
    indices_array = np.array([(1, 0)], dtype=INDICES_DTYPE)
    dp.add_persistent_vector(
        matrix="characterization_matrix",
        data_array=data_array,
        name="eb-characterization",
        indices_array=indices_array,
        global_index=0,
        nrows=1,
    )

    dp.finalize_serialization()
Example #10
def process_delta_database(name, tech, bio, dependents):
    """A modification of ``bw2data.backends.base.SQLiteBackend.process`` to skip retrieving data from the database."""
    print("Tech:", tech)
    print("Bio:", bio)

    db = bd.Database(name)
    db.metadata["processed"] = datetime.datetime.now().isoformat()

    # Create geomapping array, from dataset integer ids to locations
    inv_mapping_qs = ActivityDataset.select(
        ActivityDataset.id, ActivityDataset.location
    ).where(ActivityDataset.database == name, ActivityDataset.type == "process")

    # self.filepath_processed checks if data is dirty,
    # and processes if it is. This causes an infinite loop.
    # So we construct the filepath ourselves.
    fp = str(db.dirpath_processed() / db.filename_processed())

    dp = bwp.create_datapackage(
        fs=ZipFS(fp, write=True),
        name=bwp.clean_datapackage_name(name),
        sum_intra_duplicates=True,
        sum_inter_duplicates=False,
    )
    dp.add_persistent_vector_from_iterator(
        matrix="inv_geomapping_matrix",
        name=bwp.clean_datapackage_name(name + " inventory geomapping matrix"),
        dict_iterator=(
            {
                "row": row[0],
                "col": bd.geomapping[
                    bd.backends.utils.retupleize_geo_strings(row[1])
                    or bd.config.global_location
                ],
                "amount": 1,
            }
            for row in inv_mapping_qs.tuples()
        ),
        nrows=inv_mapping_qs.count(),
    )

    dp.add_persistent_vector_from_iterator(
        matrix="biosphere_matrix",
        name=bwp.clean_datapackage_name(name + " biosphere matrix"),
        dict_iterator=bio,
    )
    dp.add_persistent_vector_from_iterator(
        matrix="technosphere_matrix",
        name=bwp.clean_datapackage_name(name + " technosphere matrix"),
        dict_iterator=tech,
    )
    dp.finalize_serialization()

    db.metadata["depends"] = sorted(dependents.difference({name}))
    db.metadata["dirty"] = False
    db._metadata.flush()
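
The geomapping iterator above resolves each activity's location, falling back to the global location when the stored string is empty. A minimal sketch of that lookup, pulled out as a standalone helper (the helper name and arguments are illustrative):

import bw2data as bd

def geomapping_row(activity_id, location_string):
    # Empty or unparseable location strings fall back to the global
    # location before being mapped to an integer index via bd.geomapping.
    location = (bd.backends.utils.retupleize_geo_strings(location_string)
                or bd.config.global_location)
    return {"row": activity_id, "col": bd.geomapping[location], "amount": 1}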
Example #11
def aggregation(**kwargs):
    dp = bwp.create_datapackage(**kwargs)
    dp.add_persistent_vector(
        matrix="foo",
        name="vector",
        indices_array=np.array([(0, 0), (2, 1), (4, 2), (4, 2), (8, 3)],
                               dtype=bwp.INDICES_DTYPE),
        data_array=np.array([1, 2.3, 4, 17, 25]),
    )
    return dp
Example #12
def diagonal(**kwargs):
    dp = bwp.create_datapackage(**kwargs)
    dp.add_persistent_vector(
        matrix="foo",
        name="vector",
        indices_array=np.array([(0, 1), (1, 1), (2, 0), (3, 1)],
                               dtype=bwp.INDICES_DTYPE),
        data_array=np.array([1, 2.3, 4, 25]),
        flip_array=np.array([0, 1, 0, 0], dtype=bool),
    )
    return dp
Example #13
def test_add_persistent_array_data_shapemismatch_nrows():
    dp = create_datapackage()
    data_array = np.arange(12).reshape(4, 3)
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_array(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            indices_array=indices_array,
        )
Example #14
def test_add_persistent_vector_data_shapemismatch_ndimensions():
    dp = create_datapackage()
    data_array = np.array([[2, 7, 12], [4, 5, 15]])
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_vector(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            indices_array=indices_array,
        )
Example #15
def test_exclude_no_match():
    dp = create_datapackage()
    add_data(dp)
    assert isinstance(dp.fs, MemoryFS)

    reference_length = len(dp)
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    ndp = dp.exclude({"foo": "bar"})
    assert ndp is not dp
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    assert "sa-data-vector.indices" in [o["name"] for o in ndp.resources]
    assert len(ndp) == reference_length
Example #16
def test_add_dynamic_vector_flip_shapemismatch():
    dp = create_datapackage()
    flip_array = np.array([0, 1, 0, 1], dtype=bool)
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_dynamic_vector(
            matrix="sa_matrix",
            interface=Dummy(),
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Example #17
    def write_exchanges(self, technosphere, biosphere, dependents):
        """

        Write IO data directly to processed arrays.

        Product data is stored in SQLite as normal activities.
        Exchange data is written directly to NumPy structured arrays.

        Technosphere and biosphere data has format ``(row id, col id, value, flip)``.

        """
        print("Starting IO table write")

        dp = create_datapackage(
            fs=ZipFS(str(self.filepath_processed()), write=True),
            name=clean_datapackage_name(self.name),
            sum_intra_duplicates=True,
            sum_inter_duplicates=False,
        )

        dp.add_persistent_vector_from_iterator(
            dict_iterator=(
                {
                    "row": obj.id,
                    "col": geomapping[obj["location"] or config.global_location],
                    "amount": 1,
                }
                for obj in self
            ),
            matrix="inv_geomapping_matrix",
            name=clean_datapackage_name(self.name + " inventory geomapping matrix"),
            nrows=len(self),
        )
        print("Adding technosphere matrix")
        dp.add_persistent_vector_from_iterator(
            matrix="technosphere_matrix",
            name=clean_datapackage_name(self.name + " technosphere matrix"),
            dict_iterator=technosphere,
        )

        print("Adding biosphere matrix")
        dp.add_persistent_vector_from_iterator(
            matrix="biosphere_matrix",
            name=clean_datapackage_name(self.name + " biosphere matrix"),
            dict_iterator=biosphere,
        )
        dp.finalize_serialization()

        databases[self.name]["depends"] = sorted(
            set(dependents).difference({self.name}))
        databases[self.name]["processed"] = datetime.datetime.now().isoformat()
        databases.flush()
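
For reference, a minimal sketch of the (row id, col id, value, flip) records that the technosphere and biosphere iterators are expected to yield; the ids and amounts below are made up:

technosphere = iter([
    {"row": 101, "col": 101, "amount": 1.0, "flip": False},  # production exchange
    {"row": 102, "col": 101, "amount": 0.25, "flip": True},  # input, sign flipped
])
biosphere = iter([
    {"row": 301, "col": 101, "amount": 0.01, "flip": False},  # emission
])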
Example #18
def test_add_dynamic_vector_flip_dtype():
    dp = create_datapackage()
    flip_array = np.array([0, 1, 0])
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(WrongDatatype):
        dp.add_dynamic_vector(
            matrix="sa_matrix",
            interface=Dummy(),
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Example #19
def test_add_resource_with_same_name():
    dp = create_datapackage()
    add_data(dp)

    data_array = np.array([2, 7, 12])
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(NonUnique):
        dp.add_persistent_vector(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            indices_array=indices_array,
        )
Example #20
def test_del_resource_group_in_memory():
    dp = create_datapackage()
    add_data(dp)
    assert isinstance(dp.fs, MemoryFS)

    reference_length = len(dp)
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    dp.del_resource_group("sa-data-vector")
    assert "sa-data-vector.indices" not in [o["name"] for o in dp.resources]
    assert len(dp) == reference_length - 3
    assert len(dp.data) == reference_length - 3
    assert len(dp.metadata["resources"]) == reference_length - 3
    assert len(dp.resources) == reference_length - 3
Example #21
def test_add_persistent_array_flip_shapemismatch():
    dp = create_datapackage()
    data_array = np.arange(12).reshape(3, 4)
    flip_array = np.array([0, 1, 0, 1], dtype=bool)
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_array(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Example #22
def test_add_persistent_array_flip_dtype():
    dp = create_datapackage()
    data_array = np.arange(12).reshape(3, 4)
    flip_array = np.array([0, 1, 0])
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(WrongDatatype):
        dp.add_persistent_array(
            matrix="sa_matrix",
            data_array=data_array,
            name="sa-data-vector",
            flip_array=flip_array,
            indices_array=indices_array,
        )
Example #23
def test_reindex_custom_id_field_datapackage():
    dp = create_datapackage()
    add_data(dp, "bar")

    destination = [
        {"id": 21, "a": 1, "c": 3, "d": 11},
        {"id": 22, "a": 2, "c": 4, "d": 11},
        {"id": 23, "a": 1, "c": 4, "d": 11},
        {"id": 24, "a": 3, "c": 5, "d": 11},
        {"id": 25, "a": 4, "c": 5, "d": 11},
        {"id": 26, "a": 4, "c": 6, "d": 11},
    ]
    array, _ = dp.get_resource("vector.indices")
    df, _ = dp.get_resource("vector-csv-rows")
    assert np.allclose(array["row"], np.array([11, 11, 13]))
    assert np.allclose(df["bar"], np.array([11, 12, 13, 14, 15, 16]))

    reindex(dp, "vector-csv-rows", destination, id_field_datapackage="bar")

    assert np.allclose(array["row"], np.array([21, 21, 23]))
    assert np.allclose(df["bar"], np.array([21, 22, 23, 24, 25, 26]))
Example #24
def test_exclude_multiple_matrix():
    dp = create_datapackage()
    add_data(dp)
    assert isinstance(dp.fs, MemoryFS)

    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    ndp = dp.exclude({"matrix": "sa_matrix"})
    assert ndp is not dp
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    assert "sa-data-vector.indices" not in [o["name"] for o in ndp.resources]
    assert len(ndp) == 2
    assert len(ndp.data) == 2
    assert len(ndp.metadata["resources"]) == 2
    assert len(ndp.resources) == 2
Example #25
def test_exclude_basic():
    dp = create_datapackage()
    add_data(dp)
    assert isinstance(dp.fs, MemoryFS)

    reference_length = len(dp)
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    ndp = dp.exclude({"group": "sa-data-vector"})
    assert ndp is not dp
    assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
    assert "sa-data-vector.indices" not in [o["name"] for o in ndp.resources]
    assert len(ndp) == reference_length - 3
    assert len(ndp.data) == reference_length - 3
    assert len(ndp.metadata["resources"]) == reference_length - 3
    assert len(ndp.resources) == reference_length - 3
Example #26
def test_one_empty_after_custom_filter():
    s = bwp.create_datapackage()
    s.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(2),
        indices_array=np.array([(0, 0), (1, 0)], dtype=bwp.INDICES_DTYPE),
    )
    s.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(10, 12),
        indices_array=np.array([(0, 1), (1, 1)], dtype=bwp.INDICES_DTYPE),
    )
    mm = MappedMatrix(packages=[s],
                      matrix="foo",
                      custom_filter=lambda x: x['col'] > 0)
    assert mm.matrix.sum() == 21
Example #27
def test_all_empty_after_custom_filter():
    s = bwp.create_datapackage()
    s.add_persistent_vector(
        matrix="foo",
        data_array=np.arange(2),
        indices_array=np.array([(0, 0), (1, 0)], dtype=bwp.INDICES_DTYPE),
    )
    with pytest.raises(EmptyArray):
        MappedMatrix(packages=[s],
                     matrix="foo",
                     custom_filter=lambda x: x['col'] > 0)

    assert MappedMatrix(packages=[s],
                        matrix="foo",
                        custom_filter=lambda x: x['col'] > 0,
                        empty_ok=True)
Example #28
def test_integration_test_fs_temp_directory():
    with tempfile.TemporaryDirectory() as td:
        dp = create_datapackage(fs=OSFS(td),
                                name="test-fixture",
                                id_="fixture-42")
        add_data(dp)
        dp.finalize_serialization()

        check_metadata(dp)
        check_data(dp)

        loaded = load_datapackage(OSFS(td))

        check_metadata(loaded, False)
        check_data(loaded)

        loaded.fs.close()
Example #29
def test_add_persistent_vector_distributions_shapemismatch():
    dp = create_datapackage()
    distributions_array = np.array(
        [
            (3, 1.3, 2.5, np.nan, np.nan, np.nan, False),
            (0, 1.3, 2.5, np.nan, np.nan, np.nan, False),
        ],
        dtype=UNCERTAINTY_DTYPE,
    )
    indices_array = np.array([(1, 4), (2, 5), (3, 6)], dtype=INDICES_DTYPE)
    with pytest.raises(ShapeMismatch):
        dp.add_persistent_vector(
            matrix="sa_matrix",
            distributions_array=distributions_array,
            name="sa-data-vector",
            indices_array=indices_array,
        )
Example #30
def test_exclude_multiple_filters():
    dp = create_datapackage()
    add_data(dp)
    assert isinstance(dp.fs, MemoryFS)

    reference_length = len(dp)
    assert "sa-array-interface.indices" in [o["name"] for o in dp.resources]
    ndp = dp.exclude({"group": "sa-array-interface", "matrix": "sa_matrix"})
    assert ndp is not dp
    assert "sa-array-interface.indices" in [o["name"] for o in dp.resources]
    assert "sa-array-interface.indices" not in [
        o["name"] for o in ndp.resources
    ]
    assert len(ndp) == reference_length - 2
    assert len(ndp.data) == reference_length - 2
    assert len(ndp.metadata["resources"]) == reference_length - 2
    assert len(ndp.resources) == reference_length - 2