Example #1
    def _store(self, met_data: METData) -> None:
        """Store MET data within a Datasource

        Here we extract some fields from the request JSON to
        make the metadata more easily searchable and of a similar
        format to Datasources used in other modules of OpenGHG.

        Args:
            met_data: METData holding the Dataset and its metadata
        Returns:
            None
        """
        from openghg.store.base import Datasource

        metadata = met_data.metadata

        datasource = Datasource()
        datasource.add_data(metadata=metadata,
                            data=met_data.data,
                            data_type="met")
        datasource.save()

        date_str = f"{metadata['start_date']}_{metadata['end_date']}"

        name = "_".join((metadata["site"], metadata["network"], date_str))
        self._datasource_uuids[datasource.uuid()] = name
        # Write this updated object back to the object store
        self.save()
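For context, a minimal sketch of how a method like _store might be called. The METStore owner class and the exact shape of METData are assumptions inferred from the method body above, which reads met_data.data and met_data.metadata:

# Hypothetical illustration only: METData is assumed to bundle an
# xarray Dataset with a metadata dict holding at least the
# site/network/start_date/end_date keys used by _store above.
met_data = METData(
    data=met_dataset,  # an xarray.Dataset of meteorological fields
    metadata={
        "site": "CGO",
        "network": "AGAGE",
        "start_date": "2012-01-01",
        "end_date": "2012-12-31",
    },
)

met_store = METStore()  # hypothetical owner of _store / _datasource_uuids
met_store._store(met_data=met_data)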
Example #2
def test_load_dataset():
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filepath = os.path.join(dir_path, test_data, filename)

    ds = xr.load_dataset(filepath)

    metadata = {"some": "metadata"}

    d = Datasource()

    d.add_data(metadata=metadata, data=ds, data_type="footprints")

    d.save()

    keys = d._data_keys["latest"]["keys"]

    key = list(keys.values())[0]

    bucket = get_local_bucket()

    loaded_ds = Datasource.load_dataset(bucket=bucket, key=key)

    assert loaded_ds.equals(ds)
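These snippets are excerpts from OpenGHG's test suite, so they omit the module-level imports and rely on pytest fixtures (such as data and mocked_uuid2) defined elsewhere. A minimal sketch of the preamble the examples assume; the openghg helper module paths are my assumption and may differ between OpenGHG versions:

import os

import pandas as pd
import pytest
import xarray as xr

from openghg.store.base import Datasource
# Helper locations assumed; check your OpenGHG version.
from openghg.objectstore import get_local_bucket, get_object_names
from openghg.util import create_daterange_str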
Example #3
def test_in_daterange(data):
    metadata = data["ch4"]["metadata"]
    data = data["ch4"]["data"]

    d = Datasource()
    d._uuid = "test-id-123"
    d.add_data(metadata=metadata, data=data, data_type="timeseries")
    d.save()

    expected_keys = [
        "data/uuid/test-id-123/v1/2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "data/uuid/test-id-123/v1/2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "data/uuid/test-id-123/v1/2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "data/uuid/test-id-123/v1/2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "data/uuid/test-id-123/v1/2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "data/uuid/test-id-123/v1/2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "data/uuid/test-id-123/v1/2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
    ]

    assert d.data_keys() == expected_keys

    start = pd.Timestamp("2014-1-1")
    end = pd.Timestamp("2014-2-1")
    daterange = create_daterange_str(start=start, end=end)

    dated_keys = d.keys_in_daterange_str(daterange=daterange)

    assert dated_keys[0].split("/")[-1] == "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00"
Example #4
def test_exists():
    d = Datasource()
    d.save()

    exists = Datasource.exists(datasource_id=d.uuid())

    assert exists is True
Example #5
def test_save_footprint():
    bucket = get_local_bucket(empty=True)

    metadata = {"test": "testing123"}

    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filename = "WAO-20magl_EUROPE_201306_downsampled.nc"
    filepath = os.path.join(dir_path, test_data, filename)

    data = xr.open_dataset(filepath)

    datasource = Datasource()
    datasource.add_data(data=data, metadata=metadata, data_type="footprints")
    datasource.save()

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"
    objs = get_object_names(bucket, prefix)

    datasource_2 = Datasource.load(bucket=bucket, key=objs[0])

    date_key = "2013-06-02-00:00:00+00:00_2013-06-30-00:00:00+00:00"

    data = datasource_2._data[date_key]

    assert float(data.pressure[0].values) == pytest.approx(1023.971)
    assert float(data.pressure[2].values) == pytest.approx(1009.940)
    assert float(data.pressure[-1].values) == pytest.approx(1021.303)

    assert datasource_2._data_type == "footprints"
Example #6
def assign_data(
    data_dict: Dict,
    lookup_results: Dict,
    overwrite: bool,
    data_type: str = "timeseries",
) -> Dict[str, str]:
    """Assign data to a Datasource. This will either create a new Datasource
    Create or get an existing Datasource for each gas in the file

        Args:
            data_dict: Dictionary containing data and metadata for species
            lookup_results: Dictionary of lookup results]
            overwrite: If True overwrite current data stored
        Returns:
            dict: Dictionary of UUIDs of Datasources data has been assigned to keyed by species name
    """
    from openghg.store.base import Datasource

    uuids = {}

    for key in data_dict:
        metadata = data_dict[key]["metadata"]
        data = data_dict[key]["data"]

        # Our lookup results and gas data have the same keys
        uuid = lookup_results[key]

        # TODO - Could this be done somewhere else? It doesn't feel
        # quite right for it to be here

        # Add the read metadata to the Dataset attributes being careful
        # not to overwrite any attributes that are already there
        to_add = {k: v for k, v in metadata.items() if k not in data.attrs}
        data.attrs.update(to_add)

        # If the lookup gave us a UUID for this Datasource, load the existing
        # object from the object store; otherwise create a new Datasource
        if uuid is False:
            datasource = Datasource()
        else:
            datasource = Datasource.load(uuid=uuid)

        # Add the data to the Datasource
        datasource.add_data(metadata=metadata,
                            data=data,
                            overwrite=overwrite,
                            data_type=data_type)
        # Save Datasource to object store
        datasource.save()

        uuids[key] = datasource.uuid()

    return uuids
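A minimal usage sketch for assign_data, assuming the caller has already run a metadata lookup; the ch4_dataset variable and the metadata and lookup values shown are illustrative placeholders:

# data_dict and lookup_results share keys (one per species).
data_dict = {
    "ch4": {"metadata": {"site": "bsd", "species": "ch4"}, "data": ch4_dataset}
}
# False means "no existing Datasource", so a new one is created;
# otherwise the value is the UUID of the Datasource to load.
lookup_results = {"ch4": False}

uuids = assign_data(
    data_dict=data_dict,
    lookup_results=lookup_results,
    overwrite=False,
    data_type="timeseries",
)
# uuids maps species name -> UUID of the Datasource the data went into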
Example #7
def test_save(mocked_uuid2):
    bucket = get_local_bucket()

    datasource = Datasource()
    datasource.add_metadata_key(key="data_type", value="timeseries")
    datasource.save(bucket)

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"

    objs = get_object_names(bucket, prefix)

    assert objs[0].split("/")[-1] == mocked_uuid2
Example #8
def test_add_data(data):
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    assert ch4_data["ch4"][0] == pytest.approx(1959.55)
    assert ch4_data["ch4_variability"][0] == pytest.approx(0.79)
    assert ch4_data["ch4_number_of_observations"][0] == pytest.approx(26.0)

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()
    bucket = get_local_bucket()

    data_chunks = [
        Datasource.load_dataset(bucket=bucket, key=k) for k in d.data_keys()
    ]

    # Now read it out and make sure it's what we expect
    combined = xr.concat(data_chunks, dim="time")

    assert combined.equals(ch4_data)

    expected_metadata = {
        "site": "bsd",
        "instrument": "picarro",
        "sampling_period": "60",
        "inlet": "248m",
        "port": "9",
        "type": "air",
        "network": "decc",
        "species": "ch4",
        "scale": "wmo-x2004a",
        "long_name": "bilsdale",
        "data_owner": "simon o'doherty",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "248m",
        "comment": "cavity ring-down measurements. output from gcwerks",
        "source": "in situ measurements of air",
        "conventions": "cf-1.6",
        "calibration_scale": "wmo-x2004a",
        "station_longitude": -1.15033,
        "station_latitude": 54.35858,
        "station_long_name": "bilsdale, uk",
        "station_height_masl": 380.0,
        "data_type": "timeseries",
    }

    assert d.metadata() == expected_metadata
Example #9
def test_shallow_then_load_data(data):
    metadata = data["ch4"]["metadata"]
    data = data["ch4"]["data"]

    d = Datasource()
    d.add_data(metadata=metadata, data=data, data_type="timeseries")
    d.save()

    new_d = Datasource.load(uuid=d.uuid(), shallow=True)

    assert not new_d._data

    ds_data = new_d.data()

    assert ds_data

    ch4_data = ds_data["2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00"]

    assert ch4_data.time[0] == pd.Timestamp("2014-01-30-11:12:30")
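With shallow=True the Datasource is loaded without its data, as the empty _data check shows; the data is only fetched from the object store when data() is called.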
Example #10
def test_versioning(data):
    # Add progressively longer heads of the data and
    # check that versioning works correctly
    metadata = {"foo": "bar"}

    d = Datasource()
    # Fix the UUID for the tests
    d._uuid = "4b91f73e-3d57-47e4-aa13-cb28c35d3b3d"

    ch4_data = data["ch4"]["data"]

    v1 = ch4_data.head(20)
    v2 = ch4_data.head(30)
    v3 = ch4_data.head(40)

    d.add_data(metadata=metadata, data=v1, data_type="timeseries")

    d.save()

    d.add_data(metadata=metadata, data=v2, data_type="timeseries")

    d.save()

    d.add_data(metadata=metadata, data=v3, data_type="timeseries")

    d.save()

    keys = d.versions()

    assert keys["v1"]["keys"] == {
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v1/2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v1/2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00",
    }
    assert keys["v2"]["keys"] == {
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v2/2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v2/2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:17:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v2/2015-01-30-11:12:30+00:00_2015-11-30-11:17:30+00:00",
    }
    assert keys["v3"]["keys"] == {
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2015-01-30-11:12:30+00:00_2015-01-30-11:19:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:17:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2015-01-30-11:12:30+00:00_2015-11-30-11:17:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-04-02-06:55:30+00:00":
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2016-04-02-06:52:30+00:00_2016-04-02-06:55:30+00:00",
    }

    assert keys["v3"]["keys"] == keys["latest"]["keys"]
Example #11
def test_from_data(data):
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()

    obj_data = d.to_data()

    bucket = get_local_bucket()

    # Create a new object with the data from d
    d_2 = Datasource.from_data(bucket=bucket, data=obj_data, shallow=False)

    metadata = d_2.metadata()
    assert metadata["site"] == "bsd"
    assert metadata["instrument"] == "picarro"
    assert metadata["sampling_period"] == "60"
    assert metadata["inlet"] == "248m"

    assert sorted(d_2.data_keys()) == sorted(d.data_keys())
    assert d_2.metadata() == d.metadata()
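to_data serializes the Datasource and from_data rebuilds it from that form; with shallow=False the stored data is loaded as well, so the copy's data keys and metadata match the original exactly.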