def test_add_data(data):
    """Adding CH4 data to a Datasource stores the series and merges metadata."""
    datasource = Datasource(name="test")
    ch4_metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    # Sanity-check the fixture before handing it to the Datasource
    expected_first_values = {"ch4": 1960.24, "ch4 stdev": 0.236, "ch4 n_meas": 26.0}
    for column, value in expected_first_values.items():
        assert ch4_data[column][0] == pytest.approx(value)

    datasource.add_data(metadata=ch4_metadata, data=ch4_data)

    # Data is stored under a key spanning the data's daterange
    date_key = "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"
    for column in ("ch4", "ch4 stdev", "ch4 n_meas"):
        assert datasource._data[date_key][column].equals(ch4_data[column])

    expected_metadata = {
        "data_type": "timeseries",
        "inlet": "248m",
        "instrument": "picarro",
        "port": "8",
        "site": "bsd",
        "species": "ch4",
    }
    stored_metadata = datasource.metadata()
    for key, value in expected_metadata.items():
        assert stored_metadata[key] == value
def test_load_dataset():
    """A Dataset saved through a Datasource can be read back unchanged."""
    dir_path = os.path.dirname(__file__)
    filepath = os.path.join(dir_path, "../data/emissions", "WAO-20magl_EUROPE_201306_small.nc")
    dataset = xarray.load_dataset(filepath)

    datasource = Datasource("dataset_test")
    datasource.add_data(metadata={"some": "metadata"}, data=dataset, data_type="footprint")
    datasource.save()

    # Grab the key of the first chunk stored under the latest version
    latest_keys = datasource._data_keys["latest"]["keys"]
    first_key = next(iter(latest_keys.values()))

    bucket = get_local_bucket()
    loaded_dataset = Datasource.load_dataset(bucket=bucket, key=first_key)

    assert loaded_dataset.equals(dataset)
def test_save_footprint():
    """Footprint data saved to the object store can be reloaded with its values intact."""
    bucket = get_local_bucket(empty=True)
    metadata = {"test": "testing123"}

    dir_path = os.path.dirname(__file__)
    filepath = os.path.join(dir_path, "../data/emissions", "WAO-20magl_EUROPE_201306_downsampled.nc")
    # load_dataset reads eagerly and closes the file; open_dataset (used before)
    # leaked the file handle and kept the dataset lazily backed by the file.
    # This also matches how test_load_dataset reads its test file.
    footprint_data = xarray.load_dataset(filepath)

    datasource = Datasource(name="test_name")
    datasource.add_data(metadata=metadata, data=footprint_data, data_type="footprint")
    datasource.save()

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"
    objs = get_object_names(bucket, prefix)

    datasource_2 = Datasource.load(bucket=bucket, key=objs[0])

    date_key = "2013-06-02-00:00:00+00:00_2013-06-30-00:00:00+00:00"
    stored = datasource_2._data[date_key]

    # Spot-check pressure values at the start, middle and end of the record
    assert float(stored.pressure[0].values) == pytest.approx(1023.971)
    assert float(stored.pressure[2].values) == pytest.approx(1009.940)
    assert float(stored.pressure[-1].values) == pytest.approx(1021.303)
def assign_data(gas_data, lookup_results, overwrite):
    """Create or get an existing Datasource for each gas in the file.

    Args:
        gas_data (dict): Dictionary keyed by species, each value holding
            "data" and "metadata" entries for that species
        lookup_results (dict): Datasource lookup results keyed by species,
            each holding the Datasource "name" and "uuid" (uuid is falsy
            when no Datasource exists yet)
        overwrite (bool): Should existing data in a Datasource be overwritten
    Returns:
        dict: Dictionary of UUIDs of Datasources data has been assigned to,
        keyed by Datasource name
    """
    from HUGS.Modules import Datasource

    uuids = {}
    # TODO: add copying of attributes, or add attributes to the metadata at
    # an earlier stage
    for species, species_data in gas_data.items():
        metadata = species_data["metadata"]
        data = species_data["data"]

        name = lookup_results[species]["name"]
        uuid = lookup_results[species]["uuid"]

        # If we have a UUID for this Datasource load the existing object
        # from the object store, otherwise create a fresh one
        if uuid:
            datasource = Datasource.load(uuid=uuid)
        else:
            datasource = Datasource(name=name)

        # Add the dataframe to the datasource
        datasource.add_data(metadata=metadata, data=data, overwrite=overwrite)
        # Save Datasource to object store
        datasource.save()

        uuids[name] = datasource.uuid()

    return uuids
def test_incorrect_datatype_raises(data):
    """Passing an unrecognised data_type to add_data raises a TypeError."""
    datasource = Datasource(name="testing_123")
    ch4 = data["ch4"]

    with pytest.raises(TypeError):
        datasource.add_data(metadata=ch4["metadata"], data=ch4["data"], data_type="CRDS")
def test_in_daterange(data):
    """in_daterange returns only the keys whose dateranges overlap the query."""
    ch4_metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    d = Datasource()
    d.add_data(metadata=ch4_metadata, data=ch4_data)
    d.save()

    daterange = create_daterange_str(start=pd.Timestamp("2014-1-1"), end=pd.Timestamp("2014-2-1"))

    # Register three stored keys; only the first overlaps the query daterange
    key_root = "data/uuid/ace2bb89-7618-4104-9404-a329c2bcd318/v1/"
    stored_dateranges = (
        "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00",
        "2015-01-30-10:52:30+00:00_2016-01-30-14:20:30+00:00",
        "2016-01-31-10:52:30+00:00_2017-01-30-14:20:30+00:00",
    )
    for stored_daterange in stored_dateranges:
        d._data_keys["latest"][stored_daterange] = [key_root + stored_daterange]

    keys = d.in_daterange(daterange=daterange)

    assert keys[0].split("/")[-1] == "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"
def test_update_daterange_replacement(data):
    """Overwriting a Datasource's data updates its start/end datetimes."""
    metadata = {"foo": "bar"}
    ch4_data = data["ch4"]["data"]

    d = Datasource(name="foo")
    d.add_data(metadata=metadata, data=ch4_data)

    assert d._start_datetime == pd.Timestamp("2014-01-30 10:52:30+00:00")
    assert d._end_datetime == pd.Timestamp("2014-01-30 14:20:30+00:00")

    # Replace the data with a shorter slice; the end datetime should shrink
    truncated = ch4_data.head(40)
    d._data = None
    d.add_data(metadata=metadata, data=truncated, overwrite=True)

    assert d._start_datetime == pd.Timestamp("2014-01-30 10:52:30+00:00")
    assert d._end_datetime == pd.Timestamp("2014-01-30 13:22:30+00:00")
def test_from_data(data):
    """from_data reconstructs a Datasource equivalent to the original."""
    d = Datasource(name="testing_123")
    d.add_data(metadata=data["ch4"]["metadata"], data=data["ch4"]["data"], data_type="timeseries")

    serialised = d.to_data()
    bucket = get_local_bucket()

    # Create a new object with the data from d
    rebuilt = Datasource.from_data(bucket=bucket, data=serialised, shallow=False)

    expected_metadata = {
        "site": "bsd",
        "instrument": "picarro",
        "time_resolution": "1_minute",
        "inlet": "248m",
    }
    rebuilt_metadata = rebuilt.metadata()
    for key, value in expected_metadata.items():
        assert rebuilt_metadata[key] == value

    assert rebuilt.to_data() == d.to_data()
def test_to_data(data):
    """to_data serialises the name, metadata, data type and (empty) data keys."""
    d = Datasource(name="testing_123")
    ch4_data = data["ch4"]["data"]

    # Sanity-check the fixture values first
    assert ch4_data["ch4"][0] == pytest.approx(1960.24)
    assert ch4_data["ch4 stdev"][0] == pytest.approx(0.236)
    assert ch4_data["ch4 n_meas"][0] == pytest.approx(26.0)

    d.add_data(metadata=data["ch4"]["metadata"], data=ch4_data, data_type="timeseries")
    serialised = d.to_data()

    assert serialised["name"] == "testing_123"
    assert serialised["data_type"] == "timeseries"
    # No save() has happened yet, so no object-store keys exist
    assert len(serialised["data_keys"]) == 0

    serialised_metadata = serialised["metadata"]
    assert serialised_metadata["site"] == "bsd"
    assert serialised_metadata["instrument"] == "picarro"
    assert serialised_metadata["time_resolution"] == "1_minute"
    assert serialised_metadata["inlet"] == "248m"
def assign_data(self, lookup_results, source_name, data, metadata, overwrite=False):
    """Assign data to a new or existing Datasource.

    Args:
        lookup_results (dict): Results of Datasource lookup; each value holds
            the "uuid" of an existing Datasource (falsy if none exists yet)
        source_name (str): Name of data source
            NOTE(review): currently unused — the Datasource name is read from
            metadata["name"]; kept for interface compatibility
        data (xarray.Dataset): Data
        metadata (dict): Dictionary of metadata; must contain a "name" key
        overwrite (bool, default=False): Should existing data be overwritten
    Returns:
        dict: Dictionary of Datasource UUIDs keyed by Datasource name
    """
    from HUGS.Modules import Datasource

    uuids = {}
    # The name comes from the metadata, not from each lookup result, so it is
    # loop-invariant — read it once
    name = metadata["name"]

    for result in lookup_results.values():
        uuid = result["uuid"]

        # Load the existing Datasource if we have its UUID, else create one
        if uuid:
            datasource = Datasource.load(uuid=uuid)
        else:
            datasource = Datasource(name=name)

        datasource.add_data(metadata=metadata, data=data, data_type="footprint")
        datasource.save()

        uuids[name] = datasource.uuid()

    return uuids
def test_versioning(data):
    """Each successive add_data/save creates a new version with its own keys."""
    metadata = {"foo": "bar"}

    d = Datasource(name="foo")
    # Pin the UUID so the expected object-store keys are deterministic
    d._uuid = "4b91f73e-3d57-47e4-aa13-cb28c35d3b3d"

    ch4_data = data["ch4"]["data"]

    # Save three progressively longer slices of the same data so that each
    # save produces a new version
    for n_rows in (20, 30, 40):
        d.add_data(metadata=metadata, data=ch4_data.head(n_rows))
        d.save()

    keys = d.versions()

    root = "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d"
    assert (
        keys["v1"]["keys"]["2014-01-30-10:52:30+00:00_2014-01-30-12:20:30+00:00"]
        == f"{root}/v1/2014-01-30-10:52:30+00:00_2014-01-30-12:20:30+00:00"
    )
    assert list(keys["v2"]["keys"].values()) == [
        f"{root}/v2/2014-01-30-10:52:30+00:00_2014-01-30-13:12:30+00:00"
    ]
    assert list(keys["v3"]["keys"].values()) == [
        f"{root}/v3/2014-01-30-10:52:30+00:00_2014-01-30-13:22:30+00:00"
    ]
    # "latest" always mirrors the newest version
    assert keys["v3"]["keys"] == keys["latest"]["keys"]