Example #1
def test_add_data(data):
    d = Datasource(name="test")

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    assert ch4_data["ch4"][0] == pytest.approx(1960.24)
    assert ch4_data["ch4 stdev"][0] == pytest.approx(0.236)
    assert ch4_data["ch4 n_meas"][0] == pytest.approx(26.0)

    d.add_data(metadata=metadata, data=ch4_data)

    date_key = "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"

    assert d._data[date_key]["ch4"].equals(ch4_data["ch4"])
    assert d._data[date_key]["ch4 stdev"].equals(ch4_data["ch4 stdev"])
    assert d._data[date_key]["ch4 n_meas"].equals(ch4_data["ch4 n_meas"])

    datasource_metadata = d.metadata()

    assert datasource_metadata["data_type"] == "timeseries"
    assert datasource_metadata["inlet"] == "248m"
    assert datasource_metadata["instrument"] == "picarro"
    assert datasource_metadata["port"] == "8"
    assert datasource_metadata["site"] == "bsd"
    assert datasource_metadata["species"] == "ch4"
Example #2
def test_load_dataset():
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filepath = os.path.join(dir_path, test_data, filename)

    ds = xarray.load_dataset(filepath)

    metadata = {"some": "metadata"}

    d = Datasource("dataset_test")

    d.add_data(metadata=metadata, data=ds, data_type="footprint")

    d.save()

    keys = d._data_keys["latest"]["keys"]

    key = list(keys.values())[0]

    bucket = get_local_bucket()

    loaded_ds = Datasource.load_dataset(bucket=bucket, key=key)

    assert loaded_ds.equals(ds)
Example #3
def test_save_footprint():
    bucket = get_local_bucket(empty=True)

    metadata = {"test": "testing123"}

    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filename = "WAO-20magl_EUROPE_201306_downsampled.nc"
    filepath = os.path.join(dir_path, test_data, filename)

    data = xarray.open_dataset(filepath)

    datasource = Datasource(name="test_name")
    datasource.add_data(metadata=metadata, data=data, data_type="footprint")
    datasource.save()

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"
    objs = get_object_names(bucket, prefix)

    datasource_2 = Datasource.load(bucket=bucket, key=objs[0])

    date_key = "2013-06-02-00:00:00+00:00_2013-06-30-00:00:00+00:00"

    data = datasource_2._data[date_key]

    assert float(data.pressure[0].values) == pytest.approx(1023.971)
    assert float(data.pressure[2].values) == pytest.approx(1009.940)
    assert float(data.pressure[-1].values) == pytest.approx(1021.303)
Example #4
def assign_data(gas_data, lookup_results, overwrite):
    """ Create or get an existing Datasource for each gas in the file

        Args:
            gas_data (dict): Dictionary containing data and metadata for each species
            lookup_results (dict): Results of Datasource lookup for each species
            overwrite (bool): Should existing data be overwritten
        Returns:
            dict: UUIDs of the Datasources the data has been assigned to, keyed by Datasource name
    """
    from HUGS.Modules import Datasource

    uuids = {}
    # Add in copying of attributes, or add attributes to the metadata at an earlier stage.
    for species in gas_data:
        metadata = gas_data[species]["metadata"]
        data = gas_data[species]["data"]
        name = lookup_results[species]["name"]
        uuid = lookup_results[species]["uuid"]

        # If we have a UUID for this Datasource load the existing object
        # from the object store
        if uuid:
            datasource = Datasource.load(uuid=uuid)
        else:
            datasource = Datasource(name=name)

        # Add the dataframe to the datasource
        datasource.add_data(metadata=metadata, data=data, overwrite=overwrite)
        # Save Datasource to object store
        datasource.save()

        uuids[name] = datasource.uuid()

    return uuids
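
For context, here is a minimal, hypothetical sketch of how assign_data above might be called. The shapes of gas_data and lookup_results are assumptions inferred from the loop body; ch4_dataframe and the Datasource name "bsd_ch4_248m" are illustrative placeholders rather than anything taken from the HUGS test suite.

# Hypothetical usage sketch -- input shapes inferred from the loop in assign_data.
# `ch4_dataframe` is a placeholder for already-parsed CH4 measurement data.
gas_data = {
    "ch4": {
        "metadata": {"site": "bsd", "species": "ch4", "inlet": "248m"},
        "data": ch4_dataframe,
    }
}

# One lookup entry per species; "uuid" is falsy when no Datasource exists yet,
# so a new Datasource is created with the given name.
lookup_results = {
    "ch4": {"name": "bsd_ch4_248m", "uuid": False},
}

uuids = assign_data(gas_data=gas_data, lookup_results=lookup_results, overwrite=False)
# -> {"bsd_ch4_248m": "<datasource-uuid>"}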
Example #5
def test_incorrect_datatype_raises(data):
    d = Datasource(name="testing_123")

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    with pytest.raises(TypeError):
        d.add_data(metadata=metadata, data=ch4_data, data_type="CRDS")
Example #6
def test_in_daterange(data):
    metadata = data["ch4"]["metadata"]
    data = data["ch4"]["data"]

    d = Datasource()
    d.add_data(metadata=metadata, data=data)
    d.save()

    start = pd.Timestamp("2014-1-1")
    end = pd.Timestamp("2014-2-1")

    daterange = create_daterange_str(start=start, end=end)

    d._data_keys["latest"]["2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"] = ['data/uuid/ace2bb89-7618-4104-9404-a329c2bcd318/v1/2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00']
    d._data_keys["latest"]["2015-01-30-10:52:30+00:00_2016-01-30-14:20:30+00:00"] = ['data/uuid/ace2bb89-7618-4104-9404-a329c2bcd318/v1/2015-01-30-10:52:30+00:00_2016-01-30-14:20:30+00:00']
    d._data_keys["latest"]["2016-01-31-10:52:30+00:00_2017-01-30-14:20:30+00:00"] = ['data/uuid/ace2bb89-7618-4104-9404-a329c2bcd318/v1/2016-01-31-10:52:30+00:00_2017-01-30-14:20:30+00:00']

    keys = d.in_daterange(daterange=daterange)

    assert keys[0].split("/")[-1] == '2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00'
Example #7
def test_update_daterange_replacement(data):
    metadata = {"foo": "bar"}

    d = Datasource(name="foo")

    ch4_data = data["ch4"]["data"]

    d.add_data(metadata=metadata, data=ch4_data)

    assert d._start_datetime == pd.Timestamp("2014-01-30 10:52:30+00:00")
    assert d._end_datetime == pd.Timestamp("2014-01-30 14:20:30+00:00")

    ch4_short = ch4_data.head(40)

    d._data = None

    d.add_data(metadata=metadata, data=ch4_short, overwrite=True)

    assert d._start_datetime == pd.Timestamp("2014-01-30 10:52:30+00:00")
    assert d._end_datetime == pd.Timestamp("2014-01-30 13:22:30+00:00")
Example #8
def test_from_data(data):
    d = Datasource(name="testing_123")

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")

    obj_data = d.to_data()

    bucket = get_local_bucket()

    # Create a new object with the data from d
    d_2 = Datasource.from_data(bucket=bucket, data=obj_data, shallow=False)

    metadata = d_2.metadata()
    assert metadata["site"] == "bsd"
    assert metadata["instrument"] == "picarro"
    assert metadata["time_resolution"] == "1_minute"
    assert metadata["inlet"] == "248m"

    assert d_2.to_data() == d.to_data()
Example #9
def test_to_data(data):
    d = Datasource(name="testing_123")

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    assert ch4_data["ch4"][0] == pytest.approx(1960.24)
    assert ch4_data["ch4 stdev"][0] == pytest.approx(0.236)
    assert ch4_data["ch4 n_meas"][0] == pytest.approx(26.0)

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")

    obj_data = d.to_data()

    metadata = obj_data["metadata"]
    assert obj_data["name"] == "testing_123"
    assert metadata["site"] == "bsd"
    assert metadata["instrument"] == "picarro"
    assert metadata["time_resolution"] == "1_minute"
    assert metadata["inlet"] == "248m"
    assert obj_data["data_type"] == "timeseries"
    assert len(obj_data["data_keys"]) == 0
Example #10
    def assign_data(self,
                    lookup_results,
                    source_name,
                    data,
                    metadata,
                    overwrite=False):
        """ Assign data to a new or existing Datasource

            Args:
                lookup_results (dict): Results of Datasource lookup
                source_name (str): Name of data source
                data (xarray.Dataset): Data
                metadata (dict): Dictionary of metadata
                overwrite (bool, default=False): Should existing data be overwritten
            Returns:
                dict: Dictionary of Datasource UUIDs keyed by name
        """
        from HUGS.Modules import Datasource

        uuids = {}
        for key in lookup_results:
            uuid = lookup_results[key]["uuid"]
            name = metadata["name"]

            if uuid:
                datasource = Datasource.load(uuid=uuid)
            else:
                datasource = Datasource(name=name)

            datasource.add_data(metadata=metadata,
                                data=data,
                                data_type="footprint")
            datasource.save()

            uuids[name] = datasource.uuid()

        return uuids
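
A similarly hedged call sketch for the method variant above. The object that owns this assign_data method is not shown in the snippet, so processor stands in for it, and fp_dataset plus the metadata values are illustrative placeholders.

# Hypothetical call sketch; `processor` is whatever object owns this method and
# `fp_dataset` is an xarray.Dataset loaded from a footprint file.
metadata = {"name": "WAO-20magl_EUROPE"}
lookup_results = {"WAO-20magl_EUROPE": {"uuid": False}}

uuids = processor.assign_data(
    lookup_results=lookup_results,
    source_name="WAO-20magl_EUROPE",
    data=fp_dataset,
    metadata=metadata,
)
# Note that in the body above the Datasource name comes from metadata["name"],
# not from the source_name argument.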
Example #11
def test_versioning(data):
    # Take head of data
    # Then add the full data, check versioning works correctly
    metadata = {"foo": "bar"}

    d = Datasource(name="foo")
    # Fix the UUID for the tests
    d._uuid = "4b91f73e-3d57-47e4-aa13-cb28c35d3b3d"

    ch4_data = data["ch4"]["data"]

    v1 = ch4_data.head(20)
    v2 = ch4_data.head(30)
    v3 = ch4_data.head(40)

    d.add_data(metadata=metadata, data=v1)

    d.save()

    d.add_data(metadata=metadata, data=v2)

    d.save()

    d.add_data(metadata=metadata, data=v3)

    d.save()

    keys = d.versions()

    assert (
        keys["v1"]["keys"]["2014-01-30-10:52:30+00:00_2014-01-30-12:20:30+00:00"]
        == "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v1/2014-01-30-10:52:30+00:00_2014-01-30-12:20:30+00:00"
    )

    assert list(keys["v2"]["keys"].values()) == [
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v2/2014-01-30-10:52:30+00:00_2014-01-30-13:12:30+00:00"
    ]

    assert list(keys["v3"]["keys"].values()) == [
        "data/uuid/4b91f73e-3d57-47e4-aa13-cb28c35d3b3d/v3/2014-01-30-10:52:30+00:00_2014-01-30-13:22:30+00:00"
    ]

    assert keys["v3"]["keys"] == keys["latest"]["keys"]