Example #1
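All of these snippets are drawn from the OpenGHG test suite and share a common set of imports, sketched below. The module paths are assumptions (they have moved between OpenGHG releases), and the get_*_datapath functions are local test helpers that resolve paths to the suite's data files, not library API.

import os

import pytest
import xarray as xr
from pandas import Timestamp
from xarray import open_dataset

# Module paths below are assumptions; they vary between OpenGHG releases.
from openghg.store import Datasource, Emissions, EulerianModel, Footprints, ObsSurface
from openghg.objectstore import exists, get_local_bucket, get_object_names
from openghg.retrieve import search
from openghg.processing import recombine_datasets
from openghg.standardise.surface import parse_gcwerks

# Hypothetical local test helpers: path resolvers such as get_datapath,
# get_emissions_datapath and get_footprint_datapath (plus get_fp_datapath,
# get_flux_datapath and hfd_filepath) and attributes_checker_obssurface.
from helpers import get_datapath, get_emissions_datapath, get_footprint_datapath
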
def test_read_cranfield():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="THB_hourly_means_test.csv",
                                 data_type="Cranfield_CRDS")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="CRANFIELD",
                                   site="THB",
                                   network="CRANFIELD")

    expected_keys = ["ch4", "co", "co2"]

    assert sorted(results["processed"]
                  ["THB_hourly_means_test.csv"].keys()) == expected_keys

    uuid = results["processed"]["THB_hourly_means_test.csv"]["ch4"]

    ch4_data = Datasource.load(uuid=uuid, shallow=False).data()
    ch4_data = ch4_data["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4_data.time[0] == Timestamp("2018-05-05")
    assert ch4_data.time[-1] == Timestamp("2018-05-13T16:00:00")

    assert ch4_data["ch4"][0] == pytest.approx(2585.651)
    assert ch4_data["ch4"][-1] == pytest.approx(1999.018)

    assert ch4_data["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4_data["ch4 variability"][-1] == pytest.approx(6.48413)
Example #2
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(species="co2", source="gpp-cardamom", date="2012", domain="europe", data_type="emissions")

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    emissions_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data.lat.equals(emissions_data.lat)
    assert orig_data.lon.equals(emissions_data.lon)
    assert orig_data.time.equals(emissions_data.time)
    assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    del metadata["processed"]
    del metadata["prior_file_1_version"]

    assert metadata == expected_metadata
Example #3
def data_read():
    '''
    Set up the data needed to run the tests for these modules.
    '''
    get_local_bucket(empty=True)

    # Files for creating forward model (mf_mod) for methane at TAC site

    # Observation data
    #  - TAC at 100m for 201208
    site = "tac"
    network = "DECC"
    data_type = "CRDS"

    tac_path = get_datapath(filename="tac.picarro.1minute.100m.201208.dat",
                            data_type="CRDS")
    ObsSurface.read_file(filepath=tac_path,
                         data_type=data_type,
                         site=site,
                         network=network)

    # Emissions data
    # Anthropogenic ch4 (methane) data from 2012 for EUROPE
    species = "ch4"
    source = "anthro"
    domain = "EUROPE"

    emissions_datapath = get_emissions_datapath("ch4-anthro_EUROPE_2012.nc")

    Emissions.read_file(
        filepath=emissions_datapath,
        species=species,
        source=source,
        date="2012",
        domain=domain,
        high_time_resolution=False,
    )

    # Footprint data
    # TAC footprint from 2012-08 - 2012-09 at 100m
    height = "100m"
    model = "NAME"

    fp_datapath = get_footprint_datapath("TAC-100magl_EUROPE_201208.nc")

    Footprints.read_file(filepath=fp_datapath,
                         site=site,
                         model=model,
                         network=network,
                         height=height,
                         domain=domain)
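data_read populates the object store but is not itself a test. A minimal sketch of how it might be hooked into pytest follows; the fixture name, scope and autouse setting are assumptions, not taken from the suite.

@pytest.fixture(scope="module", autouse=True)
def load_data():
    # Run the setup once per test module so every test sees the
    # observation, emissions and footprint data loaded above.
    data_read()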
Example #4
def test_delete_Datasource():
    bucket = get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv",
                                 data_type="THAMESBARRIER")

    ObsSurface.read_file(filepath=data_filepath,
                         data_type="THAMESBARRIER",
                         site="tmb",
                         network="LGHG",
                         sampling_period=60)

    obs = ObsSurface.load()

    datasources = obs.datasources()

    uuid = datasources[0]

    datasource = Datasource.load(uuid=uuid)

    data_keys = datasource.data_keys()

    key = data_keys[0]

    assert exists(bucket=bucket, key=key)

    obs.delete(uuid=uuid)

    assert uuid not in obs.datasources()

    assert not exists(bucket=bucket, key=key)
Example #5
def test_load_dataset():
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filepath = os.path.join(dir_path, test_data, filename)

    ds = xr.load_dataset(filepath)

    metadata = {"some": "metadata"}

    d = Datasource()

    d.add_data(metadata=metadata, data=ds, data_type="footprints")

    d.save()

    keys = d._data_keys["latest"]["keys"]

    key = list(keys.values())[0]

    bucket = get_local_bucket()

    loaded_ds = Datasource.load_dataset(bucket=bucket, key=key)

    assert loaded_ds.equals(ds)
Example #6
def test_save_footprint():
    bucket = get_local_bucket(empty=True)

    metadata = {"test": "testing123"}

    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filename = "WAO-20magl_EUROPE_201306_downsampled.nc"
    filepath = os.path.join(dir_path, test_data, filename)

    data = xr.open_dataset(filepath)

    datasource = Datasource()
    datasource.add_data(data=data, metadata=metadata, data_type="footprints")
    datasource.save()

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"
    objs = get_object_names(bucket, prefix)

    datasource_2 = Datasource.load(bucket=bucket, key=objs[0])

    date_key = "2013-06-02-00:00:00+00:00_2013-06-30-00:00:00+00:00"

    data = datasource_2._data[date_key]

    assert float(data.pressure[0].values) == pytest.approx(1023.971)
    assert float(data.pressure[2].values) == pytest.approx(1009.940)
    assert float(data.pressure[-1].values) == pytest.approx(1021.303)

    assert datasource_2._data_type == "footprints"
Example #7
def test_add_new_data_correct_datasource():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.05.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.05.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    first_results = results["processed"]["capegrim-medusa.05.C"]

    sorted_keys = sorted(first_results.keys())

    assert sorted_keys[:4] == [
        'c2cl4_10m', 'c2cl4_70m', 'c2f6_10m', 'c2f6_70m'
    ]
    assert sorted_keys[-4:] == [
        'hfc32_70m', 'sf6_70m', 'so2f2_10m', 'so2f2_70m'
    ]
    assert len(sorted_keys) == 69

    data_filepath = get_datapath(filename="capegrim-medusa.06.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.06.precisions.C", data_type="GC")

    new_results = ObsSurface.read_file(filepath=(data_filepath,
                                                 precision_filepath),
                                       data_type="GCWERKS",
                                       site="CGO",
                                       network="AGAGE")

    second_results = new_results["processed"]["capegrim-medusa.06.C"]

    shared_keys = [key for key in first_results if key in second_results]

    assert len(shared_keys) == 67

    for key in shared_keys:
        assert first_results[key] == second_results[key]
Example #8
def co2_setup():
    get_local_bucket(empty=True)

    data_type = "CRDS"

    tac_file = get_datapath(filename="tac.picarro.hourly.100m.test.dat",
                            data_type=data_type)
    tac_footprint = get_fp_datapath("TAC-100magl_UKV_co2_TEST_201407.nc")
    co2_emissions = get_flux_datapath("co2-rtot-cardamom-2hr_TEST_2014.nc")

    site = "tac"
    species = "co2"
    network = "DECC"
    height = "100m"

    domain = "TEST"
    model = "NAME"
    metmodel = "UKV"

    source = "rtot-cardamom"
    date = "2014"

    ObsSurface.read_file(filepath=tac_file,
                         data_type=data_type,
                         site=site,
                         network=network,
                         inlet=height)

    Footprints.read_file(filepath=tac_footprint,
                         site=site,
                         height=height,
                         domain=domain,
                         model=model,
                         metmodel=metmodel,
                         species=species)

    Emissions.read_file(filepath=co2_emissions,
                        species=species,
                        source=source,
                        domain=domain,
                        date=date,
                        high_time_resolution=True)
Example #9
def test_upload_same_file_twice_raises():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv",
                                 data_type="THAMESBARRIER")

    ObsSurface.read_file(filepath=data_filepath,
                         data_type="THAMESBARRIER",
                         site="tmb",
                         network="LGHG",
                         sampling_period=60)

    with pytest.raises(ValueError):
        ObsSurface.read_file(filepath=data_filepath,
                             data_type="THAMESBARRIER",
                             site="tmb",
                             network="LGHG",
                             sampling_period=60)
Example #10
def test_save(mock_uuid2):
    bucket = get_local_bucket()

    datasource = Datasource()
    datasource.add_metadata_key(key="data_type", value="timeseries")
    datasource.save(bucket)

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"

    objs = get_object_names(bucket, prefix)

    # mocked_uuid2 is assumed to be a module-level constant that the
    # mock_uuid2 fixture patches UUID generation to return
    assert objs[0].split("/")[-1] == mocked_uuid2
Example #11
def test_recombination_GC():
    get_local_bucket(empty=True)

    data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision = get_datapath(filename="capegrim-medusa.18.precisions.C",
                             data_type="GC")

    ObsSurface.read_file((data, precision),
                         data_type="GCWERKS",
                         site="cgo",
                         network="agage")

    data = parse_gcwerks(data_filepath=data,
                         precision_filepath=precision,
                         site="CGO",
                         instrument="medusa",
                         network="AGAGE")

    toluene_data = data["c6h5ch3_70m"]["data"]

    species = "c6h5ch3"
    site = "CGO"
    inlet = "70m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    toluene_data_recombined = recombine_datasets(keys=keys)

    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["c6h5ch3"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["c6h5ch3_repeatability"].equals(
        toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["c6h5ch3_status_flag"].equals(
        toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["c6h5ch3_integration_flag"].equals(
        toluene_data_recombined["c6h5ch3_integration_flag"])
Example #12
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    proc_results = EulerianModel.read_file(filepath=test_datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in proc_results

    search_results = search(species="ch4", model="geoschem", start_date="2015-01-01", data_type="eulerian_model")

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    eulerian_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data["lat"].equals(eulerian_data["lat"])
    assert orig_data["lon"].equals(eulerian_data["lon"])
    assert orig_data["time"].equals(eulerian_data["time"])
    assert orig_data["lev"].equals(eulerian_data["lev"])
    assert orig_data["SpeciesConc_CH4"].equals(eulerian_data["SpeciesConc_CH4"])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    for key, expected_value in expected_metadata_values.items():
        assert metadata[key] == expected_value
Example #13
def test_retrieve(met_object):
    # met_object is assumed to be a fixture returning a retrieved met
    # dataset exposing .metadata and .data attributes
    met = met_object

    # Empty the object store to force retrieval
    get_local_bucket(empty=True)

    expected_metadata = {
        "product_type": "reanalysis",
        "format": "netcdf",
        "variable": ["u_component_of_wind", "v_component_of_wind"],
        "pressure_level": ["975", "1000"],
        "area": [-40.5, 144.5, -40.75, 144.75],
        "site": "CGO",
        "network": "AGAGE",
        "start_date": "2012-01-01 00:00:00+00:00",
        "end_date": "2012-12-31 00:00:00+00:00",
    }

    assert met.metadata == expected_metadata

    assert met.data["longitude"][0] == 144.5
    assert met.data["latitude"][0] == -40.5
    assert met.data["level"][0] == 975
Example #14
def test_add_data(data):
    # data is assumed to be a fixture supplying parsed CRDS data keyed by species
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    assert ch4_data["ch4"][0] == pytest.approx(1959.55)
    assert ch4_data["ch4_variability"][0] == pytest.approx(0.79)
    assert ch4_data["ch4_number_of_observations"][0] == pytest.approx(26.0)

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()
    bucket = get_local_bucket()

    data_chunks = [
        Datasource.load_dataset(bucket=bucket, key=k) for k in d.data_keys()
    ]

    # Now read it out and make sure it's what we expect
    combined = xr.concat(data_chunks, dim="time")

    assert combined.equals(ch4_data)

    expected_metadata = {
        "site": "bsd",
        "instrument": "picarro",
        "sampling_period": "60",
        "inlet": "248m",
        "port": "9",
        "type": "air",
        "network": "decc",
        "species": "ch4",
        "scale": "wmo-x2004a",
        "long_name": "bilsdale",
        "data_owner": "simon o'doherty",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "248m",
        "comment": "cavity ring-down measurements. output from gcwerks",
        "source": "in situ measurements of air",
        "conventions": "cf-1.6",
        "calibration_scale": "wmo-x2004a",
        "station_longitude": -1.15033,
        "station_latitude": 54.35858,
        "station_long_name": "bilsdale, uk",
        "station_height_masl": 380.0,
        "data_type": "timeseries",
    }

    assert d.metadata() == expected_metadata
Example #15
def test_read_noaa_raw():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(
        filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="NOAA",
                                   site="POCN25",
                                   network="NOAA",
                                   inlet="flask")

    uuid = results["processed"]["co_pocn25_surface-flask_1_ccgg_event.txt"][
        "co"]

    co_data = Datasource.load(uuid=uuid, shallow=False).data()

    assert sorted(list(co_data.keys())) == [
        "1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00",
        "2009-06-13-16:32:00+00:00_2009-12-03-00:30:00+00:00",
        "2010-01-10-00:13:00+00:00_2010-12-09-16:05:00+00:00",
        "2011-01-27-04:55:00+00:00_2011-11-11-14:45:00+00:00",
        "2016-12-03-12:37:00+00:00_2016-12-18-05:30:00+00:00",
        "2017-01-27-19:10:00+00:00_2017-07-15-04:15:00+00:00",
    ]

    co_data = co_data["1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00"]

    assert co_data["co"][0] == pytest.approx(94.9)
    assert co_data["co"][-1] == pytest.approx(95.65)

    assert co_data["co_repeatability"][0] == pytest.approx(-999.99)
    assert co_data["co_repeatability"][-1] == pytest.approx(-999.99)

    assert co_data["co_selection_flag"][0] == 0
    assert co_data["co_selection_flag"][-1] == 0
Example #16
def load_CRDS():
    get_local_bucket(empty=True)

    tac_100m = get_datapath("tac.picarro.1minute.100m.min.dat",
                            data_type="CRDS")
    hfd_50m = get_datapath("hfd.picarro.1minute.50m.min.dat", data_type="CRDS")
    bsd_42m = get_datapath("bsd.picarro.1minute.42m.min.dat", data_type="CRDS")
    bsd_108m = get_datapath("bsd.picarro.1minute.108m.min.dat",
                            data_type="CRDS")
    bsd_248m = get_datapath("bsd.picarro.1minute.248m.min.dat",
                            data_type="CRDS")

    ObsSurface.read_file(filepath=tac_100m,
                         data_type="CRDS",
                         site="tac",
                         network="DECC")
    ObsSurface.read_file(filepath=hfd_50m,
                         data_type="CRDS",
                         site="hfd",
                         network="DECC")
    ObsSurface.read_file(filepath=[bsd_42m, bsd_108m, bsd_248m],
                         data_type="CRDS",
                         site="bsd",
                         network="DECC")
Example #17
def test_from_data(data):
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()

    obj_data = d.to_data()

    bucket = get_local_bucket()

    # Create a new object with the data from d
    d_2 = Datasource.from_data(bucket=bucket, data=obj_data, shallow=False)

    metadata = d_2.metadata()
    assert metadata["site"] == "bsd"
    assert metadata["instrument"] == "picarro"
    assert metadata["sampling_period"] == "60"
    assert metadata["inlet"] == "248m"

    assert sorted(d_2.data_keys()) == sorted(d.data_keys())
    assert d_2.metadata() == d.metadata()
Example #18
def data_read():
    get_local_bucket(empty=True)

    # DECC network sites
    network = "DECC"
    bsd_248_path = get_datapath(filename="bsd.picarro.1minute.248m.min.dat", data_type="CRDS")
    bsd_108_path = get_datapath(filename="bsd.picarro.1minute.108m.min.dat", data_type="CRDS")
    bsd_42_path = get_datapath(filename="bsd.picarro.1minute.42m.min.dat", data_type="CRDS")

    bsd_paths = [bsd_248_path, bsd_108_path, bsd_42_path]

    bsd_results = ObsSurface.read_file(filepath=bsd_paths, data_type="CRDS", site="bsd", network=network)

    hfd_100_path = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")
    hfd_50_path = get_datapath(filename="hfd.picarro.1minute.50m.min.dat", data_type="CRDS")
    hfd_paths = [hfd_100_path, hfd_50_path]

    ObsSurface.read_file(filepath=hfd_paths, data_type="CRDS", site="hfd", network=network)

    tac_path = get_datapath(filename="tac.picarro.1minute.100m.test.dat", data_type="CRDS")
    ObsSurface.read_file(filepath=tac_path, data_type="CRDS", site="tac", network=network)

    # GCWERKS data (AGAGE network sites)
    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    prec_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file(filepath=(data_filepath, prec_filepath), site="CGO", data_type="GCWERKS", network="AGAGE")

    mhd_data_filepath = get_datapath(filename="macehead.12.C", data_type="GC")
    mhd_prec_filepath = get_datapath(filename="macehead.12.precisions.C", data_type="GC")

    ObsSurface.read_file(filepath=(mhd_data_filepath, mhd_prec_filepath), site="MHD", data_type="GCWERKS", network="AGAGE", instrument="GCMD")

    # Set ranking information for BSD
    obs = ObsSurface.load()

    uid_248 = bsd_results["processed"]["bsd.picarro.1minute.248m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_248, rank=1, date_range="2012-01-01_2013-01-01")

    uid_108 = bsd_results["processed"]["bsd.picarro.1minute.108m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_108, rank=1, date_range="2014-09-02_2014-11-01")

    obs.set_rank(uuid=uid_248, rank=1, date_range="2015-01-01_2015-11-01")

    obs.set_rank(uuid=uid_108, rank=1, date_range="2016-09-02_2018-11-01")

    uid_42 = bsd_results["processed"]["bsd.picarro.1minute.42m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_42, rank=1, date_range="2019-01-02_2021-01-01")

    # Emissions data
    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    # Footprint data
    datapath = get_footprint_datapath("footprint_test.nc")

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )
Example #19
def test_read_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.18.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    # 30/11/2021: Species labels were updated to be standardised in line with variable naming
    # This list of expected labels was updated.
    expected_keys = [
        'c2cl4_70m', 'c2f6_70m', 'c2h2_70m', 'c2h6_70m', 'c2hcl3_70m',
        'c3f8_70m', 'c3h8_70m', 'c4f10_70m', 'c4f8_70m', 'c6f14_70m',
        'c6h5ch3_70m', 'c6h6_70m', 'cc3h8_70m', 'ccl4_70m', 'cf4_70m',
        'cfc112_70m', 'cfc113_70m', 'cfc114_70m', 'cfc115_70m', 'cfc11_70m',
        'cfc12_70m', 'cfc13_70m', 'ch2br2_70m', 'ch2cl2_70m', 'ch3br_70m',
        'ch3ccl3_70m', 'ch3cl_70m', 'ch3i_70m', 'chbr3_70m', 'chcl3_70m',
        'cos_70m', 'desflurane_70m', 'halon1211_70m', 'halon1301_70m',
        'halon2402_70m', 'hcfc124_70m', 'hcfc132b_70m', 'hcfc133a_70m',
        'hcfc141b_70m', 'hcfc142b_70m', 'hcfc22_70m', 'hfc125_70m',
        'hfc134a_70m', 'hfc143a_70m', 'hfc152a_70m', 'hfc227ea_70m',
        'hfc236fa_70m', 'hfc23_70m', 'hfc245fa_70m', 'hfc32_70m',
        'hfc365mfc_70m', 'hfc4310mee_70m', 'nf3_70m', 'sf5cf3_70m', 'sf6_70m',
        'so2f2_70m'
    ]

    assert sorted(list(
        results["processed"]["capegrim-medusa.18.C"].keys())) == expected_keys

    # Load in some data
    uuid = results["processed"]["capegrim-medusa.18.C"]["hfc152a_70m"]

    hfc152a_data = Datasource.load(uuid=uuid, shallow=False).data()
    hfc152a_data = hfc152a_data[
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"]

    assert hfc152a_data.time[0] == Timestamp("2018-01-01T02:24:00")
    assert hfc152a_data.time[-1] == Timestamp("2018-01-31T23:33:00")

    assert hfc152a_data["hfc152a"][0] == 4.409
    assert hfc152a_data["hfc152a"][-1] == 4.262

    assert hfc152a_data["hfc152a_repeatability"][0] == 0.03557
    assert hfc152a_data["hfc152a_repeatability"][-1] == 0.03271

    assert hfc152a_data["hfc152a_status_flag"][0] == 0
    assert hfc152a_data["hfc152a_status_flag"][-1] == 0

    assert hfc152a_data["hfc152a_integration_flag"][0] == 0
    assert hfc152a_data["hfc152a_integration_flag"][-1] == 0

    # Check we have the Datasource info saved
    obs = ObsSurface.load()

    assert sorted(obs._datasource_uuids.values()) == expected_keys

    attrs = hfc152a_data.attrs

    assert attributes_checker_obssurface(attrs=attrs, species="hfc152a")

    # Now test that if we add more data it adds it to the same Datasource
    uuid_one = obs.datasources()[0]

    datasource = Datasource.load(uuid=uuid_one)

    data_one = datasource.data()
    assert list(data_one.keys()) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"
    ]

    data_filepath = get_datapath(filename="capegrim-medusa.future.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.future.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    datasource = Datasource.load(uuid=uuid_one)
    data_one = datasource.data()

    assert sorted(list(data_one.keys())) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00",
        "2023-01-01-02:24:00+00:00_2023-01-31-23:33:00+00:00",
    ]

    data_filepath = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_filepath = get_datapath(filename="trinidadhead.01.precisions.C",
                                      data_type="GC")

    ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="THD",
        instrument="gcmd",
        network="AGAGE",
    )

    obs = ObsSurface.load()
    table = obs._datasource_table

    assert table["cgo"]["agage"]["nf3"]["70m"]
    assert table["cgo"]["agage"]["hfc236fa"]["70m"]
    assert table["cgo"]["agage"]["halon1211"]["70m"]

    assert table["thd"]["agage"]["cfc11"]["10m"]
    assert table["thd"]["agage"]["n2o"]["10m"]
    assert table["thd"]["agage"]["ccl4"]["10m"]
Example #20
def test_read_footprint():
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")
    # model_params = {"simulation_params": "123"}

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]

    footprint_keys = footprint_results[fp_site_key]["keys"]
    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == ["height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"]

    assert (
        footprint_data.attrs["heights"]
        == [
            500.0,
            1500.0,
            2500.0,
            3500.0,
            4500.0,
            5500.0,
            6500.0,
            7500.0,
            8500.0,
            9500.0,
            10500.0,
            11500.0,
            12500.0,
            13500.0,
            14500.0,
            15500.0,
            16500.0,
            17500.0,
            18500.0,
            19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    footprint_data["pressure"].max().values == pytest.approx(1011.92)
    footprint_data["fp_low"].min().values == 0.0
    footprint_data["fp_high"].min().values == 0.0
    footprint_data["pressure"].min().values == pytest.approx(1011.92)
Example #21
def test_read_CRDS():
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.min.dat",
                            data_type="CRDS")
    results = ObsSurface.read_file(filepath=filepath,
                                   data_type="CRDS",
                                   site="bsd",
                                   network="DECC")

    keys = results["processed"]["bsd.picarro.1minute.248m.min.dat"].keys()

    assert sorted(keys) == ["ch4", "co", "co2"]

    # Load up the assigned Datasources and check they contain the correct data
    data = results["processed"]["bsd.picarro.1minute.248m.min.dat"]

    ch4_data = Datasource.load(uuid=data["ch4"]).data()
    ch4_data = ch4_data["2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00"]

    assert ch4_data.time[0] == Timestamp("2014-01-30T11:12:30")
    assert ch4_data["ch4"][0] == 1959.55
    assert ch4_data["ch4"][-1] == 1962.8
    assert ch4_data["ch4_variability"][-1] == 1.034
    assert ch4_data["ch4_number_of_observations"][-1] == 26.0

    obs = ObsSurface.load()
    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)

    data_keys = list(datasource.data().keys())

    expected_keys = [
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
    ]

    assert data_keys == expected_keys

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.future.dat",
                            data_type="CRDS")
    results = ObsSurface.read_file(filepath=filepath,
                                   data_type="CRDS",
                                   site="bsd",
                                   network="DECC")

    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)
    data_keys = sorted(list(datasource.data().keys()))

    new_expected_keys = [
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
        "2023-01-30-13:56:30+00:00_2023-01-30-14:20:30+00:00",
    ]

    assert data_keys == new_expected_keys

    table = obs._datasource_table
    assert table["bsd"]["decc"]["ch4"]["248m"]
    assert table["bsd"]["decc"]["co2"]["248m"]
    assert table["bsd"]["decc"]["co"]["248m"]
def populate_store():
    get_local_bucket(empty=True)
    filepath = hfd_filepath()
    ObsSurface.read_file(filepath=filepath, data_type="CRDS", site="hfd")