def test_read_cranfield():
    """Process a Cranfield CRDS file and check the resulting ch4 Datasource.

    Verifies the processed species keys and the first/last timestamps and
    values of the stored methane data.
    """
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="THB_hourly_means_test.csv", data_type="Cranfield_CRDS")
    results = ObsSurface.read_file(
        filepath=filepath, data_type="CRANFIELD", site="THB", network="CRANFIELD"
    )

    processed = results["processed"]["THB_hourly_means_test.csv"]
    assert sorted(processed.keys()) == ["ch4", "co", "co2"]

    ch4_uuid = processed["ch4"]
    ch4_data = Datasource.load(uuid=ch4_uuid, shallow=False).data()
    ch4_data = ch4_data["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4_data.time[0] == Timestamp("2018-05-05")
    assert ch4_data.time[-1] == Timestamp("2018-05-13T16:00:00")

    assert ch4_data["ch4"][0] == pytest.approx(2585.651)
    assert ch4_data["ch4"][-1] == pytest.approx(1999.018)
    assert ch4_data["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4_data["ch4 variability"][-1] == pytest.approx(6.48413)
def test_read_file():
    """Read a CARDAMOM GPP flux file into Emissions and compare the stored
    data and metadata against the original NetCDF contents.
    """
    get_local_bucket(empty=True)

    datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(
        species="co2", source="gpp-cardamom", date="2012", domain="europe", data_type="emissions"
    )

    first_key = list(search_results.keys())[0]
    data_keys = search_results[first_key]["keys"]

    emissions_data = recombine_datasets(keys=data_keys, sort=False)
    metadata = search_results[first_key]["metadata"]

    # Stored data should be identical to the raw file contents
    orig_data = open_dataset(datapath)
    assert orig_data.lat.equals(emissions_data.lat)
    assert orig_data.lon.equals(emissions_data.lon)
    assert orig_data.time.equals(emissions_data.time)
    assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    # These keys vary between runs / versions, so drop them before comparison
    del metadata["processed"]
    del metadata["prior_file_1_version"]

    assert metadata == expected_metadata
def data_read():
    """Data set up for running tests for these sets of modules.

    Populates an emptied object store with the observation, emissions and
    footprint data needed to build a forward model (mf_mod) for methane
    at the TAC site.
    """
    get_local_bucket(empty=True)

    # Observation data: TAC at 100m for 201208
    site = "tac"
    network = "DECC"
    data_type = "CRDS"

    obs_path = get_datapath(filename="tac.picarro.1minute.100m.201208.dat", data_type="CRDS")
    ObsSurface.read_file(filepath=obs_path, data_type=data_type, site=site, network=network)

    # Emissions data: anthropogenic ch4 (methane) from 2012 for EUROPE
    species = "ch4"
    source = "anthro"
    domain = "EUROPE"

    flux_path = get_emissions_datapath("ch4-anthro_EUROPE_2012.nc")
    Emissions.read_file(
        filepath=flux_path,
        species=species,
        source=source,
        date="2012",
        domain=domain,
        high_time_resolution=False,
    )

    # Footprint data: TAC footprint from 2012-08 - 2012-09 at 100m
    height = "100m"
    model = "NAME"

    fp_path = get_footprint_datapath("TAC-100magl_EUROPE_201208.nc")
    Footprints.read_file(
        filepath=fp_path,
        site=site,
        model=model,
        network=network,
        height=height,
        domain=domain,
    )
def test_delete_Datasource():
    """Deleting a Datasource through ObsSurface should remove both its UUID
    record and its stored data keys from the object store.
    """
    bucket = get_local_bucket(empty=True)

    filepath = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")
    ObsSurface.read_file(
        filepath=filepath,
        data_type="THAMESBARRIER",
        site="tmb",
        network="LGHG",
        sampling_period=60,
    )

    obs = ObsSurface.load()
    first_uuid = obs.datasources()[0]

    datasource = Datasource.load(uuid=first_uuid)
    first_key = datasource.data_keys()[0]

    assert exists(bucket=bucket, key=first_key)

    obs.delete(uuid=first_uuid)

    assert first_uuid not in obs.datasources()
    assert not exists(bucket=bucket, key=first_key)
def test_load_dataset():
    """A Dataset saved through a Datasource should round-trip unchanged
    via Datasource.load_dataset.
    """
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    this_dir = os.path.dirname(__file__)
    filepath = os.path.join(this_dir, "../data/emissions", filename)

    ds = xr.load_dataset(filepath)

    d = Datasource()
    d.add_data(metadata={"some": "metadata"}, data=ds, data_type="footprints")
    d.save()

    latest_keys = d._data_keys["latest"]["keys"]
    first_key = list(latest_keys.values())[0]

    bucket = get_local_bucket()
    loaded_ds = Datasource.load_dataset(bucket=bucket, key=first_key)

    assert loaded_ds.equals(ds)
def test_save_footprint():
    """Save footprint data through a Datasource and check the reloaded copy
    holds the expected pressure values and data type.
    """
    bucket = get_local_bucket(empty=True)

    this_dir = os.path.dirname(__file__)
    filepath = os.path.join(this_dir, "../data/emissions", "WAO-20magl_EUROPE_201306_downsampled.nc")

    data = xr.open_dataset(filepath)

    datasource = Datasource()
    datasource.add_data(data=data, metadata={"test": "testing123"}, data_type="footprints")
    datasource.save()

    prefix = f"{Datasource._datasource_root}/uuid/{datasource._uuid}"
    objs = get_object_names(bucket, prefix)

    reloaded = Datasource.load(bucket=bucket, key=objs[0])

    date_key = "2013-06-02-00:00:00+00:00_2013-06-30-00:00:00+00:00"
    stored = reloaded._data[date_key]

    assert float(stored.pressure[0].values) == pytest.approx(1023.971)
    assert float(stored.pressure[2].values) == pytest.approx(1009.940)
    assert float(stored.pressure[-1].values) == pytest.approx(1021.303)

    assert reloaded._data_type == "footprints"
def test_add_new_data_correct_datasource():
    """Reading a second GCWERKS file for the same site should assign shared
    species to the same Datasources as the first file.
    """
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.05.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.05.precisions.C", data_type="GC")

    results = ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    first_results = results["processed"]["capegrim-medusa.05.C"]

    sorted_keys = sorted(first_results.keys())

    assert sorted_keys[:4] == ["c2cl4_10m", "c2cl4_70m", "c2f6_10m", "c2f6_70m"]
    assert sorted_keys[-4:] == ["hfc32_70m", "sf6_70m", "so2f2_10m", "so2f2_70m"]
    assert len(sorted_keys) == 69

    data_filepath = get_datapath(filename="capegrim-medusa.06.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.06.precisions.C", data_type="GC")

    new_results = ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    second_results = new_results["processed"]["capegrim-medusa.06.C"]

    # Species present in both files should map to identical Datasource UUIDs
    shared_keys = [key for key in first_results if key in second_results]
    assert len(shared_keys) == 67

    for key in shared_keys:
        assert first_results[key] == second_results[key]
def co2_setup():
    """Populate an emptied object store with TAC co2 observations, a matching
    footprint and high-time-resolution CARDAMOM flux data for the TEST domain.
    """
    get_local_bucket(empty=True)

    data_type = "CRDS"
    site = "tac"
    species = "co2"
    network = "DECC"
    height = "100m"
    domain = "TEST"
    model = "NAME"
    metmodel = "UKV"
    source = "rtot-cardamom"
    date = "2014"

    obs_file = get_datapath(filename="tac.picarro.hourly.100m.test.dat", data_type=data_type)
    fp_file = get_fp_datapath("TAC-100magl_UKV_co2_TEST_201407.nc")
    flux_file = get_flux_datapath("co2-rtot-cardamom-2hr_TEST_2014.nc")

    ObsSurface.read_file(
        filepath=obs_file, data_type=data_type, site=site, network=network, inlet=height
    )
    Footprints.read_file(
        filepath=fp_file,
        site=site,
        height=height,
        domain=domain,
        model=model,
        metmodel=metmodel,
        species=species,
    )
    Emissions.read_file(
        filepath=flux_file,
        species=species,
        source=source,
        domain=domain,
        date=date,
        high_time_resolution=True,
    )
def test_upload_same_file_twice_raises():
    """Re-reading an identical file should raise a ValueError.

    The first read succeeds; the second read of the same file is expected
    to be rejected (presumably via a stored file hash - confirm against
    ObsSurface.read_file's implementation).
    """
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")

    ObsSurface.read_file(
        filepath=data_filepath,
        data_type="THAMESBARRIER",
        site="tmb",
        network="LGHG",
        sampling_period=60,
    )

    with pytest.raises(ValueError):
        ObsSurface.read_file(
            filepath=data_filepath,
            data_type="THAMESBARRIER",
            site="tmb",
            network="LGHG",
            sampling_period=60,
        )
def test_save(mock_uuid2):
    """Saving a Datasource should store its object under a key ending in the
    mocked UUID.

    NOTE(review): `mocked_uuid2` is presumably a module-level constant paired
    with the `mock_uuid2` fixture - verify it exists at module scope.
    """
    bucket = get_local_bucket()

    d = Datasource()
    d.add_metadata_key(key="data_type", value="timeseries")
    d.save(bucket)

    key_prefix = f"{Datasource._datasource_root}/uuid/{d._uuid}"
    object_names = get_object_names(bucket, key_prefix)

    assert object_names[0].split("/")[-1] == mocked_uuid2
def test_recombination_GC():
    """Data recombined from the object store should match the output of
    parsing the raw GCWERKS file directly (ignoring attributes).
    """
    get_local_bucket(empty=True)

    data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data, precision), data_type="GCWERKS", site="cgo", network="agage")

    parsed = parse_gcwerks(
        data_filepath=data,
        precision_filepath=precision,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    toluene_data = parsed["c6h5ch3_70m"]["data"]

    species = "c6h5ch3"
    site = "CGO"
    inlet = "70m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)
    recombined = recombine_datasets(keys=keys)

    # Attributes differ between the raw parse and the stored copy; compare data only
    toluene_data.attrs = {}
    recombined.attrs = {}

    assert toluene_data.time.equals(recombined.time)
    assert toluene_data["c6h5ch3"].equals(recombined["c6h5ch3"])
    assert toluene_data["c6h5ch3_repeatability"].equals(recombined["c6h5ch3_repeatability"])
    assert toluene_data["c6h5ch3_status_flag"].equals(recombined["c6h5ch3_status_flag"])
    assert toluene_data["c6h5ch3_integration_flag"].equals(recombined["c6h5ch3_integration_flag"])
def test_read_file():
    """Read a GEOS-Chem output file into EulerianModel and compare the stored
    data and selected metadata against the original NetCDF contents.

    NOTE(review): this function shares its name with another test_read_file in
    SOURCE - if both live in one module the earlier one is shadowed; confirm
    they come from separate test files.
    """
    get_local_bucket(empty=True)

    datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    results = EulerianModel.read_file(filepath=datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in results

    search_results = search(
        species="ch4", model="geoschem", start_date="2015-01-01", data_type="eulerian_model"
    )

    first_key = list(search_results.keys())[0]
    data_keys = search_results[first_key]["keys"]

    eulerian_data = recombine_datasets(keys=data_keys, sort=False)
    metadata = search_results[first_key]["metadata"]

    orig_data = open_dataset(datapath)
    for var in ("lat", "lon", "time", "lev", "SpeciesConc_CH4"):
        assert orig_data[var].equals(eulerian_data[var])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    for key, expected_value in expected_metadata_values.items():
        assert metadata[key] == expected_value
def test_retrieve(met_object):
    """Check the metadata and first data values of a retrieved met object."""
    met = met_object

    # Empty the object store to force retrieval
    get_local_bucket(empty=True)

    expected_metadata = {
        "product_type": "reanalysis",
        "format": "netcdf",
        "variable": ["u_component_of_wind", "v_component_of_wind"],
        "pressure_level": ["975", "1000"],
        "area": [-40.5, 144.5, -40.75, 144.75],
        "site": "CGO",
        "network": "AGAGE",
        "start_date": "2012-01-01 00:00:00+00:00",
        "end_date": "2012-12-31 00:00:00+00:00",
    }

    assert met.metadata == expected_metadata

    assert met.data["longitude"][0] == 144.5
    assert met.data["latitude"][0] == -40.5
    assert met.data["level"][0] == 975
def test_add_data(data):
    """Data added to a Datasource should round-trip through the object store
    unchanged, and the stored metadata should match the expected record.
    """
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    assert ch4_data["ch4"][0] == pytest.approx(1959.55)
    assert ch4_data["ch4_variability"][0] == pytest.approx(0.79)
    assert ch4_data["ch4_number_of_observations"][0] == pytest.approx(26.0)

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()

    bucket = get_local_bucket()

    # Now read it out and make sure it's what we expect
    chunks = [Datasource.load_dataset(bucket=bucket, key=k) for k in d.data_keys()]
    recombined = xr.concat(chunks, dim="time")

    assert recombined.equals(ch4_data)

    expected_metadata = {
        "site": "bsd",
        "instrument": "picarro",
        "sampling_period": "60",
        "inlet": "248m",
        "port": "9",
        "type": "air",
        "network": "decc",
        "species": "ch4",
        "scale": "wmo-x2004a",
        "long_name": "bilsdale",
        "data_owner": "simon o'doherty",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "248m",
        "comment": "cavity ring-down measurements. output from gcwerks",
        "source": "in situ measurements of air",
        "conventions": "cf-1.6",
        "calibration_scale": "wmo-x2004a",
        "station_longitude": -1.15033,
        "station_latitude": 54.35858,
        "station_long_name": "bilsdale, uk",
        "station_height_masl": 380.0,
        "data_type": "timeseries",
    }

    assert d.metadata() == expected_metadata
def test_read_noaa_raw():
    """Read a raw NOAA flask file and check the stored date-range keys and the
    first/last values of the earliest CO chunk.
    """
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    results = ObsSurface.read_file(
        filepath=filepath, data_type="NOAA", site="POCN25", network="NOAA", inlet="flask"
    )

    co_uuid = results["processed"]["co_pocn25_surface-flask_1_ccgg_event.txt"]["co"]
    co_data = Datasource.load(uuid=co_uuid, shallow=False).data()

    expected_chunks = [
        "1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00",
        "2009-06-13-16:32:00+00:00_2009-12-03-00:30:00+00:00",
        "2010-01-10-00:13:00+00:00_2010-12-09-16:05:00+00:00",
        "2011-01-27-04:55:00+00:00_2011-11-11-14:45:00+00:00",
        "2016-12-03-12:37:00+00:00_2016-12-18-05:30:00+00:00",
        "2017-01-27-19:10:00+00:00_2017-07-15-04:15:00+00:00",
    ]
    assert sorted(co_data.keys()) == expected_chunks

    first_chunk = co_data["1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00"]

    assert first_chunk["co"][0] == pytest.approx(94.9)
    assert first_chunk["co"][-1] == pytest.approx(95.65)
    # -999.99 is the raw file's fill value, kept as-is in the stored data
    assert first_chunk["co_repeatability"][0] == pytest.approx(-999.99)
    assert first_chunk["co_repeatability"][-1] == pytest.approx(-999.99)
    assert first_chunk["co_selection_flag"][0] == 0
    assert first_chunk["co_selection_flag"][-1] == 0
def load_CRDS():
    """Empty the object store and load CRDS test data for the TAC, HFD and
    BSD (three inlets) DECC sites.
    """
    get_local_bucket(empty=True)

    tac_100m = get_datapath("tac.picarro.1minute.100m.min.dat", data_type="CRDS")
    hfd_50m = get_datapath("hfd.picarro.1minute.50m.min.dat", data_type="CRDS")
    bsd_42m = get_datapath("bsd.picarro.1minute.42m.min.dat", data_type="CRDS")
    bsd_108m = get_datapath("bsd.picarro.1minute.108m.min.dat", data_type="CRDS")
    bsd_248m = get_datapath("bsd.picarro.1minute.248m.min.dat", data_type="CRDS")

    ObsSurface.read_file(filepath=tac_100m, data_type="CRDS", site="tac", network="DECC")
    ObsSurface.read_file(filepath=hfd_50m, data_type="CRDS", site="hfd", network="DECC")
    ObsSurface.read_file(
        filepath=[bsd_42m, bsd_108m, bsd_248m], data_type="CRDS", site="bsd", network="DECC"
    )
def test_from_data(data):
    """A Datasource exported with to_data should be reconstructable with
    from_data, preserving metadata and data keys.
    """
    d = Datasource()

    metadata = data["ch4"]["metadata"]
    ch4_data = data["ch4"]["data"]

    d.add_data(metadata=metadata, data=ch4_data, data_type="timeseries")
    d.save()

    exported = d.to_data()
    bucket = get_local_bucket()

    # Create a new object with the data from d
    restored = Datasource.from_data(bucket=bucket, data=exported, shallow=False)

    restored_metadata = restored.metadata()
    assert restored_metadata["site"] == "bsd"
    assert restored_metadata["instrument"] == "picarro"
    assert restored_metadata["sampling_period"] == "60"
    assert restored_metadata["inlet"] == "248m"

    assert sorted(restored.data_keys()) == sorted(d.data_keys())
    assert restored.metadata() == d.metadata()
def data_read():
    """Populate an emptied object store with observation, emissions and
    footprint test data, and set ranking information for the BSD inlets.
    """
    get_local_bucket(empty=True)

    # DECC network sites
    network = "DECC"

    bsd_248_path = get_datapath(filename="bsd.picarro.1minute.248m.min.dat", data_type="CRDS")
    bsd_108_path = get_datapath(filename="bsd.picarro.1minute.108m.min.dat", data_type="CRDS")
    bsd_42_path = get_datapath(filename="bsd.picarro.1minute.42m.min.dat", data_type="CRDS")

    bsd_results = ObsSurface.read_file(
        filepath=[bsd_248_path, bsd_108_path, bsd_42_path],
        data_type="CRDS",
        site="bsd",
        network=network,
    )

    hfd_100_path = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")
    hfd_50_path = get_datapath(filename="hfd.picarro.1minute.50m.min.dat", data_type="CRDS")

    ObsSurface.read_file(
        filepath=[hfd_100_path, hfd_50_path], data_type="CRDS", site="hfd", network=network
    )

    tac_path = get_datapath(filename="tac.picarro.1minute.100m.test.dat", data_type="CRDS")
    ObsSurface.read_file(filepath=tac_path, data_type="CRDS", site="tac", network=network)

    # GCWERKS data (AGAGE network sites)
    cgo_data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    cgo_prec = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")
    ObsSurface.read_file(
        filepath=(cgo_data, cgo_prec), site="CGO", data_type="GCWERKS", network="AGAGE"
    )

    mhd_data = get_datapath(filename="macehead.12.C", data_type="GC")
    mhd_prec = get_datapath(filename="macehead.12.precisions.C", data_type="GC")
    ObsSurface.read_file(
        filepath=(mhd_data, mhd_prec),
        site="MHD",
        data_type="GCWERKS",
        network="AGAGE",
        instrument="GCMD",
    )

    # Set ranking information for BSD
    obs = ObsSurface.load()

    uid_248 = bsd_results["processed"]["bsd.picarro.1minute.248m.min.dat"]["ch4"]
    uid_108 = bsd_results["processed"]["bsd.picarro.1minute.108m.min.dat"]["ch4"]
    uid_42 = bsd_results["processed"]["bsd.picarro.1minute.42m.min.dat"]["ch4"]

    obs.set_rank(uuid=uid_248, rank=1, date_range="2012-01-01_2013-01-01")
    obs.set_rank(uuid=uid_108, rank=1, date_range="2014-09-02_2014-11-01")
    obs.set_rank(uuid=uid_248, rank=1, date_range="2015-01-01_2015-11-01")
    obs.set_rank(uuid=uid_108, rank=1, date_range="2016-09-02_2018-11-01")
    obs.set_rank(uuid=uid_42, rank=1, date_range="2019-01-02_2021-01-01")

    # Emissions data
    flux_path = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")
    Emissions.read_file(
        filepath=flux_path,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    # Footprint data
    fp_path = get_footprint_datapath("footprint_test.nc")
    Footprints.read_file(
        filepath=fp_path,
        site="TMB",
        model="test_model",
        network="LGHG",
        height="10m",
        domain="EUROPE",
    )
def test_read_GC():
    """Read GCWERKS data for Cape Grim and check the processed species keys,
    stored values, attributes, Datasource reuse for new data, and the
    site/network/species/inlet lookup table.
    """
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    results = ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    # 30/11/2021: Species labels were updated to be standardised in line with variable naming
    # This list of expected labels was updated.
    expected_keys = [
        "c2cl4_70m", "c2f6_70m", "c2h2_70m", "c2h6_70m", "c2hcl3_70m", "c3f8_70m",
        "c3h8_70m", "c4f10_70m", "c4f8_70m", "c6f14_70m", "c6h5ch3_70m", "c6h6_70m",
        "cc3h8_70m", "ccl4_70m", "cf4_70m", "cfc112_70m", "cfc113_70m", "cfc114_70m",
        "cfc115_70m", "cfc11_70m", "cfc12_70m", "cfc13_70m", "ch2br2_70m", "ch2cl2_70m",
        "ch3br_70m", "ch3ccl3_70m", "ch3cl_70m", "ch3i_70m", "chbr3_70m", "chcl3_70m",
        "cos_70m", "desflurane_70m", "halon1211_70m", "halon1301_70m", "halon2402_70m",
        "hcfc124_70m", "hcfc132b_70m", "hcfc133a_70m", "hcfc141b_70m", "hcfc142b_70m",
        "hcfc22_70m", "hfc125_70m", "hfc134a_70m", "hfc143a_70m", "hfc152a_70m",
        "hfc227ea_70m", "hfc236fa_70m", "hfc23_70m", "hfc245fa_70m", "hfc32_70m",
        "hfc365mfc_70m", "hfc4310mee_70m", "nf3_70m", "sf5cf3_70m", "sf6_70m", "so2f2_70m",
    ]

    assert sorted(results["processed"]["capegrim-medusa.18.C"].keys()) == expected_keys

    # Load in some data
    hfc_uuid = results["processed"]["capegrim-medusa.18.C"]["hfc152a_70m"]
    hfc152a_data = Datasource.load(uuid=hfc_uuid, shallow=False).data()
    hfc152a_data = hfc152a_data["2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"]

    assert hfc152a_data.time[0] == Timestamp("2018-01-01T02:24:00")
    assert hfc152a_data.time[-1] == Timestamp("2018-01-31T23:33:00")

    assert hfc152a_data["hfc152a"][0] == 4.409
    assert hfc152a_data["hfc152a"][-1] == 4.262
    assert hfc152a_data["hfc152a_repeatability"][0] == 0.03557
    assert hfc152a_data["hfc152a_repeatability"][-1] == 0.03271
    assert hfc152a_data["hfc152a_status_flag"][0] == 0
    assert hfc152a_data["hfc152a_status_flag"][-1] == 0
    assert hfc152a_data["hfc152a_integration_flag"][0] == 0
    assert hfc152a_data["hfc152a_integration_flag"][-1] == 0

    # Check we have the Datasource info saved
    obs = ObsSurface.load()
    assert sorted(obs._datasource_uuids.values()) == expected_keys

    attrs = hfc152a_data.attrs
    assert attributes_checker_obssurface(attrs=attrs, species="hfc152a")

    # Now test that if we add more data it adds it to the same Datasource
    uuid_one = obs.datasources()[0]

    datasource = Datasource.load(uuid=uuid_one)
    assert list(datasource.data().keys()) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"
    ]

    data_filepath = get_datapath(filename="capegrim-medusa.future.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.future.precisions.C", data_type="GC")

    results = ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    datasource = Datasource.load(uuid=uuid_one)
    assert sorted(datasource.data().keys()) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00",
        "2023-01-01-02:24:00+00:00_2023-01-31-23:33:00+00:00",
    ]

    data_filepath = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_filepath = get_datapath(filename="trinidadhead.01.precisions.C", data_type="GC")

    ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="THD",
        instrument="gcmd",
        network="AGAGE",
    )

    obs = ObsSurface.load()
    table = obs._datasource_table

    assert table["cgo"]["agage"]["nf3"]["70m"]
    assert table["cgo"]["agage"]["hfc236fa"]["70m"]
    assert table["cgo"]["agage"]["halon1211"]["70m"]

    assert table["thd"]["agage"]["cfc11"]["10m"]
    assert table["thd"]["agage"]["n2o"]["10m"]
    assert table["thd"]["agage"]["ccl4"]["10m"]
def test_read_footprint():
    """Read a test footprint file and verify the recombined data's coords,
    dims, attributes and summary values.

    Fix: the six trailing value checks were bare comparison expressions with
    no `assert`, so they evaluated to a discarded bool/array and could never
    fail; asserts added. Also removed commented-out dead code.
    """
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]

    footprint_keys = footprint_results[fp_site_key]["keys"]
    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == [
        "height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"
    ]

    assert (
        footprint_data.attrs["heights"]
        == [
            500.0, 1500.0, 2500.0, 3500.0, 4500.0, 5500.0, 6500.0, 7500.0, 8500.0, 9500.0,
            10500.0, 11500.0, 12500.0, 13500.0, 14500.0, 15500.0, 16500.0, 17500.0, 18500.0,
            19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    # These attrs vary per run or were checked above; drop before dict comparison
    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    # Previously bare expressions - now actual assertions
    assert footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    assert footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    assert footprint_data["pressure"].max().values == pytest.approx(1011.92)
    assert footprint_data["fp_low"].min().values == 0.0
    assert footprint_data["fp_high"].min().values == 0.0
    assert footprint_data["pressure"].min().values == pytest.approx(1011.92)
def test_read_CRDS():
    """Read a BSD CRDS file, check the processed species and stored chunk
    keys, then read a second file and confirm new chunks join the same
    Datasource; finally check the lookup table entries.
    """
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.min.dat", data_type="CRDS")

    results = ObsSurface.read_file(filepath=filepath, data_type="CRDS", site="bsd", network="DECC")

    processed = results["processed"]["bsd.picarro.1minute.248m.min.dat"]
    assert sorted(processed.keys()) == ["ch4", "co", "co2"]

    # Load up the assigned Datasources and check they contain the correct data
    ch4_data = Datasource.load(uuid=processed["ch4"]).data()
    ch4_data = ch4_data["2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00"]

    assert ch4_data.time[0] == Timestamp("2014-01-30T11:12:30")
    assert ch4_data["ch4"][0] == 1959.55
    assert ch4_data["ch4"][-1] == 1962.8
    assert ch4_data["ch4_variability"][-1] == 1.034
    assert ch4_data["ch4_number_of_observations"][-1] == 26.0

    obs = ObsSurface.load()

    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)

    expected_keys = [
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
    ]
    assert list(datasource.data().keys()) == expected_keys

    # Reading a later file should append a new chunk to the same Datasource
    filepath = get_datapath(filename="bsd.picarro.1minute.248m.future.dat", data_type="CRDS")
    results = ObsSurface.read_file(filepath=filepath, data_type="CRDS", site="bsd", network="DECC")

    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)

    new_expected_keys = expected_keys + [
        "2023-01-30-13:56:30+00:00_2023-01-30-14:20:30+00:00",
    ]
    assert sorted(datasource.data().keys()) == new_expected_keys

    table = obs._datasource_table

    assert table["bsd"]["decc"]["ch4"]["248m"]
    assert table["bsd"]["decc"]["co2"]["248m"]
    assert table["bsd"]["decc"]["co"]["248m"]
def populate_store():
    """Empty the object store and load the HFD CRDS test file into it."""
    get_local_bucket(empty=True)
    ObsSurface.read_file(filepath=hfd_filepath(), data_type="CRDS", site="hfd")