def test_unranked_search_datetimes():
    """Check that the number of data keys returned follows the date window.

    Searches co2 at the BSD 248m inlet twice: a 2014-2015 window is expected
    to yield one data key, a 2001-2021 window seven.
    """
    species = ["co2"]
    locations = ["bsd"]
    lookup = {"site": "bsd", "species": "co2", "inlet": "248m"}

    # Narrow, single year window
    window_start = timestamp_tzaware("2014-1-1")
    window_end = timestamp_tzaware("2015-1-1")

    narrow_results = search(
        species=species,
        site=locations,
        inlet="248m",
        start_date=window_start,
        end_date=window_end,
    )

    narrow_metadata = narrow_results.metadata(**lookup)
    assert metadata_checker_obssurface(metadata=narrow_metadata, species="co2")

    narrow_keys = narrow_results.keys(**lookup)
    assert len(narrow_keys) == 1

    # Wide window covering twenty years
    window_start = timestamp_tzaware("2001-1-1")
    window_end = timestamp_tzaware("2021-1-1")

    wide_results = search(
        species=species,
        site=locations,
        inlet="248m",
        start_date=window_start,
        end_date=window_end,
    )

    wide_keys = wide_results.keys(**lookup)
    assert len(wide_keys) == 7
def test_no_ranked_data_raises():
    """A search for hfd/ch4 without an inlet must raise, with an inlet it must not."""
    with pytest.raises(ValueError):
        search(site="hfd", species="ch4")

    # Naming the inlet explicitly must succeed and give a truthy result
    with_inlet = search(site="hfd", species="ch4", inlet="100m")

    assert with_inlet
def test_specific_search_translator():
    """Each name used for toluene must lead to the same "c6h5ch3" entry at CGO."""
    # Searched in this order: two alternative names, then the stored name itself
    for name in ("toluene", "methylbenzene", "c6h5ch3"):
        found = search(species=name, site="CGO", skip_ranking=True)
        stored_metadata = found.results["cgo"]["c6h5ch3"]["70m"]["metadata"]

        assert stored_metadata["species"] == "c6h5ch3"
def test_unranked_location_search():
    """Search several sites for co2 and ch4 at the 100m inlet.

    Only two sites are expected in the results (the test reads "tac" and
    "hfd"). Asking the results for keys at an inlet that was not part of the
    search, or at "bsd", must raise ValueError.
    """
    species = ["co2", "ch4"]
    sites = ["hfd", "tac", "bsd"]

    results = search(species=species, site=sites, inlet="100m")

    assert len(results) == 2

    tac_data = results.results["tac"]
    hfd_data = results.results["hfd"]

    assert sorted(tac_data.keys()) == ["ch4", "co2"]
    assert sorted(hfd_data.keys()) == ["ch4", "co2"]

    # 105m was not the inlet searched for, so no keys can be returned
    with pytest.raises(ValueError):
        results.keys(site="tac", species="co2", inlet="105m")

    tac_co2_keys = results.keys(site="tac", species="co2", inlet="100m")
    # Previously this requested "co2" a second time, so ch4 was never checked
    tac_ch4_keys = results.keys(site="tac", species="ch4", inlet="100m")

    assert len(tac_co2_keys) == 4
    assert len(tac_ch4_keys) == 4

    with pytest.raises(ValueError):
        results.keys(site="bsd", species="co2")

    with pytest.raises(ValueError):
        results.keys(site="bsd", species="ch4")
def test_recombination_CRDS():
    """Store a CRDS file, then compare the recombined ch4 data with a direct parse."""
    get_local_bucket(empty=True)

    crds_path = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")

    ObsSurface.read_file(crds_path, data_type="CRDS", site="hfd", network="DECC")

    # Reference data parsed straight from the file
    parsed = parse_crds(data_filepath=crds_path, site="HFD", network="AGAGE")
    expected_ch4 = parsed["ch4"]["data"]

    query = {"species": "ch4", "site": "hfd", "inlet": "100m"}

    found = search(**query)
    stored_keys = found.keys(**query)

    recombined_ch4 = recombine_datasets(keys=stored_keys)
    recombined_ch4.attrs = {}

    assert expected_ch4.time.equals(recombined_ch4.time)
    assert expected_ch4["ch4"].equals(recombined_ch4["ch4"])
def test_search_nonsense_terms():
    """Searching with terms that match nothing must give a falsy result."""
    nonsense_species = ["spam", "eggs", "terry"]
    nonsense_locations = ["capegrim"]

    found = search(species=nonsense_species, locations=nonsense_locations)

    assert not found
def test_ranked_bsd_search():
    """Check the rank metadata and the retrieved data for ch4 at BSD."""
    ranked = search(site="bsd", species="ch4")

    expected_rank_metadata = {
        "2015-01-01-00:00:00+00:00_2015-11-01-00:00:00+00:00": "248m",
        "2014-09-02-00:00:00+00:00_2014-11-01-00:00:00+00:00": "108m",
        "2016-09-02-00:00:00+00:00_2018-11-01-00:00:00+00:00": "108m",
        "2019-01-02-00:00:00+00:00_2021-01-01-00:00:00+00:00": "42m",
    }

    raw = ranked.raw()
    assert expected_rank_metadata == raw["bsd"]["ch4"]["rank_metadata"]

    # Every metadata entry returned for the site has to pass the checker
    all_metadata = ranked.metadata(site="bsd", species="ch4")
    for entry in all_metadata.values():
        assert metadata_checker_obssurface(metadata=entry, species="ch4")

    ds = ranked.retrieve(site="bsd", species="ch4").data

    # First and last timestamps
    assert ds.time[0] == Timestamp("2014-01-30T11:12:30")
    assert ds.time[-1] == Timestamp("2020-12-01T22:31:30")

    # First and last measurement values
    assert ds["ch4"][0] == pytest.approx(1959.55)
    assert ds["ch4"][-1] == pytest.approx(1955.93)
    assert ds["ch4_variability"][0] == 0.79
    assert ds["ch4_variability"][-1] == 0.232

    assert len(ds.time) == 196
def test_search_incorrect_inlet_site_finds_nothing():
    """An inlet height that matches nothing at the site must give a falsy result."""
    query = {"site": "hfd", "species": "CH4", "inlet": "3695m"}

    found = search(**query)

    assert not found
def test_retrieve_unranked():
    """With ranking skipped, the ch4 results are unranked, local and cover three sites."""
    unranked = search(species="ch4", skip_ranking=True)

    assert unranked.ranked_data is False
    assert unranked.cloud is False

    raw = unranked.raw()

    # One inlet is checked per site, in this order
    for site_code, inlet_height in (("tac", "100m"), ("hfd", "50m"), ("bsd", "42m")):
        assert raw[site_code]["ch4"][inlet_height]
def test_read_file():
    """Store an emissions file and compare what is found again with the original.

    The recombined data must equal the file on disk for lat, lon, time and
    flux, and the stored metadata (minus the "processed" and
    "prior_file_1_version" entries) must equal the expected dictionary.
    """
    get_local_bucket(empty=True)

    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(
        species="co2", source="gpp-cardamom", date="2012", domain="europe", data_type="emissions"
    )

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    emissions_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    # Open the reference file in a context manager so the handle is closed
    # once the comparisons are done
    with open_dataset(test_datapath) as orig_data:
        assert orig_data.lat.equals(emissions_data.lat)
        assert orig_data.lon.equals(emissions_data.lon)
        assert orig_data.time.equals(emissions_data.time)
        assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    # These two entries are removed before comparing as they are not part of
    # the expected dictionary
    del metadata["processed"]
    del metadata["prior_file_1_version"]

    assert metadata == expected_metadata
def test_search_find_any_unranked():
    """Search with find_all=False, first without and then with a date window.

    Without dates three sites are expected, with a 2014-2015 window only
    "bsd" and "hfd".
    """
    query = {
        "species": ["co2"],
        "site": ["bsd"],
        "inlet": "248m",
        "instrument": "picarro",
    }

    undated = search(find_all=False, **query).raw()

    assert len(undated) == 3

    expected_species = {
        "bsd": ["ch4", "co", "co2"],
        "hfd": ["ch4", "co", "co2"],
        "tac": ["ch4", "co2"],
    }

    for site_code, species_names in expected_species.items():
        assert sorted(undated[site_code].keys()) == species_names

    window_start = timestamp_tzaware("2014-1-1")
    window_end = timestamp_tzaware("2015-1-1")

    dated = search(
        find_all=False,
        start_date=window_start,
        end_date=window_end,
        **query,
    ).raw()

    assert len(dated) == 2
    assert sorted(dated.keys()) == ["bsd", "hfd"]
def test_specific_keyword_search():
    """Search co2 at BSD 248m on the picarro and check metadata and attributes."""
    lookup = {"site": "bsd", "species": "co2", "inlet": "248m"}

    found = search(instrument="picarro", **lookup)

    found_metadata = found.metadata(**lookup)
    assert metadata_checker_obssurface(metadata=found_metadata, species="co2")

    retrieved = found.retrieve(**lookup)
    ds = retrieved.data

    # "file_created" is not part of the expected attributes, so remove it first
    del ds.attrs["file_created"]

    expected_attrs = {
        "data_owner": "Simon O'Doherty",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "248m",
        "comment": "Cavity ring-down measurements. Output from GCWerks",
        "conditions_of_use": "Ensure that you contact the data owner at the outset of your project.",
        "source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "processed_by": "OpenGHG_Cloud",
        "species": "co2",
        "calibration_scale": "WMO-X2007",
        "station_longitude": -1.15033,
        "station_latitude": 54.35858,
        "station_long_name": "Bilsdale, UK",
        "station_height_masl": 380.0,
        "site": "bsd",
        "instrument": "picarro",
        "sampling_period": "60",
        "inlet": "248m",
        "port": "9",
        "type": "air",
        "network": "decc",
        "scale": "WMO-X2007",
        "long_name": "bilsdale",
    }

    assert ds.attrs == expected_attrs
def test_search_find_any():
    """Search with find_all=False in a date window and check inlets per species."""
    found = search(
        find_all=False,
        species=["co2"],
        site=["bsd"],
        start_date=timestamp_tzaware("2014-1-1"),
        end_date=timestamp_tzaware("2015-1-1"),
        inlet="248m",
        instrument="picarro",
    )

    raw = found.raw()

    bsd_data = raw["bsd"]
    hfd_data = raw["hfd"]

    gases = ("ch4", "co", "co2")

    expected_bsd_heights = sorted(["248m", "108m", "42m"])
    for gas in gases:
        assert sorted(bsd_data[gas].keys()) == expected_bsd_heights

    expected_hfd_heights = ["100m", "50m"]
    for gas in gases:
        assert sorted(hfd_data[gas].keys()) == expected_hfd_heights

    # Metadata of two of the BSD entries must pass the checker
    for gas in ("ch4", "co2"):
        gas_metadata = bsd_data[gas]["42m"]["metadata"]
        assert metadata_checker_obssurface(metadata=gas_metadata, species=gas)
def test_recombination_GC():
    """Store GCWERKS data, then compare recombined toluene data with a direct parse."""
    get_local_bucket(empty=True)

    data_path = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_path = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data_path, precision_path), data_type="GCWERKS", site="cgo", network="agage")

    # Reference data parsed straight from the two files
    parsed = parse_gcwerks(
        data_filepath=data_path,
        precision_filepath=precision_path,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    expected = parsed["c6h5ch3_70m"]["data"]

    query = {"species": "c6h5ch3", "site": "CGO", "inlet": "70m"}

    found = search(**query)
    stored_keys = found.keys(**query)

    recombined = recombine_datasets(keys=stored_keys)

    # Attributes are cleared on both sides before comparing
    expected.attrs = {}
    recombined.attrs = {}

    assert expected.time.equals(recombined.time)

    compared_variables = (
        "c6h5ch3",
        "c6h5ch3_repeatability",
        "c6h5ch3_status_flag",
        "c6h5ch3_integration_flag",
    )

    for name in compared_variables:
        assert expected[name].equals(recombined[name])
def test_read_file():
    """Store a GEOSChem output file and compare what is found again with the original.

    The recombined data must equal the file on disk for lat, lon, time, lev
    and SpeciesConc_CH4, and a selection of metadata values must match.
    """
    get_local_bucket(empty=True)

    test_datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    proc_results = EulerianModel.read_file(filepath=test_datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in proc_results

    search_results = search(
        species="ch4", model="geoschem", start_date="2015-01-01", data_type="eulerian_model"
    )

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    eulerian_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    # Open the reference file in a context manager so the handle is closed
    # once the comparisons are done
    with open_dataset(test_datapath) as orig_data:
        for name in ("lat", "lon", "time", "lev", "SpeciesConc_CH4"):
            assert orig_data[name].equals(eulerian_data[name])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    # Only the entries listed above are compared, the metadata may hold more
    for name, expected_value in expected_metadata_values.items():
        assert metadata[name] == expected_value
def get_obs_surface(
    site: str,
    species: str,
    inlet: Optional[str] = None,
    start_date: Optional[Union[str, Timestamp]] = None,
    end_date: Optional[Union[str, Timestamp]] = None,
    average: Optional[str] = None,
    network: Optional[str] = None,
    instrument: Optional[str] = None,
    calibration_scale: Optional[str] = None,
    keep_missing: Optional[bool] = False,
    skip_ranking: Optional[bool] = False,
) -> ObsData:
    """Get measurements from one site.

    Args:
        site: Site of interest e.g. MHD for the Mace Head site. Upper-cased
            and checked against the keys of acrg_site_info.json.
        species: Species identifier e.g. ch4 for methane.
        inlet: Inlet label
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret.
            The data is only sliced when both start_date and end_date are
            given; the end date itself is excluded.
        average: Averaging period for each dataset.
            Each value should be a string of the form e.g. "2H", "30min"
            (should match pandas offset aliases format).
        network: Network for the site/instrument. Currently accepted but not
            used by this function.
        instrument: Specific instrument for the site, passed to the search.
        calibration_scale: Convert to this calibration scale
        keep_missing: Only consulted when average is given. If False, time
            steps containing NaN after resampling are dropped.
        skip_ranking: Passed to the search unchanged.
    Returns:
        ObsData: ObsData object
    Raises:
        ValueError: If the site is not in the site info file or if the
            search does not return any results.
        AttributeError: If the time of the retrieved data cannot be read.
    """
    from pandas import Timedelta
    import numpy as np
    from openghg.retrieve import search
    from openghg.util import clean_string, load_json, timestamp_tzaware

    site_info = load_json(filename="acrg_site_info.json")
    site = site.upper()

    if site not in site_info:
        raise ValueError(f"No site called {site}, please enter a valid site name.")

    # Find the correct synonym for the passed species
    species = clean_string(_synonyms(species))

    # Get the observation data
    obs_results = search(
        site=site,
        species=species,
        inlet=inlet,
        start_date=start_date,
        end_date=end_date,
        instrument=instrument,
        find_all=True,
        skip_ranking=skip_ranking,
    )

    if not obs_results:
        raise ValueError(f"Unable to find results for {species} at {site}")

    # TODO - for some reason mypy doesn't pick up the ObsData being returned here, look into this
    # GJ - 2021-07-19
    retrieved_data: ObsData = obs_results.retrieve(site=site, species=species, inlet=inlet)  # type: ignore
    data = retrieved_data.data

    if data.attrs["inlet"] == "multiple":
        data.attrs["inlet_height_magl"] = "multiple"
        retrieved_data.metadata["inlet"] = "multiple"

    if start_date is not None and end_date is not None:
        start_date_tzaware = timestamp_tzaware(start_date)
        end_date_tzaware = timestamp_tzaware(end_date)
        # Deduct 1 ns to make the end day (date) exclusive.
        end_date_tzaware_exclusive = end_date_tzaware - Timedelta(1, unit="nanosecond")

        # Slice the data to only cover the dates we're interested in
        data = data.sel(time=slice(start_date_tzaware, end_date_tzaware_exclusive))

    try:
        # The values themselves are not needed, this only checks that the
        # first and last time can be read from the data
        timestamp_tzaware(data.time[0].values)
        timestamp_tzaware(data.time[-1].values)
    except AttributeError as err:
        raise AttributeError(
            "This dataset does not have a time attribute, unable to read date range"
        ) from err

    if average is not None:
        # GJ - 2021-03-09
        # TODO - check by RT
        # TODO - padding for keep_missing=True is currently disabled. The
        # removed code prepended / appended a single all-NaN entry at
        # start_date and at end_date - 1ns (when the data did not already
        # reach them) and sorted by time before resampling.

        # First do a mean resample on all variables
        ds_resampled = data.resample(time=average).mean(skipna=False, keep_attrs=True)
        # keep_attrs doesn't seem to work for some reason, so manually copy
        ds_resampled.attrs = data.attrs.copy()

        average_in_seconds = Timedelta(average).total_seconds()
        ds_resampled.attrs["averaged_period"] = average_in_seconds
        ds_resampled.attrs["averaged_period_str"] = average

        # For some variables, need a different type of resampling
        data_variables: List[str] = [str(v) for v in data.variables]

        for var in data_variables:
            if "repeatability" in var:
                # Square root of the summed squares divided by the number of
                # points in each averaging period
                ds_resampled[var] = (
                    np.sqrt((data[var] ** 2).resample(time=average).sum())
                    / data[var].resample(time=average).count()
                )

            # Copy over some attributes
            if "long_name" in data[var].attrs:
                ds_resampled[var].attrs["long_name"] = data[var].attrs["long_name"]

            if "units" in data[var].attrs:
                ds_resampled[var].attrs["units"] = data[var].attrs["units"]

        variability_name = f"{species}_variability"

        # Create a new variability variable, containing the standard deviation within the resampling period
        ds_resampled[variability_name] = (
            data[species].resample(time=average).std(skipna=False, keep_attrs=True)
        )

        # If there are any periods where only one measurement was resampled, just use the median variability
        ds_resampled[variability_name][
            ds_resampled[variability_name] == 0.0
        ] = ds_resampled[variability_name].median()

        # Create attributes for variability variable
        ds_resampled[variability_name].attrs["long_name"] = f"{data.attrs['long_name']}_variability"
        ds_resampled[variability_name].attrs["units"] = data[species].attrs["units"]

        # Resampling may introduce NaNs, so remove, if not keep_missing
        if keep_missing is False:
            ds_resampled = ds_resampled.dropna(dim="time")

        data = ds_resampled

    # Rename variables
    rename: Dict[str, str] = {}

    data_variables = [str(v) for v in data.variables]
    for var in data_variables:
        # The checks are independent; where several match, the last one wins
        if var.lower() == species.lower():
            rename[var] = "mf"
        if "repeatability" in var:
            rename[var] = "mf_repeatability"
        if "variability" in var:
            rename[var] = "mf_variability"
        if "number_of_observations" in var:
            rename[var] = "mf_number_of_observations"
        if "status_flag" in var:
            rename[var] = "status_flag"
        if "integration_flag" in var:
            rename[var] = "integration_flag"

    data = data.rename_vars(rename)  # type: ignore

    data.attrs["species"] = species

    if "calibration_scale" in data.attrs:
        data.attrs["scale"] = data.attrs.pop("calibration_scale")

    if calibration_scale is not None:
        data = _scale_convert(data, species, calibration_scale)

    metadata = retrieved_data.metadata
    metadata.update(data.attrs)

    obs_data = ObsData(data=data, metadata=metadata)

    # NOTE - a check that units and scales match across several observation
    # Datasets used to follow here. It doesn't make sense to do this now as
    # we've only got a single Dataset.

    return obs_data
def test_retrieve_complex_ranked():
    """Set ranks for several inlets and date ranges, then search and retrieve.

    The ranking starts out unset ("NA") for all three BSD co inlets. After
    five set_rank calls both the stored ranking and the rank metadata of the
    retrieved data must show the date ranges that were set.
    """
    full_range = "2014-01-30 11:12:30+00:00_2020-12-01 22:31:30+00:00"

    ranker = rank_sources(site="bsd", species="co")

    unranked_expected = {
        "42m": {"rank_data": "NA", "data_range": full_range},
        "108m": {"rank_data": "NA", "data_range": full_range},
        "248m": {"rank_data": "NA", "data_range": full_range},
    }

    assert ranker.raw() == unranked_expected

    # (inlet, start date, end date), each set with rank 1 in this order
    rank_periods = (
        ("42m", "2014-01-01", "2015-03-01"),
        ("108m", "2015-03-02", "2016-08-01"),
        ("42m", "2016-08-02", "2017-03-01"),
        ("248m", "2017-03-02", "2019-03-01"),
        ("108m", "2019-03-02", "2021-12-01"),
    )

    for inlet_height, period_start, period_end in rank_periods:
        ranker.set_rank(inlet=inlet_height, rank=1, start_date=period_start, end_date=period_end)

    ranked_sources = ranker.get_sources(site="bsd", species="co")

    ranked_expected = {
        "42m": {
            "rank_data": {
                "2014-01-01-00:00:00+00:00_2015-03-01-00:00:00+00:00": 1,
                "2016-08-02-00:00:00+00:00_2017-03-01-00:00:00+00:00": 1,
            },
            "data_range": full_range,
        },
        "108m": {
            "rank_data": {
                "2015-03-02-00:00:00+00:00_2016-08-01-00:00:00+00:00": 1,
                "2019-03-02-00:00:00+00:00_2021-12-01-00:00:00+00:00": 1,
            },
            "data_range": full_range,
        },
        "248m": {
            "rank_data": {"2017-03-02-00:00:00+00:00_2019-03-01-00:00:00+00:00": 1},
            "data_range": full_range,
        },
    }

    assert ranked_sources == ranked_expected

    found = search(site="bsd", species="co")

    expected_rankings = {
        "2014-01-01-00:00:00+00:00_2015-03-01-00:00:00+00:00": "42m",
        "2016-08-02-00:00:00+00:00_2017-03-01-00:00:00+00:00": "42m",
        "2015-03-02-00:00:00+00:00_2016-08-01-00:00:00+00:00": "108m",
        "2019-03-02-00:00:00+00:00_2021-12-01-00:00:00+00:00": "108m",
        "2017-03-02-00:00:00+00:00_2019-03-01-00:00:00+00:00": "248m",
    }

    retrieved = found.retrieve(site="bsd", species="co")

    assert retrieved.metadata["rank_metadata"] == expected_rankings

    assert retrieved.data.time.size == 234
def get_flux(
    species: str,
    sources: Union[str, List[str]],
    domain: str,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    time_resolution: Optional[str] = "standard",
) -> FluxData:
    """
    The flux function reads in all flux files for the domain and species as an xarray Dataset.
    Note that at present ALL flux data is read in per species per domain or by emissions name.
    To be consistent with the footprints, fluxes should be in mol/m2/s.

    Args:
        species: Species name
        sources: Source name
        domain: Domain e.g. EUROPE
        start_date: Start date. If None the value of timestamp_epoch() is used.
        end_date: End date. If None the value of timestamp_now() is used.
        time_resolution: One of ["standard", "high"]
    Returns:
        FluxData: FluxData object
    Raises:
        ValueError: If no flux data is found or if the data found has more
            than one level.

    TODO: Update this to output to a FluxData class?
    TODO: Update inputs to just accept a string and extract one flux file at a time?
    As it stands, this only extracts one flux at a time but is set up to be extended
    to to extract multiple. So if this is removed from this function the functionality
    itself would need to be wrapped up in another function call.
    """
    from openghg.retrieve import search
    from openghg.store import recombine_datasets
    from openghg.util import timestamp_epoch, timestamp_now

    if start_date is None:
        start_date = timestamp_epoch()
    if end_date is None:
        end_date = timestamp_now()

    results: Dict = search(
        species=species,
        source=sources,
        domain=domain,
        time_resolution=time_resolution,
        start_date=start_date,
        end_date=end_date,
        data_type="emissions",
    )  # type: ignore

    if not results:
        raise ValueError(f"Unable to find flux data for {species} from {sources}")

    # TODO - more than one emissions file (but see above)
    try:
        em_key = list(results.keys())[0]
    except IndexError as err:
        # This message used to refer to footprints data, although it is flux
        # data that is being looked up here
        raise ValueError(f"Unable to find any flux data for {domain} for {species}.") from err

    data_keys = results[em_key]["keys"]
    metadata = results[em_key]["metadata"]

    em_ds = recombine_datasets(keys=data_keys, sort=False)

    # Check for level coordinate. If one level, assume surface and drop
    if "lev" in em_ds.coords:
        if len(em_ds.lev) > 1:
            raise ValueError("Error: More than one flux level")

        em_ds = em_ds.drop_vars(names="lev")

    if species is None:
        species = metadata.get("species", "NA")

    return FluxData(
        data=em_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )
def test_specific_search_gc():
    """Metadata found for NF3 at the CGO 70m inlet must pass the checker."""
    found = search(species=["NF3"], site="CGO", inlet="70m")

    # The lookup uses the lower case site and species names
    nf3_metadata = found.metadata(site="cgo", species="nf3", inlet="70m")

    assert metadata_checker_obssurface(metadata=nf3_metadata, species="nf3")
def test_read_footprint():
    """Store a footprint file, then check the recombined data found by a search.

    Checks the coordinates, dimensions and attributes of the recombined
    Dataset and the minimum / maximum of the fp_low, fp_high and pressure
    variables.
    """
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")
    # model_params = {"simulation_params": "123"}

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]

    footprint_keys = footprint_results[fp_site_key]["keys"]
    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == ["height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"]

    assert (
        footprint_data.attrs["heights"]
        == [
            500.0,
            1500.0,
            2500.0,
            3500.0,
            4500.0,
            5500.0,
            6500.0,
            7500.0,
            8500.0,
            9500.0,
            10500.0,
            11500.0,
            12500.0,
            13500.0,
            14500.0,
            15500.0,
            16500.0,
            17500.0,
            18500.0,
            19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    # These are checked above (or not compared at all), remove them before
    # comparing the remaining attributes
    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    # These comparisons were previously bare expressions without assert, so
    # their results were discarded and nothing was checked
    assert footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    assert footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    assert footprint_data["pressure"].max().values == pytest.approx(1011.92)
    assert footprint_data["fp_low"].min().values == 0.0
    assert footprint_data["fp_high"].min().values == 0.0
    assert footprint_data["pressure"].min().values == pytest.approx(1011.92)
def get_footprint(
    site: str,
    domain: str,
    height: str,
    model: Optional[str] = None,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    species: Optional[str] = None,
) -> FootprintData:
    """
    Get footprints from one site.

    Args:
        site: The name of the site given in the footprints. This often matches
            the site name, but footprints for the same site that were run
            with a different met may be named slightly differently from the
            obs file. E.g. site="DJI", site_modifier = "DJI-SAM" -
            station called DJI, footprints site called DJI-SAM
        domain: Domain name for the footprints
        height: Height of inlet in metres
        model: Currently accepted but not used by this function.
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret
        species: Species identifier e.g. "co2" for carbon dioxide. Only needed
            if species needs a modified footprints from the typical 30-day
            footprints appropriate for a long-lived species (like methane)
            e.g. for high time resolution (co2) or is a short-lived species.
            If None, the species is read from the metadata of the footprint
            found, falling back to "NA".
    Returns:
        FootprintData: FootprintData dataclass
    Raises:
        ValueError: If the search does not return any footprints data.
    """
    from openghg.store import recombine_datasets
    from openghg.retrieve import search
    from openghg.dataobjects import FootprintData

    # Get the footprints data
    results = search(
        site=site,
        domain=domain,
        height=height,
        start_date=start_date,
        end_date=end_date,
        species=species,
        data_type="footprints",
    )  # type: ignore

    try:
        fp_site_key = list(results.keys())[0]
    except IndexError as err:
        # No keys in the results, report what was being looked for
        if species is not None:
            raise ValueError(
                f"Unable to find any footprints data for {site} at a height of {height} for species {species}."
            ) from err

        raise ValueError(
            f"Unable to find any footprints data for {site} at a height of {height}."
        ) from err

    keys = results[fp_site_key]["keys"]
    metadata = results[fp_site_key]["metadata"]

    # fp_ds = recombine_datasets(keys=keys, sort=False)  # Why did this have sort=False before?
    fp_ds = recombine_datasets(keys=keys, sort=True)

    if species is None:
        species = metadata.get("species", "NA")

    return FootprintData(
        data=fp_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )