def test_recombination_CRDS():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    ObsSurface.read_file(filepath, data_type="CRDS", site="hfd", network="DECC")

    # Parse the raw file directly so we can compare against the recombined data
    gas_data = parse_crds(data_filepath=filepath, site="HFD", network="AGAGE")

    ch4_data_read = gas_data["ch4"]["data"]

    species = "ch4"
    site = "hfd"
    inlet = "100m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    ch4_data_recombined = recombine_datasets(keys=keys)
    # Clear the attributes so only the data itself is compared
    ch4_data_recombined.attrs = {}

    assert ch4_data_read.time.equals(ch4_data_recombined.time)
    assert ch4_data_read["ch4"].equals(ch4_data_recombined["ch4"])
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(
        species="co2", source="gpp-cardamom", date="2012", domain="europe", data_type="emissions"
    )

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    emissions_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data.lat.equals(emissions_data.lat)
    assert orig_data.lon.equals(emissions_data.lon)
    assert orig_data.time.equals(emissions_data.time)
    assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    # Remove the keys that vary between runs before comparing
    del metadata["processed"]
    del metadata["prior_file_1_version"]

    assert metadata == expected_metadata
def _create_obsdata(self, site: str, species: str, inlet: Optional[str] = None) -> ObsData:
    """Creates an ObsData object for return to the user

    Args:
        site: Site code
        species: Species name
        inlet: Inlet height, required to select a specific source for unranked data
    Returns:
        ObsData: ObsData object
    """
    if self.ranked_data:
        specific_source = self.results[site][species]
    else:
        specific_source = self.results[site][species][inlet]

    data_keys = specific_source["keys"]
    metadata = specific_source["metadata"]

    # If cloud use the Retrieve object
    if self.cloud:
        raise NotImplementedError
        # from Acquire.Client import Wallet
        # from xarray import open_dataset

        # wallet = Wallet()
        # self._service_url = "https://fn.openghg.org/t"
        # self._service = wallet.get_service(service_url=f"{self._service_url}/openghg")

        # key = f"{site}_{species}"
        # keys_to_retrieve = {key: data_keys}
        # args = {"keys": keys_to_retrieve}

        # response: Dict = self._service.call_function(function="retrieve.retrieve", args=args)
        # response_data = response["results"]

        # data = open_dataset(response_data[key])
    else:
        data = recombine_datasets(data_keys, sort=True)

    if self.ranked_data:
        metadata["rank_metadata"] = specific_source["rank_metadata"]

    return ObsData(data=data, metadata=metadata)
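# A minimal sketch (not from the source) of the self.results layout that
# _create_obsdata assumes, using illustrative site/species/inlet values taken
# from the tests in this section:
#
#   ranked data:    self.results["hfd"]["ch4"] holds
#                   {"keys": [...], "metadata": {...}, "rank_metadata": {...}}
#   unranked data:  self.results["hfd"]["ch4"]["100m"] holds
#                   {"keys": [...], "metadata": {...}}
#
# so when ranked data is not available an inlet must be supplied to pick out
# a single specific source.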
def test_recombination_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data_filepath, precision_filepath), data_type="GCWERKS", site="cgo", network="agage")

    # Parse the raw files directly so we can compare against the recombined data
    data = parse_gcwerks(
        data_filepath=data_filepath,
        precision_filepath=precision_filepath,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    toluene_data = data["c6h5ch3_70m"]["data"]

    species = "c6h5ch3"
    site = "CGO"
    inlet = "70m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    toluene_data_recombined = recombine_datasets(keys=keys)

    # Clear the attributes so only the data itself is compared
    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["c6h5ch3"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["c6h5ch3_repeatability"].equals(toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["c6h5ch3_status_flag"].equals(toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["c6h5ch3_integration_flag"].equals(toluene_data_recombined["c6h5ch3_integration_flag"])
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    proc_results = EulerianModel.read_file(filepath=test_datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in proc_results

    search_results = search(species="ch4", model="geoschem", start_date="2015-01-01", data_type="eulerian_model")

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    eulerian_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data["lat"].equals(eulerian_data["lat"])
    assert orig_data["lon"].equals(eulerian_data["lon"])
    assert orig_data["time"].equals(eulerian_data["time"])
    assert orig_data["lev"].equals(eulerian_data["lev"])
    assert orig_data["SpeciesConc_CH4"].equals(eulerian_data["SpeciesConc_CH4"])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    for key, expected_value in expected_metadata_values.items():
        assert metadata[key] == expected_value
def get_footprint(
    site: str,
    domain: str,
    height: str,
    model: Optional[str] = None,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    species: Optional[str] = None,
) -> FootprintData:
    """Get footprints from one site.

    Args:
        site: The name of the site given in the footprints. This often matches
            the obs site name, but footprints for the same site run with
            different met may be named slightly differently from the obs file,
            e.g. site="DJI", site_modifier="DJI-SAM" - the station is called
            DJI and the footprints site is called DJI-SAM.
        domain: Domain name for the footprints
        height: Height of inlet in metres
        model: Model name used to create the footprints
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret
        species: Species identifier, e.g. "co2" for carbon dioxide. Only needed
            if the species requires footprints different from the typical
            30-day footprints appropriate for a long-lived species (such as
            methane), e.g. high time resolution footprints (co2) or a
            short-lived species.
    Returns:
        FootprintData: FootprintData dataclass
    """
    from openghg.store import recombine_datasets
    from openghg.retrieve import search
    from openghg.dataobjects import FootprintData

    # Get the footprints data
    results = search(
        site=site,
        domain=domain,
        height=height,
        start_date=start_date,
        end_date=end_date,
        species=species,
        data_type="footprints",
    )  # type: ignore

    try:
        fp_site_key = list(results.keys())[0]
    except IndexError:
        if species is not None:
            raise ValueError(
                f"Unable to find any footprints data for {site} at a height of {height} for species {species}."
            )
        else:
            raise ValueError(f"Unable to find any footprints data for {site} at a height of {height}.")

    keys = results[fp_site_key]["keys"]
    metadata = results[fp_site_key]["metadata"]

    fp_ds = recombine_datasets(keys=keys, sort=True)

    if species is None:
        species = metadata.get("species", "NA")

    return FootprintData(
        data=fp_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )
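# A minimal usage sketch for get_footprint, assuming footprints for the site,
# domain and height below have already been added to the object store (the
# values are illustrative, borrowed from test_read_footprint in this section):
#
#   fp_data = get_footprint(site="TMB", domain="EUROPE", height="10m")
#   fp_ds = fp_data.data          # the recombined xarray Dataset of footprints
#   fp_meta = fp_data.metadata    # the metadata dict from the search results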
def get_flux(
    species: str,
    sources: Union[str, List[str]],
    domain: str,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    time_resolution: Optional[str] = "standard",
) -> FluxData:
    """Reads in all flux files for the given domain and species as an xarray
    Dataset. Note that at present ALL flux data is read in per species per
    domain, or by emissions name. To be consistent with the footprints, fluxes
    should be in mol/m2/s.

    Args:
        species: Species name
        sources: Source name
        domain: Domain e.g. EUROPE
        start_date: Start date
        end_date: End date
        time_resolution: One of ["standard", "high"]
    Returns:
        FluxData: FluxData object

    TODO: Update this to output to a FluxData class?
    TODO: Update inputs to just accept a string and extract one flux file at a
    time? As it stands, this only extracts one flux at a time but is set up to
    be extended to extract multiple. So if this is removed from this function
    the functionality itself would need to be wrapped up in another function
    call.
    """
    from openghg.retrieve import search
    from openghg.store import recombine_datasets
    from openghg.util import timestamp_epoch, timestamp_now

    if start_date is None:
        start_date = timestamp_epoch()
    if end_date is None:
        end_date = timestamp_now()

    results: Dict = search(
        species=species,
        source=sources,
        domain=domain,
        time_resolution=time_resolution,
        start_date=start_date,
        end_date=end_date,
        data_type="emissions",
    )  # type: ignore

    if not results:
        raise ValueError(f"Unable to find flux data for {species} from {sources}")

    # TODO - more than one emissions file (but see above)
    try:
        em_key = list(results.keys())[0]
    except IndexError:
        raise ValueError(f"Unable to find any flux data for {domain} for {species}.")

    data_keys = results[em_key]["keys"]
    metadata = results[em_key]["metadata"]

    em_ds = recombine_datasets(keys=data_keys, sort=False)

    # Check for a level coordinate. If there is only one level, assume surface and drop it
    if "lev" in em_ds.coords:
        if len(em_ds.lev) > 1:
            raise ValueError("Error: More than one flux level")
        em_ds = em_ds.drop_vars(names="lev")

    if species is None:
        species = metadata.get("species", "NA")

    return FluxData(
        data=em_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )
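# A minimal usage sketch for get_flux, assuming the gpp-cardamom CO2 flux from
# the Emissions test above has been added to the object store (the values are
# illustrative, borrowed from that test):
#
#   flux_data = get_flux(species="co2", sources="gpp-cardamom", domain="europe")
#   flux_ds = flux_data.data    # the recombined flux Dataset, in mol/m2/s
#   flux_ds.flux                # the flux variable itself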
def test_read_footprint():
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]
    footprint_keys = footprint_results[fp_site_key]["keys"]

    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == ["height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"]

    assert (
        footprint_data.attrs["heights"]
        == [
            500.0, 1500.0, 2500.0, 3500.0, 4500.0,
            5500.0, 6500.0, 7500.0, 8500.0, 9500.0,
            10500.0, 11500.0, 12500.0, 13500.0, 14500.0,
            15500.0, 16500.0, 17500.0, 18500.0, 19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    # Remove the keys checked above (and the varying processed timestamp)
    # before comparing the remaining attributes
    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    assert footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    assert footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    assert footprint_data["pressure"].max().values == pytest.approx(1011.92)
    assert footprint_data["fp_low"].min().values == 0.0
    assert footprint_data["fp_high"].min().values == 0.0
    assert footprint_data["pressure"].min().values == pytest.approx(1011.92)