Python search Examples, openghg.retrieve.search Python Examples

Example #1

0

Show file

def test_unranked_search_datetimes():
    """Search BSD co2 at the 248m inlet over a narrow and then a wide date window.

    The 2014 window is expected to produce exactly one data key, the
    2001-2021 window seven.
    """
    species = ["co2"]
    locations = ["bsd"]
    selector = {"site": "bsd", "species": "co2", "inlet": "248m"}

    # Narrow window: 2014-01-01 to 2015-01-01
    narrow_start = timestamp_tzaware("2014-1-1")
    narrow_end = timestamp_tzaware("2015-1-1")
    narrow = search(
        species=species,
        site=locations,
        inlet="248m",
        start_date=narrow_start,
        end_date=narrow_end,
    )

    narrow_metadata = narrow.metadata(**selector)
    assert metadata_checker_obssurface(metadata=narrow_metadata, species="co2")
    assert len(narrow.keys(**selector)) == 1

    # Wide window: 2001-01-01 to 2021-01-01
    wide_start = timestamp_tzaware("2001-1-1")
    wide_end = timestamp_tzaware("2021-1-1")
    wide = search(
        species=species,
        site=locations,
        inlet="248m",
        start_date=wide_start,
        end_date=wide_end,
    )

    assert len(wide.keys(**selector)) == 7

Example #2

0

Show file

def test_no_ranked_data_raises():
    """Searching HFD ch4 without an inlet must raise; with an inlet it must succeed."""
    # No inlet given - presumably no ranking data exists for this site, so
    # the search is expected to raise
    with pytest.raises(ValueError):
        search(site="hfd", species="ch4")

    # With the inlet stated explicitly the search should return a truthy result
    with_inlet = search(site="hfd", species="ch4", inlet="100m")
    assert with_inlet

Example #3

0

Show file

def test_specific_search_translator():
    """Each toluene synonym should resolve to the c6h5ch3 entry stored for CGO."""
    for species_name in ("toluene", "methylbenzene", "c6h5ch3"):
        found = search(species=species_name, site="CGO", skip_ranking=True)

        # Results are looked up by the lowercase site and the c6h5ch3 species key
        entry_metadata = found.results["cgo"]["c6h5ch3"]["70m"]["metadata"]
        assert entry_metadata["species"] == "c6h5ch3"

Example #4

0

Show file

def test_unranked_location_search():
    """Search several sites for co2 and ch4 at the 100m inlet.

    Only two of the three requested sites are expected in the results. Key
    lookups must raise ValueError for an inlet that was not part of the
    search and for BSD when no inlet is given.
    """
    species = ["co2", "ch4"]
    sites = ["hfd", "tac", "bsd"]

    results = search(species=species, site=sites, inlet="100m")

    assert len(results) == 2

    tac_data = results.results["tac"]
    hfd_data = results.results["hfd"]

    assert sorted(tac_data.keys()) == ["ch4", "co2"]
    assert sorted(hfd_data.keys()) == ["ch4", "co2"]

    # An inlet that was not part of the search cannot be resolved
    with pytest.raises(ValueError):
        results.keys(site="tac", species="co2", inlet="105m")

    tac_co2_keys = results.keys(site="tac", species="co2", inlet="100m")
    # Previously this queried co2 a second time, so ch4 was never checked
    tac_ch4_keys = results.keys(site="tac", species="ch4", inlet="100m")

    assert len(tac_co2_keys) == 4
    assert len(tac_ch4_keys) == 4

    with pytest.raises(ValueError):
        results.keys(site="bsd", species="co2")

    with pytest.raises(ValueError):
        results.keys(site="bsd", species="ch4")

Example #5

0

Show file

def test_recombination_CRDS():
    """Store a CRDS file, recombine it from the store and compare with a direct parse."""
    # presumably resets the local object store - confirm against get_local_bucket
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")

    ObsSurface.read_file(filepath, data_type="CRDS", site="hfd", network="DECC")

    # Reference data parsed straight from the file
    parsed = parse_crds(data_filepath=filepath, site="HFD", network="AGAGE")
    expected_ch4 = parsed["ch4"]["data"]

    query = {"species": "ch4", "site": "hfd", "inlet": "100m"}

    found = search(**query)
    stored_keys = found.keys(**query)

    recombined_ch4 = recombine_datasets(keys=stored_keys)
    recombined_ch4.attrs = {}

    assert expected_ch4.time.equals(recombined_ch4.time)
    assert expected_ch4["ch4"].equals(recombined_ch4["ch4"])

Example #6

0

Show file

def test_search_nonsense_terms():
    """A search for made-up species names should give a falsy result."""
    nonsense_species = ["spam", "eggs", "terry"]
    where = ["capegrim"]

    found = search(species=nonsense_species, locations=where)

    assert not found

Example #7

0

Show file

def test_ranked_bsd_search():
    """Run a ranked search for ch4 at BSD and check rank metadata and retrieved data.

    Verifies the date-range to inlet mapping stored under "rank_metadata",
    that every metadata entry passes the ObsSurface checker, and the first
    and last values, the time bounds and the length of the retrieved dataset.
    """
    site = "bsd"
    species = "ch4"

    result = search(site=site, species=species)

    raw_result = result.raw()

    expected_rank_metadata = {
        "2015-01-01-00:00:00+00:00_2015-11-01-00:00:00+00:00": "248m",
        "2014-09-02-00:00:00+00:00_2014-11-01-00:00:00+00:00": "108m",
        "2016-09-02-00:00:00+00:00_2018-11-01-00:00:00+00:00": "108m",
        "2019-01-02-00:00:00+00:00_2021-01-01-00:00:00+00:00": "42m",
    }

    assert expected_rank_metadata == raw_result[site][species]["rank_metadata"]

    metadata = result.metadata(site=site, species=species)

    # Only the metadata values are needed, the keys are not inspected
    for meta in metadata.values():
        assert metadata_checker_obssurface(metadata=meta, species=species)

    obs_data = result.retrieve(site=site, species=species)

    ch4_data = obs_data.data

    assert ch4_data.time[0] == Timestamp("2014-01-30T11:12:30")
    assert ch4_data.time[-1] == Timestamp("2020-12-01T22:31:30")
    assert ch4_data["ch4"][0] == pytest.approx(1959.55)
    assert ch4_data["ch4"][-1] == pytest.approx(1955.93)
    # Floats: compare with a tolerance, consistent with the mole fraction checks above
    assert ch4_data["ch4_variability"][0] == pytest.approx(0.79)
    assert ch4_data["ch4_variability"][-1] == pytest.approx(0.232)
    assert len(ch4_data.time) == 196

Example #8

0

Show file

def test_search_incorrect_inlet_site_finds_nothing():
    """An inlet height that does not exist at the site should give a falsy result."""
    found = search(site="hfd", species="CH4", inlet="3695m")

    assert not found

Example #9

0

Show file

File: test_searchresults.py Project: openghg/openghg

def test_retrieve_unranked():
    """A ch4 search with ranking skipped should be unranked, local and cover three sites."""
    found = search(species="ch4", skip_ranking=True)

    assert found.ranked_data is False
    assert found.cloud is False

    raw = found.raw()

    # One expected (site, inlet) pair per site, each must hold a truthy entry
    for site_code, inlet_height in (("tac", "100m"), ("hfd", "50m"), ("bsd", "42m")):
        assert raw[site_code]["ch4"][inlet_height]

Example #10

0

Show file

File: test_emissions.py Project: openghg/openghg

def test_read_file():
    """Read an emissions file into the store and check the stored data and metadata.

    The recombined dataset must match the original file on lat, lon, time
    and flux, and the stored metadata (minus the "processed" and
    "prior_file_1_version" entries) must equal the expected dictionary.
    """
    get_local_bucket(empty=True)

    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        data_type="emissions",
    )

    first_key = list(search_results.keys())[0]

    stored_keys = search_results[first_key]["keys"]
    emissions_data = recombine_datasets(keys=stored_keys, sort=False)

    metadata = search_results[first_key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data.lat.equals(emissions_data.lat)
    assert orig_data.lon.equals(emissions_data.lon)
    assert orig_data.time.equals(emissions_data.time)
    assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    # These two entries are removed before the comparison
    for excluded in ("processed", "prior_file_1_version"):
        del metadata[excluded]

    assert metadata == expected_metadata

Example #11

0

Show file

def test_search_find_any_unranked():
    """Search with find_all=False, first without and then with a date window.

    Without dates three sites are expected in the raw results; restricting
    the search to 2014 is expected to leave only BSD and HFD.
    """
    species = ["co2"]
    sites = ["bsd"]
    inlet = "248m"
    instrument = "picarro"

    undated = search(
        find_all=False,
        species=species,
        site=sites,
        inlet=inlet,
        instrument=instrument,
    )

    undated_raw = undated.raw()

    assert len(undated_raw) == 3

    # Species expected under each site in the raw results
    expected_species = {
        "bsd": ["ch4", "co", "co2"],
        "hfd": ["ch4", "co", "co2"],
        "tac": ["ch4", "co2"],
    }

    for site_code, expected in expected_species.items():
        assert sorted(undated_raw[site_code].keys()) == expected

    window_start = timestamp_tzaware("2014-1-1")
    window_end = timestamp_tzaware("2015-1-1")

    dated = search(
        find_all=False,
        species=species,
        site=sites,
        start_date=window_start,
        end_date=window_end,
        inlet=inlet,
        instrument=instrument,
    )

    dated_raw = dated.raw()

    assert len(dated_raw) == 2
    assert sorted(dated_raw.keys()) == ["bsd", "hfd"]

Example #12

0

Show file

def test_specific_keyword_search():
    """Search on site, species, inlet and instrument and verify the retrieved attributes.

    The "file_created" attribute is removed before the comparison.
    """
    site = "bsd"
    species = "co2"
    inlet = "248m"
    instrument = "picarro"

    found = search(species=species, site=site, inlet=inlet, instrument=instrument)

    found_metadata = found.metadata(site=site, species=species, inlet=inlet)
    assert metadata_checker_obssurface(metadata=found_metadata, species="co2")

    retrieved = found.retrieve(site=site, species=species, inlet="248m")
    dataset = retrieved.data

    del dataset.attrs["file_created"]

    expected_attrs = {
        "data_owner": "Simon O'Doherty",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "248m",
        "comment": "Cavity ring-down measurements. Output from GCWerks",
        "conditions_of_use": "Ensure that you contact the data owner at the outset of your project.",
        "source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "processed_by": "OpenGHG_Cloud",
        "species": "co2",
        "calibration_scale": "WMO-X2007",
        "station_longitude": -1.15033,
        "station_latitude": 54.35858,
        "station_long_name": "Bilsdale, UK",
        "station_height_masl": 380.0,
        "site": "bsd",
        "instrument": "picarro",
        "sampling_period": "60",
        "inlet": "248m",
        "port": "9",
        "type": "air",
        "network": "decc",
        "scale": "WMO-X2007",
        "long_name": "bilsdale",
    }

    assert dataset.attrs == expected_attrs

Example #13

0

Show file

def test_search_find_any():
    """Search with find_all=False over 2014 and check the inlets found at BSD and HFD."""
    species = ["co2"]
    sites = ["bsd"]
    inlet = "248m"
    instrument = "picarro"

    window_start = timestamp_tzaware("2014-1-1")
    window_end = timestamp_tzaware("2015-1-1")

    found = search(
        find_all=False,
        species=species,
        site=sites,
        start_date=window_start,
        end_date=window_end,
        inlet=inlet,
        instrument=instrument,
    )

    raw = found.raw()

    bsd_data = raw["bsd"]
    hfd_data = raw["hfd"]

    gases = ("ch4", "co", "co2")

    # Every gas at BSD should be present at all three inlet heights
    expected_bsd_heights = sorted(["248m", "108m", "42m"])
    for gas in gases:
        assert sorted(bsd_data[gas].keys()) == expected_bsd_heights

    # ... and at both inlet heights at HFD
    expected_hfd_heights = ["100m", "50m"]
    for gas in gases:
        assert sorted(hfd_data[gas].keys()) == expected_hfd_heights

    ch4_metadata = bsd_data["ch4"]["42m"]["metadata"]
    co2_metadata = bsd_data["co2"]["42m"]["metadata"]

    assert metadata_checker_obssurface(metadata=ch4_metadata, species="ch4")
    assert metadata_checker_obssurface(metadata=co2_metadata, species="co2")

Example #14

0

Show file

def test_recombination_GC():
    """Store a GCWERKS data/precision pair, recombine it and compare with a direct parse."""
    get_local_bucket(empty=True)

    data_path = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_path = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file(
        (data_path, precision_path),
        data_type="GCWERKS",
        site="cgo",
        network="agage",
    )

    # Reference data parsed straight from the two files
    parsed = parse_gcwerks(
        data_filepath=data_path,
        precision_filepath=precision_path,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    toluene_data = parsed["c6h5ch3_70m"]["data"]

    query = {"species": "c6h5ch3", "site": "CGO", "inlet": "70m"}

    found = search(**query)
    stored_keys = found.keys(**query)

    toluene_data_recombined = recombine_datasets(keys=stored_keys)

    # Attributes are cleared on both datasets before comparing
    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)

    compared_variables = (
        "c6h5ch3",
        "c6h5ch3_repeatability",
        "c6h5ch3_status_flag",
        "c6h5ch3_integration_flag",
    )
    for variable in compared_variables:
        assert toluene_data[variable].equals(toluene_data_recombined[variable])

Example #15

0

Show file

def test_read_file():
    """Read a GEOSChem file into the store and check the stored data and metadata.

    The recombined dataset must match the original file on lat, lon, time,
    lev and SpeciesConc_CH4, and a subset of the stored metadata must hold
    the expected values.
    """
    get_local_bucket(empty=True)

    test_datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    proc_results = EulerianModel.read_file(filepath=test_datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in proc_results

    search_results = search(
        species="ch4",
        model="geoschem",
        start_date="2015-01-01",
        data_type="eulerian_model",
    )

    first_key = list(search_results.keys())[0]

    stored_keys = search_results[first_key]["keys"]
    eulerian_data = recombine_datasets(keys=stored_keys, sort=False)

    metadata = search_results[first_key]["metadata"]

    orig_data = open_dataset(test_datapath)

    for variable in ("lat", "lon", "time", "lev", "SpeciesConc_CH4"):
        assert orig_data[variable].equals(eulerian_data[variable])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    # Only these entries are checked, other metadata entries are ignored
    for name, expected_value in expected_metadata_values.items():
        assert metadata[name] == expected_value

Example #16

0

Show file

def get_obs_surface(
    site: str,
    species: str,
    inlet: Optional[str] = None,
    start_date: Optional[Union[str, Timestamp]] = None,
    end_date: Optional[Union[str, Timestamp]] = None,
    average: Optional[str] = None,
    network: Optional[str] = None,
    instrument: Optional[str] = None,
    calibration_scale: Optional[str] = None,
    keep_missing: Optional[bool] = False,
    skip_ranking: Optional[bool] = False,
) -> ObsData:
    """Get measurements from one site.

    The data is found with a search, retrieved, optionally sliced to the
    requested date range and resampled, and its variables are renamed to the
    generic "mf" naming (mf, mf_repeatability, mf_variability, ...).

    Args:
        site: Site of interest e.g. MHD for the Mace Head site.
        species: Species identifier e.g. ch4 for methane.
        inlet: Inlet label
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret.
            Slicing is only applied when both start_date and end_date are
            given; the end date is exclusive.
        average: Averaging period for each dataset. Each value should be a string of
            the form e.g. "2H", "30min" (should match pandas offset aliases format).
        network: Network for the site/instrument. Currently not used by this function.
        instrument: Specific instrument for the site, passed to the search.
        calibration_scale: Convert to this calibration scale
        keep_missing: Keep missing data points or drop them. Only has an
            effect when average is given.
        skip_ranking: Passed through to the search function.
    Returns:
        ObsData: ObsData object
    Raises:
        ValueError: If the site is not in the site info file or the search
            finds no results.
        AttributeError: If the retrieved dataset has no time attribute.
    """
    from pandas import Timestamp, Timedelta
    import numpy as np
    from xarray import concat as xr_concat
    from openghg.retrieve import search
    from openghg.store import recombine_datasets
    from openghg.util import clean_string, load_json, timestamp_tzaware

    site_info = load_json(filename="acrg_site_info.json")
    site = site.upper()

    if site not in site_info:
        raise ValueError(
            f"No site called {site}, please enter a valid site name.")

    # Find the correct synonym for the passed species
    species = clean_string(_synonyms(species))

    # Get the observation data
    obs_results = search(
        site=site,
        species=species,
        inlet=inlet,
        start_date=start_date,
        end_date=end_date,
        instrument=instrument,
        find_all=True,
        skip_ranking=skip_ranking,
    )

    if not obs_results:
        raise ValueError(f"Unable to find results for {species} at {site}")

    # TODO - for some reason mypy doesn't pick up the ObsData being returned here, look into this
    # GJ - 2021-07-19
    retrieved_data: ObsData = obs_results.retrieve(site=site,
                                                   species=species,
                                                   inlet=inlet)  # type: ignore
    data = retrieved_data.data

    if data.attrs["inlet"] == "multiple":
        data.attrs["inlet_height_magl"] = "multiple"
        retrieved_data.metadata["inlet"] = "multiple"

    if start_date is not None and end_date is not None:
        start_date_tzaware = timestamp_tzaware(start_date)
        end_date_tzaware = timestamp_tzaware(end_date)
        end_date_tzaware_exclusive = end_date_tzaware - Timedelta(
            1, unit="nanosecond"
        )  # Deduct 1 ns to make the end day (date) exclusive.

        # Slice the data to only cover the dates we're interested in
        data = data.sel(
            time=slice(start_date_tzaware, end_date_tzaware_exclusive))

    # The values are not used afterwards, this only verifies that the dataset
    # carries a readable time coordinate
    try:
        start_date_data = timestamp_tzaware(data.time[0].values)
        end_date_data = timestamp_tzaware(data.time[-1].values)
    except AttributeError as err:
        # Chain the original error so the underlying cause stays visible
        raise AttributeError(
            "This dataset does not have a time attribute, unable to read date range"
        ) from err

    if average is not None:
        # GJ - 2021-03-09
        # TODO - check by RT

        # # Average the Dataset over a given period
        # if keep_missing is True:
        #     # Create a dataset with one element and NaNs to prepend or append
        #     ds_single_element = data[{"time": 0}]

        #     for v in ds_single_element.variables:
        #         if v != "time":
        #             ds_single_element[v].values = np.nan

        #     ds_concat = []

        #     # Pad with an empty entry at the start date
        #     if timestamp_tzaware(data.time.min()) > start_date:
        #         ds_single_element_start = ds_single_element.copy()
        #         ds_single_element_start.time.values = Timestamp(start_date)
        #         ds_concat.append(ds_single_element_start)

        #     ds_concat.append(data)

        #     # Pad with an empty entry at the end date
        #     if data.time.max() < Timestamp(end_date):
        #         ds_single_element_end = ds_single_element.copy()
        #         ds_single_element_end.time.values = Timestamp(end_date) - Timedelta("1ns")
        #         ds_concat.append(ds_single_element_end)

        #     data = xr_concat(ds_concat, dim="time")

        #     # Now sort to get everything in the right order
        #     data = data.sortby("time")

        # First do a mean resample on all variables
        ds_resampled = data.resample(time=average).mean(skipna=False,
                                                        keep_attrs=True)
        # keep_attrs doesn't seem to work for some reason, so manually copy
        ds_resampled.attrs = data.attrs.copy()

        average_in_seconds = Timedelta(average).total_seconds()
        ds_resampled.attrs["averaged_period"] = average_in_seconds
        ds_resampled.attrs["averaged_period_str"] = average

        # For some variables, need a different type of resampling
        data_variables: List[str] = [str(v) for v in data.variables]

        for var in data_variables:
            if "repeatability" in var:
                # Root of the summed squares divided by the number of points in each period
                ds_resampled[var] = (np.sqrt(
                    (data[var]**2).resample(time=average).sum()) /
                                     data[var].resample(time=average).count())

            # Copy over some attributes
            if "long_name" in data[var].attrs:
                ds_resampled[var].attrs["long_name"] = data[var].attrs[
                    "long_name"]

            if "units" in data[var].attrs:
                ds_resampled[var].attrs["units"] = data[var].attrs["units"]

        # Create a new variability variable, containing the standard deviation within the resampling period
        ds_resampled[f"{species}_variability"] = (data[species].resample(
            time=average).std(skipna=False, keep_attrs=True))

        # If there are any periods where only one measurement was resampled, just use the median variability
        ds_resampled[f"{species}_variability"][
            ds_resampled[f"{species}_variability"] ==
            0.0] = ds_resampled[f"{species}_variability"].median()

        # Create attributes for variability variable
        ds_resampled[f"{species}_variability"].attrs[
            "long_name"] = f"{data.attrs['long_name']}_variability"

        ds_resampled[f"{species}_variability"].attrs["units"] = data[
            species].attrs["units"]

        # Resampling may introduce NaNs, so remove, if not keep_missing
        if keep_missing is False:
            ds_resampled = ds_resampled.dropna(dim="time")

        data = ds_resampled

    # Rename variables
    rename: Dict[str, str] = {}

    data_variables = [str(v) for v in data.variables]
    for var in data_variables:
        if var.lower() == species.lower():
            rename[var] = "mf"
        if "repeatability" in var:
            rename[var] = "mf_repeatability"
        if "variability" in var:
            rename[var] = "mf_variability"
        if "number_of_observations" in var:
            rename[var] = "mf_number_of_observations"
        if "status_flag" in var:
            rename[var] = "status_flag"
        if "integration_flag" in var:
            rename[var] = "integration_flag"

    data = data.rename_vars(rename)  # type: ignore

    data.attrs["species"] = species

    if "calibration_scale" in data.attrs:
        data.attrs["scale"] = data.attrs.pop("calibration_scale")

    if calibration_scale is not None:
        data = _scale_convert(data, species, calibration_scale)

    # The dataset attributes take precedence over the retrieved metadata
    metadata = retrieved_data.metadata
    metadata.update(data.attrs)

    obs_data = ObsData(data=data, metadata=metadata)

    # It doesn't make sense to do this now as we've only got a single Dataset
    # # Now check if the units match for each of the observation Datasets
    # units = set((f.data.mf.attrs["units"] for f in obs_files))
    # scales = set((f.data.attrs["scale"] for f in obs_files))

    # if len(units) > 1:
    #     raise ValueError(
    #         f"Units do not match for these observation Datasets {[(f.mf.attrs['station_long_name'],f.attrs['units']) for f in obs_files]}"
    #     )

    # if len(scales) > 1:
    #     print(
    #         f"Scales do not match for these observation Datasets {[(f.mf.attrs['station_long_name'],f.attrs['units']) for f in obs_files]}"
    #     )
    #     print("Suggestion: set calibration_scale to convert scales")

    return obs_data

Example #17

0

Show file

File: test_searchresults.py Project: openghg/openghg

def test_retrieve_complex_ranked():
    """Set alternating inlet ranks for co at BSD and check the ranked retrieval.

    Starts from unranked sources, assigns rank 1 to different inlets over five
    consecutive date ranges, then checks the stored ranking, the rank
    metadata attached to the retrieved data and the number of time points.
    """
    rank = rank_sources(site="bsd", species="co")

    full_range = "2014-01-30 11:12:30+00:00_2020-12-01 22:31:30+00:00"

    # Before any rank is set every inlet reports "NA"
    expected_res = {
        height: {"rank_data": "NA", "data_range": full_range}
        for height in ("42m", "108m", "248m")
    }

    assert rank.raw() == expected_res

    # (inlet, start_date, end_date), applied in this order, always with rank 1
    rank_periods = (
        ("42m", "2014-01-01", "2015-03-01"),
        ("108m", "2015-03-02", "2016-08-01"),
        ("42m", "2016-08-02", "2017-03-01"),
        ("248m", "2017-03-02", "2019-03-01"),
        ("108m", "2019-03-02", "2021-12-01"),
    )

    for inlet_height, period_start, period_end in rank_periods:
        rank.set_rank(inlet=inlet_height, rank=1, start_date=period_start, end_date=period_end)

    updated_res = rank.get_sources(site="bsd", species="co")

    expected_updated_res = {
        "42m": {
            "rank_data": {
                "2014-01-01-00:00:00+00:00_2015-03-01-00:00:00+00:00": 1,
                "2016-08-02-00:00:00+00:00_2017-03-01-00:00:00+00:00": 1,
            },
            "data_range": full_range,
        },
        "108m": {
            "rank_data": {
                "2015-03-02-00:00:00+00:00_2016-08-01-00:00:00+00:00": 1,
                "2019-03-02-00:00:00+00:00_2021-12-01-00:00:00+00:00": 1,
            },
            "data_range": full_range,
        },
        "248m": {
            "rank_data": {
                "2017-03-02-00:00:00+00:00_2019-03-01-00:00:00+00:00": 1,
            },
            "data_range": full_range,
        },
    }

    assert updated_res == expected_updated_res

    search_res = search(site="bsd", species="co")

    expected_rankings = {
        "2014-01-01-00:00:00+00:00_2015-03-01-00:00:00+00:00": "42m",
        "2016-08-02-00:00:00+00:00_2017-03-01-00:00:00+00:00": "42m",
        "2015-03-02-00:00:00+00:00_2016-08-01-00:00:00+00:00": "108m",
        "2019-03-02-00:00:00+00:00_2021-12-01-00:00:00+00:00": "108m",
        "2017-03-02-00:00:00+00:00_2019-03-01-00:00:00+00:00": "248m",
    }

    retrieved = search_res.retrieve(site="bsd", species="co")

    assert retrieved.metadata["rank_metadata"] == expected_rankings
    assert retrieved.data.time.size == 234

Example #18

0

Show file

def get_flux(
    species: str,
    sources: Union[str, List[str]],
    domain: str,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    time_resolution: Optional[str] = "standard",
) -> FluxData:
    """
    The flux function reads in all flux files for the domain and species as an xarray Dataset.
    Note that at present ALL flux data is read in per species per domain or by emissions name.
    To be consistent with the footprints, fluxes should be in mol/m2/s.

    Only the first entry of the search results is used. A "lev" coordinate
    holding a single level is dropped from the returned dataset.

    Args:
        species: Species name
        sources: Source name
        domain: Domain e.g. EUROPE
        start_date: Start date, defaults to timestamp_epoch() when not given
        end_date: End date, defaults to timestamp_now() when not given
        time_resolution: One of ["standard", "high"]
    Returns:
        FluxData: FluxData object
    Raises:
        ValueError: If no flux data is found or the data has more than one level.

    TODO: Update inputs to just accept a string and extract one flux file at a time?
    As it stands, this only extracts one flux at a time but is set up to be extended
    to extract multiple. So if this is removed from this function the functionality
    itself would need to be wrapped up in another function call.
    """
    from openghg.retrieve import search
    from openghg.store import recombine_datasets
    from openghg.util import timestamp_epoch, timestamp_now

    if start_date is None:
        start_date = timestamp_epoch()
    if end_date is None:
        end_date = timestamp_now()

    results: Dict = search(
        species=species,
        source=sources,
        domain=domain,
        time_resolution=time_resolution,
        start_date=start_date,
        end_date=end_date,
        data_type="emissions",
    )  # type: ignore

    if not results:
        raise ValueError(
            f"Unable to find flux data for {species} from {sources}")

    # TODO - more than one emissions file (but see above)
    try:
        em_key = list(results.keys())[0]
    except IndexError as err:
        # The message previously referred to footprints, this function handles flux data
        raise ValueError(
            f"Unable to find any flux data for {domain} for {species}.") from err

    data_keys = results[em_key]["keys"]
    metadata = results[em_key]["metadata"]

    em_ds = recombine_datasets(keys=data_keys, sort=False)

    # Check for level coordinate. If one level, assume surface and drop
    if "lev" in em_ds.coords:
        if len(em_ds.lev) > 1:
            raise ValueError("Error: More than one flux level")

        em_ds = em_ds.drop_vars(names="lev")

    # Fall back to the species recorded in the metadata when none was passed
    if species is None:
        species = metadata.get("species", "NA")

    return FluxData(
        data=em_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )

Example #19

0

Show file

def test_specific_search_gc():
    """Search CGO for NF3 at the 70m inlet and validate the returned metadata."""
    found = search(species=["NF3"], site="CGO", inlet="70m")

    # The metadata lookup uses the lowercase site and species names
    nf3_metadata = found.metadata(site="cgo", species="nf3", inlet="70m")

    assert metadata_checker_obssurface(metadata=nf3_metadata, species="nf3")

Example #20

0

Show file

def test_read_footprint():
    """Read a footprint file into the store and check the recombined dataset.

    Verifies the coordinates, dimensions and attributes of the stored
    footprint data and the extreme values of fp_low, fp_high and pressure.
    """
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")
    # model_params = {"simulation_params": "123"}

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]

    footprint_keys = footprint_results[fp_site_key]["keys"]
    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == ["height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"]

    # The comparison is reduced with .all(), so the stored heights are
    # expected to compare element-wise (e.g. a numpy array)
    assert (
        footprint_data.attrs["heights"]
        == [
            500.0,
            1500.0,
            2500.0,
            3500.0,
            4500.0,
            5500.0,
            6500.0,
            7500.0,
            8500.0,
            9500.0,
            10500.0,
            11500.0,
            12500.0,
            13500.0,
            14500.0,
            15500.0,
            16500.0,
            17500.0,
            18500.0,
            19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    # Already checked above (or varying between runs), remove before the exact comparison
    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    # These comparisons were bare expressions before and never checked anything
    assert footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    assert footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    assert footprint_data["pressure"].max().values == pytest.approx(1011.92)
    assert footprint_data["fp_low"].min().values == 0.0
    assert footprint_data["fp_high"].min().values == 0.0
    assert footprint_data["pressure"].min().values == pytest.approx(1011.92)

Example #21

0

Show file

def get_footprint(
    site: str,
    domain: str,
    height: str,
    model: str = None,
    start_date: Timestamp = None,
    end_date: Timestamp = None,
    species: str = None,
) -> FootprintData:
    """
    Get footprints from one site.

    Searches the object store for footprints matching the given terms,
    recombines the stored datasets of the first match into a single dataset
    and wraps it, together with its metadata, in a FootprintData object.

    Args:
        site: The name of the site given in the footprints. This often matches
              the observation site name, but it may differ slightly if the
              footprints for the same site were run with a different met.
              E.g. site="DJI", site_modifier = "DJI-SAM" -
              station called DJI, footprints site called DJI-SAM
        domain: Domain name for the footprints
        height: Height of inlet in metres
        model: Name of the model used to create the footprints. Only used to
               narrow the search when it is given.
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret
        species: Species identifier e.g. "co2" for carbon dioxide. Only needed
                 if species needs a modified footprints from the typical 30-day
                 footprints appropriate for a long-lived species (like methane)
                 e.g. for high time resolution (co2) or is a short-lived species.
    Returns:
        FootprintData: FootprintData dataclass
    Raises:
        ValueError: If the search returns no footprints data.
    """
    from openghg.store import recombine_datasets
    from openghg.retrieve import search
    from openghg.dataobjects import FootprintData

    search_terms = {
        "site": site,
        "domain": domain,
        "height": height,
        "start_date": start_date,
        "end_date": end_date,
        "species": species,
        "data_type": "footprints",
    }

    # Only constrain on the model when the caller asked for one, so calls
    # that omit it issue exactly the same search as before.
    if model is not None:
        search_terms["model"] = model

    results = search(**search_terms)  # type: ignore

    try:
        # Only the first matching footprint entry is used
        fp_site_key = list(results.keys())[0]
    except IndexError:
        species_detail = f" for species {species}" if species is not None else ""
        # The IndexError itself carries no useful information for the caller,
        # so suppress it rather than chaining it onto the ValueError.
        raise ValueError(
            f"Unable to find any footprints data for {site} at a height of {height}{species_detail}."
        ) from None

    keys = results[fp_site_key]["keys"]
    metadata = results[fp_site_key]["metadata"]
    fp_ds = recombine_datasets(keys=keys, sort=True)

    # Fall back to the species recorded in the metadata (if any) when the
    # caller did not specify one.
    if species is None:
        species = metadata.get("species", "NA")

    return FootprintData(
        data=fp_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        # NOTE(review): placeholder values carried over unchanged - the real
        # scale and units are not available here.
        scales="FIXME",
        units="FIXME",
    )