Esempio n. 1
0
def test_get_obs_surface_no_inlet_ranking():
    """
    Check the metadata and attributes returned by get_obs_surface when
     - ranking data is set
     - no inlet is specified
     - no date range is specified (all dates returned)

    Verifies that
     - the metadata carries the expected "rank_metadata" mapping
     - the inlet details are reported as "multiple" on both the data
       attributes and the metadata
    """
    # Date range -> inlet mapping expected under metadata["rank_metadata"]
    expected_ranking = {
        "2015-01-01-00:00:00+00:00_2015-11-01-00:00:00+00:00": "248m",
        "2014-09-02-00:00:00+00:00_2014-11-01-00:00:00+00:00": "108m",
        "2016-09-02-00:00:00+00:00_2018-11-01-00:00:00+00:00": "108m",
        "2019-01-02-00:00:00+00:00_2021-01-01-00:00:00+00:00": "42m",
    }

    result = get_obs_surface(site="bsd", species="ch4")
    dataset, meta = result.data, result.metadata

    assert dataset
    assert meta["rank_metadata"] == expected_ranking

    # The inlet must be present as a variable and flagged as "multiple"
    assert "inlet" in dataset
    assert dataset.attrs["inlet"] == "multiple"
    assert meta["inlet"] == "multiple"
Esempio n. 2
0
def test_get_obs_surface_ranking_unique():
    """
    Check the data returned by get_obs_surface when
     - ranking data is set
     - no inlet is specified
     - the date range covers multiple inlets

    Covers checks not included in `test_get_obs_surface_no_inlet_ranking`.
    TODO: At the moment this fails - unique data is not returned and there are multiple
    entries for some time stamps. This is a bug which will need to be fixed.
    """
    ranked = get_obs_surface(site="bsd", species="ch4").data

    # Every sample inside the 2015 window should be labelled with the 248m inlet
    window = slice("2015-01-01-00:00:00", "2015-11-01-00:00:00")
    inlets_in_window = ranked["inlet"].sel(time=window).values
    all_248m = np.tile("248m", len(inlets_in_window))
    np.testing.assert_equal(inlets_in_window, all_248m)

    # A single time stamp must select exactly one value, from the 108m inlet
    mf_at_108m = ranked["mf"].sel(time="2016-04-02T09:07:30")
    assert mf_at_108m.size == 1
    assert mf_at_108m["inlet"] == "108m"

    # Same check on the whole dataset for a time stamp from the 248m inlet
    point_at_248m = ranked.sel(time="2015-01-30T11:12:30")
    assert point_at_248m["mf"].size == 1
    assert point_at_248m["inlet"] == "248m"
Esempio n. 3
0
def test_timeslice_slices_correctly():
    """Check that a start/end date request returns the expected first and last times."""
    expected_first = Timestamp("2017-02-18T06:36:30")
    expected_last = Timestamp("2018-02-18T15:42:30")

    co2_slice = get_obs_surface(
        site="bsd",
        species="co2",
        inlet="248m",
        start_date="2017-01-01",
        end_date="2018-03-03",
    ).data

    assert co2_slice.time[0] == expected_first
    assert co2_slice.time[-1] == expected_last
Esempio n. 4
0
def test_get_obs_surface():
    """
    Check a plain bsd / co2 / 248m retrieval: time range, first and last mole
    fraction values and selected metadata, then check that requesting a "2h"
    average yields a different time axis.
    """
    request = {"site": "bsd", "species": "co2", "inlet": "248m"}

    raw = get_obs_surface(**request)
    co2 = raw.data

    # Bounds of the time axis and the values found there
    assert co2.time[0] == Timestamp("2014-01-30T11:12:30")
    assert co2.time[-1] == Timestamp("2020-12-01T22:31:30")
    assert co2.mf[0] == 409.55
    assert co2.mf[-1] == 417.65

    meta = raw.metadata
    assert meta["data_owner"] == "Simon O'Doherty"
    assert meta["inlet_height_magl"] == "248m"

    # Averaging must change the time coordinate compared to the raw data
    resampled = get_obs_surface(**request, average="2h")
    assert not raw.data.time.equals(resampled.data.time)
Esempio n. 5
0
def test_timeslice_slices_correctly_exclusive():
    """
    Check time slicing with an exclusive time range for continuous data -
    up to but not including the end point.
    """
    mhd_slice = get_obs_surface(
        site="mhd",
        species="ch4",
        inlet="10m",
        start_date="2012-01-11",
        end_date="2012-02-05",
    ).data

    # Expected times are shifted back by half of the 75 second sampling period
    half_period = Timedelta(75, unit="seconds") / 2.0

    assert mhd_slice.time[0] == (Timestamp("2012-01-11T00:13") - half_period)
    assert mhd_slice.time[-1] == (Timestamp("2012-02-04T23:47") - half_period)
    assert mhd_slice.mf[0] == 1849.814
    assert mhd_slice.mf[-1] == 1891.094
Esempio n. 6
0
def test_get_obs_surface_ranking_single():
    """
    Check the data returned by get_obs_surface when
     - ranking data is set
     - no inlet is specified
     - the date range should only include data for one inlet
    """
    ranked = get_obs_surface(
        site="bsd",
        species="ch4",
        start_date="2015-01-01",
        end_date="2015-11-01",
    )
    dataset, meta = ranked.data, ranked.metadata

    assert dataset
    # Only the 248m inlet is expected for this date range
    assert dataset.attrs["inlet"] == "248m"
    assert meta["inlet"] == "248m"

    # A single time stamp must select exactly one mole fraction value
    single_point = dataset["mf"].sel(time="2015-01-30T11:12:30")
    assert single_point.size == 1
Esempio n. 7
0
def test_no_inlet_no_ranked_data_raises():
    """
    Requesting bsd co2 without specifying an inlet must raise ValueError
    (per the test name, no ranking data is set for this species).
    """
    query = {"site": "bsd", "species": "co2"}

    with pytest.raises(ValueError):
        get_obs_surface(**query)
Esempio n. 8
0
def test_averaging_incorrect_period_raises():
    """An averaging period of "888" must be rejected with a ValueError."""
    query = {"site": "bsd", "species": "co2", "inlet": "248m", "average": "888"}

    with pytest.raises(ValueError):
        get_obs_surface(**query)
Esempio n. 9
0
def single_site_footprint(
    site: str,
    height: str,
    network: str,
    domain: str,
    species: str,
    start_date: Union[str, Timestamp],
    end_date: Union[str, Timestamp],
    resample_to: str = "coarsest",
    site_modifier: Optional[str] = None,
    platform: Optional[str] = None,
    instrument: Optional[str] = None,
) -> Dataset:
    """Creates a Dataset for a single site's measurement data and footprints

    Observations are retrieved with get_obs_surface and footprints with
    get_footprint. The two are passed through align_datasets and then merged
    with combine_datasets. If the observation "mf" variable carries a "units"
    attribute, the combined "fp" values are divided by it.

    Args:
        site: Site name
        height: Height of inlet in metres. Used as the inlet for the observations
                and as the height for the footprints.
        network: Network name. Not currently used by this function.
        domain: Domain passed to the footprint retrieval
        species: Species type
        start_date: Start of the date range (made timezone aware before use)
        end_date: End of the date range (made timezone aware before use)
        resample_to: Resample the data to a given time dataset.
        Valid options are ["obs", "footprints", "coarsest"] (case insensitive).
            - "obs" resamples the footprints to the observation time series data
            - "footprints" resamples to the footprints time series
            - "coarsest" resamples to the data with the coarsest time resolution
        site_modifier: The name of the site given in the footprints.
                       This is useful for example if the same site footprints are run with a different met and
                       they are named slightly differently from the obs file. E.g.
                       site="DJI", site_modifier = "DJI-SAM" - station called DJI, footprints site called DJI-SAM
        platform: Observation platform used to decide whether to resample.
                  NOTE: this is currently reset to None inside the function, so
                  the value passed by the caller has no effect.
        instrument: Instrument passed to the observation retrieval
    Returns:
        xarray.Dataset: combined observation and footprint data, transposed so
        that "time" is the last dimension
    Raises:
        ValueError: if resample_to is not one of the valid options
        AttributeError: if the mf variable cannot be read from the observation data
    """
    from openghg.retrieve import get_obs_surface, get_footprint
    from openghg.util import timestamp_tzaware

    start_date = timestamp_tzaware(start_date)
    end_date = timestamp_tzaware(end_date)

    resample_to = resample_to.lower()
    resample_choices = ("obs", "footprints", "coarsest")
    if resample_to not in resample_choices:
        raise ValueError(
            f"Invalid resample choice {resample_to} past, please select from one of {resample_choices}"
        )

    # As we're not retrieving any satellite data yet just set tolerance to None.
    # platform is reset as well, so the caller's value is ignored for now.
    tolerance = None
    platform = None

    # Here we want to use get_obs_surface
    obs_results = get_obs_surface(
        site=site,
        inlet=height,
        start_date=start_date,
        end_date=end_date,
        species=species,
        instrument=instrument,
    )

    obs_data = obs_results.data

    # Save the observation data units; a missing "units" attribute is tolerated
    # but a missing mf variable is an error
    try:
        units: Optional[float] = float(obs_data.mf.attrs["units"])
    except KeyError:
        units = None
    except AttributeError as err:
        # Chain the original error so the underlying cause stays visible
        raise AttributeError(
            "Unable to read mf attribute from observation data.") from err

    # If the site for the footprints has a different name, pass that in
    footprint_site = site_modifier or site

    # Try to find appropriate footprints file first with and then without species name
    try:
        footprint = get_footprint(
            site=footprint_site,
            domain=domain,
            height=height,
            start_date=start_date,
            end_date=end_date,
            species=species,
        )
    except ValueError:
        footprint = get_footprint(
            site=footprint_site,
            domain=domain,
            height=height,
            start_date=start_date,
            end_date=end_date,
        )

    # TODO: Add checks for particular species e.g. co2 and short-lived species
    # which should have a specific footprints available rather than the generic one

    # Extract dataset
    footprint_data = footprint.data

    # Align the two Datasets
    aligned_obs, aligned_footprint = align_datasets(
        obs_data=obs_data,
        footprint_data=footprint_data,
        platform=platform,
        resample_to=resample_to,
    )

    combined_dataset = combine_datasets(dataset_A=aligned_obs,
                                        dataset_B=aligned_footprint,
                                        tolerance=tolerance)

    # Transpose to keep time in the last dimension position in case it has been moved in resample
    combined_dataset = combined_dataset.transpose(..., "time")

    # Scale the footprint by the observation units when they are known
    if units is not None:
        combined_dataset["fp"].values = combined_dataset["fp"].values / units
        # if HiTRes:
        #     combined_dataset.update({"fp_HiTRes": (combined_dataset.fp_HiTRes.dims, (combined_dataset.fp_HiTRes / units))})

    return combined_dataset