def test_recombination_CRDS():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    ObsSurface.read_file(filepath, data_type="CRDS", site="hfd", network="DECC")

    # Parse the raw file directly so we can compare against the recombined data
    gas_data = parse_crds(data_filepath=filepath, site="HFD", network="AGAGE")

    ch4_data_read = gas_data["ch4"]["data"]

    species = "ch4"
    site = "hfd"
    inlet = "100m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    ch4_data_recombined = recombine_datasets(keys=keys)
    # Clear the attributes so only the data itself is compared
    ch4_data_recombined.attrs = {}

    assert ch4_data_read.time.equals(ch4_data_recombined.time)
    assert ch4_data_read["ch4"].equals(ch4_data_recombined["ch4"])
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    proc_results = Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    assert "co2_gppcardamom_europe_2012" in proc_results

    search_results = search(
        species="co2", source="gpp-cardamom", date="2012", domain="europe", data_type="emissions"
    )

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    emissions_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data.lat.equals(emissions_data.lat)
    assert orig_data.lon.equals(emissions_data.lon)
    assert orig_data.time.equals(emissions_data.time)
    assert orig_data.flux.equals(emissions_data.flux)

    expected_metadata = {
        "title": "gross primary productivity co2",
        "author": "openghg cloud",
        "date_created": "2018-05-20 19:44:14.968710",
        "number_of_prior_files_used": 1,
        "prior_file_1": "cardamom gpp",
        "prior_file_1_raw_resolution": "25x25km",
        "prior_file_1_reference": "t.l. smallman, jgr biogeosciences, 2017",
        "regridder_used": "acrg_grid.regrid.regrid_3d",
        "comments": "fluxes copied from year 2013. december 2012 values copied from january 2013 values.",
        "species": "co2",
        "domain": "europe",
        "source": "gppcardamom",
        "date": "2012",
        "start_date": "2012-12-01 00:00:00+00:00",
        "end_date": "2012-12-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard",
        "data_type": "emissions",
    }

    # Remove the keys that vary between runs before comparing
    del metadata["processed"]
    del metadata["prior_file_1_version"]

    assert metadata == expected_metadata
def _create_obsdata(self, site: str, species: str, inlet: Optional[str] = None) -> ObsData:
    """Creates an ObsData object for return to the user

    Args:
        site: Site code
        species: Species name
        inlet: Inlet height, required to select a specific source for unranked data
    Returns:
        ObsData: ObsData object
    """
    if self.ranked_data:
        specific_source = self.results[site][species]
    else:
        specific_source = self.results[site][species][inlet]

    data_keys = specific_source["keys"]
    metadata = specific_source["metadata"]

    # If cloud use the Retrieve object
    if self.cloud:
        raise NotImplementedError
        # from Acquire.Client import Wallet
        # from xarray import open_dataset

        # wallet = Wallet()
        # self._service_url = "https://fn.openghg.org/t"
        # self._service = wallet.get_service(service_url=f"{self._service_url}/openghg")

        # key = f"{site}_{species}"
        # keys_to_retrieve = {key: data_keys}
        # args = {"keys": keys_to_retrieve}

        # response: Dict = self._service.call_function(function="retrieve.retrieve", args=args)
        # response_data = response["results"]

        # data = open_dataset(response_data[key])
    else:
        data = recombine_datasets(data_keys, sort=True)

    if self.ranked_data:
        metadata["rank_metadata"] = specific_source["rank_metadata"]

    return ObsData(data=data, metadata=metadata)
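# A minimal sketch (not from the source) of the self.results layout that
# _create_obsdata assumes, using illustrative site/species/inlet values taken
# from the tests in this section:
#
#   ranked data:    self.results["hfd"]["ch4"] holds
#                   {"keys": [...], "metadata": {...}, "rank_metadata": {...}}
#   unranked data:  self.results["hfd"]["ch4"]["100m"] holds
#                   {"keys": [...], "metadata": {...}}
#
# so when ranked data is not available an inlet must be supplied to pick out
# a single specific source.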
def test_recombination_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data_filepath, precision_filepath), data_type="GCWERKS", site="cgo", network="agage")

    # Parse the raw files directly so we can compare against the recombined data
    data = parse_gcwerks(
        data_filepath=data_filepath,
        precision_filepath=precision_filepath,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    toluene_data = data["c6h5ch3_70m"]["data"]

    species = "c6h5ch3"
    site = "CGO"
    inlet = "70m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    toluene_data_recombined = recombine_datasets(keys=keys)

    # Clear the attributes so only the data itself is compared
    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["c6h5ch3"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["c6h5ch3_repeatability"].equals(toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["c6h5ch3_status_flag"].equals(toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["c6h5ch3_integration_flag"].equals(toluene_data_recombined["c6h5ch3_integration_flag"])
def test_read_file():
    get_local_bucket(empty=True)

    test_datapath = get_datapath("GEOSChem.SpeciesConc.20150101_0000z_reduced.nc4")

    proc_results = EulerianModel.read_file(filepath=test_datapath, model="GEOSChem", species="ch4")

    assert "geoschem_ch4_2015-01-01" in proc_results

    search_results = search(species="ch4", model="geoschem", start_date="2015-01-01", data_type="eulerian_model")

    key = list(search_results.keys())[0]

    data_keys = search_results[key]["keys"]
    eulerian_data = recombine_datasets(keys=data_keys, sort=False)

    metadata = search_results[key]["metadata"]

    orig_data = open_dataset(test_datapath)

    assert orig_data["lat"].equals(eulerian_data["lat"])
    assert orig_data["lon"].equals(eulerian_data["lon"])
    assert orig_data["time"].equals(eulerian_data["time"])
    assert orig_data["lev"].equals(eulerian_data["lev"])
    assert orig_data["SpeciesConc_CH4"].equals(eulerian_data["SpeciesConc_CH4"])

    expected_metadata_values = {
        "species": "ch4",
        "date": "2015-01-01",
        "start_date": "2015-01-01 00:00:00+00:00",
        "end_date": "2016-01-01 00:00:00+00:00",
        "max_longitude": 175.0,
        "min_longitude": -180.0,
        "max_latitude": 89.0,
        "min_latitude": -89.0,
    }

    for key, expected_value in expected_metadata_values.items():
        assert metadata[key] == expected_value
def get_footprint(
    site: str,
    domain: str,
    height: str,
    model: Optional[str] = None,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    species: Optional[str] = None,
) -> FootprintData:
    """Get footprints from one site.

    Args:
        site: The name of the site given in the footprints. This often matches
            the obs site name, but footprints for the same site run with
            different met may be named slightly differently from the obs file,
            e.g. site="DJI", site_modifier="DJI-SAM" - the station is called
            DJI and the footprints site is called DJI-SAM.
        domain: Domain name for the footprints
        height: Height of inlet in metres
        model: Model name used to create the footprints
        start_date: Output start date in a format that Pandas can interpret
        end_date: Output end date in a format that Pandas can interpret
        species: Species identifier, e.g. "co2" for carbon dioxide. Only needed
            if the species requires footprints different from the typical
            30-day footprints appropriate for a long-lived species (such as
            methane), e.g. high time resolution footprints (co2) or a
            short-lived species.
    Returns:
        FootprintData: FootprintData dataclass
    """
    from openghg.store import recombine_datasets
    from openghg.retrieve import search
    from openghg.dataobjects import FootprintData

    # Get the footprints data
    results = search(
        site=site,
        domain=domain,
        height=height,
        start_date=start_date,
        end_date=end_date,
        species=species,
        data_type="footprints",
    )  # type: ignore

    try:
        fp_site_key = list(results.keys())[0]
    except IndexError:
        if species is not None:
            raise ValueError(
                f"Unable to find any footprints data for {site} at a height of {height} for species {species}."
            )
        else:
            raise ValueError(f"Unable to find any footprints data for {site} at a height of {height}.")

    keys = results[fp_site_key]["keys"]
    metadata = results[fp_site_key]["metadata"]

    fp_ds = recombine_datasets(keys=keys, sort=True)

    if species is None:
        species = metadata.get("species", "NA")

    return FootprintData(
        data=fp_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )
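# A minimal usage sketch for get_footprint, assuming footprints for the site,
# domain and height below have already been added to the object store (the
# values are illustrative, borrowed from test_read_footprint in this section):
#
#   fp_data = get_footprint(site="TMB", domain="EUROPE", height="10m")
#   fp_ds = fp_data.data          # the recombined xarray Dataset of footprints
#   fp_meta = fp_data.metadata    # the metadata dict from the search results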
def get_flux(
    species: str,
    sources: Union[str, List[str]],
    domain: str,
    start_date: Optional[Timestamp] = None,
    end_date: Optional[Timestamp] = None,
    time_resolution: Optional[str] = "standard",
) -> FluxData:
    """Reads in all flux files for the given domain and species as an xarray
    Dataset. Note that at present ALL flux data is read in per species per
    domain, or by emissions name. To be consistent with the footprints, fluxes
    should be in mol/m2/s.

    Args:
        species: Species name
        sources: Source name
        domain: Domain e.g. EUROPE
        start_date: Start date
        end_date: End date
        time_resolution: One of ["standard", "high"]
    Returns:
        FluxData: FluxData object

    TODO: Update this to output to a FluxData class?
    TODO: Update inputs to just accept a string and extract one flux file at a
    time? As it stands, this only extracts one flux at a time but is set up to
    be extended to extract multiple. So if this is removed from this function
    the functionality itself would need to be wrapped up in another function
    call.
    """
    from openghg.retrieve import search
    from openghg.store import recombine_datasets
    from openghg.util import timestamp_epoch, timestamp_now

    if start_date is None:
        start_date = timestamp_epoch()
    if end_date is None:
        end_date = timestamp_now()

    results: Dict = search(
        species=species,
        source=sources,
        domain=domain,
        time_resolution=time_resolution,
        start_date=start_date,
        end_date=end_date,
        data_type="emissions",
    )  # type: ignore

    if not results:
        raise ValueError(f"Unable to find flux data for {species} from {sources}")

    # TODO - more than one emissions file (but see above)
    try:
        em_key = list(results.keys())[0]
    except IndexError:
        raise ValueError(f"Unable to find any flux data for {domain} for {species}.")

    data_keys = results[em_key]["keys"]
    metadata = results[em_key]["metadata"]

    em_ds = recombine_datasets(keys=data_keys, sort=False)

    # Check for a level coordinate. If there is only one level, assume surface and drop it
    if "lev" in em_ds.coords:
        if len(em_ds.lev) > 1:
            raise ValueError("Error: More than one flux level")
        em_ds = em_ds.drop_vars(names="lev")

    if species is None:
        species = metadata.get("species", "NA")

    return FluxData(
        data=em_ds,
        metadata=metadata,
        flux={},
        bc={},
        species=species,
        scales="FIXME",
        units="FIXME",
    )
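# A minimal usage sketch for get_flux, assuming the gpp-cardamom CO2 flux from
# the Emissions test above has been added to the object store (the values are
# illustrative, borrowed from that test):
#
#   flux_data = get_flux(species="co2", sources="gpp-cardamom", domain="europe")
#   flux_ds = flux_data.data    # the recombined flux Dataset, in mol/m2/s
#   flux_ds.flux                # the flux variable itself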
def test_read_footprint():
    get_local_bucket(empty=True)

    datapath = get_footprint_datapath("footprint_test.nc")

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )

    # Get the footprints data
    footprint_results = search(site=site, domain=domain, data_type="footprints")

    fp_site_key = list(footprint_results.keys())[0]
    footprint_keys = footprint_results[fp_site_key]["keys"]

    footprint_data = recombine_datasets(keys=footprint_keys, sort=False)

    footprint_coords = list(footprint_data.coords.keys())
    footprint_dims = list(footprint_data.dims)

    # Sorting to allow comparison - coords / dims can be stored in different orders
    # depending on how the Dataset has been manipulated
    footprint_coords.sort()
    footprint_dims.sort()

    assert footprint_coords == ["height", "lat", "lat_high", "lev", "lon", "lon_high", "time"]
    assert footprint_dims == ["height", "index", "lat", "lat_high", "lev", "lon", "lon_high", "time"]

    assert (
        footprint_data.attrs["heights"]
        == [
            500.0, 1500.0, 2500.0, 3500.0, 4500.0,
            5500.0, 6500.0, 7500.0, 8500.0, 9500.0,
            10500.0, 11500.0, 12500.0, 13500.0, 14500.0,
            15500.0, 16500.0, 17500.0, 18500.0, 19500.0,
        ]
    ).all()

    assert footprint_data.attrs["variables"] == [
        "fp",
        "temperature",
        "pressure",
        "wind_speed",
        "wind_direction",
        "PBLH",
        "release_lon",
        "release_lat",
        "particle_locations_n",
        "particle_locations_e",
        "particle_locations_s",
        "particle_locations_w",
        "mean_age_particles_n",
        "mean_age_particles_e",
        "mean_age_particles_s",
        "mean_age_particles_w",
        "fp_low",
        "fp_high",
        "index_lons",
        "index_lats",
    ]

    # Remove the keys checked above (and the varying processed timestamp)
    # before comparing the remaining attributes
    del footprint_data.attrs["processed"]
    del footprint_data.attrs["heights"]
    del footprint_data.attrs["variables"]

    expected_attrs = {
        "author": "OpenGHG Cloud",
        "data_type": "footprints",
        "site": "tmb",
        "network": "lghg",
        "height": "10m",
        "model": "test_model",
        "domain": "europe",
        "start_date": "2020-08-01 00:00:00+00:00",
        "end_date": "2020-08-01 00:00:00+00:00",
        "max_longitude": 39.38,
        "min_longitude": -97.9,
        "max_latitude": 79.057,
        "min_latitude": 10.729,
        "time_resolution": "standard_time_resolution",
    }

    assert footprint_data.attrs == expected_attrs

    assert footprint_data["fp_low"].max().values == pytest.approx(0.43350983)
    assert footprint_data["fp_high"].max().values == pytest.approx(0.11853027)
    assert footprint_data["pressure"].max().values == pytest.approx(1011.92)
    assert footprint_data["fp_low"].min().values == 0.0
    assert footprint_data["fp_high"].min().values == 0.0
    assert footprint_data["pressure"].min().values == pytest.approx(1011.92)