Example #1
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for cubed sphere atmos. output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc")
            for x in range(1, 7)
        ]
        data_files = [netcdf.in_mem_xr(x) for x in data_files]
        dset = xr.concat(data_files, "tile")

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        # Aggregate grid spec tiles
        grid_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.grid_spec.tile{x}.nc")
            for x in range(1, 7)
        ]
        grid_files = [netcdf.in_mem_xr(x) for x in grid_files]
        ds_grid = xr.concat(grid_files, "tile")

        dset["area"] = ds_grid["area"]

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(
                dset.area, ds_grid.grid_latt, region=region
            )
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )

            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(
                _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db"
            )
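
A minimal usage sketch for the routine above; the tar file name and the modules mapping are illustrative assumptions, not taken from the source:

import tarfile

fyear = "0001"
modules = {"atmos_month": "Atmos"}  # assumed mapping: netCDF stem -> db label

# xr_average expects an open tarfile handle holding per-tile members named
# like "0001.atmos_month.tile1.nc" .. "tile6.nc"
with tarfile.open(f"{fyear}.nc.tar") as tar:
    xr_average(fyear, tar, modules)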
Example #2
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for lat-lon model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        grid_file = (f"{fyear}.ocean_static.nc" if netcdf.tar_member_exists(
            tar, f"{fyear}.ocean_static.nc") else f"{fyear}.ocean_month.nc")
        grid_file = netcdf.extract_from_tar(tar, grid_file)
        ds_grid = netcdf.in_mem_xr(grid_file)

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        _area = "areacello" if "areacello" in list(
            ds_grid.variables) else "area_t"
        if "wet" in list(ds_grid.variables):
            _wet = ds_grid["wet"]
        else:
            _wet = 1.0
            warnings.warn("Unable to find wet mask")
        _area = ds_grid[_area] * _wet

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(_area,
                                                       ds_grid.geolat,
                                                       region=region)
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )

            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(_dset_weighted, fyear,
                             f"{fyear}.{region}Ave{modules[member]}.db")
Example #3
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for lat-lon model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        geolat = np.tile(dset.lat.data[:, None], (1, dset.lon.data.shape[0]))
        geolon = np.tile(dset.lon.data[None, :], (dset.lat.data.shape[0], 1))

        _geolat = xr.DataArray(geolat, coords=(dset.lat, dset.lon))
        _geolon = xr.DataArray(geolon, coords=(dset.lat, dset.lon))
        _area = xr.DataArray(
            gmeantools.standard_grid_cell_area(dset.lat.data, dset.lon.data),
            coords=(dset.lat, dset.lon),
        )

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(_area,
                                                       _geolat,
                                                       region=region)
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )

            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(_dset_weighted, fyear,
                             f"{fyear}.{region}Ave{modules[member]}.db")
Example #4
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for cubed sphere LM4 land output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules
        if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc")
            for x in range(1, 7)
        ]
        data_files = [netcdf.in_mem_xr(x) for x in data_files]
        dset = xr.concat(data_files, "tile")

        # Calculate cell depth (layer thicknesses from the zhalf_soil interfaces)
        depth = dset["zhalf_soil"].data
        depth = [depth[x] - depth[x - 1] for x in range(1, len(depth))]
        dset["depth"] = xr.DataArray(depth, dims="zfull_soil")
        depth = dset["depth"]

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        # Load grid data
        grid_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.land_static.tile{x}.nc")
            for x in range(1, 7)
        ]
        grid_files = [netcdf.in_mem_xr(x) for x in grid_files]
        ds_grid = xr.concat(grid_files, "tile")

        # Retain only time-invariant area fields
        grid = xr.Dataset()
        variables = list(ds_grid.variables.keys())
        for x in variables:
            if "area" in x or "frac" in x:
                grid[x] = ds_grid[x]

        # Get list of cell measures
        cell_measures = [
            dset[x].attrs["cell_measures"] for x in list(dset.variables)
            if "cell_measures" in list(dset[x].attrs.keys())
        ]
        cell_measures = sorted(list(set(cell_measures)))

        # Create dict of land groups based on cell measures
        land_groups = {}
        for x in cell_measures:
            land_groups[x] = xr.Dataset()

        # Loop over variables and assign them to groups
        variables = list(dset.variables.keys())
        for x in variables:
            if "cell_measures" in list(dset[x].attrs.keys()):
                _measure = dset[x].attrs["cell_measures"]
                dset[x].attrs["measure"] = _measure.split(" ")[-1]
                land_groups[_measure][x] = dset[x]

        # Since natural tile area is time-dependent, ignore for now
        if "area: area_ntrl" in cell_measures:
            cell_measures.remove("area: area_ntrl")

        if "area: glac_area" in cell_measures:
            cell_measures.remove("area: glac_area")

        # Loop over groups
        for measure in cell_measures:
            _dset = land_groups[measure]

            _measure = measure.split(" ")[-1]
            _area = ds_grid[_measure]

            for region in ["global", "nh", "sh", "tropics"]:
                _masked_area = xrtools.xr_mask_by_latitude(_area,
                                                           ds_grid.geolat_t,
                                                           region=region)
                gmeantools.write_sqlite_data(
                    f"{fyear}.{region}Ave{modules[member]}.db",
                    _measure,
                    fyear,
                    _masked_area.sum().data,
                )

                # _masked_area = _masked_area.fillna(0)

                weights = dset.average_DT.astype("float") * _masked_area
                if _measure == "soil_area":
                    area_x_depth = _masked_area * depth
                    gmeantools.write_sqlite_data(
                        f"{fyear}.{region}Ave{modules[member]}.db",
                        "soil_volume",
                        fyear,
                        area_x_depth.sum().data,
                    )
                    weights = [
                        weights,
                        (weights * depth).transpose("tile", "time",
                                                    "zfull_soil", "grid_yt",
                                                    "grid_xt"),
                    ]
                    for x in list(_dset.variables):
                        if "zfull_soil" in list(_dset[x].dims):
                            _dset[x].attrs["measure"] = "soil_volume"

                _dset_weighted = xrtools.xr_weighted_avg(_dset, weights)

                xrtools.xr_to_db(_dset_weighted, fyear,
                                 f"{fyear}.{region}Ave{modules[member]}.db")
Example #5
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for lat-lon model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        grid_file = (f"{fyear}.ice_static.nc" if netcdf.tar_member_exists(
            tar, f"{fyear}.ice_static.nc") else f"{fyear}.ice_month.nc")
        grid_file = netcdf.extract_from_tar(tar, grid_file)
        ds_grid = netcdf.in_mem_xr(grid_file)

        # Retain only time-dependent variables; collapse CN over its
        # category dimension ("ct") while keeping its attributes
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]
            elif x == "CN":
                dset[x] = dset[x].sum("ct").assign_attrs(dset[x].attrs)

        if "CN" in list(dset.variables.keys()):
            concentration = dset["CN"]
        elif "siconc" in list(dset.variables.keys()):
            concentration = dset["siconc"]
        else:
            warnings.warn("Unable to determine sea ice concentation")

        earth_radius = 6371.0e3  # radius of the Earth in m
        # CELL_AREA is presumably a fraction of the sphere; scale to m2
        _area = ds_grid["CELL_AREA"] * 4.0 * np.pi * (earth_radius**2)

        for region in ["global", "nh", "sh"]:
            _masked_area = xrtools.xr_mask_by_latitude(_area,
                                                       ds_grid.GEOLAT,
                                                       region=region)
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )

            # area-weight, but do not time-weight
            weights = _masked_area
            _dset = dset.copy()

            # ones wherever concentration is defined (NaNs propagate)
            ones = (concentration * 0.0) + 1.0
            ice_area = ones.where(concentration > 0.0, 0.0) * _masked_area
            extent = ones.where(concentration > 0.15, 0.0) * _masked_area

            ice_area_attrs = {
                "long_name": "area covered by sea ice",
                "units": "million km2",
            }
            extent_attrs = {
                "long_name": "sea ice extent",
                "units": "million km2"
            }

            for x in list(_dset.variables):
                if tuple(_dset[x].dims)[-3:] == ("time", "yT", "xT"):
                    _dset[x] = ((_dset[x] * weights).sum(("yT", "xT")) /
                                weights.sum()).assign_attrs(dset[x].attrs)
                    _dset["ice_area"] = (ice_area.sum(
                        ("yT", "xT")) * 1.0e-12).assign_attrs(ice_area_attrs)
                    _dset["extent"] = (extent.sum(
                        ("yT", "xT")) * 1.0e-12).assign_attrs(extent_attrs)
                elif tuple(_dset[x].dims)[-3:] == ("time", "yt", "xt"):
                    _dset[x] = ((_dset[x] * weights).sum(("yt", "xt")) /
                                weights.sum()).assign_attrs(dset[x].attrs)
                    _dset["ice_area"] = (ice_area.sum(
                        ("yt", "xt")) * 1.0e-12).assign_attrs(ice_area_attrs)
                    _dset["extent"] = (extent.sum(
                        ("yt", "xt")) * 1.0e-12).assign_attrs(extent_attrs)
                else:
                    del _dset[x]

            _dset_max = _dset.max("time")
            newvars = {x: x + "_max" for x in list(_dset_max.variables)}
            _dset_max = _dset_max.rename(newvars)

            _dset_min = _dset.min("time")
            newvars = {x: x + "_min" for x in list(_dset_min.variables)}
            _dset_min = _dset_min.rename(newvars)

            weights = dset.average_DT.astype("float")
            _dset_weighted = xrtools.xr_weighted_avg(_dset, weights)
            newvars = {x: x + "_mean" for x in list(_dset_weighted.variables)}
            _dset_weighted = _dset_weighted.rename(newvars)
            xrtools.xr_to_db(_dset_weighted, fyear,
                             f"{fyear}.{region}AveIce.db")
            xrtools.xr_to_db(_dset_max, fyear, f"{fyear}.{region}AveIce.db")
            xrtools.xr_to_db(_dset_min, fyear, f"{fyear}.{region}AveIce.db")
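
A toy illustration of the 15% concentration threshold used for extent above; the grid, areas, and concentrations are made up, and the 1.0e-12 factor converts m2 to million km2:

import numpy as np
import xarray as xr

conc = xr.DataArray(np.array([[0.00, 0.10], [0.20, 0.90]]), dims=("yt", "xt"))
area = xr.DataArray(np.full((2, 2), 1.0e10), dims=("yt", "xt"))  # m2, made up

ones = (conc * 0.0) + 1.0  # ones wherever conc is defined; NaNs propagate
extent = (ones.where(conc > 0.15, 0.0) * area).sum() * 1.0e-12
print(float(extent))  # 0.02 (two cells exceed the threshold)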