def to_netcdf():
    """
    Function to open the monthly GPM precipitation HDF5 files and merge
    them into one netcdf file.

    HDF5 keys :
    Level 1 : 'Grids', 'InputAlgorithmVersions', 'InputFileNames',
    'InputGenerationDateTimes'
    Level 2 : 'G1' (5.0°x 5.0° grid), 'G2' (0.25°x 0.25° grid)
    Level 3 : 'precipRate', ...
    Level 4 : 'count', 'mean', 'stdev'

    Variables and dimensions:
        rt (3): rain types (0: stratiform, 1: convective, 2: all)
        chn (5): channels (0: KuNS, 1: KaMS, 2: KaHS, 3: DPRMS, 4: KuMS)
                - Ku (Ku-band Precipitation Radar)
                - Ka (Ka-band Precipitation Radar)
                - DPR (Dual frequency Precipitation Radar)
                - NS (normal scan) = 245km swath
                - MS (matched beam scan) = 125km swath
                - HS (high sensitivity beam scan) = 120km swath
        lnH (1440): high resolution 0.25° grid intervals of longitude from
        180°W to 180°E
        ltH (536): high resolution 0.25° grid intervals of latitude from
        67°S to 67°N
    """

    ds_list = []
    extent = ls.basin_extent('indus')
    # Global 0.25° grid matching the 'G2' group (1440 lon x 536 lat cells)
    lon_arr = np.arange(-180, 180, 0.25)
    lat_arr = np.arange(-67, 67, 0.25)

    for file in tqdm(glob.glob('_Data/GPM/PR_2000-2010/*')):

        f = h5py.File(file, 'r')
        dset = f['Grids']
        tp_arr = dset['G2']['precipRateNearSurfaceUnconditional'][:]
        # Decimal-year timestamp: year plus mid-month offset, parsed from
        # fixed character positions in the file path
        month_arr = np.arange(1. / 24., 1, 1. / 12.)
        month = month_arr[int(file[52:54]) - 1]
        time = float(file[48:52]) + month
        ds = xr.Dataset(data_vars=dict(tp=(["time", "lon", "lat"],
                                           [tp_arr[0, :, :]])),
                        coords=dict(time=(["time"], [time]),
                                    lat=(['lat'], lat_arr),
                                    lon=(['lon'], lon_arr)))
        ds_tr = ds.transpose('time', 'lat', 'lon')
        ds_cropped = ds_tr.sel(lon=slice(extent[1], extent[3]),
                               lat=slice(extent[2], 36.0))
        ds_list.append(ds_cropped)

    ds_merged = xr.merge(ds_list)
    print(ds_merged)
    ds_merged['tp'] = ds_merged['tp'] * 24  # mm/hr to mm/day
    ds_merged.to_netcdf("_Data/GPM/gpm_pr_unc_2000-2010.nc")
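
# A minimal usage sketch (assumed helper, not part of the original module):
# open the merged file written by to_netcdf() and reduce it to a basin-mean
# monthly series as a quick sanity check.
def _inspect_gpm_merge():
    merged = xr.open_dataset("_Data/GPM/gpm_pr_unc_2000-2010.nc")
    basin_mean = merged.tp.mean(dim=["lat", "lon"])  # mm/day, one per month
    print(basin_mean)
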
def eof_formatter(filepath, basin, name=None):
    """ Returns a DataFrame of the EOF field over the given basin """

    da = xr.open_dataset(filepath)
    if "expver" in list(da.dims):
        da = da.sel(expver=1)
    (latmax, lonmin, latmin, lonmax) = ls.basin_extent(basin)
    sliced_da = da.sel(latitude=slice(latmin, latmax),
                       longitude=slice(lonmin, lonmax))

    eof_ds = sliced_da.EOF
    eof2 = eof_ds.assign_coords(time=(eof_ds.time.astype("datetime64")))
    eof_multiindex_df = eof2.to_dataframe()
    eof_df = eof_multiindex_df.dropna()
    eof_df.rename(columns={"EOF": name}, inplace=True)
    return eof_df
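
# Hedged usage sketch: eof_formatter() returns a DataFrame indexed on the
# shared (time, latitude, longitude) MultiIndex, so several EOF files can be
# joined column-wise. The file paths here are placeholders, not paths from
# the original code.
def _combine_eofs():
    eof1 = eof_formatter("_Data/EOF/eof1.nc", "indus", name="EOF1")
    eof2 = eof_formatter("_Data/EOF/eof2.nc", "indus", name="EOF2")
    return eof1.join(eof2)  # align on the common index
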
def merge_og_files():
    """ Function to open, crop and merge the APHRODITE data """

    ds_list = []
    extent = ls.basin_extent('indus')

    print('1951-2007')
    for f in tqdm(
            glob.glob(
                '_Data/APHRODITE/APHRO_MA_025deg_V1101.1951-2007.gz/*.nc')):
        ds = xr.open_dataset(f)
        ds = ds.rename({'latitude': 'lat', 'longitude': 'lon', 'precip': 'tp'})
        ds_cropped = ds.tp.sel(lon=slice(extent[1], extent[3]),
                               lat=slice(extent[2], extent[0]))
        ds_resampled = ds_cropped.resample(time="M").mean()
        # Convert datetime64[ns] to decimal years: ns since epoch -> years
        # (leap days ignored), offset by 1970; overwritten uniformly below
        ds_resampled['time'] = ds_resampled.time.astype(
            float) / 365 / 24 / 60 / 60 / 1e9
        ds_resampled['time'] = ds_resampled['time'] + 1970
        ds_list.append(ds_resampled)

    print('2007-2016')
    for f in tqdm(
            glob.glob('_Data/APHRODITE/APHRO_MA_025deg_V1101_EXR1/*.nc')):
        ds = xr.open_dataset(f)
        ds = ds.rename({'precip': 'tp'})
        ds_cropped = ds.tp.sel(lon=slice(extent[1], extent[3]),
                               lat=slice(extent[2], extent[0]))
        ds_resampled = ds_cropped.resample(time="M").mean()
        # Same datetime64[ns] -> decimal-year conversion as above
        ds_resampled['time'] = ds_resampled.time.astype(
            float) / 365 / 24 / 60 / 60 / 1e9
        ds_resampled['time'] = ds_resampled['time'] + 1970
        ds_list.append(ds_resampled)

    ds_merged = xr.merge(ds_list)

    # Standardise time resolution
    maxyear = float(ds_merged.time.max())
    minyear = float(ds_merged.time.min())
    time_arr = np.arange(round(minyear) + 1. / 24., round(maxyear), 1. / 12.)
    ds_merged['time'] = time_arr

    ds_merged.to_netcdf("_Data/APHRODITE/aphrodite_indus_1951_2016.nc")
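
# A minimal sketch of the decimal-year conversion used in merge_og_files():
# casting datetime64[ns] to float gives nanoseconds since 1970-01-01, which
# are scaled to years (leap days ignored) and offset by 1970. The helper
# name is an assumption, not from the original module.
def _to_decimal_year(dt64):
    ns = np.asarray(dt64, dtype="datetime64[ns]").astype(float)
    return ns / 1e9 / 60 / 60 / 24 / 365 + 1970
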
def download():
    """ Returns CRU data on a 0.25° by 0.25° grid """
    extent = ls.basin_extent('indus')
    ds = xr.open_dataset("_Data/CRU/cru_ts4.04.1901.2019.pre.dat.nc")
    ds_cropped = ds.sel(lon=slice(extent[1], extent[3]),
                        lat=slice(extent[2], extent[0]))
    ds_cropped['pre'] /= 30.437  # TODO apply proper function to get mm/day
    ds_cropped['time'] = standardised_time(ds_cropped)

    # Standardise time resolution
    maxyear = float(ds_cropped.time.max())
    minyear = float(ds_cropped.time.min())
    time_arr = np.arange(round(minyear) + 1. / 24., round(maxyear), 1. / 12.)
    ds_cropped['time'] = time_arr

    ds = ds_cropped.rename_vars({'pre': 'tp'})
    # Interpolate onto a regular 0.25° grid covering the basin
    x = np.arange(70, 85, 0.25)
    y = np.arange(25, 35, 0.25)
    interp_ds = ds.interp(lon=x, lat=y, method="nearest")
    interp_ds.to_netcdf("_Data/CRU/interpolated_cru_1901-2019.nc")
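
# A possible fix for the TODO in download(), sketched under the assumption
# that CRU 'pre' is a monthly total in mm and that 'time' is still a
# datetime64 coordinate when the conversion runs: divide by the actual month
# length from xarray's datetime accessor instead of a flat 30.437 days.
def _monthly_total_to_mm_per_day(ds):
    days = ds.time.dt.days_in_month  # true length of each month
    ds['pre'] = ds['pre'] / days
    return ds
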
def update_cds_monthly_data(
        dataset_name="reanalysis-era5-single-levels-monthly-means",
        product_type="monthly_averaged_reanalysis",
        variables=[
            "geopotential",
            "2m_dewpoint_temperature",
            "angle_of_sub_gridscale_orography",
            "slope_of_sub_gridscale_orography",
            "total_column_water_vapour",
            "total_precipitation",
        ],
        area=[40, 70, 30, 85],
        pressure_level=None,
        path="_Data/ERA5/",
        qualifier=None):
    """
    Imports the most recent version of the given monthly ERA5 dataset as a
    netcdf from the CDS API.

    Inputs:
        dataset_name: str
        product_type: str
        variables: list of strings
        pressure_level: str or None
        area: list of scalars
        path: str
        qualifier: str

    Returns: local filepath to netcdf.
    """
    # A basin name passed as `area` doubles as the filename qualifier
    if isinstance(area, str):
        qualifier = area
        area = ls.basin_extent(area)

    now = datetime.datetime.now()

    if qualifier is None:
        filename = (dataset_name + "_" + product_type + "_" +
                    now.strftime("%m-%Y") + ".nc")
    else:
        filename = (dataset_name + "_" + product_type + "_" + qualifier + "_" +
                    now.strftime("%m-%Y") + ".nc")

    filepath = path + filename

    # Only download if updated file is not present locally
    if not os.path.exists(filepath):

        current_year = now.strftime("%Y")
        years = np.arange(1979, int(current_year) + 1, 1).astype(str)
        months = np.arange(1, 13, 1).astype(str)

        c = cdsapi.Client()

        # Build the CDS request once; pressure_level is only included when
        # given. Retrieve from the dataset named in the arguments rather than
        # a hardcoded string, so dataset_name is actually honoured.
        request = {
            "format": "netcdf",
            "product_type": product_type,
            "variable": variables,
            "year": years.tolist(),
            "time": "00:00",
            "month": months.tolist(),
            "area": area,
        }
        if pressure_level is not None:
            request["pressure_level"] = pressure_level

        c.retrieve(dataset_name, request, filepath)

    return filepath
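
# Hedged usage sketch: passing a basin name as `area` takes the isinstance
# branch above, which also sets the filename qualifier. Assumes a CDS API
# key is configured locally.
def _load_era5_indus():
    fpath = update_cds_monthly_data(area="indus")
    return xr.open_dataset(fpath)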