def to_netcdf():
    """
    Open and merge GPM PR files into one netCDF file.

    HDF5 keys:
        Level 1: 'Grids', 'InputAlgorithmVersions', 'InputFileNames',
                 'InputGenerationDateTimes'
        Level 2: 'G1' (5.0°x 5.0° grid), 'G2' (0.25°x 0.25° grid)
        Level 3: 'precipRate', ...
        Level 4: 'count', 'mean', 'stdev'

    Variables and dimensions:
        rt (3): rain types (0: stratiform, 1: convective, 2: all)
        chn (5): channels (0: KuNS, 1: KaMS, 2: KaHS, 3: DPRMS, 4: KuMS)
            - Ku (Ku-band Precipitation Radar)
            - Ka (Ka-band Precipitation Radar)
            - DPR (Dual frequency Precipitation Radar)
            - NS (normal scan) = 245km swath
            - MS (matched beam scan) = 125km swath
            - HS (high sensitivity beam scan) = 120km swath
        lnH (1440): high resolution 0.25° grid intervals of longitude
                    from 180°W to 180°E
        ltH (536): high resolution 0.25° grid intervals of latitude
                   from 67°S to 67°N
    """
    ds_list = []
    extent = ls.basin_extent('indus')
    lon_arr = np.arange(-180, 180, 0.25)
    lat_arr = np.arange(-67, 67, 0.25)
    # Mid-month fractional-year offsets (1/24 year = half a month).
    month_arr = np.arange(1. / 24., 1, 1. / 12.)

    for file in tqdm(glob.glob('_Data/GPM/PR_2000-2010/*')):
        # Context manager releases the HDF5 handle (the original left
        # every file open for the lifetime of the loop).
        with h5py.File(file, 'r') as f:
            # Dataset.value was removed in h5py 3.0; [()] is the
            # supported way to read the full array.
            tp_arr = f['Grids']['G2'][
                'precipRateNearSurfaceUnconditional'][()]
        # Year and month are parsed from fixed character positions in
        # the file path — fragile if the directory layout changes.
        # TODO: parse from os.path.basename instead.
        month = month_arr[int(file[52:54]) - 1]
        time = float(file[48:52]) + month
        ds = xr.Dataset(
            data_vars=dict(tp=(["time", "lon", "lat"], [tp_arr[0, :, :]])),
            coords=dict(time=(["time"], [time]),
                        lat=(['lat'], lat_arr),
                        lon=(['lon'], lon_arr)))
        ds_tr = ds.transpose('time', 'lat', 'lon')
        ds_cropped = ds_tr.sel(lon=slice(extent[1], extent[3]),
                               lat=slice(extent[2], 36.0))
        ds_list.append(ds_cropped)

    ds_merged = xr.merge(ds_list)
    print(ds_merged)
    ds_merged['tp'] = ds_merged['tp'] * 24  # to mm/day
    ds_merged.to_netcdf("_Data/GPM/gpm_pr_unc_2000-2010.nc")
def eof_formatter(filepath, basin, name=None):
    """
    Return a DataFrame of the EOF over the given basin.

    Inputs:
        filepath: path to a netCDF file containing an 'EOF' variable
                  on (time, latitude, longitude) [presumed ERA5-style]
        basin: basin name understood by ls.basin_extent
        name: optional column name for the EOF values in the result

    Returns:
        pandas DataFrame of the cropped EOF values with NaNs dropped.
    """
    da = xr.open_dataset(filepath)
    # Recent ERA5 downloads carry an 'expver' dimension mixing two
    # experiment versions; keep only expver=1.
    if "expver" in da.dims:  # no list() needed for membership test
        da = da.sel(expver=1)
    (latmax, lonmin, latmin, lonmax) = ls.basin_extent(basin)
    sliced_da = da.sel(latitude=slice(latmin, latmax),
                       longitude=slice(lonmin, lonmax))
    eof_ds = sliced_da.EOF
    eof2 = eof_ds.assign_coords(time=(eof_ds.time.astype("datetime64")))
    eof_multiindex_df = eof2.to_dataframe()
    # Avoid inplace rename on the frame returned by dropna() (a copy):
    # assigning the result is the recommended, warning-free form.
    eof_df = eof_multiindex_df.dropna().rename(columns={"EOF": name})
    return eof_df
def merge_og_files():
    """
    Open, crop and merge the APHRODITE precipitation data (1951-2016)
    over the Indus basin into a single netCDF file.
    """
    ds_list = []
    extent = ls.basin_extent('indus')

    print('1951-2007')
    for f in tqdm(glob.glob(
            '_Data/APHRODITE/APHRO_MA_025deg_V1101.1951-2007.gz/*.nc')):
        # The older product names its coords latitude/longitude.
        ds_list.append(_crop_and_resample(
            f, extent,
            rename={'latitude': 'lat', 'longitude': 'lon', 'precip': 'tp'}))

    print('2007-2016')
    for f in tqdm(glob.glob(
            '_Data/APHRODITE/APHRO_MA_025deg_V1101_EXR1/*.nc')):
        ds_list.append(_crop_and_resample(f, extent,
                                          rename={'precip': 'tp'}))

    ds_merged = xr.merge(ds_list)

    # Standardise time resolution: replace the approximate per-file
    # decimal years with an exact mid-month grid.
    maxyear = float(ds_merged.time.max())
    minyear = float(ds_merged.time.min())
    time_arr = np.arange(round(minyear) + 1. / 24., round(maxyear), 1. / 12.)
    ds_merged['time'] = time_arr

    ds_merged.to_netcdf("_Data/APHRODITE/aphrodite_indus_1951_2016.nc")


def _crop_and_resample(path, extent, rename):
    """Open one APHRODITE file, crop 'tp' to the basin extent, resample
    to monthly means and convert the time axis to approximate decimal
    years (ns since epoch / 365-day years, offset to 1970)."""
    ds = xr.open_dataset(path).rename(rename)
    ds_cropped = ds.tp.sel(lon=slice(extent[1], extent[3]),
                           lat=slice(extent[2], extent[0]))
    ds_resampled = ds_cropped.resample(time="M").mean()
    # The exact values are overwritten by the caller after merging,
    # so the 365-day-year approximation is acceptable here.
    ds_resampled['time'] = ds_resampled.time.astype(
        float) / 365 / 24 / 60 / 60 / 1e9
    ds_resampled['time'] = ds_resampled['time'] + 1970
    return ds_resampled
def download():
    """
    Build a netCDF of CRU precipitation over the Indus basin,
    interpolated onto a 0.25° by 0.25° grid.
    """
    extent = ls.basin_extent('indus')

    raw = xr.open_dataset("_Data/CRU/cru_ts4.04.1901.2019.pre.dat.nc")
    cropped = raw.sel(lon=slice(extent[1], extent[3]),
                      lat=slice(extent[2], extent[0]))

    # 30.437 is presumably the mean month length in days — TODO apply
    # proper function to get mm/day.
    cropped['pre'] /= 30.437
    cropped['time'] = standardised_time(cropped)

    # Standardise time resolution to a mid-month decimal-year grid.
    first_year = float(cropped.time.min())
    last_year = float(cropped.time.max())
    cropped['time'] = np.arange(round(first_year) + 1. / 24.,
                                round(last_year), 1. / 12.)

    renamed = cropped.rename_vars({'pre': 'tp'})
    lons = np.arange(70, 85, 0.25)
    lats = np.arange(25, 35, 0.25)
    interpolated = renamed.interp(lon=lons, lat=lats, method="nearest")
    interpolated.to_netcdf("_Data/CRU/interpolated_cru_1901-2019.nc")
def update_cds_monthly_data(
        dataset_name="reanalysis-era5-single-levels-monthly-means",
        product_type="monthly_averaged_reanalysis",
        variables=(
            "geopotential",
            "2m_dewpoint_temperature",
            "angle_of_sub_gridscale_orography",
            "slope_of_sub_gridscale_orography",
            "total_column_water_vapour",
            "total_precipitation",
        ),
        area=(40, 70, 30, 85),
        pressure_level=None,
        path="_Data/ERA5/",
        qualifier=None):
    """
    Imports the most recent version of the given monthly ERA5 dataset as
    a netcdf from the CDS API.

    Inputs:
        dataset_name: str
        product_type: str
        variables: sequence of strings
        pressure_level: str or None
        area: sequence of scalars [N, W, S, E], or a basin name string
        path: str
        qualifier: str or None, extra tag inserted into the filename

    Returns:
        local filepath to netcdf.
    """
    # A string area is a basin name: use it as the filename qualifier
    # and look up its numeric extent.
    if isinstance(area, str):
        qualifier = area
        area = ls.basin_extent(area)

    now = datetime.datetime.now()
    if qualifier is None:
        filename = (dataset_name + "_" + product_type + "_" +
                    now.strftime("%m-%Y") + ".nc")
    else:
        filename = (dataset_name + "_" + product_type + "_" + qualifier +
                    "_" + now.strftime("%m-%Y") + ".nc")
    filepath = path + filename

    # Only download if updated file is not present locally
    if not os.path.exists(filepath):
        current_year = now.strftime("%Y")
        years = np.arange(1979, int(current_year) + 1, 1).astype(str)
        months = np.arange(1, 13, 1).astype(str)

        # Build the request once; the single-level and pressure-level
        # variants differ only by the optional 'pressure_level' key
        # (the original duplicated the whole retrieve call).
        request = {
            "format": "netcdf",
            "product_type": product_type,
            "variable": list(variables),
            "year": years.tolist(),
            "time": "00:00",
            "month": months.tolist(),
            "area": list(area),
        }
        if pressure_level is not None:
            request["pressure_level"] = pressure_level

        c = cdsapi.Client()
        c.retrieve("reanalysis-era5-single-levels-monthly-means",
                   request, filepath)

    return filepath