def create_windmax_dict(u, v, names, borders, longitude, latitude):
    """Build a mapping from region name to masked maximum wind speed in mph.

    Both wind components must carry ``units == "m s**-1"``.  The speed
    ``sqrt(u**2 + v**2)`` is scaled by ``3600 / 1609.3`` to turn m/s into
    miles per hour.  For every entry of ``names`` a region is built from the
    matching entry of ``borders``, rasterised onto ``longitude`` /
    ``latitude`` and used to mask the speed before taking the maximum over
    the ``"longitude"`` and ``"latitude"`` dimensions.

    Raises:
        ValueError: if ``u`` or ``v`` does not have units of m/s.
    """
    for component, complaint in (
        (u, "U field does not have units m/s"),
        (v, "V field does not have units m/s"),
    ):
        if component.units != "m s**-1":
            raise ValueError(complaint)

    mph_per_ms = 3600.0 / 1609.3
    wind_speed = np.sqrt(u**2 + v**2) * mph_per_ms

    maxima = {}
    for position, region_name in enumerate(names):
        # Entries may have been dropped from `names` (e.g. Corsica), so the
        # surviving index label is used to address `borders`.
        label = names.index[position]
        # Region object built from the border outlines, used for masking.
        region = regionmask.Regions(name=region_name, outlines=list(borders[label]))
        raster = region.mask(longitude, latitude)
        # Keep NaN where the raster is NaN and write 1 everywhere else, so
        # the mask can be applied by multiplication.
        unit_mask = raster.where(np.isnan(raster.values), 1)
        # Wrapped as a Dask array for lazy execution.
        lazy_mask = dask.array.from_array(unit_mask)
        masked_speed = wind_speed * lazy_mask
        maxima[region_name] = masked_speed.max(dim=["longitude", "latitude"])
    return maxima
def regions(df):
    """Return a ``regionmask.Regions`` named "prudence regions" built from ``df``.

    One outline is produced per entry of ``df.index`` through
    ``_get_vertices``; ``df.name`` supplies the region names and
    ``df.index`` the abbreviations.
    """
    import regionmask

    outlines = []
    for area in df.index:
        outlines.append(_get_vertices(df, area))

    return regionmask.Regions(
        outlines,
        names=df.name,
        abbrevs=df.index,
        name="prudence regions",
    )
def create_region(name, abbrev, limits, wrap_lon=False):
    """Build the mask of one region and register it in ``regions_dict``.

    If ``name`` is already a key of ``regions_dict`` the user is asked on
    stdin whether to create the region anyway: ``y`` renames it through
    ``_update_names``, ``n`` aborts and returns ``None``, anything else
    repeats the question.

    The stored entry is ``regions_dict[name] = (region_mask, limits)`` where
    ``region_mask`` is an ``int8`` array named ``'tmask' + abbrev``.

    NOTE(review): ``wrap_lon`` is accepted but never used in this function.
    """
    print('Creating region %s' % name)

    # The same region name is being defined a second time: ask what to do.
    if name in regions_dict:
        while True:
            answer = input(
                '!!! region %s already defined, do you want to create it anyway ? (y/n)'
                % name).lower()
            if answer == 'y':
                name = _update_names(name)
                break
            if answer == 'n':
                print('ok, this region will not be created')
                return None
            sys.stdout.write('Please respond with \'y\' or \'n\'.\n')

    # NOTE(review): original indentation was lost; the abbreviation update is
    # assumed to run for every region, not only for duplicates - confirm.
    abbrev = _update_abbrevs(abbrev)
    short_name = 'tmask' + abbrev

    region = regionmask.Regions([limits], names=[name], abbrevs=[abbrev])
    # Rename the mask dimensions lat/lon to y/x.
    raster = region.mask(longitude, latitude).rename({'lat': 'y', 'lon': 'x'})
    # xr.where(raster, 0, 1) is 1 where `raster` is falsy and 0 where it is
    # truthy (presumably 0 = inside the region, NaN = outside - confirm).
    selector = xr.where(raster, 0, 1)
    region_mask = (mask_data * selector).astype('int8').rename(short_name)

    regions_dict[name] = (region_mask, limits)
def extract_ts(file, shp, var, type='area', lat=34, lon=34):
    """Extract a temperature time series from a gridded file as a DataFrame.

    With ``type == 'area'`` the grid is masked with the polygons read from
    ``shp`` and the data are averaged over the cells of region number 0.
    Otherwise the grid cell nearest to (``lat``, ``lon``) is selected.

    Args:
        file: path handed to ``xr.open_dataset``; the dataset must expose
            ``lat`` / ``lon`` / ``time`` and a ``temp`` variable.
        shp: shapefile path (read only when ``type == 'area'``); it must
            provide an ``ID`` column.
        var: dataset tag.  It selects dataset-specific handling ('Smap',
            'Eobs', 'Merra2', 'Era5') and prefixes the output column
            (``<var>_temp``).
        type: ``'area'`` for a regional mean, anything else for a point.
        lat, lon: point coordinates used when ``type != 'area'``.

    Returns:
        pandas.DataFrame whose index holds ``'%m/%d/%Y'`` strings.
    """
    d = xr.open_dataset(file)

    if type == 'area':
        nuts = gpd.read_file(shp)
        num = len(nuts)
        nuts_mask_poly = regionmask.Regions(
            name='nuts_mask',
            numbers=list(range(num)),
            names=list(nuts.ID),
            abbrevs=list(nuts.ID),
            outlines=[nuts.geometry.values[i] for i in range(num)],
        )
        print(nuts_mask_poly)

        # The latitude slice bounds are swapped between the two dataset
        # groups (presumably ascending vs. descending latitude axis).
        if var in ['Smap', 'Eobs', 'Merra2']:
            lat_window = slice(35, 43)
        else:
            lat_window = slice(43, 35)
        mask = nuts_mask_poly.mask(
            d.isel(time=0).sel(lat=lat_window, lon=slice(25, 45)),
            lat_name='lat', lon_name='lon')

        lat = mask.lat.values
        lon = mask.lon.values

        ID_REGION = 0
        print(nuts.ID[ID_REGION])

        # Rows / columns of the mask that contain at least one region cell.
        sel_mask = mask.where(mask == ID_REGION).values
        id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
        id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

        try:
            out_sel = d.sel(
                lat=slice(id_lat[0], id_lat[-1]),
                lon=slice(id_lon[0], id_lon[-1]),
            ).compute().where(mask == ID_REGION)
            daily = out_sel.mean(dim=('lat', 'lon'), skipna=True)
        except Exception:
            # Best-effort fallback (e.g. no grid cell falls in the region, so
            # id_lat / id_lon are empty): use the domain mean with `temp`
            # zeroed.  `Exception` instead of a bare `except` lets Ctrl-C
            # and SystemExit propagate.
            daily = d.mean(dim=('lat', 'lon'), skipna=True)
            daily['temp'] = 0
    else:
        daily = d.sel(lon=lon, lat=lat, method='nearest')

    if var in ['Era5', 'Smap']:
        daily = daily.drop('time_bnds')

    # A maximum of exactly 0 marks the zeroed fallback above; it is left
    # untouched.  NOTE(review): 273 (not 273.15) is the offset in the original.
    if var != 'Eobs' and daily.temp.values.max() != 0:
        daily['temp'] = daily['temp'] - 273
    daily = daily.rename({'temp': var + '_' + 'temp'})

    df = daily.to_dataframe()
    if var == 'Smap':
        # Integer-like time values are parsed through their string form.
        df.index = df.index.astype(int)
        df.index = pd.to_datetime(df.index.astype(str))
        # NOTE(review): original indentation was lost; dropping 'crs' is
        # assumed to belong to the Smap branch - confirm.
        df = df.drop('crs', axis=1)
    df.index = df.index.strftime('%m/%d/%Y')
    if type != 'area':
        df = df.drop(['lat', 'lon'], axis=1)
    return df
def extract_ts(file, shp, var, type='area', lat=34, lon=34):
    """Extract a time series from a gridded file and return it as a DataFrame.

    With ``type == 'area'`` the grid is masked with the polygons of ``shp``
    and averaged over the cells of region number 1; otherwise the grid cell
    nearest to (``lat``, ``lon``) is taken.  Every data variable is shifted
    by -273 and renamed to ``<var>_<name>``.  The returned frame is indexed
    by ``'%m/%d/%Y'`` strings.

    ``var == 'Agro'`` files use ``lat`` / ``lon`` axis names; every other
    value of ``var`` uses ``latitude`` / ``longitude``.
    """
    d = xr.open_dataset(file)

    if var == 'Agro':
        lat_key, lon_key = 'lat', 'lon'
    else:
        lat_key, lon_key = 'latitude', 'longitude'

    if type == 'area':
        nuts = gpd.read_file(shp)
        num = len(nuts)
        nuts_mask_poly = regionmask.Regions(
            name='nuts_mask',
            numbers=list(range(num)),
            names=list(nuts.ID),
            abbrevs=list(nuts.ID),
            outlines=[nuts.geometry.values[k] for k in range(num)],
        )
        print(nuts_mask_poly)

        window = d.isel(time=0).sel(**{lat_key: slice(43, 35), lon_key: slice(25, 45)})
        mask = nuts_mask_poly.mask(window, lat_name=lat_key, lon_name=lon_key)
        lat = mask[lat_key].values
        lon = mask[lon_key].values

        ID_REGION = 1
        print(nuts.ID[ID_REGION])

        # Keep only the rows / columns that hold at least one region cell.
        sel_mask = mask.where(mask == ID_REGION).values
        has_cell_in_col = ~np.all(np.isnan(sel_mask), axis=0)
        has_cell_in_row = ~np.all(np.isnan(sel_mask), axis=1)
        id_lon = lon[np.where(has_cell_in_col)]
        id_lat = lat[np.where(has_cell_in_row)]

        bbox = {
            lat_key: slice(id_lat[0], id_lat[-1]),
            lon_key: slice(id_lon[0], id_lon[-1]),
        }
        out_sel = d.sel(**bbox).compute().where(mask == ID_REGION)
        daily = out_sel.mean(dim=(lat_key, lon_key))
    else:
        daily = d.sel(**{lon_key: lon, lat_key: lat}, method='nearest')

    # NOTE(review): original indentation was lost; 'time_bnds' is assumed to
    # be dropped on every path - confirm.
    daily = daily.drop('time_bnds')

    # NOTE(review): 273 (not 273.15) is the offset used by the original.
    for key in daily.keys():
        daily[key] = daily[key] - 273
        daily = daily.rename({key: var + '_' + key})

    df = daily.to_dataframe()
    df.index = df.index.strftime('%m/%d/%Y')
    return df
def mask_region(ds, region, wraplon):
    """Return ``ds`` with everything outside the requested region set to NaN.

    Args:
        ds: xarray object to mask; it is also handed to regionmask to
            derive the grid.
        region: ``'NEU'`` or ``'MED'`` (SREX regions number 11 and 13) or
            ``'DA'`` (the box 60W-100E, 25N-90N defined below).
        wraplon: forwarded to regionmask as ``wrap_lon``.

    Returns:
        ``ds.where(...)`` restricted to the region.

    Raises:
        ValueError: if ``region`` is not one of the supported names.
            (Previously this surfaced as an UnboundLocalError on return.)
    """
    srex_numbers = {'NEU': 11, 'MED': 13}
    if region in ('NEU', 'MED'):
        mask = regionmask.defined_regions.srex.mask(ds, wrap_lon=wraplon)
        return ds.where(mask == srex_numbers[region])

    if region == 'DA':
        da_msk = [[-60., 90.], [-60., 25.], [100., 25.], [100., 90.]]
        names = ['Adj. Region']
        abbrevs = ['DA']
        da_region = regionmask.Regions([da_msk], names=names, abbrevs=abbrevs,
                                       name='DA_Region')
        # The single custom region is number 0.
        mask = da_region.mask(ds, wrap_lon=wraplon) == 0
        return ds.where(mask)

    raise ValueError(
        "Unknown region %r; expected 'NEU', 'MED' or 'DA'" % (region,))
def mask_region(ds, region, wraplon):
    """Return ``ds`` with everything outside the requested region set to NaN.

    Args:
        ds: xarray object to mask; it is also handed to regionmask to
            derive the grid.
        region: ``'NEU'`` or ``'MED'`` (SREX regions number 11 and 13) or
            ``'EUR'`` (the box 10W-39E, 30N-76.25N defined below).
        wraplon: forwarded to regionmask as ``wrap_lon``.

    Returns:
        ``ds.where(...)`` restricted to the region.

    Raises:
        ValueError: if ``region`` is not one of the supported names.
            (Previously this surfaced as an UnboundLocalError on return.)
    """
    srex_numbers = {'NEU': 11, 'MED': 13}
    if region in ('NEU', 'MED'):
        mask = regionmask.defined_regions.srex.mask(ds, wrap_lon=wraplon)
        return ds.where(mask == srex_numbers[region])

    if region == 'EUR':
        da_msk = [[-10., 76.25], [-10., 30.], [39., 30.], [39., 76.25]]
        names = ['Europe']
        abbrevs = ['EUR']
        da_mask = regionmask.Regions([da_msk], names=names, abbrevs=abbrevs,
                                     name='Europe')
        mask2 = da_mask.mask(ds, wrap_lon=wraplon)
        # The single custom region is number 0.
        return ds.where(mask2 == 0)

    raise ValueError(
        "Unknown region %r; expected 'NEU', 'MED' or 'EUR'" % (region,))
def mask_california_current(ds):
    """Mask a dataset to the California Current Large Marine Ecosystem.

    The LME polygons are read from ``data/shpfiles/LMEs66.shp``; the rows
    whose ``LME_NAME`` is "California Current" are merged into one geometry
    that is rasterised onto ``ds.TLONG`` / ``ds.TLAT``.

    Args:
        ds (xarray object): Dataset or DataArray to mask over the
            California Current.

    Returns:
        xarray object: ``ds`` restricted to the California Current mask,
        with all-nan rows and columns dropped.
    """
    # The shapefile holds many Large Marine Ecosystems from coastlines around
    # the world; only the California Current is kept.
    lme_table = gpd.read_file("data/shpfiles/LMEs66.shp")
    is_calcs = lme_table["LME_NAME"] == "California Current"
    outline = lme_table[is_calcs].geometry[:].unary_union

    calcs_region = regionmask.Regions([outline])
    inside = calcs_region.mask(ds.TLONG, ds.TLAT) == 0
    return ds.where(inside, drop=True)
# NOTE(review): `i`, `time`, `chunks`, `layernames`, `ds_init`, `shpin` and
# the *_init datasets are defined outside this excerpt.  Presumably the
# statements up to the concat run once per input file inside a loop -
# confirm the indentation when merging.
FparLai_QC_tmp = FparLai_QC_tmp.to_dataset(name='FparLai_QC')
FparExtra_QC_tmp = xr.open_rasterio(layernames[i_FparExtra_QC[0]],
                                    chunks=chunks)
FparExtra_QC_tmp = FparExtra_QC_tmp.to_dataset(name='FparExtra_QC')

# NOTE(review): the merge uses the *_init datasets although *_tmp datasets
# were just built above - this looks unintended; confirm before changing.
ds_tmp = xr.merge([
    Fpar_init, Lai_init, FparStd_init, LaiStd_init, FparLai_QC_init,
    FparExtra_QC_init
])
ds_tmp = ds_tmp.assign_coords({'time': time})

# The first appended step starts the series from ds_init; later steps extend it.
if (i == 1):
    ds_final = xr.concat([ds_init, ds_tmp], dim='time')
else:
    ds_final = xr.concat([ds_final, ds_tmp], dim='time')

ds_final = ds_final.rename({'x': 'lon', 'y': 'lat'})
ds_final = ds_final.squeeze(drop=True)
lat = ds_final.lat
lon = ds_final.lon

# Mask the stacked dataset with the first feature of the domain shapefile.
shp_domain = fiona.open(shpin)
# Collection.next() is deprecated in Fiona; next(iter(...)) is the
# documented replacement and returns the same first record.
first = next(iter(shp_domain))
shp_domain_geom = shape(first['geometry'])  # this is shapely
my_shp = MultiPolygon(shp_domain_geom)
shp_poly = regionmask.Regions([my_shp])
mask = shp_poly.mask(lon, lat)
# The single region is number 0; everything else becomes NaN.
masked_ds = ds_final.where(mask == 0)
def grid2ts(file, shp):
    """Plot the area-mean 2 m temperature series of one shapefile region.

    The dataset(s) in ``file`` are opened lazily, longitudes are wrapped to
    [-180, 180) and sorted, and the grid is masked with the polygons of
    ``shp`` (which must provide an ``FID`` column).  Region number 35 is cut
    out, its first time step is drawn on a map, and the spatial mean of
    ``t2m`` is resampled to daily means, shifted by -273 and plotted.

    Nothing is returned; the function only produces figures.
    """
    # Example inputs used during development:
    #   file = '/media/cak/AT/Datasets/EOBS/temp2.nc'
    #   file = '/media/cak/AT/Datasets/SM2RAIN/SM2RAIN_ASCAT_0125_2016_v1.1.nc'
    #   shp = '/home/cak/Desktop/SHP/Karasu_all.shp'
    #   shp = '/home/cak/Desktop/at/NUTS_RG_60M_2016_4326_LEVL_0.shp'
    resample = False  # switch for the optional grid refinement below
    factor = 37

    nuts = gpd.read_file(shp)
    n_regions = len(nuts)

    d = xr.open_mfdataset(file, chunks={'time': 10})
    wrapped_lon = ((d.longitude + 180) % 360) - 180
    d = d.assign_coords(longitude=wrapped_lon).sortby('longitude')

    nuts_mask_poly = regionmask.Regions(
        name='nuts_mask',
        numbers=list(range(n_regions)),
        names=list(nuts.FID),
        abbrevs=list(nuts.FID),
        outlines=[nuts.geometry.values[k] for k in range(n_regions)],
    )

    if resample:
        # Interpolate onto a grid `factor` times denser along both axes.
        new_lon = np.linspace(d.longitude[0], d.longitude[-1],
                              d.dims['longitude'] * factor)
        new_lat = np.linspace(d.latitude[0], d.latitude[-1],
                              d.dims['latitude'] * factor)
        d = d.interp(latitude=new_lat, longitude=new_lon)

    window = d.isel(time=0).sel(latitude=slice(75, 32), longitude=slice(-30, 50))
    mask = nuts_mask_poly.mask(window, lat_name='latitude', lon_name='longitude')
    lat = mask.latitude.values
    lon = mask.longitude.values

    data = {}
    for _ in range(1):
        ID_REGION = 35
        zone = 'Zone ' + nuts.FID[ID_REGION]

        # Rows / columns of the mask holding at least one cell of the region.
        sel_mask = mask.where(mask == ID_REGION).values
        id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
        id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

        bbox = d.sel(latitude=slice(id_lat[0], id_lat[-1]),
                     longitude=slice(id_lon[0], id_lon[-1]))
        out_sel = bbox.compute().where(mask == ID_REGION)

        plt.figure(figsize=(12, 8))
        ax = plt.axes()
        out_sel.t2m.isel(time=0).plot(ax=ax)
        nuts.plot(ax=ax, alpha=0.8, facecolor='none')
        plt.show()

        x = out_sel.groupby('time.day').mean(dim=('latitude', 'longitude'))
        data.update({zone: x.t2m.values})

    time = pd.to_datetime(x.day.time.values)
    df = pd.DataFrame(data).set_index(time)
    df_h = df.resample('D').mean().subtract(273)
    df_h.plot()
    plt.show()
shp = gpd.read_file(fname)
region = shp.geometry


# In[3]:

# Take the latitude / longitude axes from variable "SWGDN" of "SWGDN.nc".
f_axis = cdms.open('data/SWGDN.nc')
v = f_axis('SWGDN')
lat = v.getAxis(1)  # latitude
lon = v.getAxis(2)  # longitude
f_axis.close()


# In[4]:

# Create the mask; invalid (NaN) cells become masked entries.
poly = regionmask.Regions(region)
mask = np.ma.masked_invalid(poly.mask(lon, lat))


# In[22]:

# Wrap the mask as a cdms variable carrying the lat / lon axes.
mask_out = MV.array(mask)
mask_out.id = f'mask_{region_name}'
mask_out.setAxis(0, lat)
mask_out.setAxis(1, lon)


# In[23]:

# Save it as a .nc file.
print(f"\n\033[0;33mSaving file as selected_masks_{region_name}.nc\033[0m\n")
g = cdms.open(f'selected_masks_{region_name}.nc', 'w')
def extract_ts(file, shp, var, type='area', lat=34, lon=34):
    """Extract a precipitation time series from a gridded file as a DataFrame.

    The product name is the part of ``file`` before its first underscore; it
    selects product-specific handling and prefixes every output column
    (``<name>_<variable>``).

    With ``type == 'area'`` the grid is masked with the polygons of ``shp``,
    the first time step of ``tp`` is shown on a map, and the data are
    averaged over the cells of region number 1.  Otherwise the grid cell
    nearest to (``lat``, ``lon``) is selected.

    Args:
        file: path handed to ``xr.open_dataset``.
        shp: shapefile path (read only when ``type == 'area'``); it must
            provide an ``ID`` column.
        var: not used by this function.
        type: ``'area'`` for a regional mean, anything else for a point.
        lat, lon: point coordinates used when ``type != 'area'``.

    Returns:
        pandas.DataFrame whose index holds ``'%m/%d/%Y'`` strings.
    """
    d = xr.open_dataset(file)
    name = file.split('_')[0]
    print(name)

    if type == 'area':
        nuts = gpd.read_file(shp)
        num = len(nuts)
        nuts_mask_poly = regionmask.Regions(
            name='nuts_mask',
            numbers=list(range(num)),
            names=list(nuts.ID),
            abbrevs=list(nuts.ID),
            outlines=[nuts.geometry.values[i] for i in range(num)],
        )
        print(nuts_mask_poly)

        # The latitude slice bounds are swapped for the listed products
        # (presumably ascending vs. descending latitude axis).
        if name not in ['sm2rain', 'GPM', 'TRMM', 'TRMMRT', 'Chirps']:
            lat_window = slice(43, 35)
        else:
            lat_window = slice(35, 43)
        mask = nuts_mask_poly.mask(
            d.isel(time=0).sel(latitude=lat_window, longitude=slice(25, 45)),
            lat_name='latitude', lon_name='longitude')

        lat = mask.latitude.values
        lon = mask.longitude.values

        # Quick-look map of the first time step.
        proj = ccrs.EqualEarth(central_longitude=0)
        ax = plt.subplot(111, projection=proj)
        d.isel(time=0).tp.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree())
        ax.coastlines()
        plt.show()

        ID_REGION = 1
        print(nuts.ID[ID_REGION])

        # Rows / columns of the mask that contain at least one region cell.
        sel_mask = mask.where(mask == ID_REGION).values
        id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
        id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

        try:
            out_sel = d.sel(
                latitude=slice(id_lat[0], id_lat[-1]),
                longitude=slice(id_lon[0], id_lon[-1]),
            ).compute().where(mask == ID_REGION)
            daily = out_sel.mean(dim=('latitude', 'longitude'), skipna=True)
        except Exception:
            # Best-effort fallback (e.g. no grid cell falls in the region, so
            # id_lat / id_lon are empty): use the domain mean with `tp`
            # zeroed.  `Exception` instead of a bare `except` lets Ctrl-C
            # and SystemExit propagate.
            daily = d.mean(dim=('latitude', 'longitude'), skipna=True)
            daily['tp'] = 0
    else:
        daily = d.sel(longitude=lon, latitude=lat, method='nearest')

    for key in daily.keys():
        daily = daily.rename({key: name + '_' + key})

    df = daily.to_dataframe()
    if name == 'Era5':
        df['Era5_tp'] = df['Era5_tp'] * 1e3
    if name == 'PERSIANN':
        df = df.drop(['PERSIANN_crs'], axis=1)
        # Negative precipitation values are clipped to zero.
        df.loc[(df.PERSIANN_tp < 0), 'PERSIANN_tp'] = 0
    if name in ['sm2rain', 'TRMM', 'TRMMRT']:
        # Integer-like time values are parsed through their string form.
        df.index = df.index.astype(int)
        df.index = pd.to_datetime(df.index.astype(str))
    df.index = df.index.strftime('%m/%d/%Y')
    return df
# Candidate input files; only the last assignment takes effect.
file = '/home/cak/Desktop/Sentinel/test/SD_20190101.nc'
file = '/media/D/Datasets/PERSIANN_CCS/Data/CCS_Turkey_2020-03-07122131pm_2018.nc'
file = '/media/D/Datasets/ERA5_Land/Temp/Temp_daily.nc'
file = '/media/D/Datasets/ERA5_Land/pre/pre_daily.nc'

shp = '/home/cak/Desktop/Jupyter-lumped-models/Data/shp/Basins.shp'
# shp = '/home/cak/Desktop/NUTS/NUTS_RG_10M_2016_4326_LEVL_0.shp'

nuts = gpd.read_file(shp)
num = len(nuts)
d = xr.open_dataset(file)

# One region per shapefile row, numbered 0..num-1 and labelled by its ID.
nuts_mask_poly = regionmask.Regions(
    name='nuts_mask',
    numbers=list(range(num)),
    names=list(nuts.ID),
    abbrevs=list(nuts.ID),
    outlines=[nuts.geometry.values[i] for i in range(num)],
)
# nuts_mask_poly = regionmask.Regions_cls(name = 'nuts_mask', numbers = list(range(0,37)), names = list(nuts.NUTS_ID), abbrevs = list(nuts.NUTS_ID), outlines = list(nuts.geometry.values[i] for i in range(0,37)))
print(nuts_mask_poly)

# Rasterise the regions on the 35-43N, 25-45E window of the first time step.
mask = nuts_mask_poly.mask(
    d.isel(time=0).sel(latitude=slice(43, 35), longitude=slice(25, 45)),
    lat_name='latitude',
    lon_name='longitude',
)

proj = ccrs.EqualEarth(central_longitude=0)
ax = plt.subplot(111, projection=proj)
if var == 'pre':
    d.isel(time=1).tp.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree())
ds_pop = xr.open_dataset(os.path.join(datapath, ifile)).sel(
    latitude=slice(89.9, -89.9))

# 2. load the shapefiles and create two masks for each region - population
#    weighted and not-weighted; write them as Python pickle file

## load the shapefile as a GeoDataFrame using geopandas
datapath = os.path.join(DATAPATH, 'gadm/gadm36_levels_shp')
datafile = "gadm36_0.shp"
gdf_shapes = gpd.read_file(os.path.join(datapath, datafile))

# set id-column
ID_COLUMN = 'GID_0'

# create the mask (a dataset with one multidimensional array, whose values
# are the "numbers" of the geometry objects to which a grid point belongs)
nuts_mask_polygons = regionmask.Regions(
    list(gdf_shapes.geometry.values),
    name='regions_mask',
    names=list(gdf_shapes[ID_COLUMN]),
    abbrevs=list(gdf_shapes[ID_COLUMN]),
)
mask = nuts_mask_polygons.mask(ds_pop, lat_name='latitude', lon_name='longitude')

datapath = os.path.join(DATAPATH, 'ecmwf-covid/region-masks/countries')
for i, region in enumerate(gdf_shapes[ID_COLUMN]):
    # Cells belonging to region number i.
    region_cells = mask == float(i)
    # Each mask is normalised by its own sum.
    mask_unweighted = (region_cells * 1.) / np.sum(region_cells * 1.)
    mask_weighted = (region_cells * ds_pop['population']) / np.sum(
        region_cells * ds_pop['population'])
    # for some regions, there is no grid point with centroid inside their geometry
# ----------------------------------
# format data into timeseries
# ----------------------------------
lon_name = 'lon'
lat_name = 'lat'

# Two rectangular regions of interest, given as lat/lon bounds.
lat_min1, lat_max1, lon_min1, lon_max1 = 79, 80.4, 112, 140
lat_min2, lat_max2, lon_min2, lon_max2 = 80.45, 87, 128, 155

# Corner lists as [lon, lat] pairs.
roi1 = [
    [lon_min1, lat_min1],
    [lon_max1, lat_min1],
    [lon_max1, lat_max1],
    [lon_min1, lat_max1],
]
roi2 = [
    [lon_min2, lat_min2],
    [lon_max2, lat_min2],
    [lon_max2, lat_max2],
    [lon_min2, lat_max2],
]
roi = [roi1, roi2]
id = [0, 1]
names = ['roi_laptev', 'roi_laptev_north']
abbrevs = ['roi1', 'roi2']
mask = regionmask.Regions(roi, names=names, abbrevs=abbrevs, name='roi')


def ds_stats(ds, lat_name='lat', lon_name='lon'):
    """Reduce ``ds`` over its two horizontal dimensions.

    Returns a dict with the mean (``'ave'``), the 0.25 / 0.50 / 0.75
    quantiles (``'q25'``, ``'q50'``, ``'q75'``) and the count (``'N'``),
    each taken over ``(lat_name, lon_name)``.
    """
    horizontal = (lat_name, lon_name)
    summary = {'ave': ds.mean(dim=horizontal)}
    for key, level in (('q25', 0.25), ('q50', 0.50), ('q75', 0.75)):
        summary[key] = ds.quantile(level, dim=horizontal)
    summary['N'] = ds.count(dim=horizontal)
    return summary


def stats(ds, lat_name='lat', lon_name='lon',):
    ave = ds.mean(dim=(lat_name, lon_name)).values
    q25 = ds.quantile(0.25, dim=(lat_name, lon_name)).values
    q50 = ds.quantile(0.50, dim=(lat_name, lon_name)).values
def _create_region(df, area):
    """Return a single-region ``regionmask.Regions`` for ``area``.

    The outline comes from ``_get_vertices(df, area)``.
    """
    import regionmask

    outline = _get_vertices(df, area)
    single_region = regionmask.Regions([outline])
    return single_region
# Load the NorESM section boundaries; data inside the box they enclose is
# extracted below.
osnap = xr.open_dataset(datadir + 'NorESM/NorESM_osnap_xray_1912.nc')
ns = xr.open_dataset(datadir + 'NorESM/NorESM_ns_xray_1912.nc')
fs = xr.open_dataset(datadir + 'NorESM/NorESM_fs_xray_1912.nc')
bso = xr.open_dataset(datadir + 'NorESM/NorESM_bso_xray_1912.nc')

# Closed outline: fs, bso, ns reversed, osnap reversed, the two extra points
# (-45, 60) and (-40, 75), then back to the first fs point.
bnd_lon = hstack((
    fs.LONGITUDE.values,
    bso.LONGITUDE.values,
    ns.LONGITUDE.values[::-1],
    osnap.LONGITUDE.values[::-1],
    -45,
    -40,
    fs.LONGITUDE.values[0],
))
bnd_lat = hstack((
    fs.LATITUDE.values,
    bso.LATITUDE.values,
    ns.LATITUDE.values[::-1],
    osnap.LATITUDE.values[::-1],
    60,
    75,
    fs.LATITUDE.values[0],
))
plot(bnd_lon, bnd_lat, linewidth=3, color='k')

import regionmask

# One (lon, lat) row per outline vertex.
bnd_tpl = array(tuple(zip(bnd_lon, bnd_lat)))
mybox = regionmask.Regions([bnd_tpl], name='mybox')
ax = mybox.plot()

# Create the masks; the single region is number 0.
epmask = mybox.mask(ep.lon.values, ep.lat.values)
hfmask = mybox.mask(hf.lon.values, hf.lat.values)

cut_ep = ep.where(epmask == 0)
cut_hf = hf.where(hfmask == 0)

cut_ep.mean(dim='time').plot()
cut_hf.mean(dim='time').plot()


def get_area(dat):
    lonmat, latmat = meshgrid(dat.lon, dat.lat)
osnap.LONGITUDE.values[:5][::-1]

# Western box: first ten fs points, two hand-placed points, the first five
# osnap points reversed, one more hand-placed point, back to the first fs point.
wbnd_lon = hstack((
    fs.LONGITUDE.values[:10],
    -20,
    -30,
    osnap.LONGITUDE.values[:5][::-1],
    -45,
    fs.LONGITUDE.values[0],
))
wbnd_lat = hstack((
    fs.LATITUDE.values[:10],
    70,
    65,
    osnap.LATITUDE.values[:5][::-1],
    76,
    fs.LATITUDE.values[0],
))

# Eastern box: fs from index 9 on, bso, one hand-placed point, ns reversed,
# osnap from index 4 on reversed, two hand-placed points, back to fs point 9.
ebnd_lon = hstack((
    fs.LONGITUDE.values[9:],
    bso.LONGITUDE.values,
    20,
    ns.LONGITUDE.values[::-1],
    osnap.LONGITUDE.values[4:][::-1],
    -30,
    -20,
    fs.LONGITUDE.values[9],
))
ebnd_lat = hstack((
    fs.LATITUDE.values[9:],
    bso.LATITUDE.values,
    62,
    ns.LATITUDE.values[::-1],
    osnap.LATITUDE.values[4:][::-1],
    65,
    70,
    fs.LATITUDE.values[9],
))
plot(ebnd_lon, ebnd_lat)

import regionmask

# One (lon, lat) row per outline vertex.
wbnd_tpl = array(tuple(zip(wbnd_lon, wbnd_lat)))
ebnd_tpl = array(tuple(zip(ebnd_lon, ebnd_lat)))
wbox = regionmask.Regions([wbnd_tpl], name='west box')
ebox = regionmask.Regions([ebnd_tpl], name='east box')

# Create the masks on the model's plon / plat grid.
emask = ebox.mask(dat.plon.values, dat.plat.values)
wmask = wbox.mask(dat.plon.values, dat.plat.values)

# NOTE(review): 'y' is renamed to 'lon_idx' and 'x' to 'lat_idx' - confirm
# this pairing is intended.
dat = dat.rename({'y': 'lon_idx', 'x': 'lat_idx'})
dat['lon_idx'] = range(len(dat['lon_idx']))
dat['lat_idx'] = range(len(dat['lat_idx']))

wmask.plot()
emask.plot()