def netCDF_empty(self, ncfile_out, stations, nc_in):
    """
    Create an empty station file to hold interpolated results.

    The number of stations is given by ``stations``; the data variables
    (name, long_name, units) are taken from the gridded input ``nc_in``.

    Args:
        ncfile_out: full name of the file to be created
        stations: station list read with common_utils.StationListRead();
            the columns 'station_number', 'latitude_dd', 'longitude_dd'
            and 'elevation_m' are read here
        nc_in: open handle of the gridded original netCDF file. When it
            has a 'level' variable, the output also gets a 'level'
            dimension and 3D (time, level, station) variables.
    """
    # TODO: change date type from f4 to f8 for lat and lon
    rootgrp = netcdf_base(ncfile_out, len(stations), None,
                          'hours since 1980-01-01 00:00:00')
    try:
        # assign station characteristics
        rootgrp["station"][:] = list(stations['station_number'])
        rootgrp["latitude"][:] = list(stations['latitude_dd'])
        rootgrp["longitude"][:] = list(stations['longitude_dd'])
        rootgrp["height"][:] = list(stations['elevation_m'])

        # extra treatment for pressure level files: test explicitly for the
        # 'level' variable so that genuine write errors are not mistaken
        # for "this is a 2D file"
        if 'level' in nc_in.variables:
            lev = nc_in.variables['level'][:]
            logger.info("Creating empty 3D file (has pressure levels)")
            rootgrp.createDimension('level', len(lev))
            level = rootgrp.createVariable('level', 'i4', ('level',))
            level.long_name = 'pressure_level'
            level.units = 'hPa'
            level[:] = lev
        else:
            logger.info("Creating empty 2D file (without pressure levels)")
            lev = []

        if len(lev):
            dims = ('time', 'level', 'station')
        else:
            dims = ('time', 'station')

        # create (empty) variables based on the input file, leaving out the
        # ones variables_skip() rejects
        for var in [str_encode(x) for x in nc_in.variables.keys()]:
            if variables_skip(var):
                continue
            logger.debug(f"Add empty variable: {var}")
            tmp = rootgrp.createVariable(var, 'f4', dims)
            tmp.long_name = str_encode(
                nc_in.variables[var].long_name)  # for merra2
            tmp.units = str_encode(nc_in.variables[var].units)
    finally:
        # close the file even when one of the steps above failed
        rootgrp.close()

    logger.debug(f"Created empty netcdf file {ncfile_out}")
def inventory(self):
    """
    Report on data available in directory: time slice, variables, area.

    For every ERA-Interim file pattern the number of matching files in
    ``self.directory`` is printed. When at least one file matches, the
    files are opened as one multi-file dataset and the variables, the
    covered time span and the bounding box are printed.
    """
    print("\n\n\n")
    print("=== INVENTORY FOR GLOBSIM ERA-INTERIM DATA === \n")
    print("Download parameter file: \n" + self.pfile + "\n")

    # loop over filetypes, read, report
    file_type = ['erai_pl_*.nc', 'erai_sa_*.nc', 'erai_sf_*.nc',
                 'erai_t*.nc']
    for ft in file_type:
        infile = path.join(self.directory, ft)
        # NOTE(review): `filter` is called as (names, pattern), i.e. like
        # fnmatch.filter -- confirm against the module imports
        nf = len(filter(listdir(self.directory), ft))
        print(str(nf) + " FILE(S): " + infile)
        if nf == 0:
            continue

        # open dataset
        ncf = nc.MFDataset(infile, 'r')
        try:
            # list variables
            keylist = [str_encode(x) for x in ncf.variables.keys()]
            print(" VARIABLES:")
            print(" " + str(len(keylist)) +
                  " variables, inclusing dimensions")
            for key in keylist:
                print(" " + ncf.variables[key].long_name)

            # time slice; read the time axis once instead of three times
            time = ncf.variables['time']
            times = time[:]
            tmin = nc.num2date(min(times), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            tmax = nc.num2date(max(times), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            print(" TIME SLICE")
            print(" " + str(len(times)) + " time steps")
            print(" " + tmin + " to " + tmax)

            # area
            lon = ncf.variables['longitude']
            lat = ncf.variables['latitude']
            nlat = str(len(lat))
            nlon = str(len(lon))
            ncel = str(len(lat) * len(lon))
            print(" BOUNDING BOX / AREA")
            print(" " + ncel + " cells, " + nlon + " W-E and " + nlat +
                  " S-N")
            print(" N: " + str(max(lat)))
            print(" S: " + str(min(lat)))
            print(" W: " + str(min(lon)))
            print(" E: " + str(max(lon)))
        finally:
            # release the file handles even if reporting failed half-way
            ncf.close()
def inventory(self):
    """
    Report on data available in directory: time slice, variables, area.

    For every ERA5 file pattern (built from the prefix returned by
    ``self.typeString(self.era5type)``) the number of matching files in
    ``self.directory`` is logged. When at least one file matches, the
    files are opened as one dataset aggregated along 'time' and the
    variables, the covered time span and the bounding box are logged.
    """
    logger.info("START INVENTORY FOR GLOBSIM ERA5 DATA")
    logger.debug(f"Download parameter file: {self.pfile}")

    # loop over filetypes, read, report
    prefix = self.typeString(self.era5type)
    file_type = [prefix + '_pl_*.nc', prefix + '_sa_*.nc',
                 prefix + '_sf_*.nc', prefix + '_t*.nc']
    for ft in file_type:
        infile = path.join(self.directory, ft)
        # NOTE(review): `filter` is called as (names, pattern), i.e. like
        # fnmatch.filter -- confirm against the module imports
        nf = len(filter(listdir(self.directory), ft))
        logger.debug(str(nf) + " FILE(S): " + infile)
        if nf == 0:
            continue

        # open dataset
        ncf = nc.MFDataset(infile, 'r', aggdim='time')
        try:
            # list variables
            keylist = [str_encode(x) for x in ncf.variables.keys()]
            logger.info("VARIABLES:")
            logger.info(
                f"Found {str(len(keylist))} variables, including dimensions"
            )
            for key in keylist:
                # f-string: log the long name itself, not the placeholder
                logger.debug(
                    f"Found variable :{ncf.variables[key].long_name}")

            # time slice; read the time axis once instead of three times
            time = ncf.variables['time']
            times = time[:]
            tmin = nc.num2date(min(times), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            tmax = nc.num2date(max(times), time.units,
                               calendar=time.calendar).strftime('%Y/%m/%d')
            logger.info("TIME SLICE")
            logger.info(
                f"Found {str(len(times))} time steps from {tmin} to {tmax}"
            )

            # area
            lon = ncf.variables['longitude']
            lat = ncf.variables['latitude']
            nlat = str(len(lat))
            nlon = str(len(lon))
            ncel = str(len(lat) * len(lon))
            logger.info("BOUNDING BOX / AREA")
            logger.info(
                f"Found {ncel} cells: {nlon} along W-E and {nlat} along S-N"
            )
            logger.debug(" N: " + str(max(lat)))
            logger.debug(" S: " + str(min(lat)))
            logger.debug(" W: " + str(min(lon)))
            logger.debug(" E: " + str(max(lon)))
        finally:
            # release the file handles even if reporting failed half-way
            ncf.close()
def levels2elevation(self, ncfile_in, ncfile_out):
    """
    Linear 1D interpolation of pressure level data available for individual
    stations to station elevation. Where and when stations are below the
    lowest pressure level, they are assigned the value of the lowest
    pressure level.

    Args:
        ncfile_in: name (wildcards allowed) of the input file(s) holding
            pressure level data per station; opened as one dataset
            aggregated along 'time'. Must contain 'time', 'station',
            'latitude', 'longitude', 'level', 'height' and 'z'.
        ncfile_out: name of the netCDF file to create. It receives one
            (time, station) variable per input data variable plus
            'air_pressure'.
    """
    # open file
    ncf = nc.MFDataset(ncfile_in, 'r', aggdim='time')
    try:
        height = ncf.variables['height'][:]
        nt = len(ncf.variables['time'][:])
        nl = len(ncf.variables['level'][:])

        # list variables; what remains are the data variables to interpolate
        varlist = [str_encode(x) for x in ncf.variables.keys()]
        for V in ['time', 'station', 'latitude', 'longitude', 'level',
                  'height', 'z']:
            varlist.remove(V)

        # === open and prepare output netCDF file ==========================
        # dimensions: station, time
        # variables: latitude(station), longitude(station),
        #            elevation(station)
        # others: ...(time, station)
        # stations are integer numbers
        # create a file (Dataset object, also the root group).
        rootgrp = netcdf_base(ncfile_out=ncfile_out,
                              n_stations=len(height),
                              n_time=nt,
                              time_units='hours since 1900-01-01 00:00:0.0')
        try:
            rootgrp.source = \
                'ERA-Interim, interpolated (bi)linearly to stations'

            time = rootgrp['time']
            station = rootgrp['station']
            latitude = rootgrp['latitude']
            longitude = rootgrp['longitude']
            height = rootgrp['height']

            # assign base variables
            time[:] = ncf.variables['time'][:]
            station[:] = ncf.variables['station'][:]
            latitude[:] = ncf.variables['latitude'][:]
            longitude[:] = ncf.variables['longitude'][:]
            height[:] = ncf.variables['height'][:]

            # create and assign variables from input file
            for var in varlist:
                tmp = rootgrp.createVariable(var, 'f4', ('time', 'station'))
                tmp.long_name = str_encode(ncf.variables[var].long_name)
                tmp.units = str_encode(ncf.variables[var].units)

            # add air pressure as new variable
            var = 'air_pressure'
            varlist.append(var)
            tmp = rootgrp.createVariable(var, 'f4', ('time', 'station'))
            tmp.long_name = var.encode('UTF8')
            tmp.units = 'hPa'.encode('UTF8')
            # end file preparation =========================================

            # The level values repeated for every time step and flattened,
            # i.e. laid out like a ravelled (time, level) array. This does
            # not depend on the station, so build it once.
            pressure = np.repeat([ncf.variables['level'][:]],
                                 len(time), axis=0).ravel()

            # loop over stations
            for n, h in enumerate(height):
                # 'z' divided by standard gravity gives a height,
                # shape: (time, level)
                # (presumably 'z' is geopotential in m2 s-2 -- TODO confirm)
                elevation = ncf.variables['z'][:, :, n] / 9.80665
                # TODO: check if height of stations in data range

                # difference in elevation, level directly above will be >= 0
                elev_diff = elevation - h
                # vector of level indices that fall directly above station.
                # Levels below the station are pushed out of the running by
                # adding a large offset. Apply after ravel() of data.
                va = np.argmin(elev_diff + (elev_diff < 0) * 100000, axis=1)
                # mask for situations where station is below lowest level
                mask = va < (nl - 1)
                # turn per-time-step level indices into indices of the
                # ravelled (time, level) array
                va += np.arange(elevation.shape[0]) * elevation.shape[1]

                # Vector level indices that fall directly below station.
                # Apply after ravel() of data.
                vb = va + mask  # +1 when OK, +0 when below lowest level

                wa, wb = self.calculate_weights(elev_diff, va, vb)

                # loop over variables and apply interpolation weights
                for var in varlist:
                    if var == 'air_pressure':
                        data = pressure
                    else:
                        # read data from netCDF
                        data = ncf.variables[var][:, :, n].ravel()

                    ipol = data[va] * wa + data[vb] * wb  # interpolated
                    rootgrp.variables[var][:, n] = ipol  # assign to file
        finally:
            rootgrp.close()
    finally:
        ncf.close()
def mergeFiles(self, ncfile_in):
    """
    Combine multiple downloaded erai netCDF files into one large file and
    delete the input files afterwards.

    Args:
        ncfile_in: the full name of downloaded files (file directory +
            file names, wildcards allowed), e.g.:
            '/home/xquan/src/globsim/examples/erai/era_sa_*.nc'
            '/home/xquan/src/globsim/examples/erai/era_pl_*.nc'
            '/home/xquan/src/globsim/examples/erai/era_sf_*.nc'
            The two characters at positions [-7:-5] must be 'sa', 'sf'
            or 'pl'; they select the output name.

    Output:
        merged netCDF file erai_sa_all.nc, erai_sf_all.nc or
        erai_pl_all.nc in the directory of the input files.

    Raises:
        ValueError: if the file type cannot be derived from ncfile_in.
    """
    # set up the name of merged file; fail before any file is touched
    ftype = ncfile_in[-7:-5]
    if ftype not in ('sa', 'sf', 'pl'):
        raise ValueError(
            'There is not such type of file: {}'.format(ncfile_in))
    # dirname() instead of a fixed-width slice, so the result does not
    # depend on the length of the file name prefix
    ncfile_out = path.join(path.dirname(ncfile_in),
                           'erai_' + ftype + '_all' + '.nc')

    # read in one type of multiple netcdf files
    ncf_in = nc.MFDataset(ncfile_in, 'r', aggdim='time')
    try:
        # is it a file with pressure levels?
        pl = 'level' in ncf_in.dimensions.keys()

        # get spatial dimensions
        lat = ncf_in.variables['latitude'][:]
        lon = ncf_in.variables['longitude'][:]

        # get time
        nctime = ncf_in.variables['time'][:]

        # get variables
        varlist = [str_encode(x) for x in ncf_in.variables.keys()]
        varlist.remove('time')
        varlist.remove('latitude')
        varlist.remove('longitude')
        if pl:  # only for pressure level files
            varlist.remove('level')

        # Build the netCDF file
        rootgrp = nc.Dataset(ncfile_out, 'w', format='NETCDF4_CLASSIC')
        try:
            rootgrp.Conventions = 'CF-1.6'
            rootgrp.source = 'ERA_Interim, merged downloaded original files'
            rootgrp.featureType = "timeSeries"

            # dimensions
            rootgrp.createDimension('latitude', len(lat))
            rootgrp.createDimension('longitude', len(lon))
            rootgrp.createDimension('time', None)

            # base variables
            time = rootgrp.createVariable('time', 'i4', ('time',))
            time.long_name = 'time'
            time.units = 'hours since 1900-01-01 00:00:0.0'
            time.calendar = 'gregorian'
            latitude = rootgrp.createVariable('latitude', 'f4',
                                              ('latitude',))
            latitude.long_name = 'latitude'
            latitude.units = 'degrees_north'
            longitude = rootgrp.createVariable('longitude', 'f4',
                                               ('longitude',))
            longitude.long_name = 'longitude'
            longitude.units = 'degrees_east'

            # assign coordinates
            latitude[:] = lat[:]
            longitude[:] = lon[:]
            time[:] = nctime[:]

            # extra treatment for pressure level files
            if pl:
                lev = ncf_in.variables['level'][:]
                print("== 3D: file has pressure levels")
                rootgrp.createDimension('level', len(lev))
                level = rootgrp.createVariable('level', 'i4', ('level',))
                level.long_name = 'pressure_level'
                level.units = 'hPa'
                level[:] = lev
                dims = ('time', 'level', 'latitude', 'longitude')
            else:
                print("== 2D: file without pressure levels")
                dims = ('time', 'latitude', 'longitude')

            # create and assign variables based on input file
            for var in varlist:
                print("VAR: ", var)
                tmp = rootgrp.createVariable(var, 'f4', dims)
                tmp.long_name = ncf_in.variables[var].long_name.encode(
                    'UTF8')  # for erai
                tmp.units = ncf_in.variables[var].units.encode('UTF8')

                # assign values
                if pl:  # only for pressure level files
                    tmp[:] = ncf_in.variables[var][:, :, :, :]
                else:
                    tmp[:] = ncf_in.variables[var][:, :, :]
        finally:
            # close the file
            rootgrp.close()
    finally:
        ncf_in.close()

    # clear up the data; only reached when the merge succeeded. Never
    # delete the merged file itself, even if it matches the input pattern.
    merged = path.abspath(ncfile_out)
    for fl in sorted(glob.glob(ncfile_in)):
        if path.abspath(fl) != merged:
            remove(fl)
def WIND_sur(self):
    """
    Wind at 10 metre derived from surface data, exclusively.

    Creates four (time, station) variables in ``self.rg``:
    '10 metre U wind component' and '10 metre V wind component'
    (the 'u-component of wind' / 'v-component of wind' series of
    ``self.nc_sa`` interpolated linearly in time to ``self.times_out_nc``),
    and 'WSPD_sur' / 'WDIR_sur' derived from these two.
    """
    time_in = self.nc_sa.variables['time'][:]
    n_stations = len(self.rg.variables['station'][:].tolist())

    # (name in output file, name in surface analysis file)
    components = (
        ('10 metre U wind component', 'u-component of wind'),
        ('10 metre V wind component', 'v-component of wind'),
    )
    for vn, source_name in components:
        # add variable to ncdf file
        var = self.rg.createVariable(vn, 'f4', ('time', 'station'))
        var.long_name = vn
        var.units = str_encode(self.nc_sa.variables[source_name].units)

        # interpolate station by station
        values = self.nc_sa.variables[source_name][:]
        for n in range(n_stations):
            self.rg.variables[vn][:, n] = np.interp(self.times_out_nc,
                                                    time_in, values[:, n])

    # add variable to ncdf file
    vn = 'WSPD_sur'  # variable name
    var = self.rg.createVariable(vn, 'f4', ('time', 'station'))
    var.long_name = '10 metre wind speed {} surface only'.format(self.NAME)
    var.units = 'm s-1'
    var.standard_name = 'wind_speed'

    # add variable to ncdf file
    vn = 'WDIR_sur'  # variable name
    var = self.rg.createVariable(vn, 'f4', ('time', 'station'))
    var.long_name = '10 metre wind direction {} surface only'.format(
        self.NAME)
    var.units = 'degree'
    var.standard_name = 'wind_from_direction'

    # convert
    # u is the ZONAL VELOCITY, i.e. horizontal wind TOWARDS EAST.
    # v is the MERIDIONAL VELOCITY, i.e. horizontal wind TOWARDS NORTH.
    V = self.rg.variables['10 metre V wind component'][:]
    U = self.rg.variables['10 metre U wind component'][:]

    for n in range(n_stations):
        # use this station's column only: the speed written to column n
        # must have shape (time,), not (time, station)
        v_n = V[:, n]
        u_n = U[:, n]
        WS = np.sqrt(np.power(v_n, 2) + np.power(u_n, 2))
        # NOTE(review): this is the mathematical angle of the (U, V) vector
        # shifted by 180 degrees, kept as in the original code. The
        # meteorological 'wind_from_direction' is usually computed as
        # atan2(-U, -V); confirm which convention downstream code expects
        # before changing the formula.
        WD = np.arctan2(v_n, u_n) * (180 / pi) + 180

        self.rg.variables['WSPD_sur'][:, n] = WS
        self.rg.variables['WDIR_sur'][:, n] = WD
def levels2elevation(self, ncfile_in, ncfile_out):
    """
    Linear 1D interpolation of pressure level data available for individual
    stations to station elevation. Where and when stations are below the
    lowest pressure level, they are assigned the value of the lowest
    pressure level.

    Args:
        ncfile_in: name (wildcards allowed) of the input file(s) holding
            pressure level data per station; opened as one dataset
            aggregated along 'time'. Must contain 'time', 'station',
            'latitude', 'longitude', 'level', 'height' and 'z' (and
            'number' when ``self.ens`` is set).
        ncfile_out: name of the netCDF file to create. It receives one
            variable per input data variable plus 'air_pressure', with
            dimensions (time, station), or (time, number, station) when
            ``self.ens`` is set.
    """
    # open file
    # TODO: check the aggdim does not work
    ncf = nc.MFDataset(ncfile_in, 'r', aggdim='time')
    try:
        height = ncf.variables['height'][:]
        nt = len(ncf.variables['time'][:])
        nl = len(ncf.variables['level'][:])

        # list variables; what remains are the data variables to interpolate
        varlist = [str_encode(x) for x in ncf.variables.keys()]
        for V in ['time', 'station', 'latitude', 'longitude', 'level',
                  'height', 'z']:
            varlist.remove(V)
        if self.ens:
            varlist.remove('number')

        # === open and prepare output netCDF file ==========================
        # dimensions: station, time
        # variables: latitude(station), longitude(station),
        #            elevation(station)
        # others: ...(time, station)
        # stations are integer numbers
        # create a file (Dataset object, also the root group).
        rootgrp = netcdf_base(ncfile_out, len(height), nt,
                              'hours since 1900-01-01 00:00:0.0', ncf)
        try:
            if self.ens:
                rootgrp.source = 'ERA5 10-member ensemble, interpolated (bi)linearly to stations'
                dims = ('time', 'number', 'station')
            else:
                rootgrp.source = 'ERA5, interpolated (bi)linearly to stations'
                dims = ('time', 'station')

            time = rootgrp['time']
            station = rootgrp['station']
            latitude = rootgrp['latitude']
            longitude = rootgrp['longitude']
            height = rootgrp['height']

            # assign base variables
            time[:] = ncf.variables['time'][:]
            station[:] = ncf.variables['station'][:]
            latitude[:] = ncf.variables['latitude'][:]
            longitude[:] = ncf.variables['longitude'][:]
            height[:] = ncf.variables['height'][:]

            # create and assign variables from input file
            for var in varlist:
                tmp = rootgrp.createVariable(var, 'f4', dims)
                tmp.long_name = str_encode(ncf.variables[var].long_name)
                tmp.units = str_encode(ncf.variables[var].units)

            # add air pressure as new variable
            var = 'air_pressure'
            varlist.append(var)
            tmp = rootgrp.createVariable(var, 'f4', dims)
            tmp.long_name = var.encode('UTF8')
            tmp.units = 'hPa'.encode('UTF8')
            # end file preparation =========================================

            # The level values repeated for every time step and flattened,
            # i.e. laid out like a ravelled (time, level) array. This does
            # not depend on station or ensemble member, so build it once.
            pressure = np.repeat([ncf.variables['level'][:]],
                                 len(time), axis=0).ravel()
            # ensemble member numbers, also used as index along 'number'
            num = ncf.variables['number'][:] if self.ens else []

            # loop over stations
            for n, h in enumerate(height):
                if self.ens:
                    for ni in num:
                        # 'z' divided by standard gravity gives a height,
                        # shape: (time, level)
                        elevation = ncf.variables['z'][:, ni, :, n] / 9.80665
                        elev_diff, va, vb = self.ele_interpolate(
                            elevation, h, nl)
                        wa, wb = self.calculate_weights(elev_diff, va, vb)
                        # loop over variables, apply interpolation weights
                        for var in varlist:
                            if var == 'air_pressure':
                                data = pressure
                            else:
                                # read data from netCDF
                                data = ncf.variables[var][:, ni, :, n].ravel()

                            # interpolated value
                            ipol = data[va] * wa + data[vb] * wb
                            # assign to file
                            rootgrp.variables[var][:, ni, n] = ipol
                else:
                    # 'z' divided by standard gravity gives a height,
                    # shape: (time, level)
                    # (presumably 'z' is geopotential in m2 s-2 -- TODO
                    # confirm)
                    elevation = ncf.variables['z'][:, :, n] / 9.80665
                    elev_diff, va, vb = self.ele_interpolate(
                        elevation, h, nl)
                    wa, wb = self.calculate_weights(elev_diff, va, vb)

                    # loop over variables and apply interpolation weights
                    for var in varlist:
                        if var == 'air_pressure':
                            data = pressure
                        else:
                            # read data from netCDF
                            data = ncf.variables[var][:, :, n].ravel()

                        # interpolated value
                        ipol = data[va] * wa + data[vb] * wb
                        rootgrp.variables[var][:, n] = ipol  # assign to file
        finally:
            rootgrp.close()
    finally:
        ncf.close()
def interp2D(self,
             ncfile_in: str,
             ncf_in,
             points,
             tmask_chunk: "np.ndarray",
             variables=None,
             date=None):
    """
    Bilinear interpolation from fields on regular grid (latitude, longitude)
    to individual point stations (latitude, longitude). This works for
    surface and for pressure level files.

    Args:
        ncfile_in: Full path to an Era-Interim derived netCDF file. This can
                   contain wildcards to point to multiple files if temporal
                   chunking was used.

        ncf_in: A netCDF4.MFDataset derived from reading in Era-Interim
                multiple files (def ERA2station())

        points: A dictionary of locations. See method StationListRead in
                common_utils.py for more details.

        tmask_chunk: Mask over the time axis; its sum is the number of time
                steps that get interpolated (presumably a boolean array
                selecting the steps of the current chunk -- TODO confirm).

        variables: List of variable(s) to interpolate such as
                   ['r', 't', 'u','v', 't2m', 'u10', 'v10', 'ssrd', 'strd',
                   'tp']. Defaults to using all variables available.

        date: Dictionary to specify begin and end time for the derived time
              series. Defaults to using all times available in ncfile_in.
              NOTE(review): not used inside this method.

    Returns:
        (dfield, variables): the regridded destination field (a list with
        one field per ensemble member when the input has a 'number'
        dimension) and the list of interpolated variable names.

    Raises:
        ValueError: if tmask_chunk selects no time step, or if one of the
            requested variables is not in the netCDF file.

    Example:
        from datetime import datetime
        date = {'beg' : datetime(2008, 1, 1),
                'end' : datetime(2008,12,31)}
        variables = ['t','u', 'v']
        stations = StationListRead("points.csv")
        ERA2station('era_sa.nc', 'era_sa_inter.nc', stations,
                    variables=variables, date=date)
    """
    # test if time steps to interpolate remain. This has to come first:
    # the min/max in the log message below fail on an empty selection.
    nt = tmask_chunk.sum()
    if nt == 0:
        raise ValueError('No time steps from netCDF file selected.')

    selected = np.where(tmask_chunk == True)  # noqa: E712
    logger.debug(
        f"Starting 2d interpolation for chunks {np.min(selected)} to {np.max(selected)} of {len(tmask_chunk)} "
    )

    # is it a file with pressure levels?
    pl = 'level' in ncf_in.dimensions.keys()
    ens = 'number' in ncf_in.dimensions.keys()

    # get spatial dimensions
    if pl:  # only for pressure level files
        nlev = len(ncf_in.variables['level'][:])
    else:
        nlev = 1

    if ens:
        num = ncf_in.variables['number'][:]
    else:
        num = []

    # get variables
    varlist = [str_encode(x) for x in ncf_in.variables.keys()]
    # NOTE(review): ens=False is passed even when the file has a 'number'
    # dimension -- confirm this is intended
    self.remove_select_variables(varlist, pl, ens=False)

    # list variables that should be interpolated
    if variables is None:
        variables = varlist
    # test if variables given are available in file
    if not set(variables) <= set(varlist):
        raise ValueError('One or more variables not in netCDF file.')

    sgrid = self.create_source_grid(ncfile_in)

    # create source field(s) on source grid
    if ens:
        sfield = []
        for _ in num:
            if pl:  # only for pressure level files
                sfield.append(create_field(sgrid, variables, nt, nlev))
            else:  # 2D files
                sfield.append(create_field(sgrid, variables, nt))

        self.nc_ensemble_data_to_source_field(variables, sfield, ncf_in,
                                              tmask_chunk, pl)
    else:
        if pl:  # only for pressure level files
            sfield = create_field(sgrid, variables, nt, nlev)
        else:  # 2D files
            sfield = create_field(sgrid, variables, nt)

        self.nc_data_to_source_field(variables, sfield, ncf_in,
                                     tmask_chunk, pl)

    locstream = self.create_loc_stream(points)

    # create destination field
    if pl:  # only for pressure level files
        ndbounds = [len(variables), nt, nlev]
    else:
        ndbounds = [len(variables), nt]

    if ens:
        dfield = []
        for ni in num:
            di = ESMF.Field(locstream, name='dfield', ndbounds=ndbounds)
            # member numbers double as index into the source field list
            dfield.append(self.regrid(sfield[ni], di))
    else:
        dfield = ESMF.Field(locstream, name='dfield', ndbounds=ndbounds)
        dfield = self.regrid(sfield, dfield)

    logger.debug("Created destination field")

    return dfield, variables