def get_T63_landseamask(shift_lon, mask_antarctica=True, area='land'):
    """
    get JSBACH T63 land/sea mask
    the LS mask is read from the JSBACH init file

    area : str
        ['land','ocean']: When 'land', the returned mask is True on land
        pixels; for 'ocean' it is vice versa. In any other case you get a
        valid field everywhere (globally).
    mask_antarctica : bool
        if True, then the mask is False over Antarctica (<60S)
    """
    ls_file = get_data_pool_directory() \
        + 'data_sources/LSMASK/jsbach_T63_GR15_4tiles_1992.nc'
    ls_mask = Data(ls_file, 'slm', read=True, label='T63 land-sea mask',
                   lat_name='lat', lon_name='lon', shift_lon=shift_lon)

    if area == 'land':
        msk = ls_mask.data > 0.
    elif area == 'ocean':
        msk = ls_mask.data == 0.
    else:
        msk = np.ones(ls_mask.data.shape).astype('bool')

    ls_mask.data[~msk] = 0.
    ls_mask.data[msk] = 1.
    ls_mask.data = ls_mask.data.astype('bool')
    if mask_antarctica:
        ls_mask.data[ls_mask.lat < -60.] = False

    # ensure that also the mask attribute is set properly
    ls_mask._apply_mask(~msk)

    return ls_mask
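
# Illustrative usage sketch (not part of the original code): build the T63
# land mask once and use it to restrict a Data object to land points outside
# Antarctica. `data_on_t63` is a hypothetical Data object assumed to already
# be on the T63 grid.
def _example_apply_t63_land_mask(data_on_t63):
    lsm = get_T63_landseamask(False, area='land', mask_antarctica=True)
    data_on_t63._apply_mask(lsm)
    return data_on_t63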
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with keys for each model. Then a dictionary
    with properties follows.
    """

    if not self.type in kwargs.keys():
        print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
        return None

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments
    # no defaults; everything should be explicitly specified in either the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Crazy Unit')
    #interval = kwargs.pop('interval')  #, 'season')  # does not make sense to specify a default value as this option is specified by the configuration file!

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix')
    model_prefix = locdict.pop('model_prefix')
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        filename1 = ("%s%s/merged/%s_%s_%s_%s_%s.%s" %
                     (self.data_dir, varname, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
    else:
        if self.type == 'CMIP5':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                         (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAW':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                         (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAWSINGLE':
            print 'todo needs implementation!'
            assert False
        elif self.type == 'CMIP3':
            filename1 = ("%s/%s_%s_%s_%s.%s" %
                         (custom_path, self.experiment, self.model, varname, model_suffix, file_format))
        else:
            print self.type
            raise ValueError('Can not generate filename: invalid model type! %s' % self.type)

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    if interval == 'monthly':
        thetime_cylce = 12
    elif interval == 'season':
        thetime_cylce = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cylce)
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cylce)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
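
# Sketch of the keyword-argument structure expected by get_model_data_generic()
# (illustrative values only; in practice these entries come from the
# configuration files). The outer key is the model type (self.type), and the
# inner dictionary carries the per-variable settings that are popped above.
_EXAMPLE_MODEL_KWARGS = {
    'CMIP5': {
        'variable': 'rsds',
        'unit': 'W m**-2',
        'lat_name': 'lat',
        'lon_name': 'lon',
        'model_suffix': 'ensmean',
        'model_prefix': 'Amon',
        'file_format': 'nc',
        'scale_factor': 1.,
        'valid_mask': 'global',
        'custom_path': None,
        'level': None,
    }
}
# A hypothetical call would then look like:
#   mdata, (t, mean_ts, full_field) = model.get_model_data_generic(interval='season', **_EXAMPLE_MODEL_KWARGS)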
def get_surface_shortwave_radiation_down(self, interval='season', force_calc=False, **kwargs):
    """
    return data object of
    a) seasonal means for SIS
    b) global mean timeseries for SIS at original temporal resolution
    """
    the_variable = 'rsds'

    locdict = kwargs[self.type]
    valid_mask = locdict.pop('valid_mask')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    if self.type == 'CMIP5':
        filename1 = self.data_dir + 'rsds' + os.sep + self.experiment + '/ready/' + self.model + '/rsds_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown model type! not supported here!')

    if not os.path.exists(filename1):
        print ('WARNING file not existing: %s' % filename1)
        return None

    #/// PREPROCESSING ///
    cdo = Cdo()

    #1) select timeperiod and generate the monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    print file_monthly

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly,
                input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        sis_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        sis_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        sis_N_file = file_monthly[:-3] + '_ymonN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        sis_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        sis_sum_file = file_monthly[:-3] + '_yseassum.nc'
        sis_N_file = file_monthly[:-3] + '_yseasN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
    else:
        print interval
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(sis_clim_file):
        return None

    #3) read data
    sis = Data(sis_clim_file, 'rsds', read=True, label=self._unique_name, unit='$W m^{-2}$',
               lat_name='lat', lon_name='lon', shift_lon=False)
    sis_std = Data(sis_clim_std_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-',
                   lat_name='lat', lon_name='lon', shift_lon=False)
    sis.std = sis_std.data.copy()
    del sis_std
    sis_N = Data(sis_N_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-',
                 lat_name='lat', lon_name='lon', shift_lon=False)
    sis.n = sis_N.data.copy()
    del sis_N

    # ensure that climatology always starts with January, therefore set date and then sort
    sis.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    sis.timsort()

    #4) read monthly data
    sisall = Data(file_monthly, 'rsds', read=True, label=self._unique_name, unit='W m^{-2}',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    if not sisall._is_monthly():
        raise ValueError('Timecycle of 12 expected here!')
    sisall.adjust_time(day=15)

    # land/sea masking ...
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    sis._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sisall._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sismean = sisall.fldmean()

    # return data as a tuple list
    retval = (sisall.time, sismean, sisall)
    del sisall

    # mask areas without radiation (set to invalid): all data < 1 W/m**2
    sis.data = np.ma.array(sis.data, mask=sis.data < 1.)

    return sis, retval
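
# Minimal sketch (an assumption, not part of the original module) of how the
# return values could be consumed: `sis` holds the climatology, `retval` the
# (time, global mean, full field) tuple at monthly resolution. `model` stands
# for a hypothetical model object providing the method above.
def _example_plot_sis_timeseries(model, **kwargs):
    import matplotlib.pyplot as plt
    result = model.get_surface_shortwave_radiation_down(interval='season', **kwargs)
    if result is None:
        return None
    sis, (t, sismean, sisall) = result
    f = plt.figure()
    ax = f.add_subplot(1, 1, 1)
    ax.plot(t, sismean, label='global mean rsds')
    ax.set_ylabel('rsds [W m**-2]')
    ax.legend()
    return f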
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with keys for each model. Then a dictionary
    with properties follows.
    """

    if not self.type in kwargs.keys():
        print ''
        print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
        assert False

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments
    # no defaults; everything should be explicitly specified in either the config file or the dictionaries
    varname = locdict.pop('variable', None)
    #~ print self.type
    #~ print locdict.keys()
    assert varname is not None, 'ERROR: provide varname!'

    units = locdict.pop('unit', None)
    assert units is not None, 'ERROR: provide unit!'

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix', None)
    model_prefix = locdict.pop('model_prefix', None)
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        filename1 = self.get_raw_filename(varname, **kwargs)  # routine needs to be implemented by each subclass
    else:
        filename1 = custom_path + self.get_raw_filename(varname, **kwargs)

    if filename1 is None:
        print_log(WARNING, 'No valid model input data')
        return None

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    if interval == 'monthly':
        thetime_cylce = 12
    elif interval == 'season':
        thetime_cylce = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cylce)
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cylce)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    mdata._raw_filename = filename1
    mdata._monthly_filename = file_monthly
    mdata._clim_filename = mdata_clim_file
    mdata._varname = varname

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
def get_generic_landseamask(shift_lon, mask_antarctica=True, area='land',
                            interpolation_method='remapnn',
                            target_grid='t63grid', force=False):
    """
    get generic land/sea mask. The routine uses the CDO command 'topo'
    to generate a 0.5 degree land/sea mask and remaps it to the target
    grid using the specified interpolation method (nearest neighbor by
    default).

    NOTE: using inconsistent land/sea masks between datasets can result
    in considerable biases. Note also that the application of a l/s mask
    depends on the spatial resolution.

    This routine implements a VERY simple approach, assuming that all
    areas >0 m height are land and the rest is ocean.

    Parameters
    ----------
    shift_lon : bool
        specifies if longitudes shall be shifted
    interpolation_method : str
        specifies the interpolation method that shall be used for
        remapping the 0.5 degree data to the target grid. This can be
        any of ['remapnn', 'remapcon', 'remapbil']
    target_grid : str
        specifies the target grid to interpolate to, analogous to the CDO
        remap functions. This can be either a grid name or a filename
        which includes valid geometry information
    force : bool
        force calculation (removes previous file) = slower
    area : str
        ['land','ocean']. When 'land', the returned mask is True on land
        pixels; for 'ocean' it is vice versa. In any other case you get a
        valid field everywhere (globally).
    mask_antarctica : bool
        mask Antarctica; if True, then the mask is False over Antarctica
        (<60S)

    Returns
    -------
    returns a Data object
    """

    print('WARNING: Automatic generation of land/sea mask. '
          'Ensure that this is what you want!')

    cdo = Cdo()

    #/// construct output filename.
    # If a filename was given for the grid, replace path separators ///
    target_grid1 = target_grid.replace(os.sep, '_')
    outputfile = get_temporary_directory() + 'land_sea_fractions_' \
        + interpolation_method + '_' + target_grid1 + '.nc'

    print 'outfile: ', outputfile
    print 'cmd: ', '-' + interpolation_method + ',' + target_grid + ' -topo'

    #/// interpolate data to target grid using CDO ///
    cdo.monmean(options='-f nc', output=outputfile,
                input='-' + interpolation_method + ',' + target_grid + ' -topo',
                force=force)

    #/// generate L/S mask from topography (land = height > 0.) ///
    ls_mask = Data(outputfile, 'topo', read=True, label='generic land-sea mask',
                   lat_name='lat', lon_name='lon', shift_lon=shift_lon)
    print('Land/sea mask can be found in file: %s' % outputfile)

    if area == 'land':
        msk = ls_mask.data > 0.  # gives land
    elif area == 'ocean':
        msk = ls_mask.data <= 0.
    else:
        msk = np.ones(ls_mask.data.shape).astype('bool')

    ls_mask.data[~msk] = 0.
    ls_mask.data[msk] = 1.
    ls_mask.data = ls_mask.data.astype('bool')

    #/// mask Antarctica if desired ///
    if mask_antarctica:
        ls_mask.data[ls_mask.lat < -60.] = False

    # ensure that also the mask attribute is set properly
    ls_mask._apply_mask(~msk)

    return ls_mask
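
# Illustrative usage sketch (an assumption, not part of the original code):
# generate an ocean mask on a custom target grid described by a CDO grid file
# and apply it to a hypothetical Data object `sst` living on that same grid.
# The grid file path is a placeholder.
def _example_ocean_mask_on_custom_grid(sst, gridfile='/path/to/mygrid.txt'):
    omask = get_generic_landseamask(False, area='ocean',
                                    interpolation_method='remapnn',
                                    target_grid=gridfile,
                                    mask_antarctica=False)
    sst._apply_mask(omask)
    return sst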
def _do_preprocessing(self, rawfile, varname, s_start_time, s_stop_time,
                      interval='monthly', force_calc=False,
                      valid_mask='global', target_grid='t63grid'):
    """
    perform preprocessing
    * selection of variable
    * temporal subsetting
    """
    cdo = Cdo()

    if not os.path.exists(rawfile):
        print('File not existing! %s ' % rawfile)
        return None, None

    # calculate monthly means
    file_monthly = get_temporary_directory() + os.sep + os.path.basename(rawfile[:-3]) + '_' + varname + '_' + s_start_time + '_' + s_stop_time + '_mm.nc'
    if (force_calc) or (not os.path.exists(file_monthly)):
        cdo.monmean(options='-f nc', output=file_monthly,
                    input='-seldate,' + s_start_time + ',' + s_stop_time + ' ' + '-selvar,' + varname + ' ' + rawfile,
                    force=force_calc)
    else:
        pass

    if not os.path.exists(file_monthly):
        raise ValueError('Monthly preprocessing did not work! %s ' % file_monthly)

    # calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None, None

    # read data
    if interval == 'monthly':
        thetime_cylce = 12
    elif interval == 'season':
        thetime_cylce = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self.name, shift_lon=False,
                 time_cycle=thetime_cylce, lat_name='lat', lon_name='lon')
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.name + ' std', unit='-',
                     shift_lon=False, time_cycle=thetime_cylce, lat_name='lat', lon_name='lon')
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self.name + ' std',
                   shift_lon=False, lat_name='lat', lon_name='lon')
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    # read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self.name, shift_lon=False,
                     time_cycle=12, lat_name='lat', lon_name='lon')
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
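
# Minimal sketch (not part of the original code) showing how the preprocessing
# helper could be exercised from outside; `obs` stands for a hypothetical
# object exposing _do_preprocessing() and a `name` attribute, and the file
# path and dates are placeholders.
def _example_preprocess_observation(obs, rawfile='/path/to/rawfile.nc', varname='tas'):
    return obs._do_preprocessing(rawfile, varname, '2001-01-01', '2005-12-31',
                                 interval='monthly', valid_mask='land',
                                 target_grid='t63grid')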
def get_jsbach_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with keys for each model. Then a dictionary
    with properties follows.
    """

    if not self.type in kwargs.keys():
        print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
        return None

    print self.type
    print kwargs

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments
    # no defaults; everything should be explicitly specified in either the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Unit not specified')

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    #model_suffix = locdict.pop('model_suffix')
    #model_prefix = locdict.pop('model_prefix')
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if self.type != 'JSBACH_RAW2':
        print self.type
        raise ValueError('Invalid data format here!')

    # define from which stream of JSBACH data needs to be taken for specific variables
    if varname in ['swdown_acc', 'swdown_reflect_acc']:
        filename1 = self.files['jsbach']
    elif varname in ['precip_acc']:
        filename1 = self.files['land']
    elif varname in ['temp2']:
        filename1 = self.files['echam']
    elif varname in ['var14']:  # albedo vis
        filename1 = self.files['albedo_vis']
    elif varname in ['var15']:  # albedo NIR
        filename1 = self.files['albedo_nir']
    else:
        print varname
        raise ValueError('Unknown variable type for JSBACH_RAW2 processing!')

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    if interval == 'monthly':
        thetime_cylce = 12
    elif interval == 'season':
        thetime_cylce = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self.model, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cylce)
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.model + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cylce)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self.model + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self.model, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
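
# Sketch (an assumption, not taken from the original configuration) of the
# stream mapping that get_jsbach_data_generic() relies on: self.files is
# expected to map JSBACH_RAW2 output streams to file paths, from which the
# variable-specific file is chosen above. All paths are placeholders.
_EXAMPLE_JSBACH_FILES = {
    'jsbach': '/path/to/experiment_jsbach_mm.nc',       # swdown_acc, swdown_reflect_acc
    'land': '/path/to/experiment_land_mm.nc',           # precip_acc
    'echam': '/path/to/experiment_echam_mm.nc',         # temp2
    'albedo_vis': '/path/to/experiment_albedo_vis.nc',  # var14
    'albedo_nir': '/path/to/experiment_albedo_nir.nc',  # var15
}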
# Example: apply an arbitrary mask to a Data object and cut the bounding box.
# The imports below reflect the assumed pycmbs package layout.
import numpy as np
import matplotlib.pyplot as plt

from pycmbs.data import Data
from pycmbs.mapping import map_plot
from pycmbs.examples import download

filename = download.get_sample_file(name='air', return_object=False)
air = Data(filename, 'air', read=True)

# generate some mask that fits the geometry of the data. Could also be read
# from some file. We mimic here some irregular mask.
mask = np.zeros(air.data[0, :, :].shape)
mask[20:30, 50:60] = 1.
mask[40:60, 100:120] = 1.
mask = np.asarray(mask).astype('bool')  # make a bool array

# generate figure for illustration purposes
f = plt.figure()
ax1 = f.add_subplot(2, 2, 1)
ax2 = f.add_subplot(2, 2, 2)
ax3 = f.add_subplot(2, 2, 3)
ax4 = f.add_subplot(2, 2, 4)

f1 = map_plot(air, ax=ax1, title='This is the original data (unprojected)')
ax2.imshow(mask)
ax2.set_title('This is the mask')

# now we apply the mask to the Data object
air._apply_mask(mask)  # applies the mask to each timestep
f3 = map_plot(air, ax=ax3, title='Masked data')

# if you want, you can estimate the bounding box automatically
air.cut_bounding_box()
f4 = map_plot(air, ax=ax4, title='Cut data')

plt.show()
def get_model_data_generic(self, interval="season", **kwargs): """ unique parameters are: filename - file basename variable - name of the variable as the short_name in the netcdf file kwargs is a dictionary with keys for each model. Then a dictionary with properties follows """ if not self.type in kwargs.keys(): print "" print "WARNING: it is not possible to get data using generic function, as method missing: ", self.type, kwargs.keys() assert False locdict = kwargs[self.type] # read settings and details from the keyword arguments # no defaults; everything should be explicitely specified in either the config file or the dictionaries varname = locdict.pop("variable", None) # ~ print self.type # ~ print locdict.keys() assert varname is not None, "ERROR: provide varname!" units = locdict.pop("unit", None) assert units is not None, "ERROR: provide unit!" lat_name = locdict.pop("lat_name", "lat") lon_name = locdict.pop("lon_name", "lon") model_suffix = locdict.pop("model_suffix", None) model_prefix = locdict.pop("model_prefix", None) file_format = locdict.pop("file_format") scf = locdict.pop("scale_factor") valid_mask = locdict.pop("valid_mask") custom_path = locdict.pop("custom_path", None) thelevel = locdict.pop("level", None) target_grid = self._actplot_options["targetgrid"] interpolation = self._actplot_options["interpolation"] if custom_path is None: filename1 = self.get_raw_filename(varname, **kwargs) # routine needs to be implemented by each subclass else: filename1 = custom_path + self.get_raw_filename(varname, **kwargs) if filename1 is None: print_log(WARNING, "No valid model input data") return None force_calc = False if self.start_time is None: raise ValueError("Start time needs to be specified") if self.stop_time is None: raise ValueError("Stop time needs to be specified") # /// PREPROCESSING /// cdo = Cdo() s_start_time = str(self.start_time)[0:10] s_stop_time = str(self.stop_time)[0:10] # 1) select timeperiod and generate monthly mean file if target_grid == "t63grid": gridtok = "T63" else: gridtok = "SPECIAL_GRID" file_monthly = ( filename1[:-3] + "_" + s_start_time + "_" + s_stop_time + "_" + gridtok + "_monmean.nc" ) # target filename file_monthly = get_temporary_directory() + os.path.basename(file_monthly) sys.stdout.write("\n *** Model file monthly: %s\n" % file_monthly) if not os.path.exists(filename1): print "WARNING: File not existing: " + filename1 return None cdo.monmean( options="-f nc", output=file_monthly, input="-" + interpolation + "," + target_grid + " -seldate," + s_start_time + "," + s_stop_time + " " + filename1, force=force_calc, ) sys.stdout.write("\n *** Reading model data... 
\n") sys.stdout.write(" Interval: " + interval + "\n") # 2) calculate monthly or seasonal climatology if interval == "monthly": mdata_clim_file = file_monthly[:-3] + "_ymonmean.nc" mdata_sum_file = file_monthly[:-3] + "_ymonsum.nc" mdata_N_file = file_monthly[:-3] + "_ymonN.nc" mdata_clim_std_file = file_monthly[:-3] + "_ymonstd.nc" cdo.ymonmean(options="-f nc -b 32", output=mdata_clim_file, input=file_monthly, force=force_calc) cdo.ymonsum(options="-f nc -b 32", output=mdata_sum_file, input=file_monthly, force=force_calc) cdo.ymonstd(options="-f nc -b 32", output=mdata_clim_std_file, input=file_monthly, force=force_calc) cdo.div( options="-f nc", output=mdata_N_file, input=mdata_sum_file + " " + mdata_clim_file, force=force_calc ) # number of samples elif interval == "season": mdata_clim_file = file_monthly[:-3] + "_yseasmean.nc" mdata_sum_file = file_monthly[:-3] + "_yseassum.nc" mdata_N_file = file_monthly[:-3] + "_yseasN.nc" mdata_clim_std_file = file_monthly[:-3] + "_yseasstd.nc" cdo.yseasmean(options="-f nc -b 32", output=mdata_clim_file, input=file_monthly, force=force_calc) cdo.yseassum(options="-f nc -b 32", output=mdata_sum_file, input=file_monthly, force=force_calc) cdo.yseasstd(options="-f nc -b 32", output=mdata_clim_std_file, input=file_monthly, force=force_calc) cdo.div( options="-f nc -b 32", output=mdata_N_file, input=mdata_sum_file + " " + mdata_clim_file, force=force_calc, ) # number of samples else: raise ValueError("Unknown temporal interval. Can not perform preprocessing!") if not os.path.exists(mdata_clim_file): return None # 3) read data if interval == "monthly": thetime_cylce = 12 elif interval == "season": thetime_cylce = 4 else: print interval raise ValueError("Unsupported interval!") mdata = Data( mdata_clim_file, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel, time_cycle=thetime_cylce, ) mdata_std = Data( mdata_clim_std_file, varname, read=True, label=self._unique_name + " std", unit="-", lat_name=lat_name, lon_name=lon_name, shift_lon=False, level=thelevel, time_cycle=thetime_cylce, ) mdata.std = mdata_std.data.copy() del mdata_std mdata_N = Data( mdata_N_file, varname, read=True, label=self._unique_name + " std", unit="-", lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel, ) mdata.n = mdata_N.data.copy() del mdata_N # ensure that climatology always starts with January, therefore set date and then sort mdata.adjust_time(year=1700, day=15) # set arbitrary time for climatology mdata.timsort() # 4) read monthly data mdata_all = Data( file_monthly, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, time_cycle=12, scale_factor=scf, level=thelevel, ) mdata_all.adjust_time(day=15) # mask_antarctica masks everything below 60 degrees S. 
# here we only mask Antarctica, if only LAND points shall be used if valid_mask == "land": mask_antarctica = True elif valid_mask == "ocean": mask_antarctica = False else: mask_antarctica = False if target_grid == "t63grid": mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)) mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)) else: tmpmsk = get_generic_landseamask( False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica ) mdata._apply_mask(tmpmsk) mdata_all._apply_mask(tmpmsk) del tmpmsk mdata_mean = mdata_all.fldmean() mdata._raw_filename = filename1 mdata._monthly_filename = file_monthly mdata._clim_filename = mdata_clim_file mdata._varname = varname # return data as a tuple list retval = (mdata_all.time, mdata_mean, mdata_all) del mdata_all return mdata, retval