def get_gpp_data(self, interval='season'):
    """
    get surface GPP data for JSBACH

    todo: temporal aggregation of data --> or leave it to the user?
    """
    cdo = Cdo()
    v = 'var167'
    y1 = str(self.start_time)[0:10]
    y2 = str(self.stop_time)[0:10]
    rawfilename = self.data_dir + 'data/model/' + self.experiment + '_' + y1[0:4] + '-' + y2[0:4] + '.nc'
    times_in_file = int(''.join(cdo.ntime(input=rawfilename)))

    if interval == 'season':
        if times_in_file != 4:
            tmp_file = get_temporary_directory() + os.path.basename(rawfilename)
            cdo.yseasmean(options='-f nc -b 32 -r ',
                          input='-selvar,' + v + ' ' + rawfilename,
                          output=tmp_file[:-3] + '_yseasmean.nc')
            rawfilename = tmp_file[:-3] + '_yseasmean.nc'

    if interval == 'monthly':
        if times_in_file != 12:
            tmp_file = get_temporary_directory() + os.path.basename(rawfilename)
            cdo.ymonmean(options='-f nc -b 32 -r ',
                         input='-selvar,' + v + ' ' + rawfilename,
                         output=tmp_file[:-3] + '_ymonmean.nc')
            rawfilename = tmp_file[:-3] + '_ymonmean.nc'

    if not os.path.exists(rawfilename):
        return None

    filename = rawfilename

    # read land-sea mask
    ls_mask = get_T63_landseamask(self.shift_lon)

    # read GPP data
    gpp = Data4D(filename, v, read=True,
                 label=self.experiment + ' ' + v,
                 unit='gC m-2 a-1',
                 lat_name='lat', lon_name='lon',
                 shift_lon=self.shift_lon,
                 mask=ls_mask.data.data,
                 scale_factor=3600. * 24. * 30. / 0.083)

    return gpp.sum_data4D()
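# Usage sketch (hypothetical; names are illustrative, not part of the
# source): get_gpp_data() is a method of a JSBACH model wrapper, so it
# would be called on a configured model instance, e.g.
#
#     model = JSBACH_BOT(data_dir, varmethods, experiment,
#                        start_time=start_time, stop_time=stop_time)
#     gpp = model.get_gpp_data(interval='season')  # Data object, or None
#                                                  # if the input file is missing
#
# Whether JSBACH_BOT is the class that actually carries this method is an
# assumption here; the constructor signature is taken from main() below.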
def _get_ensemble_filename(self, the_variable, mip, realm):
    """
    get filename of ensemble mean file; if required, all
    pre-processing steps are performed first

    Parameters
    ----------
    the_variable : str
        variable name to be processed

    Returns
    -------
    filename of the file with multi-ensemble means
    """
    # use model parser to generate a list of available institutes and
    # models from the data directory
    data_dir = self.data_dir
    if data_dir[-1] != os.sep:
        data_dir += os.sep

    CMP = preprocessor.CMIP5ModelParser(self.data_dir)
    model_list = CMP.get_all_models()

    # model name in the configuration file is assumed to be INSTITUTE:MODEL
    institute = self.model.split(':')[0]
    model = self.model.split(':')[1]

    # TODO why is the institute not part of the model output name?
    output_file = get_temporary_directory() + the_variable + '_' + mip + '_' + model + '_' + self.experiment + '_ensmean.nc'

    if institute not in model_list.keys():
        raise ValueError('Data for this institute does not exist: %s' % institute)

    # do preprocessing of data from multiple ensembles; if the file
    # already exists, no processing is done
    C5PP = preprocessor.CMIP5Preprocessor(data_dir, output_file,
                                          the_variable, model,
                                          self.experiment,
                                          institute=institute,
                                          mip=mip, realm=realm)

    # calculate the ensemble mean and store it as a file;
    # the STDV is calculated on the fly as well.
    # The resulting filenames are available as C5PP.outfile_ensmean
    # and C5PP.outfile_ensstd
    C5PP.ensemble_mean(delete=False,
                       start_time=self.start_time,
                       stop_time=self.stop_time)

    return C5PP.outfile_ensmean
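# Usage sketch (hypothetical values): with the INSTITUTE:MODEL naming
# convention assumed above, e.g. self.model = 'MPI-M:MPI-ESM-LR', a call
# like
#
#     fname = self._get_ensemble_filename('rsds', 'Amon', 'atmos')
#
# would run the ensemble preprocessing (unless the output file already
# exists) and return a path of the form
# <tempdir>/rsds_Amon_MPI-ESM-LR_<experiment>_ensmean.nc.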
def _get_ensemble_filename(self, the_variable):
    """
    get filename of ensemble mean file; if required, all
    pre-processing steps are performed first

    Parameters
    ----------
    the_variable : str
        variable name to be processed

    Returns
    -------
    filename of the file with multi-ensemble means
    """
    # use model parser to generate a list of available institutes and
    # models from the data directory
    data_dir = self.data_dir
    if data_dir[-1] != os.sep:
        data_dir += os.sep

    CMP = preprocessor.CMIP5ModelParser(self.data_dir)
    model_list = CMP.get_all_models()

    # model name in the configuration file is assumed to be INSTITUTE:MODEL
    institute = self.model.split(':')[0]
    model = self.model.split(':')[1]

    # TODO why is the institute not part of the model output name?
    output_file = get_temporary_directory() + the_variable + '_Amon_' + model + '_' + self.experiment + '_ensmean.nc'

    if institute not in model_list.keys():
        raise ValueError('Data for this institute does not exist: %s' % institute)

    # do preprocessing of data from multiple ensembles; if the file
    # already exists, no processing is done
    C5PP = preprocessor.CMIP5Preprocessor(data_dir, output_file,
                                          the_variable, model,
                                          self.experiment,
                                          institute=institute)
    res_file = C5PP.ensemble_mean(delete=False,
                                  start_time=self.start_time,
                                  stop_time=self.stop_time)
    return res_file
def test_cdo_tempdir_DefaultNoEnv(self):
    if 'CDOTEMPDIR' in os.environ.keys():
        d = os.environ.pop('CDOTEMPDIR')
    r = utils.get_temporary_directory()
    self.assertEqual(r, './')
def test_cdo_tempdir_fromENV1(self):
    d = '/some/directory/path/'
    os.environ.update({'CDOTEMPDIR': d})
    r = utils.get_temporary_directory()
    self.assertEqual(r, d)
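# A minimal sketch (not the actual pycmbs implementation) of the behaviour
# the two tests above pin down: utils.get_temporary_directory() returns the
# value of the CDOTEMPDIR environment variable if it is set, and './'
# otherwise. Whether the real function also appends a trailing separator
# is an assumption here; callers in this module concatenate filenames
# directly onto the returned string.
import os

def get_temporary_directory_sketch():
    """Return the directory used for temporary CDO output files (sketch)."""
    d = os.environ.get('CDOTEMPDIR', './')
    if not d.endswith(os.sep):
        d += os.sep  # assumption: ensure the path can be prepended to a basename
    return d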
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with a key for each model type; each key maps
    to a dictionary with the actual properties
    """
    if self.type not in kwargs.keys():
        print 'WARNING: it is not possible to get data using the generic function, as the method is missing: ', self.type, kwargs.keys()
        return None

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments;
    # no defaults: everything should be explicitly specified in either
    # the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Crazy Unit')
    # interval = kwargs.pop('interval')  # does not make sense to specify a default value, as this option comes from the configuration file!

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix')
    model_prefix = locdict.pop('model_prefix')
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        filename1 = ("%s%s/merged/%s_%s_%s_%s_%s.%s" %
                     (self.data_dir, varname, varname, model_prefix,
                      self.model, self.experiment, model_suffix, file_format))
    else:
        if self.type == 'CMIP5':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                         (custom_path, varname, model_prefix, self.model,
                          self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAW':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                         (custom_path, varname, model_prefix, self.model,
                          self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAWSINGLE':
            print 'todo: needs implementation!'
            assert False
        elif self.type == 'CMIP3':
            filename1 = ("%s/%s_%s_%s_%s.%s" %
                         (custom_path, self.experiment, self.model,
                          varname, model_suffix, file_format))
        else:
            print self.type
            raise ValueError('Can not generate filename: invalid model type! %s' % self.type)

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select time period and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid +
                      ' -seldate,' + s_start_time + ',' + s_stop_time +
                      ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True,
                 label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name,
                 shift_lon=False, scale_factor=scf,
                 level=thelevel, time_cycle=thetime_cycle)
    mdata_std = Data(mdata_clim_std_file, varname, read=True,
                     label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, level=thelevel,
                     time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True,
                   label=self._unique_name + ' N', unit='-',
                   lat_name=lat_name, lon_name=lon_name,
                   shift_lon=False, scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    mdata.adjust_time(year=1700, day=15)
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True,
                     label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, time_cycle=12,
                     scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S;
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask,
                                         target_grid=target_grid,
                                         mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
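# The **kwargs passed to get_model_data_generic() map the model type to a
# property dictionary; compare the model_dict defined in main() below.
# A minimal example (values taken from the 'rain'/'CMIP5' entry there):
#
#     kwargs = {'CMIP5': {'variable': 'pr', 'unit': 'mm/day',
#                         'lat_name': 'lat', 'lon_name': 'lon',
#                         'model_suffix': 'ensmean', 'model_prefix': 'Amon',
#                         'file_format': 'nc', 'scale_factor': 86400.,
#                         'valid_mask': 'ocean'}}
#     mdata, (t, mean_ts, mall) = self.get_model_data_generic(interval='season', **kwargs)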
def xxxxxxxxxxxxxxxxxxxget_surface_shortwave_radiation_down(self, interval='season', force_calc=False, **kwargs):
    """
    return data object of
    a) seasonal means for SIS
    b) global mean timeseries for SIS at original temporal resolution
    """
    the_variable = 'rsds'

    locdict = kwargs[self.type]
    valid_mask = locdict.pop('valid_mask')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    if self.type == 'CMIP5':
        filename1 = self.data_dir + 'rsds' + os.sep + self.experiment + '/ready/' + self.model + '/rsds_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown model type! not supported here!')

    if not os.path.exists(filename1):
        print ('WARNING file not existing: %s' % filename1)
        return None

    # /// PREPROCESSING ///
    cdo = Cdo()

    # 1) select time period and generate the monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    print file_monthly

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly,
                input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        sis_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        sis_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        sis_N_file = file_monthly[:-3] + '_ymonN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        sis_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        sis_sum_file = file_monthly[:-3] + '_yseassum.nc'
        sis_N_file = file_monthly[:-3] + '_yseasN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
    else:
        print interval
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(sis_clim_file):
        return None

    # 3) read data
    sis = Data(sis_clim_file, 'rsds', read=True,
               label=self._unique_name, unit='$W m^{-2}$',
               lat_name='lat', lon_name='lon', shift_lon=False)
    sis_std = Data(sis_clim_std_file, 'rsds', read=True,
                   label=self._unique_name + ' std', unit='-',
                   lat_name='lat', lon_name='lon', shift_lon=False)
    sis.std = sis_std.data.copy()
    del sis_std
    sis_N = Data(sis_N_file, 'rsds', read=True,
                 label=self._unique_name + ' N', unit='-',
                 lat_name='lat', lon_name='lon', shift_lon=False)
    sis.n = sis_N.data.copy()
    del sis_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    sis.adjust_time(year=1700, day=15)
    sis.timsort()

    # 4) read monthly data
    sisall = Data(file_monthly, 'rsds', read=True,
                  label=self._unique_name, unit='W m^{-2}',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    if not sisall._is_monthly():
        raise ValueError('Timecycle of 12 expected here!')
    sisall.adjust_time(day=15)

    # land/sea masking ...
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    sis._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sisall._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sismean = sisall.fldmean()

    # return data as a tuple list
    retval = (sisall.time, sismean, sisall)
    del sisall

    # mask areas without radiation (set to invalid): all data < 1 W/m**2
    sis.data = np.ma.array(sis.data, mask=sis.data < 1.)

    return sis, retval
def xxxxxget_surface_shortwave_radiation_up(self, interval='season', force_calc=False, **kwargs):

    the_variable = 'rsus'

    if self.type == 'CMIP5':
        filename1 = self.data_dir + the_variable + os.sep + self.experiment + os.sep + 'ready' + os.sep + self.model + os.sep + 'rsus_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown type! not supported here!')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    if not os.path.exists(filename1):
        print ('WARNING file not existing: %s' % filename1)
        return None

    # PREPROCESSING
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select time period and generate monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly,
                input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        sup_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        sup_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        sup_N_file = file_monthly[:-3] + '_ymonN.nc'
        sup_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        sup_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        sup_sum_file = file_monthly[:-3] + '_yseassum.nc'
        sup_N_file = file_monthly[:-3] + '_yseasN.nc'
        sup_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
    else:
        print interval
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(sup_clim_file):
        print 'File not existing (sup_clim_file): ' + sup_clim_file
        return None

    # 3) read data
    sup = Data(sup_clim_file, 'rsus', read=True,
               label=self._unique_name, unit='$W m^{-2}$',
               lat_name='lat', lon_name='lon', shift_lon=False)
    sup_std = Data(sup_clim_std_file, 'rsus', read=True,
                   label=self._unique_name + ' std', unit='-',
                   lat_name='lat', lon_name='lon', shift_lon=False)
    sup.std = sup_std.data.copy()
    del sup_std
    sup_N = Data(sup_N_file, 'rsus', read=True,
                 label=self._unique_name + ' N', unit='-',
                 lat_name='lat', lon_name='lon', shift_lon=False)
    sup.n = sup_N.data.copy()
    del sup_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    sup.adjust_time(year=1700, day=15)
    sup.timsort()

    # 4) read monthly data
    supall = Data(file_monthly, 'rsus', read=True,
                  label=self._unique_name, unit='$W m^{-2}$',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    supall.adjust_time(day=15)
    if not supall._is_monthly():
        raise ValueError('Monthly timecycle expected here!')
    supmean = supall.fldmean()

    # /// return data as a tuple list
    retval = (supall.time, supmean, supall)
    del supall

    # /// mask areas without radiation (set to invalid): all data < 1 W/m**2
    # sup.data = np.ma.array(sup.data, mask=sup.data < 1.)

    return sup, retval
def main():
    plt.close('all')

    if len(sys.argv) > 1:
        if len(sys.argv) == 2:
            # a single argument was provided as option
            if sys.argv[1] == 'init':
                # copy INI files and a template configuration file
                # to the current directory
                create_dummy_configuration()
                sys.exit()
            else:
                file = sys.argv[1]  # name of config file
                if not os.path.exists(file):
                    raise ValueError('Configuration file can not be found: %s' % file)
        else:
            raise ValueError('Currently not more than one command line parameter supported!')
    else:  # default
        file = 'pyCMBS.cfg'

    print('*******************************************')
    print('* WELCOME to pycmbs.py                    *')
    print('* Happy benchmarking ...                  *')
    print('*******************************************')

    ####################################################################
    # CONFIGURATION and OPTIONS
    ####################################################################

    # read configuration file
    CF = config.ConfigFile(file)

    # read plotting options
    PCFG = config.PlotOptions()
    PCFG.read(CF)
    plot_options = PCFG

    ####################################################################
    # REMOVE previous data warnings
    ####################################################################
    outdir = CF.options['outputdir']
    if outdir[-1] != os.sep:
        outdir += os.sep

    os.environ['PYCMBS_OUTPUTDIR'] = CF.options['outputdir']
    os.environ['PYCMBS_OUTPUTFORMAT'] = CF.options['report_format']

    os.environ['DATA_WARNING_FILE'] = outdir + 'data_warnings_' + CF.options['report'] + '.log'
    if os.path.exists(os.environ['DATA_WARNING_FILE']):
        os.remove(os.environ['DATA_WARNING_FILE'])

    # init regions
    REGIONS = config.AnalysisRegions()

    for thevar in plot_options.options.keys():
        print('Variable: %s' % thevar)
        for k in plot_options.options[thevar].keys():
            print('    Observation: %s' % k)

    if CF.options['basemap']:
        f_fast = False
    else:
        f_fast = True
    shift_lon = use_basemap = not f_fast

    ########################################################################
    # TIMES
    ########################################################################
    s_start_time = CF.start_date
    s_stop_time = CF.stop_date
    start_time = pylab.num2date(pylab.datestr2num(s_start_time))
    stop_time = pylab.num2date(pylab.datestr2num(s_stop_time))

    model_dict = {'rain': {'CMIP5': {'variable': 'pr',
                                     'unit': 'mm/day',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'model_prefix': 'Amon',
                                     'file_format': 'nc',
                                     'scale_factor': 86400.,
                                     'valid_mask': 'ocean'},
                           'JSBACH_RAW2': {'variable': 'precip_acc',
                                           'unit': 'mm/day',
                                           'lat_name': 'lat',
                                           'lon_name': 'lon',
                                           'file_format': 'nc',
                                           'scale_factor': 86400.,
                                           'valid_mask': 'global'}
                           },
                  'evap': {'CMIP5': {'variable': 'evspsbl',
                                     'unit': 'mm/day',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 86400.,
                                     'valid_mask': 'ocean'}
                           },
                  'twpa': {'CMIP5': {'variable': 'clwvi',
                                     'unit': 'kg/m^2',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 1.,
                                     'valid_mask': 'ocean'}
                           },
                  'wind': {'CMIP5': {'variable': 'sfcWind',
                                     'unit': 'm/s',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 1.,
                                     'valid_mask': 'ocean'}
                           },
                  'wvpa': {'CMIP5': {'variable': 'prw',
                                     'unit': 'kg m^2',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 1,
                                     'valid_mask': 'ocean'}
                           },
                  'late': {'CMIP5': {'variable': 'hfls',
                                     'unit': 'W/m^2',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 1,
                                     'valid_mask': 'ocean'}
                           },
                  'hair': {'CMIP5': {'variable': 'huss',
                                     'unit': '$kg/kg^2$',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 1,
                                     'valid_mask': 'ocean'}
                           },
                  'seaice_concentration': {'CMIP5': {'variable': 'sic',
                                                     'unit': '-',
                                                     'lat_name': 'lat',
                                                     'lon_name': 'lon',
                                                     'model_suffix': 'ens_mean_185001-200512',
                                                     'file_format': 'nc',
                                                     'model_prefix': 'OImon',
                                                     'scale_factor': 1,
                                                     'valid_mask': 'ocean',
                                                     'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk'},
                                           'CMIP3': {'variable': 'SICOMO',
                                                     'unit': '-',
                                                     'lat_name': 'lat',
                                                     'lon_name': 'lon',
                                                     'model_suffix': '1860-2100.ext',
                                                     'file_format': 'nc',
                                                     'model_prefix': '',
                                                     'scale_factor': 100.,
                                                     'valid_mask': 'ocean',
                                                     'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk',
                                                     'level': 0}
                                           },
                  'seaice_extent': {'CMIP5': {'variable': 'sic',
                                              'unit': '-',
                                              'lat_name': 'lat',
                                              'lon_name': 'lon',
                                              'model_suffix': 'ens_mean_185001-200512',
                                              'file_format': 'nc',
                                              'model_prefix': 'OImon',
                                              'scale_factor': 1,
                                              'valid_mask': 'ocean',
                                              'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk'},
                                    'CMIP3': {'variable': 'SICOMO',
                                              'unit': '-',
                                              'lat_name': 'lat',
                                              'lon_name': 'lon',
                                              'model_suffix': '1860-2100.ext',
                                              'file_format': 'nc',
                                              'model_prefix': '',
                                              'scale_factor': 100.,
                                              'valid_mask': 'ocean',
                                              'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk',
                                              'level': 0}
                                    },
                  'budg': {'CMIP5': {'variable': 'budg',
                                     'unit': 'mm/d',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'model_suffix': 'ensmean',
                                     'file_format': 'nc',
                                     'model_prefix': 'Amon',
                                     'scale_factor': 86400.,
                                     'valid_mask': 'ocean',
                                     'custom_path': '/net/nas2/export/eo/workspace/m300036/pycmbs-cmsaf/data'}
                           },
                  'sis': {'JSBACH_RAW2': {'variable': 'swdown_acc',
                                          'unit': '$W/m^2$',
                                          'lat_name': 'lat',
                                          'lon_name': 'lon',
                                          'file_format': 'nc',
                                          'scale_factor': 1.,
                                          'valid_mask': 'land'},
                          'CMIP5': {'valid_mask': 'land'}
                          },
                  'surface_upward_flux': {'JSBACH_RAW2': {'variable': 'swdown_reflect_acc',
                                                          'unit': '$W/m^2$',
                                                          'lat_name': 'lat',
                                                          'lon_name': 'lon',
                                                          'file_format': 'nc',
                                                          'scale_factor': 1.,
                                                          'valid_mask': 'land'}
                                          },
                  'albedo_vis': {'JSBACH_RAW2': {'variable': 'var14',
                                                 'unit': '-',
                                                 'lat_name': 'lat',
                                                 'lon_name': 'lon',
                                                 'file_format': 'nc',
                                                 'scale_factor': 1.,
                                                 'valid_mask': 'land'}
                                 },
                  'albedo_nir': {'JSBACH_RAW2': {'variable': 'var15',
                                                 'unit': '-',
                                                 'lat_name': 'lat',
                                                 'lon_name': 'lon',
                                                 'file_format': 'nc',
                                                 'scale_factor': 1.,
                                                 'valid_mask': 'land'}
                                 },
                  'temperature': {'JSBACH_RAW2': {'variable': 'temp2',
                                                  'unit': 'K',
                                                  'lat_name': 'lat',
                                                  'lon_name': 'lon',
                                                  'file_format': 'nc',
                                                  'scale_factor': 1.,
                                                  'valid_mask': 'global'}
                                  }
                  }

    ########################################################################
    # INIT METHODS
    ########################################################################
    # names of analysis scripts for all variables
    scripts = CF.get_analysis_scripts()

    # get dictionary with methods how to read data for the variables to be analyzed
    variables = CF.variables
    varmethods = CF.get_methods4variables(variables, model_dict)

    # /// READ DATA ///
    # create a Model instance for each model specified in the
    # configuration file, read the data for all variables and return
    # a list of Data objects for further processing
    model_cnt = 1
    proc_models = []

    for i in range(len(CF.models)):
        # assign model information from configuration
        data_dir = CF.dirs[i]
        model = CF.models[i]
        experiment = CF.experiments[i]

        # create model object and read data;
        # results are stored in individual variables named modelXXXX
        if CF.dtypes[i].upper() == 'CMIP5':
            themodel = CMIP5Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat', lon_name='lon',
                                 label=model,
                                 start_time=start_time, stop_time=stop_time,
                                 shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'CMIP5RAW':
            themodel = CMIP5RAWData(data_dir, model, experiment, varmethods,
                                    intervals=CF.intervals,
                                    lat_name='lat', lon_name='lon',
                                    label=model,
                                    start_time=start_time, stop_time=stop_time,
                                    shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_BOT':
            themodel = JSBACH_BOT(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  start_time=start_time, stop_time=stop_time,
                                  name=model, shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW':
            themodel = JSBACH_RAW(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  name=model, shift_lon=shift_lon,
                                  start_time=start_time, stop_time=stop_time)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW2':
            themodel = JSBACH_RAW2(data_dir, varmethods, experiment,
                                   intervals=CF.intervals,
                                   start_time=start_time, stop_time=stop_time,
                                   name=model, shift_lon=shift_lon,
                                   model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'CMIP3':
            themodel = CMIP3Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat', lon_name='lon',
                                 label=model,
                                 start_time=start_time, stop_time=stop_time,
                                 shift_lon=shift_lon)
        else:
            raise ValueError('Invalid model type: %s' % CF.dtypes[i])

        # read data for the current model;
        # options that specify regrid options etc.
        themodel.plot_options = plot_options
        themodel.get_data()

        # copy the current model to a variable named modelXXXX
        cmd = 'model' + str(model_cnt).zfill(4) + ' = themodel.copy(); del themodel'
        exec(cmd)  # store a copy of the model in a separate variable

        # append model to list of models
        proc_models.append('model' + str(model_cnt).zfill(4))
        model_cnt += 1

    ########################################################################
    # MULTIMODEL MEAN
    # here we now have all models and variables read.
    # The list of all models is contained in the variable proc_models.
    f_mean_model = True
    if f_mean_model:
        # calculate climatological mean values: the models already contain
        # climatological information in the variables[] list. Thus there is
        # no need to take care of the different timesteps here; this
        # should have been handled already in the preprocessing.

        # generate an instance of MeanModel to store the result
        MEANMODEL = MeanModel(varmethods, intervals=CF.intervals)

        # sum up all models
        for i in range(len(proc_models)):
            exec('actmodel = ' + proc_models[i] + '.copy()')
            MEANMODEL.add_member(actmodel)
            del actmodel

        # calculate ensemble mean
        MEANMODEL.ensmean()

        # save mean model to file; include filename of the configuration file
        MEANMODEL.save(get_temporary_directory(), prefix='MEANMODEL_' + file[:-4])

        # add mean model to the general list of models to process in the analysis
        proc_models.append('MEANMODEL')

    ########################################################################
    # END MULTIMODEL MEAN
    ########################################################################

    ########################################################################
    # INIT reporting, plotting and diagnostics
    ########################################################################
    # Gleckler Plot
    global_gleckler = GlecklerPlot()

    # Report
    rep = Report(CF.options['report'],
                 'pyCMBS report - ' + CF.options['report'],
                 CF.options['author'],
                 outdir=outdir,
                 dpi=300,
                 format=CF.options['report_format'])
    cmd = 'cp ' + os.environ['PYCMBSPATH'] + '/logo/Phytonlogo5.pdf ' + rep.outdir
    os.system(cmd)

    ########################################################################
    # MAIN ANALYSIS LOOP: perform analysis for each model and variable
    ########################################################################
    skeys = scripts.keys()
    for variable in variables:

        # register current variable in Gleckler Plot
        global_gleckler.add_variable(variable)

        # call analysis scripts for each variable
        for k in range(len(skeys)):
            if variable == skeys[k]:
                print 'Doing analysis for variable ... ', variable
                print '   ... ', scripts[variable]
                # model list is reformatted so it can be evaluated properly
                model_list = str(proc_models).replace("'", "")
                cmd = 'analysis.' + scripts[variable] + '(' + model_list \
                    + ',GP=global_gleckler,shift_lon=shift_lon,' \
                    + 'use_basemap=use_basemap,report=rep,' \
                    + 'interval=CF.intervals[variable],' \
                    + 'plot_options=PCFG,regions=REGIONS.regions)'
                eval(cmd)

    ########################################################################
    # GLECKLER PLOT finalization ...
    ########################################################################
    # generate Gleckler analysis plot for all variables and models analyzed
    global_gleckler.plot(vmin=-0.1, vmax=0.1, nclasses=16,
                         show_value=False,
                         ticks=[-0.1, -0.05, 0., 0.05, 0.1])
    oname = outdir + 'gleckler.pkl'
    if os.path.exists(oname):
        os.remove(oname)
    pickle.dump(global_gleckler.models, open(outdir + 'gleckler_models.pkl', 'w'))
    pickle.dump(global_gleckler.variables, open(outdir + 'gleckler_variables.pkl', 'w'))
    pickle.dump(global_gleckler.data, open(outdir + 'gleckler_data.pkl', 'w'))
    pickle.dump(global_gleckler._raw_data, open(outdir + 'gleckler_rawdata.pkl', 'w'))

    rep.section('Summary error statistics')
    rep.subsection('Gleckler metric')
    rep.figure(global_gleckler.fig,
               caption='Gleckler et al. (2008) model performance index',
               width='10cm')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.png', dpi=200, bbox_inches='tight')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.pdf', dpi=200, bbox_inches='tight')
    plt.close(global_gleckler.fig.number)

    # generate dictionary with observation labels for each variable
    labels_dict = {}
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = {}
        for k in varoptions.keys():  # keys of observational datasets
            if k == 'OPTIONS':
                continue
            else:
                # only add observation to legend
                # if the option in the INI file is set
                if varoptions[k]['add_to_report']:
                    # generate dictionary for GlecklerPlot legend
                    thelabels.update({int(varoptions[k]['gleckler_position']): k})
        labels_dict.update({variable: thelabels})
        del thelabels

    # /// legend for Gleckler plot ///
    lcnt = 1
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = labels_dict[variable]
        fl = global_gleckler._draw_legend(thelabels, title=variable.upper())
        rep.figure(fl, width='8cm', bbox_inches=None)
        fl.savefig(outdir + 'legend_portraet_' + str(lcnt).zfill(5) + '.png',
                   bbox_inches='tight', dpi=200)
        plt.close(fl.number)
        del fl
        lcnt += 1

    # /// plot model ranking between different observational datasets ///
    rep.subsection('Model ranking consistency')
    for v in global_gleckler.variables:
        rep.subsubsection(v.upper())
        tmpfig = global_gleckler.plot_model_ranking(v, show_text=True,
                                                    obslabels=labels_dict[v])
        rep.figure(tmpfig, width='8cm', bbox_inches=None,
                   caption='Model RANKING for different observational datasets: ' + v.upper())
        plt.close(tmpfig.number)
        del tmpfig

        # write a table with model ranking
        tmp_filename = outdir + 'ranking_table_' + v + '.tex'
        rep.open_table()
        global_gleckler.write_ranking_table(v, tmp_filename, fmt='latex', obslabels=labels_dict[v])
        rep.input(tmp_filename)
        rep.close_table(caption='Model rankings for variable ' + v.upper())

        # plot absolute model error
        tmpfig = global_gleckler.plot_model_error(v, obslabels=labels_dict[v])
        rep.figure(tmpfig, width='8cm', bbox_inches=None,
                   caption='Model ERROR for different observational datasets: ' + v.upper())
        plt.close(tmpfig.number)
        del tmpfig

    ########################################################################
    # CLEAN up and finish
    ########################################################################
    plt.close('all')
    rep.close()

    print('##########################################')
    print('# BENCHMARKING FINISHED!                 #')
    print('##########################################')
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with a key for each model type; each key maps
    to a dictionary with the actual properties
    """
    if self.type not in kwargs.keys():
        print ''
        print 'WARNING: it is not possible to get data using the generic function, as the method is missing: ', self.type, kwargs.keys()
        assert False

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments;
    # no defaults: everything should be explicitly specified in either
    # the config file or the dictionaries
    varname = locdict.pop('variable', None)
    #~ print self.type
    #~ print locdict.keys()
    assert varname is not None, 'ERROR: provide varname!'

    units = locdict.pop('unit', None)
    assert units is not None, 'ERROR: provide unit!'

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix', None)
    model_prefix = locdict.pop('model_prefix', None)
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        # routine needs to be implemented by each subclass
        filename1 = self.get_raw_filename(varname, **kwargs)
    else:
        filename1 = custom_path + self.get_raw_filename(varname, **kwargs)

    if filename1 is None:
        print_log(WARNING, 'No valid model input data')
        return None

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select time period and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid +
                      ' -seldate,' + s_start_time + ',' + s_stop_time +
                      ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True,
                 label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name,
                 shift_lon=False, scale_factor=scf,
                 level=thelevel, time_cycle=thetime_cycle)
    mdata_std = Data(mdata_clim_std_file, varname, read=True,
                     label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, level=thelevel,
                     time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True,
                   label=self._unique_name + ' N', unit='-',
                   lat_name=lat_name, lon_name=lon_name,
                   shift_lon=False, scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    mdata.adjust_time(year=1700, day=15)
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True,
                     label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, time_cycle=12,
                     scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S;
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask,
                                         target_grid=target_grid,
                                         mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    mdata._raw_filename = filename1
    mdata._monthly_filename = file_monthly
    mdata._clim_filename = mdata_clim_file
    mdata._varname = varname

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
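# In this revision the filename construction is delegated to
# get_raw_filename(varname, **kwargs), which each model subclass has to
# implement. A minimal sketch of such a hook (illustrative only; the
# directory layout is borrowed from the 'merged' pattern used in the
# earlier revision above):
#
#     def get_raw_filename(self, varname, **kwargs):
#         # e.g. <data_dir>/<varname>/merged/<varname>_<model>_<experiment>.nc
#         return (self.data_dir + varname + '/merged/' + varname + '_'
#                 + self.model + '_' + self.experiment + '.nc')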
def main():
    plt.close('all')

    if len(sys.argv) > 1:
        if len(sys.argv) == 2:
            # a single argument was provided as option
            if sys.argv[1] == 'init':
                # copy INI files and a template configuration file
                # to the current directory
                create_dummy_configuration()
                sys.exit()
            else:
                file = sys.argv[1]  # name of config file
                if not os.path.exists(file):
                    raise ValueError('Configuration file can not be found: %s' % file)
        else:
            raise ValueError('Currently no more than one command line parameter is supported!')
    else:  # default
        print('*******************************************')
        print('* WELCOME to pycmbs.py                    *')
        print('* Happy benchmarking ...                  *')
        print('*******************************************')
        print('')
        print('please specify a configuration filename as argument')
        sys.exit()

    ####################################################################
    # CONFIGURATION and OPTIONS
    ####################################################################

    # read configuration file
    CF = config.ConfigFile(file)

    # read plotting options
    PCFG = config.PlotOptions()
    PCFG.read(CF)
    plot_options = PCFG

    ####################################################################
    # REMOVE previous data warnings
    ####################################################################
    outdir = CF.options['outputdir']
    if outdir[-1] != os.sep:
        outdir += os.sep

    os.environ['PYCMBS_OUTPUTDIR'] = outdir
    os.environ['PYCMBS_OUTPUTFORMAT'] = CF.options['report_format']

    os.environ['DATA_WARNING_FILE'] = outdir + 'data_warnings_' \
        + CF.options['report'] + '.log'
    if os.path.exists(os.environ['DATA_WARNING_FILE']):
        os.remove(os.environ['DATA_WARNING_FILE'])

    for thevar in plot_options.options.keys():
        print('Variable: %s' % thevar)
        for k in plot_options.options[thevar].keys():
            print('    Observation: %s' % k)

    # fast mode is used when no basemap plots are requested
    f_fast = not CF.options['basemap']
    shift_lon = use_basemap = not f_fast

    ########################################################################
    # TIMES
    ########################################################################
    s_start_time = CF.start_date
    s_stop_time = CF.stop_date
    start_time = pylab.num2date(pylab.datestr2num(s_start_time))
    stop_time = pylab.num2date(pylab.datestr2num(s_stop_time))

    ########################################################################
    # INIT METHODS
    ########################################################################
    # names of analysis scripts for all variables ---
    scripts = CF.get_analysis_scripts()

    # get dictionary with methods how to read data for the model
    # variables to be analyzed
    variables = CF.variables
    varmethods = CF.get_methods4variables(CF.variables)

    # READ DATA
    # create a Model instance for each model specified
    # in the configuration file
    #
    # read the data for all variables and return a list
    # of Data objects for further processing

    model_cnt = 1
    proc_models = []

    for i in range(len(CF.models)):
        # assign model information from configuration
        data_dir = CF.dirs[i]
        model = CF.models[i]
        experiment = CF.experiments[i]

        # create model object and read data;
        # results are stored in individual variables named modelXXXX
        if CF.dtypes[i].upper() == 'CMIP5':
            themodel = CMIP5Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat', lon_name='lon',
                                 label=model,
                                 start_time=start_time, stop_time=stop_time,
                                 shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'CMIP5RAW':
            themodel = CMIP5RAWData(data_dir, model, experiment, varmethods,
                                    intervals=CF.intervals,
                                    lat_name='lat', lon_name='lon',
                                    label=model,
                                    start_time=start_time, stop_time=stop_time,
                                    shift_lon=shift_lon)
        elif 'CMIP5RAWSINGLE' in CF.dtypes[i].upper():
            themodel = CMIP5RAW_SINGLE(data_dir, model, experiment, varmethods,
                                       intervals=CF.intervals,
                                       lat_name='lat', lon_name='lon',
                                       label=model,
                                       start_time=start_time, stop_time=stop_time,
                                       shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_BOT':
            themodel = JSBACH_BOT(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  start_time=start_time, stop_time=stop_time,
                                  name=model, shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW':
            themodel = JSBACH_RAW(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  name=model, shift_lon=shift_lon,
                                  start_time=start_time, stop_time=stop_time)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW2':
            themodel = JSBACH_RAW2(data_dir, varmethods, experiment,
                                   intervals=CF.intervals,
                                   start_time=start_time, stop_time=stop_time,
                                   name=model, shift_lon=shift_lon)  # ,
                                   # model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'JSBACH_SPECIAL':
            themodel = JSBACH_SPECIAL(data_dir, varmethods, experiment,
                                      intervals=CF.intervals,
                                      start_time=start_time, stop_time=stop_time,
                                      name=model, shift_lon=shift_lon)  # ,
                                      # model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'CMIP3':
            themodel = CMIP3Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat', lon_name='lon',
                                 label=model,
                                 start_time=start_time, stop_time=stop_time,
                                 shift_lon=shift_lon)
        else:
            raise ValueError('Invalid model type: %s' % CF.dtypes[i])

        # read data for the current model;
        # options that specify regrid options etc.
        themodel._global_configuration = CF
        themodel.plot_options = plot_options
        themodel.get_data()

        # copy the current model to a variable named modelXXXX
        cmd = 'model' + str(model_cnt).zfill(4) + ' = themodel.copy(); del themodel'
        exec(cmd)  # store a copy of the model in a separate variable

        # append model to list of models ---
        proc_models.append('model' + str(model_cnt).zfill(4))
        model_cnt += 1

    ########################################################################
    # MULTIMODEL MEAN
    # at this point all models and variables have been read.
    # The list of all models is contained in the variable proc_models.
    ########################################################################
    f_mean_model = True
    if f_mean_model:
        # calculate climatological mean values: the models already contain
        # climatological information in the variables[] list, so there is
        # no need to take care of the different timesteps here. This
        # should have been handled already in the preprocessing.

        # generate an instance of MeanModel to store the result
        MEANMODEL = MeanModel(varmethods, intervals=CF.intervals)

        # sum up all models
        for i in range(len(proc_models)):
            exec('actmodel = ' + proc_models[i] + '.copy()')
            MEANMODEL.add_member(actmodel)
            del actmodel

        # calculate ensemble mean
        MEANMODEL.ensmean()

        # save mean model to file
        # TODO: include filename of the configuration file
        MEANMODEL.save(get_temporary_directory(), prefix='MEANMODEL_' + file[:-4])

        # add mean model to the general list of models to process in the analysis
        proc_models.append('MEANMODEL')

    ########################################################################
    # END MULTIMODEL MEAN
    ########################################################################

    ########################################################################
    # INIT reporting, plotting and diagnostics
    ########################################################################
    # Gleckler plot
    global_gleckler = GlecklerPlot()

    # report
    rep = Report(CF.options['report'],
                 'pyCMBS report - ' + CF.options['report'],
                 CF.options['author'],
                 outdir=outdir,
                 dpi=300,
                 format=CF.options['report_format'])
    cmd = 'cp ' + os.environ['PYCMBSPATH'] + os.sep + \
        'logo' + os.sep + 'Phytonlogo5.pdf ' + rep.outdir
    os.system(cmd)

    ########################################################################
    # MAIN ANALYSIS LOOP: perform analysis for each model and variable
    ########################################################################
    skeys = scripts.keys()
    for variable in variables:
        # register current variable in Gleckler plot
        global_gleckler.add_variable(variable)

        # call analysis scripts for each variable
        for k in range(len(skeys)):
            if variable == skeys[k]:
                print('Doing analysis for variable ... %s' % variable)
                print('   ... %s' % scripts[variable])
                # the model list is reformatted so it can be evaluated properly
                model_list = str(proc_models).replace("'", "")
                cmd = 'analysis.' + scripts[variable] + '(' + model_list \
                    + ',GP=global_gleckler,shift_lon=shift_lon,' \
                    + 'use_basemap=use_basemap,report=rep,' \
                    + 'interval=CF.intervals[variable],' \
                    + 'plot_options=PCFG)'
                eval(cmd)

    ########################################################################
    # GLECKLER PLOT finalization ...
    ########################################################################
    # generate Gleckler analysis plot for all variables and models analyzed ///
    global_gleckler.plot(vmin=-0.1, vmax=0.1, nclasses=16,
                         show_value=False,
                         ticks=[-0.1, -0.05, 0., 0.05, 0.1])
    oname = outdir + 'gleckler.pkl'
    if os.path.exists(oname):
        os.remove(oname)
    # pickle files are written in binary mode
    pickle.dump(global_gleckler.models, open(outdir + 'gleckler_models.pkl', 'wb'))
    pickle.dump(global_gleckler.variables, open(outdir + 'gleckler_variables.pkl', 'wb'))
    pickle.dump(global_gleckler.data, open(outdir + 'gleckler_data.pkl', 'wb'))
    pickle.dump(global_gleckler._raw_data, open(outdir + 'gleckler_rawdata.pkl', 'wb'))

    rep.section('Summary error statistics')
    rep.subsection('Gleckler metric')
    rep.figure(global_gleckler.fig,
               caption='Gleckler et al. (2008) model performance index',
               width='10cm')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.png', dpi=200, bbox_inches='tight')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.pdf', dpi=200, bbox_inches='tight')
    plt.close(global_gleckler.fig.number)

    # generate dictionary with observation labels for each variable
    labels_dict = {}
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = {}
        for k in varoptions.keys():  # keys of observational datasets
            if k == 'OPTIONS':
                continue
            # only add observation to legend
            # if the option in the INI file is set
            if varoptions[k]['add_to_report']:
                # generate dictionary for GlecklerPlot legend
                thelabels.update({int(varoptions[k]['gleckler_position']): k})
        labels_dict.update({variable: thelabels})
        del thelabels

    # legend for Gleckler plot ///
    lcnt = 1
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = labels_dict[variable]

        fl = global_gleckler._draw_legend(thelabels, title=variable.upper())
        if fl is not None:
            rep.figure(fl, width='8cm', bbox_inches=None)
            fl.savefig(outdir + 'legend_portraet_' + str(lcnt).zfill(5) + '.png',
                       bbox_inches='tight', dpi=200)
            plt.close(fl.number)
            del fl
        lcnt += 1

    # plot model ranking between different observational datasets ///
    rep.subsection('Model ranking consistency')
    for v in global_gleckler.variables:
        rep.subsubsection(v.upper())

        tmpfig = global_gleckler.plot_model_ranking(v, show_text=True,
                                                    obslabels=labels_dict[v])
        if tmpfig is not None:
            rep.figure(tmpfig, width='8cm', bbox_inches=None,
                       caption='Model RANKING for different observational datasets: ' + v.upper())
            plt.close(tmpfig.number)
            del tmpfig

        # write a table with model rankings
        tmp_filename = outdir + 'ranking_table_' + v + '.tex'
        rep.open_table()
        global_gleckler.write_ranking_table(v, tmp_filename, fmt='latex',
                                            obslabels=labels_dict[v])
        rep.input(tmp_filename)
        rep.close_table(caption='Model rankings for variable ' + v.upper())

        # plot absolute model error
        tmpfig = global_gleckler.plot_model_error(v, obslabels=labels_dict[v])
        if tmpfig is not None:
            rep.figure(tmpfig, width='8cm', bbox_inches=None,
                       caption='Model ERROR for different observational datasets: ' + v.upper())
            plt.close(tmpfig.number)
            del tmpfig

    ########################################################################
    # CLEAN up and finish
    ########################################################################
    plt.close('all')
    rep.close()

    print('##########################################')
    print('# BENCHMARKING FINISHED!                 #')
    print('##########################################')
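For reference, the command-line contract of main() is: no argument prints the welcome banner and exits, the single argument 'init' bootstraps a template configuration, and any other single argument is taken as the configuration filename. The following is a minimal sketch of the same dispatch expressed with argparse; parse_cli is hypothetical and not part of pyCMBS, which parses sys.argv directly as shown above.

import argparse
import os
import sys

def parse_cli(argv):
    # sketch only: mirrors the three cases handled in main()
    parser = argparse.ArgumentParser(prog='pycmbs.py')
    parser.add_argument('config', nargs='?', default=None,
                        help="configuration file, or 'init' to create a template")
    args = parser.parse_args(argv)
    if args.config is None:
        print('please specify a configuration filename as argument')
        sys.exit()
    if args.config == 'init':
        create_dummy_configuration()  # provided elsewhere in pyCMBS
        sys.exit()
    if not os.path.exists(args.config):
        raise ValueError('Configuration file can not be found: %s' % args.config)
    return args.config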
def _do_preprocessing(self, rawfile, varname, s_start_time, s_stop_time,
                      interval='monthly', force_calc=False,
                      valid_mask='global', target_grid='t63grid'):
    """
    perform preprocessing
    * selection of variable
    * temporal subsetting
    """
    cdo = Cdo()

    if not os.path.exists(rawfile):
        print('File not existing! %s ' % rawfile)
        return None, None

    # calculate monthly means
    file_monthly = get_temporary_directory() + os.sep + os.path.basename(rawfile[:-3]) \
        + '_' + varname + '_' + s_start_time + '_' + s_stop_time + '_mm.nc'
    if force_calc or (not os.path.exists(file_monthly)):
        cdo.monmean(options='-f nc', output=file_monthly,
                    input='-seldate,' + s_start_time + ',' + s_stop_time
                    + ' ' + '-selvar,' + varname + ' ' + rawfile,
                    force=force_calc)

    if not os.path.exists(file_monthly):
        raise ValueError('Monthly preprocessing did not work! %s ' % file_monthly)

    # calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file,
                input=mdata_sum_file + ' ' + mdata_clim_file,
                force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file,
                input=mdata_sum_file + ' ' + mdata_clim_file,
                force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None, None  # keep return signature consistent (mdata, retval)

    # read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print(interval)
        raise ValueError('Unsupported interval!')
    mdata = Data(mdata_clim_file, varname, read=True, label=self.name, shift_lon=False,
                 time_cycle=thetime_cycle, lat_name='lat', lon_name='lon')
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.name + ' std',
                     unit='-', shift_lon=False, time_cycle=thetime_cycle,
                     lat_name='lat', lon_name='lon')
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self.name + ' N',
                   shift_lon=False, lat_name='lat', lon_name='lon')  # number of samples
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    mdata.adjust_time(year=1700, day=15)
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self.name, shift_lon=False,
                     time_cycle=12, lat_name='lat', lon_name='lon')
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything south of 60 deg S;
    # Antarctica is only masked if LAND points alone shall be used
    mask_antarctica = (valid_mask == 'land')

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid,
                                         mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
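The number-of-samples files above rely on the per-cell identity sum/mean = N for each calendar month or season. The following standalone sketch shows the same trick with the cdo.py bindings; the file names are hypothetical and only illustrate the pattern used in _do_preprocessing().

from cdo import Cdo

cdo = Cdo()
infile = 'model_monthly.nc'  # hypothetical monthly mean input
# climatological mean and sum per calendar month
cdo.ymonmean(options='-f nc -b 32', input=infile, output='clim_mean.nc')
cdo.ymonsum(options='-f nc -b 32', input=infile, output='clim_sum.nc')
# per-cell sample count: sum / mean == N (valid wherever the mean is non-zero)
cdo.div(options='-f nc', input='clim_sum.nc clim_mean.nc', output='clim_N.nc')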
def get_jsbach_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with keys for each model. Then a
    dictionary with properties follows.
    """
    if self.type not in kwargs.keys():
        print('WARNING: it is not possible to get data using the generic function, as method is missing: %s %s' % (self.type, str(kwargs.keys())))
        return None

    print(self.type)
    print(kwargs)
    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments;
    # no defaults: everything should be explicitly specified in either
    # the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Unit not specified')
    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    #model_suffix = locdict.pop('model_suffix')
    #model_prefix = locdict.pop('model_prefix')
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if self.type != 'JSBACH_RAW2':
        print(self.type)
        raise ValueError('Invalid data format here!')

    # define from which stream of JSBACH data needs to be taken
    # for specific variables
    if varname in ['swdown_acc', 'swdown_reflect_acc']:
        filename1 = self.files['jsbach']
    elif varname in ['precip_acc']:
        filename1 = self.files['land']
    elif varname in ['temp2']:
        filename1 = self.files['echam']
    elif varname in ['var14']:  # albedo vis
        filename1 = self.files['albedo_vis']
    elif varname in ['var15']:  # albedo NIR
        filename1 = self.files['albedo_nir']
    else:
        print(varname)
        raise ValueError('Unknown variable type for JSBACH_RAW2 processing!')

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select time period and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print('WARNING: File not existing: ' + filename1)
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid
                + ' -seldate,' + s_start_time + ',' + s_stop_time
                + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file,
                input=mdata_sum_file + ' ' + mdata_clim_file,
                force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file,
                input=mdata_sum_file + ' ' + mdata_clim_file,
                force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print(interval)
        raise ValueError('Unsupported interval!')
    mdata = Data(mdata_clim_file, varname, read=True, label=self.model, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cycle)
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.model + ' std',
                     unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self.model + ' N', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)  # number of samples
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that the climatology always starts with January:
    # set an arbitrary date for the climatology and then sort
    mdata.adjust_time(year=1700, day=15)
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self.model, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
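The generic reader above expects one dictionary per model type, keyed by self.type, whose entries are popped as shown. The following sketch of the expected layout is illustrative only; the keys mirror what get_jsbach_data_generic() reads, while all values are made-up examples.

# hypothetical keyword-argument layout for get_jsbach_data_generic()
jsbach_kwargs = {
    'JSBACH_RAW2': {
        'variable': 'temp2',       # short_name in the netCDF file (required)
        'unit': 'K',               # optional, defaults to 'Unit not specified'
        'lat_name': 'lat',         # optional
        'lon_name': 'lon',         # optional
        'file_format': 'nc',       # required
        'scale_factor': 1.,        # required
        'valid_mask': 'global',    # required
        'custom_path': None,       # optional
        'level': None,             # optional
    }
}
# themodel.get_jsbach_data_generic(interval='season', **jsbach_kwargs)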
def _preproc_streams(self):
    """
    It is assumed that the standard JSBACH postprocessing scripts
    have been applied. Thus monthly mean data is available for each
    stream and code tables still need to be applied.

    This routine does the following:
    1) merge all times from individual (monthly mean) output files
    2) assign code tables to work with proper variable names
    3) aggregate data from tiles to gridbox values
    """
    print('Preprocessing JSBACH raw data streams (may take a while) ...')

    cdo = Cdo()

    # jsbach stream
    print('   JSBACH stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_mm_full.nc'
    if not os.path.exists(outfile):
        codetable = self.data_dir + 'log/' + self.experiment + '_jsbach.codes'
        tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_',
                              dir=get_temporary_directory())  # temporary file
        if len(glob.glob(self._get_filenames_jsbach_stream())) > 0:  # check if input files exist at all
            print('Merging the following files: %s' % self._get_filenames_jsbach_stream())
            cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_jsbach_stream())
            # monmean is needed here, as otherwise the interface does not work
            if os.path.exists(codetable):
                cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)
            else:
                cdo.monmean(options='-f nc', output=outfile, input=tmp)
            print('Outfile: %s' % outfile)
            #~ os.remove(tmp)
            print('Temporary name: %s' % tmp)
    self.files.update({'jsbach': outfile})

    # veg stream
    print('   VEG stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_veg_mm_full.nc'
    if not os.path.exists(outfile):
        codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_veg.codes'
        tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_veg_',
                              dir=get_temporary_directory())  # temporary file
        if len(glob.glob(self._get_filenames_veg_stream())) > 0:  # check if input files exist at all
            cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_veg_stream())
            # monmean is needed here, as otherwise the interface does not work
            if os.path.exists(codetable):
                cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)
            else:
                cdo.monmean(options='-f nc', output=outfile, input=tmp)
            os.remove(tmp)
    self.files.update({'veg': outfile})

    # land stream
    print('   LAND stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_land_mm_full.nc'
    if not os.path.exists(outfile):
        codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_land.codes'
        tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_land_',
                              dir=get_temporary_directory())  # temporary file
        if len(glob.glob(self._get_filenames_land_stream())) > 0:  # check if input files exist at all
            cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_land_stream())
            # monmean is needed here, as otherwise the interface does not work
            if os.path.exists(codetable):
                cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)
            else:
                cdo.monmean(options='-f nc', output=outfile, input=tmp)
            os.remove(tmp)
    self.files.update({'land': outfile})

    # surf stream
    print('   SURF stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_surf_mm_full.nc'
    if not os.path.exists(outfile):
        codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_surf.codes'
        tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_surf_',
                              dir=get_temporary_directory())  # temporary file
        if len(glob.glob(self._get_filenames_surf_stream())) > 0:  # check if input files exist at all
            print(glob.glob(self._get_filenames_surf_stream()))
            cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_surf_stream())
            # monmean is needed here, as otherwise the interface does not work
            if os.path.exists(codetable):
                cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)
            else:
                cdo.monmean(options='-f nc', output=outfile, input=tmp)
            os.remove(tmp)
    self.files.update({'surf': outfile})

    # ECHAM BOT stream
    print('   BOT stream ...')
    outfile = get_temporary_directory() + self.experiment + '_echam6_echam_mm_full.nc'
    if not os.path.exists(outfile):
        codetable = self.data_dir + 'log/' + self.experiment + '_echam6_echam.codes'
        tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_echam6_echam_',
                              dir=get_temporary_directory())  # temporary file
        if len(glob.glob(self._get_filenames_echam_BOT())) > 0:  # check if input files exist at all
            cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_echam_BOT())
            # monmean is needed here, as otherwise the interface does not work
            if os.path.exists(codetable):
                cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)
            else:
                cdo.monmean(options='-f nc', output=outfile, input=tmp)
            os.remove(tmp)
    self.files.update({'echam': outfile})

    # ALBEDO files, as preprocessed by a script from Thomas
    print('   ALBEDO VIS stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_VIS_albedo_mm_full.nc'
    if not os.path.exists(outfile):
        if len(glob.glob(self._get_filenames_albedo_VIS())) > 0:  # check if input files exist at all
            cdo.mergetime(options='-f nc', output=outfile, input=self._get_filenames_albedo_VIS())
    self.files.update({'albedo_vis': outfile})

    print('   ALBEDO NIR stream ...')
    outfile = get_temporary_directory() + self.experiment + '_jsbach_NIR_albedo_mm_full.nc'
    if not os.path.exists(outfile):
        if len(glob.glob(self._get_filenames_albedo_NIR())) > 0:  # check if input files exist at all
            cdo.mergetime(options='-f nc', output=outfile, input=self._get_filenames_albedo_NIR())
    self.files.update({'albedo_nir': outfile})
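The five stream blocks above repeat the same merge / code-table / monthly-mean sequence. A hedged sketch of how this could be factored into a single helper follows; _preproc_single_stream is hypothetical and not part of the class, but it reproduces the same cdo pipeline under the same assumptions.

def _preproc_single_stream(self, pattern, outfile, codetable, prefix):
    # minimal sketch, assuming the same cdo-based pipeline as _preproc_streams()
    cdo = Cdo()
    if os.path.exists(outfile):
        return outfile
    if len(glob.glob(pattern)) == 0:  # no input files at all
        return None
    tmp = tempfile.mktemp(suffix='.nc', prefix=prefix,
                          dir=get_temporary_directory())  # temporary file
    cdo.mergetime(options='-f nc', output=tmp, input=pattern)
    # monmean is needed here, as otherwise the interface does not work
    if os.path.exists(codetable):
        cdo.monmean(options='-f nc', output=outfile,
                    input='-setpartab,' + codetable + ' ' + tmp)
    else:
        cdo.monmean(options='-f nc', output=outfile, input=tmp)
    os.remove(tmp)
    return outfile

# usage sketch, e.g. for the veg stream:
# self._preproc_single_stream(self._get_filenames_veg_stream(),
#                             get_temporary_directory() + self.experiment + '_jsbach_veg_mm_full.nc',
#                             self.data_dir + 'log/' + self.experiment + '_jsbach_veg.codes',
#                             self.experiment + '_jsbach_veg_')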