def get_temperature_2m(self, interval=None):
    """
    return data object of
    a) seasonal means for air temperature
    b) global mean timeseries for TAS at original temporal resolution

    This routine is currently disabled: it has not been revised to support
    CMIP raw data yet and therefore fails unconditionally after printing
    a notice. The legacy workflow is kept below the guard as the starting
    point for that revision.

    Parameters
    ----------
    interval : str
        temporal aggregation; the legacy workflow only accepts 'season'

    Returns
    -------
    Nothing at the moment. The legacy workflow returns ``(tas, retval)``:
    the seasonal climatology as a Data object and the tuple
    ``(time, field mean timeseries, Data object of the input file)``,
    or None if the climatology file does not exist.

    Raises
    ------
    AssertionError
        always, as long as the routine has not been revised
    """
    print('Needs revision to support CMIP RAWDATA!!')
    # Raised explicitly instead of using ``assert False``: assert statements
    # are removed when python runs with -O, which would silently re-enable
    # the unrevised code below.
    raise AssertionError('Needs revision to support CMIP RAWDATA!!')

    # ------------------------------------------------------------------
    # legacy workflow (not reachable until the guard above is removed)
    # ------------------------------------------------------------------
    if interval != 'season':
        raise ValueError('Other data than seasonal not supported at the moment for CMIP5 data and temperature!')

    # original data
    filename1 = self.data_dir + 'tas/' + self.model + '/' + 'tas_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # first ten characters of the string representation of the time bounds
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # chain of pyCDO steps: seldate -> seasmean -> yseasmean;
    # each step hands its result on to the next one
    tmp = pyCDO(filename1, s_start_time, s_stop_time, force=force_calc).seldate()
    tmp1 = pyCDO(tmp, s_start_time, s_stop_time).seasmean()
    filename = pyCDO(tmp1, s_start_time, s_stop_time).yseasmean()

    if not os.path.exists(filename):
        # two blanks after the colon reproduce the layout of the former
        # two-argument print statement
        print('WARNING: Temperature file not found:  %s' % filename)
        return None

    tas = Data(filename, 'tas', read=True, label=self._unique_name, unit='K',
               lat_name='lat', lon_name='lon', shift_lon=False)

    # data at the temporal resolution of the input file
    tasall = Data(filename1, 'tas', read=True, label=self._unique_name, unit='K',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    if tasall.time_cycle != 12:
        raise ValueError('Timecycle of 12 expected here!')

    tasmean = tasall.fldmean()
    retval = (tasall.time, tasmean, tasall)
    del tasall

    # mask all negative values
    tas.data = np.ma.array(tas.data, mask=tas.data < 0.)

    return tas, retval
plt.close('all')

# load some sample data
# filename = '<THEINPUTFILE>'
filename = download.get_sample_file(name='<VARNAME>', return_object=False)

# the variable 'rain' is read from the file under the name 'pr_wtr'
thevar = '<VARNAME>'
thevar = 'pr_wtr' if thevar == 'rain' else thevar

x = Data(filename, thevar, read=True)
# (two blanks after the colon keep the output of the former print statement)
print('Data dimensions:  %s' % (x.shape,))

# calculate global mean temperature timeseries
t = x.fldmean()

# plot results as a figure
f = plt.figure()
ax = f.add_subplot(111)
ax.plot(x.date, t, label='global mean')
ax.set_xlabel('Years')
ax.set_ylabel('Temperature [degC]')

# perhaps you also want to calculate some statistics like the temperature trend
from scipy import stats
import numpy as np

regression = stats.mstats.linregress(x.time, t)
slope, intercept, r_value, p_value, std_err = regression

# note that the slope has the same units like the time variable of the Data object. Here it is hours!
# if we want to express the slope in [K/decade] we need to rescale
def get_model_data_generic(self, interval='season', **kwargs):
    """
    Read model data of one variable using the generic preprocessing workflow

    The input file is remapped to the target grid and restricted to the
    period [start_time, stop_time] using cdo. Monthly means and a monthly
    or seasonal climatology (mean, standard deviation and number of
    samples) are derived from it and read into Data objects. Finally a
    land/sea mask is applied.

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology ['monthly','season']
    kwargs : dict
        dictionary with keys for each model type. Then a dictionary with
        properties follows. Mandatory properties are 'variable' (name of
        the variable as the short_name in the netcdf file),
        'model_suffix', 'model_prefix', 'file_format', 'scale_factor' and
        'valid_mask'. Optional properties are 'unit', 'lat_name',
        'lon_name', 'custom_path' and 'level'.
        The settings are only read; the dictionary of the caller is left
        untouched and can therefore be reused for further calls.

    Returns
    -------
    tuple ``(mdata, retval)``: *mdata* is the climatology as a Data object
    with the additional attributes ``std`` and ``n``; *retval* is the
    tuple ``(time, field mean timeseries, Data object)`` of the monthly
    data. None is returned if *kwargs* has no entry for ``self.type`` or
    if the input file or the climatology file does not exist.

    Raises
    ------
    KeyError
        if a mandatory property is missing
    ValueError
        if start or stop time are not specified, if the model type or
        the interval is not supported
    AssertionError
        for model type CMIP5RAWSINGLE in combination with a custom path
        (not implemented yet)
    """
    if self.type not in kwargs:
        # two blanks after the colon reproduce the layout of the former
        # multi-argument print statement
        print('WARNING: it is not possible to get data using generic function, as method missing:  %s %s' % (self.type, kwargs.keys()))
        return None

    # Read settings and details from the keyword arguments.
    # The dictionary is deliberately accessed read-only (no pop()): it
    # belongs to the caller, and removing entries would make a second call
    # with the same settings fail with a KeyError.
    locdict = kwargs[self.type]

    varname = locdict['variable']
    units = locdict.get('unit', 'Crazy Unit')
    lat_name = locdict.get('lat_name', 'lat')
    lon_name = locdict.get('lon_name', 'lon')
    model_suffix = locdict['model_suffix']
    model_prefix = locdict['model_prefix']
    file_format = locdict['file_format']
    scf = locdict['scale_factor']
    valid_mask = locdict['valid_mask']
    custom_path = locdict.get('custom_path', None)
    thelevel = locdict.get('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    # name of the input file
    if custom_path is None:
        filename1 = ("%s%s/merged/%s_%s_%s_%s_%s.%s" %
                     (self.data_dir, varname, varname, model_prefix,
                      self.model, self.experiment, model_suffix, file_format))
    elif self.type in ('CMIP5', 'CMIP5RAW'):
        filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                     (custom_path, varname, model_prefix, self.model,
                      self.experiment, model_suffix, file_format))
    elif self.type == 'CMIP5RAWSINGLE':
        print('todo needs implementation!')
        # explicit raise instead of ``assert False``: with python -O the
        # assert would vanish and filename1 would remain undefined
        raise AssertionError('todo needs implementation!')
    elif self.type == 'CMIP3':
        filename1 = ("%s/%s_%s_%s_%s.%s" %
                     (custom_path, self.experiment, self.model, varname,
                      model_suffix, file_format))
    else:
        print(self.type)
        raise ValueError('Can not generate filename: invalid model type! %s' % self.type)

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename: the last three characters of the input name
    # (e.g. '.nc') are replaced; the file is placed in the temporary directory
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print('WARNING: File not existing: ' + filename1)
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    # optag: prefix of the cdo operators; div_options: the monthly branch
    # has always called 'div' without '-b 32', this is kept as is
    if interval == 'monthly':
        optag, div_options, thetime_cycle = 'ymon', '-f nc', 12
    elif interval == 'season':
        optag, div_options, thetime_cycle = 'yseas', '-f nc -b 32', 4
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    stem = file_monthly[:-3]
    mdata_clim_file = stem + '_' + optag + 'mean.nc'
    mdata_sum_file = stem + '_' + optag + 'sum.nc'
    mdata_N_file = stem + '_' + optag + 'N.nc'
    mdata_clim_std_file = stem + '_' + optag + 'std.nc'

    getattr(cdo, optag + 'mean')(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'sum')(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'std')(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
    # number of samples = sum / mean
    cdo.div(options=div_options, output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cycle)

    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std

    mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica, if only LAND points shall be used
    mask_antarctica = (valid_mask == 'land')

    # the same mask object is applied to both datasets, as the branch for
    # generic grids has always done
    if target_grid == 't63grid':
        lsmask = get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)
    else:
        lsmask = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
    mdata._apply_mask(lsmask)
    mdata_all._apply_mask(lsmask)
    del lsmask

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
def xxxxxget_surface_shortwave_radiation_up(self, interval='season', force_calc=False, **kwargs):
    """
    Read the variable 'rsus' and return
    a) its monthly or seasonal climatology
    b) the field mean timeseries of the monthly data

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology ['monthly','season']
    force_calc : bool
        passed on as ``force`` argument to all cdo operators
    kwargs : dict
        not used by this routine

    Returns
    -------
    tuple ``(sup, retval)``: the climatology as a Data object (with the
    attributes ``std`` and ``n``) and the tuple
    ``(time, field mean timeseries, Data object)`` of the monthly data.
    None if the input file or the climatology file does not exist.

    Raises
    ------
    ValueError
        for unsupported model types or intervals, if start or stop time
        are missing or if the monthly data is not monthly
    """
    the_variable = 'rsus'

    # name of the input file, depending on the kind of model data
    if self.type == 'CMIP5':
        raw_file = (self.data_dir
                    + os.sep.join([the_variable, self.experiment, 'ready', self.model,
                                   'rsus_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc']))
    elif self.type == 'CMIP5RAW':
        # raw CMIP5 data based on ensembles
        raw_file = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        raw_file = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos',
                                                 temporal_resolution='mon')
    else:
        raise ValueError('Unknown type! not supported here!')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    if not os.path.exists(raw_file):
        print('WARNING file not existing: %s' % raw_file)
        return None

    # --- preprocessing ---
    cdo = Cdo()
    date_from = str(self.start_time)[:10]
    date_to = str(self.stop_time)[:10]

    # step 1: remap to T63, select the period and calculate monthly means;
    # the result is stored in the temporary directory
    monthly_name = raw_file[:-3] + '_' + date_from + '_' + date_to + '_T63_monmean.nc'
    monthly_file = get_temporary_directory() + os.path.basename(monthly_name)
    cdo.monmean(options='-f nc', output=monthly_file,
                input='-remapcon,t63grid -seldate,' + date_from + ',' + date_to + ' ' + raw_file,
                force=force_calc)

    # step 2: monthly or seasonal climatology
    # (operator prefix and the options of the 'div' call differ per interval)
    if interval == 'monthly':
        prefix, div_options = 'ymon', '-f nc'
    elif interval == 'season':
        prefix, div_options = 'yseas', '-f nc -b 32'
    else:
        print(interval)
        raise ValueError('Unknown temporal interval. Can not perform preprocessing! ')

    base = monthly_file[:-3]
    clim_file = base + '_' + prefix + 'mean.nc'
    sum_file = base + '_' + prefix + 'sum.nc'
    count_file = base + '_' + prefix + 'N.nc'
    std_file = base + '_' + prefix + 'std.nc'

    getattr(cdo, prefix + 'mean')(options='-f nc -b 32', output=clim_file, input=monthly_file, force=force_calc)
    getattr(cdo, prefix + 'sum')(options='-f nc -b 32', output=sum_file, input=monthly_file, force=force_calc)
    getattr(cdo, prefix + 'std')(options='-f nc -b 32', output=std_file, input=monthly_file, force=force_calc)
    # number of samples = sum / mean
    cdo.div(options=div_options, output=count_file, input=sum_file + ' ' + clim_file, force=force_calc)

    if not os.path.exists(clim_file):
        print('File not existing (sup_clim_file): ' + clim_file)
        return None

    # step 3: read the climatology together with its standard deviation
    # and the number of samples
    flux_unit = '$W m^{-2}$'
    clim = Data(clim_file, 'rsus', read=True, label=self._unique_name, unit=flux_unit,
                lat_name='lat', lon_name='lon', shift_lon=False)

    clim_std = Data(std_file, 'rsus', read=True, label=self._unique_name + ' std', unit='-',
                    lat_name='lat', lon_name='lon', shift_lon=False)
    clim.std = clim_std.data.copy()
    del clim_std

    clim_n = Data(count_file, 'rsus', read=True, label=self._unique_name + ' std', unit='-',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    clim.n = clim_n.data.copy()
    del clim_n

    # the climatology shall always start with January:
    # assign an arbitrary year and sort by time
    clim.adjust_time(year=1700, day=15)
    clim.timsort()

    # step 4: read the monthly data
    monthly = Data(monthly_file, 'rsus', read=True, label=self._unique_name, unit=flux_unit,
                   lat_name='lat', lon_name='lon', shift_lon=False)
    monthly.adjust_time(day=15)
    if not monthly._is_monthly():
        raise ValueError('Monthly timecycle expected here!')

    field_mean = monthly.fldmean()

    # timeseries information is returned as a tuple
    retval = (monthly.time, field_mean, monthly)
    del monthly

    # note: no masking of areas with low radiation is applied here

    return clim, retval
def xxxxxxxxxxxxxxxxxxxget_surface_shortwave_radiation_down(self, interval='season', force_calc=False, **kwargs):
    """
    return data object of
    a) seasonal means for SIS
    b) global mean timeseries for SIS at original temporal resolution

    The variable 'rsds' is remapped to the T63 grid and restricted to the
    period [start_time, stop_time] using cdo. A monthly or seasonal
    climatology is derived from the monthly means and a land/sea mask is
    applied to both datasets.

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology ['monthly','season']
    force_calc : bool
        passed on as ``force`` argument to all cdo operators
    kwargs : dict
        dictionary with keys for each model type. The entry for
        ``self.type`` needs to provide the property 'valid_mask'.
        The dictionary is only read and not modified.

    Returns
    -------
    tuple ``(sis, retval)``: the climatology as a Data object (with the
    attributes ``std`` and ``n``) and the tuple
    ``(time, field mean timeseries, Data object)`` of the monthly data.
    None if the input file or the climatology file does not exist.

    Raises
    ------
    KeyError
        if no settings for ``self.type`` or no 'valid_mask' are given
    ValueError
        for unsupported model types or intervals, if start or stop time
        are missing or if the monthly data is not monthly
    """
    the_variable = 'rsds'

    # read-only access (no pop()): the settings belong to the caller and a
    # removed entry would make the next call with the same settings fail
    valid_mask = kwargs[self.type]['valid_mask']

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    if self.type == 'CMIP5':
        filename1 = self.data_dir + 'rsds' + os.sep + self.experiment + '/ready/' + self.model + '/rsds_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown model type! not supported here!')

    if not os.path.exists(filename1):
        print('WARNING file not existing: %s' % filename1)
        return None

    #/// PREPROCESSING ///
    cdo = Cdo()

    #1) select timeperiod and generate monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    print(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly,
                input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    # optag: prefix of the cdo operators; the monthly branch has always
    # called 'div' without '-b 32', this is kept as is
    if interval == 'monthly':
        optag, div_options = 'ymon', '-f nc'
    elif interval == 'season':
        optag, div_options = 'yseas', '-f nc -b 32'
    else:
        print(interval)
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    stem = file_monthly[:-3]
    sis_clim_file = stem + '_' + optag + 'mean.nc'
    sis_sum_file = stem + '_' + optag + 'sum.nc'
    sis_N_file = stem + '_' + optag + 'N.nc'
    sis_clim_std_file = stem + '_' + optag + 'std.nc'

    getattr(cdo, optag + 'mean')(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'sum')(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'std')(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
    # number of samples = sum / mean
    cdo.div(options=div_options, output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)

    if not os.path.exists(sis_clim_file):
        return None

    #3) read data
    sis = Data(sis_clim_file, 'rsds', read=True, label=self._unique_name, unit='$W m^{-2}$',
               lat_name='lat', lon_name='lon', shift_lon=False)

    sis_std = Data(sis_clim_std_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-',
                   lat_name='lat', lon_name='lon', shift_lon=False)
    sis.std = sis_std.data.copy()
    del sis_std

    sis_N = Data(sis_N_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-',
                 lat_name='lat', lon_name='lon', shift_lon=False)
    sis.n = sis_N.data.copy()
    del sis_N

    # ensure that climatology always starts with January, therefore set date and then sort
    sis.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    sis.timsort()

    #4) read monthly data
    # the unit label now carries the same '$...$' delimiters as the
    # climatology above (they were missing here before)
    sisall = Data(file_monthly, 'rsds', read=True, label=self._unique_name, unit='$W m^{-2}$',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    if not sisall._is_monthly():
        raise ValueError('Timecycle of 12 expected here!')
    sisall.adjust_time(day=15)

    # land/sea masking: Antarctica is only masked if only LAND points shall be used
    mask_antarctica = (valid_mask == 'land')
    sis._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sisall._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))

    sismean = sisall.fldmean()

    # return data as a tuple list
    retval = (sisall.time, sismean, sisall)
    del sisall

    # mask areas without radiation (set to invalid): all data < 1 W/m**2
    sis.data = np.ma.array(sis.data, mask=sis.data < 1.)

    return sis, retval
def get_model_data_generic(self, interval='season', **kwargs):
    """
    Read model data of one variable using the generic preprocessing workflow

    The name of the input file is obtained from ``self.get_raw_filename()``
    (this routine needs to be implemented by each subclass). The file is
    remapped to the target grid and restricted to the period
    [start_time, stop_time] using cdo. Monthly means and a monthly or
    seasonal climatology (mean, standard deviation and number of samples)
    are derived from it and read into Data objects. Finally a land/sea
    mask is applied.

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology ['monthly','season']
    kwargs : dict
        dictionary with keys for each model type. Then a dictionary with
        properties follows. Mandatory properties are 'variable' (name of
        the variable as the short_name in the netcdf file), 'unit',
        'file_format', 'scale_factor' and 'valid_mask'. Optional
        properties are 'lat_name', 'lon_name', 'custom_path' and 'level'.
        The settings are only read; the dictionary of the caller is left
        untouched.

    Returns
    -------
    tuple ``(mdata, retval)``: *mdata* is the climatology as a Data object
    with the additional attributes ``std``, ``n``, ``_raw_filename``,
    ``_monthly_filename``, ``_clim_filename`` and ``_varname``; *retval*
    is the tuple ``(time, field mean timeseries, Data object)`` of the
    monthly data. None is returned if no input filename could be
    determined or if the input file or the climatology file does not exist.

    Raises
    ------
    AssertionError
        if *kwargs* has no entry for ``self.type`` or if 'variable' or
        'unit' are not provided
    KeyError
        if another mandatory property is missing
    ValueError
        if start or stop time are not specified or the interval is not
        supported
    """
    # The checks below raise AssertionError explicitly instead of using
    # assert statements, which are removed when python runs with -O.
    if self.type not in kwargs:
        print('')
        # two blanks after the colon reproduce the layout of the former
        # multi-argument print statement
        print('WARNING: it is not possible to get data using generic function, as method missing:  %s %s' % (self.type, kwargs.keys()))
        raise AssertionError('No settings provided for model type: %s' % self.type)

    # Read settings and details from the keyword arguments.
    # The dictionary is deliberately accessed read-only (no pop()): it
    # belongs to the caller, and removing entries would make a second call
    # with the same settings fail.
    locdict = kwargs[self.type]

    varname = locdict.get('variable', None)
    if varname is None:
        raise AssertionError('ERROR: provide varname!')
    units = locdict.get('unit', None)
    if units is None:
        raise AssertionError('ERROR: provide unit!')

    lat_name = locdict.get('lat_name', 'lat')
    lon_name = locdict.get('lon_name', 'lon')
    # 'file_format' is not used by this routine, but it has always been a
    # mandatory setting
    if 'file_format' not in locdict:
        raise KeyError('file_format')
    scf = locdict['scale_factor']
    valid_mask = locdict['valid_mask']
    custom_path = locdict.get('custom_path', None)
    thelevel = locdict.get('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    # The result of get_raw_filename() needs to be checked BEFORE the custom
    # path is prepended; concatenating a string with None raises a TypeError.
    raw_name = self.get_raw_filename(varname, **kwargs)  # routine needs to be implemented by each subclass
    if raw_name is None:
        print_log(WARNING, 'No valid model input data')
        return None
    if custom_path is None:
        filename1 = raw_name
    else:
        filename1 = custom_path + raw_name

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    #/// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    #1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename: the last three characters of the input name
    # (e.g. '.nc') are replaced; the file is placed in the temporary directory
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print('WARNING: File not existing: ' + filename1)
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    #2) calculate monthly or seasonal climatology
    # optag: prefix of the cdo operators; div_options: the monthly branch
    # has always called 'div' without '-b 32', this is kept as is
    if interval == 'monthly':
        optag, div_options, thetime_cycle = 'ymon', '-f nc', 12
    elif interval == 'season':
        optag, div_options, thetime_cycle = 'yseas', '-f nc -b 32', 4
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    stem = file_monthly[:-3]
    mdata_clim_file = stem + '_' + optag + 'mean.nc'
    mdata_sum_file = stem + '_' + optag + 'sum.nc'
    mdata_N_file = stem + '_' + optag + 'N.nc'
    mdata_clim_std_file = stem + '_' + optag + 'std.nc'

    getattr(cdo, optag + 'mean')(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'sum')(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
    getattr(cdo, optag + 'std')(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
    # number of samples = sum / mean
    cdo.div(options=div_options, output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)

    if not os.path.exists(mdata_clim_file):
        return None

    #3) read data
    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units,
                 lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                 scale_factor=scf, level=thelevel, time_cycle=thetime_cycle)

    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std

    mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    #4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units,
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica, if only LAND points shall be used
    mask_antarctica = (valid_mask == 'land')

    # the same mask object is applied to both datasets, as the branch for
    # generic grids has always done
    if target_grid == 't63grid':
        lsmask = get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)
    else:
        lsmask = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
    mdata._apply_mask(lsmask)
    mdata_all._apply_mask(lsmask)
    del lsmask

    mdata_mean = mdata_all.fldmean()

    # keep track of the files and the variable the data originates from
    mdata._raw_filename = filename1
    mdata._monthly_filename = file_monthly
    mdata._clim_filename = mdata_clim_file
    mdata._varname = varname

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
plt.close('all')

# load some sample data
# filename = '<THEINPUTFILE>'
filename = download.get_sample_file(name='<VARNAME>', return_object=False)

# the variable 'rain' is read from the file under the name 'pr_wtr'
thevar = '<VARNAME>'
thevar = 'pr_wtr' if thevar == 'rain' else thevar

x = Data(filename, thevar, read=True)
# (two blanks after the colon keep the output of the former print statement)
print('Data dimensions:  %s' % (x.shape,))

# calculate global mean temperature timeseries
t = x.fldmean()

# plot results as a figure
f = plt.figure()
ax = f.add_subplot(111)
ax.plot(x.date, t, label='global mean')
ax.set_xlabel('Years')
ax.set_ylabel('Temperature [degC]')

# perhaps you also want to calculate some statistics like the temperature trend
from scipy import stats
import numpy as np

regression = stats.mstats.linregress(x.time, t)
slope, intercept, r_value, p_value, std_err = regression

# note that the slope has the same units like the time variable of the Data object. Here it is hours!
# if we want to express the slope in [K/decade] we need to rescale
slope = slope * 24. * 365.25 * 10.
def _do_preprocessing(self, rawfile, varname, s_start_time, s_stop_time, interval='monthly', force_calc=False, valid_mask='global', target_grid='t63grid'): """ perform preprocessing * selection of variable * temporal subsetting """ cdo = Cdo() if not os.path.exists(rawfile): print('File not existing! %s ' % rawfile) return None, None # calculate monthly means file_monthly = get_temporary_directory() + os.sep + os.path.basename(rawfile[:-3]) + '_' + varname + '_' + s_start_time + '_' + s_stop_time + '_mm.nc' if (force_calc) or (not os.path.exists(file_monthly)): cdo.monmean(options='-f nc', output=file_monthly, input='-seldate,' + s_start_time + ',' + s_stop_time + ' ' + '-selvar,' + varname + ' ' + rawfile, force=force_calc) else: pass if not os.path.exists(file_monthly): raise ValueError('Monthly preprocessing did not work! %s ' % file_monthly) # calculate monthly or seasonal climatology if interval == 'monthly': mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc' mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc' mdata_N_file = file_monthly[:-3] + '_ymonN.nc' mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc' cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc) cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc) cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc) cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc) # number of samples elif interval == 'season': mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc' mdata_sum_file = file_monthly[:-3] + '_yseassum.nc' mdata_N_file = file_monthly[:-3] + '_yseasN.nc' mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc' cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc) cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc) 
cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc) cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc) # number of samples else: raise ValueError('Unknown temporal interval. Can not perform preprocessing!') if not os.path.exists(mdata_clim_file): return None # read data if interval == 'monthly': thetime_cylce = 12 elif interval == 'season': thetime_cylce = 4 else: print interval raise ValueError('Unsupported interval!') mdata = Data(mdata_clim_file, varname, read=True, label=self.name, shift_lon=False, time_cycle=thetime_cylce, lat_name='lat', lon_name='lon') mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.name + ' std', unit='-', shift_lon=False, time_cycle=thetime_cylce, lat_name='lat', lon_name='lon') mdata.std = mdata_std.data.copy() del mdata_std mdata_N = Data(mdata_N_file, varname, read=True, label=self.name + ' std', shift_lon=False, lat_name='lat', lon_name='lon') mdata.n = mdata_N.data.copy() del mdata_N # ensure that climatology always starts with January, therefore set date and then sort mdata.adjust_time(year=1700, day=15) # set arbitrary time for climatology mdata.timsort() #4) read monthly data mdata_all = Data(file_monthly, varname, read=True, label=self.name, shift_lon=False, time_cycle=12, lat_name='lat', lon_name='lon') mdata_all.adjust_time(day=15) #mask_antarctica masks everything below 60 degree S. 
#here we only mask Antarctica, if only LAND points shall be used if valid_mask == 'land': mask_antarctica = True elif valid_mask == 'ocean': mask_antarctica = False else: mask_antarctica = False if target_grid == 't63grid': mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)) mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)) else: tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica) mdata._apply_mask(tmpmsk) mdata_all._apply_mask(tmpmsk) del tmpmsk mdata_mean = mdata_all.fldmean() # return data as a tuple list retval = (mdata_all.time, mdata_mean, mdata_all) del mdata_all return mdata, retval
def get_jsbach_data_generic(self, interval='season', **kwargs):
    """
    Read one variable of a JSBACH_RAW2 model run and return its
    climatology together with the full monthly timeseries.

    The raw stream file is reduced with CDO to monthly means for the
    period ``self.start_time`` .. ``self.stop_time`` (after applying the
    CDO operator named by the 'interpolation' plot option with the
    'targetgrid' plot option as its argument). From the monthly means a
    monthly or seasonal climatology (mean, standard deviation, number of
    samples) is generated, read into Data objects and masked with a
    land/sea mask.

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology: 'monthly' or 'season'
    kwargs : dict
        one entry per model type; the entry stored under ``self.type``
        is a dictionary of settings with the keys

        variable     - name of the variable as the short_name in the
                       netcdf file (required)
        file_format  - required, but not evaluated by this routine
        scale_factor - forwarded to Data as ``scale_factor`` (required)
        valid_mask   - forwarded to the land/sea mask routines as
                       ``area`` (required)
        unit         - optional, default 'Unit not specified'
        lat_name     - optional, default 'lat'
        lon_name     - optional, default 'lon'
        level        - optional, default None

        The settings dictionary is not modified.

    Returns
    -------
    None if ``self.type`` has no entry in kwargs, if the raw file does
    not exist or if the climatology file was not generated. Otherwise a
    tuple ``(mdata, retval)`` where ``mdata`` is the climatology (with
    the additional attributes ``std`` and ``n``) and ``retval`` is the
    tuple ``(time, field mean timeseries, monthly Data object)``.

    Raises
    ------
    KeyError
        if a required setting is missing
    ValueError
        if ``self.type`` is not 'JSBACH_RAW2', the variable is unknown,
        start/stop time are not set or the interval is not supported
    """

    if self.type not in kwargs:
        print('%s %s %s' % (
            'WARNING: it is not possible to get data using generic '
            'function, as method missing: ',
            self.type, list(kwargs.keys())))
        return None

    print(self.type)
    print(kwargs)

    # Work on a shallow copy: popping from the caller's dictionary would
    # strip the settings and break every later call reusing the same
    # configuration.
    locdict = dict(kwargs[self.type])

    # read settings and details from the keyword arguments
    # no defaults for the required keys; everything should be explicitly
    # specified in either the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Unit not specified')
    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    locdict.pop('file_format')  # required key (KeyError if absent); value unused here
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if self.type != 'JSBACH_RAW2':
        print(self.type)
        raise ValueError('Invalid data format here!')

    # define from which stream of JSBACH data needs to be taken for
    # specific variables
    stream_by_variable = {
        'swdown_acc': 'jsbach',
        'swdown_reflect_acc': 'jsbach',
        'precip_acc': 'land',
        'temp2': 'echam',
        'var14': 'albedo_vis',  # albedo vis
        'var15': 'albedo_nir',  # albedo NIR
    }
    if varname not in stream_by_variable:
        print(varname)
        raise ValueError('Unknown variable type for JSBACH_RAW2 processing!')
    filename1 = self.files[stream_by_variable[varname]]

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select timeperiod and generate monthly mean file
    gridtok = 'T63' if target_grid == 't63grid' else 'SPECIAL_GRID'

    # target filename: derived from the raw file name, stored in the
    # temporary directory
    file_monthly = (filename1[:-3] + '_' + s_start_time + '_' + s_stop_time +
                    '_' + gridtok + '_monmean.nc')
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print('WARNING: File not existing: ' + filename1)
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid +
                      ' -seldate,' + s_start_time + ',' + s_stop_time +
                      ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write(' Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    # The interval fixes the CDO operator family, the length of the
    # climatological cycle and the options used for the division.
    # NOTE(review): the monthly division has always been run without
    # '-b 32' while the seasonal one uses it; kept as is - confirm
    # whether the difference is intended.
    if interval == 'monthly':
        clim_op, time_cycle, div_options = 'ymon', 12, '-f nc'
    elif interval == 'season':
        clim_op, time_cycle, div_options = 'yseas', 4, '-f nc -b 32'
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing! ')

    stem = file_monthly[:-3]
    mdata_clim_file = stem + '_' + clim_op + 'mean.nc'
    mdata_sum_file = stem + '_' + clim_op + 'sum.nc'
    mdata_N_file = stem + '_' + clim_op + 'N.nc'
    mdata_clim_std_file = stem + '_' + clim_op + 'std.nc'

    clim_options = '-f nc -b 32'
    getattr(cdo, clim_op + 'mean')(options=clim_options, output=mdata_clim_file,
                                   input=file_monthly, force=force_calc)
    getattr(cdo, clim_op + 'sum')(options=clim_options, output=mdata_sum_file,
                                  input=file_monthly, force=force_calc)
    getattr(cdo, clim_op + 'std')(options=clim_options, output=mdata_clim_std_file,
                                  input=file_monthly, force=force_calc)
    # number of samples = sum / mean
    cdo.div(options=div_options, output=mdata_N_file,
            input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    # NOTE(review): scale_factor is applied to the mean and to the
    # sample-count file but not to the std file; kept exactly as before -
    # confirm that this is intended.
    mdata = Data(mdata_clim_file, varname, read=True, label=self.model,
                 unit=units, lat_name=lat_name, lon_name=lon_name,
                 shift_lon=False, scale_factor=scf, level=thelevel,
                 time_cycle=time_cycle)

    mdata_std = Data(mdata_clim_std_file, varname, read=True,
                     label=self.model + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=time_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std

    mdata_N = Data(mdata_N_file, varname, read=True,
                   label=self.model + ' std', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set
    # date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self.model,
                     unit=units, lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, time_cycle=12, scale_factor=scf,
                     level=thelevel)
    mdata_all.adjust_time(day=15)

    # build the land/sea mask once and apply it to both datasets
    if target_grid == 't63grid':
        land_sea_mask = get_T63_landseamask(False, area=valid_mask)
    else:
        land_sea_mask = get_generic_landseamask(False, area=valid_mask,
                                                target_grid=target_grid)
    mdata._apply_mask(land_sea_mask)
    mdata_all._apply_mask(land_sea_mask)
    del land_sea_mask

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
def get_model_data_generic(self, interval="season", **kwargs):
    """
    Read one model variable and return its climatology together with
    the full monthly timeseries.

    The raw file name is obtained from ``self.get_raw_filename()``
    (to be implemented by each subclass). The raw file is reduced with
    CDO to monthly means for the period ``self.start_time`` ..
    ``self.stop_time`` (after applying the CDO operator named by the
    'interpolation' plot option with the 'targetgrid' plot option as
    its argument). From the monthly means a monthly or seasonal
    climatology (mean, standard deviation, number of samples) is
    generated, read into Data objects and masked with a land/sea mask.

    Parameters
    ----------
    interval : str
        temporal aggregation of the climatology: 'monthly' or 'season'
    kwargs : dict
        one entry per model type; the entry stored under ``self.type``
        is a dictionary of settings with the keys

        variable     - name of the variable as the short_name in the
                       netcdf file (required)
        unit         - unit forwarded to Data (required)
        file_format  - required, but not evaluated by this routine
        scale_factor - forwarded to Data as ``scale_factor`` (required)
        valid_mask   - forwarded to the land/sea mask routines as
                       ``area``; 'land' additionally masks Antarctica
                       (required)
        lat_name     - optional, default 'lat'
        lon_name     - optional, default 'lon'
        model_suffix, model_prefix - optional, not evaluated here
        custom_path  - optional prefix for the raw file name
        level        - optional, default None

        The caller's settings dictionary is not modified.
        ``get_raw_filename()`` receives the settings with all keys
        listed above removed.

    Returns
    -------
    None if no raw file name is available, if the raw file does not
    exist or if the climatology file was not generated. Otherwise a
    tuple ``(mdata, retval)`` where ``mdata`` is the climatology (with
    the additional attributes ``std``, ``n``, ``_raw_filename``,
    ``_monthly_filename``, ``_clim_filename`` and ``_varname``) and
    ``retval`` is the tuple
    ``(time, field mean timeseries, monthly Data object)``.

    Raises
    ------
    AssertionError
        if ``self.type`` has no entry in kwargs or 'variable'/'unit'
        are not provided
    KeyError
        if another required setting is missing
    ValueError
        if start/stop time are not set or the interval is not supported
    """

    if self.type not in kwargs:
        print("")
        print("%s %s %s" % (
            "WARNING: it is not possible to get data using generic "
            "function, as method missing: ",
            self.type, list(kwargs.keys())))
        # kept as an assertion so that callers see the same exception type
        assert False

    # Work on a shallow copy: popping from the caller's dictionary would
    # strip the settings and break every later call reusing the same
    # configuration. ``kwargs`` itself is local to this call, so the copy
    # can be stored in it; get_raw_filename() then receives the reduced
    # settings exactly as it always did.
    locdict = dict(kwargs[self.type])
    kwargs[self.type] = locdict

    # read settings and details from the keyword arguments
    # no defaults for the required keys; everything should be explicitly
    # specified in either the config file or the dictionaries
    varname = locdict.pop("variable", None)
    assert varname is not None, "ERROR: provide varname!"
    units = locdict.pop("unit", None)
    assert units is not None, "ERROR: provide unit!"

    lat_name = locdict.pop("lat_name", "lat")
    lon_name = locdict.pop("lon_name", "lon")
    # the next three values are not used here; the keys are still consumed
    # so that they are not passed on to get_raw_filename()
    locdict.pop("model_suffix", None)
    locdict.pop("model_prefix", None)
    locdict.pop("file_format")  # required key (KeyError if absent)
    scf = locdict.pop("scale_factor")
    valid_mask = locdict.pop("valid_mask")
    custom_path = locdict.pop("custom_path", None)
    thelevel = locdict.pop("level", None)

    target_grid = self._actplot_options["targetgrid"]
    interpolation = self._actplot_options["interpolation"]

    # routine needs to be implemented by each subclass
    raw_name = self.get_raw_filename(varname, **kwargs)
    # check for a missing file name *before* prepending the custom path,
    # otherwise the concatenation fails with a TypeError
    if raw_name is None:
        print_log(WARNING, "No valid model input data")
        return None
    filename1 = raw_name if custom_path is None else custom_path + raw_name

    force_calc = False

    if self.start_time is None:
        raise ValueError("Start time needs to be specified")
    if self.stop_time is None:
        raise ValueError("Stop time needs to be specified")

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select timeperiod and generate monthly mean file
    gridtok = "T63" if target_grid == "t63grid" else "SPECIAL_GRID"

    # target filename: derived from the raw file name, stored in the
    # temporary directory
    file_monthly = (
        filename1[:-3] + "_" + s_start_time + "_" + s_stop_time + "_" + gridtok + "_monmean.nc"
    )
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write("\n *** Model file monthly: %s\n" % file_monthly)

    if not os.path.exists(filename1):
        print("WARNING: File not existing: " + filename1)
        return None

    cdo.monmean(
        options="-f nc",
        output=file_monthly,
        input="-" + interpolation + "," + target_grid
        + " -seldate," + s_start_time + "," + s_stop_time
        + " " + filename1,
        force=force_calc,
    )

    sys.stdout.write("\n *** Reading model data... \n")
    sys.stdout.write(" Interval: " + interval + "\n")

    # 2) calculate monthly or seasonal climatology
    # The interval fixes the CDO operator family, the length of the
    # climatological cycle and the options used for the division.
    # NOTE(review): the monthly division has always been run without
    # '-b 32' while the seasonal one uses it; kept as is - confirm
    # whether the difference is intended.
    if interval == "monthly":
        clim_op, time_cycle, div_options = "ymon", 12, "-f nc"
    elif interval == "season":
        clim_op, time_cycle, div_options = "yseas", 4, "-f nc -b 32"
    else:
        raise ValueError("Unknown temporal interval. Can not perform preprocessing!")

    stem = file_monthly[:-3]
    mdata_clim_file = stem + "_" + clim_op + "mean.nc"
    mdata_sum_file = stem + "_" + clim_op + "sum.nc"
    mdata_N_file = stem + "_" + clim_op + "N.nc"
    mdata_clim_std_file = stem + "_" + clim_op + "std.nc"

    clim_options = "-f nc -b 32"
    getattr(cdo, clim_op + "mean")(
        options=clim_options, output=mdata_clim_file, input=file_monthly, force=force_calc
    )
    getattr(cdo, clim_op + "sum")(
        options=clim_options, output=mdata_sum_file, input=file_monthly, force=force_calc
    )
    getattr(cdo, clim_op + "std")(
        options=clim_options, output=mdata_clim_std_file, input=file_monthly, force=force_calc
    )
    # number of samples = sum / mean
    cdo.div(
        options=div_options,
        output=mdata_N_file,
        input=mdata_sum_file + " " + mdata_clim_file,
        force=force_calc,
    )

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    # NOTE(review): scale_factor is applied to the mean and to the
    # sample-count file but not to the std file; kept exactly as before -
    # confirm that this is intended.
    mdata = Data(
        mdata_clim_file,
        varname,
        read=True,
        label=self._unique_name,
        unit=units,
        lat_name=lat_name,
        lon_name=lon_name,
        shift_lon=False,
        scale_factor=scf,
        level=thelevel,
        time_cycle=time_cycle,
    )

    mdata_std = Data(
        mdata_clim_std_file,
        varname,
        read=True,
        label=self._unique_name + " std",
        unit="-",
        lat_name=lat_name,
        lon_name=lon_name,
        shift_lon=False,
        level=thelevel,
        time_cycle=time_cycle,
    )
    mdata.std = mdata_std.data.copy()
    del mdata_std

    mdata_N = Data(
        mdata_N_file,
        varname,
        read=True,
        label=self._unique_name + " std",
        unit="-",
        lat_name=lat_name,
        lon_name=lon_name,
        shift_lon=False,
        scale_factor=scf,
        level=thelevel,
    )
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set
    # date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(
        file_monthly,
        varname,
        read=True,
        label=self._unique_name,
        unit=units,
        lat_name=lat_name,
        lon_name=lon_name,
        shift_lon=False,
        time_cycle=12,
        scale_factor=scf,
        level=thelevel,
    )
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S.
    # here we only mask Antarctica, if only LAND points shall be used
    mask_antarctica = valid_mask == "land"

    # build the land/sea mask once and apply it to both datasets
    if target_grid == "t63grid":
        land_sea_mask = get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica)
    else:
        land_sea_mask = get_generic_landseamask(
            False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica
        )
    mdata._apply_mask(land_sea_mask)
    mdata_all._apply_mask(land_sea_mask)
    del land_sea_mask

    mdata_mean = mdata_all.fldmean()

    # remember where the data came from
    mdata._raw_filename = filename1
    mdata._monthly_filename = file_monthly
    mdata._clim_filename = mdata_clim_file
    mdata._varname = varname

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval