Exemple #1
0
    def get_gpp_data(self, interval='season'):
        """
        get surface GPP data for JSBACH

        todo temporal aggregation of data --> or leave it to the user!
        """
        cdo = Cdo()
        v = 'var167'
        y1 = str(self.start_time)[0:10]
        y2 = str(self.stop_time)[0:10]
        rawfilename = self.data_dir + 'data/model/' + self.experiment + '_' + y1[
            0:4] + '-' + y2[0:4] + '.nc'
        times_in_file = int(''.join(cdo.ntime(input=rawfilename)))

        if interval == 'season':
            if times_in_file != 4:
                tmp_file = get_temporary_directory() + os.path.basename(
                    rawfilename)
                cdo.yseasmean(options='-f nc -b 32 -r ',
                              input='-selvar,' + v + ' ' + rawfilename,
                              output=tmp_file[:-3] + '_yseasmean.nc')
                rawfilename = tmp_file[:-3] + '_yseasmean.nc'

        if interval == 'monthly':
            if times_in_file != 12:
                tmp_file = get_temporary_directory() + os.path.basename(
                    rawfilename)
                cdo.ymonmean(options='-f nc -b 32 -r ',
                             input='-selvar,' + v + ' ' + rawfilename,
                             output=tmp_file[:-3] + '_ymonmean.nc')
                rawfilename = tmp_file[:-3] + '_ymonmean.nc'

        if not os.path.exists(rawfilename):
            return None

        filename = rawfilename

        #--- read land-sea mask
        ls_mask = get_T63_landseamask(self.shift_lon)

        #--- read SW up data
        gpp = Data4D(filename,
                     v,
                     read=True,
                     label=self.experiment + ' ' + v,
                     unit='gC m-2 a-1',
                     lat_name='lat',
                     lon_name='lon',
                     shift_lon=self.shift_lon,
                     mask=ls_mask.data.data,
                     scale_factor=3600. * 24. * 30. / 0.083)

        return gpp.sum_data4D()
Exemple #2
0
    def _get_ensemble_filename(self, the_variable, mip, realm):
        """
        get filename of ensemble mean file
        if required, then all pre-processing steps are done

        Parameters
        ----------
        the_variable : str
            variable name to be processed

        Returns
        -------
        returns filename of file with multi-ensemble means
        """

        # use model parser to generate a list of available institutes and
        # models from data directory
        data_dir = self.data_dir
        if data_dir[-1] != os.sep:
            data_dir += os.sep

        CMP = preprocessor.CMIP5ModelParser(self.data_dir)
        model_list = CMP.get_all_models()

        # model name in configuration file is assumed to be INSTITUTE:MODEL
        institute = self.model.split(':')[0]
        model = self.model.split(':')[1]

        # TODO why is the institute not in the model output name ???
        output_file = get_temporary_directory(
        ) + the_variable + '_' + mip + '_' + model + '_' + self.experiment + '_ensmean.nc'

        if institute not in model_list.keys():
            raise ValueError('Data for this institute is not existing: %s' %
                             institute)

        # do preprocessing of data from multiple ensembles if file
        # already existing, then no processing is done
        C5PP = preprocessor.CMIP5Preprocessor(data_dir,
                                              output_file,
                                              the_variable,
                                              model,
                                              self.experiment,
                                              institute=institute,
                                              mip=mip,
                                              realm=realm)

        # calculate the ensemble mean and store as file
        # also the STDV is calculated on the fly calculated
        # resulting filenames are available by C5PP.outfile_ensmean and C5PP.outfile_ensstd
        C5PP.ensemble_mean(delete=False,
                           start_time=self.start_time,
                           stop_time=self.stop_time)

        return C5PP.outfile_ensmean
Exemple #3
0
    def get_gpp_data(self, interval='season'):
        """
        get surface GPP data for JSBACH

        todo temporal aggregation of data --> or leave it to the user!
        """
        cdo = Cdo()
        v = 'var167'
        y1 = str(self.start_time)[0:10]
        y2 = str(self.stop_time)[0:10]
        rawfilename = self.data_dir + 'data/model/' + self.experiment + '_' + y1[0:4] + '-' + y2[0:4] + '.nc'
        times_in_file = int(''.join(cdo.ntime(input=rawfilename)))

        if interval == 'season':
            if times_in_file != 4:
                tmp_file = get_temporary_directory() + os.path.basename(rawfilename)
                cdo.yseasmean(options='-f nc -b 32 -r ', input='-selvar,' + v + ' ' + rawfilename, output=tmp_file[:-3] + '_yseasmean.nc')
                rawfilename = tmp_file[:-3] + '_yseasmean.nc'

        if interval == 'monthly':
            if times_in_file != 12:
                tmp_file = get_temporary_directory() + os.path.basename(rawfilename)
                cdo.ymonmean(options='-f nc -b 32 -r ', input='-selvar,' + v + ' ' + rawfilename, output=tmp_file[:-3] + '_ymonmean.nc')
                rawfilename = tmp_file[:-3] + '_ymonmean.nc'

        if not os.path.exists(rawfilename):
            return None

        filename = rawfilename

        #--- read land-sea mask
        ls_mask = get_T63_landseamask(self.shift_lon)

        #--- read SW up data
        gpp = Data4D(filename, v, read=True,
                     label=self.experiment + ' ' + v, unit='gC m-2 a-1', lat_name='lat', lon_name='lon',
                     shift_lon=self.shift_lon,
                     mask=ls_mask.data.data, scale_factor=3600. * 24. * 30. / 0.083
                     )

        return gpp.sum_data4D()
Exemple #4
0
    def _get_ensemble_filename(self, the_variable, mip, realm):
        """
        get filename of ensemble mean file
        if required, then all pre-processing steps are done

        Parameters
        ----------
        the_variable : str
            variable name to be processed

        Returns
        -------
        returns filename of file with multi-ensemble means
        """

        # use model parser to generate a list of available institutes and
        # models from data directory
        data_dir = self.data_dir
        if data_dir[-1] != os.sep:
            data_dir += os.sep

        CMP = preprocessor.CMIP5ModelParser(self.data_dir)
        model_list = CMP.get_all_models()

        # model name in configuration file is assumed to be INSTITUTE:MODEL
        institute = self.model.split(":")[0]
        model = self.model.split(":")[1]

        # TODO why is the institute not in the model output name ???
        output_file = (
            get_temporary_directory() + the_variable + "_" + mip + "_" + model + "_" + self.experiment + "_ensmean.nc"
        )

        if institute not in model_list.keys():
            raise ValueError("Data for this institute is not existing: %s" % institute)

        # do preprocessing of data from multiple ensembles if file
        # already existing, then no processing is done
        C5PP = preprocessor.CMIP5Preprocessor(
            data_dir, output_file, the_variable, model, self.experiment, institute=institute, mip=mip, realm=realm
        )

        # calculate the ensemble mean and store as file
        # also the STDV is calculated on the fly calculated
        # resulting filenames are available by C5PP.outfile_ensmean and C5PP.outfile_ensstd
        C5PP.ensemble_mean(delete=False, start_time=self.start_time, stop_time=self.stop_time)

        return C5PP.outfile_ensmean
Exemple #5
0
    def _get_ensemble_filename(self, the_variable):
        """
        get filename of ensemble mean file
        if required, then all pre-processing steps are done

        Parameters
        ----------
        the_variable : str
            variable name to be processed

        Returns
        -------
        returns filename of file with multiensemble means
        """

        # use model parser to generate a list of available institutes and
        # models from data directory
        data_dir = self.data_dir
        if data_dir[-1] != os.sep:
            data_dir += os.sep

        CMP = preprocessor.CMIP5ModelParser(self.data_dir)
        model_list = CMP.get_all_models()

        # model name in configuration file is assumed to be INSTITUTE:MODEL
        institute = self.model.split(':')[0]
        model = self.model.split(':')[1]

        # TODO why is the institute not in the model output name ???
        output_file = get_temporary_directory() + the_variable + '_Amon_' + model + '_' + self.experiment + '_ensmean.nc'

        if institute not in model_list.keys():
            raise ValueError('Data for this institute is not existing: %s' % institute)

        # do preprocessing of data from multiple ensembles if file
        # already existing, then no processing is done
        C5PP = preprocessor.CMIP5Preprocessor(data_dir, output_file,
                                              the_variable, model,
                                              self.experiment,
                                              institute=institute)
        res_file = C5PP.ensemble_mean(delete=False,
                                      start_time=self.start_time,
                                      stop_time=self.stop_time)

        return res_file
Exemple #6
0
 def test_cdo_tempdir_DefaultNoEnv(self):
     if 'CDOTEMPDIR' in os.environ.keys():
         d = os.environ.pop('CDOTEMPDIR')
     r = utils.get_temporary_directory()
     self.assertEqual(r, './')
Exemple #7
0
 def test_cdo_tempdir_fromENV1(self):
     d = '/some/directory/path/'
     os.environ.update({'CDOTEMPDIR': d})
     r = utils.get_temporary_directory()
     self.assertEqual(r, d)
Exemple #8
0
    def get_model_data_generic(self, interval='season', **kwargs):
        """
        unique parameters are:
            filename - file basename
            variable - name of the variable as the short_name in the netcdf file

            kwargs is a dictionary with keys for each model. Then a dictionary with properties follows

        """

        if not self.type in kwargs.keys():
            print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
            return None

        locdict = kwargs[self.type]

        # read settings and details from the keyword arguments
        # no defaults; everything should be explicitely specified in either the config file or the dictionaries
        varname = locdict.pop('variable')
        units = locdict.pop('unit', 'Crazy Unit')
        #interval = kwargs.pop('interval') #, 'season') #does not make sense to specifiy a default value as this option is specified by configuration file!

        lat_name = locdict.pop('lat_name', 'lat')
        lon_name = locdict.pop('lon_name', 'lon')
        model_suffix = locdict.pop('model_suffix')
        model_prefix = locdict.pop('model_prefix')
        file_format = locdict.pop('file_format')
        scf = locdict.pop('scale_factor')
        valid_mask = locdict.pop('valid_mask')
        custom_path = locdict.pop('custom_path', None)
        thelevel = locdict.pop('level', None)

        target_grid = self._actplot_options['targetgrid']
        interpolation = self._actplot_options['interpolation']

        if custom_path is None:
            filename1 = ("%s%s/merged/%s_%s_%s_%s_%s.%s" %
                        (self.data_dir, varname, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
        else:
            if self.type == 'CMIP5':
                filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                             (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
            elif self.type == 'CMIP5RAW':
                filename1 = ("%s/%s_%s_%s_%s_%s.%s" %
                             (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
            elif self.type == 'CMIP5RAWSINGLE':
                print 'todo needs implementation!'
                assert False
            elif self.type == 'CMIP3':
                filename1 = ("%s/%s_%s_%s_%s.%s" %
                             (custom_path, self.experiment, self.model, varname, model_suffix, file_format))
            else:
                print self.type
                raise ValueError('Can not generate filename: invalid model type! %s' % self.type)

        force_calc = False

        if self.start_time is None:
            raise ValueError('Start time needs to be specified')
        if self.stop_time is None:
            raise ValueError('Stop time needs to be specified')

        #/// PREPROCESSING ///
        cdo = Cdo()
        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        #1) select timeperiod and generate monthly mean file
        if target_grid == 't63grid':
            gridtok = 'T63'
        else:
            gridtok = 'SPECIAL_GRID'

        file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
        file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

        sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

        if not os.path.exists(filename1):
            print 'WARNING: File not existing: ' + filename1
            return None

        cdo.monmean(options='-f nc', output=file_monthly, input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

        sys.stdout.write('\n *** Reading model data... \n')
        sys.stdout.write('     Interval: ' + interval + '\n')

        #2) calculate monthly or seasonal climatology
        if interval == 'monthly':
            mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        elif interval == 'season':
            mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
            mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        else:
            raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

        if not os.path.exists(mdata_clim_file):
            return None

        #3) read data
        if interval == 'monthly':
            thetime_cylce = 12
        elif interval == 'season':
            thetime_cylce = 4
        else:
            print interval
            raise ValueError('Unsupported interval!')
        mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel, time_cycle=thetime_cylce)
        mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, level=thelevel, time_cycle=thetime_cylce)
        mdata.std = mdata_std.data.copy()
        del mdata_std
        mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' std', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel)
        mdata.n = mdata_N.data.copy()
        del mdata_N

        #ensure that climatology always starts with January, therefore set date and then sort
        mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        mdata.timsort()

        #4) read monthly data
        mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, time_cycle=12, scale_factor=scf, level=thelevel)
        mdata_all.adjust_time(day=15)

        #mask_antarctica masks everything below 60 degrees S.
        #here we only mask Antarctica, if only LAND points shall be used
        if valid_mask == 'land':
            mask_antarctica = True
        elif valid_mask == 'ocean':
            mask_antarctica = False
        else:
            mask_antarctica = False

        if target_grid == 't63grid':
            mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
            mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        else:
            tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
            mdata._apply_mask(tmpmsk)
            mdata_all._apply_mask(tmpmsk)
            del tmpmsk

        mdata_mean = mdata_all.fldmean()

        # return data as a tuple list
        retval = (mdata_all.time, mdata_mean, mdata_all)

        del mdata_all
        return mdata, retval
Exemple #9
0
    def xxxxxxxxxxxxxxxxxxxget_surface_shortwave_radiation_down(self, interval='season', force_calc=False, **kwargs):
        """
        return data object of
        a) seasonal means for SIS
        b) global mean timeseries for SIS at original temporal resolution
        """

        the_variable = 'rsds'

        locdict = kwargs[self.type]
        valid_mask = locdict.pop('valid_mask')

        if self.start_time is None:
            raise ValueError('Start time needs to be specified')
        if self.stop_time is None:
            raise ValueError('Stop time needs to be specified')

        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        if self.type == 'CMIP5':
            filename1 = self.data_dir + 'rsds' + os.sep + self.experiment + '/ready/' + self.model + '/rsds_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
        elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
            filename1 = self._get_ensemble_filename(the_variable)
        elif self.type == 'CMIP5RAWSINGLE':
            filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
        else:
            raise ValueError('Unknown model type! not supported here!')

        if not os.path.exists(filename1):
            print ('WARNING file not existing: %s' % filename1)
            return None

        #/// PREPROCESSING ///
        cdo = Cdo()

        #1) select timeperiod and generatget_she monthly mean file
        file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
        file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

        print file_monthly

        sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)
        cdo.monmean(options='-f nc', output=file_monthly, input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

        sys.stdout.write('\n *** Reading model data... \n')
        sys.stdout.write('     Interval: ' + interval + '\n')

        #2) calculate monthly or seasonal climatology
        if interval == 'monthly':
            sis_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            sis_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            sis_N_file = file_monthly[:-3] + '_ymonN.nc'
            sis_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
        elif interval == 'season':
            sis_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            sis_sum_file = file_monthly[:-3] + '_yseassum.nc'
            sis_N_file = file_monthly[:-3] + '_yseasN.nc'
            sis_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc -b 32', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)  # number of samples
        else:
            print interval
            raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

        if not os.path.exists(sis_clim_file):
            return None

        #3) read data
        sis = Data(sis_clim_file, 'rsds', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
        sis_std = Data(sis_clim_std_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
        sis.std = sis_std.data.copy()
        del sis_std
        sis_N = Data(sis_N_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
        sis.n = sis_N.data.copy()
        del sis_N

        #ensure that climatology always starts with January, therefore set date and then sort
        sis.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        sis.timsort()

        #4) read monthly data
        sisall = Data(file_monthly, 'rsds', read=True, label=self._unique_name, unit='W m^{-2}', lat_name='lat', lon_name='lon', shift_lon=False)
        if not sisall._is_monthly():
            raise ValueError('Timecycle of 12 expected here!')
        sisall.adjust_time(day=15)

        # land/sea masking ...
        if valid_mask == 'land':
            mask_antarctica = True
        elif valid_mask == 'ocean':
            mask_antarctica = False
        else:
            mask_antarctica = False

        sis._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
        sisall._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
        sismean = sisall.fldmean()

        # return data as a tuple list
        retval = (sisall.time, sismean, sisall)
        del sisall

        # mask areas without radiation (set to invalid): all data < 1 W/m**2
        sis.data = np.ma.array(sis.data, mask=sis.data < 1.)

        return sis, retval
Exemple #10
0
    def xxxxxget_surface_shortwave_radiation_up(self, interval='season', force_calc=False, **kwargs):

        the_variable = 'rsus'

        if self.type == 'CMIP5':
            filename1 = self.data_dir + the_variable + os.sep + self.experiment + os.sep + 'ready' + os.sep + self.model + os.sep + 'rsus_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
        elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
            filename1 = self._get_ensemble_filename(the_variable)
        elif self.type == 'CMIP5RAWSINGLE':
            filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
        else:
            raise ValueError('Unknown type! not supported here!')

        if self.start_time is None:
            raise ValueError('Start time needs to be specified')
        if self.stop_time is None:
            raise ValueError('Stop time needs to be specified')

        if not os.path.exists(filename1):
            print ('WARNING file not existing: %s' % filename1)
            return None

        # PREPROCESSING
        cdo = Cdo()
        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        #1) select timeperiod and generate monthly mean file
        file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
        file_monthly = get_temporary_directory() + os.path.basename(file_monthly)
        cdo.monmean(options='-f nc', output=file_monthly, input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

        #2) calculate monthly or seasonal climatology
        if interval == 'monthly':
            sup_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            sup_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            sup_N_file = file_monthly[:-3] + '_ymonN.nc'
            sup_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
        elif interval == 'season':
            sup_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            sup_sum_file = file_monthly[:-3] + '_yseassum.nc'
            sup_N_file = file_monthly[:-3] + '_yseasN.nc'
            sup_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc -b 32', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
        else:
            print interval
            raise ValueError('Unknown temporal interval. Can not perform preprocessing! ')

        if not os.path.exists(sup_clim_file):
            print 'File not existing (sup_clim_file): ' + sup_clim_file
            return None

        #3) read data
        sup = Data(sup_clim_file, 'rsus', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
        sup_std = Data(sup_clim_std_file, 'rsus', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
        sup.std = sup_std.data.copy()
        del sup_std
        sup_N = Data(sup_N_file, 'rsus', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
        sup.n = sup_N.data.copy()
        del sup_N

        # ensure that climatology always starts with January, therefore set date and then sort
        sup.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        sup.timsort()

        #4) read monthly data
        supall = Data(file_monthly, 'rsus', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
        supall.adjust_time(day=15)
        if not supall._is_monthly():
            raise ValueError('Monthly timecycle expected here!')
        supmean = supall.fldmean()

        #/// return data as a tuple list
        retval = (supall.time, supmean, supall)
        del supall

        #/// mask areas without radiation (set to invalid): all data < 1 W/m**2
        #sup.data = np.ma.array(sis.data,mask=sis.data < 1.)

        return sup, retval
def main():
    plt.close('all')
    if len(sys.argv) > 1:
        if len(sys.argv) == 2:
            # a single argument was provides as option
            if sys.argv[1] == 'init':
                # copy INI files and a template configuration file
                # to current directory
                create_dummy_configuration()
                sys.exit()
            else:
                file = sys.argv[1]  # name of config file
                if not os.path.exists(file):
                    raise ValueError('Configuration file can not be \
                                      found: %s' % file)
        else:
            raise ValueError('Currently not more than one command \
                               line parameter supported!')
    else:  # default
        file = 'pyCMBS.cfg'
        print('*******************************************')
        print('* WELCOME to pycmbs.py                    *')
        print('* Happy benchmarking ...                  *')
        print('*******************************************')

    ####################################################################
    # CONFIGURATION and OPTIONS
    ####################################################################

    # read configuration file
    CF = config.ConfigFile(file)

    # read plotting options
    PCFG = config.PlotOptions()
    PCFG.read(CF)
    plot_options = PCFG

    ####################################################################
    # REMOVE previous Data warnings
    ####################################################################
    outdir = CF.options['outputdir']
    if outdir[-1] != os.sep:
        outdir += os.sep
    os.environ['PYCMBS_OUTPUTDIR'] = CF.options['outputdir']
    os.environ['PYCMBS_OUTPUTFORMAT'] = CF.options['report_format']

    os.environ['DATA_WARNING_FILE'] = outdir + 'data_warnings_' \
        + CF.options['report'] + '.log'
    if os.path.exists(os.environ['DATA_WARNING_FILE']):
        os.remove(os.environ['DATA_WARNING_FILE'])

    # init regions
    REGIONS = config.AnalysisRegions()

    for thevar in plot_options.options.keys():
        if thevar in plot_options.options.keys():
            print('Variable: %s' % thevar)
            for k in plot_options.options[thevar].keys():
                print('    Observation: %s' % k)

    if CF.options['basemap']:
        f_fast = False
    else:
        f_fast = True
    shift_lon = use_basemap = not f_fast

    ########################################################################
    # TIMES
    ########################################################################
    s_start_time = CF.start_date
    s_stop_time = CF.stop_date
    start_time = pylab.num2date(pylab.datestr2num(s_start_time))
    stop_time = pylab.num2date(pylab.datestr2num(s_stop_time))

    model_dict = {'rain': {'CMIP5':
                           {
                               'variable': 'pr',
                               'unit': 'mm/day',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'model_prefix': 'Amon',
                               'file_format': 'nc',
                               'scale_factor': 86400.,
                               'valid_mask': 'ocean'
                           },


                           'JSBACH_RAW2':
                           {
                               'variable': 'precip_acc',
                               'unit': 'mm/day',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'file_format': 'nc',
                               'scale_factor': 86400.,
                               'valid_mask': 'global'
                           }
                           },


                  'evap': {'CMIP5':
                           {
                               'variable': 'evspsbl',
                               'unit': 'mm/day',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 86400.,
                               'valid_mask': 'ocean'
                           }
                           },

                  'twpa': {'CMIP5':
                           {
                               'variable': 'clwvi',
                               'unit': 'kg/m^2',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 1.,
                               'valid_mask': 'ocean'
                           }
                           },

                  'wind': {'CMIP5':
                          {
                              'variable': 'sfcWind',
                              'unit': 'm/s',
                              'lat_name': 'lat',
                              'lon_name': 'lon',
                              'model_suffix': 'ensmean',
                              'file_format': 'nc',
                              'model_prefix': 'Amon',
                              'scale_factor': 1.,
                              'valid_mask': 'ocean'
                          }
                  },

                  'wvpa': {'CMIP5':
                           {
                               'variable': 'prw',
                               'unit': 'kg m^2',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 1,
                               'valid_mask': 'ocean'
                           }
                           },

                  'late': {'CMIP5':
                           {
                               'variable': 'hfls',
                               'unit': 'W/m^2',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 1,
                               'valid_mask': 'ocean'
                           }
                           },

                  'hair': {'CMIP5':
                           {
                               'variable': 'huss',
                               'unit': '$kg/kg^2$',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 1,
                               'valid_mask': 'ocean'
                           }
                           },

                  'seaice_concentration': {'CMIP5':
                                           {
                                               'variable': 'sic',
                                               'unit': '-',
                                               'lat_name': 'lat',
                                               'lon_name': 'lon',
                                               'model_suffix': 'ens_mean_185001-200512',
                                               'file_format': 'nc',
                                               'model_prefix': 'OImon',
                                               'scale_factor': 1,
                                               'valid_mask': 'ocean',
                                               'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk'
                                           },

                                           'CMIP3':
                                           {
                                               'variable': 'SICOMO',
                                               'unit': '-',
                                               'lat_name': 'lat',
                                               'lon_name': 'lon',
                                               'model_suffix': '1860-2100.ext',
                                               'file_format': 'nc',
                                               'model_prefix': '',
                                               'scale_factor': 100.,
                                               'valid_mask': 'ocean',
                                               'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk',
                                               'level': 0
                                           },
                                           },

                  'seaice_extent': {'CMIP5':
                                   {
                                    'variable': 'sic',
                                    'unit': '-',
                                    'lat_name': 'lat',
                                    'lon_name': 'lon',
                                    'model_suffix': 'ens_mean_185001-200512',
                                    'file_format': 'nc',
                                    'model_prefix': 'OImon',
                                    'scale_factor': 1,
                                    'valid_mask': 'ocean',
                                    'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk'
                                    },

                                    'CMIP3':
                                   {
                                    'variable': 'SICOMO',
                                       'unit': '-',
                                       'lat_name': 'lat',
                                       'lon_name': 'lon',
                                       'model_suffix': '1860-2100.ext',
                                       'file_format': 'nc',
                                       'model_prefix': '',
                                       'scale_factor': 100.,
                                       'valid_mask': 'ocean',
                                       'custom_path': '/home/m300028/shared/dev/svn/pyCMBS/dirk',
                                       'level': 0
                                   },
                  },

                  'budg': {'CMIP5':
                           {
                               'variable': 'budg',
                               'unit': 'mm/d',
                               'lat_name': 'lat',
                               'lon_name': 'lon',
                               'model_suffix': 'ensmean',
                               'file_format': 'nc',
                               'model_prefix': 'Amon',
                               'scale_factor': 86400.,
                               'valid_mask': 'ocean',
                               'custom_path': '/net/nas2/export/eo/workspace/m300036/pycmbs-cmsaf/data'
                           }
                           },


                  'sis': {'JSBACH_RAW2':
                          {
                              'variable': 'swdown_acc',
                              'unit': '$W/m^2$',
                              'lat_name': 'lat',
                              'lon_name': 'lon',
                              'file_format': 'nc',
                              'scale_factor': 1.,
                              'valid_mask': 'land'
                          },
                          'CMIP5':
                          {
                              'valid_mask': 'land'
                          }

                          },

                  'surface_upward_flux': {'JSBACH_RAW2':
                                          {
                                              'variable': 'swdown_reflect_acc',
                                              'unit': '$W/m^2$',
                                              'lat_name': 'lat',
                                              'lon_name': 'lon',
                                              'file_format': 'nc',
                                              'scale_factor': 1.,
                                              'valid_mask': 'land'
                                          }
                                          },

                  'albedo_vis': {'JSBACH_RAW2':
                                 {
                                     'variable': 'var14',
                                     'unit': '-',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'file_format': 'nc',
                                     'scale_factor': 1.,
                                     'valid_mask': 'land'
                                 }
                                 },

                  'albedo_nir': {'JSBACH_RAW2':
                                 {
                                     'variable': 'var15',
                                     'unit': '-',
                                     'lat_name': 'lat',
                                     'lon_name': 'lon',
                                     'file_format': 'nc',
                                     'scale_factor': 1.,
                                     'valid_mask': 'land'
                                 }
                                 },

                  'temperature': {
                      'JSBACH_RAW2':
                      {
                          'variable': 'temp2',
                          'unit': 'K',
                          'lat_name': 'lat',
                          'lon_name': 'lon',
                          'file_format': 'nc',
                          'scale_factor': 1.,
                          'valid_mask': 'global'
                      }
                  }

                  }

    ########################################################################
    # INIT METHODS
    ########################################################################
    # names of analysis scripts for all variables ---
    scripts = CF.get_analysis_scripts()

    # get dictionary with methods how to read data for variables to be analyzed
    variables = CF.variables
    varmethods = CF.get_methods4variables(variables, model_dict)

    #/// READ DATA ///
    """
    create a Model instance for each model specified
    in the configuration file

    read the data for all variables and return a list
    of Data objects for further processing
    """

    model_cnt = 1
    proc_models = []

    for i in range(len(CF.models)):
        # assign model information from configuration
        data_dir = CF.dirs[i]
        model = CF.models[i]
        experiment = CF.experiments[i]

        #--- create model object and read data ---
        # results are stored in individual variables namex modelXXXXX
        if CF.dtypes[i].upper() == 'CMIP5':
            themodel = CMIP5Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals, lat_name='lat',
                                 lon_name='lon', label=model,
                                 start_time=start_time,
                                 stop_time=stop_time,
                                 shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'CMIP5RAW':
            themodel = CMIP5RAWData(data_dir, model, experiment, varmethods,
                                    intervals=CF.intervals, lat_name='lat',
                                    lon_name='lon', label=model,
                                    start_time=start_time,
                                    stop_time=stop_time,
                                    shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_BOT':
            themodel = JSBACH_BOT(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  start_time=start_time,
                                  stop_time=stop_time,
                                  name=model, shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW':
            themodel = JSBACH_RAW(data_dir, varmethods, experiment,
                                  intervals=CF.intervals,
                                  name=model,
                                  shift_lon=shift_lon,
                                  start_time=start_time,
                                  stop_time=stop_time
                                  )
        elif CF.dtypes[i].upper() == 'JSBACH_RAW2':
            themodel = JSBACH_RAW2(data_dir, varmethods, experiment,
                                   intervals=CF.intervals,
                                   start_time=start_time,
                                   stop_time=stop_time,
                                   name=model, shift_lon=shift_lon,
                                   model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'CMIP3':
            themodel = CMIP3Data(data_dir, model, experiment, varmethods,
                                 intervals=CF.intervals, lat_name='lat',
                                 lon_name='lon', label=model,
                                 start_time=start_time,
                                 stop_time=stop_time,
                                 shift_lon=shift_lon)
        else:
            raise ValueError('Invalid model type: %s' % CF.dtypes[i])

        #--- read data for current model ---

        # options that specify regrid options etc.
        themodel.plot_options = plot_options
        themodel.get_data()

        # copy current model to a variable named modelXXXX ---
        cmd = 'model' + str(model_cnt).zfill(4) + ' = ' \
            + 'themodel.copy(); del themodel'
        exec(cmd)  # store copy of cmip5 model in separate variable

        # append model to list of models ---
        proc_models.append('model' + str(model_cnt).zfill(4))
        model_cnt += 1

    ########################################################################
    # MULTIMODEL MEAN
    # here we have now all the model and variables read.
    # The list of all models is contained in the variable proc_models.
    f_mean_model = True
    if f_mean_model:
        # calculate climatological mean values: The models contain already
        # climatological information in the variables[] list. Thus there is
        # not need to take care for the different timesteps here. This
        # should have been handled already in the preprocessing.
        # generate instance of MeanModel to store result
        MEANMODEL = MeanModel(varmethods, intervals=CF.intervals)

        # sum up all models
        for i in range(len(proc_models)):
            exec('actmodel = ' + proc_models[i] + '.copy()')
            MEANMODEL.add_member(actmodel)
            del actmodel

        # calculate ensemble mean
        MEANMODEL.ensmean()

        # save mean model to file
        MEANMODEL.save(get_temporary_directory(), prefix='MEANMODEL_' + file[:-4])  # include filename of configuration file

        # add mean model to general list of models to process in analysis
        proc_models.append('MEANMODEL')

    ########################################################################
    # END MULTIMODEL MEAN
    ########################################################################
    ########################################################################
    # INIT reporting and plotting and diagnostics
    ########################################################################
    # Gleckler Plot
    global_gleckler = GlecklerPlot()

    # Report
    rep = Report(CF.options['report'],
                 'pyCMBS report - ' + CF.options['report'],
                 CF.options['author'],
                 outdir=outdir,
                 dpi=300, format=CF.options['report_format'])
    cmd = 'cp ' + os.environ['PYCMBSPATH'] + '/logo/Phytonlogo5.pdf ' + rep.outdir
    os.system(cmd)

    ########################################################################
    ########################################################################
    ########################################################################
    # MAIN ANALYSIS LOOP: perform analysis for each model and variable
    ########################################################################
    ########################################################################
    ########################################################################
    skeys = scripts.keys()
    for variable in variables:

        #/// register current variable in Gleckler Plot
        global_gleckler.add_variable(variable)

        #/// call analysis scripts for each variable
        for k in range(len(skeys)):
            if variable == skeys[k]:
                print 'Doing analysis for variable ... ', variable
                print '   ... ', scripts[variable]
                # model list is reformatted so it can be evaluated properly
                model_list = str(proc_models).replace("'", "")
                cmd = 'analysis.' + scripts[variable] + '(' + model_list \
                    + ',GP=global_gleckler,shift_lon=shift_lon, \
                        use_basemap=use_basemap,report=rep,\
                        interval=CF.intervals[variable],\
                        plot_options=PCFG,regions=REGIONS.regions)'
                eval(cmd)

    ########################################################################
    # GLECKLER PLOT finalization ...
    ########################################################################
    #/// generate Gleckler analysis plot for all variables and models analyzed ///
    global_gleckler.plot(vmin=-0.1, vmax=0.1, nclasses=16, show_value=False, ticks=[-0.1, -0.05, 0., 0.05, 0.1])
    oname = outdir + 'gleckler.pkl'
    if os.path.exists(oname):
        os.remove(oname)
    pickle.dump(global_gleckler.models,
                open(outdir + 'gleckler_models.pkl', 'w'))
    pickle.dump(global_gleckler.variables,
                open(outdir + 'gleckler_variables.pkl', 'w'))
    pickle.dump(global_gleckler.data,
                open(outdir + 'gleckler_data.pkl', 'w'))
    pickle.dump(global_gleckler._raw_data,
                open(outdir + 'gleckler_rawdata.pkl', 'w'))

    rep.section('Summary error statistics')
    rep.subsection('Gleckler metric')
    rep.figure(global_gleckler.fig,
               caption='Gleckler et al. (2008) model performance index',
               width='10cm')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.png', dpi=200, bbox_inches='tight')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.pdf', dpi=200, bbox_inches='tight')

    plt.close(global_gleckler.fig.number)



    # generate dictionary with observation labels for each variable
    labels_dict = {}
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = {}
        for k in varoptions.keys():  # keys of observational datasets
            if k == 'OPTIONS':
                continue
            else:
                # only add observation to legend,
                # if option in INI file is set
                if varoptions[k]['add_to_report']:
                    # generate dictionary for GlecklerPLot legend
                    thelabels.update({int(varoptions[k]['gleckler_position']): k})
        labels_dict.update({variable: thelabels})
        del thelabels

    #/// legend for gleckler plot ///
    lcnt = 1
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = labels_dict[variable]
        fl = global_gleckler._draw_legend(thelabels, title=variable.upper())
        rep.figure(fl, width='8cm', bbox_inches=None)
        fl.savefig(outdir + 'legend_portraet_' + str(lcnt).zfill(5) + '.png', bbox_inches='tight', dpi=200)
        plt.close(fl.number)
        del fl
        lcnt += 1

    #/// plot model ranking between different observational datasets ///
    rep.subsection('Model ranking consistency')
    for v in global_gleckler.variables:
        rep.subsubsection(v.upper())
        tmpfig = global_gleckler.plot_model_ranking(v, show_text=True, obslabels=labels_dict[v])
        rep.figure(tmpfig, width='8cm', bbox_inches=None,
                   caption='Model RANKING for different observational \
                   datasets: ' + v.upper())
        plt.close(tmpfig.number)
        del tmpfig

        # write a table with model ranking
        tmp_filename = outdir + 'ranking_table_' + v + '.tex'
        rep.open_table()
        global_gleckler.write_ranking_table(v, tmp_filename, fmt='latex', obslabels=labels_dict[v])
        rep.input(tmp_filename)
        rep.close_table(caption='Model rankings for variable ' + v.upper())

        # plot absolute model error
        tmpfig = global_gleckler.plot_model_error(v, obslabels=labels_dict[v])
        rep.figure(tmpfig, width='8cm', bbox_inches=None,
                   caption='Model ERROR for different observational \
                   datasets: ' + v.upper())
        plt.close(tmpfig.number)
        del tmpfig

    ########################################################################
    # CLEAN up and finish
    ########################################################################
    plt.close('all')
    rep.close()

    print('##########################################')
    print('# BENCHMARKING FINIHSED!                 #')
    print('##########################################')
Exemple #12
0
    def get_model_data_generic(self, interval='season', **kwargs):
        """
        unique parameters are:
            filename - file basename
            variable - name of the variable as the short_name in the netcdf file

            kwargs is a dictionary with keys for each model. Then a dictionary with properties follows

        """

        if not self.type in kwargs.keys():
            print ''
            print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys(
            )
            assert False

        locdict = kwargs[self.type]

        # read settings and details from the keyword arguments
        # no defaults; everything should be explicitely specified in either the config file or the dictionaries
        varname = locdict.pop('variable', None)
        #~ print self.type
        #~ print locdict.keys()
        assert varname is not None, 'ERROR: provide varname!'

        units = locdict.pop('unit', None)
        assert units is not None, 'ERROR: provide unit!'

        lat_name = locdict.pop('lat_name', 'lat')
        lon_name = locdict.pop('lon_name', 'lon')
        model_suffix = locdict.pop('model_suffix', None)
        model_prefix = locdict.pop('model_prefix', None)
        file_format = locdict.pop('file_format')
        scf = locdict.pop('scale_factor')
        valid_mask = locdict.pop('valid_mask')
        custom_path = locdict.pop('custom_path', None)
        thelevel = locdict.pop('level', None)

        target_grid = self._actplot_options['targetgrid']
        interpolation = self._actplot_options['interpolation']

        if custom_path is None:
            filename1 = self.get_raw_filename(
                varname,
                **kwargs)  # routine needs to be implemented by each subclass
        else:
            filename1 = custom_path + self.get_raw_filename(varname, **kwargs)

        if filename1 is None:
            print_log(WARNING, 'No valid model input data')
            return None

        force_calc = False

        if self.start_time is None:
            raise ValueError('Start time needs to be specified')
        if self.stop_time is None:
            raise ValueError('Stop time needs to be specified')

        #/// PREPROCESSING ///
        cdo = Cdo()
        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        #1) select timeperiod and generate monthly mean file
        if target_grid == 't63grid':
            gridtok = 'T63'
        else:
            gridtok = 'SPECIAL_GRID'

        file_monthly = filename1[:
                                 -3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
        file_monthly = get_temporary_directory() + os.path.basename(
            file_monthly)

        sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

        if not os.path.exists(filename1):
            print 'WARNING: File not existing: ' + filename1
            return None

        cdo.monmean(options='-f nc',
                    output=file_monthly,
                    input='-' + interpolation + ',' + target_grid +
                    ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' +
                    filename1,
                    force=force_calc)

        sys.stdout.write('\n *** Reading model data... \n')
        sys.stdout.write('     Interval: ' + interval + '\n')

        #2) calculate monthly or seasonal climatology
        if interval == 'monthly':
            mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32',
                         output=mdata_clim_file,
                         input=file_monthly,
                         force=force_calc)
            cdo.ymonsum(options='-f nc -b 32',
                        output=mdata_sum_file,
                        input=file_monthly,
                        force=force_calc)
            cdo.ymonstd(options='-f nc -b 32',
                        output=mdata_clim_std_file,
                        input=file_monthly,
                        force=force_calc)
            cdo.div(options='-f nc',
                    output=mdata_N_file,
                    input=mdata_sum_file + ' ' + mdata_clim_file,
                    force=force_calc)  # number of samples
        elif interval == 'season':
            mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
            mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32',
                          output=mdata_clim_file,
                          input=file_monthly,
                          force=force_calc)
            cdo.yseassum(options='-f nc -b 32',
                         output=mdata_sum_file,
                         input=file_monthly,
                         force=force_calc)
            cdo.yseasstd(options='-f nc -b 32',
                         output=mdata_clim_std_file,
                         input=file_monthly,
                         force=force_calc)
            cdo.div(options='-f nc -b 32',
                    output=mdata_N_file,
                    input=mdata_sum_file + ' ' + mdata_clim_file,
                    force=force_calc)  # number of samples
        else:
            raise ValueError(
                'Unknown temporal interval. Can not perform preprocessing!')

        if not os.path.exists(mdata_clim_file):
            return None

        #3) read data
        if interval == 'monthly':
            thetime_cylce = 12
        elif interval == 'season':
            thetime_cylce = 4
        else:
            print interval
            raise ValueError('Unsupported interval!')
        mdata = Data(mdata_clim_file,
                     varname,
                     read=True,
                     label=self._unique_name,
                     unit=units,
                     lat_name=lat_name,
                     lon_name=lon_name,
                     shift_lon=False,
                     scale_factor=scf,
                     level=thelevel,
                     time_cycle=thetime_cylce)
        mdata_std = Data(mdata_clim_std_file,
                         varname,
                         read=True,
                         label=self._unique_name + ' std',
                         unit='-',
                         lat_name=lat_name,
                         lon_name=lon_name,
                         shift_lon=False,
                         level=thelevel,
                         time_cycle=thetime_cylce)
        mdata.std = mdata_std.data.copy()
        del mdata_std
        mdata_N = Data(mdata_N_file,
                       varname,
                       read=True,
                       label=self._unique_name + ' std',
                       unit='-',
                       lat_name=lat_name,
                       lon_name=lon_name,
                       shift_lon=False,
                       scale_factor=scf,
                       level=thelevel)
        mdata.n = mdata_N.data.copy()
        del mdata_N

        # ensure that climatology always starts with January, therefore set date and then sort
        mdata.adjust_time(year=1700,
                          day=15)  # set arbitrary time for climatology
        mdata.timsort()

        #4) read monthly data
        mdata_all = Data(file_monthly,
                         varname,
                         read=True,
                         label=self._unique_name,
                         unit=units,
                         lat_name=lat_name,
                         lon_name=lon_name,
                         shift_lon=False,
                         time_cycle=12,
                         scale_factor=scf,
                         level=thelevel)
        mdata_all.adjust_time(day=15)

        #mask_antarctica masks everything below 60 degrees S.
        #here we only mask Antarctica, if only LAND points shall be used
        if valid_mask == 'land':
            mask_antarctica = True
        elif valid_mask == 'ocean':
            mask_antarctica = False
        else:
            mask_antarctica = False

        if target_grid == 't63grid':
            mdata._apply_mask(
                get_T63_landseamask(False,
                                    area=valid_mask,
                                    mask_antarctica=mask_antarctica))
            mdata_all._apply_mask(
                get_T63_landseamask(False,
                                    area=valid_mask,
                                    mask_antarctica=mask_antarctica))
        else:
            tmpmsk = get_generic_landseamask(False,
                                             area=valid_mask,
                                             target_grid=target_grid,
                                             mask_antarctica=mask_antarctica)
            mdata._apply_mask(tmpmsk)
            mdata_all._apply_mask(tmpmsk)
            del tmpmsk

        mdata_mean = mdata_all.fldmean()

        mdata._raw_filename = filename1
        mdata._monthly_filename = file_monthly
        mdata._clim_filename = mdata_clim_file
        mdata._varname = varname

        # return data as a tuple list
        retval = (mdata_all.time, mdata_mean, mdata_all)

        del mdata_all
        return mdata, retval
Exemple #13
0
    def get_model_data_generic(self, interval="season", **kwargs):
        """
        unique parameters are:
            filename - file basename
            variable - name of the variable as the short_name in the netcdf file

            kwargs is a dictionary with keys for each model. Then a dictionary with properties follows

        """

        if not self.type in kwargs.keys():
            print ""
            print "WARNING: it is not possible to get data using generic function, as method missing: ", self.type, kwargs.keys()
            assert False

        locdict = kwargs[self.type]

        # read settings and details from the keyword arguments
        # no defaults; everything should be explicitely specified in either the config file or the dictionaries
        varname = locdict.pop("variable", None)
        # ~ print self.type
        # ~ print locdict.keys()
        assert varname is not None, "ERROR: provide varname!"

        units = locdict.pop("unit", None)
        assert units is not None, "ERROR: provide unit!"

        lat_name = locdict.pop("lat_name", "lat")
        lon_name = locdict.pop("lon_name", "lon")
        model_suffix = locdict.pop("model_suffix", None)
        model_prefix = locdict.pop("model_prefix", None)
        file_format = locdict.pop("file_format")
        scf = locdict.pop("scale_factor")
        valid_mask = locdict.pop("valid_mask")
        custom_path = locdict.pop("custom_path", None)
        thelevel = locdict.pop("level", None)

        target_grid = self._actplot_options["targetgrid"]
        interpolation = self._actplot_options["interpolation"]

        if custom_path is None:
            filename1 = self.get_raw_filename(varname, **kwargs)  # routine needs to be implemented by each subclass
        else:
            filename1 = custom_path + self.get_raw_filename(varname, **kwargs)

        if filename1 is None:
            print_log(WARNING, "No valid model input data")
            return None

        force_calc = False

        if self.start_time is None:
            raise ValueError("Start time needs to be specified")
        if self.stop_time is None:
            raise ValueError("Stop time needs to be specified")

        # /// PREPROCESSING ///
        cdo = Cdo()
        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        # 1) select timeperiod and generate monthly mean file
        if target_grid == "t63grid":
            gridtok = "T63"
        else:
            gridtok = "SPECIAL_GRID"

        file_monthly = (
            filename1[:-3] + "_" + s_start_time + "_" + s_stop_time + "_" + gridtok + "_monmean.nc"
        )  # target filename
        file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

        sys.stdout.write("\n *** Model file monthly: %s\n" % file_monthly)

        if not os.path.exists(filename1):
            print "WARNING: File not existing: " + filename1
            return None

        cdo.monmean(
            options="-f nc",
            output=file_monthly,
            input="-"
            + interpolation
            + ","
            + target_grid
            + " -seldate,"
            + s_start_time
            + ","
            + s_stop_time
            + " "
            + filename1,
            force=force_calc,
        )

        sys.stdout.write("\n *** Reading model data... \n")
        sys.stdout.write("     Interval: " + interval + "\n")

        # 2) calculate monthly or seasonal climatology
        if interval == "monthly":
            mdata_clim_file = file_monthly[:-3] + "_ymonmean.nc"
            mdata_sum_file = file_monthly[:-3] + "_ymonsum.nc"
            mdata_N_file = file_monthly[:-3] + "_ymonN.nc"
            mdata_clim_std_file = file_monthly[:-3] + "_ymonstd.nc"
            cdo.ymonmean(options="-f nc -b 32", output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options="-f nc -b 32", output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options="-f nc -b 32", output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(
                options="-f nc", output=mdata_N_file, input=mdata_sum_file + " " + mdata_clim_file, force=force_calc
            )  # number of samples
        elif interval == "season":
            mdata_clim_file = file_monthly[:-3] + "_yseasmean.nc"
            mdata_sum_file = file_monthly[:-3] + "_yseassum.nc"
            mdata_N_file = file_monthly[:-3] + "_yseasN.nc"
            mdata_clim_std_file = file_monthly[:-3] + "_yseasstd.nc"
            cdo.yseasmean(options="-f nc -b 32", output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options="-f nc -b 32", output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options="-f nc -b 32", output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(
                options="-f nc -b 32",
                output=mdata_N_file,
                input=mdata_sum_file + " " + mdata_clim_file,
                force=force_calc,
            )  # number of samples
        else:
            raise ValueError("Unknown temporal interval. Can not perform preprocessing!")

        if not os.path.exists(mdata_clim_file):
            return None

        # 3) read data
        if interval == "monthly":
            thetime_cylce = 12
        elif interval == "season":
            thetime_cylce = 4
        else:
            print interval
            raise ValueError("Unsupported interval!")
        mdata = Data(
            mdata_clim_file,
            varname,
            read=True,
            label=self._unique_name,
            unit=units,
            lat_name=lat_name,
            lon_name=lon_name,
            shift_lon=False,
            scale_factor=scf,
            level=thelevel,
            time_cycle=thetime_cylce,
        )
        mdata_std = Data(
            mdata_clim_std_file,
            varname,
            read=True,
            label=self._unique_name + " std",
            unit="-",
            lat_name=lat_name,
            lon_name=lon_name,
            shift_lon=False,
            level=thelevel,
            time_cycle=thetime_cylce,
        )
        mdata.std = mdata_std.data.copy()
        del mdata_std
        mdata_N = Data(
            mdata_N_file,
            varname,
            read=True,
            label=self._unique_name + " std",
            unit="-",
            lat_name=lat_name,
            lon_name=lon_name,
            shift_lon=False,
            scale_factor=scf,
            level=thelevel,
        )
        mdata.n = mdata_N.data.copy()
        del mdata_N

        # ensure that climatology always starts with January, therefore set date and then sort
        mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        mdata.timsort()

        # 4) read monthly data
        mdata_all = Data(
            file_monthly,
            varname,
            read=True,
            label=self._unique_name,
            unit=units,
            lat_name=lat_name,
            lon_name=lon_name,
            shift_lon=False,
            time_cycle=12,
            scale_factor=scf,
            level=thelevel,
        )
        mdata_all.adjust_time(day=15)

        # mask_antarctica masks everything below 60 degrees S.
        # here we only mask Antarctica, if only LAND points shall be used
        if valid_mask == "land":
            mask_antarctica = True
        elif valid_mask == "ocean":
            mask_antarctica = False
        else:
            mask_antarctica = False

        if target_grid == "t63grid":
            mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
            mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        else:
            tmpmsk = get_generic_landseamask(
                False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica
            )
            mdata._apply_mask(tmpmsk)
            mdata_all._apply_mask(tmpmsk)
            del tmpmsk

        mdata_mean = mdata_all.fldmean()

        mdata._raw_filename = filename1
        mdata._monthly_filename = file_monthly
        mdata._clim_filename = mdata_clim_file
        mdata._varname = varname

        # return data as a tuple list
        retval = (mdata_all.time, mdata_mean, mdata_all)

        del mdata_all
        return mdata, retval
def main():
    plt.close('all')
    if len(sys.argv) > 1:
        if len(sys.argv) == 2:
            # a single argument was provides as option
            if sys.argv[1] == 'init':
                # copy INI files and a template configuration file
                # to current directory
                create_dummy_configuration()
                sys.exit()
            else:
                file = sys.argv[1]  # name of config file
                if not os.path.exists(file):
                    raise ValueError('Configuration file can not be \
                                      found: %s' % file)
        else:
            raise ValueError('Currently not more than one command \
                               line parameter supported!')
    else:  # default
        print('*******************************************')
        print('* WELCOME to pycmbs.py                    *')
        print('* Happy benchmarking ...                  *')
        print('*******************************************')
        print ''
        print 'please specify a configuration filename as argument'
        sys.exit()

    ####################################################################
    # CONFIGURATION and OPTIONS
    ####################################################################

    # read configuration file
    CF = config.ConfigFile(file)

    # read plotting options
    PCFG = config.PlotOptions()
    PCFG.read(CF)
    plot_options = PCFG

    ####################################################################
    # REMOVE previous Data warnings
    ####################################################################
    outdir = CF.options['outputdir']
    if outdir[-1] != os.sep:
        outdir += os.sep

    os.environ['PYCMBS_OUTPUTDIR'] = outdir
    os.environ['PYCMBS_OUTPUTFORMAT'] = CF.options['report_format']

    os.environ['DATA_WARNING_FILE'] = outdir + 'data_warnings_' \
        + CF.options['report'] + '.log'
    if os.path.exists(os.environ['DATA_WARNING_FILE']):
        os.remove(os.environ['DATA_WARNING_FILE'])

    for thevar in plot_options.options.keys():
        if thevar in plot_options.options.keys():
            print('Variable: %s' % thevar)
            for k in plot_options.options[thevar].keys():
                print('    Observation: %s' % k)

    if CF.options['basemap']:
        f_fast = False
    else:
        f_fast = True
    shift_lon = use_basemap = not f_fast

    ########################################################################
    # TIMES
    ########################################################################
    s_start_time = CF.start_date
    s_stop_time = CF.stop_date
    start_time = pylab.num2date(pylab.datestr2num(s_start_time))
    stop_time = pylab.num2date(pylab.datestr2num(s_stop_time))

    ########################################################################
    # INIT METHODS
    ########################################################################
    # names of analysis scripts for all variables ---
    scripts = CF.get_analysis_scripts()

    # get dictionary with methods how to read data for model variables to be
    # analyzed
    variables = CF.variables
    varmethods = CF.get_methods4variables(CF.variables)

    # READ DATA
    # create a Model instance for each model specified
    # in the configuration file
    #
    # read the data for all variables and return a list
    # of Data objects for further processing

    model_cnt = 1
    proc_models = []

    for i in range(len(CF.models)):
        # assign model information from configuration
        data_dir = CF.dirs[i]
        model = CF.models[i]
        experiment = CF.experiments[i]

        # create model object and read data
        # results are stored in individual variables namex modelXXXXX
        if CF.dtypes[i].upper() == 'CMIP5':
            themodel = CMIP5Data(data_dir,
                                 model,
                                 experiment,
                                 varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat',
                                 lon_name='lon',
                                 label=model,
                                 start_time=start_time,
                                 stop_time=stop_time,
                                 shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'CMIP5RAW':
            themodel = CMIP5RAWData(data_dir,
                                    model,
                                    experiment,
                                    varmethods,
                                    intervals=CF.intervals,
                                    lat_name='lat',
                                    lon_name='lon',
                                    label=model,
                                    start_time=start_time,
                                    stop_time=stop_time,
                                    shift_lon=shift_lon)
        elif 'CMIP5RAWSINGLE' in CF.dtypes[i].upper():
            themodel = CMIP5RAW_SINGLE(data_dir,
                                       model,
                                       experiment,
                                       varmethods,
                                       intervals=CF.intervals,
                                       lat_name='lat',
                                       lon_name='lon',
                                       label=model,
                                       start_time=start_time,
                                       stop_time=stop_time,
                                       shift_lon=shift_lon)

        elif CF.dtypes[i].upper() == 'JSBACH_BOT':
            themodel = JSBACH_BOT(data_dir,
                                  varmethods,
                                  experiment,
                                  intervals=CF.intervals,
                                  start_time=start_time,
                                  stop_time=stop_time,
                                  name=model,
                                  shift_lon=shift_lon)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW':
            themodel = JSBACH_RAW(data_dir,
                                  varmethods,
                                  experiment,
                                  intervals=CF.intervals,
                                  name=model,
                                  shift_lon=shift_lon,
                                  start_time=start_time,
                                  stop_time=stop_time)
        elif CF.dtypes[i].upper() == 'JSBACH_RAW2':
            themodel = JSBACH_RAW2(data_dir,
                                   varmethods,
                                   experiment,
                                   intervals=CF.intervals,
                                   start_time=start_time,
                                   stop_time=stop_time,
                                   name=model,
                                   shift_lon=shift_lon)  # ,
            # model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'JSBACH_SPECIAL':
            themodel = JSBACH_SPECIAL(data_dir,
                                      varmethods,
                                      experiment,
                                      intervals=CF.intervals,
                                      start_time=start_time,
                                      stop_time=stop_time,
                                      name=model,
                                      shift_lon=shift_lon)  # ,
            # model_dict=model_dict)
        elif CF.dtypes[i].upper() == 'CMIP3':
            themodel = CMIP3Data(data_dir,
                                 model,
                                 experiment,
                                 varmethods,
                                 intervals=CF.intervals,
                                 lat_name='lat',
                                 lon_name='lon',
                                 label=model,
                                 start_time=start_time,
                                 stop_time=stop_time,
                                 shift_lon=shift_lon)
        else:
            raise ValueError('Invalid model type: %s' % CF.dtypes[i])

        # read data for current model

        # options that specify regrid options etc.
        themodel._global_configuration = CF
        themodel.plot_options = plot_options
        themodel.get_data()

        # copy current model to a variable named modelXXXX
        cmd = 'model' + str(model_cnt).zfill(4) + ' = ' \
            + 'themodel.copy(); del themodel'
        exec(cmd)  # store copy of cmip5 model in separate variable

        # append model to list of models ---
        proc_models.append('model' + str(model_cnt).zfill(4))
        model_cnt += 1

    ########################################################################
    # MULTIMODEL MEAN
    # here we have now all the model and variables read.
    # The list of all models is contained in the variable proc_models.
    f_mean_model = True
    if f_mean_model:
        # calculate climatological mean values: The models contain already
        # climatological information in the variables[] list. Thus there is
        # not need to take care for the different timesteps here. This
        # should have been handled already in the preprocessing.
        # generate instance of MeanModel to store result
        MEANMODEL = MeanModel(varmethods, intervals=CF.intervals)

        # sum up all models
        for i in range(len(proc_models)):
            exec('actmodel = ' + proc_models[i] + '.copy()')
            MEANMODEL.add_member(actmodel)
            del actmodel

        # calculate ensemble mean
        MEANMODEL.ensmean()

        # save mean model to file
        # include filename of configuration file
        MEANMODEL.save(get_temporary_directory(),
                       prefix='MEANMODEL_' + file[:-4])

        # add mean model to general list of models to process in analysis
        proc_models.append('MEANMODEL')

    ########################################################################
    # END MULTIMODEL MEAN
    ########################################################################

    ########################################################################
    # INIT reporting and plotting and diagnostics
    ########################################################################
    # Gleckler Plot
    global_gleckler = GlecklerPlot()

    # Report
    rep = Report(CF.options['report'],
                 'pyCMBS report - ' + CF.options['report'],
                 CF.options['author'],
                 outdir=outdir,
                 dpi=300,
                 format=CF.options['report_format'])
    cmd = 'cp ' + os.environ['PYCMBSPATH'] + os.sep + \
        'logo' + os.sep + 'Phytonlogo5.pdf ' + rep.outdir
    os.system(cmd)

    ########################################################################
    ########################################################################
    ########################################################################
    # MAIN ANALYSIS LOOP: perform analysis for each model and variable
    ########################################################################
    ########################################################################
    ########################################################################
    skeys = scripts.keys()
    for variable in variables:

        # register current variable in Gleckler Plot
        global_gleckler.add_variable(variable)

        # call analysis scripts for each variable
        for k in range(len(skeys)):
            if variable == skeys[k]:

                print 'Doing analysis for variable ... ', variable
                print '   ... ', scripts[variable]
                # model list is reformatted so it can be evaluated properly
                model_list = str(proc_models).replace("'", "")
                cmd = 'analysis.' + scripts[variable] + '(' + model_list \
                    + ',GP=global_gleckler,shift_lon=shift_lon, \
                        use_basemap=use_basemap,report=rep,\
                        interval=CF.intervals[variable],\
                        plot_options=PCFG)'

                eval(cmd)

    ########################################################################
    # GLECKLER PLOT finalization ...
    ########################################################################
    # generate Gleckler analysis plot for all variables and models analyzed ///
    global_gleckler.plot(vmin=-0.1,
                         vmax=0.1,
                         nclasses=16,
                         show_value=False,
                         ticks=[-0.1, -0.05, 0., 0.05, 0.1])
    oname = outdir + 'gleckler.pkl'
    if os.path.exists(oname):
        os.remove(oname)
    pickle.dump(global_gleckler.models,
                open(outdir + 'gleckler_models.pkl', 'w'))
    pickle.dump(global_gleckler.variables,
                open(outdir + 'gleckler_variables.pkl', 'w'))
    pickle.dump(global_gleckler.data, open(outdir + 'gleckler_data.pkl', 'w'))
    pickle.dump(global_gleckler._raw_data,
                open(outdir + 'gleckler_rawdata.pkl', 'w'))

    rep.section('Summary error statistics')
    rep.subsection('Gleckler metric')
    rep.figure(global_gleckler.fig,
               caption='Gleckler et al. (2008) model performance index',
               width='10cm')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.png',
                                dpi=200,
                                bbox_inches='tight')
    global_gleckler.fig.savefig(outdir + 'portraet_diagram.pdf',
                                dpi=200,
                                bbox_inches='tight')

    plt.close(global_gleckler.fig.number)

    # generate dictionary with observation labels for each variable
    labels_dict = {}
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = {}
        for k in varoptions.keys():  # keys of observational datasets
            if k == 'OPTIONS':
                continue
            else:
                # only add observation to legend,
                # if option in INI file is set
                if varoptions[k]['add_to_report']:
                    # generate dictionary for GlecklerPLot legend
                    thelabels.update(
                        {int(varoptions[k]['gleckler_position']): k})
        labels_dict.update({variable: thelabels})
        del thelabels

    # legend for gleckler plot ///
    lcnt = 1
    for variable in variables:
        if variable not in PCFG.options.keys():
            continue
        varoptions = PCFG.options[variable]
        thelabels = labels_dict[variable]
        fl = global_gleckler._draw_legend(thelabels, title=variable.upper())
        if fl is not None:
            rep.figure(fl, width='8cm', bbox_inches=None)
            fl.savefig(outdir + 'legend_portraet_' + str(lcnt).zfill(5) +
                       '.png',
                       bbox_inches='tight',
                       dpi=200)
            plt.close(fl.number)
        del fl
        lcnt += 1

    # plot model ranking between different observational datasets ///
    rep.subsection('Model ranking consistency')
    for v in global_gleckler.variables:
        rep.subsubsection(v.upper())
        tmpfig = global_gleckler.plot_model_ranking(v,
                                                    show_text=True,
                                                    obslabels=labels_dict[v])
        if tmpfig is not None:
            rep.figure(tmpfig,
                       width='8cm',
                       bbox_inches=None,
                       caption='Model RANKING for different observational \
                       datasets: ' + v.upper())
            plt.close(tmpfig.number)
        del tmpfig

        # write a table with model ranking
        tmp_filename = outdir + 'ranking_table_' + v + '.tex'
        rep.open_table()
        global_gleckler.write_ranking_table(v,
                                            tmp_filename,
                                            fmt='latex',
                                            obslabels=labels_dict[v])
        rep.input(tmp_filename)
        rep.close_table(caption='Model rankings for variable ' + v.upper())

        # plot absolute model error
        tmpfig = global_gleckler.plot_model_error(v, obslabels=labels_dict[v])
        if tmpfig is not None:
            rep.figure(tmpfig,
                       width='8cm',
                       bbox_inches=None,
                       caption='Model ERROR for different observational \
                       datasets: ' + v.upper())
            plt.close(tmpfig.number)
        del tmpfig

    ########################################################################
    # CLEAN up and finish
    ########################################################################
    plt.close('all')
    rep.close()

    print('##########################################')
    print('# BENCHMARKING FINIHSED!                 #')
    print('##########################################')
Exemple #15
0
    def _do_preprocessing(self, rawfile, varname, s_start_time, s_stop_time, interval='monthly', force_calc=False, valid_mask='global', target_grid='t63grid'):
        """
        perform preprocessing
        * selection of variable
        * temporal subsetting
        """
        cdo = Cdo()

        if not os.path.exists(rawfile):
            print('File not existing! %s ' % rawfile)
            return None, None

        # calculate monthly means
        file_monthly = get_temporary_directory() + os.sep + os.path.basename(rawfile[:-3]) + '_' + varname + '_' + s_start_time + '_' + s_stop_time + '_mm.nc'
        if (force_calc) or (not os.path.exists(file_monthly)):
            cdo.monmean(options='-f nc', output=file_monthly, input='-seldate,' + s_start_time + ',' + s_stop_time + ' ' + '-selvar,' + varname + ' ' + rawfile, force=force_calc)
        else:
            pass
        if not os.path.exists(file_monthly):
            raise ValueError('Monthly preprocessing did not work! %s ' % file_monthly)

        # calculate monthly or seasonal climatology
        if interval == 'monthly':
            mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        elif interval == 'season':
            mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
            mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        else:
            raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

        if not os.path.exists(mdata_clim_file):
            return None

        # read data
        if interval == 'monthly':
            thetime_cylce = 12
        elif interval == 'season':
            thetime_cylce = 4
        else:
            print interval
            raise ValueError('Unsupported interval!')

        mdata = Data(mdata_clim_file, varname, read=True, label=self.name, shift_lon=False, time_cycle=thetime_cylce, lat_name='lat', lon_name='lon')
        mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.name + ' std', unit='-', shift_lon=False, time_cycle=thetime_cylce, lat_name='lat', lon_name='lon')
        mdata.std = mdata_std.data.copy()
        del mdata_std
        mdata_N = Data(mdata_N_file, varname, read=True, label=self.name + ' std', shift_lon=False, lat_name='lat', lon_name='lon')
        mdata.n = mdata_N.data.copy()
        del mdata_N

        # ensure that climatology always starts with January, therefore set date and then sort
        mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        mdata.timsort()

        #4) read monthly data
        mdata_all = Data(file_monthly, varname, read=True, label=self.name, shift_lon=False, time_cycle=12, lat_name='lat', lon_name='lon')
        mdata_all.adjust_time(day=15)

        #mask_antarctica masks everything below 60 degree S.
        #here we only mask Antarctica, if only LAND points shall be used
        if valid_mask == 'land':
            mask_antarctica = True
        elif valid_mask == 'ocean':
            mask_antarctica = False
        else:
            mask_antarctica = False

        if target_grid == 't63grid':
            mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
            mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        else:
            tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
            mdata._apply_mask(tmpmsk)
            mdata_all._apply_mask(tmpmsk)
            del tmpmsk

        mdata_mean = mdata_all.fldmean()

        # return data as a tuple list
        retval = (mdata_all.time, mdata_mean, mdata_all)

        del mdata_all
        return mdata, retval
Exemple #16
0
    def get_jsbach_data_generic(self, interval='season', **kwargs):
        """
        unique parameters are:
            filename - file basename
            variable - name of the variable as the short_name in the netcdf file

            kwargs is a dictionary with keys for each model. Then a dictionary with properties follows
        """

        if not self.type in kwargs.keys():
            print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
            return None

        print self.type
        print kwargs

        locdict = kwargs[self.type]

        # read settings and details from the keyword arguments
        # no defaults; everything should be explicitely specified in either the config file or the dictionaries

        varname = locdict.pop('variable')
        units = locdict.pop('unit', 'Unit not specified')

        lat_name = locdict.pop('lat_name', 'lat')
        lon_name = locdict.pop('lon_name', 'lon')
        #model_suffix = locdict.pop('model_suffix')
        #model_prefix = locdict.pop('model_prefix')
        file_format = locdict.pop('file_format')
        scf = locdict.pop('scale_factor')
        valid_mask = locdict.pop('valid_mask')
        custom_path = locdict.pop('custom_path', None)
        thelevel = locdict.pop('level', None)

        target_grid = self._actplot_options['targetgrid']
        interpolation = self._actplot_options['interpolation']

        if self.type != 'JSBACH_RAW2':
            print self.type
            raise ValueError('Invalid data format here!')

        # define from which stream of JSBACH data needs to be taken for specific variables
        if varname in ['swdown_acc', 'swdown_reflect_acc']:
            filename1 = self.files['jsbach']
        elif varname in ['precip_acc']:
            filename1 = self.files['land']
        elif varname in ['temp2']:
            filename1 = self.files['echam']
        elif varname in ['var14']:  # albedo vis
            filename1 = self.files['albedo_vis']
        elif varname in ['var15']:  # albedo NIR
            filename1 = self.files['albedo_nir']
        else:
            print varname
            raise ValueError('Unknown variable type for JSBACH_RAW2 processing!')

        force_calc = False

        if self.start_time is None:
            raise ValueError('Start time needs to be specified')
        if self.stop_time is None:
            raise ValueError('Stop time needs to be specified')

        #/// PREPROCESSING ///
        cdo = Cdo()
        s_start_time = str(self.start_time)[0:10]
        s_stop_time = str(self.stop_time)[0:10]

        #1) select timeperiod and generate monthly mean file
        if target_grid == 't63grid':
            gridtok = 'T63'
        else:
            gridtok = 'SPECIAL_GRID'

        file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
        file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

        sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

        if not os.path.exists(filename1):
            print 'WARNING: File not existing: ' + filename1
            return None

        cdo.monmean(options='-f nc', output=file_monthly, input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

        sys.stdout.write('\n *** Reading model data... \n')
        sys.stdout.write('     Interval: ' + interval + '\n')

        #2) calculate monthly or seasonal climatology
        if interval == 'monthly':
            mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
            mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
            cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        elif interval == 'season':
            mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
            mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
            mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
            mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
            cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
            cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
            cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
            cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
        else:
            raise ValueError('Unknown temporal interval. Can not perform preprocessing! ')

        if not os.path.exists(mdata_clim_file):
            return None

        #3) read data
        if interval == 'monthly':
            thetime_cylce = 12
        elif interval == 'season':
            thetime_cylce = 4
        else:
            print interval
            raise ValueError('Unsupported interval!')
        mdata = Data(mdata_clim_file, varname, read=True, label=self.model, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel, time_cycle=thetime_cylce)
        mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self.model + ' std', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, level=thelevel, time_cycle=thetime_cylce)
        mdata.std = mdata_std.data.copy()
        del mdata_std
        mdata_N = Data(mdata_N_file, varname, read=True, label=self.model + ' std', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel)
        mdata.n = mdata_N.data.copy()
        del mdata_N

        #ensure that climatology always starts with J  anuary, therefore set date and then sort
        mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
        mdata.timsort()

        #4) read monthly data
        mdata_all = Data(file_monthly, varname, read=True, label=self.model, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, time_cycle=12, scale_factor=scf, level=thelevel)
        mdata_all.adjust_time(day=15)

        if target_grid == 't63grid':
            mdata._apply_mask(get_T63_landseamask(False, area=valid_mask))
            mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask))
        else:
            tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid)
            mdata._apply_mask(tmpmsk)
            mdata_all._apply_mask(tmpmsk)
            del tmpmsk

        mdata_mean = mdata_all.fldmean()

        # return data as a tuple list
        retval = (mdata_all.time, mdata_mean, mdata_all)

        del mdata_all

        return mdata, retval
Exemple #17
0
    def _preproc_streams(self):
        """
        It is assumed that the standard JSBACH postprocessing scripts have been applied.
        Thus monthly mean data is available for each stream and code tables still need to be applied.

        This routine does the following:
        1) merge all times from individual (monthly mean) output files
        2) assign codetables to work with proper variable names
        3) aggregate data from tiles to gridbox values
        """

        print 'Preprocessing JSBACH raw data streams (may take a while) ...'

        cdo = Cdo()

        # jsbach stream
        print '   JSBACH stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            codetable = self.data_dir + 'log/' + self.experiment + '_jsbach.codes'
            tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_', dir=get_temporary_directory())  # temporary file
            #~ print self.data_dir
            #~ print self.raw_outdata
            #~ print 'Files: ', self._get_filenames_jsbach_stream()
            #~ stop
            if len(glob.glob(self._get_filenames_jsbach_stream())) > 0:  # check if input files existing at all
                print 'Mering the following files:', self._get_filenames_jsbach_stream()
                cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_jsbach_stream())
                if os.path.exists(codetable):
                    cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)  # monmean needed here, as otherwise interface does not work
                else:
                    cdo.monmean(options='-f nc', output=outfile, input=tmp)  # monmean needed here, as otherwise interface does not work
                print 'Outfile: ', outfile
                #~ os.remove(tmp)

                print 'Temporary name: ', tmp

        self.files.update({'jsbach': outfile})

        # veg stream
        print '   VEG stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_veg_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_veg.codes'
            tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_veg_', dir=get_temporary_directory())  # temporary file
            if len(glob.glob(self._get_filenames_veg_stream())) > 0:  # check if input files existing at all
                cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_veg_stream())
                if os.path.exists(codetable):
                    cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)  # monmean needed here, as otherwise interface does not work
                else:
                    cdo.monmean(options='-f nc', output=outfile, input=tmp)  # monmean needed here, as otherwise interface does not work
                os.remove(tmp)
        self.files.update({'veg': outfile})

        # veg land
        print '   LAND stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_land_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_land.codes'
            tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_land_', dir=get_temporary_directory())  # temporary file
            if len(glob.glob(self._get_filenames_land_stream())) > 0:  # check if input files existing at all
                cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_land_stream())
                if os.path.exists(codetable):
                    cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)  # monmean needed here, as otherwise interface does not work
                else:
                    cdo.monmean(options='-f nc', output=outfile, input=tmp)  # monmean needed here, as otherwise interface does not work
                os.remove(tmp)
        self.files.update({'land': outfile})

        # surf stream
        print '   SURF stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_surf_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            codetable = self.data_dir + 'log/' + self.experiment + '_jsbach_surf.codes'
            tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_jsbach_surf_', dir=get_temporary_directory())  # temporary file
            if len(glob.glob(self._get_filenames_surf_stream())) > 0:  # check if input files existing at all
                print glob.glob(self._get_filenames_surf_stream())
                cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_surf_stream())
                if os.path.exists(codetable):
                    cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)  # monmean needed here, as otherwise interface does not work
                else:
                    cdo.monmean(options='-f nc', output=outfile, input=tmp)  # monmean needed here, as otherwise interface does not work
                os.remove(tmp)

        self.files.update({'surf': outfile})

        # ECHAM BOT stream
        print '   BOT stream ...'
        outfile = get_temporary_directory() + self.experiment + '_echam6_echam_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            codetable = self.data_dir + 'log/' + self.experiment + '_echam6_echam.codes'
            tmp = tempfile.mktemp(suffix='.nc', prefix=self.experiment + '_echam6_echam_', dir=get_temporary_directory())  # temporary file
            if len(glob.glob(self._get_filenames_echam_BOT())) > 0:  # check if input files existing at all
                cdo.mergetime(options='-f nc', output=tmp, input=self._get_filenames_echam_BOT())
                if os.path.exists(codetable):
                    cdo.monmean(options='-f nc', output=outfile, input='-setpartab,' + codetable + ' ' + tmp)  # monmean needed here, as otherwise interface does not work
                else:
                    cdo.monmean(options='-f nc', output=outfile, input=tmp)  # monmean needed here, as otherwise interface does not work
                os.remove(tmp)
        self.files.update({'echam': outfile})

        # ALBEDO file
        # albedo files as preprocessed by a script of Thomas
        print '   ALBEDO VIS stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_VIS_albedo_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            if len(glob.glob(self._get_filenames_albedo_VIS())) > 0:  # check if input files existing at all
                cdo.mergetime(options='-f nc', output=outfile, input=self._get_filenames_albedo_VIS())
        self.files.update({'albedo_vis': outfile})

        print '   ALBEDO NIR stream ...'
        outfile = get_temporary_directory() + self.experiment + '_jsbach_NIR_albedo_mm_full.nc'
        if os.path.exists(outfile):
            pass
        else:
            if len(glob.glob(self._get_filenames_albedo_NIR())) > 0:  # check if input files existing at all
                cdo.mergetime(options='-f nc', output=outfile, input=self._get_filenames_albedo_NIR())
        self.files.update({'albedo_nir': outfile})
Exemple #18
0
 def test_cdo_tempdir_DefaultNoEnv(self):
     if 'CDOTEMPDIR' in os.environ.keys():
         d = os.environ.pop('CDOTEMPDIR')
     r = utils.get_temporary_directory()
     self.assertEqual(r, './')
Exemple #19
0
 def test_cdo_tempdir_fromENV1(self):
     d = '/some/directory/path/'
     os.environ.update({'CDOTEMPDIR': d})
     r = utils.get_temporary_directory()
     self.assertEqual(r, d)