Beispiel #1
0
 def val(self, val):
     """Set base frequency and multiplication factor from an input string.

     The input may start with an integer factor (e.g. ``'3hourly'``), which
     is split off and stored separately. The remaining string must either be
     contained in ``self.VALID`` or be convertible via
     :func:`_from_pandas`.

     Parameters
     ----------
     val : str
         frequency string, optionally prefixed with an integer factor

     Raises
     ------
     TemporalResolutionError
         if the input is empty, ends with a digit, cannot be resolved to a
         valid base frequency, or if the factor exceeds the maximum
         registered for the base frequency in ``self.TS_MAX_VALS``.
     """
     # empty input is rejected here explicitly (indexing it would otherwise
     # fail with an unrelated IndexError)
     if not val or val[-1].isdigit():
         raise TemporalResolutionError(
             'Invalid input for TsType: {}'.format(val))
     ival = 1
     # only a *leading* run of digits is the factor; slicing it off by
     # position avoids cutting at a later re-occurrence of the same digits
     leading = re.match(r'\d+', val)
     if leading is not None:
         ival = int(leading.group())
         val = val[leading.end():]
     if val not in self.VALID:
         try:
             val = self._from_pandas(val)
         except TemporalResolutionError:
             raise TemporalResolutionError('Invalid input. Need any valid '
                                           'ts_type: {}'.format(self.VALID))
     # a factor of 1 is always allowed, larger factors are capped per base
     if val in self.TS_MAX_VALS and ival != 1:
         if ival > self.TS_MAX_VALS[val]:
             raise TemporalResolutionError(
                 'Invalid input for ts_type {}{}. '
                 'Interval factor {} exceeds '
                 'maximum allowed for {}, which '
                 'is: {}'.format(ival, val, ival, val,
                                 self.TS_MAX_VALS[val]))
     self._val = val
     self._mulfac = ival
Beispiel #2
0
def _init_data_default_frequencies(coldata, colocation_settings):
    """Provide colocated data in daily, monthly and yearly resolution.

    Input that is in higher resolution than daily is first resampled to
    daily. Target resolutions that are higher than the (possibly resampled)
    input are left as None in both output dictionaries.

    Parameters
    ----------
    coldata
        colocated data object; must provide ``ts_type`` and ``copy()``
    colocation_settings
        passed through to :func:`_resample_time_coldata`

    Returns
    -------
    tuple
        2-element tuple ``(data_arrs, jsdate)``, both dictionaries with keys
        'daily', 'monthly' and 'yearly'. Values are the data objects and the
        results of :func:`_get_jsdate`, respectively (or None).

    Raises
    ------
    TemporalResolutionError
        if the input resolution is lower than monthly
    """
    target_freqs = ['daily', 'monthly', 'yearly']

    data_arrs = {freq: None for freq in target_freqs}
    jsdate = {freq: None for freq in target_freqs}

    current = TsType(coldata.ts_type)

    if current < TsType('monthly'):
        raise TemporalResolutionError(
            'Temporal resolution ({}) is too low for '
            'web processing, need monthly or higher'.format(current))
    if current > TsType('daily'):
        # anything finer than daily is brought down to daily first
        coldata = _resample_time_coldata(coldata, 'daily', colocation_settings)
        current = TsType('daily')

    for freq in target_freqs:
        target = TsType(freq)
        if current < target:
            # data is coarser than this target, nothing to provide
            continue
        if current == target:
            data_arrs[freq] = coldata.copy()
            jsdate[freq] = _get_jsdate(coldata)
            continue
        resampled = _resample_time_coldata(coldata, freq, colocation_settings)
        data_arrs[freq] = resampled
        jsdate[freq] = _get_jsdate(resampled)

    return (data_arrs, jsdate)
Beispiel #3
0
 def to_pandas_freq(self):
     """Return the pandas frequency string for this ts_type.

     The multiplication factor is prepended unless it equals 1.

     Raises
     ------
     TemporalResolutionError
         if the base frequency has no entry in ``self.TO_PANDAS``
     """
     base = self._val
     if base not in self.TO_PANDAS:
         raise TemporalResolutionError(
             'pandas frequency not available for {}'.format(base))
     pandas_freq = self.TO_PANDAS[base]
     factor = self._mulfac
     if factor != 1:
         pandas_freq = '{}{}'.format(factor, pandas_freq)
     return pandas_freq
Beispiel #4
0
def compute_trends_station(station, var_name, start_year=None,
                           stop_year=None, season=None, slope_confidence=0.68,
                           **alt_range):
    """Compute the trend for one variable of a station.

    A trends engine for the variable is created under
    ``station['trends'][var_name]`` if it does not exist yet. Daily and
    monthly timeseries are retrieved from the station and stored in the
    engine where missing, then the trend is computed and the engine's
    metadata is updated from the station.

    Parameters
    ----------
    station
        station data object (dict-like; must provide ``get_var_ts_type``,
        ``to_timeseries``, ``get_meta``, ``var_info`` and ``station_name``)
    var_name : str
        name of the variable
    start_year, stop_year, season, slope_confidence
        passed on unchanged to ``compute_trend`` of the trends engine
    **alt_range
        additional keyword args passed to ``station.to_timeseries``

    Returns
    -------
    result of ``compute_trend`` of the trends engine

    Raises
    ------
    TemporalResolutionError
        if no monthly data is stored yet and the temporal resolution of the
        variable fails the monthly check below
    DataCoverageError
        if the monthly timeseries is empty or contains only NaNs
    """
    # make sure the station has a trends container and an engine for var_name
    if 'trends' not in station:
        station['trends'] = od()
    if var_name in station['trends']:
        engine = station['trends'][var_name]
    else:
        engine = TrendsEngine(var_name)
        station['trends'][var_name] = engine

    freq = station.get_var_ts_type(var_name)
    known_freqs = const.GRID_IO.TS_TYPES

    if not engine.has_daily:
        freq_unknown = freq not in known_freqs
        if freq_unknown or known_freqs.index(freq) <= known_freqs.index('daily'):
            engine['daily'] = station.to_timeseries(var_name, freq='daily',
                                                    **alt_range)

    # monthly is mandatory
    if engine.has_monthly:
        monthly = engine['monthly']
    else:
        # NOTE(review): this check also fires when freq is exactly
        # 'monthly' (>=), although the message suggests monthly is fine.
        # Depends on the ordering of TS_TYPES, which is not visible here -
        # confirm.
        if (freq in known_freqs
                and known_freqs.index(freq) >= known_freqs.index('monthly')):
            raise TemporalResolutionError('Need monthly or higher')
        monthly = station.to_timeseries(var_name, freq='monthly', **alt_range)
        engine['monthly'] = monthly

    if len(monthly) == 0 or all(np.isnan(monthly)):
        raise DataCoverageError('Failed to retrieve monthly timeseries for '
                                '{} ({})'.format(station.station_name,
                                 var_name))

    if engine._mobs is None:
        engine._mobs = _make_mobs_dataframe(monthly)

    result = engine.compute_trend(start_year, stop_year, season,
                                  slope_confidence)

    # attach station (and, if available, variable) metadata to the engine
    engine.meta.update(station.get_meta(add_none_vals=True))
    if var_name in station.var_info:
        engine.meta.update(station.var_info[var_name])
    return result
Beispiel #5
0
    def _run_gridded_gridded(self, var_name=None):
        """Colocate gridded model data with gridded observation data.

        For each pair of matching model / obs variables, both datasets are
        read, the colocation frequency is reduced to the lowest resolution
        available, and the data is colocated. Depending on the settings of
        this object, results are saved and actions are written to the log.

        Parameters
        ----------
        var_name : str, optional
            passed on to :func:`_find_var_matches`

        Returns
        -------
        dict
            colocated data objects, keys are model variable names. Variables
            that were skipped or that failed (with ``raise_exceptions``
            deactivated) are not included.

        Raises
        ------
        NotImplementedError
            if obs filters remain that cannot be applied to gridded obs data
        DataCoverageError
            if an obs variable is not provided by the obs reader
        TemporalResolutionError
            if the ts_type of model or obs data is not in
            ``const.GRID_IO.TS_TYPES``
        Exception
            if reading or colocation fails and ``raise_exceptions`` is set
        """
        start, stop = start_stop(self.start, self.stop)
        model_reader = ReadGridded(self.model_id)
        obs_reader = ReadGridded(self.obs_id)

        if 'obs_filters' in self:
            remaining_filters = self._eval_obs_filters()
            if remaining_filters:
                raise NotImplementedError(
                    'Cannot apply filters {} to gridded '
                    'observation data.'.format(remaining_filters))

        obs_vars = self.obs_vars

        obs_vars_avail = obs_reader.vars_provided

        for obs_var in obs_vars:
            if obs_var not in obs_vars_avail:
                raise DataCoverageError(
                    'Variable {} is not supported by {}'.format(
                        obs_var, self.obs_id))

        var_matches = self._find_var_matches(obs_vars, model_reader, var_name)
        if self.remove_outliers:
            self._update_var_outlier_ranges(var_matches)

        all_ts_types = const.GRID_IO.TS_TYPES

        # NOTE: once lowered below, ts_type stays lowered for all variables
        # processed in later iterations of the loop
        ts_type = self.ts_type

        data_objs = {}

        for model_var, obs_var in var_matches.items():

            print_log.info('Running {} / {} ({}, {})'.format(
                self.model_id, self.obs_id, model_var, obs_var))
            try:
                model_data = self._read_gridded(reader=model_reader,
                                                var_name=model_var,
                                                start=start,
                                                stop=stop,
                                                is_model=True)
            except Exception as e:

                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.model_id, model_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue

            if model_data.ts_type not in all_ts_types:
                raise TemporalResolutionError('Invalid temporal resolution {} '
                                              'in model {}'.format(
                                                  model_data.ts_type,
                                                  self.model_id))
            try:
                obs_data = self._read_gridded(reader=obs_reader,
                                              var_name=obs_var,
                                              start=start,
                                              stop=stop,
                                              is_model=False)
            except Exception as e:
                # report the obs dataset / variable that actually failed
                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.obs_id, obs_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue

            if obs_data.ts_type not in all_ts_types:
                raise TemporalResolutionError('Invalid temporal resolution {} '
                                              'in obs {}'.format(
                                                  obs_data.ts_type,
                                                  self.obs_id))

            # update colocation ts_type, based on the available resolution in
            # model and obs.
            lowest = self.get_lowest_resolution(ts_type, model_data.ts_type,
                                                obs_data.ts_type)
            if lowest != ts_type:
                print_log.info('Updating ts_type from {} to {} (highest '
                               'available in {} / {} combination)'.format(
                                   ts_type, lowest, self.model_id,
                                   self.obs_id))
                ts_type = lowest

            if self.save_coldata:
                out_dir = chk_make_subdir(self.basedir_coldata, self.model_id)

                savename = self._coldata_savename(model_data,
                                                  start,
                                                  stop,
                                                  ts_type,
                                                  var_name=model_var)

                file_exists = self._check_coldata_exists(
                    self.model_id, savename)
                if file_exists:
                    if not self.reanalyse_existing:
                        if self._log:
                            self._write_log('SKIP: {}\n'.format(savename))
                            print_log.info('Skip {} (file already '
                                           'exists)'.format(savename))
                        continue
                    else:
                        os.remove(os.path.join(out_dir, savename))
            try:
                by = None
                if self.model_use_climatology:
                    by = to_pandas_timestamp(start).year
                coldata = colocate_gridded_gridded(
                        gridded_data=model_data,
                        gridded_data_ref=obs_data,
                        ts_type=ts_type,
                        start=start, stop=stop,
                        filter_name=self.filter_name,
                        regrid_res_deg=self.regrid_res_deg,
                        remove_outliers=self.remove_outliers,
                        vert_scheme=self.vert_scheme,
                        harmonise_units=self.harmonise_units,
                        var_outlier_ranges=self.var_outlier_ranges,
                        var_ref_outlier_ranges=self.var_ref_outlier_ranges,
                        update_baseyear_gridded=by,
                        apply_time_resampling_constraints=\
                            self.apply_time_resampling_constraints,
                        min_num_obs=self.min_num_obs,
                        colocate_time=self.colocate_time,
                        var_keep_outliers=self.model_keep_outliers,
                        var_ref_keep_outliers=self.obs_keep_outliers)
                if self.save_coldata:
                    self._save_coldata(coldata, savename, out_dir, model_var,
                                       model_data, obs_var)
                    # savename and out_dir only exist if save_coldata is
                    # active, so the write is logged only in that case
                    if self._log:
                        self._write_log('WRITE: {}\n'.format(savename))
                        print_log.info('Writing file {}'.format(savename))
                data_objs[model_var] = coldata
            except Exception as e:
                msg = ('Colocation between model {} / {} and obs {} / {} '
                       'failed: Reason {}'.format(self.model_id, model_var,
                                                  self.obs_id, obs_var,
                                                  repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')
                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
        return data_objs
Beispiel #6
0
    def resample(self, to_ts_type, input_data=None, from_ts_type=None,
                 how=None, apply_constraints=None,
                 min_num_obs=None, **kwargs):
        """Resample input data

        Parameters
        ----------
        to_ts_type : str or pyaerocom.tstype.TsType
            output resolution
        input_data : pandas.Series or xarray.DataArray
            data to be resampled
        from_ts_type : str or pyaerocom.tstype.TsType, optional
            current resolution of the data. Required if resampling
            constraints are to be applied; if None, the data is resampled
            without constraints.
        how : str
            string specifying how the data is to be aggregated, default is
            mean. Must be a string if resampling is done without constraints.
        apply_constraints : bool, optional
            if True, hierarchical resampling is applied using input
            `min_num_obs` (if provided) or else, using constraints
            specified in :attr:`SAMPLING_CONSTRAINTS`. If None,
            :attr:`APPLY_CONSTRAINTS` is used.
        min_num_obs : dict or int, optional
            integer or nested dictionary specifying minimum number of
            observations required to resample from higher to lower frequency.
            For instance, if `input_data` is hourly and `to_ts_type` is
            monthly, you may specify something like::

                min_num_obs =
                    {'monthly'  :   {'daily'  : 7},
                     'daily'    :   {'hourly' : 6}}

            to require at least 6 hours per day and 7 days per month.

        **kwargs
           additional input arguments passed to resampling method (not used
           in the hierarchical resampling steps)

        Returns
        -------
        pandas.Series or xarray.DataArray
            resampled data object

        Raises
        ------
        ValueError
            if no data is available, if `how` is not a string when
            resampling without constraints, or if `from_ts_type` is invalid
        TemporalResolutionError
            if `to_ts_type` is higher than `from_ts_type`
        """
        if how is None:
            how = 'mean'

        if not isinstance(to_ts_type, TsType):
            to_ts_type = TsType(to_ts_type)

        if input_data is not None:
            self.input_data = input_data
        if self.input_data is None:
            raise ValueError('Please provide data (Series or DataArray)')

        if apply_constraints is None:
            apply_constraints = self.APPLY_CONSTRAINTS

        # default record; overwritten below if constraints are applied
        self.last_setup = dict(apply_constraints=False,
                               min_num_obs=None,
                               how=how)

        if not apply_constraints or from_ts_type is None:
            freq = to_ts_type.to_pandas_freq()
            if not isinstance(how, str):
                raise ValueError('Temporal resampling without constraints can '
                                 'only use string type argument how (e.g. '
                                 'how=mean). Got {}'.format(how))
            return self.fun(self.input_data, freq=freq,
                            how=how, **kwargs)

        if isinstance(from_ts_type, str):
            from_ts_type = TsType(from_ts_type)

        if not isinstance(from_ts_type, TsType):
            # format the object itself: it is not a TsType at this point, so
            # accessing .val would mask this error with an AttributeError
            raise ValueError('Invalid input for from_ts_type: {}. Need valid '
                             'str or TsType. Input arg from_ts_type is '
                             'required if resampling using hierarchical '
                             'constraints (arg apply_constraints) is activated'
                             .format(from_ts_type))

        if to_ts_type > from_ts_type:
            raise TemporalResolutionError('Cannot resample time-series from {} '
                                          'to {}'
                                          .format(from_ts_type, to_ts_type))
        elif to_ts_type == from_ts_type:
            const.logger.info('Input time frequency {} equals current frequency '
                              'of data. Resampling will be applied anyways '
                              'which will introduce NaN values at missing '
                              'time stamps'.format(to_ts_type.val))

            freq = to_ts_type.to_pandas_freq()
            # NOTE(review): 'mean' is used here regardless of input how,
            # presumably because how need not be a string when constraints
            # are active - confirm
            return self.fun(self.input_data, freq=freq, how='mean',
                            **kwargs)

        if min_num_obs is None:
            min_num_obs = self.SAMPLING_CONSTRAINTS

        _idx = self._gen_idx(from_ts_type, to_ts_type, min_num_obs, how)
        data = self.input_data
        # step through the intermediate resolutions provided by _gen_idx
        for step_ts_type, mno, rshow in _idx:
            const.logger.info('TO: {} ({}, {})'.format(step_ts_type, mno, rshow))
            freq = TsType(step_ts_type).to_pandas_freq()
            data = self.fun(data, freq=freq, how=rshow,
                            min_num_obs=mno)
        self.last_setup = dict(apply_constraints=True,
                               min_num_obs=min_num_obs,
                               how=how)
        return data
Beispiel #7
0
 def _from_pandas(self, val):
     if not val in self.FROM_PANDAS:
         raise TemporalResolutionError('Invalid input: {}, need pandas '
                                       'frequency string'.format(val))
     return self.FROM_PANDAS[val]
Beispiel #8
0
 def resample(self, to_ts_type, input_data=None, from_ts_type=None,
              how='mean', apply_constraints=False,
              min_num_obs=None, **kwargs):
     """Resample input data

     Parameters
     ----------
     to_ts_type : str or pyaerocom.tstype.TsType
         output resolution
     input_data : pandas.Series or xarray.DataArray
         data to be resampled
     from_ts_type : str or pyaerocom.tstype.TsType, optional
         current resolution of the data. Required if resampling
         constraints are to be applied; if None, the data is resampled
         without constraints (and a warning is logged).
     how : str
         string specifying how the data is to be aggregated, default is mean
     apply_constraints : bool, optional
         if True, hierarchical resampling is applied using input
         `min_num_obs` (if provided) or else, using constraints
         specified in :attr:`SAMPLING_CONSTRAINTS`. If None,
         :attr:`APPLY_CONSTRAINTS` is used.
     min_num_obs : dict or int, optional
         integer or nested dictionary specifying minimum number of
         observations required to resample from higher to lower frequency.
         For instance, if `input_data` is hourly and `to_ts_type` is
         monthly, you may specify something like::

             min_num_obs =
                 {'monthly'  :   {'daily'  : 7},
                  'daily'    :   {'hourly' : 6}}

         to require at least 6 hours per day and 7 days per month.
     **kwargs
        additional input arguments passed to resampling method (not used
        in the hierarchical resampling steps)

     Returns
     -------
     pandas.Series or xarray.DataArray
         resampled data object

     Raises
     ------
     NotImplementedError
         if `to_ts_type` is not in :attr:`FREQS_SUPPORTED`
     ValueError
         if no data is available or if `from_ts_type` is invalid
     TemporalResolutionError
         if `to_ts_type` is higher than `from_ts_type`
     """
     if not isinstance(to_ts_type, TsType):
         to_ts_type = TsType(to_ts_type)

     if to_ts_type.val not in self.FREQS_SUPPORTED:
         raise NotImplementedError('Cannot resample to input frequency '
                                   '{}. Choose from: {}'
                                   .format(to_ts_type,
                                           self.FREQS_SUPPORTED.keys()))

     if input_data is not None:
         self.input_data = input_data
     if self.input_data is None:
         raise ValueError('Please provide data (Series or DataArray)')

     if apply_constraints is None:
         apply_constraints = self.APPLY_CONSTRAINTS

     if not apply_constraints:
         self.last_setup = dict(apply_constraints=False,
                                min_num_obs=None)
         return self.fun(self.input_data, freq=to_ts_type.val,
                         how=how, **kwargs)
     elif from_ts_type is None:
         # constraints were requested but cannot be evaluated without
         # knowing the current resolution -> fall back to plain resampling
         const.print_log.warning('Cannot apply time resampling constraints, '
                                 'since input from_ts_type is None. Applying '
                                 'resampling to {} without any constraints'
                                 .format(to_ts_type))
         self.last_setup = dict(apply_constraints=False,
                                min_num_obs=None)
         return self.fun(self.input_data, freq=to_ts_type.val,
                         how=how, **kwargs)

     if isinstance(from_ts_type, str):
         from_ts_type = TsType(from_ts_type)

     if not isinstance(from_ts_type, TsType):
         raise ValueError('Invalid input for from_ts_type: {}. Need valid '
                          'str or TsType. Input arg from_ts_type is '
                          'required if resampling using hierarchical '
                          'constraints (arg apply_constraints) is activated'
                          .format(from_ts_type))

     if to_ts_type > from_ts_type:
         raise TemporalResolutionError('Cannot resample time-series from {} '
                                       'to {}'
                                       .format(from_ts_type, to_ts_type))
     elif to_ts_type == from_ts_type:
         const.logger.info('Input time frequency equals current frequency '
                           'of data, ignoring any resampling constraints')
         self.last_setup = dict(apply_constraints=False,
                                min_num_obs=None)
         return self.fun(self.input_data, freq=to_ts_type.val, how=how,
                         **kwargs)

     if min_num_obs is None:
         min_num_obs = self.SAMPLING_CONSTRAINTS

     _idx = self._gen_idx(from_ts_type, to_ts_type, min_num_obs)
     data = self.input_data
     # step through the intermediate resolutions provided by _gen_idx
     for step_ts_type, mno in _idx:
         data = self.fun(data, freq=step_ts_type, how=how,
                         min_num_obs=mno)
     self.last_setup = dict(apply_constraints=True,
                            min_num_obs=min_num_obs)
     return data
Beispiel #9
0
 def to_numpy_freq(self):
     """Return the numpy frequency string for this ts_type.

     The multiplication factor is always prepended (also if it is 1).

     Raises
     ------
     TemporalResolutionError
         if the base frequency has no entry in ``self.TO_NUMPY``
     """
     base = self._val
     if base in self.TO_NUMPY:
         numpy_unit = self.TO_NUMPY[base]
         return '{}{}'.format(self.mulfac, numpy_unit)
     raise TemporalResolutionError(
         'numpy frequency not available for {}'.format(base))