Example #1
def add_file_to_log(filepath, err_msg):

    try:
        dirname = os.path.dirname(filepath)
        spl = dirname.split(os.sep)
        if spl[-1].lower() == 'renamed':
            model_or_obs_id = spl[-2]
        else:
            model_or_obs_id = spl[-1]
    except Exception:
        model_or_obs_id = 'others'
    try:
        logdir = const.LOGFILESDIR
        found = False
        logfile = os.path.join(logdir, model_or_obs_id + '.log')
        if os.path.exists(logfile):
            with open(logfile, 'r') as f:
                for line in f:
                    if filepath == line.strip():
                        found = True
                        break

        if not found:
            with open(logfile, 'a+') as f:
                f.write(filepath + '\n')
            with open(os.path.join(logdir, model_or_obs_id + '_ERR.log'),
                      'a+') as ferr:
                ferr.write('{}\n{}\n\n'.format(filepath, err_msg))
    except Exception as e:
        from pyaerocom import print_log
        const.WRITE_FILEIO_ERR_LOG = False
        print_log.info('Failed to write to file-read error logging ({}). '
                       'Deactivating logging'.format(repr(e)))
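
For context, a hedged usage sketch of the helper above; the path and error message are made up, and const.LOGFILESDIR is assumed to point at a writable directory:

# Hypothetical call: record a file that failed to read, together with the
# reason, so later reads can skip it (path and message are illustrative).
add_file_to_log('/data/models/ECMWF_CAMS/renamed/od550aer_2010.nc',
                err_msg='netCDF4 failed to open file')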
Example #2
 def CACHEDIR(self):
     """Cache directory"""
     try:
         return chk_make_subdir(self._cachedir, getpass.getuser())
     except Exception as e:
         from pyaerocom import print_log
         print_log.info('Failed to access CACHEDIR: {}\n'
                        'Deactivating caching'.format(repr(e)))
         self._caching_active = False
Example #3
 def LOGFILESDIR(self):
     """Directory where logfiles are stored"""
     try:
         logdir = chk_make_subdir(self.OUTPUTDIR, '_log')
         return logdir
     except Exception as e:
         from pyaerocom import print_log
         print_log.info('Failed to access LOGFILESDIR: {}\n'
                        'Deactivating file logging'.format(repr(e)))
         self.WRITE_FILEIO_ERR_LOG = False
Example #4
 def CACHEDIR(self):
     """Cache directory for UngriddedData objects"""
     if self._cachedir is None:
         raise IOError('Cache directory is not defined')
     try:
         return chk_make_subdir(self._cachedir, getpass.getuser())
     except Exception as e:
         from pyaerocom import print_log
         print_log.info('Failed to access CACHEDIR: {}\n'
                        'Deactivating caching'.format(repr(e)))
         self._caching_active = False
Example #5
    def _save_coldata(self, coldata, savename, out_dir, model_var, model_data,
                      obs_var):
        """Helper for saving colocateddata"""
        if model_var != model_data.var_name:
            coldata.rename_variable(model_data.var_name, model_var,
                                    model_data.data_id)
        if (isinstance(self.model_add_vars, dict)
                and obs_var in self.model_add_vars
                and self.model_add_vars[obs_var] == model_var):

            coldata.rename_variable(obs_var, model_var, self.obs_id)

        coldata.to_netcdf(out_dir, savename=savename)
        self.file_status[savename] = 'saved'
        if self._log:
            self._write_log('WRITE: {}\n'.format(savename))
            print_log.info('Writing file {}'.format(savename))
Example #6
    def _read_gridded(self, reader, var_name, start, stop, is_model=True):
        if is_model:
            vert_which = self.obs_vert_type
            if all(x == '' for x in reader.file_info.vert_code.values):
                print_log.info('Deactivating model file search by vertical '
                               'code for {}, since filenames do not include '
                               'information about vertical code (probably '
                               'AeroCom 2 convention)'.format(reader.data_id))
                vert_which = None
            ts_type_read = self.model_ts_type_read
            if self.model_use_climatology:
                start = 9999
                stop = None
        else:
            vert_which = None
            ts_type_read = self.obs_ts_type_read
        msg = ('No data files available for dataset {} ({})'.format(
            reader.data_id, var_name))
        try:
            return reader.read_var(var_name,
                                   start=start,
                                   stop=stop,
                                   ts_type=ts_type_read,
                                   flex_ts_type=self.flex_ts_type_gridded,
                                   vert_which=vert_which)
        except DataCoverageError:
            vt = None
            if is_model:
                if self.obs_vert_type in self.OBS_VERT_TYPES_ALT:
                    vt = self.OBS_VERT_TYPES_ALT[self.obs_vert_type]
                elif self.model_vert_type_alt is not None:
                    mva = self.model_vert_type_alt
                    if isinstance(mva, str):
                        vt = mva
                    elif isinstance(mva, dict) and var_name in mva:
                        vt = mva[var_name]

            if vt is None:
                raise DataCoverageError(msg)

            return reader.read_var(var_name,
                                   start=start,
                                   stop=stop,
                                   ts_type=ts_type_read,
                                   flex_ts_type=self.flex_ts_type_gridded,
                                   vert_which=vt)
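
The method above is essentially a two-stage lookup: read with the preferred vertical code and, on DataCoverageError, retry once with an alternative. A stripped-down sketch of that pattern (the function name and reduced signature are assumptions for illustration):

from pyaerocom.exceptions import DataCoverageError

def read_with_vert_fallback(reader, var_name, preferred, alternative,
                            **kwargs):
    # Try the preferred vertical code first; if no matching files are
    # found, retry once with the alternative before giving up.
    try:
        return reader.read_var(var_name, vert_which=preferred, **kwargs)
    except DataCoverageError:
        if alternative is None:
            raise
        return reader.read_var(var_name, vert_which=alternative, **kwargs)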
Example #7
 def check_output_dirs(self):
     """Checks if output directories are available and have write-access"""
     ok = True
     from pyaerocom import print_log
     if not self.dir_exists(self._outputdir) or not self._write_access(self._outputdir):
         self._outputdir = chk_make_subdir(self.HOMEDIR, self._outhomename)
     if not self._write_access(self._outputdir):
         print_log.info('Cannot establish write access to output directory {}'
                        .format(self._outputdir))
         ok = False
     if not self.dir_exists(self._cachedir) or not self._write_access(self._cachedir):
         self._cachedir = chk_make_subdir(self._outputdir, '_cache')
     if not self._write_access(self._cachedir):
          print_log.info('Cannot establish write access to cache directory '
                         '{}. Deactivating caching of files'
                         .format(self._cachedir))
         self._caching_active = False
         ok = False
     return ok
Example #8
    def BASEDIR(self, value):
        if not os.path.exists(value):
            raise IOError('Cannot change data base directory. Input directory '
                          'does not exist')

        self._obsbasedir = value
        self._modelbasedir = value

        subdirs = os.listdir(value)
        from pyaerocom import print_log
        if 'aerocom0' in subdirs:
            print_log.info('Initiating directories for lustre')
            self.read_config(self._config_ini, keep_basedirs=True)
        elif 'obsdata' in subdirs:  #test dataset

            print_log.info('Initiating directories for pyaerocom testdataset')
            self.read_config(self._config_ini_testdata, keep_basedirs=True)
            self._cachedir = os.path.join('..', '_cache')
        elif 'AMAP' in subdirs:
            print_log.info('Initiating directories for AEROCOM users database')
            self.read_config(self._config_ini_user_server, keep_basedirs=True)
        else:
            self.reload()
Example #9
    def _run_gridded_gridded(self, var_name=None):

        start, stop = start_stop(self.start, self.stop)
        model_reader = ReadGridded(self.model_id)
        obs_reader = ReadGridded(self.obs_id)

        if 'obs_filters' in self:
            remaining_filters = self._eval_obs_filters()
            if bool(remaining_filters):
                raise NotImplementedError(
                    'Cannot apply filters {} to gridded '
                    'observation data.'.format(remaining_filters))

        obs_vars = self.obs_vars

        obs_vars_avail = obs_reader.vars_provided

        for obs_var in obs_vars:
            if obs_var not in obs_vars_avail:
                raise DataCoverageError(
                    'Variable {} is not supported by {}'.format(
                        obs_var, self.obs_id))

        var_matches = self._find_var_matches(obs_vars, model_reader, var_name)
        if self.remove_outliers:
            self._update_var_outlier_ranges(var_matches)

        all_ts_types = const.GRID_IO.TS_TYPES

        ts_type = self.ts_type

        data_objs = {}

        for model_var, obs_var in var_matches.items():

            print_log.info('Running {} / {} ({}, {})'.format(
                self.model_id, self.obs_id, model_var, obs_var))
            try:
                model_data = self._read_gridded(reader=model_reader,
                                                var_name=model_var,
                                                start=start,
                                                stop=stop,
                                                is_model=True)
            except Exception as e:

                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.model_id, model_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue

            if model_data.ts_type not in all_ts_types:
                raise TemporalResolutionError('Invalid temporal resolution {} '
                                              'in model {}'.format(
                                                  model_data.ts_type,
                                                  self.model_id))
            try:
                obs_data = self._read_gridded(reader=obs_reader,
                                              var_name=obs_var,
                                              start=start,
                                              stop=stop,
                                              is_model=False)
            except Exception as e:

                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.obs_id, obs_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue

            if obs_data.ts_type not in all_ts_types:
                raise TemporalResolutionError('Invalid temporal resolution {} '
                                              'in obs {}'.format(
                                                  obs_data.ts_type,
                                                  self.obs_id))

            # update colocation ts_type, based on the available resolution in
            # model and obs.
            lowest = self.get_lowest_resolution(ts_type, model_data.ts_type,
                                                obs_data.ts_type)
            if lowest != ts_type:
                print_log.info('Updating ts_type from {} to {} (highest '
                               'available in {} / {} combination)'.format(
                                   ts_type, lowest, self.model_id,
                                   self.obs_id))
                ts_type = lowest

            if self.save_coldata:
                out_dir = chk_make_subdir(self.basedir_coldata, self.model_id)

                savename = self._coldata_savename(model_data,
                                                  start,
                                                  stop,
                                                  ts_type,
                                                  var_name=model_var)

                file_exists = self._check_coldata_exists(
                    self.model_id, savename)
                if file_exists:
                    if not self.reanalyse_existing:
                        if self._log:
                            self._write_log('SKIP: {}\n'.format(savename))
                            print_log.info('Skip {} (file already '
                                           'exists)'.format(savename))
                        continue
                    else:
                        os.remove(os.path.join(out_dir, savename))
            try:
                by = None
                if self.model_use_climatology:
                    by = to_pandas_timestamp(start).year
                coldata = colocate_gridded_gridded(
                        gridded_data=model_data,
                        gridded_data_ref=obs_data,
                        ts_type=ts_type,
                        start=start, stop=stop,
                        filter_name=self.filter_name,
                        regrid_res_deg=self.regrid_res_deg,
                        remove_outliers=self.remove_outliers,
                        vert_scheme=self.vert_scheme,
                        harmonise_units=self.harmonise_units,
                        var_outlier_ranges=self.var_outlier_ranges,
                        var_ref_outlier_ranges=self.var_ref_outlier_ranges,
                        update_baseyear_gridded=by,
                        apply_time_resampling_constraints=\
                            self.apply_time_resampling_constraints,
                        min_num_obs=self.min_num_obs,
                        colocate_time=self.colocate_time,
                        var_keep_outliers=self.model_keep_outliers,
                        var_ref_keep_outliers=self.obs_keep_outliers)
                if self.save_coldata:
                    self._save_coldata(coldata, savename, out_dir, model_var,
                                       model_data, obs_var)
                    #coldata.to_netcdf(out_dir, savename=savename)
                if self._log:
                    self._write_log('WRITE: {}\n'.format(savename))
                    print_log.info('Writing file {}'.format(savename))
                data_objs[model_var] = coldata
            except Exception as e:
                msg = ('Colocation between model {} / {} and obs {} / {} '
                       'failed: Reason {}'.format(self.model_id, model_var,
                                                  self.obs_id, obs_var,
                                                  repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg)
                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
        return data_objs
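
These _run_* methods belong to pyaerocom's colocation machinery; a hedged sketch of how such a run would typically be driven (the class name Colocator and the IDs below are assumptions based on the attributes used above):

# Hypothetical driver; attribute names mirror those accessed in the method.
col = Colocator(model_id='ECMWF_CAMS_REAN', obs_id='MODIS6.terra',
                obs_vars=['od550aer'], ts_type='monthly',
                start=2010, stop=2011, save_coldata=True)
data_objs = col._run_gridded_gridded()  # {model_var: ColocatedData}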
Example #10
    def _run_gridded_ungridded(self, var_name=None):
        """Analysis method for gridded vs. ungridded data"""
        model_reader = ReadGridded(self.model_id)

        obs_reader = ReadUngridded(self.obs_id)

        obs_vars_supported = obs_reader.get_reader(
            self.obs_id).PROVIDES_VARIABLES

        obs_vars = list(np.intersect1d(self.obs_vars, obs_vars_supported))

        if len(obs_vars) == 0:
            raise DataCoverageError(
                'No observation variable matches found for '
                '{}'.format(self.obs_id))

        var_matches = self._find_var_matches(obs_vars, model_reader, var_name)

        if self.read_opts_ungridded is not None:
            ropts = self.read_opts_ungridded
        else:
            ropts = {}
        obs_data = obs_reader.read(datasets_to_read=self.obs_id,
                                   vars_to_retrieve=obs_vars,
                                   **ropts)
        if 'obs_filters' in self:
            remaining_filters = self._eval_obs_filters()
            obs_data = obs_data.apply_filters(**remaining_filters)

        if self.remove_outliers:
            self._update_var_outlier_ranges(var_matches)

        #all_ts_types = const.GRID_IO.TS_TYPES

        data_objs = {}
        for model_var, obs_var in var_matches.items():

            ts_type = self.ts_type
            start, stop = start_stop(self.start, self.stop)
            print_log.info('Running {} / {} ({}, {})'.format(
                self.model_id, self.obs_id, model_var, obs_var))
            try:
                model_data = self._read_gridded(reader=model_reader,
                                                var_name=model_var,
                                                start=start,
                                                stop=stop,
                                                is_model=True)
            except Exception as e:

                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.model_id, model_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue
            ts_type_src = model_data.ts_type
            # =============================================================================
            #             if not model_data.ts_type in all_ts_types:
            #                 raise TemporalResolutionError('Invalid temporal resolution {} '
            #                                               'in model {}'.format(model_data.ts_type,
            #                                                                    self.model_id))
            # =============================================================================
            ignore_stats = None
            if self.ignore_station_names is not None:
                ignore_stats = self.ignore_station_names
                if isinstance(ignore_stats, dict):
                    if obs_var in ignore_stats:
                        ignore_stats = ignore_stats[obs_var]
                    else:
                        ignore_stats = None

            if TsType(ts_type_src) < TsType(ts_type):
                print_log.info('Updating ts_type from {} to {} (highest '
                               'available in model {})'.format(
                                   ts_type, ts_type_src, self.model_id))
                ts_type = ts_type_src

            if self.save_coldata:
                savename = self._coldata_savename(model_data,
                                                  start,
                                                  stop,
                                                  ts_type,
                                                  var_name=model_var)

                file_exists = self._check_coldata_exists(
                    model_data.data_id, savename)

                out_dir = chk_make_subdir(self.basedir_coldata, self.model_id)
                if file_exists:
                    if not self.reanalyse_existing:
                        if self._log:
                            self._write_log('SKIP: {}\n'.format(savename))
                            print_log.info('Skip {} (file already '
                                           'exists)'.format(savename))
                            self.file_status[savename] = 'skipped'
                        continue
                    else:
                        print_log.info(
                            'Deleting and recomputing existing '
                            'colocated data file {}'.format(savename))
                        print_log.info('REMOVE: {}\n'.format(savename))
                        os.remove(os.path.join(out_dir, savename))

            try:
                by = None
                if self.model_use_climatology:
                    by = start.year
                coldata = colocate_gridded_ungridded(
                    gridded_data=model_data,
                    ungridded_data=obs_data,
                    ts_type=ts_type,
                    start=start,
                    stop=stop,
                    var_ref=obs_var,
                    filter_name=self.filter_name,
                    regrid_res_deg=self.regrid_res_deg,
                    remove_outliers=self.remove_outliers,
                    vert_scheme=self.vert_scheme,
                    harmonise_units=self.harmonise_units,
                    var_outlier_ranges=self.var_outlier_ranges,
                    var_ref_outlier_ranges=self.var_ref_outlier_ranges,
                    update_baseyear_gridded=by,
                    ignore_station_names=ignore_stats,
                    apply_time_resampling_constraints=self.
                    apply_time_resampling_constraints,
                    min_num_obs=self.min_num_obs,
                    colocate_time=self.colocate_time,
                    var_keep_outliers=self.model_keep_outliers,
                    var_ref_keep_outliers=self.obs_keep_outliers)

                if self.save_coldata:
                    self._save_coldata(coldata, savename, out_dir, model_var,
                                       model_data, obs_var)
                data_objs[model_var] = coldata
            except Exception as e:
                msg = ('Colocation between model {} / {} and obs {} / {} '
                       'failed: Reason {}'.format(self.model_id, model_var,
                                                  self.obs_id, obs_var,
                                                  repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')
                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)

        return data_objs
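
ignore_station_names, resolved per observation variable in the loop above, accepts either a flat list applied to all variables or a per-variable dict. A hedged configuration sketch (station names are made up):

ignore_station_names = ['Karlsruhe', 'Ispra']        # applies to every obs_var
ignore_station_names = {'od550aer': ['Karlsruhe']}   # only for od550aer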
Example #11
    def read_dataset(self,
                     dataset_to_read,
                     vars_to_retrieve=None,
                     only_cached=False,
                     **kwargs):
        """Read dataset into an instance of :class:`ReadUngridded`

        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : str or list
            variable or list of variables to be imported
        only_cached : bool
            if True, no data is read from disk; only data that is already
            available as cached objects is loaded (not recommended in
            general, but useful when working offline without access to the
            database)
        **kwargs
            additional reading constraints. If any are provided, caching is
            deactivated and the data will be read from disk.

        Returns
        --------
        UngriddedData
            data object
        """
        _caching = None
        if len(kwargs) > 0:
            _caching = const.CACHING
            const.CACHING = False

            print_log.info('Received additional reading constraints, '
                           'ignoring caching')

        reader = self.get_reader(dataset_to_read)

        if vars_to_retrieve is not None:
            # Note: self.vars_to_retrieve may be None as well, then
            # default variables of each network are read
            self.vars_to_retrieve = vars_to_retrieve

        if self.vars_to_retrieve is None:
            self.vars_to_retrieve = reader.PROVIDES_VARIABLES

        vars_to_retrieve = varlist_aerocom(self.vars_to_retrieve)

        # data_dir will be None in most cases, but can be specified when
        # creating the instance, by default, data_dir is inferred automatically
        # in the reading class, using database location
        data_dir = self._get_data_dir(dataset_to_read)
        if data_dir is not None:
            if not os.path.exists(data_dir):
                raise FileNotFoundError(
                    'Trying to read {} from specified data_dir {} failed. '
                    'Directory does not exist'.format(dataset_to_read,
                                                      data_dir))
            reader._dataset_path = data_dir
            const.print_log.info(
                'Reading {} from specified data location: {}'.format(
                    dataset_to_read, data_dir))

        # Since this interface enables to load multiple datasets, each of
        # which support a number of variables, here, only the variables are
        # considered that are supported by the dataset
        vars_available = [
            var for var in vars_to_retrieve if reader.var_supported(var)
        ]
        if len(vars_available) == 0:
            raise DataRetrievalError('None of the input variables ({}) is '
                                     'supported by {} interface'.format(
                                         vars_to_retrieve, dataset_to_read))
        cache = CacheHandlerUngridded(reader)
        if not self.ignore_cache:
            # initiate cache handler
            for var in vars_available:
                try:
                    cache.check_and_load(var, force_use_outdated=only_cached)
                except Exception:
                    self.logger.exception(
                        'Fatal: compatibility error between '
                        'old cache file {} and current version '
                        'of code'.format(var))

        if not only_cached:
            vars_to_read = [
                v for v in vars_available if v not in cache.loaded_data
            ]
        else:
            vars_to_read = []

        data_read = None
        if len(vars_to_read) > 0:

            _loglevel = print_log.level
            print_log.setLevel(logging.INFO)
            data_read = reader.read(vars_to_read, **kwargs)
            print_log.setLevel(_loglevel)

            for var in vars_to_read:
                # write the cache file
                if not self.ignore_cache:
                    try:
                        cache.write(data_read, var)
                    except Exception as e:
                        _caching = False
                        print_log.warning(
                            'Failed to write to cache directory. '
                            'Error: {}. Deactivating caching in '
                            'pyaerocom'.format(repr(e)))

        if len(vars_to_read) == len(vars_available):
            data_out = data_read
        else:
            data_out = UngriddedData()
            for var in vars_available:
                if var in cache.loaded_data:
                    data_out.append(cache.loaded_data[var])
            if data_read is not None:
                data_out.append(data_read)

        if _caching is not None:
            const.CACHING = _caching
        return data_out
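
A hedged usage sketch for read_dataset; the network and variable names are assumptions (any dataset/variable pair supported by the installed pyaerocom would do):

from pyaerocom.io import ReadUngridded

reader = ReadUngridded()
# Loads from cache when possible; passing extra kwargs would disable caching
data = reader.read_dataset('AeronetSunV3Lev2.daily',
                           vars_to_retrieve='od550aer')
print(data)  # UngriddedData summary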
Example #12
    def read_datasetOLD(self,
                        dataset_to_read,
                        vars_to_retrieve=None,
                        **kwargs):
        """Read single dataset into instance of :class:`ReadUngridded`
        
        Note
        ----
        This method does not write class attribute :attr:`data` (only
        :func:`read` does)
        
        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : list
            list of variables to be retrieved. If None (default), the default
            variables of each reading routine are imported
            
        Returns
        --------
        UngriddedData
            data object
        """
        _caching = None
        if len(kwargs) > 0:
            _caching = const.CACHING
            const.CACHING = False

            print_log.info('Received additional reading constraints, '
                           'ignoring caching')
        if vars_to_retrieve is None:
            # Note: self.vars_to_retrieve may be None as well, then
            # default variables of each network are read
            vars_to_retrieve = self.vars_to_retrieve

        reader = self.get_reader(dataset_to_read)

        if vars_to_retrieve is None:
            vars_to_retrieve = reader.PROVIDES_VARIABLES
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        # Since this interface enables to load multiple datasets, each of
        # which support a number of variables, here, only the variables are
        # considered that are supported by the dataset
        vars_available = [
            var for var in vars_to_retrieve if var in reader.PROVIDES_VARIABLES
        ]

        # read the data sets
        cache_hit_flag = False

        if not self.ignore_cache:
            # initiate cache handler
            try:
                cache = CacheHandlerUngridded(reader, vars_available, **kwargs)
                if cache.check_and_load():
                    all_avail = True
                    for var in vars_available:
                        if var not in cache.loaded_data:
                            all_avail = False
                            break
                    if all_avail:
                        print_log.info(
                            'Found Cache match for {}'.format(dataset_to_read))
                        cache_hit_flag = True
                        data = cache.loaded_data
            except Exception:
                self.logger.exception(
                    'Fatal: compatibility error between old '
                    'cache file and current version of code')
                cache_hit_flag = False

        if not cache_hit_flag:
            print_log.info('No Cache match found for {} in {}. '
                           'Reading from files (this '
                           'may take a while)'.format(dataset_to_read,
                                                      const.CACHEDIR))
            _loglevel = print_log.level
            print_log.setLevel(logging.INFO)
            data = reader.read(vars_available, **kwargs)
            print_log.setLevel(_loglevel)

        self.revision[dataset_to_read] = reader.data_revision
        self.data_version[dataset_to_read] = reader.__version__

        # write the cache file
        if not cache_hit_flag and not self.ignore_cache:
            try:
                cache.write(data)
            except Exception as e:
                _caching = False
                print_log.warning('Failed to write to cache directory:\n{}.\n'
                                  'Deactivating caching in pyaerocom'.format(
                                      repr(e)))

        if _caching is not None:
            const.CACHING = _caching
        return data
Example #13
def get_topo_data(lat0, lon0, lat1=None, lon1=None, topo_dataset='srtm', 
                  topodata_loc=None, try_etopo1=False):
    """Retrieve topographic altitude for a certain location
    
    Currently works only if :mod:`geonum` is installed. Supports the
    topography datasets provided by geonum, which are currently (20 Feb. 19)
    srtm (SRTM dataset, default, accessed automatically if online) and etopo1
    (ETOPO1 dataset, lower resolution, must be available on the local machine
    or server).
    
    Parameters
    ----------
    lat0 : float
        start latitude for data extraction
    lon0 : float
        start longitude for data extraction
    lat1 : float
        stop latitude for data extraction (default: None). If None, only
        data around lat0, lon0 will be extracted.
    lon1 : float
        stop longitude for data extraction (default: None).
        If None, only data around lat0, lon0 will be extracted
    topo_dataset : str
        name of topography dataset
    topodata_loc : str
        filepath or directory containing supported topographic datasets
    try_etopo1 : bool
        if True and if access fails via input arg `topo_dataset`, then try
        to access altitude using ETOPO1 dataset.
        
    Returns
    -------
    geonum.TopoData
        data object containing topography data in specified range 
    
    Raises
    ------
    ValueError
        if altitude data cannot be accessed
    """
    if not GEONUM_AVAILABLE:
        raise ModuleNotFoundError('Feature disabled: geonum library is not '
                                  'installed')
    import geonum
    if topodata_loc is None:
        from pyaerocom import const
        if topo_dataset in const.SUPPLDIRS and os.path.exists(
                const.SUPPLDIRS[topo_dataset]):
            topodata_loc = const.SUPPLDIRS[topo_dataset]
            print_log.info('Found default location for {} topodata at\n{}'
                           .format(topo_dataset, topodata_loc))
        
    try:
        access = geonum.TopoDataAccess(topo_dataset, local_path=topodata_loc)
        return access.get_data(lat0, lon0, lat1, lon1)
    except Exception as e:
        if try_etopo1 and topo_dataset != 'etopo1':
            print_log.warning('Failed to access topography data for {}. '
                              'Trying ETOPO1.\nError: {}'.format(topo_dataset,
                                                                 repr(e)))
            return get_topo_data(lat0, lon0, lat1, lon1, 
                                 topo_dataset='etopo1', 
                                 topodata_loc=topodata_loc,
                                 try_etopo1=False)
        raise
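
A usage sketch, assuming geonum is installed; with topo_dataset='srtm' and internet access, the tile is retrieved automatically (coordinates are illustrative):

# Single-point lookup near Mt. Etna; fall back to ETOPO1 if SRTM fails.
topo = get_topo_data(37.75, 15.0, topo_dataset='srtm', try_etopo1=True)
print(topo)  # geonum.TopoData covering the requested range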
Example #14
 def _run_gridded_gridded(self):
 
     start, stop = self.start, self.stop
     model_reader = ReadGridded(self.model_id, start, stop)
     obs_reader = ReadGridded(self.obs_id, start, stop)
 
     vars_to_analyse = self.vars_to_analyse
     if vars_to_analyse is None:
         vars_to_analyse = model_reader.vars_provided
         
     var_matches = {}
     for var in vars_to_analyse:
         if var in model_reader.vars_provided: #candidate
              # first check if the variable pair was defined explicitly
             if var in self.alt_vars:
                 if self.alt_vars[var] in obs_reader.vars_provided:
                     var_matches[var] = self.alt_vars[var]
             else:
                 if var in obs_reader.vars_provided:
                     var_matches[var] = var
     
     if len(var_matches) == 0:
         raise DataCoverageError('No variable matches between {} and {} for '
                                 'input vars: {}'.format(self.model_id, 
                                                         self.obs_id, 
                                                         self.vars_to_analyse))
         
     all_ts_types = const.GRID_IO.TS_TYPES
     ts_types_ana = self.ts_types_ana
     if ts_types_ana is None:
         ts_types_ana = self._setup.TS_TYPES_ANA_DEFAULT['gridded']
     
     ts_types_read = self.ts_types_read
     if ts_types_read is None:
         ts_types_read = model_reader.ts_types
     
     vars_model = list(var_matches.keys())
     vars_obs = list(var_matches.values())
     flex_obs = self._setup.options.TS_TYPE_OBS_FLEX
     for ts_type_read in ts_types_read:
         # reads only year if starttime is provided but not stop time
         model_data_vars = model_reader.read(vars_model, 
                                             start=start,
                                             stop=stop,
                                             ts_type=ts_type_read,
                                             flex_ts_type=False)
         
         if len(model_data_vars) == 0:
             if self._log:    
                 self._log.write('No model data available ({}-{}, {})\n'
                                 .format(start, stop, ts_type_read))
             continue
         
         obs_data_vars = obs_reader.read(vars_obs, 
                                         start=start,
                                         stop=stop,
                                         ts_type=ts_type_read,
                                         flex_ts_type=flex_obs)
         if len(obs_data_vars) == 0:
             if self._log:    
                 self._log.write('No obs data available for variables {} '
                                 '({}-{}, {})\n'
                                 .format(vars_obs, start, stop, 
                                         ts_type_read))
             continue
         
         for model_data in model_data_vars:
             var = model_data.var_name
             obs_data = None
             for _obs in obs_data_vars:
                 if _obs.var_name == var_matches[var]:
                     obs_data = _obs
                     break
             if obs_data is None:
                 if self._log:    
                     self._log.write('No obs data available for model var {} '
                                     '({}-{}, {})\n'
                                     .format(var, start, stop, 
                                         ts_type_read))
                 continue
             for ts_type_ana in ts_types_ana:
                  # model resolution (ts_type) must be equal to or higher
                  # than the current analysis setting
                 if all_ts_types.index(ts_type_ana) >= all_ts_types.index(ts_type_read):
                     out_dir = chk_make_subdir(self.output_dir('colocate'),
                                               self.model_id)
                                               
                     savename = self._coldata_save_name(model_data,
                                                        ts_type_ana, 
                                                        start,
                                                        stop)
                     
                     file_exists = self._check_coldata_exists(self.model_id,
                                                               savename)
                     if file_exists:
                         if not self.options.REANALYSE_EXISTING:
                             if self._log:
                                 self._log.write('SKIP: {}\n'.format(savename))
                                 print_log.info('Skip {} (file already '
                                                'exists)'.format(savename))
                             continue
                         else:
                             os.remove(os.path.join(out_dir, savename))
                         
                     data_coll = colocate_gridded_gridded(
                                     model_data, obs_data, 
                                     ts_type=ts_type_ana, 
                                     start=start, stop=stop, 
                                     filter_name=self.filter_name)
                     self._last_coldata = data_coll
                      if data_coll.save_name_aerocom + '.nc' != savename:
                          raise Exception('Mismatch between expected and '
                                          'actual save name of colocated '
                                          'data file')
                     data_coll.to_netcdf(out_dir)
                     if self._log:
                         self._log.write('WRITE: {}\n'.format(savename))
                         print_log.info('Writing {}'.format(savename))
Example #15
 def _run_gridded_ungridded(self):
     """Analysis method for gridded vs. ungridded data"""
     start, stop = self.start, self.stop
     model_reader = ReadGridded(self.model_id, start, stop)
     
     obs_reader = ReadUngridded(self.obs_id)
     obs_vars = obs_reader.get_reader(self.obs_id).PROVIDES_VARIABLES
 
     vars_to_analyse = self.vars_to_analyse
     if vars_to_analyse is None:
         vars_to_analyse = model_reader.vars_provided
         
     var_matches = {}
     
     for var in vars_to_analyse:
         if var in model_reader.vars_provided: #candidate
             if var in self.alt_vars:
                 if self.alt_vars[var] in obs_vars:
                     var_matches[var] = self.alt_vars[var]
             else:
                 if var in obs_vars:
                     var_matches[var] = var
     
     if len(var_matches) == 0:
         
         raise DataCoverageError('No variable matches between '
                                 '{} and {} for input vars: {}'
                                 .format(self.model_id, 
                                         self.obs_id, 
                                         self.vars_to_analyse))
         
     all_ts_types = const.GRID_IO.TS_TYPES
     ts_types_ana = self.ts_types_ana
     if ts_types_ana is None:
         ts_types_ana = self._setup.TS_TYPES_ANA_DEFAULT['ungridded']
     
     ts_types_read = self.ts_types_read
     if ts_types_read is None:
         ts_types_read = model_reader.ts_types
     
     
     vars_model = list(var_matches.keys())
     vars_obs = list(var_matches.values())
     
     obs_data = obs_reader.read(datasets_to_read=self.obs_id, 
                                vars_to_retrieve=vars_obs)
     
     for ts_type_read in ts_types_read:
         model_data_vars = model_reader.read(vars_model, 
                                             start=start,
                                             stop=stop,
                                             ts_type=ts_type_read,
                                             flex_ts_type=False)
                     
          if len(model_data_vars) == 0:
             if self._log:    
                 self._log.write('No model data available ({}-{}, {})\n'
                                 .format(start, stop, ts_type_read))
             continue
         
         for model_data in model_data_vars:
             var = model_data.var_info.var_name
             obs_var = var_matches[var]
              if obs_var not in obs_reader.data:
                 if self._log:    
                     self._log.write('No obs data available for variable {} '
                                     '({}-{}, {})\n'
                                     .format(obs_var, start, stop, 
                                             ts_type_read))
                 continue
             for ts_type_ana in ts_types_ana:
 
                 if all_ts_types.index(ts_type_ana) >= all_ts_types.index(ts_type_read):
                 
                     out_dir = chk_make_subdir(self.output_dir('colocate'),
                                               self.model_id)
                     savename = self._coldata_save_name(model_data,
                                                        ts_type_ana, 
                                                        start,
                                                        stop)
                     file_exists = self._check_coldata_exists(
                                                         self.model_id, 
                                                         savename)
                     if file_exists:
                         if not self.options.REANALYSE_EXISTING:
                             if self._log:
                                 self._log.write('SKIP: {}\n'
                                                 .format(savename))
                                 print_log.info('Skip {} (file already '
                                                'exists)'.format(savename))
                             continue
                         else:
                             os.remove(os.path.join(out_dir, savename))
                     
                     data_coll = colocate_gridded_ungridded_2D(
                                             model_data, obs_data, 
                                             ts_type=ts_type_ana, 
                                             start=start, stop=stop,
                                             var_ref=obs_var,
                                             filter_name=self.filter_name)
                     self._last_coldata = data_coll
                     data_coll.to_netcdf(out_dir)
                     if self._log:
                         self._log.write('WRITE: {}\n'.format(savename))
                         print_log.info('Writing {}'.format(savename))
                     
                     plt.close('all')
Example #16
    def __init__(self,
                 model_base_dir=None,
                 obs_base_dir=None,
                 output_dir=None,
                 config_file=None,
                 cache_dir=None,
                 colocateddata_dir=None,
                 write_fileio_err_log=True,
                 activate_caching=True):

        # Loggers
        from pyaerocom import print_log, logger
        self.print_log = print_log
        self.logger = logger

        # Directories
        self._modelbasedir = model_base_dir
        self._obsbasedir = obs_base_dir
        self._cachedir = cache_dir
        self._outputdir = output_dir
        self._testdatadir = os.path.join(self.HOMEDIR, 'pyaerocom-testdata')
        self._colocateddatadir = colocateddata_dir

        # Options
        self._caching_active = activate_caching

        #: Settings for reading and writing of gridded data
        self.GRID_IO = GridIO()
        print_log.info('Initiating pyaerocom configuration')

        if not isinstance(config_file, str) or not os.path.exists(config_file):
            from time import time
            print_log.info('Checking database access...')
            t0 = time()
            config_file = self._infer_config_file()
            print_log.info('Elapsed time: {:.3f} s'.format(time() - t0))

        self._var_param = None
        self._coords = None

        # Attributes that are used to store search directories
        self.OBSCONFIG = od()
        self.SUPPLDIRS = od()
        self.MODELDIRS = []

        self.WRITE_FILEIO_ERR_LOG = write_fileio_err_log

        self._ebas_flag_info = None

        if config_file is not None:

            keep_basedirs = False
            if self.dir_exists(model_base_dir) and self.dir_exists(
                    obs_base_dir):
                keep_basedirs = True
            try:
                self.read_config(config_file, keep_basedirs)

            except Exception as e:
                from traceback import format_exc
                self.init_outputdirs()
                self.print_log.warning(format_exc())
                self.print_log.warning("Failed to init config. Error: %s" %
                                       repr(e))
        else:
            self.init_outputdirs()
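
A hedged instantiation sketch for the configuration class above (assuming it is pyaerocom.config.Config; the directory paths are made up):

from pyaerocom.config import Config

# Anything omitted is inferred from the config file / database location
# during read_config() or init_outputdirs().
cfg = Config(model_base_dir='/lustre/storeA/project/aerocom',
             obs_base_dir='/lustre/storeA/project/aerocom',
             output_dir='/home/user/pyaerocom_out',
             activate_caching=True)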
Example #17
    def _run_gridded_ungridded(self, var_name=None):
        """Analysis method for gridded vs. ungridded data"""
        print_log.info('PREPARING colocation of {} vs. {}'.format(
            self.model_id, self.obs_id))

        model_reader = self.instantiate_gridded_reader(what='model')
        obs_reader = ReadUngridded(self.obs_id, data_dir=self.obs_data_dir)

        obs_vars = obs_reader.get_vars_supported(self.obs_id, self.obs_vars)

        if len(obs_vars) == 0:
            raise DataCoverageError(
                'No observation variable matches found for '
                '{}'.format(self.obs_id))

        var_matches = self._find_var_matches(obs_vars, model_reader, var_name)

        print_log.info(
            'The following variable combinations will be colocated\n'
            'MODEL-VAR\tOBS-VAR')
        for key, val in var_matches.items():
            print_log.info('{}\t{}'.format(key, val))

        # get list of unique observation variables
        obs_vars = np.unique(list(var_matches.values())).tolist()

        if self.remove_outliers:
            self._update_var_outlier_ranges(var_matches)

        if self.read_opts_ungridded is not None:
            ropts = self.read_opts_ungridded
        else:
            ropts = {}

        data_objs = {}
        if self.start is None:
            self._infer_start_stop(model_reader)

        start, stop = start_stop(self.start, self.stop)

        for model_var, obs_var in var_matches.items():

            # ToDo: consider removing outliers already here.
            #if 'obs_filters' in self:
            ts_type = self.ts_type
            print_log.info('Running {} / {} ({}, {})'.format(
                self.model_id, self.obs_id, model_var, obs_var))

            try:
                model_data = self._read_gridded(reader=model_reader,
                                                var_name=model_var,
                                                start=start,
                                                stop=stop,
                                                is_model=True)
            except Exception as e:

                msg = (
                    'Failed to load gridded data: {} / {}. Reason {}'.format(
                        self.model_id, model_var, repr(e)))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')

                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)
                else:
                    continue
            ts_type_src = model_data.ts_type
            rshow = self._eval_resample_how(model_var, obs_var)
            if ts_type is None:
                # if colocation frequency is not specified
                ts_type = ts_type_src

            ignore_stats = None
            if self.ignore_station_names is not None:
                ignore_stats = self.ignore_station_names
                if isinstance(ignore_stats, dict):
                    if obs_var in ignore_stats:
                        ignore_stats = ignore_stats[obs_var]
                    else:
                        ignore_stats = None

            if TsType(ts_type_src) < TsType(ts_type):
                print_log.info('Updating ts_type from {} to {} (highest '
                               'available in model {})'.format(
                                   ts_type, ts_type_src, self.model_id))
                ts_type = ts_type_src

            really_do_reanalysis = True
            if self.save_coldata:
                really_do_reanalysis = False
                savename = self._coldata_savename(model_data,
                                                  start,
                                                  stop,
                                                  ts_type,
                                                  var_name=model_var)

                file_exists = self._check_coldata_exists(
                    model_data.data_id, savename)

                out_dir = chk_make_subdir(self.basedir_coldata, self.model_id)
                if file_exists:
                    if not self.reanalyse_existing:
                        if self._log:
                            self._write_log('SKIP: {}\n'.format(savename))
                            print_log.info('Skip {} (file already '
                                           'exists)'.format(savename))
                            self.file_status[savename] = 'skipped'
                        continue
                    else:
                        really_do_reanalysis = True
                        print_log.info(
                            'Deleting and recomputing existing '
                            'colocated data file {}'.format(savename))
                        print_log.info('REMOVE: {}\n'.format(savename))
                        os.remove(os.path.join(out_dir, savename))
                else:
                    really_do_reanalysis = True

            if really_do_reanalysis:
                # Read obs data only if the colocated data file does not
                # already exist. This part of the method was changed by
                # @hansbrenna to work better with large observational data
                # sets: only one variable is loaded into the UngriddedData
                # object at a time. Currently the variable is re-read many
                # times, which is a weakness.
                obs_data = obs_reader.read(vars_to_retrieve=obs_var,
                                           only_cached=self._obs_cache_only,
                                           **ropts)

                # ToDo: consider removing outliers already here.
                if 'obs_filters' in self:
                    remaining_filters = self._eval_obs_filters()
                    obs_data = obs_data.apply_filters(**remaining_filters)

            try:
                try:
                    by = self.update_baseyear_gridded
                    stop = None
                except AttributeError:
                    by = None
                if self.model_use_climatology:
                    by = start.year
                coldata = colocate_gridded_ungridded(
                    gridded_data=model_data,
                    ungridded_data=obs_data,
                    ts_type=ts_type,
                    start=start,
                    stop=stop,
                    var_ref=obs_var,
                    filter_name=self.filter_name,
                    regrid_res_deg=self.regrid_res_deg,
                    remove_outliers=self.remove_outliers,
                    vert_scheme=self.vert_scheme,
                    harmonise_units=self.harmonise_units,
                    var_outlier_ranges=self.var_outlier_ranges,
                    var_ref_outlier_ranges=self.var_ref_outlier_ranges,
                    update_baseyear_gridded=by,
                    ignore_station_names=ignore_stats,
                    apply_time_resampling_constraints=self.
                    apply_time_resampling_constraints,
                    min_num_obs=self.min_num_obs,
                    colocate_time=self.colocate_time,
                    var_keep_outliers=self.model_keep_outliers,
                    var_ref_keep_outliers=self.obs_keep_outliers,
                    use_climatology_ref=self.obs_use_climatology,
                    resample_how=rshow)

                if self.model_to_stp:
                    coldata = correct_model_stp_coldata(coldata)
                if self.save_coldata:
                    self._save_coldata(coldata, savename, out_dir, model_var,
                                       model_data, obs_var)
                data_objs[model_var] = coldata
            except Exception:
                msg = ('Colocation between model {} / {} and obs {} / {} '
                       'failed.\nTraceback:\n{}'.format(
                           self.model_id, model_var, self.obs_id, obs_var,
                           traceback.format_exc()))
                const.print_log.warning(msg)
                self._write_log(msg + '\n')
                if self.raise_exceptions:
                    self._close_log()
                    raise Exception(msg)

        return data_objs
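
min_num_obs, forwarded to colocate_gridded_ungridded above together with apply_time_resampling_constraints, can express hierarchical sampling requirements when downsampling. A hedged sketch of the nested-dict form (the structure is an assumption from context; the counts are illustrative):

# target frequency -> source frequency -> minimum number of source values
# required per target period (structure and counts assumed)
min_num_obs = {
    'yearly':  {'monthly': 9},
    'monthly': {'daily': 21},
    'daily':   {'hourly': 18},
}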
Example #18
    def read(self,
             vars_to_retrieve=None,
             files=None,
             first_file=None,
             last_file=None):
        """Method that reads list of files as instance of :class:`UngriddedData`
        
        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None, 
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file 
            in the list is used
            
        Returns
        -------
        UngriddedData
            data object
        """

        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        if files is None:
            if len(self.files) == 0:
                self.get_file_list()
            files = self.files

        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

        files = files[first_file:last_file]

        self.read_failed = []

        data_obj = UngriddedData()
        meta_key = 0.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        num_vars = len(vars_to_retrieve)
        num_files = len(files)
        disp_each = int(num_files * 0.1)
        if disp_each < 1:
            disp_each = 1

        for i, _file in enumerate(files):

            if i % disp_each == 0:
                print_log.info("Reading file {} of {} ({})".format(
                    i + 1, num_files,
                    type(self).__name__))
            station_data = self.read_file(_file,
                                          vars_to_retrieve=vars_to_retrieve)
            # Fill the metadata dict
            # the location in the data set is time-step dependent!
            # use the lat location here since we have to choose one location
            # in the time series plot
            metadata[meta_key] = od()
            metadata[meta_key].update(station_data.get_meta())
            metadata[meta_key].update(station_data.get_station_coords())
            metadata[meta_key]['dataset_name'] = self.DATASET_NAME
            metadata[meta_key]['ts_type'] = self.TS_TYPE
            metadata[meta_key]['variables'] = vars_to_retrieve
            if 'instrument_name' in station_data and station_data[
                    'instrument_name'] is not None:
                instr = station_data['instrument_name']
            else:
                instr = self.INSTRUMENT_NAME
            metadata[meta_key]['instrument_name'] = instr
            # this is a list with indices of this station for each variable
            # not sure yet if we really need that or if it speeds up things
            meta_idx[meta_key] = od()

            num_times = len(station_data['dtime'])

            #access array containing time stamps
            # TODO: check using index instead (even though not a problem here
            # since all Aerocom data files are of type timeseries)
            times = np.float64(station_data['dtime'])

            totnum = num_times * num_vars

            #check if size of data object needs to be extended
            if (idx + totnum) >= data_obj._ROWNO:
                #if totnum < data_obj._CHUNKSIZE, then the latter is used
                data_obj.add_chunk(totnum)

            for var_idx, var in enumerate(vars_to_retrieve):
                values = station_data[var]
                start = idx + var_idx * num_times
                stop = start + num_times

                #write common meta info for this station (data lon, lat and
                #altitude are set to station locations)
                data_obj._data[start:stop,
                               data_obj._LATINDEX] = station_data['stat_lat']
                data_obj._data[start:stop,
                               data_obj._LONINDEX] = station_data['stat_lon']
                data_obj._data[
                    start:stop,
                    data_obj._ALTITUDEINDEX] = station_data['stat_alt']
                data_obj._data[start:stop,
                               data_obj._METADATAKEYINDEX] = meta_key

                # write data to data object
                data_obj._data[start:stop, data_obj._TIMEINDEX] = times
                data_obj._data[start:stop, data_obj._DATAINDEX] = values
                data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                meta_idx[meta_key][var] = np.arange(start, stop)

                if var not in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx

            idx += totnum
            meta_key = meta_key + 1.

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        data_obj.data_revision[self.DATASET_NAME] = self.data_revision
        self.data = data_obj
        return data_obj
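
A minimal usage sketch for this read() method, assuming a concrete reader class such as ReadAeronetSunV3 (chosen here for illustration; any reader implementing this interface works the same way):

from pyaerocom.io import ReadAeronetSunV3  # assumed concrete reader class

reader = ReadAeronetSunV3()
# read the default variables from the first ten files of the file list
data = reader.read(first_file=0, last_file=10)
# metadata blocks are keyed by float counters (0.0, 1.0, ...), as in the
# code above
print(data.metadata[0.0]['instrument_name'])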
Example #19
    def read(self, vars_to_retrieve=None, files=None, first_file=None,
             last_file=None, file_pattern=None, common_meta=None):
        """Method that reads list of files as instance of :class:`UngriddedData`

        Parameters
        ----------
        vars_to_retrieve : :obj:`list` or similar, optional
            list containing variable IDs that are supposed to be read. If None,
            all variables in :attr:`PROVIDES_VARIABLES` are loaded
        files : :obj:`list`, optional
            list of files to be read. If None, then the file list is used that
            is returned on :func:`get_file_list`.
        first_file : :obj:`int`, optional
            index of first file in file list to read. If None, the very first
            file in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        last_file : :obj:`int`, optional
            index of last file in list to read. If None, the very last file
            in the list is used. Note: is ignored if input parameter
            `file_pattern` is specified.
        file_pattern : str, optional
            string pattern for file search (cf :func:`get_file_list`)
        common_meta : dict, optional
            dictionary that contains additional metadata shared for this
            network (assigned to each metadata block of the
            :class:`UngriddedData` object that is returned)

        Returns
        -------
        UngriddedData
            data object
        """
        if common_meta is None:
            common_meta = {}
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]
        vars_to_retrieve = varlist_aerocom(vars_to_retrieve)
        if files is None:
            if len(self.files) == 0:
                self.get_file_list(pattern=file_pattern)
            files = self.files

        if file_pattern is None:
            if first_file is None:
                first_file = 0
            if last_file is None:
                last_file = len(files)

            files = files[first_file:last_file]

        self.read_failed = []

        data_obj = UngriddedData()
        meta_key = 0.0
        idx = 0

        #assign metadata object
        metadata = data_obj.metadata
        meta_idx = data_obj.meta_idx

        num_vars = len(vars_to_retrieve)
        num_files = len(files)
        print_log.info('Reading AERONET data')
        for i in tqdm(range(num_files)):

            _file = files[i]
            station_data = self.read_file(_file,
                                          vars_to_retrieve=vars_to_retrieve)
            # Fill the metadata dict
            # the location in the data set is time step dependent!
            # use the lat location here since we have to choose one location
            # in the time series plot
            meta = od()
            meta['var_info'] = od()
            meta.update(station_data.get_meta())
            #metadata[meta_key].update(station_data.get_station_coords())
            meta['data_id'] = self.data_id
            meta['ts_type'] = self.TS_TYPE
            #meta['variables'] = vars_to_retrieve
            if ('instrument_name' in station_data
                    and station_data['instrument_name'] is not None):
                instr = station_data['instrument_name']
            else:
                instr = self.INSTRUMENT_NAME
            meta['instrument_name'] = instr
            meta['data_revision'] = self.data_revision
            meta['filename'] = _file

            meta.update(**common_meta)
            # this is a list with indices of this station for each variable
            # not sure yet if we really need that or if it speeds up things
            meta_idx[meta_key] = od()

            num_times = len(station_data['dtime'])

            #access array containing time stamps
            # TODO: check using index instead (even though not a problem here
            # since all Aerocom data files are of type timeseries)
            times = np.float64(station_data['dtime'])

            totnum = num_times * num_vars

            #check if size of data object needs to be extended
            if (idx + totnum) >= data_obj._ROWNO:
                #if totnum < data_obj._CHUNKSIZE, then the latter is used
                data_obj.add_chunk(totnum)

            for var_idx, var in enumerate(vars_to_retrieve):
                values = station_data[var]
                start = idx + var_idx * num_times
                stop = start + num_times

                #write common meta info for this station (data lon, lat and
                #altitude are set to station locations)
                data_obj._data[start:stop,
                               data_obj._LATINDEX] = station_data['latitude']
                data_obj._data[start:stop,
                               data_obj._LONINDEX] = station_data['longitude']
                data_obj._data[start:stop,
                               data_obj._ALTITUDEINDEX] = station_data['altitude']
                data_obj._data[start:stop,
                               data_obj._METADATAKEYINDEX] = meta_key

                # write data to data object
                data_obj._data[start:stop, data_obj._TIMEINDEX] = times
                data_obj._data[start:stop, data_obj._DATAINDEX] = values
                data_obj._data[start:stop, data_obj._VARINDEX] = var_idx

                meta_idx[meta_key][var] = np.arange(start, stop)

                if var in station_data['var_info']:
                    if 'units' in station_data['var_info'][var]:
                        u = station_data['var_info'][var]['units']
                    elif 'unit' in station_data['var_info'][var]:
                        from pyaerocom.exceptions import MetaDataError
                        raise MetaDataError('Metadata attr unit is deprecated, '
                                            'please use units')
                    else:
                        u = self.DEFAULT_UNIT
                elif var in self.UNITS:
                    u = self.UNITS[var]
                else:
                    u = self.DEFAULT_UNIT
                meta['var_info'][var] = od(units=u)
                if var not in data_obj.var_idx:
                    data_obj.var_idx[var] = var_idx

            idx += totnum
            metadata[meta_key] = meta
            meta_key = meta_key + 1.

        # shorten data_obj._data to the right number of points
        data_obj._data = data_obj._data[:idx]
        #data_obj.data_revision[self.data_id] = self.data_revision
        self.data = data_obj
        return data_obj
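
This variant extends the previous read() with a file search pattern and shared metadata. A hedged sketch of how it might be called (the reader class, the pattern and the metadata key are illustrative):

reader = ReadAeronetSunV3()  # assumed reader exposing this signature
# file_pattern takes precedence over first_file / last_file (see docstring);
# common_meta is copied into every metadata block of the returned object
data = reader.read(vars_to_retrieve='od550aer',
                   file_pattern='*Lille*',
                   common_meta={'data_level': 2})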
Example #20
    def read_dataset(self, dataset_to_read, vars_to_retrieve=None, **kwargs):
        """Read dataset into an instance of :class:`ReadUngridded`
        
        Note
        ----
        This method does not assign the loaded data object to the class
        attribute :attr:`data` (only :func:`read` does)
        
        Parameters
        ----------
        dataset_to_read : str
            name of dataset
        vars_to_retrieve : list
            list of variables to be retrieved. If None (default), the default
            variables of each reading routine are imported
            
        Returns
        -------
        UngriddedData
            data object
        """
        _caching = None
        if len(kwargs) > 0:
            _caching = const.CACHING
            const.CACHING = False

            print_log.info('Received additional reading constraints, '
                           'ignoring caching')
        if vars_to_retrieve is None:
            # Note: self.vars_to_retrieve may be None as well, then
            # default variables of each network are read
            vars_to_retrieve = self.vars_to_retrieve

        reader = self.get_reader(dataset_to_read)

        if vars_to_retrieve is None:
            vars_to_retrieve = reader.PROVIDES_VARIABLES
        elif isinstance(vars_to_retrieve, str):
            vars_to_retrieve = [vars_to_retrieve]

        # Since this interface enables loading multiple datasets, each of
        # which supports a number of variables, only those variables that
        # are supported by this dataset are considered here
        vars_available = [
            var for var in vars_to_retrieve if var in reader.PROVIDES_VARIABLES
        ]

        cache = CacheHandlerUngridded(reader)
        if not self.ignore_cache:
            # try to load each variable from an existing cache file
            for var in vars_available:
                try:
                    cache.check_and_load(var_name=var)
                except Exception:
                    self.logger.exception(
                        'Fatal: compatibility error between '
                        'old cache file for {} and current version '
                        'of code'.format(var))

        vars_to_read = [
            v for v in vars_available if v not in cache.loaded_data
        ]
        data_read = None
        if len(vars_to_read) > 0:

            _loglevel = print_log.level
            print_log.setLevel(logging.INFO)
            data_read = reader.read(vars_to_read, **kwargs)
            print_log.setLevel(_loglevel)

            for var in vars_to_read:
                # write the cache file
                if not self.ignore_cache:
                    try:
                        cache.write(data_read, var)
                    except Exception as e:
                        _caching = False
                        print_log.warning(
                            'Failed to write to cache directory. '
                            'Error: {}. Deactivating caching in '
                            'pyaerocom'.format(repr(e)))

        if len(vars_to_read) == len(vars_available):
            data_out = data_read
        else:
            data_out = UngriddedData()
            for var in vars_available:
                if var in cache.loaded_data:
                    data_out.append(cache.loaded_data[var])
            if data_read is not None:
                data_out.append(data_read)

        if _caching is not None:
            const.CACHING = _caching
        return data_out
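
A usage sketch for the caching logic above, assuming ReadUngridded is importable from pyaerocom.io (the import path and the dataset ID are illustrative, not shown in this snippet):

from pyaerocom.io import ReadUngridded  # assumed import path

reader = ReadUngridded()
# the first call reads from disk and writes one cache file per variable; a
# repeated call with the same arguments is then served from the cache,
# unless ignore_cache is set or extra reading constraints are passed
data = reader.read_dataset('AeronetSunV3Lev2.daily',
                           vars_to_retrieve='od550aer')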