def test_get_variable():
    variable = ocg_utils.get_variable(
        local_path(TESTDATA['cmip5_tasmax_2007_nc']))
    assert 'tasmax' == variable

    variable = ocg_utils.get_variable(
        local_path(TESTDATA['cordex_tasmax_2007_nc']))
    assert 'tasmax' == variable
def get_index_lat(resource, variable=None):
    """
    returns the dimension index of the latitude values

    :param resource: list of path(s) to netCDF file(s) of one dataset
    :param variable: variable name

    :return int: index
    """
    if variable is None:
        variable = get_variable(resource)
    if not isinstance(resource, list):
        resource = [resource]
    if len(resource) == 1:
        ds = Dataset(resource[0])
    else:
        ds = MFDataset(resource)

    var = ds.variables[variable]
    dims = list(var.dimensions)

    # check the common names for the latitude dimension;
    # if several are present, the last match wins
    if 'rlat' in dims:
        index = dims.index('rlat')
    if 'lat' in dims:
        index = dims.index('lat')
    if 'latitude' in dims:
        index = dims.index('latitude')
    if 'y' in dims:
        index = dims.index('y')
    return index
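
# Usage sketch (illustrative only, not part of the original module): the file
# path is hypothetical. For a CORDEX variable with dimensions
# (time, rlat, rlon), get_index_lat returns 1, which can be used to slice the
# raw data array along the latitude axis.
def _example_get_index_lat():
    from netCDF4 import Dataset
    nc = '/data/tasmax_EUR-11_day.nc'  # hypothetical path
    idx = get_index_lat(nc)  # e.g. 1 for a (time, rlat, rlon) variable
    var = Dataset(nc).variables[get_variable(nc)]
    # keep only the first half of the latitude axis
    return var[:].take(range(var.shape[idx] // 2), axis=idx)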
def get_values(resource, variable=None):
    """
    returns the values of a variable for a list of files belonging to one dataset

    :param resource: list of files
    :param variable: variable to be picked from the files
                     (if not set, the variable will be detected)

    :returns numpy.array: values
    """
    from numpy import squeeze
    if variable is None:
        variable = get_variable(resource)
    if isinstance(resource, str):
        ds = Dataset(resource)
    elif len(resource) == 1:
        ds = Dataset(resource[0])
    else:
        ds = MFDataset(resource)
    vals = squeeze(ds.variables[variable][:])
    return vals
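
# Usage sketch (illustrative only, not part of the original module): paths are
# hypothetical. A single path is opened as a Dataset, several paths of one
# dataset are opened through MFDataset; length-1 axes are squeezed away.
def _example_get_values():
    vals_one = get_values('/data/tasmax_day_2001.nc')
    vals_all = get_values(['/data/tasmax_day_2001.nc',
                           '/data/tasmax_day_2002.nc'])
    return vals_one, vals_all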
def aggregations(resource):
    """
    aggregates netcdf files by experiment.

    Aggregation examples:
    CORDEX: EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day
    CMIP5:

    We collect for each experiment all files on the time axis:
    200101-200512, 200601-201012, ...

    Time axis is sorted by time.

    :param resource: list of netcdf files

    :return: dictionary with key=experiment
    """
    from .nc_utils import drs_filename, sort_by_time

    aggregations = {}
    for nc in resource:
        key = drs_filename(nc, skip_timestamp=True, skip_format=True)

        # collect files of each aggregation (time axis)
        if key in aggregations:
            aggregations[key]['files'].append(nc)
        else:
            aggregations[key] = dict(files=[nc])

    # collect aggregation metadata
    for key in aggregations.keys():
        # sort files by time
        aggregations[key]['files'] = sort_by_time(aggregations[key]['files'])
        # start timestamp of first file
        start, _ = get_timerange(aggregations[key]['files'][0])
        # end timestamp of last file
        _, end = get_timerange(aggregations[key]['files'][-1])
        aggregations[key]['from_timestamp'] = start
        aggregations[key]['to_timestamp'] = end
        aggregations[key]['start_year'] = int(start[0:4])
        aggregations[key]['end_year'] = int(end[0:4])
        aggregations[key]['variable'] = get_variable(
            aggregations[key]['files'][0])
        aggregations[key]['filename'] = "%s_%s-%s.nc" % (key, start, end)
    return aggregations
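
# Usage sketch (illustrative only, not part of the original module): file
# names are hypothetical and the timestamp format is assumed to be whatever
# get_timerange returns (a string starting with the year).
def _example_aggregations():
    aggs = aggregations(['/data/tas_MPI-ESM-LR_historical_r1i1p1_200101-200512.nc',
                         '/data/tas_MPI-ESM-LR_historical_r1i1p1_200601-201012.nc'])
    agg = aggs['tas_MPI-ESM-LR_historical_r1i1p1']
    # agg['files']      -> both inputs, sorted by time
    # agg['start_year'] -> 2001, agg['end_year'] -> 2010
    # agg['variable']   -> 'tas'
    # agg['filename']   -> '<key>_<from_timestamp>-<to_timestamp>.nc'
    return agg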
def get_coordinates(resource, variable=None, unrotate=False):
    """
    reads out the coordinates of a variable

    :param resource: netCDF resource file
    :param variable: variable name
    :param unrotate: if True the coordinates will be returned for the unrotated pole

    :returns list, list: latitudes, longitudes
    """
    if not isinstance(resource, list):
        resource = [resource]

    if variable is None:
        variable = get_variable(resource)

    if unrotate is False:
        try:
            if len(resource) > 1:
                ds = MFDataset(resource)
            else:
                ds = Dataset(resource[0])

            var = ds.variables[variable]
            dims = list(var.dimensions)
            if 'time' in dims:
                dims.remove('time')
            # TODO: find position of lat and long in list and replace dims[0] dims[1]
            lats = ds.variables[dims[0]][:]
            lons = ds.variables[dims[1]][:]
            ds.close()
            LOGGER.info('got coordinates without pole rotation')
        except Exception:
            LOGGER.exception('failed to extract coordinates')
            raise  # lats/lons would be undefined beyond this point
    else:
        lats, lons = unrotate_pole(resource)
        LOGGER.info('got coordinates with pole rotation')
    return lats, lons
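
# Usage sketch (illustrative only, not part of the original module): the path
# is hypothetical. For rotated-pole data the default call returns the native
# grid axes; unrotate=True delegates to unrotate_pole for true lat/lon values.
def _example_get_coordinates():
    nc = '/data/tasmax_EUR-11_day.nc'  # hypothetical path
    rlats, rlons = get_coordinates(nc)
    lats, lons = get_coordinates(nc, unrotate=True)
    return lats, lons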
def drs_filename(resource, skip_timestamp=False, skip_format=False,
                 variable=None, rename_file=False, add_file_path=False):
    """
    generates a filename according to the data reference syntax (DRS)
    based on the metadata in the resource.

    http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf
    https://pypi.python.org/pypi/drslib

    :param resource: netcdf file
    :param skip_timestamp: if True, the from/to timestamp is not added to the filename (default: False)
    :param skip_format: if True, the '.nc' extension is not added (default: False)
    :param variable: variable for the filename; if not set (default), the variable
                     will be detected. For files with more than one data variable,
                     the variable parameter has to be defined. Example: variable='tas'
    :param rename_file: rename the file (default: False)
    :param add_file_path: if True, the path to the file will be added (default: False)

    :returns str: DRS filename
    """
    from os import path, rename
    try:
        ds = Dataset(resource)
        if variable is None:
            variable = get_variable(resource)
        # CORDEX example: EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day
        cordex_pattern = "{variable}_{domain}_{driving_model}_{experiment}_{ensemble}_{model}_{version}_{frequency}"
        # CMIP5 example: tas_MPI-ESM-LR_historical_r1i1p1
        cmip5_pattern = "{variable}_{model}_{experiment}_{ensemble}"
        filename = resource
        if ds.project_id == 'CORDEX' or ds.project_id == 'EOBS':
            filename = cordex_pattern.format(
                variable=variable,
                domain=ds.CORDEX_domain,
                driving_model=ds.driving_model_id,
                experiment=ds.experiment_id,
                ensemble=ds.driving_model_ensemble_member,
                model=ds.model_id,
                version=ds.rcm_version_id,
                frequency=ds.frequency)
        elif ds.project_id == 'CMIP5':
            # TODO: attributes missing in netcdf file for name generation?
            filename = cmip5_pattern.format(
                variable=variable,
                model=ds.model_id,
                experiment=ds.experiment,
                ensemble=ds.parent_experiment_rip)
        else:
            raise Exception('unknown project %s' % ds.project_id)
        ds.close()
    except Exception:
        LOGGER.exception('Could not read metadata %s', resource)
    try:
        # add from/to timestamp if not skipped
        if skip_timestamp is False:
            LOGGER.debug("add timestamp")
            from_timestamp, to_timestamp = get_timerange(resource)
            LOGGER.debug("from_timestamp %s", from_timestamp)
            filename = "%s_%s-%s" % (filename, int(from_timestamp), int(to_timestamp))
        # add format extension
        if skip_format is False:
            filename = filename + '.nc'
        pf = path.dirname(resource)
        # add file path
        if add_file_path is True:
            filename = path.join(pf, filename)
        # rename the file
        if rename_file is True:
            if path.exists(path.join(resource)):
                rename(resource, path.join(pf, filename))
    except Exception:
        LOGGER.exception('Could not generate DRS filename for %s', resource)

    return filename
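
# Usage sketch (illustrative only, not part of the original module): the path
# is hypothetical and the expected result assumes CMIP5 global attributes
# (model_id='MPI-ESM-LR', experiment='historical', parent_experiment_rip='r1i1p1').
def _example_drs_filename():
    name = drs_filename('/data/some_cmip5_file.nc')
    # e.g. 'tas_MPI-ESM-LR_historical_r1i1p1_<from>-<to>.nc';
    # skip_timestamp=True drops the timestamps, skip_format=True the '.nc'
    return name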
def clipping(resource=[], variable=None, dimension_map=None, calc=None,
             output_format='nc', calc_grouping=None, time_range=None,
             time_region=None, historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaic=False,
             dir_output=None, memory_limit=None):
    """
    returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if the input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concatenate files of RCPs with the appropriate historical runs into one timeseries
    :param prefix: prefix for the output file name
    :param polygons: list of polygons to be used. If more than one is in the list, an appropriate mosaic will be clipped
    :param mosaic: whether the polygons are aggregated into a single geometry (True)
                   or individual files are created for each geometry (False)
    :param output_format: output format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted from the timeseries

    :returns list: paths to clipped files
    """
    if not isinstance(resource, list):
        resource = [resource]
    if not isinstance(polygons, list):
        polygons = [polygons]
    if prefix is not None and not isinstance(prefix, list):
        prefix = [prefix]

    geoms = set()
    ncs = sort_by_filename(resource, historical_concatination=historical_concatination)
    geom_files = []
    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            if len(geoms) > 1:
                LOGGER.error('polygons belong to different shapefiles! mosaic option is not possible %s', geoms)
            else:
                geom = geoms.pop()
                ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception as ex:
            LOGGER.exception('geom identification failed {}'.format(str(ex)))
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % variable)
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable, calc=calc,
                                 calc_grouping=calc_grouping, output_format=output_format,
                                 prefix=name, geom=geom, select_ugid=ugids,
                                 time_range=time_range, time_region=time_region,
                                 spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                                 dir_output=dir_output, dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaic clipping done for %s' % key)
            except Exception as ex:
                msg = 'ocgis mosaic clipping failed for %s, %s ' % (key, ex)
                LOGGER.exception(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource' % variable)
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            name = prefix[i]
                        geom_file = call(resource=ncs[key], variable=variable, calc=calc,
                                         calc_grouping=calc_grouping, output_format=output_format,
                                         prefix=name, geom=geom, select_ugid=ugid,
                                         dir_output=dir_output, dimension_map=dimension_map,
                                         spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                                         time_range=time_range, time_region=time_region)
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s' % key)
                    except Exception as ex:
                        msg = 'ocgis clipping failed for %s: %s ' % (key, ex)
                        LOGGER.exception(msg)
            except Exception as ex:
                LOGGER.exception('geom identification failed {}'.format(str(ex)))
    return geom_files
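
# Usage sketch (illustrative only, not part of the original module): paths and
# polygon labels are hypothetical; valid labels depend on the shapefiles known
# to get_geom/get_ugid.
def _example_clipping():
    # one clipped file per polygon and dataset
    separate = clipping(resource=['/data/tas_day_2001.nc'],
                        polygons=['Germany', 'France'],
                        dir_output='/tmp')
    # mosaic=True aggregates both polygons into a single geometry instead,
    # producing one clipped file per dataset
    merged = clipping(resource=['/data/tas_day_2001.nc'],
                      polygons=['Germany', 'France'],
                      mosaic=True, dir_output='/tmp')
    return separate, merged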