Example #1
def test_get_variable():
    variable = ocg_utils.get_variable(
        local_path(TESTDATA['cmip5_tasmax_2007_nc']))
    assert 'tasmax' == variable
    variable = ocg_utils.get_variable(
        local_path(TESTDATA['cordex_tasmax_2007_nc']))
    assert 'tasmax' == variable
Example #2
def get_index_lat(resource, variable=None):
    """
    returns the dimension index of the latitude values

    :param resource:  list of path(s) to netCDF file(s) of one Dataset
    :param variable: variable name

    :return int: index
    """

    if variable is None:
        variable = get_variable(resource)

    if type(resource) != list:
        resource = [resource]
    if len(resource) == 1:
        ds = Dataset(resource[0])
    else:
        ds = MFDataset(resource)

    var = ds.variables[variable]
    dims = list(var.dimensions)

    index = None
    if 'rlat' in dims:
        index = dims.index('rlat')
    if 'lat' in dims:
        index = dims.index('lat')
    if 'latitude' in dims:
        index = dims.index('latitude')
    if 'y' in dims:
        index = dims.index('y')
    if index is None:
        raise ValueError('no latitude dimension found in %s' % dims)
    return index
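A minimal usage sketch for get_index_lat, assuming the helper is importable from a module such as nc_utils and that a local CORDEX file exists under the given name (both the import path and the file name are placeholders):

# hypothetical import path and file name
from nc_utils import get_index_lat

nc_file = 'tasmax_EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day.nc'
lat_dim = get_index_lat(nc_file)  # the variable ('tasmax') is detected automatically
print('latitude is dimension index', lat_dim)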
Example #3
def get_values(resource, variable=None):
    """
    returns the values for a list of files belonging to one dataset

    :param resource: list of files
    :param variable: variable to be picked from the files (if not set, variable will be detected)

    :returns numpy.array: values
    """
    from numpy import squeeze
    if variable is None:
        variable = get_variable(resource)

    if isinstance(resource, str):
        ds = Dataset(resource)
    elif len(resource) == 1:
        ds = Dataset(resource[0])
    else:
        ds = MFDataset(resource)
    vals = squeeze(ds.variables[variable][:])
    return vals
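A short sketch of calling get_values on a multi-file dataset; the file names and the import path are placeholders:

# hypothetical import path and file names
from nc_utils import get_values

files = ['tasmax_day_200101-200512.nc',
         'tasmax_day_200601-201012.nc']
vals = get_values(files)  # the variable is detected automatically via get_variable
print(vals.shape, vals.min(), vals.max())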
Example #4
def aggregations(resource):
    """
    aggregates netCDF files by experiment. Aggregation examples:
    CORDEX: EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day
    CMIP5:
    For each experiment, all files along the time axis are collected:
    200101-200512, 200601-201012, ...
    The time axis is sorted by time.
    :param resource: list of netcdf files
    :return: dictionary with key=experiment
    """
    from .nc_utils import drs_filename, sort_by_time
    aggregations = {}
    for nc in resource:
        key = drs_filename(nc, skip_timestamp=True, skip_format=True)

        # collect files of each aggregation (time axis)
        if key in aggregations:
            aggregations[key]['files'].append(nc)
        else:
            aggregations[key] = dict(files=[nc])

    # collect aggregation metadata
    for key in aggregations.keys():
        # sort files by time
        aggregations[key]['files'] = sort_by_time(aggregations[key]['files'])
        # start timestamp of first file
        start, _ = get_timerange(aggregations[key]['files'][0])
        # end timestamp of last file
        _, end = get_timerange(aggregations[key]['files'][-1])
        aggregations[key]['from_timestamp'] = start
        aggregations[key]['to_timestamp'] = end
        aggregations[key]['start_year'] = int(start[0:4])
        aggregations[key]['end_year'] = int(end[0:4])
        aggregations[key]['variable'] = get_variable(
            aggregations[key]['files'][0])
        aggregations[key]['filename'] = "%s_%s-%s.nc" % (key, start, end)
    return aggregations
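A hedged sketch of inspecting the experiments found in a directory of netCDF files with the aggregations helper above (the glob pattern and the import path are placeholders):

# hypothetical import path; the glob pattern is only an example
from glob import glob
from utils import aggregations

ncs = glob('/path/to/cordex/tasmax_*.nc')
aggs = aggregations(ncs)
for key, agg in aggs.items():
    print(key, agg['variable'], agg['start_year'], agg['end_year'], len(agg['files']))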
Example #5
def get_coordinates(resource, variable=None, unrotate=False):
    """
    reads out the coordinates of a variable
    :param resource: netCDF resource file
    :param variable: variable name
    :param unrotate: if True, the coordinates are returned for the unrotated pole
    :returns list, list: latitudes, longitudes
    """
    if type(resource) != list:
        resource = [resource]

    if variable is None:
        variable = get_variable(resource)

    lats = lons = None  # avoid NameError at return if coordinate extraction fails
    if unrotate is False:
        try:
            if len(resource) > 1:
                ds = MFDataset(resource)
            else:
                ds = Dataset(resource[0])

            var = ds.variables[variable]
            dims = list(var.dimensions)
            if 'time' in dims:
                dims.remove('time')
            # TODO: find position of lat and long in list and replace dims[0] dims[1]
            lats = ds.variables[dims[0]][:]
            lons = ds.variables[dims[1]][:]
            ds.close()
            LOGGER.info('got coordinates without pole rotation')
        except Exception:
            msg = 'failed to extract coordinates'
            LOGGER.exception(msg)
    else:
        lats, lons = unrotate_pole(resource)
        LOGGER.info('got coordinates with pole rotation')
    return lats, lons
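A minimal sketch of reading coordinates with and without pole rotation from a rotated-pole CORDEX file (import path and file name are placeholders):

# hypothetical import path and file name
from nc_utils import get_coordinates

nc_file = 'tasmax_EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day.nc'
lats, lons = get_coordinates(nc_file)                     # coordinates as stored in the file
lats_u, lons_u = get_coordinates(nc_file, unrotate=True)  # unrotated via unrotate_pole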
Example #6
def drs_filename(resource,
                 skip_timestamp=False,
                 skip_format=False,
                 variable=None,
                 rename_file=False,
                 add_file_path=False):
    """
    generates filename according to the data reference syntax (DRS)
    based on the metadata in the resource.
    http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf
    https://pypi.python.org/pypi/drslib
    :param add_file_path: if add_file_path=True, path to file will be added (default=False)
    :param resource: netcdf file
    :param skip_timestamp: if True, the from/to timestamp is not added to the filename
                           (default: False)
    :param variable: appropriate variable for the filename; if not set (default: None),
                      the variable will be detected. For files with more than one data
                      variable, the variable parameter has to be defined.
                      example: variable='tas'
    :param rename_file: rename the file. (default: False)
    :returns str: DRS filename
    """
    from os import path, rename

    filename = resource  # fallback if the metadata cannot be read
    try:
        ds = Dataset(resource)
        if variable is None:
            variable = get_variable(resource)
        # CORDEX example: EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day
        cordex_pattern = "{variable}_{domain}_{driving_model}_{experiment}_{ensemble}_{model}_{version}_{frequency}"
        # CMIP5 example: tas_MPI-ESM-LR_historical_r1i1p1
        cmip5_pattern = "{variable}_{model}_{experiment}_{ensemble}"
        if ds.project_id == 'CORDEX' or ds.project_id == 'EOBS':
            filename = cordex_pattern.format(
                variable=variable,
                domain=ds.CORDEX_domain,
                driving_model=ds.driving_model_id,
                experiment=ds.experiment_id,
                ensemble=ds.driving_model_ensemble_member,
                model=ds.model_id,
                version=ds.rcm_version_id,
                frequency=ds.frequency)
        elif ds.project_id == 'CMIP5':
            # TODO: attributes missing in netcdf file for name generation?
            filename = cmip5_pattern.format(variable=variable,
                                            model=ds.model_id,
                                            experiment=ds.experiment,
                                            ensemble=ds.parent_experiment_rip)
        else:
            raise Exception('unknown project %s' % ds.project_id)
        ds.close()
    except Exception:
        LOGGER.exception('Could not read metadata %s', resource)
    try:
        # add from/to timestamp if not skipped
        if skip_timestamp is False:
            LOGGER.debug("add timestamp")
            from_timestamp, to_timestamp = get_timerange(resource)
            LOGGER.debug("from_timestamp %s", from_timestamp)
            filename = "%s_%s-%s" % (filename, int(from_timestamp),
                                     int(to_timestamp))

        # add format extension
        if skip_format is False:
            filename = filename + '.nc'

        pf = path.dirname(resource)
        # add file path
        if add_file_path is True:
            filename = path.join(pf, filename)

        # rename the file
        if rename_file is True:
            if path.exists(resource):
                rename(resource, path.join(pf, filename))
    except Exception:
        LOGGER.exception('Could not generate DRS filename for %s', resource)

    return filename
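A sketch of generating a DRS name without renaming the file on disk (the file path and the import location are placeholders):

# hypothetical import path and file name
from nc_utils import drs_filename

nc_file = '/path/to/input_cordex_file.nc'
full_name = drs_filename(nc_file)  # e.g. tasmax_EUR-11_..._day_20010101-20051231.nc
key = drs_filename(nc_file, skip_timestamp=True, skip_format=True)  # name without timestamps and '.nc'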
Example #7
def clipping(resource=[], variable=None, dimension_map=None, calc=None, output_format='nc',
             calc_grouping=None, time_range=None, time_region=None,
             historical_concatination=True, prefix=None,
             spatial_wrapping='wrap', polygons=None, mosaic=False,
             dir_output=None, memory_limit=None):
    """ returns list of clipped netCDF files

    :param resource: list of input netCDF files
    :param variable: variable (string) to be used in netCDF
    :param dimension_map: specify a dimension map if the input netCDF has unconventional dimensions
    :param calc: ocgis calculation argument
    :param calc_grouping: ocgis calculation grouping
    :param historical_concatination: concatenate files of RCPs with the appropriate historical runs into one time series
    :param prefix: prefix for output file name
    :param polygons: list of polygons to be used. If the list contains more than one, an appropriate mosaic will be clipped
    :param mosaic: Whether the polygons are aggregated into a single geometry (True) or individual files
                   are created for each geometry (False).
    :param output_format: output_format (default='nc')
    :param dir_output: specify an output location
    :param time_range: [start, end] of time subset
    :param time_region: year, months or days to be extracted in the timeseries

    :returns list: path to clipped files
    """

    if type(resource) != list:
        resource = list([resource])
    if type(polygons) != list:
        polygons = list([polygons])
    if prefix is not None:
        if type(prefix) != list:
            prefix = list([prefix])

    geoms = set()
    ncs = sort_by_filename(resource, historical_concatination=historical_concatination)  # historical_concatenation=True
    geom_files = []
    if mosaic is True:
        try:
            nameadd = '_'
            for polygon in polygons:
                geoms.add(get_geom(polygon))
                nameadd = nameadd + polygon.replace(' ', '')
            if len(geoms) > 1:
                LOGGER.error('polygons belong to different shapefiles! mosaic option is not possible %s', geoms)
            else:
                geom = geoms.pop()
            ugids = get_ugid(polygons=polygons, geom=geom)
        except Exception as ex:
            LOGGER.exception('geom identification failed {}'.format(str(ex)))
        for i, key in enumerate(ncs.keys()):
            try:
                # if variable is None:
                variable = get_variable(ncs[key])
                LOGGER.info('variable %s detected in resource' % (variable))
                if prefix is None:
                    name = key + nameadd
                else:
                    name = prefix[i]
                geom_file = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=calc_grouping,
                                 output_format=output_format, prefix=name,
                                 geom=geom, select_ugid=ugids, time_range=time_range,
                                 time_region=time_region,
                                 spatial_wrapping=spatial_wrapping, memory_limit=memory_limit,
                                 dir_output=dir_output, dimension_map=dimension_map)
                geom_files.append(geom_file)
                LOGGER.info('ocgis mosaic clipping done for %s' % (key))
            except Exception as ex:
                msg = 'ocgis mosaic clipping failed for %s, %s ' % (key, ex)
                LOGGER.exception(msg)
    else:
        for i, polygon in enumerate(polygons):
            try:
                geom = get_geom(polygon)
                ugid = get_ugid(polygons=polygon, geom=geom)
                for key in ncs.keys():
                    try:
                        # if variable is None:
                        variable = get_variable(ncs[key])
                        LOGGER.info('variable %s detected in resource' % (variable))
                        if prefix is None:
                            name = key + '_' + polygon.replace(' ', '')
                        else:
                            name = prefix[i]
                        geom_file = call(resource=ncs[key], variable=variable, calc=calc, calc_grouping=calc_grouping,
                                         output_format=output_format,
                                         prefix=name, geom=geom, select_ugid=ugid, dir_output=dir_output,
                                         dimension_map=dimension_map, spatial_wrapping=spatial_wrapping,
                                         memory_limit=memory_limit, time_range=time_range, time_region=time_region,
                                         )
                        geom_files.append(geom_file)
                        LOGGER.info('ocgis clipping done for %s' % (key))
                    except Exception as ex:
                        msg = 'ocgis clipping failed for %s: %s ' % (key, ex)
                        LOGGER.exception(msg)
            except Exception as ex:
                LOGGER.exception('geom identification failed {}'.format(str(ex)))
    return geom_files
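A hedged end-to-end sketch of clipping one dataset, once as individual polygons and once as a merged mosaic; the polygon labels, file paths and import location are assumptions:

# hypothetical import path, file names and polygon labels
from ocgis_module import clipping

ncs = ['/path/to/tasmax_day_200101-200512.nc',
       '/path/to/tasmax_day_200601-201012.nc']

# one clipped file per polygon and dataset
per_polygon = clipping(resource=ncs, polygons=['DEU', 'FRA'], dir_output='/tmp')

# one clipped file per dataset, with both polygons merged into a single geometry
mosaic_files = clipping(resource=ncs, polygons=['DEU', 'FRA'], mosaic=True, dir_output='/tmp')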