Example #1
 def test_agg_raw(self):
     grouping = ['month']
     funcs = [{
         'func': 'std',
         'name': 'std',
         'ref': StandardDeviation,
         'kwds': {}
     }]
     raws = [True, False]
     aggs = [True, False]
     for raw, agg in itertools.product(raws, aggs):
         coll = self.get_collection(aggregate=agg)
         ce = OcgCalculationEngine(grouping, funcs, raw, agg)
         ret = ce.execute(coll)
         shape = ret.calc['tas']['std'].shape
         value, weights = ce._get_value_weights_(coll.variables['tas'])
         ## aggregated data should have a (1,1) spatial dimension
         if agg is True:
             self.assertNumpyAll(shape[-2:], (1, 1))
         ## if raw data is used, the input values to a calculation should be
         ## returned with a different shape - aggregated spatial dimension
         if raw is True and agg is True:
             self.assertNumpyAll(value.shape[-2:], weights.shape)
             self.assertNumpyNotAll(value.shape[-2:], shape[-2:])
         if raw is True and agg is False:
             self.assertNumpyAll(shape[-3:], value.shape[-3:])
Example #2
 def test_agg_raw(self):
     grouping = ['month']
     funcs = [{'func':'threshold','name':'threshold','ref':Threshold,'kwds':{'operation':'gte','threshold':200}}]
     raws = [True,False]
     aggs = [True,False]
     for raw,agg in itertools.product(raws,aggs):
         coll = self.get_collection(aggregate=agg)
         ce = OcgCalculationEngine(grouping,funcs,raw,agg)
         ret = ce.execute(coll)
         value = ret[25]['tas'].variables['threshold'].value
         ## aggregated data should have a (1,1) spatial dimension
         if agg is True:
             self.assertNumpyAll(value.shape[-2:],(1,1))
Example #3
    def __init__(self,ops,serial=True,nprocs=1):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs
        
        self._subset_log = ocgis_lh.get_logger('subset')

        ## create the calculation engine
        if self.ops.calc is None:
            self.cengine = None
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size)
            
        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr
Example #4
    def __init__(self, ops, serial=True, nprocs=1, validate=True):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs

        subset_log = ocgis_lh.get_logger('subset')

        if validate:
            ocgis_lh('validating request datasets',
                     subset_log,
                     level=logging.DEBUG)
            ops.dataset.validate(ops=ops)

        ## create the calculation engine
        if self.ops.calc is None:
            self.cengine = None
        else:
            ocgis_lh('initializing calculation engine',
                     subset_log,
                     level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                                self.ops.calc,
                                                raw=self.ops.calc_raw,
                                                agg=self.ops.aggregate)

        ## check for snippet request in the operations dictionary. if there is
        ## one, the time range should be set in the operations dictionary.
        if self.ops.snippet is True:
            ##TODO: move snippet to iteration
            ocgis_lh('getting snippet bounds', subset_log)
            for rd in self.ops.dataset:
                ## snippet is not implemented for time regions
                if rd.time_region is not None:
                    exc = NotImplementedError(
                        'snippet is not implemented for time regions')
                    ocgis_lh(exc=exc, logger=subset_log)

                rd.level_range = [1, 1]
                ods = rd.ds
                ## load the first time slice if there is no calculation or the
                ## calculation does not use a temporal group.
                if self.cengine is None or (self.cengine is not None
                                            and self.cengine.grouping is None):
                    ##TODO: improve slicing to not load all time values in a more
                    ## elegant way.
                    ods._load_slice.update({'T': slice(0, 1)})
                ## snippet for the computation. this currently requires loading
                ## all the data from the time dimension into memory.
                ##TODO: more efficiently pull dates for monthly grouping (for
                ##example).
                else:
                    ods.temporal.set_grouping(self.cengine.grouping)
                    tgdim = ods.temporal.group
                    times = ods.temporal.value[tgdim.dgroups[0]]
                    rd.time_range = list(
                        ods.temporal.get_datetime([times.min(),
                                                   times.max()]))
Example #5
 def test_agg_raw(self):
     grouping = ['month']
     funcs = [{'func':'std','name':'std','ref':StandardDeviation,'kwds':{}}]
     raws = [True,False]
     aggs = [True,False]
     for raw,agg in itertools.product(raws,aggs):
         coll = self.get_collection(aggregate=agg)
         ce = OcgCalculationEngine(grouping,funcs,raw,agg)
         ret = ce.execute(coll)
         shape = ret.calc['tas']['std'].shape
         value,weights = ce._get_value_weights_(coll.variables['tas'])
         ## aggregated data should have a (1,1) spatial dimension
         if agg is True:
             self.assertNumpyAll(shape[-2:],(1,1))
         ## if raw data is used, the input values to a calculation should be
         ## returned with a different shape - aggregated spatial dimension
         if raw is True and agg is True:
             self.assertNumpyAll(value.shape[-2:],weights.shape)
             self.assertNumpyNotAll(value.shape[-2:],shape[-2:])
         if raw is True and agg is False:
             self.assertNumpyAll(shape[-3:],value.shape[-3:])
Example #6
    def validate_ops(cls, ops):
        from ocgis.api.parms.definition import OutputFormat

        def _raise_(msg, ocg_argument=OutputFormat):
            raise DefinitionValidationError(ocg_argument, msg)

        # we can only write one requestdataset to netCDF
        if len(ops.dataset) > 1 and ops.calc is None:
            msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                   'There are currently {dcount} RequestDatasets. Note, this is different than a '
                   'multifile dataset.'.format(dcount=len(ops.dataset)))
            _raise_(msg, OutputFormat)
        # we can write multivariate functions to netCDF however
        else:
            if ops.calc is not None and len(ops.dataset) > 1:
                # count the occurrences of these classes in the calculation list.
                klasses_to_check = [AbstractMultivariateFunction, MultivariateEvalFunction]
                multivariate_checks = []
                for klass in klasses_to_check:
                    for calc in ops.calc:
                        multivariate_checks.append(issubclass(calc['ref'], klass))
                if sum(multivariate_checks) != 1:
                    msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                           'There are currently {dcount} RequestDatasets. Note, this is different than a '
                           'multifile dataset.'.format(dcount=len(ops.dataset)))
                    _raise_(msg, OutputFormat)
                else:
                    # there is a multivariate calculation and this requires multiple request datasets
                    pass

        # clipped data which creates an arbitrary geometry may not be written to netCDF
        if ops.spatial_operation != 'intersects':
            msg = 'Only "intersects" spatial operation allowed for netCDF output. Arbitrary geometries may not currently be written.'
            _raise_(msg, OutputFormat)
        # data may not be aggregated either
        if ops.aggregate:
            msg = 'Data may not be aggregated for netCDF output. The aggregate parameter must be False.'
            _raise_(msg, OutputFormat)
        # either the input data CRS or WGS84 is required for data output
        if ops.output_crs is not None and not isinstance(ops.output_crs, CFWGS84):
            msg = 'CFWGS84 is the only acceptable overloaded output CRS at this time for netCDF output.'
            _raise_(msg, OutputFormat)
        # calculations on raw values are not relevant as no aggregation can occur anyway.
        if ops.calc is not None:
            if ops.calc_raw:
                msg = 'Calculations must be performed on original values (i.e. calc_raw=False) for netCDF output.'
                _raise_(msg)
            # no keyed output functions to netCDF
            if OcgCalculationEngine._check_calculation_members_(ops.calc, AbstractKeyedOutputFunction):
                msg = 'Keyed function output may not be written to netCDF.'
                _raise_(msg)
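
The first check above is the main constraint to remember: more than one RequestDataset may only be written to netCDF when a single multivariate function consumes them. A minimal sketch of tripping that check follows; the file paths are placeholders, and it assumes DefinitionValidationError is importable from ocgis.exc and that OcgOperations runs this validation when netCDF output is requested (assumptions based on the surrounding examples, not shown in this snippet).

from ocgis import OcgOperations, RequestDataset
from ocgis.exc import DefinitionValidationError

# two request datasets with no calculation may not be written to netCDF
rd1 = RequestDataset(uri='/path/to/tas_one.nc', variable='tas')
rd2 = RequestDataset(uri='/path/to/tas_two.nc', variable='tas')
try:
    OcgOperations(dataset=[rd1, rd2], output_format='nc')
except DefinitionValidationError as e:
    print(e)
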
Example #7
    def __init__(self, ops, serial=True, nprocs=1, validate=True):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs

        if validate:
            if env.VERBOSE: print('validating request datasets...')
            ops.dataset.validate()

        ## create the calculation engine
        if self.ops.calc is None:
            self.cengine = None
        else:
            if env.VERBOSE: print('initializing calculation engine...')
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                                self.ops.calc,
                                                raw=self.ops.calc_raw,
                                                agg=self.ops.aggregate)

        ## check for snippet request in the operations dictionary. if there is
        ## one, the time range should be set in the operations dictionary.
        if self.ops.snippet is True:
            ##TODO: move snippet to iteration
            if env.VERBOSE: print('getting snippet bounds...')
            for rd in self.ops.dataset:
                rd.level_range = [1, 1]
                ods = rd.ds
                ## load the first time slice if there is no calculation or the
                ## calculation does not use a temporal group.
                if self.cengine is None or (self.cengine is not None
                                            and self.cengine.grouping is None):
                    ##TODO: improve slicing to not load all time values
                    ods._load_slice.update({'T': slice(0, 1)})
                ## snippet for the computation. this currently requires loading
                ## all the data for the time dimension into memory.
                ##TODO: more efficiently pull dates for monthly grouping (for
                ##example).
                else:
                    ods.temporal.set_grouping(self.cengine.grouping)
                    tgdim = ods.temporal.group
                    times = ods.temporal.value[tgdim.dgroups[0]]
                    rd.time_range = [times.min(), times.max()]
Example #8
    def __init__(self,ops,request_base_size_only=False,progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()

        ## create the calculation engine
        if self.ops.calc == None or self._request_base_size_only == True:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size,
                                           progress=self._progress)
            self._has_multivariate_calculations = any([self.cengine._check_calculation_members_(self.cengine.funcs,k) \
             for k in [AbstractMultivariateFunction,MultivariateEvalFunction]])
            
        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr
Example #9
class SubsetOperation(object):
    '''
    :param :class:~`ocgis.OcgOperations` ops:
    :param bool request_base_size_only: If ``True``, return field objects following
     the spatial subset, performing as few operations as possible.
    :param :class:`ocgis.util.logging_ocgis.ProgressOcgOperations` progress:
    '''
    
    def __init__(self,ops,request_base_size_only=False,progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()

        ## create the calculation engine
        if self.ops.calc == None or self._request_base_size_only == True:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size,
                                           progress=self._progress)
            self._has_multivariate_calculations = any([self.cengine._check_calculation_members_(self.cengine.funcs,k) \
             for k in [AbstractMultivariateFunction,MultivariateEvalFunction]])
            
        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr
        
    def __iter__(self):
        ''':rtype: AbstractCollection'''
        
        ocgis_lh('beginning iteration',logger='conv.__iter__',level=logging.DEBUG)
        self._ugid_unique_store = []
        self._geom_unique_store = []
        
        ## simple iterator for serial operations
        for coll in self._iter_collections_():
            yield(coll)
        
    def _iter_collections_(self):
        '''
        :yields: :class:`~ocgis.SpatialCollection`
        '''
        
        ## multivariate calculations require datasets come in as a list with all
        ## variable inputs part of the same sequence.
        if self._has_multivariate_calculations:
            itr_rd = [[r for r in self.ops.dataset.itervalues()]]

        ## otherwise, geometry processing expects a single-element sequence per request dataset
        else:
            itr_rd = [[rd] for rd in self.ops.dataset.itervalues()]
        
        ## configure the progress object
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom,len,default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc,len,default=0)
        ## send some messages
        msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables)
        ocgis_lh(msg=msg,logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(self._progress.n_geometries)
        ocgis_lh(msg=msg,logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'.\
             format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg,logger=self._subset_log)
        
        ## process the data collections
        for rds in itr_rd:
            msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            ocgis_lh(msg=msg,logger=self._subset_log)
            
            for coll in self._process_subsettables_(rds):
                ## if there are calculations, do those now and return a new type of collection
                if self.cengine is not None:
                    ocgis_lh('Starting calculations.',
                             self._subset_log,
                             alias=coll.items()[0][1].keys()[0],
                             ugid=coll.keys()[0])
                    
                    ## look for any optimizations for temporal grouping.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')
                    ## execute the calculations
                    coll = self.cengine.execute(coll,file_only=self.ops.file_only,
                                                tgds=tgds)
                else:
                    ## if there are no calculations, mark progress to indicate
                    ## a geometry has been completed.
                    self._progress.mark()
                
                ## conversion of groups.
                if self.ops.output_grouping is not None:
                    raise(NotImplementedError)
                else:
                    ocgis_lh('subset yielding',self._subset_log,level=logging.DEBUG)
                    yield(coll)

    def _process_subsettables_(self,rds):
        '''
        :param rds: Sequence of :class:~`ocgis.RequestDataset` objects.
        :type rds: sequence
        :yields: :class:~`ocgis.SpatialCollection`
        '''
        ocgis_lh(msg='entering _process_subsettables_',logger=self._subset_log,level=logging.DEBUG)
        
        ## select headers
        if self.ops.headers is not None:
            headers = self.ops.headers
        else:
            if self.cengine is not None:
                if self._has_multivariate_calculations:
                    headers = constants.multi_headers
                else:
                    headers = constants.calc_headers
            else:
                headers = constants.raw_headers
                
        ## keyed output functions require appending headers regardless. there is
        ## only one keyed output function allowed in a request.
        if self.cengine is not None:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractKeyedOutputFunction):
                value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names']
                headers = list(headers) + value_keys
                ## remove the 'value' attribute headers as this is replaced by the
                ## keyed output names.
                try:
                    headers.remove('value')
                ## it may not be in the list because of a user overload
                except ValueError:
                    pass
            else:
                value_keys = None
        else:
            value_keys = None

        alias = '_'.join([r.name for r in rds])

        ocgis_lh('processing...',self._subset_log,alias=alias,level=logging.DEBUG)
        ## return the field object
        try:
            ## look for field optimizations
            if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                field = [self.ops.optimizations['fields'][rd.alias] for rd in rds]
            else:
                field = [rd.get(format_time=self.ops.format_time,
                                interpolate_spatial_bounds=self.ops.interpolate_spatial_bounds) for rd in rds]
            ## update the spatial abstraction to match the operations value. sfield
            ## will be none if the operation returns empty and it is allowed to have
            ## empty returns.
            for f in field:
                f.spatial.abstraction = self.ops.abstraction

            if len(field) > 1:
                try:
                    ## reset the variable uid and let the collection handle its assignment
                    variable_to_add = field[1].variables.first()
                    variable_to_add.uid = None
                    field[0].variables.add_variable(variable_to_add)
                    ## reset the field names and let these be auto-generated
                    for f in field:
                        f._name = None
                ## this will fail for optimizations as the fields are already joined
                except VariableInCollectionError:
                    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                        pass
                    else:
                        raise
            field = field[0]
        ## this error is related to subsetting by time or level. spatial subsetting
        ## occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed',
                         logger=self._subset_log,level=logging.WARN)
                coll = SpatialCollection(headers=headers)
                coll.add_field(1, None, None, name='_'.join([rd.name for rd in rds]))
                try:
                    yield(coll)
                finally:
                    return
            else:
                ocgis_lh(exc=ExtentError(message=str(e)),alias=rd.alias,logger=self._subset_log)
        
        ## set iterator based on presence of slice. slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [{}]
        else:
            itr = [{}] if self.ops.geom is None else self.ops.geom
        
        for coll in self._process_geometries_(itr,field,headers,value_keys,alias):
            yield(coll)
    
    def _process_geometries_(self,itr,field,headers,value_keys,alias):
        '''
        :param sequence itr: Contains geometry dictionaries to process. If there
         are no geometries to process, this will be a sequence of one element with
         an empty dictionary.
        :param :class:`ocgis.interface.Field` field: The field object to use for
         operations.
        :param sequence headers: Sequence of strings to use as headers for the
         creation of the collection.
        :param sequence value_keys: Sequence of strings to use as headers for the
         keyed output functions.
        :param str alias: The request data alias currently being processed.
        :yields: :class:~`ocgis.SpatialCollection`
        '''
        ## loop over the iterator
        for gd in itr:
            ## always work with a new geometry dictionary
            gd = deepcopy(gd)
            ## CFRotatedPole requires special treatment. only do this if a subset
            ## geometry is available. this variable is needed to determine if
            ## backtransforms are necessary.
            original_rotated_pole_crs = None
            if isinstance(field.spatial.crs,CFRotatedPole):
                ## only transform if there is a subset geometry
                if len(gd) > 0:
                    ## store row and column dimension metadata and names before
                    ## transforming, as this information is lost without row and
                    ## column dimensions on the transformed grid.
                    original_row_column_metadata = {'row':{'name':field.spatial.grid.row.name,
                                                           'meta':field.spatial.grid.row.meta},
                                                    'col':{'name':field.spatial.grid.col.name,
                                                           'meta':field.spatial.grid.col.meta}}
                    ## reset the geometries
                    field.spatial._geom = None
                    ## get the new grid dimension
                    field.spatial.grid = get_rotated_pole_spatial_grid_dimension(field.spatial.crs,field.spatial.grid)
                    ## update the CRS. copy the original CRS for possible later
                    ## transformation back to rotated pole.
                    original_rotated_pole_crs = deepcopy(field.spatial.crs)
                    field.spatial.crs = CFWGS84()
            
            ## initialize the collection object to store the subsetted data. if
            ## the output CRS differs from the field's CRS, adjust accordingly 
            ## when initializing.
            if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs:
                collection_crs = self.ops.output_crs
            else:
                collection_crs = field.spatial.crs
                
            coll = SpatialCollection(crs=collection_crs,headers=headers,meta=gd.get('meta'),
                                     value_keys=value_keys)
            
            ## reference variables from the geometry dictionary
            geom = gd.get('geom')
            ## keep this around for the collection creation
            coll_geom = deepcopy(geom)
            crs = gd.get('crs')
            
            ## if there is a spatial abstraction, ensure it may be loaded.
            if self.ops.abstraction is not None:
                try:
                    getattr(field.spatial.geom,self.ops.abstraction)
                except ImproperPolygonBoundsError:
                    exc = ImproperPolygonBoundsError('A "polygon" spatial abstraction is not available without the presence of bounds.')
                    ocgis_lh(exc=exc,logger='subset')
                except Exception as e:
                    ocgis_lh(exc=e,logger='subset')
                    
            ## if there is a snippet, return the first realization, time, and level
            if self.ops.snippet:
                field = field[0,0,0,:,:]
            ## if there is a slice, use it to subset the field.
            elif self.ops.slice is not None:
                field = field.__getitem__(self.ops.slice)

            ## see if the selection crs matches the field's crs
            if crs is not None and crs != field.spatial.crs:
                geom = project_shapely_geometry(geom,crs.sr,field.spatial.crs.sr)
                crs = field.spatial.crs
            ## if the geometry is a point, we need to buffer it...
            if type(geom) in [Point,MultiPoint]:
                ocgis_lh(logger=self._subset_log,msg='buffering point geometry',level=logging.DEBUG)
                geom = geom.buffer(self.ops.search_radius_mult*field.spatial.grid.resolution)
                ## update the geometry to store in the collection
                coll_geom = deepcopy(geom)
            
            ## get the ugid following geometry manipulations
            if 'properties' in gd and 'UGID' in gd['properties']:
                ugid = gd['properties']['UGID']
            else:
                ugid = 1
                
            if geom is None:
                msg = 'No selection geometry. Returning all data. Assigning UGID as 1.'
            else:
                msg = 'Subsetting with selection geometry having UGID={0}'.format(ugid)
            ocgis_lh(msg=msg,logger=self._subset_log)
                
            ## check for unique ugids. this is an issue with point subsetting
            ## as the buffer radius changes by dataset.
            if ugid in self._ugid_unique_store and geom is not None:
                ## only update if the geometry is unique
                if not any([__.almost_equals(geom) for __ in self._geom_unique_store]):
                    prev_ugid = ugid
                    ugid = max(self._ugid_unique_store) + 1
                    self._ugid_unique_store.append(ugid)
                    msg = 'Updating UGID {0} to {1} to maintain uniqueness.'.format(prev_ugid,ugid)
                    ocgis_lh(msg,self._subset_log,level=logging.WARN,alias=alias,ugid=ugid)
                else:
                    self._geom_unique_store.append(geom)
            else:
                self._ugid_unique_store.append(ugid)
                self._geom_unique_store.append(geom)
                            
            ## try to update the properties
            try:
                gd['properties']['UGID'] = ugid
            except KeyError:
                if not isinstance(gd,dict):
                    raise
                
            ## unwrap the data if it is geographic and 360
            if geom is not None and crs == CFWGS84():
                if CFWGS84.get_is_360(field.spatial):
                    ocgis_lh('unwrapping selection geometry',self._subset_log,alias=alias,ugid=ugid,level=logging.DEBUG)
                    geom = Wrapper().unwrap(geom)
            ## perform the spatial operation
            if geom is not None:
                try:
                    if self.ops.spatial_operation == 'intersects':
                        sfield = field.get_intersects(geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                                      select_nearest=self.ops.select_nearest)
                    elif self.ops.spatial_operation == 'clip':
                        sfield = field.get_clip(geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                                select_nearest=self.ops.select_nearest)
                    else:
                        ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation))
                except EmptySubsetError as e:
                    if self.ops.allow_empty:
                        ocgis_lh(alias=alias,ugid=ugid,msg='empty geometric operation but empty returns allowed',level=logging.WARN)
                        sfield = None
                    else:
                        msg = str(e) + ' This typically means the selection geometry falls outside the spatial domain of the target dataset.'
                        ocgis_lh(exc=ExtentError(message=msg),alias=alias,logger=self._subset_log)
            else:
                sfield = field
            
            ## if the base size is being requested, bypass the rest of the
            ## operations.
            if self._request_base_size_only == False:
                ## if empty returns are allowed, there may be an empty field
                if sfield is not None:
                    ## aggregate if requested
                    if self.ops.aggregate:
                        ocgis_lh('executing spatial average',self._subset_log,alias=alias,ugid=ugid)
                        sfield = sfield.get_spatially_aggregated(new_spatial_uid=ugid)
                    
                    ## wrap the returned data.
                    if not env.OPTIMIZE_FOR_CALC:
                        if CFWGS84.get_is_360(sfield.spatial):
                            if self.ops.output_format != 'nc' and self.ops.vector_wrap:
                                ocgis_lh('wrapping output geometries',self._subset_log,alias=alias,ugid=ugid,
                                         level=logging.DEBUG)
                                ## modifying these values in place will change the values
                                ## in the base field. a copy is necessary.
                                sfield.spatial = deepcopy(sfield.spatial)
                                sfield.spatial.crs.wrap(sfield.spatial)
                                
                    ## check for all masked values
                    if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False:
                        for variable in sfield.variables.itervalues():
                            ocgis_lh(msg='Fetching data for variable with alias "{0}".'.format(variable.alias),
                                     logger=self._subset_log)
                            if variable.value.mask.all():
                                ## masked data may be okay depending on other operational
                                ## conditions.
                                if self.ops.snippet or self.ops.allow_empty or (self.ops.output_format == 'numpy' and self.ops.allow_empty):
                                    if self.ops.snippet:
                                        ocgis_lh('all masked data encountered but allowed for snippet',
                                                 self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                    if self.ops.allow_empty:
                                        ocgis_lh('all masked data encountered but empty returns allowed',
                                                 self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                    if self.ops.output_format == 'numpy':
                                        ocgis_lh('all masked data encountered but numpy data being returned allowed',
                                                 logger=self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                else:
                                    ## if the geometry is also masked, it is an empty spatial
                                    ## operation.
                                    if sfield.spatial.abstraction_geometry.value.mask.all():
                                        ocgis_lh(exc=EmptyData,logger=self._subset_log)
                                    ## if none of the other conditions are met, raise the masked data error
                                    else:
                                        ocgis_lh(logger=self._subset_log,exc=MaskedDataError(),alias=alias,ugid=ugid)
                    
                    ## transform back to rotated pole if necessary
                    if original_rotated_pole_crs is not None:
                        if self.ops.output_crs is None and not isinstance(self.ops.output_crs,CFWGS84):
                            # copy the spatial mask to the new spatial array
                            spatial_mask_before_transform = deepcopy(sfield.spatial.get_mask())
                            # need to load the values before proceeding. source indices will disappear.
                            for variable in sfield.variables.itervalues():
                                variable.value
                            # reset the geometries
                            sfield.spatial._geom = None
                            sfield.spatial.grid = get_rotated_pole_spatial_grid_dimension(
                             original_rotated_pole_crs,sfield.spatial.grid,inverse=True,
                             rc_original=original_row_column_metadata)
                            # update the grid mask with the previous spatial mask
                            sfield.spatial.grid.value.mask = spatial_mask_before_transform
                            ## update the uid mask to match the spatial mask
                            sfield.spatial.uid = np.ma.array(sfield.spatial.uid,mask=spatial_mask_before_transform)
                            sfield.spatial.crs = original_rotated_pole_crs

                    ## update the coordinate system of the data output
                    if self.ops.output_crs is not None:
                        ## if the geometry is not None, it may need to be projected to match
                        ## the output crs.
                        if geom is not None and crs != self.ops.output_crs:
                            geom = project_shapely_geometry(geom,crs.sr,self.ops.output_crs.sr)
                            coll_geom = deepcopy(geom)
                        ## update the coordinate reference system of the spatial
                        ## dimension.
                        try:
                            sfield.spatial.update_crs(self.ops.output_crs)
                        ## this is likely a rotated pole origin
                        except RuntimeError as e:
                            if isinstance(sfield.spatial.crs,CFRotatedPole):
                                assert(isinstance(self.ops.output_crs,WGS84))
                                sfield.spatial._geom = None
                                sfield.spatial.grid = get_rotated_pole_spatial_grid_dimension(
                                 sfield.spatial.crs,sfield.spatial.grid)
                                sfield.spatial.crs = self.ops.output_crs
                            else:
                                ocgis_lh(exc=e,logger=self._subset_log)
                
            ## the geometry may need to be wrapped or unwrapped depending on
            ## the vector wrap situation
            name = alias if sfield is None else None
            coll.add_field(ugid, coll_geom, sfield, properties=gd.get('properties'), name=name)

            yield(coll)
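
As a quick orientation for the class above, a hedged driver sketch follows. The request dataset path is a placeholder, and the import path for SubsetOperation (ocgis.api.subset) is an assumption based on the module layout implied here rather than something shown in the snippet.

from ocgis import OcgOperations, RequestDataset
from ocgis.api.subset import SubsetOperation  # assumed import path

rd = RequestDataset(uri='/path/to/tas.nc', variable='tas')
ops = OcgOperations(dataset=rd,
                    calc=[{'func': 'mean', 'name': 'mean'}],
                    calc_grouping=['month'])

# iterating the SubsetOperation yields one SpatialCollection per selection
# geometry; with no geometry requested, the entire spatial domain is returned
so = SubsetOperation(ops)
for coll in so:
    print(coll)
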
Example #10
def compute(ops, tile_dimension, verbose=False, use_optimizations=True):
    """
    Used for computations on large arrays where memory limitations are a consideration. It is also useful for
    extracting data from a server that has limitations on the size of requested data arrays. This function creates an
    empty destination NetCDF file that is then filled by executing the operations on chunks of the requested
    target dataset(s) and filling the destination NetCDF file.

    :param ops: The target operations to tile. There must be a calculation associated with
     the operations.
    :type ops: :class:`ocgis.OcgOperations`
    :param int tile_dimension: The target tile/chunk dimension. This integer value must be greater than zero.
    :param bool verbose: If ``True``, print more verbose information to terminal.
    :param bool use_optimizations: If ``True``, cache :class:`Field` and :class:`TemporalGroupDimension` objects for
     reuse during tile iteration.
    :raises: AssertionError, ValueError
    :returns: Path to the output NetCDF file.
    :rtype: str

    >>> from ocgis import RequestDataset, OcgOperations
    >>> from ocgis.util.large_array import compute
    >>> rd = RequestDataset(uri='/path/to/file',variable='tas')
    >>> ops = OcgOperations(dataset=rd,calc=[{'func':'mean','name':'mean'}],output_format='nc')
    >>> ret = compute(ops, 25)
    """

    # validate arguments
    assert isinstance(ops, OcgOperations)
    assert ops.calc is not None
    assert ops.output_format == "nc"

    # ensure that progress is not showing 100% at first
    if ops.callback is not None:
        orgcallback = ops.callback

        def zeropercentagecallback(p, m):
            orgcallback(0.0, m)

        ops.callback = zeropercentagecallback

    tile_dimension = int(tile_dimension)
    if tile_dimension <= 0:
        raise (ValueError('"tile_dimension" must be greater than 0'))

    # determine if we are working with a multivariate function
    if OcgCalculationEngine._check_calculation_members_(ops.calc, AbstractMultivariateFunction):
        # only one multivariate calculation allowed
        assert len(ops.calc) == 1
        has_multivariate = True
    else:
        # only one calculation allowed
        assert len(ops.dataset) == 1
        has_multivariate = False

    # work on a copy of the operations to create the template file
    ops_file_only = deepcopy(ops)
    # we need the output to be file only for the first request
    ops_file_only.file_only = True
    # save the environment flag for calculation optimizations.
    orig_oc = ocgis.env.OPTIMIZE_FOR_CALC

    try:
        # tell the software we are optimizing for calculations
        ocgis.env.OPTIMIZE_FOR_CALC = True

        # first, write the template file
        if verbose:
            print("getting fill file...")
        fill_file = ops_file_only.execute()
        # if there is a geometry, we have to find the offset for the slice. we
        # also need to account for the subset mask.
        if ops.geom is not None:
            if verbose:
                print("geometry subset is present. calculating slice offsets...")
            ops_offset = deepcopy(ops)
            ops_offset.output_format = "numpy"
            ops_offset.calc = None
            ops_offset.agg_selection = True
            ops_offset.snippet = False
            coll = ops_offset.execute()

            for row in coll.get_iter_melted():
                # assert the values are not loaded...
                assert row["variable"]._value is None
                # assert only 3 or 4 dimensional data is being used
                assert row["field"].shape_as_dict["R"] == 1

            ref_spatial = coll[1][ops_offset.dataset.first().name].spatial
            try:
                row_offset = ref_spatial.grid.row._src_idx[0]
                col_offset = ref_spatial.grid.col._src_idx[0]
            except (AttributeError, TypeError):
                # Likely no row and column for a 2-dimensional grid.
                row_offset = ref_spatial.grid._src_idx["row"][0]
                col_offset = ref_spatial.grid._src_idx["col"][0]
            mask_spatial = ref_spatial.get_mask()
        # otherwise the offset is zero...
        else:
            row_offset = 0
            col_offset = 0
            mask_spatial = None

        # get the shape for the tile schema
        if verbose:
            print("getting tile schema shape inputs...")
        #        if has_multivariate == False:
        #            shp_variable = '{0}_{1}'.format(ops.calc[0]['name'],ops.dataset[0].alias)
        #        else:
        #            shp_variable = ops.calc[0]['name']
        shp_variable = ops.calc[0]["name"]
        template_rd = ocgis.RequestDataset(uri=fill_file, variable=shp_variable)
        template_field = template_rd.get()
        shp = template_field.shape[-2:]

        if use_optimizations:
            # if there is a calculation grouping, optimize for it. otherwise, pass
            # this value as None.
            try:
                tgd_field = ops.dataset.first().get()
                template_tgd = tgd_field.temporal.get_grouping(deepcopy(ops.calc_grouping))
                if not has_multivariate:
                    key = ops.dataset.first().name
                else:
                    key = "_".join([__.name for __ in ops.dataset.itervalues()])
                optimizations = {"tgds": {key: template_tgd}}
            except TypeError:
                optimizations = None

            # load the fields and pass those for optimization
            field_optimizations = {}
            for rd in ops.dataset.itervalues():
                gotten_field = rd.get(format_time=ops.format_time)
                field_optimizations.update({rd.name: gotten_field})
            optimizations = optimizations or {}
            optimizations["fields"] = field_optimizations
        else:
            optimizations = None

        if verbose:
            print("getting tile schema...")
        schema = tile.get_tile_schema(shp[0], shp[1], tile_dimension)
        lschema = len(schema)

        # create a new callback function that scales each tile's 0-100% progress by the number of tiles and adds the progress already completed
        if ops.callback is not None:
            percentageDone = 0
            callback = ops.callback

            def newcallback(p, m):
                p = (p / lschema) + percentageDone
                orgcallback(p, m)

            ops.callback = newcallback

        if verbose:
            print("output file is: {0}".format(fill_file))
            print("tile count: {0}".format(lschema))

        fds = nc.Dataset(fill_file, "a")
        try:
            if verbose:
                progress = ProgressBar("tiles progress")
            if ops.callback is not None and callback:
                callback(0, "Initializing calculation")
            for ctr, indices in enumerate(schema.itervalues(), start=1):
                # appropriately adjust the slices to account for the spatial subset
                row = [ii + row_offset for ii in indices["row"]]
                col = [ii + col_offset for ii in indices["col"]]

                # copy the operations and modify arguments
                ops_slice = deepcopy(ops)
                ops_slice.geom = None
                ops_slice.slice = [None, None, None, row, col]
                ops_slice.output_format = "numpy"
                ops_slice.optimizations = optimizations
                # return the object slice
                ret = ops_slice.execute()
                for field_map in ret.itervalues():
                    for field in field_map.itervalues():
                        field_shape = field.shape_as_dict
                        for alias, variable in field.variables.iteritems():
                            vref = fds.variables[alias]
                            assert isinstance(variable.value, np.ma.MaskedArray)
                            # we need to remove the offsets to adjust for the zero-based
                            # fill file.
                            slice_row = slice(row[0] - row_offset, row[1] - row_offset)
                            slice_col = slice(col[0] - col_offset, col[1] - col_offset)
                            # if there is a spatial mask, update accordingly
                            if mask_spatial is not None:
                                set_variable_spatial_mask(variable, mask_spatial, slice_row, slice_col)
                            # squeeze out extra dimensions from ocgis
                            fill_value = np.squeeze(variable.value)
                            # fill the netCDF container variable adjusting for shape
                            if len(vref.shape) == 3:
                                reshape = (field_shape["T"], field_shape["Y"], field_shape["X"])
                                vref[:, slice_row, slice_col] = fill_value.reshape(*reshape)
                            elif len(vref.shape) == 4:
                                reshape = (field_shape["T"], field_shape["Z"], field_shape["Y"], field_shape["X"])
                                vref[:, :, slice_row, slice_col] = fill_value.reshape(*reshape)
                            else:
                                raise (NotImplementedError(vref.shape))

                            # write the data to disk
                            fds.sync()
                if verbose:
                    progress.progress(int((float(ctr) / lschema) * 100))
                if ops.callback is not None and callback:
                    percentageDone = (float(ctr) / lschema) * 100
        finally:
            fds.close()
    finally:
        ocgis.env.OPTIMIZE_FOR_CALC = orig_oc
    if verbose:
        progress.endProgress()
        print("complete.")

    return fill_file
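
A short follow-on to the docstring example, sketching how the tiled result might be checked. The paths are placeholders; the only behavior relied on is that compute returns the path of the filled netCDF file and that the tile-filling loop above writes the calculation under its 'name'.

import netCDF4 as nc

from ocgis import OcgOperations, RequestDataset
from ocgis.util.large_array import compute

rd = RequestDataset(uri='/path/to/tas.nc', variable='tas')
ops = OcgOperations(dataset=rd,
                    calc=[{'func': 'mean', 'name': 'mean'}],
                    calc_grouping=['month'],
                    output_format='nc')
ret = compute(ops, 25, verbose=True)

# the filled file should contain the calculation variable named in ops.calc
with nc.Dataset(ret) as ds:
    print(ds.variables['mean'].shape)
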
Example #11
 def _validate_(self):
     ocgis_lh(logger='operations',msg='validating operations')
     
     def _raise_(msg,obj=OutputFormat):
         e = DefinitionValidationError(obj,msg)
         ocgis_lh(exc=e,logger='operations')
         
     ## there are a bunch of constraints on the netCDF format
     if self.output_format == 'nc':
         ## we can only write one requestdataset to netCDF
         if len(self.dataset) > 1 and self.calc is None:
             msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                    'There are currently {dcount} RequestDatasets. Note, this is different than a '
                    'multifile dataset.'.format(dcount=len(self.dataset)))
             _raise_(msg,OutputFormat)
         ## we can write multivariate functions to netCDF however
         else:
             if self.calc is not None and len(self.dataset) > 1:
                 ## count the occurrences of these classes in the calculation
                 ## list.
                 klasses_to_check = [AbstractMultivariateFunction,MultivariateEvalFunction]
                 multivariate_checks = []
                 for klass in klasses_to_check:
                     for calc in self.calc:
                         multivariate_checks.append(issubclass(calc['ref'],klass))
                 if sum(multivariate_checks) != 1:
                     msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                            'There are currently {dcount} RequestDatasets. Note, this is different than a '
                            'multifile dataset.'.format(dcount=len(self.dataset)))
                     _raise_(msg,OutputFormat)
                 else:
                     ## there is a multivariate calculation and this requires
                     ## multiple request datasets
                     pass
         ## clipped data which creates an arbitrary geometry may not be written
         ## to netCDF
         if self.spatial_operation != 'intersects':
             msg = 'Only "intersects" spatial operation allowed for netCDF output. Arbitrary geometries may not currently be written.'
             _raise_(msg,OutputFormat)
         ## data may not be aggregated either
         if self.aggregate:
             msg = 'Data may not be aggregated for netCDF output. The aggregate parameter must be False.'
             _raise_(msg,OutputFormat)
         ## either the input data CRS or WGS84 is required for data output
         if self.output_crs is not None and not isinstance(self.output_crs,CFWGS84):
             msg = 'CFWGS84 is the only acceptable overloaded output CRS at this time for netCDF output.'
             _raise_(msg,OutputFormat)
         ## calculations on raw values are not relevant as no aggregation can
         ## occur anyway.
         if self.calc is not None:
             if self.calc_raw:
                 msg = 'Calculations must be performed on original values (i.e. calc_raw=False) for netCDF output.'
                 _raise_(msg)
             ## no keyed output functions to netCDF
             if OcgCalculationEngine._check_calculation_members_(self.calc,AbstractKeyedOutputFunction):
                 msg = 'Keyed function output may not be written to netCDF.'
                 _raise_(msg)
         
     ## collect projections for the datasets. None is returned if one is not
     ## parsable. the WGS84 default is actually applied in the RequestDataset
     ## object.
     projections = []
     for rd in self.dataset.itervalues():
         if not any([_ == rd.crs for _ in projections]):
             projections.append(rd.crs)
     ## if there is no output CRS and the projections differ, raise an exception.
     ## however, it is okay to have data with different projections in the
     ## numpy output.
     if len(projections) > 1 and self.output_format != 'numpy': #@UndefinedVariable
         if self.output_crs is None:
             _raise_('Dataset coordinate reference systems must be equivalent if no output CRS is chosen.',obj=OutputCRS)
     ## clip and/or aggregation operations may not be written back to CFRotatedPole
     ## at this time. hence, the output crs must be set to CFWGS84.
     if CFRotatedPole in map(type,projections):
         if self.output_crs is not None and not isinstance(self.output_crs,WGS84):
             msg = ('{0} data may only be written to the same coordinate system (i.e. "output_crs=None") '
                    'or {1}.').format(CFRotatedPole.__name__,CFWGS84.__name__)
             _raise_(msg,obj=OutputCRS)
         if self.aggregate or self.spatial_operation == 'clip':
             msg = ('{0} data if clipped or spatially averaged must be written to '
                    '{1}. The "output_crs" is being updated to {2}.').format(
                    CFRotatedPole.__name__,CFWGS84.__name__,
                    CFWGS84.__name__)
             ocgis_lh(level=logging.WARN,msg=msg,logger='operations')
             self._get_object_('output_crs')._value = CFWGS84()
     ## only WGS84 may be written to GeoJSON
     if self.output_format == 'geojson':
         if any([element != WGS84() for element in projections if element is not None]):
             _raise_('Only data with a WGS84 projection may be written to GeoJSON.')
         if self.output_crs is not None:
             if self.output_crs != WGS84():
                 _raise_('Only data with a WGS84 projection may be written to GeoJSON.')
     
     ## snippet is only relevant for subsetting, not for operations with a
     ## calculation or time region
     if self.snippet:
         if self.calc is not None:
             _raise_('Snippets are not implemented for calculations. Apply a limiting time range for faster responses.',obj=Snippet)
         for rd in self.dataset.itervalues():
             if rd.time_region is not None:
                 _raise_('Snippets are not implemented for time regions.',obj=Snippet)
     
     ## no slicing with a geometry - can easily lead to extent errors
     if self.slice is not None:
         assert(self.geom is None)
     
     ## file only operations only valid for netCDF and calculations.
     if self.file_only:
         if self.output_format != 'nc':
             _raise_('Only netCDF may be written with file_only as True.',obj=FileOnly)
         if self.calc is None:
             _raise_('File only outputs are only relevant for computations.',obj=FileOnly)
     
     ## validate any calculations against the operations object. if the calculation
     ## is a string eval function do not validate.
     if self.calc is not None:
         if self._get_object_('calc')._is_eval_function:
             if self.calc_grouping is not None:
                 msg = 'Calculation groups are not applicable for string function expressions.'
                 _raise_(msg,obj=CalcGrouping)
         else:
             for c in self.calc:
                 c['ref'].validate(self)
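
The netCDF branch above encodes several hard constraints (intersects-only spatial operations, no spatial aggregation, no calc_raw). A hedged sketch of two of them follows; the path is a placeholder, it assumes DefinitionValidationError is importable from ocgis.exc, and it assumes _validate_ runs when the operations object is constructed, which is consistent with, but not shown in, this snippet.

from ocgis import OcgOperations, RequestDataset
from ocgis.exc import DefinitionValidationError

rd = RequestDataset(uri='/path/to/tas.nc', variable='tas')

# netCDF output may not be spatially aggregated
try:
    OcgOperations(dataset=rd, output_format='nc', aggregate=True)
except DefinitionValidationError as e:
    print(e)

# calculations for netCDF output must use original values (calc_raw=False)
try:
    OcgOperations(dataset=rd, output_format='nc', calc_raw=True,
                  calc=[{'func': 'mean', 'name': 'mean'}],
                  calc_grouping=['month'])
except DefinitionValidationError as e:
    print(e)
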
Example #12
class SubsetOperation(object):
    
    def __init__(self,ops,serial=True,nprocs=1):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs
        
        self._subset_log = ocgis_lh.get_logger('subset')

        ## create the calculation engine
        if self.ops.calc is None:
            self.cengine = None
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size)
            
        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr
        
    def __iter__(self):
        ''':rtype: AbstractCollection'''
        
        ocgis_lh('beginning iteration',logger='conv.__iter__',level=logging.DEBUG)
                
        ## simple iterator for serial operations
        if self.serial:
            for coll in self._iter_collections_():
                yield(coll)
        ## use a multiprocessing pool returning unordered geometries
        ## for the parallel case
        else:
            raise(ocgis_lh(exc=NotImplementedError('multiprocessing is not available')))

    def _process_geometries_(self,rds):
        ocgis_lh(msg='entering _process_geometries_',logger=self._subset_log,level=logging.DEBUG)
        
        ## select headers
        if self.ops.headers is not None:
            headers = self.ops.headers
        else:
            if self.cengine is not None:
                if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractMultivariateFunction):
                    headers = constants.multi_headers
                else:
                    headers = constants.calc_headers
            else:
                headers = constants.raw_headers
                
        ## keyed output functions require appending headers regardless. there is
        ## only one keyed output function allowed in a request.
        if self.cengine is not None:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractKeyedOutputFunction):
                value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names']
                headers = list(headers) + value_keys
                ## remove the 'value' attribute headers as this is replaced by the
                ## keyed output names.
                try:
                    headers.remove('value')
                ## it may not be in the list because of a user overload
                except ValueError:
                    pass
            else:
                value_keys = None
        else:
            value_keys = None
                    
        alias = '_'.join([r.alias for r in rds])
        ocgis_lh('processing...',self._subset_log,alias=alias)
        ## return the field object
        try:
            field = [rd.get(format_time=self.ops.format_time) for rd in rds]
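            ## for multivariate calculations, fold the second request's variable into the first field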
            if len(field) > 1:
                field[0].variables.add_variable(field[1].variables.first())
            field = field[0]
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed',
                         logger=self._subset_log,level=logging.WARN)
                coll = SpatialCollection(headers=headers)
                coll.add_field(1,None,rd.alias,None)
                try:
                    yield(coll)
                finally:
                    return
            else:
                ocgis_lh(exc=ExtentError(message=str(e)),alias=rd.alias,logger=self._subset_log)
                
        ## set iterator based on presence of slice. slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [{}]
        else:
            itr = [{}] if self.ops.geom is None else self.ops.geom
                
        ## loop over the iterator
        for gd in itr:
            ## initialize the collection object to store the subsetted data. if
            ## the output CRS differs from the field's CRS, adjust accordingly 
            ## when initializing.
            if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs:
                collection_crs = self.ops.output_crs
            else:
                collection_crs = field.spatial.crs
                
            coll = SpatialCollection(crs=collection_crs,headers=headers,meta=gd.get('meta'),
                                     value_keys=value_keys)
            
            ## reference variables from the geometry dictionary
            geom = gd.get('geom')
            
            crs = gd.get('crs')
            
            if 'properties' in gd and 'UGID' in gd['properties']:
                ugid = gd['properties']['UGID']
            else:
                ## fall back to the lowercase "ugid" key in case the shapefile is not
                ## perfectly formed. if there is no geometry dictionary at all, use the
                ## default geometry identifier.
                if len(gd) == 0:
                    ugid = 1
                else:
                    ugid = gd['properties']['ugid']
                    
            ocgis_lh('processing',self._subset_log,level=logging.DEBUG,alias=alias,ugid=ugid)
            
            ## if there is a spatial abstraction, ensure it may be loaded.
            if self.ops.abstraction is not None:
                try:
                    getattr(field.spatial.geom,self.ops.abstraction)
                except ImproperPolygonBoundsError:
                    exc = ImproperPolygonBoundsError('A "polygon" spatial abstraction is not available without the presence of bounds.')
                    ocgis_lh(exc=exc,logger='subset')
                except Exception as e:
                    ocgis_lh(exc=e,logger='subset')
                    
            ## if there is a snippet, return the first realization, time, and level
            if self.ops.snippet:
                field = field[0,0,0,:,:]
            ## if there is a slice, use it to subset the field.
            elif self.ops.slice is not None:
                field = field.__getitem__(self.ops.slice)
                
            ## see if the selection crs matches the field's crs
            if crs is not None and crs != field.spatial.crs:
                geom = project_shapely_geometry(geom,crs.sr,field.spatial.crs.sr)
                crs = field.spatial.crs
            ## if the geometry is a point, we need to buffer it...
            if type(geom) in [Point,MultiPoint]:
                ocgis_lh(logger=self._subset_log,msg='buffering point geometry',level=logging.DEBUG)
                geom = geom.buffer(self.ops.search_radius_mult*field.spatial.grid.resolution)
            ## unwrap the data if it is geographic and 360
            if geom is not None and crs == CFWGS84():
                if CFWGS84.get_is_360(field.spatial):
                    ocgis_lh('unwrapping selection geometry',self._subset_log,alias=alias,ugid=ugid)
                    geom = Wrapper().unwrap(geom)
            ## perform the spatial operation
            if geom is not None:
                try:
                    if self.ops.spatial_operation == 'intersects':
                        sfield = field.get_intersects(geom)
                    elif self.ops.spatial_operation == 'clip':
                        sfield = field.get_clip(geom)
                    else:
                        ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation))
                except EmptySubsetError as e:
                    if self.ops.allow_empty:
                        ocgis_lh(alias=alias,ugid=ugid,msg='empty geometric operation but empty returns allowed',level=logging.WARN)
                        sfield = None
                    else:
                        ocgis_lh(exc=ExtentError(message=str(e)),alias=alias,logger=self._subset_log)
            else:
                sfield = field
            
            ## if empty returns are allowed, there may be an empty field
            if sfield is not None:
                ## aggregate if requested
                if self.ops.aggregate:
                    sfield = sfield.get_spatially_aggregated(new_spatial_uid=ugid)
                
                ## wrap the returned data.
                if not env.OPTIMIZE_FOR_CALC:
                    if CFWGS84.get_is_360(sfield.spatial):
                        if self.ops.output_format != 'nc' and self.ops.vector_wrap:
                            ocgis_lh('wrapping output geometries',self._subset_log,alias=alias,ugid=ugid)
                            sfield.spatial.crs.wrap(sfield.spatial)
                            
                ## check for all masked values
                if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False:
                    for variable in sfield.variables.itervalues():
                        if variable.value.mask.all():
                            ## masked data may be okay depending on other operational
                            ## conditions.
                            if self.ops.snippet or self.ops.allow_empty or (self.ops.output_format == 'numpy' and self.ops.allow_empty):
                                if self.ops.snippet:
                                    ocgis_lh('all masked data encountered but allowed for snippet',
                                             self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                if self.ops.allow_empty:
                                    ocgis_lh('all masked data encountered but empty returns allowed',
                                             self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                if self.ops.output_format == 'numpy':
                                    ocgis_lh('all masked data encountered but numpy data being returned allowed',
                                             logger=self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                            else:
                                ## if the geometry is also masked, it is an empty spatial
                                ## operation.
                                if sfield.spatial.abstraction_geometry.value.mask.all():
                                    ocgis_lh(exc=EmptyData,logger=self._subset_log)
                                ## if none of the other conditions are met, raise the masked data error
                                else:
                                    ocgis_lh(logger=self._subset_log,exc=MaskedDataError(),alias=alias,ugid=ugid)
            
            ## update the coordinate system of the data output
            if self.ops.output_crs is not None:
                ## if the geometry is not None, it may need to be projected to match
                ## the output crs.
                if geom is not None and crs != self.ops.output_crs:
                    geom = project_shapely_geometry(geom,crs.sr,self.ops.output_crs.sr)
                    
                sfield.spatial.update_crs(self.ops.output_crs)
            
            ## update the spatial abstraction to match the operations value. sfield
            ## will be none if the operation returns empty and it is allowed to have
            ## empty returns.
            if sfield is not None:
                sfield.spatial.abstraction = self.ops.abstraction
            
            coll.add_field(ugid,geom,alias,sfield,properties=gd.get('properties'))
            
            yield(coll)
    
    def _iter_collections_(self):
        
        ocgis_lh('{0} request dataset(s) to process'.format(len(self.ops.dataset)),'conv._iter_collections_')
        
        if self.cengine is None:
            itr_rd = ([rd] for rd in self.ops.dataset)
        else:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractMultivariateFunction):
                itr_rd = [[r for r in self.ops.dataset]]
            else:
                itr_rd = ([rd] for rd in self.ops.dataset)
        
        for rds in itr_rd:
            for coll in self._process_geometries_(rds):
                ## if there are calculations, do those now and return a new type of collection
                if self.cengine is not None:
                    ocgis_lh('performing computations',
                             self._subset_log,
                             alias=coll.items()[0][1].keys()[0],
                             ugid=coll.keys()[0])
                    coll = self.cengine.execute(coll)
                
                ## conversion of groups.
                if self.ops.output_grouping is not None:
                    raise(NotImplementedError)
                else:
                    ocgis_lh('subset yielding',self._subset_log,level=logging.DEBUG)
                    yield(coll)
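A rough usage sketch for the SubsetOperation class above, assuming an ocgis.OcgOperations object supplies the attributes referenced in __init__ (calc, calc_grouping, aggregate, geom, output_format, and so on); the request dataset URI and the 'state_boundaries' geometry key are placeholders.

import ocgis

rd = ocgis.RequestDataset(uri='tas.nc', variable='tas')  # placeholder input file
ops = ocgis.OcgOperations(dataset=rd,
                          geom='state_boundaries',  # placeholder selection geometry key
                          spatial_operation='clip',
                          aggregate=True,
                          calc=[{'func': 'mean', 'name': 'mean'}],
                          calc_grouping=['month'])
so = SubsetOperation(ops, serial=True, nprocs=1)
for coll in so:
    # each yielded object is a spatial collection keyed by the selection geometry's UGID
    print(coll.keys())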
Example #13
class SubsetOperation(object):
    """
    :param :class:~`ocgis.OcgOperations` ops:
    :param bool request_base_size_only: If ``True``, return field objects following
     the spatial subset performing as few operations as possible.
    :param :class:`ocgis.util.logging_ocgis.ProgressOcgOperations` progress:
    """

    def __init__(self, ops, request_base_size_only=False, progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()

        # create the calculation engine
        if self.ops.calc is None or self._request_base_size_only:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine', self._subset_log, level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                                self.ops.calc,
                                                raw=self.ops.calc_raw,
                                                agg=self.ops.aggregate,
                                                calc_sample_size=self.ops.calc_sample_size,
                                                progress=self._progress)
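            # flag whether any requested calculation is multivariate; these require all input datasets to be
            # processed together (see _iter_collections_)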
            self._has_multivariate_calculations = any([self.cengine._check_calculation_members_(self.cengine.funcs, k) \
                                                       for k in
                                                       [AbstractMultivariateFunction, MultivariateEvalFunction]])

        # in the case of netcdf output, geometries must be unioned. this is also true for the case of the selection
        # geometry being requested as aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry', self._subset_log)
            build = True
            for sdim in self.ops.geom:
                _geom = sdim.geom.get_highest_order_abstraction().value[0, 0]
                if build:
                    new_geom = _geom
                    new_crs = sdim.crs
                    new_properties = {'UGID': 1}
                    build = False
                else:
                    new_geom = new_geom.union(_geom)
            self.ops.geom = [{'geom': new_geom, 'properties': new_properties, 'crs': new_crs}]

    def __iter__(self):
        """:rtype: :class:`ocgis.api.collection.AbstractCollection`"""

        ocgis_lh('beginning iteration', logger='conv.__iter__', level=logging.DEBUG)
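        # these stores keep selection geometry identifiers and geometries unique across subsets
        # (see _get_spatially_subsetted_field_)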
        self._ugid_unique_store = []
        self._geom_unique_store = []

        # simple iterator for serial operations
        for coll in self._iter_collections_():
            yield coll

    def _iter_collections_(self):
        """:rtype: :class:`ocgis.api.collection.AbstractCollection`"""

        # multivariate calculations require that the datasets come in as a single list
        # containing all variable inputs.
        if self._has_multivariate_calculations:
            itr_rd = [[r for r in self.ops.dataset.itervalues()]]

        # otherwise, process geometries expects a single element sequence
        else:
            itr_rd = [[rd] for rd in self.ops.dataset.itervalues()]

        # configure the progress object
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom, len, default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc, len, default=0)
        # send some messages
        msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
                self._progress.n_geometries)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'. \
                format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg, logger=self._subset_log)

        # process the data collections
        for rds in itr_rd:

            try:
                msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            except AttributeError:
                # field objects do not have uris associated with them
                msg = []
                for rd in rds:
                    try:
                        msg.append(rd.uri)
                    except AttributeError:
                        # likely a field object
                        msg.append(rd.name)
                msg = 'Processing URI(s) / field names: {0}'.format(msg)
            ocgis_lh(msg=msg, logger=self._subset_log)

            for coll in self._process_subsettables_(rds):
                # if there are calculations, do those now and return a new type of collection
                if self.cengine is not None:
                    ocgis_lh('Starting calculations.',
                             self._subset_log,
                             alias=coll.items()[0][1].keys()[0],
                             ugid=coll.keys()[0])

                    # look for any optimizations for temporal grouping.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')
                    # execute the calculations
                    coll = self.cengine.execute(coll, file_only=self.ops.file_only,
                                                tgds=tgds)
                else:
                    # if there are no calculations, mark progress to indicate a geometry has been completed.
                    self._progress.mark()

                # conversion of groups.
                if self.ops.output_grouping is not None:
                    raise NotImplementedError
                else:
                    ocgis_lh('subset yielding', self._subset_log, level=logging.DEBUG)
                    yield coll

    def _process_subsettables_(self, rds):
        """
        :param rds: Sequence of :class:~`ocgis.RequestDataset` objects.
        :type rds: sequence
        :rtype: :class:`ocgis.api.collection.AbstractCollection`
        """

        ocgis_lh(msg='entering _process_geometries_', logger=self._subset_log, level=logging.DEBUG)

        # select headers and any value keys for keyed output functions
        value_keys = None
        if self.ops.headers is not None:
            headers = self.ops.headers
        else:
            if self.ops.melted:
                if self.cengine is not None:
                    if self._has_multivariate_calculations:
                        headers = constants.HEADERS_MULTI
                    else:
                        headers = constants.HEADERS_CALC
                else:
                    headers = constants.HEADERS_RAW
            else:
                headers = None

        # keyed output functions require appending headers regardless. there is only one keyed output function
        # allowed in a request.
        if headers is not None:
            if self.cengine is not None:
                if self.cengine._check_calculation_members_(self.cengine.funcs, AbstractKeyedOutputFunction):
                    value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names']
                    headers = list(headers) + value_keys
                    # remove the 'value' attribute headers as this is replaced by the keyed output names.
                    try:
                        headers.remove('value')
                    # it may not be in the list because of a user overload
                    except ValueError:
                        pass

        alias = '_'.join([r.name for r in rds])

        ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG)
        # return the field object
        try:
            # look for field optimizations
            if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG)
                field = [self.ops.optimizations['fields'][rd.alias] for rd in rds]
            # no field optimizations, extract the target data from the dataset collection
            else:
                ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG)
                len_rds = len(rds)
                field = [None] * len_rds
                for ii in range(len_rds):
                    rds_element = rds[ii]
                    try:
                        field_object = rds_element.get(format_time=self.ops.format_time)
                    except AttributeError:
                        # likely a field object which does not need to be loaded from source
                        if not self.ops.format_time:
                            raise NotImplementedError
                        field_object = rds_element

                    # extrapolate the spatial bounds if requested
                    if self.ops.interpolate_spatial_bounds:
                        try:
                            try:
                                field_object.spatial.grid.row.set_extrapolated_bounds()
                                field_object.spatial.grid.col.set_extrapolated_bounds()
                            except AttributeError:
                                # row/col is likely none. attempt to extrapolate using the grid values
                                field_object.spatial.grid.set_extrapolated_corners()
                        except BoundsAlreadyAvailableError:
                            msg = 'Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".'
                            ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARNING)

                    field[ii] = field_object

            # update the spatial abstraction on each incoming field to match the operations value before any
            # spatial subsetting occurs.
            for f in field:
                f.spatial.abstraction = self.ops.abstraction

            if len(field) > 1:
                try:
                    # reset the variable uid and let the collection handle its assignment
                    variable_to_add = field[1].variables.first()
                    variable_to_add.uid = None
                    field[0].variables.add_variable(variable_to_add)
                    # reset the field names and let these be auto-generated
                    for f in field:
                        f._name = None
                # this will fail for optimizations as the fields are already joined
                except VariableInCollectionError:
                    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                        pass
                    else:
                        raise
            field = field[0]
        # this error is related to subsetting by time or level. spatial subsetting occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log,
                         level=logging.WARN)
                coll = SpatialCollection(headers=headers)
                name = '_'.join([rd.name for rd in rds])
                coll.add_field(None, name=name)
                try:
                    yield coll
                finally:
                    return
            else:
                ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.name for rd in rds]), logger=self._subset_log)

        # set iterator based on presence of slice. slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [None]
        else:
            itr = [None] if self.ops.geom is None else self.ops.geom
        for coll in self._process_geometries_(itr, field, headers, value_keys, alias):
            yield (coll)

    def _get_initialized_collection_(self, field, headers, value_keys):
        """
        Initialize the spatial collection object selecting the output CRS in the process.

        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        :param headers:
        :type headers: list[str]
        :param value_keys:
        :type value_keys: list[str]
        :rtype: :class:`ocgis.api.collection.SpatialCollection`
        """

        # initialize the collection object to store the subsetted data. if the output CRS differs from the field's
        # CRS, adjust accordingly when initializing.
        if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs:
            collection_crs = self.ops.output_crs
        else:
            collection_crs = field.spatial.crs
        coll = SpatialCollection(crs=collection_crs, headers=headers, value_keys=value_keys)
        return coll

    def _get_update_rotated_pole_state_(self, field, subset_sdim):
        """
        Rotated pole coordinate systems are handled internally by transforming the CRS to a geographic coordinate
        system.

        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        :param subset_sdim:
        :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` or None
        :rtype: None or :class:`ocgis.interface.base.crs.CFRotatedPole`
        :raises: AssertionError
        """

        # CFRotatedPole requires special treatment. only do this if a subset geometry is available. this variable is
        # needed to determine if backtransforms are necessary.
        original_rotated_pole_crs = None
        if isinstance(field.spatial.crs, CFRotatedPole):
            # only transform if there is a subset geometry or a clip/aggregation is requested
            if subset_sdim is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip':
                # update the CRS. copy the original CRS for possible later transformation back to rotated pole.
                original_rotated_pole_crs = copy(field.spatial.crs)
                ocgis_lh('initial rotated pole transformation...', self._subset_log, level=logging.DEBUG)
                field.spatial.update_crs(CFWGS84())
                ocgis_lh('...finished initial rotated pole transformation', self._subset_log, level=logging.DEBUG)
        return original_rotated_pole_crs

    def _assert_abstraction_available_(self, field):
        """
        Assert the spatial abstraction may be loaded on the field object if one is provided in the operations.

        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        """

        if self.ops.abstraction is not None:
            attr = getattr(field.spatial.geom, self.ops.abstraction)
            if attr is None:
                msg = 'A "{0}" spatial abstraction is not available.'.format(self.ops.abstraction)
                ocgis_lh(exc=ValueError(msg), logger='subset')

    def _get_slice_or_snippet_(self, field):
        """
        Slice the incoming field if a slice or snippet argument is present.

        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        :rtype: :class:`ocgis.interface.base.field.Field`
        """

        # if there is a snippet, return the first realization, time, and level
        if self.ops.snippet:
            field = field[0, 0, 0, :, :]
        # if there is a slice, use it to subset the field.
        elif self.ops.slice is not None:
            field = field.__getitem__(self.ops.slice)
        return field

    def _get_spatially_subsetted_field_(self, alias, field, subset_sdim, subset_ugid):
        """
        Spatially subset a field with a selection geometry.

        :param str alias: The request data alias currently being processed.
        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        :param subset_sdim:
        :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension`
        :rtype: None or :class:`ocgis.interface.base.field.Field`
        :raises: AssertionError, ExtentError
        """

        assert (subset_sdim is not None)

        subset_geom = subset_sdim.single.geom

        # check for unique ugids. this is an issue with point subsetting as the buffer radius changes by dataset.
        if subset_ugid in self._ugid_unique_store:
            # only update if the geometry is unique
            if not any([__.almost_equals(subset_geom) for __ in self._geom_unique_store]):
                prev_ugid = subset_ugid
                ugid = max(self._ugid_unique_store) + 1

                # update the geometry property and uid
                subset_sdim.properties['UGID'][0] = ugid
                subset_sdim.uid[:] = ugid

                self._ugid_unique_store.append(ugid)
                self._geom_unique_store.append(subset_geom)
                msg = 'Updating UGID {0} to {1} to maintain uniqueness.'.format(prev_ugid, ugid)
                ocgis_lh(msg, self._subset_log, level=logging.WARN, alias=alias, ugid=ugid)
            else:
                pass
                # self._ugid_unique_store.append(subset_ugid)
                # self._geom_unique_store.append(subset_geom)
        else:
            self._ugid_unique_store.append(subset_ugid)
            self._geom_unique_store.append(subset_geom)

        # unwrap the data if it is geographic and 360
        if field.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped:
            if subset_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped:
                ocgis_lh('unwrapping selection geometry', self._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)
                subset_sdim.unwrap()
                # update the geometry reference as the spatial dimension was unwrapped and modified in place
                subset_geom = subset_sdim.single.geom

        # perform the spatial operation
        try:
            if self.ops.spatial_operation == 'intersects':
                sfield = field.get_intersects(subset_geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                              select_nearest=self.ops.select_nearest)
            elif self.ops.spatial_operation == 'clip':
                sfield = field.get_clip(subset_geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                        select_nearest=self.ops.select_nearest)
            else:
                ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation))
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(alias=alias, ugid=subset_ugid, msg='empty geometric operation but empty returns allowed',
                         level=logging.WARN)
                sfield = None
            else:
                msg = ' This typically means the selection geometry falls outside the spatial domain of the target dataset.'
                msg = str(e) + msg
                ocgis_lh(exc=ExtentError(message=msg), alias=alias, logger=self._subset_log)

        # if the subset geometry is unwrapped and the vector wrap option is true, wrap the subset geometry.
        if self.ops.vector_wrap:
            if subset_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped:
                subset_sdim.wrap()

        return sfield

    def _update_subset_geometry_if_point_(self, field, subset_sdim, subset_ugid):
        """
        If the subset geometry is a point or multipoint, it will need to be buffered and the spatial dimension updated
        accordingly. If the subset geometry is a polygon, pass through.

        :param field:
        :type field: :class:`ocgis.interface.base.field.Field`
        :param subset_sdim:
        :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension`
        :param int subset_ugid:
        :raises: AssertionError
        """

        if type(subset_sdim.single.geom) in [Point, MultiPoint]:
            assert subset_sdim.abstraction == 'point'
            ocgis_lh(logger=self._subset_log, msg='buffering point geometry', level=logging.DEBUG)
            subset_geom = subset_sdim.single.geom.buffer(self.ops.search_radius_mult * field.spatial.grid.resolution)
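            # wrap the buffered polygon in a masked object array so it can back a polygon geometry dimension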
            value = np.ma.array([[None]])
            value[0, 0] = subset_geom
            subset_sdim.geom._polygon = SpatialGeometryPolygonDimension(value=value, uid=subset_ugid)
            # the polygon should be used for subsetting, update the spatial dimension to use this abstraction
            subset_sdim.abstraction = 'polygon'
        assert subset_sdim.abstraction == 'polygon'

    def _check_masking_(self, alias, sfield, subset_ugid):
        """
        :param str alias: The field's alias value.
        :param sfield: The target field containing variables to check for masking.
        :type sfield: :class:`ocgis.interface.base.field.Field`
        :param int subset_ugid: The unique identifier for the geometry.
        """

        for variable in sfield.variables.itervalues():
            ocgis_lh(msg='Fetching data for variable with alias "{0}".'.format(variable.alias),
                     logger=self._subset_log)
            if variable.value.mask.all():
                # masked data may be okay...
                if self.ops.snippet or self.ops.allow_empty or (
                                self.ops.output_format == 'numpy' and self.ops.allow_empty):
                    if self.ops.snippet:
                        ocgis_lh('all masked data encountered but allowed for snippet',
                                 self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN)
                    if self.ops.allow_empty:
                        ocgis_lh('all masked data encountered but empty returns allowed',
                                 self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN)
                    if self.ops.output_format == 'numpy':
                        ocgis_lh('all masked data encountered but numpy data being returned allowed',
                                 logger=self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN)
                else:
                    # if the geometry is also masked, it is an empty spatial operation.
                    if sfield.spatial.abstraction_geometry.value.mask.all():
                        ocgis_lh(exc=EmptyData, logger=self._subset_log)
                    # if none of the other conditions are met, raise the masked data error
                    else:
                        ocgis_lh(logger=self._subset_log, exc=MaskedDataError(), alias=alias,
                                 ugid=subset_ugid)

    def _get_regridded_field_with_subset_(self, sfield, subset_sdim_for_regridding=None, with_buffer=True):
        """
        Regrid ``sfield`` subsetting the regrid destination in the process.

        :param sfield: The input field to regrid.
        :type sfield: :class:`ocgis.interface.base.field.Field`
        :param subset_sdim_for_regridding: The original, unaltered spatial dimension to use for subsetting.
        :type subset_sdim_for_regridding: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension`
        :param bool with_buffer: If ``True``, buffer the geometry used to subset the destination grid.
        """

        # todo: cache spatial operations on regrid destination field

        from ocgis.regrid.base import iter_regridded_fields
        from ocgis.util.spatial.spatial_subset import SpatialSubsetOperation

        if subset_sdim_for_regridding is None:
            regrid_destination = self.ops.regrid_destination
        else:
            if with_buffer:
                # buffer the subset geometry by the resolution of the source field to give extents a chance to be
                # compatible
                buffer_value = sfield.spatial.grid.resolution
                buffer_crs = sfield.spatial.crs
            else:
                buffer_value, buffer_crs = [None, None]
            ss = SpatialSubsetOperation(self.ops.regrid_destination)
            regrid_destination = ss.get_spatial_subset('intersects', subset_sdim_for_regridding,
                                                       use_spatial_index=env.USE_SPATIAL_INDEX,
                                                       select_nearest=False, buffer_value=buffer_value,
                                                       buffer_crs=buffer_crs)

        original_sfield_crs = sfield.spatial.crs
        # check crs on the source field
        regrid_required_update_crs = False
        if not isinstance(sfield.spatial.crs, Spherical):
            # this has an _assigned_ WGS84 crs, hence we cannot assume the default crs
            if isinstance(sfield.spatial.crs, WGS84) and sfield._has_assigned_coordinate_system:
                regrid_required_update_crs = True
            # the data has a coordinate system that is not WGS84
            elif not isinstance(sfield.spatial.crs, WGS84):
                regrid_required_update_crs = True
        if regrid_required_update_crs:
            # need to load values as source indices will disappear during crs update
            for variable in sfield.variables.itervalues():
                variable.value
            sfield.spatial.update_crs(Spherical())
        else:
            sfield.spatial.crs = Spherical()

        # update the coordinate system of the regrid destination if required
        try:
            destination_sdim = regrid_destination.spatial
        except AttributeError:
            # likely a spatial dimension object
            destination_sdim = regrid_destination
        update_regrid_destination_crs = False
        if not isinstance(destination_sdim.crs, Spherical):
            if isinstance(regrid_destination, Field):
                if isinstance(destination_sdim.crs, WGS84) and regrid_destination._has_assigned_coordinate_system:
                    update_regrid_destination_crs = True
                elif isinstance(destination_sdim.crs,
                                WGS84) and not regrid_destination._has_assigned_coordinate_system:
                    pass
                else:
                    update_regrid_destination_crs = True
            else:
                if not isinstance(destination_sdim.crs, Spherical):
                    update_regrid_destination_crs = True
        if update_regrid_destination_crs:
            destination_sdim.update_crs(Spherical())
        else:
            destination_sdim.crs = Spherical()

        # check that wrapping is equivalent
        if destination_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped:
            if sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped:
                sfield.spatial = deepcopy(sfield.spatial)
                sfield.spatial.unwrap()
        if destination_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped:
            if sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped:
                sfield.spatial = deepcopy(sfield.spatial)
                sfield.spatial.wrap()

        # remove the mask from the destination field.
        new_mask = np.zeros(destination_sdim.shape, dtype=bool)
        destination_sdim.set_mask(new_mask)

        # regrid the input fields.
        sfield = list(iter_regridded_fields([sfield], destination_sdim, **self.ops.regrid_options))[0]

        if regrid_required_update_crs:
            sfield.spatial.update_crs(original_sfield_crs)
        else:
            sfield.spatial.crs = original_sfield_crs

        # subset the output from the regrid operation as masked values may be introduced on the edges
        if subset_sdim_for_regridding is not None:
            ss = SpatialSubsetOperation(sfield)
            sfield = ss.get_spatial_subset('intersects', subset_sdim_for_regridding,
                                           use_spatial_index=env.USE_SPATIAL_INDEX,
                                           select_nearest=False)

        return sfield

    def _process_geometries_(self, itr, field, headers, value_keys, alias):
        """
        :param sequence itr: An iterator yielding :class:`~ocgis.SpatialDimension` objects.
        :param :class:`ocgis.interface.Field` field: The field object to use for
         operations.
        :param sequence headers: Sequence of strings to use as headers for the
         creation of the collection.
        :param sequence value_keys: Sequence of strings to use as headers for the
         keyed output functions.
        :param str alias: The request data alias currently being processed.
        :rtype: :class:~`ocgis.SpatialCollection`
        """

        ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
        # process each geometry
        for subset_sdim in itr:
            # always work with a copy of the target geometry
            subset_sdim = deepcopy(subset_sdim)
            """:type subset_sdim: ocgis.interface.base.dimension.spatial.SpatialDimension"""

            if self.ops.regrid_destination is not None:
                # if there is regridding, make another copy as this geometry may be manipulated during subsetting of
                # sources
                subset_sdim_for_regridding = deepcopy(subset_sdim)

            # operate on the rotated pole coordinate system by first transforming it to CFWGS84
            original_rotated_pole_crs = self._get_update_rotated_pole_state_(field, subset_sdim)

            # initialize the collection storage
            coll = self._get_initialized_collection_(field, headers, value_keys)

            # check if the geometric abstraction is available on the field object
            self._assert_abstraction_available_(field)

            # return a slice or snippet if either of these are requested.
            field = self._get_slice_or_snippet_(field)

            # choose the subset ugid value
            if subset_sdim is None:
                msg = 'No selection geometry. Returning all data. Assigning UGID as 1.'
                subset_ugid = 1
            else:
                subset_ugid = subset_sdim.single.uid
                msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
            ocgis_lh(msg=msg, logger=self._subset_log)

            if subset_sdim is not None:
                # if the CRS's differ, update the spatial dimension to match the field
                if subset_sdim.crs is not None and subset_sdim.crs != field.spatial.crs:
                    subset_sdim.update_crs(field.spatial.crs)
                # if the geometry is a point, it needs to be buffered
                self._update_subset_geometry_if_point_(field, subset_sdim, subset_ugid)

            # if there is a selection geometry present, use it for the spatial subset. if not, all the field's data is
            # being returned.
            if subset_sdim is None:
                sfield = field
            else:
                sfield = self._get_spatially_subsetted_field_(alias, field, subset_sdim, subset_ugid)

            # if the base size is being requested, bypass the rest of the operations.
            if not self._request_base_size_only:
                # perform regridding operations if requested
                if self.ops.regrid_destination is not None and sfield._should_regrid:
                    try:
                        original_sfield_sdim = deepcopy(sfield.spatial)
                        sfield = self._get_regridded_field_with_subset_(
                            sfield,
                            subset_sdim_for_regridding=subset_sdim_for_regridding,
                            with_buffer=True)
                    except ValueError:
                        # attempt without buffering the subset geometry for the target field.
                        sfield.spatial = original_sfield_sdim
                        sfield = self._get_regridded_field_with_subset_(sfield,
                                                                        subset_sdim_for_regridding=subset_sdim_for_regridding,
                                                                        with_buffer=False)

                # if empty returns are allowed, there may be an empty field
                if sfield is not None:
                    # aggregate if requested
                    if self.ops.aggregate:
                        ocgis_lh('executing spatial average', self._subset_log, alias=alias, ugid=subset_ugid)
                        sfield = sfield.get_spatially_aggregated(new_spatial_uid=subset_ugid)

                    # wrap the returned data.
                    if not env.OPTIMIZE_FOR_CALC:
                        if sfield is not None and sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped:
                            if self.ops.output_format != 'nc' and self.ops.vector_wrap:
                                ocgis_lh('wrapping output geometries', self._subset_log, alias=alias, ugid=subset_ugid,
                                         level=logging.DEBUG)
                                # deepcopy the spatial dimension before wrapping as wrapping will modify the spatial
                                # dimension on the parent field object, which may need to be reused for additional
                                # subsets.
                                sfield.spatial = deepcopy(sfield.spatial)
                                sfield.spatial.wrap()

                    # check for all masked values
                    if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False:
                        self._check_masking_(alias, sfield, subset_ugid)

                    # transform back to rotated pole if necessary
                    if original_rotated_pole_crs is not None:
                        if not isinstance(self.ops.output_crs, CFWGS84):
                            sfield.spatial.update_crs(original_rotated_pole_crs)

                    # update the coordinate system of the data output
                    if self.ops.output_crs is not None:
                        # if the geometry is not None, it may need to be projected to match the output crs.
                        if subset_sdim is not None and subset_sdim.crs != self.ops.output_crs:
                            subset_sdim.update_crs(self.ops.output_crs)
                        # update the subset field CRS
                        sfield.spatial = deepcopy(sfield.spatial)
                        sfield.spatial.update_crs(self.ops.output_crs)

            # use the field's alias if it is provided. otherwise, let it be automatically assigned
            name = alias if sfield is None else None

            # add the created field to the output collection with the selection geometry.
            coll.add_field(sfield, ugeom=subset_sdim, name=name)

            yield coll
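The point buffering in _update_subset_geometry_if_point_ and the selection geometry aggregation in __init__ both reduce to ordinary shapely operations. A standalone sketch with illustrative values only (the 0.75 multiplier and 2.5 degree resolution are placeholders, not values taken from the code above):

from shapely.geometry import Point
from shapely.ops import unary_union

# buffer a point selection geometry into a polygon before spatially subsetting,
# mirroring the search_radius_mult * grid resolution buffer used above
search_radius_mult = 0.75  # placeholder multiplier
grid_resolution = 2.5  # placeholder grid resolution in degrees
subset_polygon = Point(-105.0, 40.0).buffer(search_radius_mult * grid_resolution)

# union several selection geometries into one, analogous to the aggregation
# performed in __init__ when netCDF output or agg_selection is requested
merged = unary_union([Point(-105.0, 40.0).buffer(1.0), Point(-100.0, 35.0).buffer(1.0)])
print(subset_polygon.bounds, merged.geom_type)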