def test_agg_raw(self):
    '''
    Check output shapes of a monthly standard deviation for every combination
    of the ``raw`` and ``agg`` calculation engine flags.
    '''
    calc_grouping = ['month']
    calculations = [dict(func='std', name='std', ref=StandardDeviation, kwds={})]
    flags = [True, False]

    for raw, agg in itertools.product(flags, flags):
        collection = self.get_collection(aggregate=agg)
        engine = OcgCalculationEngine(calc_grouping, calculations, raw, agg)
        result = engine.execute(collection)

        calc_shape = result.calc['tas']['std'].shape
        value, weights = engine._get_value_weights_(collection.variables['tas'])

        ## aggregated output collapses the two trailing (spatial) dimensions
        ## to (1,1)
        if agg:
            self.assertNumpyAll(calc_shape[-2:], (1, 1))

        ## the remaining assertions only concern calculations on raw values
        if not raw:
            continue

        if agg:
            ## the calculation inputs keep the shape of the weights and so
            ## differ from the aggregated output shape
            self.assertNumpyAll(value.shape[-2:], weights.shape)
            self.assertNumpyNotAll(value.shape[-2:], calc_shape[-2:])
        else:
            ## without aggregation, inputs and outputs share their three
            ## trailing dimensions
            self.assertNumpyAll(calc_shape[-3:], value.shape[-3:])
def test_agg_raw(self):
    '''
    Run a monthly ``threshold`` calculation for every combination of the
    ``raw`` and ``agg`` flags and check the aggregated output shape.
    '''
    calc_grouping = ['month']
    threshold_kwds = {'operation': 'gte', 'threshold': 200}
    calculations = [{'func': 'threshold',
                     'name': 'threshold',
                     'ref': Threshold,
                     'kwds': threshold_kwds}]
    flags = [True, False]

    for raw, agg in itertools.product(flags, flags):
        collection = self.get_collection(aggregate=agg)
        engine = OcgCalculationEngine(calc_grouping, calculations, raw, agg)
        result = engine.execute(collection)

        ## the value is fetched for every combination, not only the
        ## aggregated ones, so the access itself is always exercised.
        threshold_value = result[25]['tas'].variables['threshold'].value

        ## aggregated data should have a (1,1) spatial dimension
        if agg:
            self.assertNumpyAll(threshold_value.shape[-2:], (1, 1))
def __init__(self,ops,serial=True,nprocs=1):
    '''
    Store the operations, build the calculation engine when calculations are
    requested and, if required, union the selection geometries.

    :param ops: The operations object to subset with.
    :param bool serial: Stored on the instance as ``serial``.
    :param int nprocs: Stored on the instance as ``nprocs``.
    '''
    self.ops = ops
    self.serial = serial
    self.nprocs = nprocs
    self._subset_log = ocgis_lh.get_logger('subset')

    ## the calculation engine only exists when calculations were requested
    if self.ops.calc is not None:
        ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
        self.cengine = OcgCalculationEngine(
            self.ops.calc_grouping,
            self.ops.calc,
            raw=self.ops.calc_raw,
            agg=self.ops.aggregate,
            calc_sample_size=self.ops.calc_sample_size,
        )
    else:
        self.cengine = None

    ## netcdf output and an aggregated selection geometry both require the
    ## selection geometries to be unioned into a single geometry.
    union_requested = self.ops.output_format == 'nc' or self.ops.agg_selection is True
    if union_requested and self.ops.geom is not None:
        ocgis_lh('aggregating selection geometry',self._subset_log)
        for position,geom_dict in enumerate(self.ops.geom):
            if position == 0:
                ## the first geometry seeds the union and supplies the crs
                unioned_geom = geom_dict['geom']
                unioned_crs = geom_dict['crs']
            else:
                unioned_geom = unioned_geom.union(geom_dict['geom'])
        ## the unioned geometry always receives a UGID of 1
        self.ops.geom = [{'geom':unioned_geom,
                          'properties':{'UGID':1},
                          'crs':unioned_crs}]
def __init__(self, ops, serial=True, nprocs=1, validate=True):
    '''
    Store the operations, optionally validate the request datasets, build the
    calculation engine and configure the datasets for a snippet request.

    :param ops: The operations object to subset with.
    :param bool serial: Stored on the instance as ``serial``.
    :param int nprocs: Stored on the instance as ``nprocs``.
    :param bool validate: If ``True``, call ``ops.dataset.validate(ops=ops)``
     before anything else is configured.
    '''
    self.ops = ops
    self.serial = serial
    self.nprocs = nprocs
    log = ocgis_lh.get_logger('subset')

    if validate:
        ocgis_lh('validating request datasets', log, level=logging.DEBUG)
        ops.dataset.validate(ops=ops)

    ## the calculation engine only exists when calculations were requested
    if self.ops.calc is not None:
        ocgis_lh('initializing calculation engine', log, level=logging.DEBUG)
        self.cengine = OcgCalculationEngine(
            self.ops.calc_grouping,
            self.ops.calc,
            raw=self.ops.calc_raw,
            agg=self.ops.aggregate,
        )
    else:
        self.cengine = None

    ## everything below only applies to snippet requests
    if self.ops.snippet is not True:
        return

    ##TODO: move snippet to iteration
    ocgis_lh('getting snippet bounds', log)
    for rd in self.ops.dataset:
        ## snippet is not implemented for time regions. the exception is
        ## handed to the logger rather than raised here directly.
        if rd.time_region is not None:
            ocgis_lh(exc=NotImplementedError('snippet is not implemented for time regions'),
                     logger=log)

        rd.level_range = [1, 1]
        ods = rd.ds

        ## with no calculation, or a calculation without a temporal grouping,
        ## only the first time slice is loaded.
        ##TODO: improve slicing to not load all time values in a more
        ## elegant way.
        if self.cengine is None or self.cengine.grouping is None:
            ods._load_slice.update({'T': slice(0, 1)})
            continue

        ## snippet for the computation. this currently requires loading all
        ## the data from the time dimension into memory.
        ##TODO: more efficiently pull dates for monthly grouping (for
        ## example).
        ods.temporal.set_grouping(self.cengine.grouping)
        temporal_group = ods.temporal.group
        group_times = ods.temporal.value[temporal_group.dgroups[0]]
        time_bounds = ods.temporal.get_datetime([group_times.min(), group_times.max()])
        rd.time_range = list(time_bounds)
def test_agg_raw(self):
    '''
    Exercise a monthly standard deviation with each ``raw``/``agg`` pairing
    and assert the shapes of the calculation output and of its inputs.
    '''
    std_calc = {'func':'std','name':'std','ref':StandardDeviation,'kwds':{}}

    for use_raw,use_agg in itertools.product([True,False],repeat=2):
        coll = self.get_collection(aggregate=use_agg)
        ce = OcgCalculationEngine(['month'],[std_calc],use_raw,use_agg)
        ret = ce.execute(coll)

        out_shape = ret.calc['tas']['std'].shape
        in_value,in_weights = ce._get_value_weights_(coll.variables['tas'])

        ## aggregated data should have a (1,1) spatial dimension
        if use_agg:
            self.assertNumpyAll(out_shape[-2:],(1,1))

        ## when raw data is used together with aggregation, the inputs to the
        ## calculation keep the weights' shape, which differs from the
        ## aggregated output shape.
        if use_raw and use_agg:
            self.assertNumpyAll(in_value.shape[-2:],in_weights.shape)
            self.assertNumpyNotAll(in_value.shape[-2:],out_shape[-2:])
        elif use_raw:
            ## raw but not aggregated: three trailing dimensions match
            self.assertNumpyAll(out_shape[-3:],in_value.shape[-3:])
def validate_ops(cls, ops):
    """
    Check that the operations may be written to netCDF.

    Each violated constraint raises immediately; constraints are checked in
    the order they appear below.

    :param ops: The operations object to validate.
    :raises: DefinitionValidationError
    """
    from ocgis.api.parms.definition import OutputFormat

    def _fail(message, argument=OutputFormat):
        raise DefinitionValidationError(argument, message)

    def _data_package_message():
        return ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                'There are currently {dcount} RequestDatasets. Note, this is different than a '
                'multifile dataset.'.format(dcount=len(ops.dataset)))

    # more than one request dataset is only acceptable alongside a
    # multivariate calculation
    if len(ops.dataset) > 1:
        # without a calculation, only one request dataset may be written
        if ops.calc is None:
            _fail(_data_package_message(), OutputFormat)

        # count how often the calculations subclass one of the multivariate
        # base classes; exactly one occurrence is required
        multivariate_bases = (AbstractMultivariateFunction, MultivariateEvalFunction)
        occurrences = sum(issubclass(calc['ref'], base)
                          for base in multivariate_bases
                          for calc in ops.calc)
        if occurrences != 1:
            _fail(_data_package_message(), OutputFormat)

    # clipped data which creates an arbitrary geometry may not be written to
    # netCDF
    if ops.spatial_operation != 'intersects':
        _fail('Only "intersects" spatial operation allowed for netCDF output. '
              'Arbitrary geometries may not currently be written.', OutputFormat)

    # data may not be aggregated either
    if ops.aggregate:
        _fail('Data may not be aggregated for netCDF output. '
              'The aggregate parameter must be False.', OutputFormat)

    # either the input data CRS or WGS84 is required for data output
    if ops.output_crs is not None and not isinstance(ops.output_crs, CFWGS84):
        _fail('CFWGS84 is the only acceptable overloaded output CRS at this time '
              'for netCDF output.', OutputFormat)

    # the remaining constraints only apply when there are calculations
    if ops.calc is None:
        return

    # calculations on raw values are not relevant as no aggregation can
    # occur anyway
    if ops.calc_raw:
        _fail('Calculations must be performed on original values '
              '(i.e. calc_raw=False) for netCDF output.')

    # no keyed output functions to netCDF
    if OcgCalculationEngine._check_calculation_members_(ops.calc, AbstractKeyedOutputFunction):
        _fail('Keyed function output may not be written to netCDF.')
def __init__(self, ops, serial=True, nprocs=1, validate=True):
    '''
    Store the operations, optionally validate the request datasets, build the
    calculation engine and configure the datasets for a snippet request.
    Status messages are printed when ``env.VERBOSE`` is set.

    :param ops: The operations object to subset with.
    :param bool serial: Stored on the instance as ``serial``.
    :param int nprocs: Stored on the instance as ``nprocs``.
    :param bool validate: If ``True``, call ``ops.dataset.validate()`` before
     anything else is configured.
    '''
    self.ops = ops
    self.serial = serial
    self.nprocs = nprocs

    if validate:
        if env.VERBOSE:
            print('validating request datasets...')
        ops.dataset.validate()

    ## the calculation engine only exists when calculations were requested
    if self.ops.calc is not None:
        if env.VERBOSE:
            print('initializing calculation engine...')
        self.cengine = OcgCalculationEngine(
            self.ops.calc_grouping,
            self.ops.calc,
            raw=self.ops.calc_raw,
            agg=self.ops.aggregate,
        )
    else:
        self.cengine = None

    ## everything below only applies to snippet requests
    if self.ops.snippet is not True:
        return

    ##TODO: move snippet to iteration
    if env.VERBOSE:
        print('getting snippet bounds...')
    for rd in self.ops.dataset:
        rd.level_range = [1, 1]
        ods = rd.ds

        ## with no calculation, or a calculation without a temporal grouping,
        ## only the first time slice is loaded.
        ##TODO: improve slicing to not load all time values
        if self.cengine is None or self.cengine.grouping is None:
            ods._load_slice.update({'T': slice(0, 1)})
            continue

        ## snippet for the computation. this currently requires loading all
        ## the data for the time dimension into memory.
        ##TODO: more efficiently pull dates for monthly grouping (for
        ## example).
        ods.temporal.set_grouping(self.cengine.grouping)
        temporal_group = ods.temporal.group
        group_times = ods.temporal.value[temporal_group.dgroups[0]]
        rd.time_range = [group_times.min(), group_times.max()]
def __init__(self,ops,request_base_size_only=False,progress=None):
    '''
    Store the operations, build the calculation engine when calculations are
    requested and, if required, union the selection geometries.

    :param ops: The operations object to subset with.
    :param bool request_base_size_only: If ``True``, no calculation engine is
     created even when calculations are present on ``ops``.
    :param progress: Progress object handed to the calculation engine. A new
     ``ProgressOcgOperations`` is created when this is ``None`` (or otherwise
     falsy).
    '''
    self.ops = ops
    self._request_base_size_only = request_base_size_only
    self._subset_log = ocgis_lh.get_logger('subset')
    self._progress = progress or ProgressOcgOperations()

    ## create the calculation engine. identity is used for the None test and
    ## plain truthiness for the boolean flag rather than equality comparisons.
    if self.ops.calc is None or self._request_base_size_only:
        self.cengine = None
        self._has_multivariate_calculations = False
    else:
        ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
        self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                            self.ops.calc,
                                            raw=self.ops.calc_raw,
                                            agg=self.ops.aggregate,
                                            calc_sample_size=self.ops.calc_sample_size,
                                            progress=self._progress)
        ## the request is multivariate if any calculation is a member of one
        ## of the multivariate base classes
        multivariate_bases = (AbstractMultivariateFunction,MultivariateEvalFunction)
        self._has_multivariate_calculations = any(
            self.cengine._check_calculation_members_(self.cengine.funcs,base)
            for base in multivariate_bases)

    ## in the case of netcdf output, geometries must be unioned. this is
    ## also true for the case of the selection geometry being requested as
    ## aggregated.
    union_requested = self.ops.output_format == 'nc' or self.ops.agg_selection is True
    if union_requested and self.ops.geom is not None:
        ocgis_lh('aggregating selection geometry',self._subset_log)
        build = True
        for element_geom in self.ops.geom:
            if build:
                ## the first geometry seeds the union and supplies the crs
                new_geom = element_geom['geom']
                new_crs = element_geom['crs']
                new_properties = {'UGID':1}
                build = False
            else:
                new_geom = new_geom.union(element_geom['geom'])
        self.ops.geom = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
class SubsetOperation(object):
    '''
    Iterable that subsets the request datasets of an operations object by its
    selection geometries and, when a calculation engine is present, executes
    the calculations on each resulting collection.

    :param ops: The operations object to subset with.
    :type ops: :class:`~ocgis.OcgOperations`
    :param bool request_base_size_only: If ``True``, return field objects following
     the spatial subset performing as few operations as possible.
    :param progress: Progress object. A new ``ProgressOcgOperations`` is
     created when this is ``None``.
    :type progress: :class:`ocgis.util.logging_ocgis.ProgressOcgOperations`
    '''

    def __init__(self,ops,request_base_size_only=False,progress=None):
        '''
        Store the arguments, create the calculation engine when appropriate
        and union the selection geometries when required.
        '''
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()

        ## create the calculation engine. no engine is created when there are
        ## no calculations or when only the base size is requested.
        if self.ops.calc == None or self._request_base_size_only == True:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size,
                                           progress=self._progress)
            ## true if any calculation is a member of one of the two
            ## multivariate base classes
            self._has_multivariate_calculations = any([self.cengine._check_calculation_members_(self.cengine.funcs,k) \
                                                       for k in [AbstractMultivariateFunction,MultivariateEvalFunction]])

        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    ## the first geometry seeds the union and supplies the crs;
                    ## the unioned geometry always receives a UGID of 1.
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            ## NOTE(review): if self.ops.geom yields nothing, the names used
            ## below are never bound - confirm an empty sequence cannot occur.
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr

    def __iter__(self):
        '''
        Reset the UGID/geometry uniqueness stores and yield every collection
        produced by :meth:`_iter_collections_`.

        :rtype: AbstractCollection
        '''

        ocgis_lh('beginning iteration',logger='conv.__iter__',level=logging.DEBUG)
        ## these stores are consulted and extended in _process_geometries_
        self._ugid_unique_store = []
        self._geom_unique_store = []

        ## simple iterator for serial operations
        for coll in self._iter_collections_():
            yield(coll)

    def _iter_collections_(self):
        '''
        Group the request datasets, configure the progress object, log a
        summary of the request and yield one collection per processed
        geometry, executing calculations when a calculation engine exists.

        :raises: NotImplementedError if ``ops.output_grouping`` is not ``None``.
        :yields: :class:`~ocgis.SpatialCollection`
        '''

        ## multivariate calculations require datasets come in as a list with all
        ## variable inputs part of the same sequence.
        if self._has_multivariate_calculations:
            itr_rd = [[r for r in self.ops.dataset.itervalues()]]

        ## otherwise, process geometries expects a single element sequence
        else:
            itr_rd = [[rd] for rd in self.ops.dataset.itervalues()]

        ## configure the progress object
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom,len,default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc,len,default=0)
        ## send some messages
        msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables)
        ocgis_lh(msg=msg,logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(self._progress.n_geometries)
        ocgis_lh(msg=msg,logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'.\
             format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg,logger=self._subset_log)

        ## process the data collections
        for rds in itr_rd:
            msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            ocgis_lh(msg=msg,logger=self._subset_log)

            for coll in self._process_subsettables_(rds):
                ## if there are calculations, do those now and return a new type of collection
                if self.cengine is not None:
                    ## the first key of the collection and the first key of
                    ## its first item are used to label the log message. the
                    ## indexing of items()/keys() requires them to be lists.
                    ocgis_lh('Starting calculations.',
                             self._subset_log,
                             alias=coll.items()[0][1].keys()[0],
                             ugid=coll.keys()[0])

                    ## look for any optimizations for temporal grouping.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')
                    ## execute the calculations
                    coll = self.cengine.execute(coll,file_only=self.ops.file_only,
                                                tgds=tgds)
                else:
                    ## if there are no calculations, mark progress to indicate
                    ## a geometry has been completed.
                    self._progress.mark()

                ## conversion of groups.
                if self.ops.output_grouping is not None:
                    raise(NotImplementedError)
                else:
                    ocgis_lh('subset yielding',self._subset_log,level=logging.DEBUG)
                    yield(coll)

    def _process_subsettables_(self,rds):
        '''
        Select the output headers, load (or fetch from the optimizations) the
        field for the request datasets and yield the collections produced by
        :meth:`_process_geometries_`.

        :param rds: Sequence of :class:`~ocgis.RequestDataset` objects.
        :type rds: sequence
        :yields: :class:`~ocgis.SpatialCollection`
        '''

        ocgis_lh(msg='entering _process_geometries_',logger=self._subset_log,level=logging.DEBUG)

        ## select headers. headers on the operations take precedence over the
        ## defaults from the constants module.
        if self.ops.headers is not None:
            headers = self.ops.headers
        else:
            if self.cengine is not None:
                if self._has_multivariate_calculations:
                    headers = constants.multi_headers
                else:
                    headers = constants.calc_headers
            else:
                headers = constants.raw_headers

        ## keyed output functions require appending headers regardless. there is
        ## only one keyed output function allowed in a request.
        if self.cengine is not None:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractKeyedOutputFunction):
                value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names']
                headers = list(headers) + value_keys
                ## remove the 'value' attribute headers as this is replaced by the
                ## keyed output names.
                try:
                    headers.remove('value')
                ## it may not be in the list because of a user overload
                except ValueError:
                    pass
            else:
                value_keys = None
        else:
            value_keys = None

        ## the alias joins the names of all request datasets
        alias = '_'.join([r.name for r in rds])
        ocgis_lh('processing...',self._subset_log,alias=alias,level=logging.DEBUG)
        ## return the field object
        try:
            ## look for field optimizations
            if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                field = [self.ops.optimizations['fields'][rd.alias] for rd in rds]
            else:
                field = [rd.get(format_time=self.ops.format_time,
                                interpolate_spatial_bounds=self.ops.interpolate_spatial_bounds) for rd in rds]
            ## update the spatial abstraction to match the operations value. sfield
            ## will be none if the operation returns empty and it is allowed to have
            ## empty returns.
            for f in field:
                f.spatial.abstraction = self.ops.abstraction
            ## with more than one field, the first variable of the second
            ## field is added to the first field. only the first field is
            ## used from here on.
            if len(field) > 1:
                try:
                    ## reset the variable uid and let the collection handle its assignment
                    variable_to_add = field[1].variables.first()
                    variable_to_add.uid = None
                    field[0].variables.add_variable(variable_to_add)
                    ## reset the field names and let these be auto-generated
                    for f in field:
                        f._name = None
                ## this will fail for optimizations as the fields are already joined
                except VariableInCollectionError:
                    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
                        pass
                    else:
                        raise
            field = field[0]
        ## this error is related to subsetting by time or level. spatial subsetting
        ## occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed',
                         logger=self._subset_log,level=logging.WARN)
                coll = SpatialCollection(headers=headers)
                coll.add_field(1, None, None, name='_'.join([rd.name for rd in rds]))
                ## yield the empty collection and stop the generator
                try:
                    yield(coll)
                finally:
                    return
            else:
                ## NOTE(review): "rd" is only ever bound by the list
                ## comprehensions above, so this relies on Python 2 leaking
                ## the comprehension variable - confirm before porting.
                ocgis_lh(exc=ExtentError(message=str(e)),alias=rd.alias,logger=self._subset_log)

        ## set iterator based on presence of slice. slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [{}]
        else:
            itr = [{}] if self.ops.geom is None else self.ops.geom
        for coll in self._process_geometries_(itr,field,headers,value_keys,alias):
            yield(coll)

    def _process_geometries_(self,itr,field,headers,value_keys,alias):
        '''
        Subset ``field`` by each geometry dictionary in ``itr`` and yield one
        collection per geometry.

        :param sequence itr: Contains geometry dictionaries to process. If there
         are no geometries to process, this will be a sequence of one element with
         an empty dictionary.
        :param field: The field object to use for operations.
        :type field: :class:`ocgis.interface.Field`
        :param sequence headers: Sequence of strings to use as headers for the
         creation of the collection.
        :param sequence value_keys: Sequence of strings to use as headers for the
         keyed output functions.
        :param str alias: The request data alias currently being processed.
        :yields: :class:`~ocgis.SpatialCollection`
        '''

        ## loop over the iterator
        for gd in itr:
            ## always work with a new geometry dictionary
            gd = deepcopy(gd)
            ## CFRotatedPole takes special treatment. only do this if a subset
            ## geometry is available. this variable is needed to determine if
            ## backtransforms are necessary.
            original_rotated_pole_crs = None
            if isinstance(field.spatial.crs,CFRotatedPole):
                ## only transform if there is a subset geometry
                if len(gd) > 0:
                    ## store row and column dimension metadata and names before
                    ## transforming as this information is lost w/out row and
                    ## column dimensions on the transformations.
                    original_row_column_metadata = {'row':{'name':field.spatial.grid.row.name,
                                                           'meta':field.spatial.grid.row.meta},
                                                    'col':{'name':field.spatial.grid.col.name,
                                                           'meta':field.spatial.grid.col.meta}}
                    ## reset the geometries
                    field.spatial._geom = None
                    ## get the new grid dimension
                    field.spatial.grid = get_rotated_pole_spatial_grid_dimension(field.spatial.crs,field.spatial.grid)
                    ## update the CRS. copy the original CRS for possible later
                    ## transformation back to rotated pole.
                    original_rotated_pole_crs = deepcopy(field.spatial.crs)
                    field.spatial.crs = CFWGS84()

            ## initialize the collection object to store the subsetted data. if
            ## the output CRS differs from the field's CRS, adjust accordingly
            ## when initializing.
            if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs:
                collection_crs = self.ops.output_crs
            else:
                collection_crs = field.spatial.crs

            coll = SpatialCollection(crs=collection_crs,headers=headers,meta=gd.get('meta'),
                                     value_keys=value_keys)

            ## reference variables from the geometry dictionary
            geom = gd.get('geom')
            ## keep this around for the collection creation
            coll_geom = deepcopy(geom)
            crs = gd.get('crs')

            ## if there is a spatial abstraction, ensure it may be loaded.
            if self.ops.abstraction is not None:
                try:
                    getattr(field.spatial.geom,self.ops.abstraction)
                except ImproperPolygonBoundsError:
                    exc = ImproperPolygonBoundsError('A "polygon" spatial abstraction is not available without the presence of bounds.')
                    ocgis_lh(exc=exc,logger='subset')
                except Exception as e:
                    ocgis_lh(exc=e,logger='subset')

            ## if there is a snippet, return the first realization, time, and level
            if self.ops.snippet:
                field = field[0,0,0,:,:]
            ## if there is a slice, use it to subset the field.
            elif self.ops.slice is not None:
                field = field.__getitem__(self.ops.slice)

            ## see if the selection crs matches the field's crs
            if crs is not None and crs != field.spatial.crs:
                geom = project_shapely_geometry(geom,crs.sr,field.spatial.crs.sr)
                crs = field.spatial.crs
            ## if the geometry is a point, we need to buffer it...
            if type(geom) in [Point,MultiPoint]:
                ocgis_lh(logger=self._subset_log,msg='buffering point geometry',level=logging.DEBUG)
                ## the buffer distance is the search radius multiplier times
                ## the grid resolution
                geom = geom.buffer(self.ops.search_radius_mult*field.spatial.grid.resolution)
                ## update the geometry to store in the collection
                coll_geom = deepcopy(geom)

            ## get the ugid following geometry manipulations. the default is 1
            ## when the geometry dictionary carries no UGID.
            if 'properties' in gd and 'UGID' in gd['properties']:
                ugid = gd['properties']['UGID']
            else:
                ugid = 1

            if geom is None:
                msg = 'No selection geometry. Returning all data. Assiging UGID as 1.'
            else:
                msg = 'Subsetting with selection geometry having UGID={0}'.format(ugid)
            ocgis_lh(msg=msg,logger=self._subset_log)

            ## check for unique ugids. this is an issue with point subsetting
            ## as the buffer radius changes by dataset.
            if ugid in self._ugid_unique_store and geom is not None:
                ## only update if the geometry is unique
                if not any([__.almost_equals(geom) for __ in self._geom_unique_store]):
                    prev_ugid = ugid
                    ugid = max(self._ugid_unique_store) + 1
                    self._ugid_unique_store.append(ugid)
                    msg = 'Updating UGID {0} to {1} to maintain uniqueness.'.format(prev_ugid,ugid)
                    ocgis_lh(msg,self._subset_log,level=logging.WARN,alias=alias,ugid=ugid)
                ## NOTE(review): the geometry is stored again when it already
                ## matched a stored geometry, but not when a new UGID was
                ## assigned - confirm this is the intended branch.
                else:
                    self._geom_unique_store.append(geom)
            else:
                self._ugid_unique_store.append(ugid)
                self._geom_unique_store.append(geom)

            ## try to update the properties. a missing 'properties' key is
            ## ignored as long as the geometry dictionary is a dict.
            try:
                gd['properties']['UGID'] = ugid
            except KeyError:
                if not isinstance(gd,dict):
                    raise

            ## unwrap the data if it is geographic and 360
            if geom is not None and crs == CFWGS84():
                if CFWGS84.get_is_360(field.spatial):
                    ocgis_lh('unwrapping selection geometry',self._subset_log,alias=alias,ugid=ugid,level=logging.DEBUG)
                    geom = Wrapper().unwrap(geom)
            ## perform the spatial operation
            if geom is not None:
                try:
                    if self.ops.spatial_operation == 'intersects':
                        sfield = field.get_intersects(geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                                      select_nearest=self.ops.select_nearest)
                    elif self.ops.spatial_operation == 'clip':
                        sfield = field.get_clip(geom, use_spatial_index=env.USE_SPATIAL_INDEX,
                                                select_nearest=self.ops.select_nearest)
                    else:
                        ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation))
                except EmptySubsetError as e:
                    if self.ops.allow_empty:
                        ocgis_lh(alias=alias,ugid=ugid,msg='empty geometric operation but empty returns allowed',level=logging.WARN)
                        sfield = None
                    else:
                        msg = str(e) + ' This typically means the selection geometry falls outside the spatial domain of the target dataset.'
                        ocgis_lh(exc=ExtentError(message=msg),alias=alias,logger=self._subset_log)
            ## without a selection geometry the whole field is returned
            else:
                sfield = field

            ## if the base size is being requested, bypass the rest of the
            ## operations.
            if self._request_base_size_only == False:
                ## if empty returns are allowed, there may be an empty field
                if sfield is not None:
                    ## aggregate if requested
                    if self.ops.aggregate:
                        ocgis_lh('executing spatial average',self._subset_log,alias=alias,ugid=ugid)
                        sfield = sfield.get_spatially_aggregated(new_spatial_uid=ugid)

                    ## wrap the returned data.
                    if not env.OPTIMIZE_FOR_CALC:
                        if CFWGS84.get_is_360(sfield.spatial):
                            if self.ops.output_format != 'nc' and self.ops.vector_wrap:
                                ocgis_lh('wrapping output geometries',self._subset_log,alias=alias,ugid=ugid,
                                         level=logging.DEBUG)
                                ## modifying these values in place will change the values
                                ## in the base field. a copy is necessary.
                                sfield.spatial = deepcopy(sfield.spatial)
                                sfield.spatial.crs.wrap(sfield.spatial)

                    ## check for all masked values
                    if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False:
                        for variable in sfield.variables.itervalues():
                            ocgis_lh(msg='Fetching data for variable with alias "{0}".'.format(variable.alias),
                                     logger=self._subset_log)
                            if variable.value.mask.all():
                                ## masked data may be okay depending on other operational
                                ## conditions. the parenthesized numpy test is already
                                ## covered by the allow_empty test before it.
                                if self.ops.snippet or self.ops.allow_empty or (self.ops.output_format == 'numpy' and self.ops.allow_empty):
                                    if self.ops.snippet:
                                        ocgis_lh('all masked data encountered but allowed for snippet',
                                                 self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                    if self.ops.allow_empty:
                                        ocgis_lh('all masked data encountered but empty returns allowed',
                                                 self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                    if self.ops.output_format == 'numpy':
                                        ocgis_lh('all masked data encountered but numpy data being returned allowed',
                                                 logger=self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                else:
                                    ## if the geometry is also masked, it is an empty spatial
                                    ## operation.
                                    if sfield.spatial.abstraction_geometry.value.mask.all():
                                        ocgis_lh(exc=EmptyData,logger=self._subset_log)
                                    ## if none of the other conditions are met, raise the masked data error
                                    else:
                                        ocgis_lh(logger=self._subset_log,exc=MaskedDataError(),alias=alias,ugid=ugid)

                    ## transform back to rotated pole if necessary
                    if original_rotated_pole_crs is not None:
                        ## the isinstance test is always satisfied once the
                        ## output crs is None.
                        if self.ops.output_crs is None and not isinstance(self.ops.output_crs,CFWGS84):
                            # copy the spatial mask to the new spatial array
                            spatial_mask_before_transform = deepcopy(sfield.spatial.get_mask())
                            # need to load the values before proceeding. source indices will disappear.
                            for variable in sfield.variables.itervalues():
                                variable.value
                            # reset the geometries
                            sfield.spatial._geom = None
                            sfield.spatial.grid = get_rotated_pole_spatial_grid_dimension(
                             original_rotated_pole_crs,sfield.spatial.grid,inverse=True,
                             rc_original=original_row_column_metadata)
                            # update the grid mask with the previous spatial mask
                            sfield.spatial.grid.value.mask = spatial_mask_before_transform
                            ## update the uid mask to match the spatial mask
                            sfield.spatial.uid = np.ma.array(sfield.spatial.uid,mask=spatial_mask_before_transform)
                            sfield.spatial.crs = original_rotated_pole_crs

                    ## update the coordinate system of the data output
                    if self.ops.output_crs is not None:
                        ## if the geometry is not None, it may need to be projected to match
                        ## the output crs.
                        if geom is not None and crs != self.ops.output_crs:
                            geom = project_shapely_geometry(geom,crs.sr,self.ops.output_crs.sr)
                            coll_geom = deepcopy(geom)
                        ## update the coordinate reference system of the spatial
                        ## dimension.
                        try:
                            sfield.spatial.update_crs(self.ops.output_crs)
                        ## this is likely a rotated pole origin
                        except RuntimeError as e:
                            if isinstance(sfield.spatial.crs,CFRotatedPole):
                                assert(isinstance(self.ops.output_crs,WGS84))
                                sfield.spatial._geom = None
                                sfield.spatial.grid = get_rotated_pole_spatial_grid_dimension(
                                 sfield.spatial.crs,sfield.spatial.grid)
                                sfield.spatial.crs = self.ops.output_crs
                            else:
                                ocgis_lh(exc=e,logger=self._subset_log)

            ## the field is only given an explicit name (the alias) when the
            ## subset is empty; otherwise None is passed as the name.
            name = alias if sfield is None else None
            coll.add_field(ugid, coll_geom, sfield, properties=gd.get('properties'), name=name)

            yield(coll)
def compute(ops, tile_dimension, verbose=False, use_optimizations=True):
    """
    Used for computations on large arrays where memory limitations are a consideration. It is also useful for
    extracting data from a server that has limitations on the size of requested data arrays. This function creates an
    empty destination NetCDF file that is then filled by executing the operations on chunks of the requested target
    dataset(s) and filling the destination NetCDF file.

    While tiling, ``ops.callback`` (when set) is temporarily replaced by internal wrappers that rescale progress
    across tiles. The original callback is restored before this function returns or raises.

    :param ops: The target operations to tile. There must be a calculation associated with the operations.
    :type ops: :class:`ocgis.OcgOperations`
    :param int tile_dimension: The target tile/chunk dimension. This integer value must be greater than zero.
    :param bool verbose: If ``True``, print more verbose information to terminal.
    :param bool use_optimizations: If ``True``, cache :class:`Field` and :class:`TemporalGroupDimension` objects for
     reuse during tile iteration.
    :raises: AssertionError, ValueError, NotImplementedError
    :returns: Path to the output NetCDF file.
    :rtype: str

    >>> from ocgis import RequestDataset, OcgOperations
    >>> from ocgis.util.large_array import compute
    >>> rd = RequestDataset(uri='/path/to/file',variable='tas')
    >>> ops = OcgOperations(dataset=rd,calc=[{'func':'mean','name':'mean'}],output_format='nc')
    >>> ret = compute(ops, 25)
    """

    # validate arguments
    assert isinstance(ops, OcgOperations)
    assert ops.calc is not None
    assert ops.output_format == "nc"

    # validate the tile dimension before the operations object is touched
    tile_dimension = int(tile_dimension)
    if tile_dimension <= 0:
        raise ValueError('"tile_dimension" must be greater than 0')

    # keep a reference to the caller's callback so it can be restored on exit
    orgcallback = ops.callback
    has_callback = orgcallback is not None

    # ensure that progress is not showing 100% at first. copies of the
    # operations made below inherit this wrapper.
    if has_callback:
        def zeropercentagecallback(p, m):
            orgcallback(0.0, m)

        ops.callback = zeropercentagecallback

    # save the environment flag for calculation optimizations.
    orig_oc = ocgis.env.OPTIMIZE_FOR_CALC

    try:
        # determine if we are working with a multivariate function
        if OcgCalculationEngine._check_calculation_members_(ops.calc, AbstractMultivariateFunction):
            # only one multivariate calculation allowed
            assert len(ops.calc) == 1
            has_multivariate = True
        else:
            # only one dataset allowed for non-multivariate calculations
            assert len(ops.dataset) == 1
            has_multivariate = False

        # work on a copy of the operations to create the template file
        ops_file_only = deepcopy(ops)
        # we need the output to be file only for the first request
        ops_file_only.file_only = True

        # tell the software we are optimizing for calculations
        ocgis.env.OPTIMIZE_FOR_CALC = True

        # first, write the template file
        if verbose:
            print("getting fill file...")
        fill_file = ops_file_only.execute()

        # if there is a geometry, we have to find the offset for the slice. we
        # also need to account for the subset mask.
        if ops.geom is not None:
            if verbose:
                print("geometry subset is present. calculating slice offsets...")
            ops_offset = deepcopy(ops)
            ops_offset.output_format = "numpy"
            ops_offset.calc = None
            ops_offset.agg_selection = True
            ops_offset.snippet = False
            coll = ops_offset.execute()

            for row in coll.get_iter_melted():
                # assert the values are not loaded...
                assert row["variable"]._value is None
                # assert only 3 or 4 dimensional data is being used
                assert row["field"].shape_as_dict["R"] == 1

            ref_spatial = coll[1][ops_offset.dataset.first().name].spatial
            try:
                row_offset = ref_spatial.grid.row._src_idx[0]
                col_offset = ref_spatial.grid.col._src_idx[0]
            except (AttributeError, TypeError):
                # Likely no row and column for a 2-dimensional grid.
                row_offset = ref_spatial.grid._src_idx["row"][0]
                col_offset = ref_spatial.grid._src_idx["col"][0]
            mask_spatial = ref_spatial.get_mask()
        # otherwise the offset is zero...
        else:
            row_offset = 0
            col_offset = 0
            mask_spatial = None

        # get the shape for the tile schema from the template file
        if verbose:
            print("getting tile schema shape inputs...")
        shp_variable = ops.calc[0]["name"]
        template_rd = ocgis.RequestDataset(uri=fill_file, variable=shp_variable)
        template_field = template_rd.get()
        shp = template_field.shape[-2:]

        if use_optimizations:
            # if there is a calculation grouping, optimize for it. otherwise, pass
            # this value as None.
            try:
                tgd_field = ops.dataset.first().get()
                template_tgd = tgd_field.temporal.get_grouping(deepcopy(ops.calc_grouping))
                if not has_multivariate:
                    key = ops.dataset.first().name
                else:
                    key = "_".join([__.name for __ in ops.dataset.itervalues()])
                optimizations = {"tgds": {key: template_tgd}}
            except TypeError:
                optimizations = None

            # load the fields and pass those for optimization
            field_optimizations = {}
            for rd in ops.dataset.itervalues():
                gotten_field = rd.get(format_time=ops.format_time)
                field_optimizations.update({rd.name: gotten_field})
            optimizations = optimizations or {}
            optimizations["fields"] = field_optimizations
        else:
            optimizations = None

        if verbose:
            print("getting tile schema...")
        schema = tile.get_tile_schema(shp[0], shp[1], tile_dimension)
        lschema = len(schema)

        # Create a new callback where the 0-100% range reported for a single
        # tile is converted to its share of the overall progress.
        if has_callback:
            percentage_done = 0

            def newcallback(p, m):
                # float() avoids integer truncation when "p" is an integer
                p = (float(p) / lschema) + percentage_done
                orgcallback(p, m)

            ops.callback = newcallback

        if verbose:
            print("output file is: {0}".format(fill_file))
            print("tile count: {0}".format(lschema))

        fds = nc.Dataset(fill_file, "a")
        try:
            if verbose:
                progress = ProgressBar("tiles progress")
            if has_callback:
                orgcallback(0.0, "Initializing calculation")
            for ctr, indices in enumerate(schema.itervalues(), start=1):
                # appropriately adjust the slices to account for the spatial subset
                row = [ii + row_offset for ii in indices["row"]]
                col = [ii + col_offset for ii in indices["col"]]

                # copy the operations and modify arguments
                ops_slice = deepcopy(ops)
                ops_slice.geom = None
                ops_slice.slice = [None, None, None, row, col]
                ops_slice.output_format = "numpy"
                ops_slice.optimizations = optimizations
                # return the object slice
                ret = ops_slice.execute()
                for field_map in ret.itervalues():
                    for field in field_map.itervalues():
                        field_shape = field.shape_as_dict
                        for alias, variable in field.variables.iteritems():
                            vref = fds.variables[alias]
                            assert isinstance(variable.value, np.ma.MaskedArray)
                            # we need to remove the offsets to adjust for the zero-based
                            # fill file.
                            slice_row = slice(row[0] - row_offset, row[1] - row_offset)
                            slice_col = slice(col[0] - col_offset, col[1] - col_offset)
                            # if there is a spatial mask, update accordingly
                            if mask_spatial is not None:
                                set_variable_spatial_mask(variable, mask_spatial, slice_row, slice_col)
                            # squeeze out extra dimensions from ocgis
                            fill_value = np.squeeze(variable.value)
                            # fill the netCDF container variable adjusting for shape
                            if len(vref.shape) == 3:
                                reshape = (field_shape["T"], field_shape["Y"], field_shape["X"])
                                vref[:, slice_row, slice_col] = fill_value.reshape(*reshape)
                            elif len(vref.shape) == 4:
                                reshape = (field_shape["T"], field_shape["Z"], field_shape["Y"], field_shape["X"])
                                vref[:, :, slice_row, slice_col] = fill_value.reshape(*reshape)
                            else:
                                raise NotImplementedError(vref.shape)

                            # write the data to disk
                            fds.sync()
                if verbose:
                    progress.progress(int((float(ctr) / lschema) * 100))
                # newcallback reads this value through its closure
                if has_callback:
                    percentage_done = (float(ctr) / lschema) * 100
        finally:
            fds.close()
    finally:
        ocgis.env.OPTIMIZE_FOR_CALC = orig_oc
        # hand the operations back with the caller's own callback
        if has_callback:
            ops.callback = orgcallback
    if verbose:
        progress.endProgress()
        print("complete.")
    return fill_file
def _validate_(self):
    """
    Validate the operation's arguments against one another.

    The checks run in this order:

    1. netCDF output constraints (single request dataset unless exactly one
       multivariate calculation is present, "intersects" only, no aggregation,
       CFWGS84 as the only overloaded output CRS, no raw calculations, no keyed
       output functions).
    2. Coordinate system agreement across the request datasets.
    3. Rotated pole constraints.
    4. GeoJSON output constraints.
    5. Snippet constraints.
    6. Slice / selection geometry exclusivity.
    7. File-only constraints.
    8. Per-calculation validation through each calculation's ``validate``
       class hook (skipped for string eval functions).

    Failures are wrapped in a ``DefinitionValidationError`` and handed to
    ``ocgis_lh`` through its ``exc`` argument (presumably it logs and raises
    the exception -- confirm against the logging helper).

    Side effect: when rotated pole data is aggregated or clipped, the stored
    ``output_crs`` value is overwritten with ``CFWGS84()`` and a warning is
    logged.

    :raises AssertionError: If both a slice and a selection geometry are set.
    """
    ocgis_lh(logger='operations',msg='validating operations')

    def _raise_(msg,obj=OutputFormat):
        ## wrap the message in a validation error tied to the offending
        ## parameter definition and pass it to the logging helper.
        e = DefinitionValidationError(obj,msg)
        ocgis_lh(exc=e,logger='operations')

    def _raise_data_package_():
        ## shared failure for more than one request dataset headed to netCDF.
        msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
               'There are currently {dcount} RequestDatasets. Note, this is different than a '
               'multifile dataset.'.format(dcount=len(self.dataset)))
        _raise_(msg,OutputFormat)

    ## there are a bunch of constraints on the netCDF format
    if self.output_format == 'nc':
        if len(self.dataset) > 1:
            ## we can only write one requestdataset to netCDF
            if self.calc is None:
                _raise_data_package_()
            ## we can write multivariate functions to netCDF however
            else:
                ## count the occurrences of these classes in the calculation
                ## list.
                klasses_to_check = [AbstractMultivariateFunction,MultivariateEvalFunction]
                multivariate_checks = [issubclass(calc['ref'],klass)
                                       for klass in klasses_to_check
                                       for calc in self.calc]
                ## exactly one multivariate calculation justifies multiple
                ## request datasets. anything else is a data package.
                if sum(multivariate_checks) != 1:
                    _raise_data_package_()
        ## clipped data which creates an arbitrary geometry may not be written
        ## to netCDF
        if self.spatial_operation != 'intersects':
            msg = 'Only "intersects" spatial operation allowed for netCDF output. Arbitrary geometries may not currently be written.'
            _raise_(msg,OutputFormat)
        ## data may not be aggregated either
        if self.aggregate:
            msg = 'Data may not be aggregated for netCDF output. The aggregate parameter must be False.'
            _raise_(msg,OutputFormat)
        ## either the input data CRS or WGS84 is required for data output
        if self.output_crs is not None and not isinstance(self.output_crs,CFWGS84):
            msg = 'CFWGS84 is the only acceptable overloaded output CRS at this time for netCDF output.'
            _raise_(msg,OutputFormat)
        ## calculations on raw values are not relevant as no aggregation can
        ## occur anyway.
        if self.calc is not None:
            if self.calc_raw:
                msg = 'Calculations must be performed on original values (i.e. calc_raw=False) for netCDF output.'
                _raise_(msg)
            ## no keyed output functions to netCDF
            if OcgCalculationEngine._check_calculation_members_(self.calc,AbstractKeyedOutputFunction):
                msg = 'Keyed function output may not be written to netCDF.'
                _raise_(msg)

    ## collect the distinct coordinate systems of the request datasets.
    ## equality is used (not hashing) to decide whether a CRS was already seen.
    projections = []
    for rd in self.dataset.itervalues():
        if not any(crs == rd.crs for crs in projections):
            projections.append(rd.crs)
    ## if there is no output CRS and projections differ, raise an exception.
    ## however, it is okay to have data with different projections in the
    ## numpy output.
    if len(projections) > 1 and self.output_format != 'numpy': #@UndefinedVariable
        if self.output_crs is None:
            _raise_('Dataset coordinate reference systems must be equivalent if no output CRS is chosen.',obj=OutputCRS)
    ## clip and/or aggregation operations may not be written back to CFRotatedPole
    ## at this time. hence, the output crs must be set to CFWGS84.
    if CFRotatedPole in map(type,projections):
        if self.output_crs is not None and not isinstance(self.output_crs,WGS84):
            msg = ('{0} data may only be written to the same coordinate system (i.e. "output_crs=None") '
                   'or {1}.').format(CFRotatedPole.__name__,CFWGS84.__name__)
            _raise_(msg,obj=OutputCRS)
        if self.aggregate or self.spatial_operation == 'clip':
            msg = ('{0} data if clipped or spatially averaged must be written to '
                   '{1}. The "output_crs" is being updated to {2}.').format(
                   CFRotatedPole.__name__,CFWGS84.__name__,
                   CFWGS84.__name__)
            ocgis_lh(level=logging.WARN,msg=msg,logger='operations')
            ## this mutates the stored parameter value; it does not only validate.
            self._get_object_('output_crs')._value = CFWGS84()
    ## only WGS84 may be written to GeoJSON
    if self.output_format == 'geojson':
        if any(element != WGS84() for element in projections if element is not None):
            _raise_('Only data with a WGS84 projection may be written to GeoJSON.')
        if self.output_crs is not None:
            if self.output_crs != WGS84():
                _raise_('Only data with a WGS84 projection may be written to GeoJSON.')

    ## snippet only relevant for subsetting not operations with a calculation
    ## or time region
    if self.snippet:
        if self.calc is not None:
            _raise_('Snippets are not implemented for calculations. Apply a limiting time range for faster responses.',obj=Snippet)
        for rd in self.dataset.itervalues():
            if rd.time_region is not None:
                _raise_('Snippets are not implemented for time regions.',obj=Snippet)

    ## no slicing with a geometry - can easily lead to extent errors. this is
    ## raised explicitly (rather than with an assert statement) so the check
    ## is not stripped when the interpreter runs with -O.
    if self.slice is not None and self.geom is not None:
        raise AssertionError('A slice may not be used together with a selection geometry.')

    ## file only operations only valid for netCDF and calculations.
    if self.file_only:
        if self.output_format != 'nc':
            _raise_('Only netCDF may be written with file_only as True.',obj=FileOnly)
        if self.calc is None:
            _raise_('File only outputs are only relevant for computations.',obj=FileOnly)

    ## validate any calculations against the operations object. if the calculation
    ## is a string eval function do not validate.
    if self.calc is not None:
        if self._get_object_('calc')._is_eval_function:
            if self.calc_grouping is not None:
                msg = 'Calculation groups are not applicable for string function expressions.'
                _raise_(msg,obj=CalcGrouping)
        else:
            for c in self.calc:
                c['ref'].validate(self)
class SubsetOperation(object):
    '''
    Iterate over the collections produced by subsetting the request datasets
    of an operations object, optionally running the calculation engine on each
    collection before it is yielded.

    :param ops: The operations object. Attributes read here include ``calc``,
     ``calc_grouping``, ``calc_raw``, ``aggregate``, ``calc_sample_size``,
     ``output_format``, ``agg_selection``, ``geom`` and ``dataset``.
    :param bool serial: If ``True``, iterate serially. ``False`` is not
     supported; see :meth:`__iter__`.
    :param int nprocs: Stored on the instance. It is not read anywhere else in
     this class.
    '''

    def __init__(self,ops,serial=True,nprocs=1):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs

        self._subset_log = ocgis_lh.get_logger('subset')

        ## create the calculation engine. it stays None when no calculation is
        ## requested; the other methods test "self.cengine is not None".
        if self.ops.calc is None:
            self.cengine = None
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                                self.ops.calc,
                                                raw=self.ops.calc_raw,
                                                agg=self.ops.aggregate,
                                                calc_sample_size=self.ops.calc_sample_size)

        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        ## NOTE: this replaces "self.ops.geom" on the operations object that
        ## was passed in; the caller's object is modified.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    ## the first element seeds the union. its crs is used for
                    ## the result and the unioned geometry always gets UGID 1.
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            ## NOTE(review): if "self.ops.geom" yields no elements, the names
            ## below are never bound and this line fails -- confirm whether an
            ## empty geometry sequence can reach this point.
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr

    def __iter__(self):
        '''
        Yield each collection from :meth:`_iter_collections_` when ``serial``
        is true. Otherwise a ``NotImplementedError`` is passed to ``ocgis_lh``.

        :rtype: AbstractCollection
        '''
        ocgis_lh('beginning iteration',logger='conv.__iter__',level=logging.DEBUG)

        ## simple iterator for serial operations
        if self.serial:
            for coll in self._iter_collections_():
                yield(coll)
        ## the parallel case is not implemented.
        ## NOTE(review): presumably "ocgis_lh" raises the exception itself, in
        ## which case the outer "raise" is never reached -- confirm.
        else:
            raise(ocgis_lh(exc=NotImplementedError('multiprocessing is not available')))

    def _process_geometries_(self,rds):
        '''
        Generator yielding one ``SpatialCollection`` per selection geometry (or
        a single collection when there is no geometry or a slice is used).

        :param rds: Sequence of request dataset objects. Each must provide
         ``alias`` and ``get(format_time=...)``. When more than one is given,
         the first variable of the second field is added to the first field
         and only the first field is used afterwards.
        '''
        ocgis_lh(msg='entering _process_geometries_',logger=self._subset_log,level=logging.DEBUG)

        ## select headers. a user-provided value on the operations object wins
        ## over the defaults from the constants module.
        if self.ops.headers is not None:
            headers = self.ops.headers
        else:
            if self.cengine is not None:
                if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractMultivariateFunction):
                    headers = constants.multi_headers
                else:
                    headers = constants.calc_headers
            else:
                headers = constants.raw_headers

        ## keyed output functions require appending headers regardless. there is
        ## only one keyed output function allowed in a request.
        if self.cengine is not None:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractKeyedOutputFunction):
                value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names']
                ## "list(headers)" makes a copy so the source sequence is not
                ## modified by the "remove" call below.
                headers = list(headers) + value_keys
                ## remove the 'value' attribute headers as this is replaced by the
                ## keyed output names.
                try:
                    headers.remove('value')
                ## it may not be in the list because of a user overload
                except ValueError:
                    pass
            else:
                value_keys = None
        else:
            value_keys = None

        alias = '_'.join([r.alias for r in rds])
        ocgis_lh('processing...',self._subset_log,alias=alias)
        ## load the field object(s) from the request dataset(s)
        try:
            field = [rd.get(format_time=self.ops.format_time) for rd in rds]
            if len(field) > 1:
                field[0].variables.add_variable(field[1].variables.first())
            field = field[0]
        except EmptySubsetError as e:
            ## NOTE(review): "rd" below is only bound by the list comprehension
            ## above. this relies on Python 2 leaking comprehension variables
            ## into the enclosing scope.
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed',
                         logger=self._subset_log,level=logging.WARN)
                ## yield a single collection holding an empty (None) field,
                ## then stop the generator.
                coll = SpatialCollection(headers=headers)
                coll.add_field(1,None,rd.alias,None)
                try:
                    yield(coll)
                finally:
                    return
            else:
                ocgis_lh(exc=ExtentError(message=str(e)),alias=rd.alias,logger=self._subset_log)

        ## set iterator based on presence of slice. slice always overrides geometry.
        ## an empty dictionary stands in for "no selection geometry".
        if self.ops.slice is not None:
            itr = [{}]
        else:
            itr = [{}] if self.ops.geom is None else self.ops.geom

        ## loop over the geometry dictionaries
        for gd in itr:
            ## initialize the collection object to store the subsetted data. if
            ## the output CRS differs from the field's CRS, adjust accordingly
            ## when initializing.
            if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs:
                collection_crs = self.ops.output_crs
            else:
                collection_crs = field.spatial.crs

            coll = SpatialCollection(crs=collection_crs,headers=headers,meta=gd.get('meta'),
                                     value_keys=value_keys)

            ## reference variables from the geometry dictionary. both are None
            ## for the empty placeholder dictionary.
            geom = gd.get('geom')
            crs = gd.get('crs')

            if 'properties' in gd and 'UGID' in gd['properties']:
                ugid = gd['properties']['UGID']
            else:
                ## try to get lowercase ugid in case the shapefile is not perfectly
                ## formed. however, if there is no geometry accept the error and
                ## use the default geometry identifier.
                if len(gd) == 0:
                    ugid = 1
                else:
                    ugid = gd['properties']['ugid']

            ocgis_lh('processing',self._subset_log,level=logging.DEBUG,alias=alias,ugid=ugid)

            ## if there is a spatial abstraction, ensure it may be loaded. the
            ## attribute is fetched only to trigger any error; the value is
            ## discarded.
            if self.ops.abstraction is not None:
                try:
                    getattr(field.spatial.geom,self.ops.abstraction)
                except ImproperPolygonBoundsError:
                    exc = ImproperPolygonBoundsError('A "polygon" spatial abstraction is not available without the presence of bounds.')
                    ocgis_lh(exc=exc,logger='subset')
                except Exception as e:
                    ocgis_lh(exc=e,logger='subset')

            ## if there is a snippet, return the first realization, time, and level.
            ## NOTE: "field" is rebound here inside the geometry loop, so the
            ## snippet/slice is applied again to the already-subsetted field on
            ## every later iteration.
            if self.ops.snippet:
                field = field[0,0,0,:,:]
            ## if there is a slice, use it to subset the field.
            elif self.ops.slice is not None:
                field = field.__getitem__(self.ops.slice)

            ## if the selection crs differs from the field's crs, project the
            ## selection geometry to the field's crs.
            if crs is not None and crs != field.spatial.crs:
                geom = project_shapely_geometry(geom,crs.sr,field.spatial.crs.sr)
                crs = field.spatial.crs
            ## if the geometry is a point, we need to buffer it. the radius is
            ## the grid resolution scaled by the search radius multiplier.
            if type(geom) in [Point,MultiPoint]:
                ocgis_lh(logger=self._subset_log,msg='buffering point geometry',level=logging.DEBUG)
                geom = geom.buffer(self.ops.search_radius_mult*field.spatial.grid.resolution)
            ## unwrap the selection geometry if the data is geographic and 360
            if geom is not None and crs == CFWGS84():
                if CFWGS84.get_is_360(field.spatial):
                    ocgis_lh('unwrapping selection geometry',self._subset_log,alias=alias,ugid=ugid)
                    geom = Wrapper().unwrap(geom)
            ## perform the spatial operation
            if geom is not None:
                try:
                    if self.ops.spatial_operation == 'intersects':
                        sfield = field.get_intersects(geom)
                    elif self.ops.spatial_operation == 'clip':
                        sfield = field.get_clip(geom)
                    else:
                        ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation))
                except EmptySubsetError as e:
                    if self.ops.allow_empty:
                        ocgis_lh(alias=alias,ugid=ugid,msg='empty geometric operation but empty returns allowed',level=logging.WARN)
                        sfield = None
                    else:
                        ocgis_lh(exc=ExtentError(message=str(e)),alias=alias,logger=self._subset_log)
            else:
                ## no selection geometry: the whole field is returned.
                sfield = field

            ## if empty returns are allowed, there may be an empty (None) field
            if sfield is not None:
                ## aggregate if requested
                if self.ops.aggregate:
                    sfield = sfield.get_spatially_aggregated(new_spatial_uid=ugid)

                ## wrap the returned data. skipped for netCDF output and when
                ## the calculation optimization flag is set.
                if not env.OPTIMIZE_FOR_CALC:
                    if CFWGS84.get_is_360(sfield.spatial):
                        if self.ops.output_format != 'nc' and self.ops.vector_wrap:
                            ocgis_lh('wrapping output geometries',self._subset_log,alias=alias,ugid=ugid)
                            sfield.spatial.crs.wrap(sfield.spatial)

                ## check for all masked values
                if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False:
                    for variable in sfield.variables.itervalues():
                        if variable.value.mask.all():
                            ## masked data may be okay depending on other operational
                            ## conditions.
                            ## NOTE(review): the third clause is already implied by
                            ## "self.ops.allow_empty", so it never changes the
                            ## result -- confirm the intended rule for numpy output.
                            if self.ops.snippet or self.ops.allow_empty or (self.ops.output_format == 'numpy' and self.ops.allow_empty):
                                if self.ops.snippet:
                                    ocgis_lh('all masked data encountered but allowed for snippet',
                                             self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                if self.ops.allow_empty:
                                    ocgis_lh('all masked data encountered but empty returns allowed',
                                             self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                                if self.ops.output_format == 'numpy':
                                    ocgis_lh('all masked data encountered but numpy data being returned allowed',
                                             logger=self._subset_log,alias=alias,ugid=ugid,level=logging.WARN)
                            else:
                                ## if the geometry is also masked, it is an empty spatial
                                ## operation. note the exception class itself (not an
                                ## instance) is passed here.
                                if sfield.spatial.abstraction_geometry.value.mask.all():
                                    ocgis_lh(exc=EmptyData,logger=self._subset_log)
                                ## if none of the other conditions are met, raise the masked data error
                                else:
                                    ocgis_lh(logger=self._subset_log,exc=MaskedDataError(),alias=alias,ugid=ugid)

                ## update the coordinate system of the data output
                if self.ops.output_crs is not None:
                    ## if the geometry is not None, it may need to be projected to match
                    ## the output crs.
                    if geom is not None and crs != self.ops.output_crs:
                        geom = project_shapely_geometry(geom,crs.sr,self.ops.output_crs.sr)

                    sfield.spatial.update_crs(self.ops.output_crs)

            ## update the spatial abstraction to match the operations value. sfield
            ## will be none if the operation returns empty and it is allowed to have
            ## empty returns.
            if sfield is not None:
                sfield.spatial.abstraction = self.ops.abstraction

            coll.add_field(ugid,geom,alias,sfield,properties=gd.get('properties'))

            yield(coll)

    def _iter_collections_(self):
        '''
        Generator over the collections for every request dataset group. When a
        calculation engine exists, each collection is replaced by the result
        of ``self.cengine.execute`` before it is yielded.
        '''
        ocgis_lh('{0} request dataset(s) to process'.format(len(self.ops.dataset)),'conv._iter_collections_')

        ## build the groups handed to "_process_geometries_". a multivariate
        ## calculation receives all request datasets as one group; otherwise
        ## each request dataset is its own single-element group.
        if self.cengine is None:
            itr_rd = ([rd] for rd in self.ops.dataset)
        else:
            if self.cengine._check_calculation_members_(self.cengine.funcs,AbstractMultivariateFunction):
                itr_rd = [[r for r in self.ops.dataset]]
            else:
                itr_rd = ([rd] for rd in self.ops.dataset)

        for rds in itr_rd:
            for coll in self._process_geometries_(rds):
                ## if there are calculations, do those now and return a new type of collection
                if self.cengine is not None:
                    ## the log context uses the first key of the collection as
                    ## the ugid and the first key of its first value as the alias.
                    ocgis_lh('performing computations',
                             self._subset_log,
                             alias=coll.items()[0][1].keys()[0],
                             ugid=coll.keys()[0])
                    coll = self.cengine.execute(coll)

                ## conversion of groups is not implemented.
                if self.ops.output_grouping is not None:
                    raise(NotImplementedError)
                else:
                    ocgis_lh('subset yielding',self._subset_log,level=logging.DEBUG)
                    yield(coll)
class SubsetOperation(object): """ :param :class:~`ocgis.OcgOperations` ops: :param bool request_base_size_only: If ``True``, return field objects following the spatial subset performing as few operations as possible. :param :class:`ocgis.util.logging_ocgis.ProgressOcgOperations` progress: """ def __init__(self, ops, request_base_size_only=False, progress=None): self.ops = ops self._request_base_size_only = request_base_size_only self._subset_log = ocgis_lh.get_logger('subset') self._progress = progress or ProgressOcgOperations() # # create the calculation engine if self.ops.calc == None or self._request_base_size_only == True: self.cengine = None self._has_multivariate_calculations = False else: ocgis_lh('initializing calculation engine', self._subset_log, level=logging.DEBUG) self.cengine = OcgCalculationEngine(self.ops.calc_grouping, self.ops.calc, raw=self.ops.calc_raw, agg=self.ops.aggregate, calc_sample_size=self.ops.calc_sample_size, progress=self._progress) self._has_multivariate_calculations = any([self.cengine._check_calculation_members_(self.cengine.funcs, k) \ for k in [AbstractMultivariateFunction, MultivariateEvalFunction]]) # in the case of netcdf output, geometries must be unioned. this is also true for the case of the selection # geometry being requested as aggregated. 
if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) and self.ops.geom is not None: ocgis_lh('aggregating selection geometry', self._subset_log) build = True for sdim in self.ops.geom: _geom = sdim.geom.get_highest_order_abstraction().value[0, 0] if build: new_geom = _geom new_crs = sdim.crs new_properties = {'UGID': 1} build = False else: new_geom = new_geom.union(_geom) self.ops.geom = [{'geom': new_geom, 'properties': new_properties, 'crs': new_crs}] def __iter__(self): """:rtype: :class:`ocgis.api.collection.AbstractCollection`""" ocgis_lh('beginning iteration', logger='conv.__iter__', level=logging.DEBUG) self._ugid_unique_store = [] self._geom_unique_store = [] # simple iterator for serial operations for coll in self._iter_collections_(): yield coll def _iter_collections_(self): """:rtype: :class:`ocgis.api.collection.AbstractCollection`""" # multivariate calculations require datasets come in as a list with all # variable inputs part of the same sequence. if self._has_multivariate_calculations: itr_rd = [[r for r in self.ops.dataset.itervalues()]] # otherwise, process geometries expects a single element sequence else: itr_rd = [[rd] for rd in self.ops.dataset.itervalues()] # configure the progress object self._progress.n_subsettables = len(itr_rd) self._progress.n_geometries = get_default_or_apply(self.ops.geom, len, default=1) self._progress.n_calculations = get_default_or_apply(self.ops.calc, len, default=0) # send some messages msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables) ocgis_lh(msg=msg, logger=self._subset_log) if self.ops.geom is None: msg = 'Entire spatial domain returned. No selection geometries requested.' else: msg = 'Each data collection will be subsetted by {0} selection geometries.'.format( self._progress.n_geometries) ocgis_lh(msg=msg, logger=self._subset_log) if self._progress.n_calculations == 0: msg = 'No calculations requested.' 
else: msg = 'The following calculations will be applied to each data collection: {0}.'. \ format(', '.join([_['func'] for _ in self.ops.calc])) ocgis_lh(msg=msg, logger=self._subset_log) # process the data collections for rds in itr_rd: try: msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds]) except AttributeError: # field objects do not have uris associated with them msg = [] for rd in rds: try: msg.append(rd.uri) except AttributeError: # likely a field object msg.append(rd.name) msg = 'Processing URI(s) / field names: {0}'.format(msg) ocgis_lh(msg=msg, logger=self._subset_log) for coll in self._process_subsettables_(rds): # if there are calculations, do those now and return a new type of collection if self.cengine is not None: ocgis_lh('Starting calculations.', self._subset_log, alias=coll.items()[0][1].keys()[0], ugid=coll.keys()[0]) # look for any optimizations for temporal grouping. if self.ops.optimizations is None: tgds = None else: tgds = self.ops.optimizations.get('tgds') # execute the calculations coll = self.cengine.execute(coll, file_only=self.ops.file_only, tgds=tgds) else: # if there are no calculations, mark progress to indicate a geometry has been completed. self._progress.mark() # conversion of groups. if self.ops.output_grouping is not None: raise NotImplementedError else: ocgis_lh('subset yielding', self._subset_log, level=logging.DEBUG) yield coll def _process_subsettables_(self, rds): """ :param rds: Sequence of :class:~`ocgis.RequestDataset` objects. 
:type rds: sequence :rtype: :class:`ocgis.api.collection.AbstractCollection` """ ocgis_lh(msg='entering _process_geometries_', logger=self._subset_log, level=logging.DEBUG) # select headers and any value keys for keyed output functions value_keys = None if self.ops.headers is not None: headers = self.ops.headers else: if self.ops.melted: if self.cengine is not None: if self._has_multivariate_calculations: headers = constants.HEADERS_MULTI else: headers = constants.HEADERS_CALC else: headers = constants.HEADERS_RAW else: headers = None # keyed output functions require appending headers regardless. there is only one keyed output function # allowed in a request. if headers is not None: if self.cengine is not None: if self.cengine._check_calculation_members_(self.cengine.funcs, AbstractKeyedOutputFunction): value_keys = self.cengine.funcs[0]['ref'].structure_dtype['names'] headers = list(headers) + value_keys # remove the 'value' attribute headers as this is replaced by the keyed output names. 
try: headers.remove('value') # it may not be in the list because of a user overload except ValueError: pass alias = '_'.join([r.name for r in rds]) ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG) # return the field object try: # look for field optimizations if self.ops.optimizations is not None and 'fields' in self.ops.optimizations: ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG) field = [self.ops.optimizations['fields'][rd.alias] for rd in rds] # no field optimizations, extract the target data from the dataset collection else: ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG) len_rds = len(rds) field = [None] * len_rds for ii in range(len_rds): rds_element = rds[ii] try: field_object = rds_element.get(format_time=self.ops.format_time) except AttributeError: # likely a field object which does not need to be loaded from source if not self.ops.format_time: raise NotImplementedError field_object = rds_element # extrapolate the spatial bounds if requested if self.ops.interpolate_spatial_bounds: try: try: field_object.spatial.grid.row.set_extrapolated_bounds() field_object.spatial.grid.col.set_extrapolated_bounds() except AttributeError: # row/col is likely none. attempt to extrapolate using the grid values field_object.spatial.grid.set_extrapolated_corners() except BoundsAlreadyAvailableError: msg = 'Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".' ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARNING) field[ii] = field_object # update the spatial abstraction to match the operations value. sfield will be none if the operation returns # empty and it is allowed to have empty returns. 
for f in field: f.spatial.abstraction = self.ops.abstraction if len(field) > 1: try: # reset the variable uid and let the collection handle its assignment variable_to_add = field[1].variables.first() variable_to_add.uid = None field[0].variables.add_variable(variable_to_add) # reset the field names and let these be auto-generated for f in field: f._name = None # this will fail for optimizations as the fields are already joined except VariableInCollectionError: if self.ops.optimizations is not None and 'fields' in self.ops.optimizations: pass else: raise field = field[0] # this error is related to subsetting by time or level. spatial subsetting occurs below. except EmptySubsetError as e: if self.ops.allow_empty: ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log, level=logging.WARN) coll = SpatialCollection(headers=headers) name = '_'.join([rd.name for rd in rds]) coll.add_field(None, name=name) try: yield coll finally: return else: ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.name for rd in rds]), logger=self._subset_log) # set iterator based on presence of slice. slice always overrides geometry. if self.ops.slice is not None: itr = [None] else: itr = [None] if self.ops.geom is None else self.ops.geom for coll in self._process_geometries_(itr, field, headers, value_keys, alias): yield (coll) def _get_initialized_collection_(self, field, headers, value_keys): """ Initialize the spatial collection object selecting the output CRS in the process. :param field: :type field: :class:`ocgis.interface.base.field.Field` :param headers: :type headers: list[str] :param value_keys: :type value_keys: list[str] :rtype: :class:`ocgis.api.collection.SpatialCollection` """ # initialize the collection object to store the subsetted data. if the output CRS differs from the field's # CRS, adjust accordingly when initializing. 
if self.ops.output_crs is not None and field.spatial.crs != self.ops.output_crs: collection_crs = self.ops.output_crs else: collection_crs = field.spatial.crs coll = SpatialCollection(crs=collection_crs, headers=headers, value_keys=value_keys) return coll def _get_update_rotated_pole_state_(self, field, subset_sdim): """ Rotated pole coordinate systems are handled internally by transforming the CRS to a geographic coordinate system. :param field: :type field: :class:`ocgis.interface.base.field.Field` :param subset_sdim: :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` or None :rtype: None or :class:`ocgis.interface.base.crs.CFRotatedPole` :raises: AssertionError """ # CFRotatedPole takes special treatment. only do this if a subset geometry is available. this variable is # needed to determine if backtransforms are necessary. original_rotated_pole_crs = None if isinstance(field.spatial.crs, CFRotatedPole): # only transform if there is a subset geometry if subset_sdim is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip': # update the CRS. copy the original CRS for possible later transformation back to rotated pole. original_rotated_pole_crs = copy(field.spatial.crs) ocgis_lh('initial rotated pole transformation...', self._subset_log, level=logging.DEBUG) field.spatial.update_crs(CFWGS84()) ocgis_lh('...finished initial rotated pole transformation', self._subset_log, level=logging.DEBUG) return original_rotated_pole_crs def _assert_abstraction_available_(self, field): """ Assert the spatial abstraction may be loaded on the field object if one is provided in the operations. 
:param field: :type field: :class:`ocgis.interface.base.field.Field` """ if self.ops.abstraction is not None: attr = getattr(field.spatial.geom, self.ops.abstraction) if attr is None: msg = 'A "{0}" spatial abstraction is not available.'.format(self.ops.abstraction) ocgis_lh(exc=ValueError(msg), logger='subset') def _get_slice_or_snippet_(self, field): """ Slice the incoming field if a slice or snippet argument is present. :param field: :type field: :class:`ocgis.interface.base.field.Field` :rtype: :class:`ocgis.interface.base.field.Field` """ # if there is a snippet, return the first realization, time, and level if self.ops.snippet: field = field[0, 0, 0, :, :] # if there is a slice, use it to subset the field. elif self.ops.slice is not None: field = field.__getitem__(self.ops.slice) return field def _get_spatially_subsetted_field_(self, alias, field, subset_sdim, subset_ugid): """ Spatially subset a field with a selection geometry. :param str alias: The request data alias currently being processed. :param field: :type field: :class:`ocgis.interface.base.field.Field` :param subset_sdim: :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` :rtype: None or :class:`ocgis.interface.base.field.Field` :raises: AssertionError, ExtentError """ assert (subset_sdim is not None) subset_geom = subset_sdim.single.geom # check for unique ugids. this is an issue with point subsetting as the buffer radius changes by dataset. 
if subset_ugid in self._ugid_unique_store: # # only update if the geometry is unique if not any([__.almost_equals(subset_geom) for __ in self._geom_unique_store]): prev_ugid = subset_ugid ugid = max(self._ugid_unique_store) + 1 # update the geometry property and uid subset_sdim.properties['UGID'][0] = ugid subset_sdim.uid[:] = ugid self._ugid_unique_store.append(ugid) self._geom_unique_store.append(subset_geom) msg = 'Updating UGID {0} to {1} to maintain uniqueness.'.format(prev_ugid, ugid) ocgis_lh(msg, self._subset_log, level=logging.WARN, alias=alias, ugid=ugid) else: pass # self._ugid_unique_store.append(subset_ugid) # self._geom_unique_store.append(subset_geom) else: self._ugid_unique_store.append(subset_ugid) self._geom_unique_store.append(subset_geom) # unwrap the data if it is geographic and 360 if field.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped: if subset_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped: ocgis_lh('unwrapping selection geometry', self._subset_log, alias=alias, ugid=subset_ugid, level=logging.DEBUG) subset_sdim.unwrap() # update the geometry reference as the spatial dimension was unwrapped and modified in place subset_geom = subset_sdim.single.geom # perform the spatial operation try: if self.ops.spatial_operation == 'intersects': sfield = field.get_intersects(subset_geom, use_spatial_index=env.USE_SPATIAL_INDEX, select_nearest=self.ops.select_nearest) elif self.ops.spatial_operation == 'clip': sfield = field.get_clip(subset_geom, use_spatial_index=env.USE_SPATIAL_INDEX, select_nearest=self.ops.select_nearest) else: ocgis_lh(exc=NotImplementedError(self.ops.spatial_operation)) except EmptySubsetError as e: if self.ops.allow_empty: ocgis_lh(alias=alias, ugid=subset_ugid, msg='empty geometric operation but empty returns allowed', level=logging.WARN) sfield = None else: msg = ' This typically means the selection geometry falls outside the spatial domain of the target dataset.' 
msg = str(e) + msg ocgis_lh(exc=ExtentError(message=msg), alias=alias, logger=self._subset_log) # if the subset geometry is unwrapped and the vector wrap option is true, wrap the subset geometry. if self.ops.vector_wrap: if subset_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped: subset_sdim.wrap() return sfield def _update_subset_geometry_if_point_(self, field, subset_sdim, subset_ugid): """ If the subset geometry is a point of multipoint, it will need to be buffered and the spatial dimension updated accordingly. If the subset geometry is a polygon, pass through. :param field: :type field: :class:`ocgis.interface.base.field.Field` :param subset_sdim: :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` :param int subset_ugid: :raises: AssertionError """ if type(subset_sdim.single.geom) in [Point, MultiPoint]: assert subset_sdim.abstraction == 'point' ocgis_lh(logger=self._subset_log, msg='buffering point geometry', level=logging.DEBUG) subset_geom = subset_sdim.single.geom.buffer(self.ops.search_radius_mult * field.spatial.grid.resolution) value = np.ma.array([[None]]) value[0, 0] = subset_geom subset_sdim.geom._polygon = SpatialGeometryPolygonDimension(value=value, uid=subset_ugid) # the polygon should be used for subsetting, update the spatial dimension to use this abstraction subset_sdim.abstraction = 'polygon' assert subset_sdim.abstraction == 'polygon' def _check_masking_(self, alias, sfield, subset_ugid): """ :param str alias: The field's alias value. :param sfield: The target field containing variables to check for masking. :type sfield: :class:`ocgis.interface.base.field.Field` :param int subset_ugid: The unique identifier for the geometry. """ for variable in sfield.variables.itervalues(): ocgis_lh(msg='Fetching data for variable with alias "{0}".'.format(variable.alias), logger=self._subset_log) if variable.value.mask.all(): # masked data may be okay... 
if self.ops.snippet or self.ops.allow_empty or ( self.ops.output_format == 'numpy' and self.ops.allow_empty): if self.ops.snippet: ocgis_lh('all masked data encountered but allowed for snippet', self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN) if self.ops.allow_empty: ocgis_lh('all masked data encountered but empty returns allowed', self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN) if self.ops.output_format == 'numpy': ocgis_lh('all masked data encountered but numpy data being returned allowed', logger=self._subset_log, alias=alias, ugid=subset_ugid, level=logging.WARN) else: # if the geometry is also masked, it is an empty spatial operation. if sfield.spatial.abstraction_geometry.value.mask.all(): ocgis_lh(exc=EmptyData, logger=self._subset_log) # if none of the other conditions are met, raise the masked data error else: ocgis_lh(logger=self._subset_log, exc=MaskedDataError(), alias=alias, ugid=subset_ugid) def _get_regridded_field_with_subset_(self, sfield, subset_sdim_for_regridding=None, with_buffer=True): """ Regrid ``sfield`` subsetting the regrid destination in the process. :param sfield: The input field to regrid. :type sfield: :class:`ocgis.interface.base.field.Field` :param subset_sdim_for_regridding: The original, unaltered spatial dimension to use for subsetting. :type subset_sdim_for_regridding: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` :param bool with_buffer: If ``True``, buffer the geometry used to subset the destination grid. 
""" # todo: cache spatial operations on regrid destination field from ocgis.regrid.base import iter_regridded_fields from ocgis.util.spatial.spatial_subset import SpatialSubsetOperation if subset_sdim_for_regridding is None: regrid_destination = self.ops.regrid_destination else: if with_buffer: # buffer the subset geometry by the resolution of the source field to give extents a chance to be # compatible buffer_value = sfield.spatial.grid.resolution buffer_crs = sfield.spatial.crs else: buffer_value, buffer_crs = [None, None] ss = SpatialSubsetOperation(self.ops.regrid_destination) regrid_destination = ss.get_spatial_subset('intersects', subset_sdim_for_regridding, use_spatial_index=env.USE_SPATIAL_INDEX, select_nearest=False, buffer_value=buffer_value, buffer_crs=buffer_crs) original_sfield_crs = sfield.spatial.crs # check crs on the source field regrid_required_update_crs = False if not isinstance(sfield.spatial.crs, Spherical): # this as _assigned_ a WGS84 crs hence we cannot assume the default crs if isinstance(sfield.spatial.crs, WGS84) and sfield._has_assigned_coordinate_system: regrid_required_update_crs = True # the data has a coordinate system that is not WGS84 elif not isinstance(sfield.spatial.crs, WGS84): regrid_required_update_crs = True if regrid_required_update_crs: # need to load values as source indices will disappear during crs update for variable in sfield.variables.itervalues(): variable.value sfield.spatial.update_crs(Spherical()) else: sfield.spatial.crs = Spherical() # update the coordinate system of the regrid destination if required try: destination_sdim = regrid_destination.spatial except AttributeError: # likely a spatial dimension object destination_sdim = regrid_destination update_regrid_destination_crs = False if not isinstance(destination_sdim.crs, Spherical): if isinstance(regrid_destination, Field): if isinstance(destination_sdim.crs, WGS84) and regrid_destination._has_assigned_coordinate_system: update_regrid_destination_crs = True 
elif isinstance(destination_sdim.crs, WGS84) and not regrid_destination._has_assigned_coordinate_system: pass else: update_regrid_destination_crs = True else: if not isinstance(destination_sdim.crs, Spherical): update_regrid_destination_crs = True if update_regrid_destination_crs: destination_sdim.update_crs(Spherical()) else: destination_sdim.crs = Spherical() # check that wrapping is equivalent if destination_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped: if sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped: sfield.spatial = deepcopy(sfield.spatial) sfield.spatial.unwrap() if destination_sdim.wrapped_state == WrappableCoordinateReferenceSystem._flag_wrapped: if sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped: sfield.spatial = deepcopy(sfield.spatial) sfield.spatial.wrap() # remove the mask from the destination field. new_mask = np.zeros(destination_sdim.shape, dtype=bool) destination_sdim.set_mask(new_mask) # regrid the input fields. sfield = list(iter_regridded_fields([sfield], destination_sdim, **self.ops.regrid_options))[0] if regrid_required_update_crs: sfield.spatial.update_crs(original_sfield_crs) else: sfield.spatial.crs = original_sfield_crs # subset the output from the regrid operation as masked values may be introduced on the edges if subset_sdim_for_regridding is not None: ss = SpatialSubsetOperation(sfield) sfield = ss.get_spatial_subset('intersects', subset_sdim_for_regridding, use_spatial_index=env.USE_SPATIAL_INDEX, select_nearest=False) return sfield def _process_geometries_(self, itr, field, headers, value_keys, alias): """ :param sequence itr: An iterator yielding :class:`~ocgis.SpatialDimension` objects. :param :class:`ocgis.interface.Field` field: The field object to use for operations. :param sequence headers: Sequence of strings to use as headers for the creation of the collection. 
:param sequence value_keys: Sequence of strings to use as headers for the keyed output functions. :param str alias: The request data alias currently being processed. :rtype: :class:~`ocgis.SpatialCollection` """ ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG) # process each geometry for subset_sdim in itr: # always work with a copy of the target geometry subset_sdim = deepcopy(subset_sdim) """:type subset_sdim: ocgis.interface.base.dimension.spatial.SpatialDimension""" if self.ops.regrid_destination is not None: # if there is regridding, make another copy as this geometry may be manipulated during subsetting of # sources subset_sdim_for_regridding = deepcopy(subset_sdim) # operate on the rotated pole coordinate system by first transforming it to CFWGS84 original_rotated_pole_crs = self._get_update_rotated_pole_state_(field, subset_sdim) # initialize the collection storage coll = self._get_initialized_collection_(field, headers, value_keys) # check if the geometric abstraction is available on the field object self._assert_abstraction_available_(field) # return a slice or snippet if either of these are requested. field = self._get_slice_or_snippet_(field) # choose the subset ugid value if subset_sdim is None: msg = 'No selection geometry. Returning all data. Assigning UGID as 1.' subset_ugid = 1 else: subset_ugid = subset_sdim.single.uid msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid) ocgis_lh(msg=msg, logger=self._subset_log) if subset_sdim is not None: # if the CRS's differ, update the spatial dimension to match the field if subset_sdim.crs is not None and subset_sdim.crs != field.spatial.crs: subset_sdim.update_crs(field.spatial.crs) # if the geometry is a point, it needs to be buffered self._update_subset_geometry_if_point_(field, subset_sdim, subset_ugid) # if there is a selection geometry present, use it for the spatial subset. if not, all the field's data is # being returned. 
if subset_sdim is None: sfield = field else: sfield = self._get_spatially_subsetted_field_(alias, field, subset_sdim, subset_ugid) # if the base size is being requested, bypass the rest of the operations. if not self._request_base_size_only: # perform regridding operations if requested if self.ops.regrid_destination is not None and sfield._should_regrid: try: original_sfield_sdim = deepcopy(sfield.spatial) sfield = self._get_regridded_field_with_subset_( sfield, subset_sdim_for_regridding=subset_sdim_for_regridding, with_buffer=True) except ValueError: # attempt without buffering the subset geometry for the target field. sfield.spatial = original_sfield_sdim sfield = self._get_regridded_field_with_subset_(sfield, subset_sdim_for_regridding=subset_sdim_for_regridding, with_buffer=False) # if empty returns are allowed, there be an empty field if sfield is not None: # aggregate if requested if self.ops.aggregate: ocgis_lh('executing spatial average', self._subset_log, alias=alias, ugid=subset_ugid) sfield = sfield.get_spatially_aggregated(new_spatial_uid=subset_ugid) # wrap the returned data. if not env.OPTIMIZE_FOR_CALC: if sfield is not None and sfield.spatial.wrapped_state == WrappableCoordinateReferenceSystem._flag_unwrapped: if self.ops.output_format != 'nc' and self.ops.vector_wrap: ocgis_lh('wrapping output geometries', self._subset_log, alias=alias, ugid=subset_ugid, level=logging.DEBUG) # deepcopy the spatial dimension before wrapping as wrapping will modify the spatial # dimension on the parent field object. which may need to be reused for additional # subsets. 
sfield.spatial = deepcopy(sfield.spatial) sfield.spatial.wrap() # check for all masked values if env.OPTIMIZE_FOR_CALC is False and self.ops.file_only is False: self._check_masking_(alias, sfield, subset_ugid) # transform back to rotated pole if necessary if original_rotated_pole_crs is not None: if not isinstance(self.ops.output_crs, CFWGS84): sfield.spatial.update_crs(original_rotated_pole_crs) # update the coordinate system of the data output if self.ops.output_crs is not None: # if the geometry is not None, it may need to be projected to match the output crs. if subset_sdim is not None and subset_sdim.crs != self.ops.output_crs: subset_sdim.update_crs(self.ops.output_crs) # update the subset field CRS sfield.spatial = deepcopy(sfield.spatial) sfield.spatial.update_crs(self.ops.output_crs) # use the field's alias if it is provided. otherwise, let it be automatically assigned name = alias if sfield is None else None # add the created field to the output collection with the selection geometry. coll.add_field(sfield, ugeom=subset_sdim, name=name) yield coll