Example #1
 def get_engine(self, kwds=None, funcs=None, grouping="None"):
     """
     :type grouping: None or str
     """
     kwds = kwds or {}
     funcs = funcs or self.funcs
     # The string 'None' is a sentinel meaning "use the instance-level grouping".
     if grouping == 'None':
         grouping = self.grouping
     return CalculationEngine(grouping, funcs, **kwds)
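The string default is a sentinel: omitting the argument means "use the instance default", while an explicit None is forwarded as a real value. A small standalone sketch of the same pattern (hypothetical names, no ocgis dependency):

class _EngineFactory(object):
    """Hypothetical stand-in illustrating the 'None'-string sentinel used above."""

    def __init__(self, default_grouping):
        self.grouping = default_grouping

    def resolve_grouping(self, grouping="None"):
        # Only the sentinel string triggers the fallback; an explicit None passes through.
        if grouping == 'None':
            grouping = self.grouping
        return grouping


factory = _EngineFactory(['month'])
assert factory.resolve_grouping() == ['month']          # sentinel -> instance default
assert factory.resolve_grouping(None) is None           # explicit None is respected
assert factory.resolve_grouping(['year']) == ['year']   # explicit grouping is respected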
Example #2
File: nc.py Project: NCPP/ocgis
    def validate_ops(cls, ops):
        from ocgis.ops.parms.definition import OutputFormat

        def _raise_(msg, ocg_argument=OutputFormat):
            raise DefinitionValidationError(ocg_argument, msg)

        # We can only write one request dataset to netCDF.
        len_ops_dataset = len(list(ops.dataset))
        if len_ops_dataset > 1 and ops.calc is None:
            msg = 'Data packages (i.e. more than one RequestDataset) may not be written to netCDF. There are ' \
                  'currently {dcount} RequestDatasets. Note, this is different than a multifile dataset.'
            msg = msg.format(dcount=len_ops_dataset)
            _raise_(msg, OutputFormat)
        # We can write multivariate functions to netCDF.
        else:
            if ops.calc is not None and len_ops_dataset > 1:
                # Count the occurrences of these classes in the calculation list.
                klasses_to_check = [AbstractMultivariateFunction, MultivariateEvalFunction]
                multivariate_checks = []
                for klass in klasses_to_check:
                    for calc in ops.calc:
                        multivariate_checks.append(issubclass(calc['ref'], klass))
                if sum(multivariate_checks) != 1:
                    msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                           'There are currently {dcount} RequestDatasets. Note, this is different than a '
                           'multifile dataset.'.format(dcount=len(ops.dataset)))
                    _raise_(msg, OutputFormat)
                else:
                    # There is a multivariate calculation and this requires multiple request datasets.
                    pass

        # Clipped data which creates an arbitrary geometry may not be written to netCDF.
        if ops.spatial_operation != 'intersects' and not ops.aggregate:
            msg = ('Only "intersects" spatial operation allowed for netCDF output. Arbitrary geometries may not '
                   'currently be written unless ``aggregate`` is True.')
            _raise_(msg, OutputFormat)

        # Calculations on raw values are not relevant as no aggregation can occur anyway.
        if ops.calc is not None:
            if ops.calc_raw:
                msg = 'Calculations must be performed on original values (i.e. calc_raw=False) for netCDF output.'
                _raise_(msg)
            # No keyed output functions to netCDF.
            if CalculationEngine._check_calculation_members_(ops.calc, AbstractKeyedOutputFunction):
                msg = 'Keyed function output may not be written to netCDF.'
                _raise_(msg)

        # Re-organize the collections following a discrete geometry model if aggregate is True
        if ops.aggregate and not ops.geom:
            msg = 'If aggregate is True then a geometry must be provided for netCDF output.'
            _raise_(msg, OutputFormat)

        if not ops.aggregate and not ops.agg_selection and ops.geom and len(ops.geom) > 1:
            msg = 'Multiple geometries must either be unioned (agg_selection=True) ' \
                  'or aggregated (aggregate=True).'
            _raise_(msg, OutputFormat)
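A hedged sketch of the first rule above: more than one RequestDataset with no calculation should be rejected for netCDF output. The URIs are placeholders, DefinitionValidationError is assumed to be importable from ocgis.exc, and the exact point at which validation fires may vary by ocgis version.

from ocgis import OcgOperations, RequestDataset
from ocgis.exc import DefinitionValidationError

rd1 = RequestDataset(uri='/path/to/file1.nc', variable='tas')  # placeholder inputs
rd2 = RequestDataset(uri='/path/to/file2.nc', variable='pr')
try:
    # Two request datasets and calc=None may not be written to netCDF.
    OcgOperations(dataset=[rd1, rd2], output_format='nc')
except DefinitionValidationError as exc:
    print(exc)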
Example #5
File: nc.py Project: Ouranosinc/ocgis
    def validate_ops(cls, ops):
        from ocgis.ops.parms.definition import OutputFormat

        def _raise_(msg, ocg_argument=OutputFormat):
            raise DefinitionValidationError(ocg_argument, msg)

        # We can only write one request dataset to netCDF.
        len_ops_dataset = len(list(ops.dataset))
        if len_ops_dataset > 1 and ops.calc is None:
            msg = 'Data packages (i.e. more than one RequestDataset) may not be written to netCDF. There are ' \
                  'currently {dcount} RequestDatasets. Note, this is different than a multifile dataset.'
            msg = msg.format(dcount=len_ops_dataset)
            _raise_(msg, OutputFormat)
        # We can write multivariate functions to netCDF.
        else:
            if ops.calc is not None and len_ops_dataset > 1:
                # Count the occurrences of these classes in the calculation list.
                klasses_to_check = [AbstractMultivariateFunction,
                                    MultivariateEvalFunction]
                multivariate_checks = []
                for klass in klasses_to_check:
                    for calc in ops.calc:
                        multivariate_checks.append(
                            issubclass(calc['ref'], klass))
                if sum(multivariate_checks) != 1:
                    msg = ('Data packages (i.e. more than one RequestDataset) may not be written to netCDF. '
                           'There are currently {dcount} RequestDatasets. Note, this is different than a '
                           'multifile dataset.'.format(dcount=len(ops.dataset)))
                    _raise_(msg, OutputFormat)
                else:
                    # There is a multivariate calculation and this requires multiple request datasets.
                    pass

        # Only aggregated data is supported.
        if not ops.aggregate:
            msg = 'This output format is only for aggregated data. The aggregate parameter must be True.'
            _raise_(msg, OutputFormat)
        # Calculations on raw values are not relevant as no aggregation can occur anyway.
        if ops.calc is not None:
            if ops.calc_raw:
                msg = 'Calculations must be performed on original values (i.e. calc_raw=False) for netCDF output.'
                _raise_(msg)
            # No keyed output functions to netCDF.
            if CalculationEngine._check_calculation_members_(ops.calc,
                                                             AbstractKeyedOutputFunction):
                msg = 'Keyed function output may not be written to netCDF.'
                _raise_(msg)
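This fork's validator additionally rejects non-aggregated output. A hedged sketch (the URI and selection geometry key are placeholders) of an operations setup intended to satisfy that check:

from ocgis import OcgOperations, RequestDataset

rd = RequestDataset(uri='/path/to/file.nc', variable='tas')    # placeholder
ops = OcgOperations(dataset=rd,
                    calc=[{'func': 'mean', 'name': 'mean'}],
                    calc_grouping=['month'],
                    geom='state_boundaries',                   # placeholder selection geometry
                    aggregate=True,                            # required by this validator
                    output_format='nc')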
Example #6
File: engine.py Project: NCPP/ocgis
    def __init__(self, ops, request_base_size_only=False, progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()
        self._original_subcomm = deepcopy(vm.current_comm_name)
        self._backtransform = {}

        # Create the calculation engine if calculations are present.
        if self.ops.calc is None or self._request_base_size_only:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine', self._subset_log, level=logging.DEBUG)
            self.cengine = CalculationEngine(self.ops.calc_grouping,
                                             self.ops.calc,
                                             calc_sample_size=self.ops.calc_sample_size,
                                             progress=self._progress,
                                             spatial_aggregation=self.ops.aggregate)
            self._has_multivariate_calculations = self.cengine.has_multivariate_functions
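A small, hypothetical sketch of the progress default: when no progress object is supplied, the engine builds its own ProgressOcgOperations. The import path comes from the class docstring in the next example, and the attributes and method below are the ones this engine later sets and calls.

from ocgis.util.logging_ocgis import ProgressOcgOperations

progress = ProgressOcgOperations()
progress.n_subsettables = 1     # dataset collections to process
progress.n_geometries = 1       # selection geometries per collection
progress.n_calculations = 0     # calculations applied per collection
progress.mark()                 # advanced once per completed geometry when no calculations run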
Example #7
class OperationsEngine(AbstractOcgisObject):
    """
    Executes the operations defined by ``ops``.
    
    :param ops: The operations to interpret.
    :type ops: :class:`~ocgis.OcgOperations`
    :param bool request_base_size_only: If ``True``, return field objects following the spatial subset, performing
     as few operations as possible.
    :param progress: A progress object to update.
    :type progress: :class:`~ocgis.util.logging_ocgis.ProgressOcgOperations`
    """
    def __init__(self, ops, request_base_size_only=False, progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()
        self._original_subcomm = deepcopy(vm.current_comm_name)
        self._backtransform = {}

        # Create the calculation engine if calculations are present.
        if self.ops.calc is None or self._request_base_size_only:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine',
                     self._subset_log,
                     level=logging.DEBUG)
            self.cengine = CalculationEngine(
                self.ops.calc_grouping,
                self.ops.calc,
                calc_sample_size=self.ops.calc_sample_size,
                progress=self._progress,
                spatial_aggregation=self.ops.aggregate)
            self._has_multivariate_calculations = self.cengine.has_multivariate_functions

    def __iter__(self):
        """:rtype: :class:`ocgis.collection.base.AbstractCollection`"""
        ocgis_lh('beginning iteration',
                 logger='conv.__iter__',
                 level=logging.DEBUG)

        # Yields collections with all operations applied.
        try:
            for coll in self._iter_collections_():
                ocgis_lh('__iter__ yielding',
                         self._subset_log,
                         level=logging.DEBUG)
                yield coll
        finally:
            # Try and remove any subcommunicators associated with operations.
            for v in SubcommName.__members__.values():
                try:
                    vm.free_subcomm(name=v)
                except SubcommNotFoundError:
                    pass
            vm.set_comm(self._original_subcomm)

            # Remove any back transformations.
            for v in constants.BackTransform.__members__.values():
                self._backtransform.pop(v, None)

    def _iter_collections_(self):
        """:rtype: :class:`ocgis.collection.base.AbstractCollection`"""

        # Multivariate calculations require that datasets come in as a list with all variable inputs part of the same
        # sequence.
        if self._has_multivariate_calculations:
            itr_rd = [[rd for rd in self.ops.dataset]]
        # Otherwise, geometry processing expects single-element sequences.
        else:
            itr_rd = [[rd] for rd in self.ops.dataset]

        # Configure the progress object.
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom,
                                                           len,
                                                           default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc,
                                                             len,
                                                             default=0)

        # Some introductory logging.
        msg = '{0} dataset collection(s) to process.'.format(
            self._progress.n_subsettables)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
                self._progress.n_geometries)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'. \
                format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg, logger=self._subset_log)

        # Process the incoming datasets. Convert from request datasets to fields as needed.
        for rds in itr_rd:

            try:
                msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            except AttributeError:
                # Field objects have no URIs. Multivariate calculations change how the request dataset iterator is
                # configured as well.
                msg = []
                for rd in rds:
                    try:
                        msg.append(rd.uri)
                    except AttributeError:
                        # Likely a field object which does have a name.
                        msg.append(rd.name)
                msg = 'Processing URI(s) / field names: {0}'.format(msg)
            ocgis_lh(msg=msg, logger=self._subset_log)

            for coll in self._process_subsettables_(rds):
                # If there are calculations, do those now and return a collection.
                if not vm.is_null and self.cengine is not None:
                    ocgis_lh('Starting calculations.', self._subset_log)
                    raise_if_empty(coll)

                    # Look for any temporal grouping optimizations.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')

                    # Execute the calculations.
                    coll = self.cengine.execute(coll,
                                                file_only=self.ops.file_only,
                                                tgds=tgds)

                    # If we need to spatially aggregate and calculations used raw values, update the collection
                    # fields and subset geometries.
                    if self.ops.aggregate and self.ops.calc_raw:
                        coll_to_itr = coll.copy()
                        for sfield, container in coll_to_itr.iter_fields(
                                yield_container=True):
                            sfield = _update_aggregation_wrapping_crs_(
                                self, None, sfield, container, None)
                            coll.add_field(sfield, container, force=True)
                else:
                    # If there are no calculations, mark progress to indicate a geometry has been completed.
                    self._progress.mark()

                # Conversion of groups.
                if self.ops.output_grouping is not None:
                    raise NotImplementedError
                else:
                    ocgis_lh('_iter_collections_ yielding',
                             self._subset_log,
                             level=logging.DEBUG)
                    yield coll

    def _process_subsettables_(self, rds):
        """
        :param rds: Sequence of :class:~`ocgis.RequestDataset` objects.
        :type rds: sequence
        :rtype: :class:`ocgis.collection.base.AbstractCollection`
        """

        ocgis_lh(msg='entering _process_subsettables_',
                 logger=self._subset_log,
                 level=logging.DEBUG)

        # This is used to define the group of request datasets for things like logging and exceptions.
        try:
            alias = '_'.join([r.field_name for r in rds])
        except AttributeError:
            # Allow field objects which do not expose the "field_name" attribute.
            try:
                alias = '_'.join([r.name for r in rds])
            except TypeError:
                # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
                alias = None
        except NoDataVariablesFound:
            # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
            alias = None

        ocgis_lh('processing...',
                 self._subset_log,
                 alias=alias,
                 level=logging.DEBUG)
        # Create the field object. Field objects may be passed directly to operations.
        # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
        if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
            ocgis_lh('applying optimizations',
                     self._subset_log,
                     level=logging.DEBUG)
            field = [
                self.ops.optimizations['fields'][rd.field_name].copy()
                for rd in rds
            ]
            has_field_optimizations = True
        else:
            # Indicates no field optimizations loaded.
            has_field_optimizations = False
        try:
            # No field optimizations and data should be loaded from source.
            if not has_field_optimizations:
                ocgis_lh('creating field objects',
                         self._subset_log,
                         level=logging.DEBUG)
                len_rds = len(rds)
                field = [None] * len_rds
                for ii in range(len_rds):
                    rds_element = rds[ii]
                    try:
                        field_object = rds_element.get(
                            format_time=self.ops.format_time,
                            grid_abstraction=self.ops.abstraction)
                    except (AttributeError, TypeError):
                        # Likely a field object which does not need to be loaded from source.
                        if not self.ops.format_time:
                            raise NotImplementedError
                        # Check that it is indeed a field before proceeding.
                        if not isinstance(rds_element, Field):
                            raise
                        field_object = rds_element

                    field[ii] = field_object

            # Multivariate calculations require pulling variables across fields.
            if self._has_multivariate_calculations and len(field) > 1:
                for midx in range(1, len(field)):
                    # Use the data variable tag if it is available. Otherwise, attempt to merge the fields, raising a
                    # warning if the variable exists in the squashed field.
                    if len(field[midx].data_variables) > 0:
                        vitr = field[midx].data_variables
                        is_data = True
                    else:
                        vitr = list(field[midx].values())
                        is_data = False
                    for mvar in vitr:
                        mvar = mvar.extract()
                        field[0].add_variable(mvar, is_data=is_data)
                    new_field_name = '_'.join([str(f.name) for f in field])
                    field[0].set_name(new_field_name)

            # The first field in the list is always the target for other operations.
            field = field[0]
            assert isinstance(field, Field)

            # Break out of operations if the rank is empty.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET,
                                           field,
                                           is_current=True,
                                           clobber=True)
            if not vm.is_null:
                if not has_field_optimizations:
                    if field.is_empty:
                        raise ValueError('No empty fields allowed.')

                    # Time, level, etc. subsets.
                    field = self._get_nonspatial_subset_(field)

                    # Spatially reorder the data.
                    ocgis_lh(msg='before spatial reorder',
                             logger=self._subset_log,
                             level=logging.DEBUG)
                    if self.ops.spatial_reorder:
                        self._update_spatial_order_(field)

                    # Extrapolate the spatial bounds if requested.
                    # TODO: Rename "interpolate" to "extrapolate".
                    if self.ops.interpolate_spatial_bounds:
                        self._update_bounds_extrapolation_(field)

        # This error is related to subsetting by time or level. Spatial subsetting occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(
                    msg='time or level subset empty but empty returns allowed',
                    logger=self._subset_log,
                    level=logging.WARN)
                coll = self._get_initialized_collection_()
                name = '_'.join([rd.field_name for rd in rds])
                field = Field(name=name, is_empty=True)
                coll.add_field(field, None)
                try:
                    yield coll
                finally:
                    return
            else:
                # Raise an exception as empty subsets are not allowed.
                ocgis_lh(exc=ExtentError(message=str(e)),
                         alias=str([rd.field_name for rd in rds]),
                         logger=self._subset_log)

        # Set iterator based on presence of slice. Slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [None]
        else:
            itr = [None] if self.ops.geom is None else self.ops.geom

        for coll in self._process_geometries_(itr, field, alias):
            # Conform units following the spatial subset.
            if not vm.is_null and self.ops.conform_units_to is not None:
                for to_conform in coll.iter_fields():
                    for dv in to_conform.data_variables:
                        dv.cfunits_conform(self.ops.conform_units_to)
            ocgis_lh(msg='_process_subsettables_ yielding',
                     logger=self._subset_log,
                     level=logging.DEBUG)
            yield coll

    def _process_geometries_(self, itr, field, alias):
        """
        :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting.
        :type itr: [None] or [:class:`~ocgis.Field`, ...]
        :param :class:`ocgis.Field` field: The target field for operations.
        :param str alias: The request data alias currently being processed.
        :rtype: :class:`~ocgis.SpatialCollection`
        """

        assert isinstance(field, Field)

        ocgis_lh('processing geometries',
                 self._subset_log,
                 level=logging.DEBUG)
        # Process each geometry.
        for subset_field in itr:

            # Initialize the collection storage.
            coll = self._get_initialized_collection_()
            if vm.is_null:
                sfield = field
            else:
                # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
                # subset target with wrapping, coordinate system conversion, etc.
                subset_field = deepcopy(subset_field)

                if self.ops.regrid_destination is not None:
                    # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                    # sources.
                    subset_field_for_regridding = deepcopy(subset_field)

                # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
                # system.
                key = constants.BackTransform.ROTATED_POLE
                self._backtransform[
                    key] = self._get_update_rotated_pole_state_(
                        field, subset_field)

                # Check if the geometric abstraction is available on the field object.
                self._assert_abstraction_available_(field)

                # Return a slice or snippet if either of these are requested.
                field = self._get_slice_or_snippet_(field)

                # Choose the subset UGID value.
                if subset_field is None:
                    msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                    subset_ugid = None
                else:
                    subset_ugid = subset_field.geom.ugid.get_value()[0]
                    msg = 'Subsetting with selection geometry having UGID={0}'.format(
                        subset_ugid)
                ocgis_lh(msg=msg, logger=self._subset_log)

                if subset_field is not None:
                    # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                    if subset_field.crs is not None and subset_field.crs != field.crs:
                        subset_field.update_crs(field.crs)
                    # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                    subset_field = self._get_buffered_subset_geometry_if_point_(
                        field, subset_field)

                # If there is a selection geometry present, use it for the spatial subset. If not, all the field's
                # data is returned.
                if subset_field is None:
                    sfield = field
                else:
                    sfield = self._get_spatially_subsetted_field_(
                        alias, field, subset_field, subset_ugid)

                ocgis_lh(msg='after self._get_spatially_subsetted_field_',
                         logger=self._subset_log,
                         level=logging.DEBUG)

                # Create the subcommunicator following the data subset to ensure non-empty communication.
                vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET,
                                               sfield,
                                               is_current=True,
                                               clobber=True)

                if not vm.is_null:
                    if not sfield.is_empty and not self.ops.allow_empty:
                        raise_if_empty(sfield)

                        # If the base size is being requested, bypass the rest of the operations.
                        if not self._request_base_size_only:
                            # Perform regridding operations if requested.
                            if self.ops.regrid_destination is not None and sfield.regrid_source:
                                sfield = self._get_regridded_field_with_subset_(
                                    sfield,
                                    subset_field_for_regridding=
                                    subset_field_for_regridding)
                            else:
                                ocgis_lh(msg='no regridding operations',
                                         logger=self._subset_log,
                                         level=logging.DEBUG)
                            # If empty returns are allowed, there may be an empty field.
                            if sfield is not None:
                                # Only update spatial stuff if there are no calculations and, if there are calculations,
                                # those calculations are not expecting raw values.
                                if self.ops.calc is None or (
                                        self.ops.calc is not None
                                        and not self.ops.calc_raw):
                                    # Update spatial aggregation, wrapping, and coordinate systems.
                                    sfield = _update_aggregation_wrapping_crs_(
                                        self, alias, sfield, subset_field,
                                        subset_ugid)
                                    ocgis_lh(
                                        'after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                        self._subset_log,
                                        level=logging.DEBUG)

            # Add the created field to the output collection with the selection geometry.
            if sfield is None:
                assert self.ops.aggregate
            if sfield is not None:
                coll.add_field(sfield, subset_field)

            yield coll

    def _get_nonspatial_subset_(self, field):
        """
        
        :param field:
        :type field: :class:`~ocgis.Field`
        :return: 
        :raises: EmptySubsetError
        """

        # Apply any time or level subsetting provided through operations.
        if self.ops.time_range is not None:
            field = field.time.get_between(*self.ops.time_range).parent
        if self.ops.time_region is not None:
            field = field.time.get_time_region(self.ops.time_region).parent
        if self.ops.time_subset_func is not None:
            field = field.time.get_subset_by_function(
                self.ops.time_subset_func).parent
        if self.ops.level_range is not None:
            field = field.level.get_between(*self.ops.level_range).parent

        return field

    @staticmethod
    def _get_initialized_collection_():
        coll = SpatialCollection()
        return coll

    def _get_update_rotated_pole_state_(self, field, subset_field):
        """
        Rotated pole coordinate systems are handled internally by transforming the CRS to a geographic coordinate
        system.

        :param field:
        :type field: :class:`ocgis.Field`
        :param subset_field:
        :type subset_field: :class:`ocgis.Field` or None
        :rtype: None or :class:`ocgis.variable.crs.CFRotatedPole`
        :raises: AssertionError
        """

        # CFRotatedPole requires special treatment. Only do this if a subset geometry is available. This variable is
        # needed to determine if backtransforms are necessary.
        original_rotated_pole_crs = None
        if isinstance(field.crs, CFRotatedPole):
            # Only transform if there is a subset geometry.
            if subset_field is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip':
                # Update the CRS. Copy the original CRS for possible later transformation back to rotated pole.
                original_rotated_pole_crs = deepcopy(field.crs)
                ocgis_lh('initial rotated pole transformation...',
                         self._subset_log,
                         level=logging.DEBUG)
                field.update_crs(env.DEFAULT_COORDSYS)
                ocgis_lh('...finished initial rotated pole transformation',
                         self._subset_log,
                         level=logging.DEBUG)
        return original_rotated_pole_crs

    def _assert_abstraction_available_(self, field):
        """
        Assert the spatial abstraction may be loaded on the field object if one is provided in the operations.

        :param field: The field to check for a spatial abstraction.
        :type field: :class:`ocgis.Field`
        """

        if self.ops.abstraction != 'auto':
            is_available = field.grid.is_abstraction_available(
                self.ops.abstraction)
            if not is_available:
                msg = 'A "{0}" spatial abstraction is not available.'.format(
                    self.ops.abstraction)
                ocgis_lh(exc=ValueError(msg), logger='subset')

    def _get_slice_or_snippet_(self, field):
        """
        Slice the incoming field if a slice or snippet argument is present.

        :param field: The field to slice.
        :type field: :class:`ocgis.Field`
        :rtype: :class:`ocgis.Field`
        """

        # If there is a snippet, return the first realization, time, and level.
        if self.ops.snippet:
            the_slice = {'time': 0, 'realization': 0, 'level': 0}
        # If there is a slice, use it to subset the field. Only field slices are supported.
        elif self.ops.slice is not None:
            the_slice = self.ops.slice
        else:
            the_slice = None
        if the_slice is not None:
            field = field.get_field_slice(the_slice,
                                          strict=False,
                                          distributed=True)
        return field

    def _get_spatially_subsetted_field_(self, alias, field, subset_field,
                                        subset_ugid):
        """
        Spatially subset a field with a selection field.

        :param str alias: The request data alias currently being processed.
        :param field: Target field to subset.
        :type field: :class:`ocgis.Field`
        :param subset_field: The field to use for subsetting.
        :type subset_field: :class:`ocgis.Field`
        :rtype: :class:`ocgis.Field`
        :raises: AssertionError, ExtentError
        """

        assert subset_field is not None

        ocgis_lh('executing spatial subset operation',
                 self._subset_log,
                 level=logging.DEBUG,
                 alias=alias,
                 ugid=subset_ugid)
        sso = SpatialSubsetOperation(field)
        try:
            # Execute the spatial subset and return the subsetted field.
            sfield = sso.get_spatial_subset(
                self.ops.spatial_operation,
                subset_field.geom,
                select_nearest=self.ops.select_nearest,
                optimized_bbox_subset=self.ops.optimized_bbox_subset)
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(
                    alias=alias,
                    ugid=subset_ugid,
                    msg='Empty geometric operation but empty returns allowed.',
                    level=logging.WARN)
                sfield = Field(name=field.name, is_empty=True)
            else:
                msg = ' This typically means the selection geometry falls outside the spatial domain of the target ' \
                      'dataset.'
                msg = str(e) + msg
                ocgis_lh(exc=ExtentError(message=msg),
                         alias=alias,
                         logger=self._subset_log)

        # If the subset geometry is unwrapped and the vector wrap option is true, wrap the subset geometry.
        if self.ops.vector_wrap:
            if subset_field.wrapped_state == WrappedState.UNWRAPPED:
                subset_field.wrap()

        return sfield

    def _get_buffered_subset_geometry_if_point_(self, field, subset_field):
        """
        If the subset geometry is a point or multipoint, it will need to be buffered and the spatial dimension updated
        accordingly. If the subset geometry is a polygon, pass through.

        :param field:
        :type field: :class:`ocgis.Field`
        :param subset_field:
        :type subset_field: :class:`ocgis.Field`
        """

        if subset_field.geom.geom_type in [
                'Point', 'MultiPoint'
        ] and self.ops.search_radius_mult is not None:
            ocgis_lh(logger=self._subset_log,
                     msg='buffering point geometry',
                     level=logging.DEBUG)
            subset_field = subset_field.geom.get_buffer(
                self.ops.search_radius_mult * field.grid.resolution).parent
            assert subset_field.geom.geom_type in ['Polygon', 'MultiPolygon']

        return subset_field

    def _get_regridded_field_with_subset_(self,
                                          sfield,
                                          subset_field_for_regridding=None):
        """
        Regrid ``sfield`` subsetting the regrid destination in the process.

        :param sfield: The input field to regrid.
        :type sfield: :class:`ocgis.Field`
        :param subset_field_for_regridding: The original, unaltered spatial dimension to use for subsetting.
        :type subset_field_for_regridding: :class:`ocgis.Field`
        :rtype: :class:`~ocgis.Field`
        """

        from ocgis.regrid.base import RegridOperation
        ocgis_lh(logger=self._subset_log,
                 msg='Starting regrid operation...',
                 level=logging.INFO)
        ro = RegridOperation(sfield,
                             self.ops.regrid_destination,
                             subset_field=subset_field_for_regridding,
                             regrid_options=self.ops.regrid_options)
        sfield = ro.execute()
        ocgis_lh(logger=self._subset_log,
                 msg='Regrid operation complete.',
                 level=logging.INFO)
        return sfield

    def _update_bounds_extrapolation_(self, field):
        try:
            name_x_variable = '{}_{}'.format(field.grid.x.name,
                                             constants.OCGIS_BOUNDS)
            name_y_variable = '{}_{}'.format(field.grid.y.name,
                                             constants.OCGIS_BOUNDS)
            field.grid.set_extrapolated_bounds(name_x_variable,
                                               name_y_variable,
                                               constants.OCGIS_BOUNDS)
        except BoundsAlreadyAvailableError:
            msg = 'Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".'
            ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARNING)

    def _update_spatial_order_(self, field):
        _update_wrapping_(self, field)
        if field.grid is not None:
            wrapped_state = field.grid.wrapped_state
            if wrapped_state == WrappedState.WRAPPED:
                field.grid.reorder()
            else:
                msg = 'Reorder not relevant for wrapped state "{}". Doing nothing.'.format(
                    str(wrapped_state))
                ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARN)
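A hedged driver sketch: the engine is an iterator that yields spatial collections with subsetting, calculations, and aggregation already applied, and it frees its subcommunicators when iteration finishes. The request dataset is a placeholder, and OperationsEngine itself (the class above) is assumed to be importable into scope; its module path is not shown in this listing.

from ocgis import OcgOperations, RequestDataset

rd = RequestDataset(uri='/path/to/file.nc', variable='tas')    # placeholder
ops = OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}], calc_grouping=['month'])

engine = OperationsEngine(ops)
for coll in engine:
    for field, container in coll.iter_fields(yield_container=True):
        # Each yielded collection holds the subset (and possibly calculated) fields.
        print(field.name, container)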
Example #8
def compute(ops, tile_dimension, verbose=False, use_optimizations=True):
    """
    Used for computations on large arrays where memory limitations are a consideration. It is also useful for
    extracting data from a server that has limitations on the size of requested data arrays. This function creates an
    empty destination NetCDF file that is then filled by executing the operations on chunks of the requested
    target dataset(s) and filling the destination NetCDF file.

    :param ops: The target operations to tile. There must be a calculation associated with
     the operations.
    :type ops: :class:`ocgis.OcgOperations`
    :param int tile_dimension: The target tile/chunk dimension. This integer value must be greater than zero.
    :param bool verbose: If ``True``, print more verbose information to terminal.
    :param bool use_optimizations: If ``True``, cache :class:`Field` and :class:`TemporalGroupDimension` objects for
     reuse during tile iteration.
    :raises: AssertionError, ValueError
    :returns: Path to the output NetCDF file.
    :rtype: str

    >>> from ocgis import RequestDataset, OcgOperations
    >>> from ocgis.util.large_array import compute
    >>> rd = RequestDataset(uri='/path/to/file', variable='tas')
    >>> ops = OcgOperations(dataset=rd,calc=[{'func':'mean','name':'mean'}],output_format='nc')
    >>> ret = compute(ops, 25)
    """

    assert isinstance(ops, OcgOperations)
    assert ops.calc is not None
    assert ops.output_format == constants.OutputFormatName.NETCDF

    # Ensure that progress is not showing 100% at first.
    if ops.callback is not None:
        orgcallback = ops.callback

        def zeropercentagecallback(p, m):
            orgcallback(0., m)

        ops.callback = zeropercentagecallback

    tile_dimension = int(tile_dimension)
    if tile_dimension <= 0:
        raise ValueError('"tile_dimension" must be greater than 0')

    # Determine if we are working with a multivariate function.
    if CalculationEngine._check_calculation_members_(
            ops.calc, AbstractMultivariateFunction):
        # Only one multivariate calculation allowed.
        assert len(ops.calc) == 1
        has_multivariate = True
    else:
        # Only one dataset allowed.
        assert len(list(ops.dataset)) == 1
        has_multivariate = False

    # work on a copy of the operations to create the template file
    ops_file_only = deepcopy(ops)
    # we need the output to be file only for the first request
    ops_file_only.file_only = True
    # save the environment flag for calculation optimizations.
    orig_oc = ocgis.env.OPTIMIZE_FOR_CALC

    try:
        # tell the software we are optimizing for calculations
        ocgis.env.OPTIMIZE_FOR_CALC = True

        # first, write the template file
        if verbose:
            print('getting fill file...')
        fill_file = ops_file_only.execute()

        # if there is a geometry, we have to find the offset for the slice. we
        # also need to account for the subset mask.
        if ops.geom is not None:
            if verbose:
                print(
                    'geometry subset is present. calculating slice offsets...')
            ops_offset = deepcopy(ops)
            ops_offset.output_format = constants.OutputFormatName.OCGIS
            ops_offset.calc = None
            ops_offset.agg_selection = True
            ops_offset.snippet = False
            coll = ops_offset.execute()

            for row in coll.iter_melted(tag=TagName.DATA_VARIABLES):
                assert row['variable']._value is None

            ref_field = coll.get_element()
            ref_grid = ref_field.grid
            row_offset = ref_grid.dimensions[0]._src_idx[0]
            col_offset = ref_grid.dimensions[1]._src_idx[0]
            mask_spatial = ref_grid.get_mask()
        # otherwise the offset is zero...
        else:
            row_offset = 0
            col_offset = 0
            mask_spatial = None

        # get the shape for the tile schema
        if verbose:
            print('getting tile schema shape inputs...')
        shp_variable = ops.calc[0]['name']
        template_rd = ocgis.RequestDataset(uri=fill_file,
                                           variable=shp_variable)
        template_field = template_rd.get()
        shp = template_field.grid.shape

        if use_optimizations:
            # if there is a calculation grouping, optimize for it. otherwise, pass
            # this value as None.
            try:
                # tgd_field = ops.dataset.first().get()
                archetype_dataset = list(ops.dataset)[0]
                tgd_field = archetype_dataset.get()
                template_tgd = tgd_field.temporal.get_grouping(
                    deepcopy(ops.calc_grouping))
                if not has_multivariate:
                    key = archetype_dataset.field_name
                else:
                    key = '_'.join([__.field_name for __ in ops.dataset])
                optimizations = {'tgds': {key: template_tgd}}
            except TypeError:
                optimizations = None

            # load the fields and pass those for optimization
            field_optimizations = {}
            for rd in ops.dataset:
                gotten_field = rd.get(format_time=ops.format_time)
                field_optimizations.update({rd.field_name: gotten_field})
            optimizations = optimizations or {}
            optimizations['fields'] = field_optimizations
        else:
            optimizations = None

        if verbose:
            print('getting tile schema...')
        schema = tile.get_tile_schema(shp[0], shp[1], tile_dimension)
        lschema = len(schema)

        # Create a new callback function where the 0-100% range is converted to a subset corresponding to the
        # number of blocks to be calculated.
        if ops.callback is not None:
            percentageDone = 0
            callback = ops.callback

            def newcallback(p, m):
                p = (p / lschema) + percentageDone
                orgcallback(p, m)

            ops.callback = newcallback

        if verbose:
            print(('output file is: {0}'.format(fill_file)))
            print(('tile count: {0}'.format(lschema)))

        fds = nc.Dataset(fill_file, 'a')
        try:
            if verbose:
                progress = ProgressBar('tiles progress')
            if ops.callback is not None and callback:
                callback(0, "Initializing calculation")
            for ctr, indices in enumerate(iter(schema.values()), start=1):
                # appropriately adjust the slices to account for the spatial subset
                row = [ii + row_offset for ii in indices['row']]
                col = [ii + col_offset for ii in indices['col']]

                # copy the operations and modify arguments
                ops_slice = deepcopy(ops)
                ops_slice.geom = None
                ops_slice.slice = [None, None, None, row, col]
                ops_slice.output_format = constants.OutputFormatName.OCGIS
                ops_slice.optimizations = optimizations
                # return the object slice
                ret = ops_slice.execute()

                for field in ret.iter_fields():
                    for variable in field.data_variables:
                        vref = fds.variables[variable.name]
                        # we need to remove the offsets to adjust for the zero-based fill file.
                        slice_row = slice(row[0] - row_offset,
                                          row[1] - row_offset)
                        slice_col = slice(col[0] - col_offset,
                                          col[1] - col_offset)
                        # if there is a spatial mask, update accordingly
                        if mask_spatial is not None:
                            set_variable_spatial_mask(variable, mask_spatial,
                                                      slice_row, slice_col)
                            fill_mask = field.grid.get_mask(create=True)
                            fill_mask[:, :] = mask_spatial[slice_row,
                                                           slice_col]
                            fill_mask = np.ma.array(np.zeros(fill_mask.shape),
                                                    mask=fill_mask)
                            fds.variables[field.grid.mask_variable.name][
                                slice_row, slice_col] = fill_mask
                        fill_value = variable.get_masked_value()
                        # fill the netCDF container variable adjusting for shape
                        if len(vref.shape) == 3:
                            vref[:, slice_row, slice_col] = fill_value
                        elif len(vref.shape) == 4:
                            vref[:, :, slice_row, slice_col] = fill_value
                        else:
                            raise NotImplementedError(vref.shape)

                        fds.sync()
                if verbose:
                    progress.progress(int((float(ctr) / lschema) * 100))
                if ops.callback is not None and callback:
                    percentageDone = ((float(ctr) / lschema) * 100)
        finally:
            fds.close()
    finally:
        ocgis.env.OPTIMIZE_FOR_CALC = orig_oc
    if verbose:
        progress.endProgress()
        print('complete.')

    return fill_file
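The offset bookkeeping above is easy to misread, so here is a small self-contained sketch (pure Python, hypothetical numbers) of how one tile from the schema is shifted into source-grid indices for the sliced request and then shifted back when writing into the zero-based fill file:

# Hypothetical tile and offsets mirroring the slice logic in compute().
indices = {'row': [0, 25], 'col': [25, 50]}        # one entry from the tile schema
row_offset, col_offset = 10, 40                    # source-index offsets from the geometry subset

# Shift into the source grid's index space for the slice-based request.
row = [ii + row_offset for ii in indices['row']]   # [10, 35]
col = [ii + col_offset for ii in indices['col']]   # [65, 90]

# Remove the offsets again when filling the zero-based template file.
slice_row = slice(row[0] - row_offset, row[1] - row_offset)   # slice(0, 25)
slice_col = slice(col[0] - col_offset, col[1] - col_offset)   # slice(25, 50)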
Example #9
File: large_array.py Project: NCPP/ocgis
def compute(ops, tile_dimension, verbose=False, use_optimizations=True):
    """
    Used for computations on large arrays where memory limitations are a consideration. It is also useful for
    extracting data from a server that has limitations on the size of requested data arrays. This function creates an
    empty destination NetCDF file that is then filled by executing the operations on chunks of the requested
    target dataset(s) and filling the destination NetCDF file.

    :param ops: The target operations to tile. A calculation is optional.
    :type ops: :class:`ocgis.OcgOperations`
    :param int tile_dimension: The target tile/chunk dimension. This integer value must be greater than zero.
    :param bool verbose: If ``True``, print more verbose information to terminal.
    :param bool use_optimizations: If ``True``, cache :class:`~ocgis.Field` and :class:`~ocgis.TemporalGroupVariable`
     objects for reuse during tile iteration.
    :raises: AssertionError, ValueError
    :returns: Path to the output NetCDF file.
    :rtype: str

    >>> from ocgis import RequestDataset, OcgOperations
    >>> from ocgis.util.large_array import compute
    >>> rd = RequestDataset(uri='/path/to/file', variable='tas')
    >>> ops = OcgOperations(dataset=rd, calc=[{'func':'mean','name':'mean'}],output_format='nc')
    >>> ret = compute(ops, 25)
    """

    assert isinstance(ops, OcgOperations)
    assert ops.output_format == constants.OutputFormatName.NETCDF

    # Ensure that progress is not showing 100% at first.
    if ops.callback is not None:
        orgcallback = ops.callback

        def zeropercentagecallback(p, m):
            orgcallback(0., m)

        ops.callback = zeropercentagecallback

    tile_dimension = int(tile_dimension)
    if tile_dimension <= 0:
        raise ValueError('"tile_dimension" must be greater than 0')

    # Determine if we are working with a multivariate function.
    if ops.calc is not None:
        if CalculationEngine._check_calculation_members_(ops.calc, AbstractMultivariateFunction):
            # Only one multivariate calculation allowed.
            assert len(ops.calc) == 1
            has_multivariate = True
        else:
            # Only one dataset allowed.
            assert len(list(ops.dataset)) == 1
            has_multivariate = False
    else:
        has_multivariate = False

    # work on a copy of the operations to create the template file
    ops_file_only = deepcopy(ops)
    # we need the output to be file only for the first request
    if ops.calc is not None:
        ops_file_only.file_only = True
    # save the environment flag for calculation optimizations.
    orig_oc = ocgis.env.OPTIMIZE_FOR_CALC

    try:
        # tell the software we are optimizing for calculations   
        ocgis.env.OPTIMIZE_FOR_CALC = True

        # first, write the template file
        if verbose:
            print('getting fill file...')
        fill_file = ops_file_only.execute()

        # if there is a geometry, we have to find the offset for the slice. we
        # also need to account for the subset mask.
        if ops.geom is not None:
            if verbose:
                print('geometry subset is present. calculating slice offsets...')
            ops_offset = deepcopy(ops)
            ops_offset.output_format = constants.OutputFormatName.OCGIS
            ops_offset.calc = None
            ops_offset.agg_selection = True
            ops_offset.snippet = False
            coll = ops_offset.execute()

            for row in coll.iter_melted(tag=TagName.DATA_VARIABLES):
                assert row['variable']._value is None

            ref_field = coll.get_element()
            ref_grid = ref_field.grid
            row_offset = ref_grid.dimensions[0]._src_idx[0]
            col_offset = ref_grid.dimensions[1]._src_idx[0]
            mask_spatial = ref_grid.get_mask()
        # otherwise the offset is zero...
        else:
            row_offset = 0
            col_offset = 0
            mask_spatial = None

        # get the shape for the tile schema
        if verbose:
            print('getting tile schema shape inputs...')
        if ops.calc is not None:
            shp_variable = ops.calc[0]['name']
        else:
            shp_variable = None
        template_rd = ocgis.RequestDataset(uri=fill_file, variable=shp_variable)
        template_field = template_rd.get()
        shp = template_field.grid.shape

        if use_optimizations:
            # if there is a calculation grouping, optimize for it. otherwise, pass
            # this value as None.
            try:
                # tgd_field = ops.dataset.first().get()
                archetype_dataset = list(ops.dataset)[0]
                tgd_field = archetype_dataset.get()
                template_tgd = tgd_field.temporal.get_grouping(deepcopy(ops.calc_grouping))
                if not has_multivariate:
                    key = archetype_dataset.field_name
                else:
                    key = '_'.join([__.field_name for __ in ops.dataset])
                optimizations = {'tgds': {key: template_tgd}}
            except TypeError:
                optimizations = None

            # load the fields and pass those for optimization
            field_optimizations = {}
            for rd in ops.dataset:
                gotten_field = rd.get(format_time=ops.format_time)
                field_optimizations.update({rd.field_name: gotten_field})
            optimizations = optimizations or {}
            optimizations['fields'] = field_optimizations
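            # At this point "optimizations" may carry two keys: 'tgds' (pre-computed temporal group dimensions keyed
            # by field name) and 'fields' (pre-loaded field objects keyed by field name); both are consumed later by
            # the operations engine through ops.optimizations.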
        else:
            optimizations = None

        if verbose:
            print('getting tile schema...')
        schema = tile.get_tile_schema(shp[0], shp[1], tile_dimension)
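        # The schema appears to map a tile index to row/column index bounds, e.g.
        # {0: {'row': [0, 100], 'col': [0, 100]}, ...} (structure inferred from how "indices" is consumed in the
        # tile loop below).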
        lschema = len(schema)

        # Create a new callback function where the 0-100% range is converted to a subrange corresponding to the
        # number of blocks to be calculated.
        if ops.callback is not None:
            percentageDone = 0
            callback = ops.callback

            def newcallback(p, m):
                p = (p / lschema) + percentageDone
                callback(p, m)

            ops.callback = newcallback

        if verbose:
            print('output file is: {0}'.format(fill_file))
            print('tile count: {0}'.format(lschema))

        fds = nc.Dataset(fill_file, 'a')
        try:
            if verbose:
                progress = ProgressBar('tiles progress')
            if ops.callback is not None and callback:
                callback(0, "Initializing calculation")
            for ctr, indices in enumerate(iter(schema.values()), start=1):
                # appropriately adjust the slices to account for the spatial subset
                row = [ii + row_offset for ii in indices['row']]
                col = [ii + col_offset for ii in indices['col']]

                # copy the operations and modify arguments
                ops_slice = deepcopy(ops)
                ops_slice.geom = None
                ops_slice.slice = [None, None, None, row, col]
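                # The five-element slice is expected to be ordered [realization, time, level, row, column]; only the
                # spatial row/column entries are set for each tile.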
                ops_slice.output_format = constants.OutputFormatName.OCGIS
                ops_slice.optimizations = optimizations
                # return the object slice
                ret = ops_slice.execute()

                for field in ret.iter_fields():
                    for variable in field.data_variables:
                        vref = fds.variables[variable.name]
                        # we need to remove the offsets to adjust for the zero-based fill file.
                        slice_row = slice(row[0] - row_offset, row[1] - row_offset)
                        slice_col = slice(col[0] - col_offset, col[1] - col_offset)
                        # if there is a spatial mask, update accordingly
                        if mask_spatial is not None:
                            set_variable_spatial_mask(variable, mask_spatial, slice_row, slice_col)
                            fill_mask = field.grid.get_mask(create=True)
                            fill_mask[:, :] = mask_spatial[slice_row, slice_col]
                            fill_mask = np.ma.array(np.zeros(fill_mask.shape), mask=fill_mask)
                            fds.variables[field.grid.mask_variable.name][slice_row, slice_col] = fill_mask
                        fill_value = variable.get_masked_value()
                        # fill the netCDF container variable adjusting for shape
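                        # A three-dimensional variable is assumed to be shaped (time, row, col) and a
                        # four-dimensional variable to carry one additional leading axis (e.g. realization or level);
                        # other ranks are not handled.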
                        if len(vref.shape) == 3:
                            vref[:, slice_row, slice_col] = fill_value
                        elif len(vref.shape) == 4:
                            vref[:, :, slice_row, slice_col] = fill_value
                        else:
                            raise NotImplementedError(vref.shape)

                        fds.sync()
                if verbose:
                    progress.progress(int((float(ctr) / lschema) * 100))
                if ops.callback is not None and callback:
                    percentageDone = ((float(ctr) / lschema) * 100)
        finally:
            fds.close()
    finally:
        ocgis.env.OPTIMIZE_FOR_CALC = orig_oc
    if verbose:
        progress.endProgress()
        print('complete.')

    return fill_file
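
# A minimal usage sketch for the tiled-computation helper above. The function name "compute", the URI, the variable,
# and the calculation values are assumptions for illustration; they are not taken from the original source.
#
#   import ocgis
#   rd = ocgis.RequestDataset(uri='tas.nc', variable='tas')
#   ops = ocgis.OcgOperations(dataset=rd,
#                             calc=[{'func': 'mean', 'name': 'mean'}],
#                             calc_grouping=['month'],
#                             output_format='nc')
#   path = compute(ops, tile_dimension=100, verbose=True)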
Example #10
0
File: engine.py Project: NCPP/ocgis
class OperationsEngine(AbstractOcgisObject):
    """
    Executes the operations defined by ``ops``.
    
    :param ops: The operations to interpret.
    :type ops: :class:`~ocgis.OcgOperations`
    :param bool request_base_size_only: If ``True``, return field objects following the spatial subset, performing as
     few operations as possible.
    :param progress: A progress object to update.
    :type progress: :class:`~ocgis.util.logging_ocgis.ProgressOcgOperations`
    """

    def __init__(self, ops, request_base_size_only=False, progress=None):
        self.ops = ops
        self._request_base_size_only = request_base_size_only
        self._subset_log = ocgis_lh.get_logger('subset')
        self._progress = progress or ProgressOcgOperations()
        self._original_subcomm = deepcopy(vm.current_comm_name)
        self._backtransform = {}

        # Create the calculation engine if calculations are present.
        if self.ops.calc is None or self._request_base_size_only:
            self.cengine = None
            self._has_multivariate_calculations = False
        else:
            ocgis_lh('initializing calculation engine', self._subset_log, level=logging.DEBUG)
            self.cengine = CalculationEngine(self.ops.calc_grouping,
                                             self.ops.calc,
                                             calc_sample_size=self.ops.calc_sample_size,
                                             progress=self._progress,
                                             spatial_aggregation=self.ops.aggregate)
            self._has_multivariate_calculations = self.cengine.has_multivariate_functions
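            # Each element of self.ops.calc is expected to be a mapping along the lines of
            # {'func': 'mean', 'name': 'my_mean', 'ref': <function class>}; 'func', 'name', and 'ref' are the keys
            # accessed in these examples (the example values are illustrative only).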

    def __iter__(self):
        """:rtype: :class:`ocgis.collection.base.AbstractCollection`"""
        ocgis_lh('beginning iteration', logger='conv.__iter__', level=logging.DEBUG)

        # Yields collections with all operations applied.
        try:
            for coll in self._iter_collections_():
                ocgis_lh('__iter__ yielding', self._subset_log, level=logging.DEBUG)
                yield coll
        finally:
            # Try to remove any subcommunicators associated with operations.
            for v in SubcommName.__members__.values():
                try:
                    vm.free_subcomm(name=v)
                except SubcommNotFoundError:
                    pass
            vm.set_comm(self._original_subcomm)

            # Remove any back transformations.
            for v in constants.BackTransform.__members__.values():
                self._backtransform.pop(v, None)

    def _iter_collections_(self):
        """:rtype: :class:`ocgis.collection.base.AbstractCollection`"""

        # Multivariate calculations require datasets come in as a list with all variable inputs part of the same
        # sequence.
        if self._has_multivariate_calculations:
            itr_rd = [[rd for rd in self.ops.dataset]]
        # Otherwise, each request dataset is processed as its own single-element sequence.
        else:
            itr_rd = [[rd] for rd in self.ops.dataset]
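        # For example, two request datasets rd1 and rd2 become [[rd1, rd2]] for a multivariate calculation and
        # [[rd1], [rd2]] otherwise.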

        # Configure the progress object.
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom, len, default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc, len, default=0)

        # Some introductory logging.
        msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
                self._progress.n_geometries)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'. \
                format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg, logger=self._subset_log)

        # Process the incoming datasets. Convert from request datasets to fields as needed.
        for rds in itr_rd:

            try:
                msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            except AttributeError:
                # Field objects have no URIs. Multivariate calculations change how the request dataset iterator is
                # configured as well.
                msg = []
                for rd in rds:
                    try:
                        msg.append(rd.uri)
                    except AttributeError:
                        # Likely a field object which does have a name.
                        msg.append(rd.name)
                msg = 'Processing URI(s) / field names: {0}'.format(msg)
            ocgis_lh(msg=msg, logger=self._subset_log)

            for coll in self._process_subsettables_(rds):
                # If there are calculations, do those now and return a collection.
                if not vm.is_null and self.cengine is not None:
                    ocgis_lh('Starting calculations.', self._subset_log)
                    raise_if_empty(coll)

                    # Look for any temporal grouping optimizations.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')

                    # Execute the calculations.
                    coll = self.cengine.execute(coll, file_only=self.ops.file_only, tgds=tgds)

                    # If we need to spatially aggregate and calculations used raw values, update the collection
                    # fields and subset geometries.
                    if self.ops.aggregate and self.ops.calc_raw:
                        coll_to_itr = coll.copy()
                        for sfield, container in coll_to_itr.iter_fields(yield_container=True):
                            sfield = _update_aggregation_wrapping_crs_(self, None, sfield, container, None)
                            coll.add_field(sfield, container, force=True)
                else:
                    # If there are no calculations, mark progress to indicate a geometry has been completed.
                    self._progress.mark()

                # Conversion of groups.
                if self.ops.output_grouping is not None:
                    raise NotImplementedError
                else:
                    ocgis_lh('_iter_collections_ yielding', self._subset_log, level=logging.DEBUG)
                    yield coll

    def _process_subsettables_(self, rds):
        """
        :param rds: Sequence of :class:`~ocgis.RequestDataset` objects.
        :type rds: sequence
        :rtype: :class:`ocgis.collection.base.AbstractCollection`
        """

        ocgis_lh(msg='entering _process_subsettables_', logger=self._subset_log, level=logging.DEBUG)

        # This is used to define the group of request datasets for things like logging and exceptions.
        try:
            alias = '_'.join([r.field_name for r in rds])
        except AttributeError:
            # Allow field objects which do not expose the "field_name" attribute.
            try:
                alias = '_'.join([r.name for r in rds])
            except TypeError:
                # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
                alias = None
        except NoDataVariablesFound:
            # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
            alias = None

        ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG)
        # Create the field object. Field objects may be passed directly to operations.
        # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
        if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
            ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG)
            field = [self.ops.optimizations['fields'][rd.field_name].copy() for rd in rds]
            has_field_optimizations = True
        else:
            # Indicates no field optimizations loaded.
            has_field_optimizations = False
        try:
            # No field optimizations and data should be loaded from source.
            if not has_field_optimizations:
                ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG)
                len_rds = len(rds)
                field = [None] * len_rds
                for ii in range(len_rds):
                    rds_element = rds[ii]
                    try:
                        field_object = rds_element.get(format_time=self.ops.format_time,
                                                       grid_abstraction=self.ops.abstraction)
                    except (AttributeError, TypeError):
                        # Likely a field object which does not need to be loaded from source.
                        if not self.ops.format_time:
                            raise NotImplementedError
                        # Check that it is indeed a field before proceeding.
                        if not isinstance(rds_element, Field):
                            raise
                        field_object = rds_element

                    field[ii] = field_object

            # Multivariate calculations require pulling variables across fields.
            if self._has_multivariate_calculations and len(field) > 1:
                for midx in range(1, len(field)):
                    # Use the data variable tag if it is available. Otherwise, attempt to merge the fields, raising a
                    # warning if the variable already exists in the squashed field.
                    if len(field[midx].data_variables) > 0:
                        vitr = field[midx].data_variables
                        is_data = True
                    else:
                        vitr = list(field[midx].values())
                        is_data = False
                    for mvar in vitr:
                        mvar = mvar.extract()
                        field[0].add_variable(mvar, is_data=is_data)
                    new_field_name = '_'.join([str(f.name) for f in field])
                    field[0].set_name(new_field_name)

            # The first field in the list is always the target for other operations.
            field = field[0]
            assert isinstance(field, Field)

            # Break out of operations if the rank is empty.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET, field, is_current=True, clobber=True)
            if not vm.is_null:
                if not has_field_optimizations:
                    if field.is_empty:
                        raise ValueError('No empty fields allowed.')

                    # Time, level, etc. subsets.
                    field = self._get_nonspatial_subset_(field)

                    # Spatially reorder the data.
                    ocgis_lh(msg='before spatial reorder', logger=self._subset_log, level=logging.DEBUG)
                    if self.ops.spatial_reorder:
                        self._update_spatial_order_(field)

                    # Extrapolate the spatial bounds if requested.
                    # TODO: Rename "interpolate" to "extrapolate".
                    if self.ops.interpolate_spatial_bounds:
                        self._update_bounds_extrapolation_(field)

        # This error is related to subsetting by time or level. Spatial subsetting occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log,
                         level=logging.WARN)
                coll = self._get_initialized_collection_()
                name = '_'.join([rd.field_name for rd in rds])
                field = Field(name=name, is_empty=True)
                coll.add_field(field, None)
                try:
                    yield coll
                finally:
                    return
            else:
                # Raise an exception as empty subsets are not allowed.
                ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.field_name for rd in rds]),
                         logger=self._subset_log)

        # Set iterator based on presence of slice. Slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [None]
        else:
            itr = [None] if self.ops.geom is None else self.ops.geom

        for coll in self._process_geometries_(itr, field, alias):
            # Conform units following the spatial subset.
            if not vm.is_null and self.ops.conform_units_to is not None:
                for to_conform in coll.iter_fields():
                    for dv in to_conform.data_variables:
                        dv.cfunits_conform(self.ops.conform_units_to)
            ocgis_lh(msg='_process_subsettables_ yielding', logger=self._subset_log, level=logging.DEBUG)
            yield coll

    def _process_geometries_(self, itr, field, alias):
        """
        :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting.
        :type itr: [None] or [:class:`~ocgis.Field`, ...]
        :param :class:`ocgis.Field` field: The target field for operations.
        :param str alias: The request data alias currently being processed.
        :rtype: :class:`~ocgis.SpatialCollection`
        """

        assert isinstance(field, Field)

        ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
        # Process each geometry.
        for subset_field in itr:

            # Initialize the collection storage.
            coll = self._get_initialized_collection_()
            if vm.is_null:
                sfield = field
            else:
                # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
                # subset target with wrapping, coordinate system conversion, etc.
                subset_field = deepcopy(subset_field)

                if self.ops.regrid_destination is not None:
                    # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                    # sources.
                    subset_field_for_regridding = deepcopy(subset_field)

                # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
                # system.
                key = constants.BackTransform.ROTATED_POLE
                self._backtransform[key] = self._get_update_rotated_pole_state_(field, subset_field)

                # Check if the geometric abstraction is available on the field object.
                self._assert_abstraction_available_(field)

                # Return a slice or snippet if either of these are requested.
                field = self._get_slice_or_snippet_(field)

                # Choose the subset UGID value.
                if subset_field is None:
                    msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                    subset_ugid = None
                else:
                    subset_ugid = subset_field.geom.ugid.get_value()[0]
                    msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
                ocgis_lh(msg=msg, logger=self._subset_log)

                if subset_field is not None:
                    # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                    if subset_field.crs is not None and subset_field.crs != field.crs:
                        subset_field.update_crs(field.crs)
                    # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                    subset_field = self._get_buffered_subset_geometry_if_point_(field, subset_field)

                # If there is a selection geometry present, use it for the spatial subset. If not, all of the field's
                # data is returned.
                if subset_field is None:
                    sfield = field
                else:
                    sfield = self._get_spatially_subsetted_field_(alias, field, subset_field, subset_ugid)

                ocgis_lh(msg='after self._get_spatially_subsetted_field_', logger=self._subset_log, level=logging.DEBUG)

                # Create the subcommunicator following the data subset to ensure non-empty communication.
                vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET, sfield, is_current=True, clobber=True)

                if not vm.is_null:
                    if not sfield.is_empty and not self.ops.allow_empty:
                        raise_if_empty(sfield)

                        # If the base size is being requested, bypass the rest of the operations.
                        if not self._request_base_size_only:
                            # Perform regridding operations if requested.
                            if self.ops.regrid_destination is not None and sfield.regrid_source:
                                sfield = self._get_regridded_field_with_subset_(sfield,
                                                                                subset_field_for_regridding=subset_field_for_regridding)
                            else:
                                ocgis_lh(msg='no regridding operations', logger=self._subset_log, level=logging.DEBUG)
                            # If empty returns are allowed, there may be an empty field.
                            if sfield is not None:
                                # Only update spatial components if there are no calculations or, when calculations
                                # are present, they do not expect raw values.
                                if self.ops.calc is None or (self.ops.calc is not None and not self.ops.calc_raw):
                                    # Update spatial aggregation, wrapping, and coordinate systems.
                                    sfield = _update_aggregation_wrapping_crs_(self, alias, sfield, subset_field,
                                                                               subset_ugid)
                                    ocgis_lh('after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                             self._subset_log,
                                             level=logging.DEBUG)

            # Add the created field to the output collection with the selection geometry.
            if sfield is None:
                assert self.ops.aggregate
            if sfield is not None:
                coll.add_field(sfield, subset_field)

            yield coll

    def _get_nonspatial_subset_(self, field):
        """
        
        :param field:
        :type field: :class:`~ocgis.Field`
        :return: 
        :raises: EmptySubsetError
        """

        # Apply any time or level subsetting provided through operations.
        if self.ops.time_range is not None:
            field = field.time.get_between(*self.ops.time_range).parent
        if self.ops.time_region is not None:
            field = field.time.get_time_region(self.ops.time_region).parent
        if self.ops.time_subset_func is not None:
            field = field.time.get_subset_by_function(self.ops.time_subset_func).parent
        if self.ops.level_range is not None:
            field = field.level.get_between(*self.ops.level_range).parent

        return field

    @staticmethod
    def _get_initialized_collection_():
        coll = SpatialCollection()
        return coll

    def _get_update_rotated_pole_state_(self, field, subset_field):
        """
        Rotated pole coordinate systems are handled internally by transforming the CRS to a geographic coordinate
        system.

        :param field:
        :type field: :class:`ocgis.Field`
        :param subset_field:
        :type subset_field: :class:`ocgis.Field` or None
        :rtype: None or :class:`ocgis.variable.crs.CFRotatedPole`
        :raises: AssertionError
        """

        # CFRotatedPole requires special treatment. Only do this if a subset geometry is available. This variable is
        # needed to determine if back-transforms are necessary.
        original_rotated_pole_crs = None
        if isinstance(field.crs, CFRotatedPole):
            # Only transform if there is a subset geometry.
            if subset_field is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip':
                # Update the CRS. Copy the original CRS for possible later transformation back to rotated pole.
                original_rotated_pole_crs = deepcopy(field.crs)
                ocgis_lh('initial rotated pole transformation...', self._subset_log, level=logging.DEBUG)
                field.update_crs(env.DEFAULT_COORDSYS)
                ocgis_lh('...finished initial rotated pole transformation', self._subset_log, level=logging.DEBUG)
        return original_rotated_pole_crs

    def _assert_abstraction_available_(self, field):
        """
        Assert the spatial abstraction may be loaded on the field object if one is provided in the operations.

        :param field: The field to check for a spatial abstraction.
        :type field: :class:`ocgis.Field`
        """

        if self.ops.abstraction != 'auto':
            is_available = field.grid.is_abstraction_available(self.ops.abstraction)
            if not is_available:
                msg = 'A "{0}" spatial abstraction is not available.'.format(self.ops.abstraction)
                ocgis_lh(exc=ValueError(msg), logger='subset')

    def _get_slice_or_snippet_(self, field):
        """
        Slice the incoming field if a slice or snippet argument is present.

        :param field: The field to slice.
        :type field: :class:`ocgis.Field`
        :rtype: :class:`ocgis.Field`
        """

        # If there is a snippet, return the first realization, time, and level.
        if self.ops.snippet:
            the_slice = {'time': 0, 'realization': 0, 'level': 0}
        # If there is a slice, use it to subset the field. Only field slices are supported.
        elif self.ops.slice is not None:
            the_slice = self.ops.slice
        else:
            the_slice = None
        if the_slice is not None:
            field = field.get_field_slice(the_slice, strict=False, distributed=True)
        return field

    def _get_spatially_subsetted_field_(self, alias, field, subset_field, subset_ugid):
        """
        Spatially subset a field with a selection field.

        :param str alias: The request data alias currently being processed.
        :param field: Target field to subset.
        :type field: :class:`ocgis.Field`
        :param subset_field: The field to use for subsetting.
        :type subset_field: :class:`ocgis.Field`
        :rtype: :class:`ocgis.Field`
        :raises: AssertionError, ExtentError
        """

        assert subset_field is not None

        ocgis_lh('executing spatial subset operation', self._subset_log, level=logging.DEBUG, alias=alias,
                 ugid=subset_ugid)
        sso = SpatialSubsetOperation(field)
        try:
            # Execute the spatial subset and return the subsetted field.
            sfield = sso.get_spatial_subset(self.ops.spatial_operation, subset_field.geom,
                                            select_nearest=self.ops.select_nearest,
                                            optimized_bbox_subset=self.ops.optimized_bbox_subset)
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(alias=alias, ugid=subset_ugid, msg='Empty geometric operation but empty returns allowed.',
                         level=logging.WARN)
                sfield = Field(name=field.name, is_empty=True)
            else:
                msg = ' This typically means the selection geometry falls outside the spatial domain of the target ' \
                      'dataset.'
                msg = str(e) + msg
                ocgis_lh(exc=ExtentError(message=msg), alias=alias, logger=self._subset_log)

        # If the subset geometry is unwrapped and the vector wrap option is true, wrap the subset geometry.
        if self.ops.vector_wrap:
            if subset_field.wrapped_state == WrappedState.UNWRAPPED:
                subset_field.wrap()

        return sfield

    def _get_buffered_subset_geometry_if_point_(self, field, subset_field):
        """
        If the subset geometry is a point or multipoint, it will need to be buffered and the spatial dimension updated
        accordingly. If the subset geometry is a polygon, pass through.

        :param field:
        :type field: :class:`ocgis.Field`
        :param subset_field:
        :type subset_field: :class:`ocgis.Field`
        """

        if subset_field.geom.geom_type in ['Point', 'MultiPoint'] and self.ops.search_radius_mult is not None:
            ocgis_lh(logger=self._subset_log, msg='buffering point geometry', level=logging.DEBUG)
            subset_field = subset_field.geom.get_buffer(self.ops.search_radius_mult * field.grid.resolution).parent
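            # The buffer distance is the search radius multiplier times the grid resolution, e.g. a multiplier of 2.0
            # on a 0.5-degree grid buffers the point by 1.0 degree (illustrative values).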
            assert subset_field.geom.geom_type in ['Polygon', 'MultiPolygon']

        return subset_field

    def _get_regridded_field_with_subset_(self, sfield, subset_field_for_regridding=None):
        """
        Regrid ``sfield`` subsetting the regrid destination in the process.

        :param sfield: The input field to regrid.
        :type sfield: :class:`ocgis.Field`
        :param subset_field_for_regridding: The original, unaltered spatial dimension to use for subsetting.
        :type subset_field_for_regridding: :class:`ocgis.Field`
        :rtype: :class:`~ocgis.Field`
        """

        from ocgis.regrid.base import RegridOperation
        ocgis_lh(logger=self._subset_log, msg='Starting regrid operation...', level=logging.INFO)
        ro = RegridOperation(sfield, self.ops.regrid_destination, subset_field=subset_field_for_regridding,
                             regrid_options=self.ops.regrid_options)
        sfield = ro.execute()
        ocgis_lh(logger=self._subset_log, msg='Regrid operation complete.', level=logging.INFO)
        return sfield

    def _update_bounds_extrapolation_(self, field):
        try:
            name_x_variable = '{}_{}'.format(field.grid.x.name, constants.OCGIS_BOUNDS)
            name_y_variable = '{}_{}'.format(field.grid.y.name, constants.OCGIS_BOUNDS)
            field.grid.set_extrapolated_bounds(name_x_variable, name_y_variable, constants.OCGIS_BOUNDS)
        except BoundsAlreadyAvailableError:
            msg = 'Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".'
            ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARNING)

    def _update_spatial_order_(self, field):
        _update_wrapping_(self, field)
        if field.grid is not None:
            wrapped_state = field.grid.wrapped_state
            if wrapped_state == WrappedState.WRAPPED:
                field.grid.reorder()
            else:
                msg = 'Reorder not relevant for wrapped state "{}". Doing nothing.'.format(
                    str(wrapped_state))
                ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARN)