Example #1
    def validate(cls, ops):
        if ops.calc_sample_size:
            from ocgis.api.parms.definition import CalcSampleSize

            exc = DefinitionValidationError(CalcSampleSize,
                                            'Multivariate functions do not calculate sample size at this time.')
            ocgis_lh(exc=exc, logger='calc.base')

        # ensure the required variables are present
        should_raise = False
        for c in ops.calc:
            if c['func'] == cls.key:
                kwds = c['kwds']

                # check that the required variables are present as keyword arguments
                if len(set(kwds.keys()).intersection(set(cls.required_variables))) < 2:
                    should_raise = True
                    break

                # ensure the mapped aliases exist
                for xx in cls.required_variables:
                    to_check = kwds[xx]
                    if to_check not in ops.dataset:
                        should_raise = True

                break
        if should_raise:
            from ocgis.api.parms.definition import Calc

            exc = DefinitionValidationError(Calc,
                                            'Variable aliases are missing for multivariate function "{0}". Required variable aliases are: {1}.'.format(
                                                cls.__name__, cls.required_variables))
            ocgis_lh(exc=exc, logger='calc.base')
Example #2
 def __init__(self, *args, **kwargs):
     if kwargs.get('calc_sample_size') is True:
         exc = SampleSizeNotImplemented(self.__class__,
                                        'Multivariate functions do not calculate sample size at this time.')
         ocgis_lh(exc=exc, logger='calc.base')
     else:
         AbstractFunction.__init__(self, *args, **kwargs)
Example #3
    def _get_update_rotated_pole_state_(self, field, subset_sdim):
        """
        Rotated pole coordinate systems are handled internally by transforming the CRS to a geographic coordinate
        system.

        :param field: The target field.
        :type field: :class:`ocgis.interface.base.field.Field`
        :param subset_sdim: The subset spatial dimension, if any.
        :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` or None
        :rtype: None or :class:`ocgis.interface.base.crs.CFRotatedPole`
        :raises: AssertionError
        """

        # CFRotatedPole requires special treatment. Only transform when a subset geometry is available. The returned
        # value is needed to determine whether a back-transform is necessary.
        original_rotated_pole_crs = None
        if isinstance(field.spatial.crs, CFRotatedPole):
            # only transform if there is a subset geometry
            if subset_sdim is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip':
                # update the CRS. copy the original CRS for possible later transformation back to rotated pole.
                original_rotated_pole_crs = copy(field.spatial.crs)
                ocgis_lh('initial rotated pole transformation...', self._subset_log, level=logging.DEBUG)
                field.spatial.update_crs(CFWGS84())
                ocgis_lh('...finished initial rotated pole transformation', self._subset_log, level=logging.DEBUG)
        return original_rotated_pole_crs
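A minimal, self-contained sketch of the capture-and-restore pattern used above. The classes here are plain-Python stand-ins rather than ocgis types; the real update_crs also transforms the underlying coordinate values.

from copy import copy


class FakeSpatial(object):
    """Stand-in for a spatial dimension carrying a CRS."""

    def __init__(self, crs):
        self.crs = crs

    def update_crs(self, new_crs):
        # The real ocgis method also transforms coordinate values.
        self.crs = new_crs


def subset_with_backtransform(spatial, geographic_crs, do_subset):
    original_crs = copy(spatial.crs)    # remember the rotated pole CRS
    spatial.update_crs(geographic_crs)  # work in a geographic CRS
    do_subset(spatial)                  # subset while in the common CRS
    spatial.update_crs(original_crs)    # back-transform to the original CRS
    return spatial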
Example #4
 def parse(self,value):
     if type(value) in [Polygon,MultiPolygon,Point]:
         ret = [{'geom':value,'properties':{'ugid':1},'crs':CFWGS84()}]
     elif type(value) in [list,tuple]:
         if all([isinstance(element,dict) for element in value]):
             for ii,element in enumerate(value,start=1):
                 if 'geom' not in element:
                     ocgis_lh(exc=DefinitionValidationError(self,'Geometry dictionaries must have a "geom" key.'))
                 if 'properties' not in element:
                     element['properties'] = {'UGID':ii}
                 if 'crs' not in element:
                     element['crs'] = CFWGS84()
                     ocgis_lh(msg='No CRS in geometry dictionary - assuming WGS84.',level=logging.WARN,check_duplicate=True)
             ret = value
         else:
             if len(value) == 2:
                 geom = Point(value[0],value[1])
             elif len(value) == 4:
                 minx,miny,maxx,maxy = value
                 geom = Polygon(((minx,miny),
                                 (minx,maxy),
                                 (maxx,maxy),
                                 (maxx,miny)))
             else:
                 raise(DefinitionValidationError(self,'Geometry sequences must have length 2 (point) or 4 (bounding box).'))
             if not geom.is_valid:
                 raise(DefinitionValidationError(self,'Parsed geometry is not valid.'))
             ret = [{'geom':geom,'properties':{'ugid':1},'crs':CFWGS84()}]
             self._bounds = geom.bounds
     elif isinstance(value,ShpCabinetIterator):
         self._shp_key = value.key
         ret = value
     else:
         ret = value
     return(ret)
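A hedged usage sketch of the three input forms parse() normalizes. Each form ends up as a list of dictionaries with "geom", "properties", and "crs" keys; the variable values are illustrative only.

from shapely.geometry import Point

# 1. A bare shapely geometry:
value = Point(-105.0, 40.0)

# 2. A point as a length-2 sequence, or a bounding box as a length-4 sequence:
value = [-105.0, 40.0]                 # becomes Point(-105.0, 40.0)
value = [-105.0, 38.0, -103.0, 40.0]   # becomes a bounding-box Polygon

# 3. A sequence of geometry dictionaries. "geom" is required; "properties"
#    and "crs" receive defaults when absent:
value = [{'geom': Point(-105.0, 40.0), 'properties': {'UGID': 1}}]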
Example #5
    def _format_parms_(self, values):
        """
        :param values: A dictionary containing the parameter values to check.
        :type values: dict
        """

        ret = {}
        for k, v in values.iteritems():
            try:
                if isinstance(v, self.parms_definition[k]):
                    formatted = v
                else:
                    formatted = self.parms_definition[k](v)
            # likely a nonetype
            except TypeError as e:
                if self.parms_definition[k] is None:
                    formatted = v
                else:
                    ocgis_lh(exc=e, logger="calc.base")
            # likely a required variable for a multivariate calculation
            except KeyError as e:
                if k in self.required_variables:
                    formatted = values[k]
                else:
                    ocgis_lh(exc=e, logger="calc.base")
            ret.update({k: formatted})
        return ret
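The coercion rule above reduces to: keep a value that already has the declared type, otherwise call the declared type as a converter. A self-contained sketch, omitting the None and required-variable special cases handled by the ocgis version:

def format_parms(values, parms_definition):
    ret = {}
    for k, v in values.items():
        declared = parms_definition[k]
        ret[k] = v if isinstance(v, declared) else declared(v)
    return ret

print(format_parms({'width': '5'}, {'width': int}))  # {'width': 5}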
Example #6
    def _get_regrid_destination_(self):
        """
        Prepare destination field for regridding.

        :rtype: :class:`~ocgis.SpatialDimension`
        """

        # Spatially subset the regrid destination. #####################################################################
        if self.subset_sdim is None:
            ocgis_lh(logger='regrid', msg='no spatial subsetting', level=logging.DEBUG)
            regrid_destination = self.field_dst
        else:
            if self.with_buffer:
                # Buffer the subset geometry by the resolution of the source field to improve chances of overlap between
                # source and destination extents.
                buffer_value = self.field_src.spatial.grid.resolution
                buffer_crs = self.field_src.spatial.crs
            else:
                buffer_value, buffer_crs = [None, None]
            ss = SpatialSubsetOperation(self.field_dst)
            regrid_destination = ss.get_spatial_subset('intersects', self.subset_sdim,
                                                       use_spatial_index=env.USE_SPATIAL_INDEX,
                                                       select_nearest=False, buffer_value=buffer_value,
                                                       buffer_crs=buffer_crs)

        # Transform the coordinate system of the regrid destination. ###################################################

        # Update the coordinate system of the regrid destination if required.
        try:
            destination_sdim = regrid_destination.spatial
        except AttributeError:
            # Likely a spatial dimension object already.
            destination_sdim = regrid_destination
        # If switched to true, the regrid destination coordinate system must be updated to match the source.
        update_regrid_destination_crs = False
        if not isinstance(regrid_destination.crs, Spherical):
            if isinstance(regrid_destination, Field):
                if isinstance(destination_sdim.crs, WGS84) and regrid_destination._has_assigned_coordinate_system:
                    update_regrid_destination_crs = True
                elif isinstance(destination_sdim.crs,
                                WGS84) and not regrid_destination._has_assigned_coordinate_system:
                    pass
                else:
                    update_regrid_destination_crs = True
            else:
                if not isinstance(destination_sdim.crs, Spherical):
                    update_regrid_destination_crs = True
        if update_regrid_destination_crs:
            ocgis_lh(logger='regrid',
                     msg='updating regrid destination to spherical. regrid destination crs is: {}'.format(
                         regrid_destination.crs), level=logging.DEBUG)
            destination_sdim.update_crs(Spherical())
        else:
            destination_sdim.crs = Spherical()

        # Remove the mask from the destination field. ##################################################################
        new_mask = np.zeros(destination_sdim.shape, dtype=bool)
        destination_sdim.set_mask(new_mask)

        return destination_sdim
Example #7
    def add_field(self, ugid, geom, field, properties=None, name=None):
        """
        :param int ugid: Unique identifier for the selection geometry.
        :param geom: The selection geometry.
        :type geom: :class:`shapely.geometry.base.BaseGeometry`
        :param field: The field to add to the collection.
        :type field: :class:`ocgis.Field`
        :param dict properties: Optional properties dictionary for the geometry.
        :param str name: Optional name for the field. Defaults to ``field.name``.
        """
        name = name or field.name

        ## add field unique identifier if it does not exist
        try:
            if field.uid is None:
                field.uid = self._storage_id_next
                self._storage_id.append(field.uid)
        ## likely a nonetype from an empty subset
        except AttributeError as e:
            if field is None:
                pass
            else:
                ocgis_lh(exc=e, logger='collection')
            
        self.geoms.update({ugid:geom})
        self.properties.update({ugid:properties})
        if ugid not in self:
            self.update({ugid:{}})
        assert(name not in self[ugid])
        self[ugid].update({name:field})
Example #8
    def _execute_(self):
        for variable in self.field.variables.itervalues():

            self.validate_units(variable)

            if self.file_only:
                fill = self._empty_fill
            else:
                fill = self.calculate(variable.value, **self.parms)

            dtype = self.dtype or variable.dtype
            if not self.file_only:
                if dtype != fill.dtype:
                    fill = fill.astype(dtype)
                assert fill.shape == self.field.shape

            if not self.file_only:
                if self.tgd is not None:
                    fill = self._get_temporal_agg_fill_(fill, f=self.aggregate_temporal, parms={})
                else:
                    if self.calc_sample_size:
                        msg = "Sample sizes not relevant for scalar transforms."
                        ocgis_lh(msg=msg, logger="calc.base", level=logging.WARN)
                    fill = self._get_or_pass_spatial_agg_fill_(fill)

            units = self.get_output_units(variable)

            self._add_to_collection_(
                value=fill, parent_variables=[variable], dtype=self.dtype, fill_value=self.fill_value, units=units
            )
Example #9
File: crs.py Project: UV-CDAT/ocgis
 def _get_projection_coordinate_(target,meta):
     key = 'projection_{0}_coordinate'.format(target)
     for k,v in meta['variables'].iteritems():
         if 'standard_name' in v['attrs']:
             if v['attrs']['standard_name'] == key:
                 return(k)
     ocgis_lh(logger='crs',exc=ProjectionCoordinateNotFound(key))
Example #10
    def execute(self):
        """
        Execute regridding operation.

        :rtype: :class:`~ocgis.Field`
        """

        destination_sdim = self._get_regrid_destination_()
        self._update_regrid_source_coordinate_system_()

        # Regrid the input field.
        ocgis_lh(logger='regrid', msg='Creating regridded fields...', level=logging.INFO)
        regridded_source = list(iter_regridded_fields([self.field_src], destination_sdim, **self.regrid_options))[0]

        # Return the source field to its original coordinate system.
        if self._regrid_required_source_crs_update:
            ocgis_lh(logger='regrid', msg='Reverting source field to original coordinate system...', level=logging.INFO)
            regridded_source.spatial.update_crs(self._original_sfield_crs)
        else:
            regridded_source.spatial.crs = self._original_sfield_crs

        # Subset the output from the regrid operation as masked values may be introduced on the edges.
        if self.subset_sdim is not None:
            ss = SpatialSubsetOperation(regridded_source)
            regridded_source = ss.get_spatial_subset('intersects', self.subset_sdim,
                                                     use_spatial_index=env.USE_SPATIAL_INDEX,
                                                     select_nearest=False)

        return regridded_source
Example #11
 def _get_calendar_day_window_(cday_index,target_cday_index,width):
     width = int(width)
     try:
         assert(width >= 3)
         assert(width%2 != 0)
     except AssertionError:
         ocgis_lh(exc=ValueError('Kernel widths must be >= 3 and odd.'),logger='calc.library')
     
     stride_dim = (width-1)/2
     axis_length = cday_index.shape[0]
     
     lower_idx = target_cday_index - stride_dim
     upper_idx = target_cday_index + stride_dim + 1
     
     if lower_idx < 0:
         a = cday_index[lower_idx:]
         b = cday_index[0:target_cday_index]
         lower = np.append(a,b)
     else:
         lower = cday_index[lower_idx:target_cday_index]
         
     if upper_idx > axis_length:
         a = cday_index[0:upper_idx-axis_length]
         b = cday_index[target_cday_index+1:upper_idx]
         upper = np.append(a,b)
     else:
         upper = cday_index[target_cday_index+1:upper_idx]
         
     ret = np.append(cday_index[target_cday_index],np.append(lower,upper))
     
     return(ret)
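A small numpy demonstration of the circular window built above: for a target index near the edge of the axis, the negative lower slice bound wraps to the tail of the calendar-day index so the kernel stays full-width.

import numpy as np

cday_index = np.arange(10)  # stand-in calendar-day index array
target, width = 0, 5        # window centered on the first element
stride = (width - 1) // 2

lower_idx = target - stride      # -2: wraps around to the tail
upper_idx = target + stride + 1  # 3

lower = np.append(cday_index[lower_idx:], cday_index[0:target])
upper = cday_index[target + 1:upper_idx]
window = np.append(cday_index[target], np.append(lower, upper))
print(window)  # [0 8 9 1 2]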
Example #12
File: field.py Project: NCPP/ocgis
    def set_abstraction_geom(self, force=True, create_ugid=False, ugid_name=HeaderName.ID_GEOMETRY, ugid_start=1,
                             set_ugid_as_data=False):
        """
        Set the abstraction geometry for the field using the field's geometry variable or the field's grid abstraction
        geometry.
        
        :param bool force: If ``True`` (the default), clobber any existing geometry variables.
        :param bool create_ugid: If ``True``, create a unique identifier integer :class:`~ocgis.Variable` for the 
         abstraction geometry. Only creates the variable if the geometry does not already have a ``ugid``.
        :param str ugid_name: Name for the ``ugid`` variable.
        :param int ugid_start: Starting value to use for the unique identifier.
        :param bool set_ugid_as_data: If ``True``, set the ``ugid`` variable as data on the field. Useful for writing
         shapefiles which require at least one data variable.
        :raises: ValueError
        """

        if self.geom is None:
            if self.grid is None:
                raise ValueError('No grid available to set abstraction geometry.')
            else:
                self.set_geom_from_grid(force=force)
        if self.geom.ugid is None and create_ugid:
            ocgis_lh(msg='before self.geom.create_ugid_global in {}'.format(self.__class__), level=logging.DEBUG)
            self.geom.create_ugid_global(ugid_name, start=ugid_start)
            ocgis_lh(msg='after self.geom.create_ugid_global in {}'.format(self.__class__), level=logging.DEBUG)
        if set_ugid_as_data:
            self.add_variable(self.geom.ugid, force=True, is_data=True)
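A hedged usage sketch for set_abstraction_geom, using RequestDataset as shown in other examples on this page. The file path is hypothetical.

import ocgis

field = ocgis.RequestDataset('data.nc').get()  # hypothetical path
field.set_abstraction_geom(create_ugid=True, set_ugid_as_data=True)
# The field now carries an abstraction geometry with a UGID variable attached
# as data, which is useful when writing vector formats like ESRI Shapefile.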
Example #13
File: base.py Project: NCPP/ocgis
    def _get_regrid_destination_(self):
        """
        Prepare destination field for regridding.

        :rtype: (:class:`~ocgis.Field`, :class:`~ocgis.CoordinateReferenceSystem` or ``None``)
        """

        # Transform the coordinate system of the regrid destination. ###################################################

        # The regrid destination coordinate system must be updated to match the source (spherical) coordinate system.
        if self.field_dst.crs != Spherical():
            ocgis_lh(logger='regrid',
                     msg='updating regrid destination to spherical. regrid destination crs is: {}'.format(
                         self.field_dst.crs), level=logging.DEBUG)
            backtransform_crs = deepcopy(self.field_dst.crs)
            self.field_dst.update_crs(Spherical())
        else:
            backtransform_crs = None

        # Spatially subset the regrid destination. #####################################################################
        if self.subset_field is None:
            ocgis_lh(logger='regrid', msg='no spatial subsetting', level=logging.DEBUG)
            regrid_destination = self.field_dst
        else:
            ss = SpatialSubsetOperation(self.field_dst)
            regrid_destination = ss.get_spatial_subset('intersects', self.subset_field.geom,
                                                       use_spatial_index=env.USE_SPATIAL_INDEX,
                                                       select_nearest=False)

        return regrid_destination, backtransform_crs
Example #14
    def execute(self,coll,file_only=False):
        
        ## switch field type based on the types of calculations present
        if self._check_calculation_members_(self.funcs,AbstractMultivariateFunction):
            klass = DerivedMultivariateField
        else:
            klass = DerivedField
                        
        ## group the variables. if grouping is None, calculations are performed
        ## on each element, taking advantage of array computations.
        if self.grouping is not None:
            ocgis_lh('setting temporal grouping(s)','calc.engine')
            for v in coll.itervalues():
                for k2,v2 in v.iteritems():
                    if k2 not in self.tgds:
                        self.tgds[k2] = v2.temporal.get_grouping(self.grouping)

        ## iterate over functions
        for ugid,dct in coll.iteritems():
            for alias_field,field in dct.iteritems():
                new_temporal = self.tgds.get(alias_field)
                out_vc = VariableCollection()
                for f in self.funcs:
                    ocgis_lh('calculating: {0}'.format(f),logger='calc.engine')
                    function = f['ref'](alias=f['name'],dtype=None,field=field,file_only=file_only,vc=out_vc,
                         parms=f['kwds'],tgd=new_temporal,use_raw_values=self.use_raw_values,
                         calc_sample_size=self.calc_sample_size)
                    out_vc = function.execute()
                new_temporal = new_temporal or field.temporal
                new_field = klass(variables=out_vc,temporal=new_temporal,spatial=field.spatial,
                                  level=field.level,realization=field.realization,meta=field.meta,
                                  uid=field.uid)
                coll[ugid][alias_field] = new_field
        return(coll)
Example #15
File: nc.py Project: NCPP/ocgis
    def _get_field_write_target_(cls, field):
        """Collective!"""
        ocgis_lh(level=10, logger="driver.nc", msg="entering _get_field_write_target_")

        if field.crs is not None:
            field.crs.format_spatial_object(field)

        grid = field.grid
        if grid is not None:
            # If any grid pieces are masked, ensure the mask is created across all grids.
            has_mask = vm.gather(grid.has_mask)
            if vm.rank == 0:
                if any(has_mask):
                    create_mask = True
                else:
                    create_mask = False
            else:
                create_mask = None
            create_mask = vm.bcast(create_mask)
            if create_mask and not grid.has_mask:
                grid.get_mask(create=True)

            # Putting units on bounds for netCDF-CF can confuse some parsers.
            if grid.has_bounds:
                field = field.copy()
                field.x.bounds.attrs.pop('units', None)
                field.y.bounds.attrs.pop('units', None)

        # Remove the current coordinate system if this is a dummy coordinate system.
        if env.COORDSYS_ACTUAL is not None:
            field = field.copy()
            field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

        return field
Example #16
File: nc.py Project: NCPP/ocgis
    def _create_dimension_map_entries_dict_(axes, group_metadata, strict, attr_name='axis'):
        variables = group_metadata['variables']
        check_bounds = list(axes.keys())
        if 'realization' in check_bounds:
            check_bounds.pop(check_bounds.index('realization'))

        # Get the main entry for each axis.
        for k, v in list(axes.items()):
            axes[k] = create_dimension_map_entry(v, variables, strict=strict, attr_name=attr_name)

        # Attempt to find bounds for each entry (ignoring realizations).
        for k in check_bounds:
            if axes[k] is not None:
                keys = ['bounds']
                if k == 'time':
                    keys += ['climatology']
                bounds_var = get_by_key_list(variables[axes[k]['variable']]['attrs'], keys)
                if bounds_var is not None:
                    if bounds_var not in variables:
                        msg = 'Bounds listed for variable "{0}" but the destination bounds variable "{1}" does not exist.'. \
                            format(axes[k]['variable'], bounds_var)
                        ocgis_lh(msg, logger='nc.driver', level=logging.WARNING)
                        bounds_var = None
                axes[k]['bounds'] = bounds_var
        entries = {k: v for k, v in list(axes.items()) if v is not None}
        return entries
Example #17
 def get_between(self,lower,upper,return_indices=False,closed=False):
     assert(lower <= upper)
     
     if self.bounds is None:
         if closed:
             select = np.logical_and(self.value > lower,self.value < upper)
         else:
             select = np.logical_and(self.value >= lower,self.value <= upper)
     else:
         bounds_min = np.min(self.bounds,axis=1)
         bounds_max = np.max(self.bounds,axis=1)
         if closed:
             select_lower = np.logical_or(bounds_min > lower,bounds_max > lower)
             select_upper = np.logical_or(bounds_min < upper,bounds_max < upper)
         else:
             select_lower = np.logical_or(bounds_min >= lower,bounds_max >= lower)
             select_upper = np.logical_or(bounds_min <= upper,bounds_max <= upper)
         select = np.logical_and(select_lower,select_upper)
     
     if not select.any():
         ocgis_lh(exc=EmptySubsetError(origin=self.name))
         
     ret = self[select]
     
     if return_indices:
         indices = np.arange(select.shape[0])
         ret = (ret,indices[select])
     
     return(ret)
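A numpy sketch of the bounds-overlap test above: a bounded element is selected when its bounds straddle or touch the interval, which keeps cells that only partially overlap [lower, upper]. The array values are illustrative.

import numpy as np

bounds = np.array([[0.0, 1.0], [1.0, 2.0], [2.0, 3.0]])
lower, upper = 0.5, 1.5

bounds_min = np.min(bounds, axis=1)
bounds_max = np.max(bounds, axis=1)
select_lower = np.logical_or(bounds_min >= lower, bounds_max >= lower)
select_upper = np.logical_or(bounds_min <= upper, bounds_max <= upper)
select = np.logical_and(select_lower, select_upper)
print(select)  # [ True  True False] -- the cell [2, 3] does not overlap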
Example #18
    def _get_bounds_from_source_(self):
        # Allow NoneType bounds when there is no request dataset.
        ret = None
        if self._request_dataset is not None:
            assert self.axis is not None

            # Open the connection to the real dataset connection object.
            ds = self._request_dataset.driver.open()
            try:
                # Check for bounds.
                bounds_name = self._request_dataset.source_metadata['dim_map'][self.axis].get('bounds')
                if bounds_name is not None:
                    try:
                        ret = ds.variables[bounds_name][self._src_idx, :]
                    except ValueError:
                        shape = ds.variables[bounds_name].shape
                        if len(shape) != 2 or shape[1] != 2:
                            msg = (
                                'The bounds variable "{0}" has an improper shape "{1}". Bounds variables should have '
                                'dimensions (m,2).'.format(bounds_name, shape))
                            ocgis_lh(msg=msg, logger='interface.nc', level=logging.WARN)
                        else:
                            raise
            finally:
                self._request_dataset.driver.close(ds)
        return ret
Example #19
 def __init__(self,*args,**kwds):
     self.grid = kwds.pop('grid',None)
     self.crs = kwds.pop('crs',None)
     self.abstraction = kwds.pop('abstraction','polygon')
     self._geom = kwds.pop('geom',None)
     
     ## if a grid value is passed, the geometry should be regenerated from the
     ## grid when the grid is reset
     if self._grid is not None:
         self._geom_to_grid = True
     else:
         self._geom_to_grid = False
     
     ## attempt to build the geometry dimension
     point = kwds.pop('point',None)
     polygon = kwds.pop('polygon',None)
     geom_kwds = dict(point=point,polygon=polygon)
     if any([g is not None for g in geom_kwds.values()]):
         self._geom = SpatialGeometryDimension(**geom_kwds)
     
     if self.grid is None and self._geom is None:
         try:
             self.grid = SpatialGridDimension(row=kwds.pop('row'),
                                              col=kwds.pop('col'))
         except KeyError:
             ocgis_lh(exc=ValueError('A SpatialDimension without "grid" or "geom" arguments requires a "row" and "column".'))
     
     super(SpatialDimension,self).__init__(*args,**kwds)
Example #20
 def get_intersects(self,polygon,return_indices=False):
     ret = copy(self)
     if type(polygon) in (Point,MultiPoint):
         exc = ValueError('Only Polygons and MultiPolygons are acceptable geometry types for intersects operations.')
         ocgis_lh(exc=exc,logger='dimension.spatial')
     elif type(polygon) in (Polygon,MultiPolygon):
         ## for a polygon subset, the grid is first subsetted by the bounds
         ## of the polygon object. the intersects operation is then performed
         ## on the polygon/point representation as appropriate.
         minx,miny,maxx,maxy = polygon.bounds
         if self.grid is None:
             raise(NotImplementedError)
         else:
             ## reset the geometries
             ret._geom = None
             ## subset the grid by its bounding box
             ret.grid,slc = self.grid.get_subset_bbox(minx,miny,maxx,maxy,return_indices=True)
             ## update the unique identifier to copy the grid uid
             ret.uid = ret.grid.uid
             ## attempt to mask the polygons
             try:
                 ret._geom._polygon = ret.geom.polygon.get_intersects_masked(polygon)
                 grid_mask = ret.geom.polygon.value.mask
             except ImproperPolygonBoundsError:
                 ret._geom._point = ret.geom.point.get_intersects_masked(polygon)
                 grid_mask = ret.geom.point.value.mask
             ## transfer the geometry mask to the grid mask
             ret.grid.value.mask[:,:,:] = grid_mask.copy()
     else:
         raise(NotImplementedError)
     if return_indices:
         ret = (ret,slc)
     
     return(ret)
Example #21
    def test_system_with_callback(self):
        fp = get_temp_path(wd=self.current_dir_output)

        def callback(message, path=fp):
            with open(path, 'a') as sink:
                sink.write(message)
                sink.write('\n')

        class FooError(Exception):
            pass

        ocgis_lh.configure(callback=callback)
        ocgis_lh(msg='this is a test message')
        ocgis_lh()
        ocgis_lh(msg='this is a second test message')
        ocgis_lh(msg='this should not be there', level=logging.DEBUG)
        exc = FooError('foo message for value error')
        try:
            ocgis_lh(exc=exc)
        except FooError:
            pass
        with open(fp, 'r') as source:
            lines = source.readlines()
        self.assertEqual(lines, ['this is a test message\n', 'this is a second test message\n',
                                 'FooError: foo message for value error\n'])
Example #22
    def __init__(self,ops,serial=True,nprocs=1):
        self.ops = ops
        self.serial = serial
        self.nprocs = nprocs
        
        self._subset_log = ocgis_lh.get_logger('subset')

        ## create the calculation engine
        if self.ops.calc is None:
            self.cengine = None
        else:
            ocgis_lh('initializing calculation engine',self._subset_log,level=logging.DEBUG)
            self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                           self.ops.calc,
                                           raw=self.ops.calc_raw,
                                           agg=self.ops.aggregate,
                                           calc_sample_size=self.ops.calc_sample_size)
            
        ## in the case of netcdf output, geometries must be unioned. this is
        ## also true for the case of the selection geometry being requested as
        ## aggregated.
        if (self.ops.output_format == 'nc' or self.ops.agg_selection is True) \
         and self.ops.geom is not None:
            ocgis_lh('aggregating selection geometry',self._subset_log)
            build = True
            for element_geom in self.ops.geom:
                if build:
                    new_geom = element_geom['geom']
                    new_crs = element_geom['crs']
                    new_properties = {'UGID':1}
                    build = False
                else:
                    new_geom = new_geom.union(element_geom['geom'])
            itr = [{'geom':new_geom,'properties':new_properties,'crs':new_crs}]
            self.ops.geom = itr
Example #23
File: crs.py Project: UV-CDAT/ocgis
 def __init__(self,crs=None,prjs=None,epsg=None):
     if crs is None:
         if prjs is not None:
             crs = from_string(prjs)
         elif epsg is not None:
             sr = SpatialReference()
             sr.ImportFromEPSG(epsg)
             crs = from_string(sr.ExportToProj4())
         else:
             raise(NotImplementedError)
     else:
         ## remove unicode and change to python types
         for k,v in crs.iteritems():
             if type(v) == unicode:
                 crs[k] = str(v)
             else:
                 try:
                     crs[k] = v.tolist()
                 except AttributeError:
                     continue
         
     sr = SpatialReference()
     sr.ImportFromProj4(to_string(crs))
     self.value = from_string(sr.ExportToProj4())
 
     try:
         assert(self.value != {})
     except AssertionError:
         ocgis_lh(logger='crs',exc=ValueError('Empty CRS: The conversion to PROJ4 may have failed. The CRS value is: {0}'.format(crs)))
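A hedged sketch of the EPSG branch above using the same GDAL/OSR calls: import from an EPSG code and round-trip through a PROJ.4 string. Assumes the osgeo (GDAL) Python bindings are installed.

from osgeo.osr import SpatialReference

sr = SpatialReference()
sr.ImportFromEPSG(4326)
proj4 = sr.ExportToProj4()
print(proj4)  # e.g. '+proj=longlat +datum=WGS84 +no_defs'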
Example #24
File: base.py Project: NCPP/ocgis
    def __init__(self, *args, **kwargs):
        super(AbstractUnivariateFunction, self).__init__(*args, **kwargs)

        if self.calc_sample_size and self.tgd is None:
            msg = 'Sample sizes not relevant for scalar transforms with no temporal grouping. Setting to False.'
            ocgis_lh(msg=msg, logger='calc.base', level=logging.WARN)
            self.calc_sample_size = False
Example #25
    def _build_(self, coll):
        ret = CsvConverter._build_(self, coll)

        self._ugid_gid_store = {}

        if not self.ops.aggregate:
            fiona_path = os.path.join(self._get_or_create_shp_folder_(), self.prefix + '_gid.shp')
            archetype_field = coll._archetype_field

            try:
                fiona_crs = archetype_field.spatial.crs.value
            except AttributeError:
                if archetype_field.spatial.crs is None:
                    raise ValueError('"crs" is None. A coordinate systems is required for writing to Fiona output.')
                else:
                    raise

            fiona_schema = {'geometry': archetype_field.spatial.abstraction_geometry.geom_type,
                            'properties': OrderedDict([[constants.HEADERS.ID_DATASET.upper(), 'int'],
                                                       [self.geom_uid, 'int'],
                                                       [constants.HEADERS.ID_GEOMETRY.upper(), 'int']])}
            fiona_object = fiona.open(fiona_path, 'w', driver='ESRI Shapefile', crs=fiona_crs, schema=fiona_schema)
        else:
            ocgis_lh('creating a UGID-GID shapefile is not necessary for aggregated data. use UGID shapefile.',
                     'conv.csv-shp',
                     logging.WARN)
            fiona_object = None

        ret.update({'fiona_object': fiona_object})

        return ret
Example #26
File: engine.py Project: NCPP/ocgis
 def _update_bounds_extrapolation_(self, field):
     try:
         name_x_variable = '{}_{}'.format(field.grid.x.name, constants.OCGIS_BOUNDS)
         name_y_variable = '{}_{}'.format(field.grid.y.name, constants.OCGIS_BOUNDS)
         field.grid.set_extrapolated_bounds(name_x_variable, name_y_variable, constants.OCGIS_BOUNDS)
     except BoundsAlreadyAvailableError:
         msg = 'Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".'
         ocgis_lh(msg=msg, logger=self._subset_log, level=logging.WARNING)
Example #27
 def _validate_bounds_(self):
     try:
         assert(self._bounds.dtype == self._value.dtype)
     except AssertionError:
         try:
             self._bounds = np.array(self._bounds,dtype=self._value.dtype)
         except Exception:
             ocgis_lh(exc=ValueError('Value and bounds data types do not match and could not be cast.'))
Example #28
 def _get_value_(self):
     if self._data is None and self._value is None:
         ocgis_lh(exc=ValueError('Values were requested from data source, but no data source is available.'))
     elif self._src_idx is None and self._value is None:
         ocgis_lh(exc=ValueError('Values were requested from data source, but no source index is available.'))
     else:
         self._set_value_from_source_()
     return(self._value)
Example #29
File: nc.py Project: NCPP/ocgis
def create_dimension_map_entry(src, variables, strict=False, attr_name='axis'):
    """
    Create a dimension map entry dictionary by searching variable metadata using attribute constraints.

    :param src: The source information to use for constructing the entry. If ``src`` is a dictionary, it must have two
     entries. The key ``'value'`` corresponds to the string attribute value. The key ``'axis'`` is the representative
     axis to assign the source value (for example ``'X'`` or ``'Y'``).
    :type src: str | dict
    :param dict variables: The metadata entries for the group's variables.
    :param bool strict: If ``False``, do not use a strict interpretation of metadata. Allow some standard approaches for
     handling metadata exceptions.
    :param str attr_name: Name of the attribute to use for checking the attribute values from ``src``.
    :rtype: dict
    """
    if isinstance(src, dict):
        axis = src['axis']
        attr_value = src['value']
    else:
        axis = src
        attr_value = src

    axis_vars = []
    for variable in list(variables.values()):
        vattrs = variable.get('attrs', {})
        if vattrs.get(attr_name) == attr_value:
            if len(variable['dimensions']) == 0:
                pass
            else:
                axis_vars.append(variable['name'])

    # Try to find by default names.
    if not strict and len(axis_vars) == 0:
        possible_names = CFName.get_axis_mapping().get(axis, [])
        for pn in possible_names:
            if pn in list(variables.keys()):
                axis_vars.append(variables[pn]['name'])

    if len(axis_vars) == 1:
        var_name = axis_vars[0]
        dims = list(variables[var_name]['dimensions'])

        if not strict:
            # Use default index positions for X/Y dimensions.
            if axis in ('X', 'Y') and len(dims) > 1:
                if axis == 'Y':
                    dims = [dims[0]]
                elif axis == 'X':
                    dims = [dims[1]]

        ret = {'variable': var_name, DimensionMapKey.DIMENSION: dims}
    elif len(axis_vars) > 1:
        msg = 'Multiple axis (axis="{}") possibilities found using variable(s) "{}". Use a dimension map to specify ' \
              'the appropriate coordinate dimensions.'
        ocgis_lh(msg.format(axis, axis_vars), level=logging.WARN, logger='ocgis.driver.nc', force=True)
        ret = None
    else:
        ret = None
    return ret
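A hedged usage sketch for create_dimension_map_entry. The variables dictionary mimics the group_metadata['variables'] layout consumed above; running it requires the module-level names the function depends on (DimensionMapKey, CFName, ocgis_lh).

variables = {
    'lon': {'name': 'lon', 'dimensions': ('lon',), 'attrs': {'axis': 'X'}},
    'lat': {'name': 'lat', 'dimensions': ('lat',), 'attrs': {'axis': 'Y'}},
}
entry = create_dimension_map_entry('X', variables)
# entry -> {'variable': 'lon', <DimensionMapKey.DIMENSION>: ['lon']}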
Example #30
 def _run_():
     logpath = self.get_temporary_file_path('foo.log')
     ocgis_lh.configure(to_file=logpath)
     ocgis_lh(msg='oh my', level=logging.WARN)
     with open(logpath, 'r') as f:
         lines = f.readlines()
         lines = ''.join(lines)
     self.assertIn('OcgWarning', lines)
     self.assertIn('oh my', lines)
Example #31
 def format(self, value):
     if os.environ.get(self.env_name) is not None:
         msg = 'REFERENCE_PROJECTION may not be set as a system environment variable. It must be parameterized at runtime.'
         e = OcgisEnvironmentError(self, msg)
         ocgis_lh(exc=e, logger='env')
Example #32
    def create_merged_weight_file(self, merged_weight_filename, strict=False):
        """
        Merge weight file chunks to a single, global weight file.

        :param str merged_weight_filename: Path to the merged weight file.
        :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file.
         It is best to leave this ``False``, as not all sources and destinations are mapped. If ``True``, raise an
         :class:`IOError` when an expected chunk weight file does not exist.
        """

        if vm.size > 1:
            raise ValueError(
                "'create_merged_weight_file' does not work in parallel")

        index_filename = self.create_full_path_from_template('index_file')
        ifile = RequestDataset(uri=index_filename).get()
        ifile.load()
        ifc = GridChunkerConstants.IndexFile
        gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs

        src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
        dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

        # Get the global weight dimension size.
        n_s_size = 0
        weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
        wv = weight_filename.join_string_value()
        split_weight_file_directory = self.paths['wd']
        for wfn in map(
                lambda x: os.path.join(split_weight_file_directory,
                                       os.path.split(x)[1]), wv):
            ocgis_lh(msg="current merge weight file target: {}".format(wfn),
                     level=logging.DEBUG,
                     logger=_LOCAL_LOGGER)
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            curr_dimsize = RequestDataset(wfn).get().dimensions['n_s'].size
            # ESMF writes the weight file, but it may be empty if there are no generated weights.
            if curr_dimsize is not None:
                n_s_size += curr_dimsize

        # Create output weight file.
        wf_varnames = ['row', 'col', 'S']
        wf_dtypes = [np.int32, np.int32, np.float64]
        vc = VariableCollection()
        dim = Dimension('n_s', n_s_size)
        for w, wd in zip(wf_varnames, wf_dtypes):
            var = Variable(name=w, dimensions=dim, dtype=wd)
            vc.add_variable(var)
        vc.write(merged_weight_filename)

        # Transfer weights to the merged file.
        sidx = 0
        src_indices = self.src_grid._gc_create_global_indices_(
            src_global_shape)
        dst_indices = self.dst_grid._gc_create_global_indices_(
            dst_global_shape)

        out_wds = nc.Dataset(merged_weight_filename, 'a')
        for ii, wfn in enumerate(
                map(lambda x: os.path.join(split_weight_file_directory, x),
                    wv)):
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            wdata = RequestDataset(wfn).get()
            for wvn in wf_varnames:
                odata = wdata[wvn].get_value()
                try:
                    split_grids_directory = self.paths['wd']
                    odata = self._gc_remap_weight_variable_(
                        ii,
                        wvn,
                        odata,
                        src_indices,
                        dst_indices,
                        ifile,
                        gidx,
                        split_grids_directory=split_grids_directory)
                except IndexError as e:
                    msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(
                        wfn, wvn, str(e))
                    raise IndexError(msg)
                out_wds[wvn][sidx:sidx + odata.size] = odata
                out_wds.sync()
            sidx += odata.size
        out_wds.close()
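A hedged sketch of the merged weight file layout produced above: a single 'n_s' dimension with 'row', 'col', and 'S' variables, following the ESMF sparse-matrix convention (row indexes the destination, col the source). The file path is hypothetical; assumes the netCDF4 package.

import netCDF4 as nc

with nc.Dataset('merged_weights.nc') as ds:  # hypothetical path
    n_s = ds.dimensions['n_s'].size
    row = ds.variables['row'][:]  # destination element indices (int32)
    col = ds.variables['col'][:]  # source element indices (int32)
    S = ds.variables['S'][:]      # regridding weights (float64)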
Example #33
    def _process_geometries_(self, itr, field, alias):
        """
        :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting.
        :type itr: [None] or [:class:`~ocgis.Field`, ...]
        :param field: The target field for operations.
        :type field: :class:`ocgis.Field`
        :param str alias: The request data alias currently being processed.
        :rtype: :class:`~ocgis.SpatialCollection`
        """

        assert isinstance(field, Field)

        ocgis_lh('processing geometries',
                 self._subset_log,
                 level=logging.DEBUG)
        # Process each geometry.
        for subset_field in itr:

            # Initialize the collection storage.
            coll = self._get_initialized_collection_()
            if vm.is_null:
                sfield = field
            else:
                # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
                # subset target with wrapping, coordinate system conversion, etc.
                subset_field = deepcopy(subset_field)

                if self.ops.regrid_destination is not None:
                    # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                    # sources.
                    subset_field_for_regridding = deepcopy(subset_field)

                # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
                # system.
                key = constants.BackTransform.ROTATED_POLE
                self._backtransform[
                    key] = self._get_update_rotated_pole_state_(
                        field, subset_field)

                # Check if the geometric abstraction is available on the field object.
                self._assert_abstraction_available_(field)

                # Return a slice or snippet if either of these are requested.
                field = self._get_slice_or_snippet_(field)

                # Choose the subset UGID value.
                if subset_field is None:
                    msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                    subset_ugid = None
                else:
                    subset_ugid = subset_field.geom.ugid.get_value()[0]
                    msg = 'Subsetting with selection geometry having UGID={0}'.format(
                        subset_ugid)
                ocgis_lh(msg=msg, logger=self._subset_log)

                if subset_field is not None:
                    # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                    if subset_field.crs is not None and subset_field.crs != field.crs:
                        subset_field.update_crs(field.crs)
                    # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                    subset_field = self._get_buffered_subset_geometry_if_point_(
                        field, subset_field)

                # If there is a selection geometry present, use it for the spatial subset. if not, all the field's data
                # is being returned.
                if subset_field is None:
                    sfield = field
                else:
                    sfield = self._get_spatially_subsetted_field_(
                        alias, field, subset_field, subset_ugid)

                ocgis_lh(msg='after self._get_spatially_subsetted_field_',
                         logger=self._subset_log,
                         level=logging.DEBUG)

                # Create the subcommunicator following the data subset to ensure non-empty communication.
                vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET,
                                               sfield,
                                               is_current=True,
                                               clobber=True)

                if not vm.is_null:
                    if not sfield.is_empty and not self.ops.allow_empty:
                        raise_if_empty(sfield)

                        # If the base size is being requested, bypass the rest of the operations.
                        if not self._request_base_size_only:
                            # Perform regridding operations if requested.
                            if self.ops.regrid_destination is not None and sfield.regrid_source:
                                sfield = self._get_regridded_field_with_subset_(
                                    sfield,
                                    subset_field_for_regridding=
                                    subset_field_for_regridding)
                            else:
                                ocgis_lh(msg='no regridding operations',
                                         logger=self._subset_log,
                                         level=logging.DEBUG)
                            # If empty returns are allowed, there may be an empty field.
                            if sfield is not None:
                                # Only update spatial stuff if there are no calculations and, if there are calculations,
                                # those calculations are not expecting raw values.
                                if self.ops.calc is None or (
                                        self.ops.calc is not None
                                        and not self.ops.calc_raw):
                                    # Update spatial aggregation, wrapping, and coordinate systems.
                                    sfield = _update_aggregation_wrapping_crs_(
                                        self, alias, sfield, subset_field,
                                        subset_ugid)
                                    ocgis_lh(
                                        'after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                        self._subset_log,
                                        level=logging.DEBUG)

            # Add the created field to the output collection with the selection geometry.
            if sfield is None:
                assert self.ops.aggregate
            if sfield is not None:
                coll.add_field(sfield, subset_field)

            yield coll
Example #34
File: base.py Project: imclab/ocgis
    def write(self):
        ## call subclass write method
        ocgis_lh('starting subclass write method', self._log, logging.DEBUG)
        ret = self._write_()

        ## add OCGIS metadata output if requested.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            lines = MetaConverter(self.ops).write()
            out_path = os.path.join(
                self.outdir, self.prefix + '_' + MetaConverter._meta_filename)
            with open(out_path, 'w') as f:
                f.write(lines)

        ## add the dataset descriptor file if specified
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv',
                     logging.DEBUG)
            from ocgis.conv.csv_ import OcgDialect

            headers = [
                'DID', 'VARIABLE', 'ALIAS', 'URI', 'STANDARD_NAME', 'UNITS',
                'LONG_NAME'
            ]
            out_path = os.path.join(self.outdir, self.prefix + '_did.csv')
            with open(out_path, 'w') as f:
                writer = csv.writer(f, dialect=OcgDialect)
                writer.writerow(headers)
                for rd in self.ops.dataset:
                    row = [rd.did, rd.variable, rd.alias, rd.uri]
                    ref_variable = rd.ds.metadata['variables'][
                        rd.variable]['attrs']
                    row.append(ref_variable.get('standard_name', None))
                    row.append(ref_variable.get('units', None))
                    row.append(ref_variable.get('long_name', None))
                    writer.writerow(row)

        ## add user-geometry
        if self._add_ugeom and self.ops.geom is not None:
            ocgis_lh('writing user-geometry shapefile', 'conv', logging.DEBUG)
            if self._add_ugeom_nest:
                shp_dir = os.path.join(self.outdir, 'shp')
                try:
                    os.mkdir(shp_dir)
                ## catch if the directory exists
                except OSError:
                    if os.path.exists(shp_dir):
                        pass
                    else:
                        raise
            else:
                shp_dir = self.outdir
            shp_path = os.path.join(shp_dir, self.prefix + '_ugid.shp')
            self.ops.geom.write(shp_path)

        ## add source metadata if requested
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            out_path = os.path.join(self.outdir,
                                    self.prefix + '_source_metadata.txt')
            to_write = []
            for rd in self.ops.dataset:
                ip = Inspect(request_dataset=rd)
                to_write += ip.get_report()
            with open(out_path, 'w') as f:
                f.writelines('\n'.join(to_write))

        ## return anything from the overloaded _write_ method. otherwise return
        ## the internal path.
        if ret is None:
            ret = self.path

        return (ret)
Example #35
    cmd = [MPIEXEC, '-n', str(nprocs), sys.executable, OCLI_EXE, 'chunked_rwg']

    cmd.extend(['--source', dsrc['path'], '--esmf_src_type', dsrc['etype']])
    cmd.extend(
        ['--destination', ddst['path'], '--esmf_dst_type', ddst['etype']])
    cmd.extend(['--wd', wd])
    cmd.extend(['--weight', weight])
    if is_point:
        cmd.append('--spatial_subset')
    else:
        cmd.extend(['--nchunks_dst', str(ddst['nchunks_dst'])])
    # cmd.extend(['--no_genweights'])

    return cmd


if __name__ == '__main__':
    ocgis_lh(logger='chunker', msg='starting!')

    key_dst = 'scrip-unstruct'
    # key_dst = 'scrip-struct'
    # key_dst = 'scrip-point'
    cmd = create_command(WD, 'ugrid', key_dst, WEIGHT)

    ocgis_lh(logger='chunker', msg=' '.join(cmd))

    subprocess.check_call(cmd)

    ocgis_lh(logger='chunker', msg='stopping!')
Example #36
    def test_system_combinations(self):
        _to_stream = [
            True,
            False
        ]
        _to_file = [
            os.path.join(env.DIR_OUTPUT, 'test_ocgis_log.log'),
            None
        ]

        _level = [logging.INFO, logging.DEBUG, logging.WARN]
        for ii, (to_file, to_stream, level) in enumerate(itertools.product(_to_file, _to_stream, _level)):
            ocgis_lh.configure(to_file=to_file, to_stream=to_stream, level=level)
            try:
                ocgis_lh(ii)
                ocgis_lh('a test message')
                subset = ocgis_lh.get_logger('subset')
                interp = ocgis_lh.get_logger('interp')
                ocgis_lh('a subset message', logger=subset)
                ocgis_lh('an interp message', logger=interp)
                ocgis_lh('a general message', alias='foo', ugid=10)
                ocgis_lh('another message', level=level)
                if to_file is not None:
                    self.assertTrue(os.path.exists(to_file))
                    os.remove(to_file)
            finally:
                logging.shutdown()
Example #37
    def write(self):
        ocgis_lh('starting write method', self._log, logging.DEBUG)

        # Indicates if user geometries should be written to file.
        write_ugeom = False

        ncoll = len(self.ops.geom)

        build = True
        for i, coll in enumerate(self):
            ugids = list(coll.properties.keys())
            assert len(ugids) == 1
            ugid = ugids[0]

            # Geometry centroid location
            lon, lat = coll.geoms[ugid].centroid.xy

            for field in coll.iter_fields():

                lon_attrs = field.x.attrs.copy()
                lat_attrs = field.y.attrs.copy()

                # Removed for now. It'd be nice to find an elegant way to retain those.
                field.remove_variable('lat')
                field.remove_variable('lon')

                # Create new lon and lat variables
                field.add_variable(
                    ocgis.Variable('lon',
                                   value=lon,
                                   dimensions=(DimensionName.UNIONED_GEOMETRY,),
                                   attrs=dict(lon_attrs, **{'long_name':'Centroid longitude'})
                                   )
                )

                field.add_variable(
                    ocgis.Variable('lat',
                                   value=lat,
                                   dimensions=(DimensionName.UNIONED_GEOMETRY,),
                                   attrs=dict(lat_attrs, **{'long_name':'Centroid latitude'})
                                   )
                )

                if 'ocgis_spatial_mask' in field:
                    # Remove the spatial mask; a new one is set below.
                    field.remove_variable('ocgis_spatial_mask')

                grid = ocgis.Grid(field['lon'], field['lat'], abstraction='point',
                                  crs=field.crs, parent=field)
                grid.set_mask([[False,]])
                field.set_grid(grid)

                # Geometry variables from the geom properties dict
                # There is no metadata for those...
                dm = get_data_model(self.ops)

                for key, val in coll.properties[ugid].items():
                    if np.issubdtype(type(val), int):
                        dt = get_dtype('int', dm)
                    elif np.issubdtype(type(val), float):
                        dt = get_dtype('float', dm)
                    else:
                        dt='auto'
                    field.add_variable(
                        ocgis.Variable(key,
                                       value=[val,],
                                       dtype=dt,
                                       dimensions=(DimensionName.UNIONED_GEOMETRY,)))

                # ------------------ Dimension update ------------------------ #
                # Modify the dimensions for the number of geometries
                gdim = field.dimensions[DimensionName.UNIONED_GEOMETRY]
                gdim.set_size(ncoll)

                for var in field.iter_variables_by_dimensions([gdim]):
                    d = var.dimensions_dict[DimensionName.UNIONED_GEOMETRY]
                    d.bounds_local = (i, i+1)
                # ------------------------------------------------------------ #

                # CF Conventions: options for cf_role are timeseries_id, profile_id,
                # and trajectory_id. Can this be anything other than timeseries_id?
                gid = field[HeaderName.ID_GEOMETRY]
                gid.attrs['cf_role'] = 'timeseries_id'

                # TODO: Hard-code the name in constants.py
                gdim.set_name('region')

            # Path to the output object.
            # I needed to put it here because _write_archetype pops it, so it's not available after the first loop.
            f = {KeywordArgument.PATH: self.path}

            # Default to append mode; on the build loop this is reset to None so the drivers choose the write mode.
            write_mode = MPIWriteMode.APPEND

            if build:
                # During a build loop, create the file and write the first series of records. Let the drivers determine
                # the appropriate write modes for handling parallelism.
                write_mode = None

                # Write the user geometries if selected and there is one present on the incoming collection.
                if self._add_ugeom and coll.has_container_geometries:
                    write_ugeom = True

                if write_ugeom:
                    if vm.rank == 0:
                        # The output file name for the user geometries.
                        ugid_shp_name = self.prefix + '_ugid.shp'
                        if self._add_ugeom_nest:
                            ugeom_fiona_path = os.path.join(self._get_or_create_shp_folder_(), ugid_shp_name)
                        else:
                            ugeom_fiona_path = os.path.join(self.outdir, ugid_shp_name)
                    else:
                        ugeom_fiona_path = None

                build = False

            f[KeywordArgument.WRITE_MODE] = write_mode
            self._write_coll_(f, coll)

            if write_ugeom:
                with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                    if not vm.is_null:
                        for subset_field in list(coll.children.values()):
                            subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

        # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
        ops = self.ops
        if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
            # Add OCGIS metadata output if requested.
            if self.add_meta:
                ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
                from ocgis.conv.meta import MetaOCGISConverter

                lines = MetaOCGISConverter(ops).write()
                out_path = os.path.join(self.outdir, self.prefix + '_' + MetaOCGISConverter._meta_filename)
                with open(out_path, 'w') as f:
                    f.write(lines)

            # Add the dataset descriptor file if requested.
            if self._add_did_file:
                ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
                path = os.path.join(self.outdir, self.prefix + '_did.csv')
                _write_dataset_identifier_file_(path, ops)

            # Add source metadata if requested.
            if self._add_source_meta:
                ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
                path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
                _write_source_meta_(path, ops)

        # Return the internal path unless overloaded by subclasses.
        ret = self._get_return_()

        return ret
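A note on the attribute handling above: `dict(lon_attrs, **{'long_name': 'Centroid longitude'})` merges the stored coordinate attributes with an override without mutating the original dict. A minimal standalone sketch of the same idiom (the attribute values are illustrative):

# Merge saved attributes with an override; the source dict is untouched.
lon_attrs = {'units': 'degrees_east', 'standard_name': 'longitude'}
merged = dict(lon_attrs, **{'long_name': 'Centroid longitude'})
assert 'long_name' not in lon_attrs
assert merged['long_name'] == 'Centroid longitude'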
Example #38
0
 def _run_():
     ocgis_lh.configure()
     self.assertTrue(ocgis_lh.null)
     env.SUPPRESS_WARNINGS = False
     ocgis_lh(level=logging.WARNING, exc=RuntimeWarning('show me'))
     env.SUPPRESS_WARNINGS = True
Example #39
0
 def _run_():
     env.SUPPRESS_WARNINGS = False
     logpath = self.get_temporary_file_path('foo.log')
     ocgis_lh.configure(to_file=logpath)
     ocgis_lh(msg='hey there', level=logging.WARN)
     env.SUPPRESS_WARNINGS = True
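Examples #38 and #39 both toggle `env.SUPPRESS_WARNINGS` around a warning emission. If an assertion fails between the two assignments, the flag is left disabled, so a try/finally variant is safer. A sketch with a stand-in `env` object (the `_Env` class is hypothetical, not the ocgis implementation):

import warnings

class _Env(object):
    SUPPRESS_WARNINGS = True  # stand-in for ocgis.env.SUPPRESS_WARNINGS

env = _Env()

def emit_unsuppressed(warning):
    # Disable suppression, emit, then restore the previous state even on error.
    previous = env.SUPPRESS_WARNINGS
    env.SUPPRESS_WARNINGS = False
    try:
        warnings.warn(warning)
    finally:
        env.SUPPRESS_WARNINGS = previous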
Example #40
0
    def _process_subsettables_(self, rds):
        """
        :param rds: Sequence of :class:`~ocgis.RequestDataset` objects.
        :type rds: sequence
        :rtype: :class:`ocgis.collection.base.AbstractCollection`
        """

        ocgis_lh(msg='entering _process_subsettables_',
                 logger=self._subset_log,
                 level=logging.DEBUG)

        # This is used to define the group of request datasets for things like logging and exceptions.
        try:
            alias = '_'.join([r.field_name for r in rds])
        except AttributeError:
            # Allow field objects which do not expose the "field_name" attribute.
            try:
                alias = '_'.join([r.name for r in rds])
            except TypeError:
                # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
                alias = None
        except NoDataVariablesFound:
            # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
            alias = None

        ocgis_lh('processing...',
                 self._subset_log,
                 alias=alias,
                 level=logging.DEBUG)
        # Create the field object. Field objects may be passed directly to operations.
        # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
        if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
            ocgis_lh('applying optimizations',
                     self._subset_log,
                     level=logging.DEBUG)
            field = [
                self.ops.optimizations['fields'][rd.field_name].copy()
                for rd in rds
            ]
            has_field_optimizations = True
        else:
            # Indicates no field optimizations loaded.
            has_field_optimizations = False
        try:
            # No field optimizations and data should be loaded from source.
            if not has_field_optimizations:
                ocgis_lh('creating field objects',
                         self._subset_log,
                         level=logging.DEBUG)
                len_rds = len(rds)
                field = [None] * len_rds
                for ii in range(len_rds):
                    rds_element = rds[ii]
                    try:
                        field_object = rds_element.get(
                            format_time=self.ops.format_time,
                            grid_abstraction=self.ops.abstraction)
                    except (AttributeError, TypeError):
                        # Likely a field object which does not need to be loaded from source.
                        if not self.ops.format_time:
                            raise NotImplementedError
                        # Check that it is indeed a field before proceeding.
                        if not isinstance(rds_element, Field):
                            raise
                        field_object = rds_element

                    field[ii] = field_object

            # Multivariate calculations require pulling variables across fields.
            if self._has_multivariate_calculations and len(field) > 1:
                for midx in range(1, len(field)):
                    # Use the data variable tag if it is available. Otherwise, attempt to merge the fields, raising a
                    # warning if the variable exists in the squashed field.
                    if len(field[midx].data_variables) > 0:
                        vitr = field[midx].data_variables
                        is_data = True
                    else:
                        vitr = list(field[midx].values())
                        is_data = False
                    for mvar in vitr:
                        mvar = mvar.extract()
                        field[0].add_variable(mvar, is_data=is_data)
                    new_field_name = '_'.join([str(f.name) for f in field])
                    field[0].set_name(new_field_name)

            # The first field in the list is always the target for other operations.
            field = field[0]
            assert isinstance(field, Field)

            # Break out of operations if the rank is empty.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET,
                                           field,
                                           is_current=True,
                                           clobber=True)
            if not vm.is_null:
                if not has_field_optimizations:
                    if field.is_empty:
                        raise ValueError('No empty fields allowed.')

                    # Time, level, etc. subsets.
                    field = self._get_nonspatial_subset_(field)

                    # Spatially reorder the data.
                    ocgis_lh(msg='before spatial reorder',
                             logger=self._subset_log,
                             level=logging.DEBUG)
                    if self.ops.spatial_reorder:
                        self._update_spatial_order_(field)

                    # Extrapolate the spatial bounds if requested.
                    # TODO: Rename "interpolate" to "extrapolate".
                    if self.ops.interpolate_spatial_bounds:
                        self._update_bounds_extrapolation_(field)

        # This error is related to subsetting by time or level. Spatial subsetting occurs below.
        except EmptySubsetError as e:
            if self.ops.allow_empty:
                ocgis_lh(
                    msg='time or level subset empty but empty returns allowed',
                    logger=self._subset_log,
                    level=logging.WARN)
                coll = self._get_initialized_collection_()
                name = '_'.join([rd.field_name for rd in rds])
                field = Field(name=name, is_empty=True)
                coll.add_field(field, None)
                try:
                    yield coll
                finally:
                    return
            else:
                # Raise an exception as empty subsets are not allowed.
                ocgis_lh(exc=ExtentError(message=str(e)),
                         alias=str([rd.field_name for rd in rds]),
                         logger=self._subset_log)

        # Set iterator based on presence of slice. Slice always overrides geometry.
        if self.ops.slice is not None:
            itr = [None]
        else:
            itr = [None] if self.ops.geom is None else self.ops.geom

        for coll in self._process_geometries_(itr, field, alias):
            # Conform units following the spatial subset.
            if not vm.is_null and self.ops.conform_units_to is not None:
                for to_conform in coll.iter_fields():
                    for dv in to_conform.data_variables:
                        dv.cfunits_conform(self.ops.conform_units_to)
            ocgis_lh(msg='_process_subsettables_ yielding',
                     logger=self._subset_log,
                     level=logging.DEBUG)
            yield coll
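A recurring control-flow idiom in `_process_subsettables_`: on an empty time or level subset, either yield a placeholder collection and stop the generator, or log-and-raise through `ocgis_lh`. A stripped-down sketch of that branch structure (`EmptySubsetError` here is a local stand-in, and the placeholder list stands in for the spatial collection):

class EmptySubsetError(Exception):
    pass

def process(targets, allow_empty=False):
    # Yield processed targets; on an empty subset either yield a placeholder
    # and stop, or re-raise, mirroring the generator above.
    try:
        results = [t for t in targets if t is not None]
        if not results:
            raise EmptySubsetError('time or level subset is empty')
    except EmptySubsetError:
        if allow_empty:
            yield []
            return
        raise
    for result in results:
        yield result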
Example #41
0
File: base.py Project: wk1984/ocgis
    def validate(cls, ops):
        if ops.calc_grouping is None:
            from ocgis.ops.parms.definition import Calc

            msg = 'Set functions must have a temporal grouping.'
            ocgis_lh(exc=DefinitionValidationError(Calc, msg), logger='calc.base')
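`ocgis_lh(exc=...)` logs the exception against the named logger and then raises it, which is why `validate` above contains no explicit raise. A minimal stand-in for that behavior (`log_and_raise` is hypothetical, not the real `ocgis_lh` signature):

import logging

def log_and_raise(exc, logger_name):
    # Record the exception on the named logger, then propagate it.
    logging.getLogger(logger_name).error(str(exc))
    raise exc

# Usage mirroring validate() above:
# log_and_raise(ValueError('Set functions must have a temporal grouping.'), 'calc.base')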
Example #42
0
    def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
        """
        Yield source grid subset using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
        :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
        """
        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self,
                                     yield_slice=yield_slice,
                                     yield_idx=yield_idx)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice,
                                                  yield_idx=yield_idx)

        # Loop over each destination grid subset.
        ocgis_lh(logger='grid_chunker',
                 msg='starting "for yld in iter_dst"',
                 level=logging.DEBUG)
        for yld in iter_dst:
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    # Use the extent of the polygon for determining the bounding box. This ensures conservative
                    # regridding will be fully mapped.
                    if isinstance(dst_grid_subset,
                                  AbstractGeometryCoordinates):
                        target_grid = dst_grid_subset.parent.grid
                    else:
                        target_grid = dst_grid_subset

                    extent_global = target_grid.parent.attrs.get(
                        'extent_global')
                    if extent_global is None:
                        with grid_abstraction_scope(target_grid,
                                                    Topology.POLYGON):
                            extent_global = target_grid.extent_global

                    if self.check_contains:
                        dst_box = box(*target_grid.extent_global)

                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds),
                             level=logging.DEBUG,
                             logger='grid_chunker')
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])

            sub_box = GeometryVariable.from_shapely(
                sub_box,
                is_bbox=True,
                wrapped_state=dst_grid_wrapped_state,
                crs=dst_grid_crs)
            ocgis_lh(logger='grid_chunker',
                     msg='starting "self.src_grid.get_intersects"',
                     level=logging.DEBUG)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(
                sub_box,
                keep_touches=False,
                cascade=False,
                optimized_bbox_subset=self.optimized_bbox_subset,
                return_slice=True)
            ocgis_lh(logger='grid_chunker',
                     msg='finished "self.src_grid.get_intersects"',
                     level=logging.DEBUG)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset,
                       'reduce_global') and src_grid_subset.cindex is not None:
                # Only redistribute if there is at least one live rank.
                if self.redistribute and len(
                        vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    ocgis_lh(logger='grid_chunker',
                             msg='starting redistribute',
                             level=logging.DEBUG)
                    topology = src_grid_subset.abstractions_available[
                        Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[
                        Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                    ocgis_lh(logger='grid_chunker',
                             msg='finished redistribute',
                             level=logging.DEBUG)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError(
                                'Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global'
                               ) and src_grid_subset.cindex is not None:
                        ocgis_lh(logger='grid_chunker',
                                 msg='starting reduce_global',
                                 level=logging.DEBUG)
                        src_grid_subset = src_grid_subset.reduce_global()
                        ocgis_lh(logger='grid_chunker',
                                 msg='finished reduce_global',
                                 level=logging.DEBUG)
                else:
                    pass
                    # src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {
                        src_grid_subset.dimensions[ii].name: src_grid_slice[ii]
                        for ii in range(src_grid_subset.ndim)
                    }

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset,
                       dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld
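The buffering step above notes that a plain `buffer` produces rounded, "fancy" borders, and takes the `envelope` to recover an axis-aligned rectangle. That behavior is plain Shapely and can be verified in isolation:

from shapely.geometry import box

sub_box = box(0.0, 0.0, 10.0, 5.0)
# buffer() rounds the corners; envelope restores the bounding rectangle.
expanded = sub_box.buffer(1.5).envelope
print(expanded.bounds)  # (-1.5, -1.5, 11.5, 6.5)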
Example #43
0
    def execute(self, coll, file_only=False, tgds=None):
        """
        :param coll: :class:`~ocgis.SpatialCollection`
        :param bool file_only:
        :param dict tgds: ``{'field_alias': :class:`ocgis.interface.base.dimension.temporal.TemporalGroupDimension`, ...}``
        """
        from ocgis import VariableCollection

        # Select which dictionary will hold the temporal group dimensions.
        if tgds is None:
            tgds_to_use = self._tgds
            tgds_overloaded = False
        else:
            tgds_to_use = tgds
            tgds_overloaded = True

        # Group the variables. If grouping is None, calculations are performed on each element.
        if self.grouping is not None:
            ocgis_lh('Setting temporal groups: {0}'.format(self.grouping),
                     'calc.engine')
            for field in coll.iter_fields():
                if tgds_overloaded:
                    assert field.name in tgds_to_use
                else:
                    if field.name not in tgds_to_use:
                        tgds_to_use[field.name] = field.time.get_grouping(
                            self.grouping)

        # Iterate over functions.
        for ugid, container in list(coll.children.items()):
            for field_name, field in list(container.children.items()):
                new_temporal = tgds_to_use.get(field_name)
                if new_temporal is not None:
                    new_temporal = new_temporal.copy()
                # If the engine has a grouping, ensure it is equivalent to the new temporal dimension.
                if self.grouping is not None:
                    try:
                        compare = set(new_temporal.grouping) == set(
                            self.grouping)
                    # Types may be unhashable, compare directly.
                    except TypeError:
                        compare = new_temporal.grouping == self.grouping
                    if not compare:
                        msg = 'Engine temporal grouping and field temporal grouping are not equivalent. Perhaps ' \
                              'optimizations are incorrect?'
                        ocgis_lh(logger='calc.engine', exc=ValueError(msg))

                out_vc = VariableCollection()

                for f in self.funcs:
                    try:
                        ocgis_lh('Calculating: {0}'.format(f['func']),
                                 logger='calc.engine')
                        # Initialize the function.
                        function = f['ref'](
                            alias=f['name'],
                            dtype=None,
                            field=field,
                            file_only=file_only,
                            vc=out_vc,
                            parms=f['kwds'],
                            tgd=new_temporal,
                            calc_sample_size=self.calc_sample_size,
                            meta_attrs=f.get('meta_attrs'),
                            spatial_aggregation=self.spatial_aggregation)
                        # Allow a calculation to create a temporal aggregation after initialization.
                        if new_temporal is None and function.tgd is not None:
                            new_temporal = function.tgd.extract()
                    except KeyError:
                        # Likely an eval function which does not have the name key.
                        function = EvalFunction(
                            field=field,
                            file_only=file_only,
                            vc=out_vc,
                            expr=self.funcs[0]['func'],
                            meta_attrs=self.funcs[0].get('meta_attrs'))

                    ocgis_lh('calculation initialized',
                             logger='calc.engine',
                             level=logging.DEBUG)

                    # Return the variable collection from the calculations.
                    out_vc = function.execute()

                    for dv in out_vc.values():
                        # Any outgoing variables from a calculation must have an associated data type.
                        try:
                            assert dv.dtype is not None
                        except AssertionError:
                            assert isinstance(dv.dtype, np.dtype)
                        # If this is a file only operation, there should be no computed values.
                        if file_only:
                            assert dv._value is None

                    ocgis_lh('calculation finished',
                             logger='calc.engine',
                             level=logging.DEBUG)

                    # Try to mark progress. Okay if it is not there.
                    try:
                        self._progress.mark()
                    except AttributeError:
                        pass

                out_field = function.field.copy()
                function_tag = function.tag

                # Format the returned field. Doing things like removing original data variables and modifying the
                # time dimension if necessary. Field functions handle all field modifications on their own, so bypass
                # in that case.
                if new_temporal is not None:
                    new_temporal = new_temporal.extract()
                format_return_field(function_tag,
                                    out_field,
                                    new_temporal=new_temporal)

                # Add the calculation variables.
                for variable in list(out_vc.values()):
                    with orphaned(variable):
                        out_field.add_variable(variable)
                # Tag the calculation data as data variables.
                out_field.append_to_tags(function_tag, list(out_vc.keys()))

                coll.children[ugid].children[field_name] = out_field
        return coll
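The temporal-grouping check in `execute` first tries an order-insensitive set comparison and falls back to direct equality when the grouping elements are unhashable. The same pattern in isolation:

def groupings_equivalent(a, b):
    # Compare order-insensitively when elements are hashable.
    try:
        return set(a) == set(b)
    except TypeError:
        # Elements such as lists are unhashable; compare directly.
        return a == b

assert groupings_equivalent(['month', 'year'], ['year', 'month'])
assert not groupings_equivalent([['month']], [['year']])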
Example #44
0
File: ocli.py Project: NCPP/ocgis
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type,
                esmf_dst_type, genweights, esmf_regrid_method, spatial_subset,
                src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path,
                verbose, loglvl, weightfilemode, large_file):

    # Used for creating the history string.
    the_locals = locals()

    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation",
             level=logging.INFO,
             logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = (
            'env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
            'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge-only operation.
    should_create_wd = (nchunks_dst is None
                        or not all([ii == 1
                                    for ii in nchunks_dst])) or spatial_subset
    if should_create_wd:
        if wd is None:
            if ocgis.vm.rank == 0:
                wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
            wd = ocgis.vm.bcast(wd)
        else:
            exc = None
            if ocgis.vm.rank == 0:
                # The working directory must not exist to proceed.
                if nchunks_dst is not None:
                    if os.path.exists(wd):
                        exc = ValueError(
                            "Working directory {} must not exist.".format(wd))
                    else:
                        # Make the working directory nesting as needed.
                        os.makedirs(wd)
            exc = ocgis.vm.bcast(exc)
            if exc is not None:
                raise exc

        if merge and not spatial_subset or (spatial_subset and genweights):
            if _is_subdir_(wd, weight):
                raise ValueError(
                    'Merge weight file path must not be in the working directory. It may get unintentionally deleted with the --no_persist flag.'
                )

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source,
                                      esmf_src_type,
                                      data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(
            spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src,
                               rd_dst,
                               spatial_subset_path,
                               src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {
        'regrid_method': esmf_regrid_method,
        'ignore_degenerate': ignore_degenerate,
    }
    # Allow older versions of ESMF to work with a default large file flag.
    if large_file:
        esmf_kwargs['large_file'] = large_file

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src,
                     rd_dst,
                     nchunks_dst=nchunks_dst,
                     src_grid_resolution=src_resolution,
                     paths=paths,
                     dst_grid_resolution=dst_resolution,
                     buffer_value=buffer_distance,
                     redistribute=True,
                     genweights=genweights,
                     esmf_kwargs=esmf_kwargs,
                     use_spatial_decomp='auto',
                     eager=eager,
                     filemode=weightfilemode)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None and not gs.is_one_chunk:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            handle_weight_file_check(weight)
            gs.write_esmf_weights(source,
                                  destination,
                                  weight,
                                  filemode=weightfilemode)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset and not gs.is_one_chunk:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(
                    weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                handle_weight_file_check(weight)
                gs.create_merged_weight_file(weight)
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Append the history string if there is an output weight file.
    if weight and ocgis.vm.rank == 0:
        if os.path.exists(weight):
            # Add some additional stuff for record keeping
            import getpass
            import socket
            import datetime

            with nc.Dataset(weight, 'a') as ds:
                ds.setncattr('created_by_user', getpass.getuser())
                ds.setncattr('created_on_hostname', socket.getfqdn())
                ds.setncattr('history', create_history_string(the_locals))
    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
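The record-keeping block near the end of `chunked_rwg` appends provenance attributes to the weight file with plain netCDF4-python calls. A self-contained sketch of the same step, assuming the target file already exists (`append_provenance` is an illustrative helper, not part of ocgis):

import datetime
import getpass
import socket

import netCDF4 as nc

def append_provenance(path, history):
    # Open in append mode and record who, where, and when.
    with nc.Dataset(path, 'a') as ds:
        ds.setncattr('created_by_user', getpass.getuser())
        ds.setncattr('created_on_hostname', socket.getfqdn())
        ds.setncattr('created_at', datetime.datetime.now().isoformat())
        ds.setncattr('history', history)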
Example #45
0
 def test_system_parallel(self):
     to_file = os.path.join(self.current_dir_output,
                            'rank-{}-test_ocgis_log.log'.format(vm.rank))
     ocgis_lh.configure(to_file=to_file)
     ocgis_lh("something happened")
     self.assertEqual(len(os.listdir(self.current_dir_output)), vm.size)
Example #46
0
    def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
        """
        Yield source grid subset using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
        :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
        """
        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self,
                                     yield_slice=yield_slice,
                                     yield_idx=yield_idx)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice,
                                                  yield_idx=yield_idx)

        # Loop over each destination grid subset.
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='starting "for yld in iter_dst"',
                 level=logging.DEBUG)
        for iter_dst_ctr, yld in enumerate(iter_dst, start=1):
            ocgis_lh(msg=["iter_dst_ctr", iter_dst_ctr], level=logging.DEBUG)
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            # All masked destinations are very problematic for ESMF
            with vm.scoped_by_emptyable('global mask', dst_grid_subset):
                if not vm.is_null:
                    if dst_grid_subset.has_mask_global:
                        if dst_grid_subset.has_mask and dst_grid_subset.has_masked_values:
                            all_masked = dst_grid_subset.get_mask().all()
                        else:
                            all_masked = False
                        all_masked_gather = vm.gather(all_masked)
                        if vm.rank == 0:
                            if all(all_masked_gather):
                                exc = ValueError(
                                    "Destination subset all masked")
                                try:
                                    raise exc
                                finally:
                                    vm.abort(exc=exc)

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    # Use the extent of the polygon for determining the bounding box. This ensures conservative
                    # regridding will be fully mapped.
                    if isinstance(dst_grid_subset,
                                  AbstractGeometryCoordinates):
                        target_grid = dst_grid_subset.parent.grid
                    else:
                        target_grid = dst_grid_subset

                    # Try to reduce the coordinates in the case of unstructured grid data. Ensure the data also has a
                    # coordinate index. SCRIP grid files, for example, do not have a coordinate index like UGRID.
                    if hasattr(
                            target_grid, 'reduce_global'
                    ) and Topology.POLYGON in target_grid.abstractions_available and target_grid.cindex is not None:
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='starting reduce_global for dst_grid_subset',
                            level=logging.DEBUG)
                        target_grid = target_grid.reduce_global()
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='finished reduce_global for dst_grid_subset',
                            level=logging.DEBUG)

                    extent_global = target_grid.parent.attrs.get(
                        'extent_global')
                    if extent_global is None:
                        with grid_abstraction_scope(target_grid,
                                                    Topology.POLYGON):
                            extent_global = target_grid.extent_global
                            # HACK: Bad corner coordinates can lead to bad extents. In this case, the lower bound on the
                            #  x-coordinate is unreasonable and breaks wrapping code. Set it to 0.0, which is a reasonable
                            #  lower x-coordinate for unwrapped datasets.
                            if (isinstance(target_grid.crs, Spherical)) and \
                                    dst_grid_wrapped_state == WrappedState.UNWRAPPED and \
                                    extent_global[0] < 0.0:
                                e = list(extent_global)
                                e[0] = 0.0
                                extent_global = tuple(e)

                    if self.check_contains:
                        dst_box = box(*target_grid.extent_global)

                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds),
                             level=logging.DEBUG,
                             logger=_LOCAL_LOGGER)
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])
            sub_box = GeometryVariable.from_shapely(
                sub_box,
                is_bbox=True,
                wrapped_state=dst_grid_wrapped_state,
                crs=dst_grid_crs)

            # Prepare geometry to match coordinate system and wrapping of the subset target
            sub_box = sub_box.prepare(archetype=self.src_grid)
            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='prepared geometry',
                     level=logging.DEBUG)

            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='starting "self.src_grid.get_intersects"',
                     level=logging.DEBUG)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(
                sub_box,
                keep_touches=False,
                cascade=False,
                optimized_bbox_subset=self.optimized_bbox_subset,
                return_slice=True)
            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='finished "self.src_grid.get_intersects"',
                     level=logging.DEBUG)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset,
                       'reduce_global') and src_grid_subset.cindex is not None:
                # Only redistribute if there is at least one live rank.
                if self.redistribute and len(
                        vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='starting redistribute',
                             level=logging.DEBUG)
                    topology = src_grid_subset.abstractions_available[
                        Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[
                        Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='finished redistribute',
                             level=logging.DEBUG)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError(
                                'Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global'
                               ) and src_grid_subset.cindex is not None:
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='starting reduce_global',
                                 level=logging.DEBUG)
                        src_grid_subset = src_grid_subset.reduce_global()
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='finished reduce_global',
                                 level=logging.DEBUG)
                else:
                    pass
                    # src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {
                        src_grid_subset.dimensions[ii].name: src_grid_slice[ii]
                        for ii in range(src_grid_subset.ndim)
                    }

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset,
                       dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld
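The HACK block in this version clamps a spurious negative lower x-bound for unwrapped spherical grids. The tuple surgery it performs, in isolation:

def clamp_unwrapped_lower_x(extent_global, is_unwrapped):
    # Reset a negative lower x-bound to 0.0 for unwrapped (0-360) data.
    if is_unwrapped and extent_global[0] < 0.0:
        e = list(extent_global)
        e[0] = 0.0
        extent_global = tuple(e)
    return extent_global

assert clamp_unwrapped_lower_x((-0.5, -90.0, 359.5, 90.0), True) == (0.0, -90.0, 359.5, 90.0)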
Example #47
0
File: ocli.py Project: huard/ocgis
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate):
    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger='ocli.chunked_rwg', force=True)

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge-only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                raise ValueError("Working directory 'wd' must not exist.")
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        ocgis.vm.barrier()

    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not be in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        # TODO: This path should be customizable.
        spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                gs.create_merged_weight_file(weight)
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    return 0
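Both `chunked_rwg` variants create the working directory on rank 0 only and then broadcast the path so every process agrees on one location. The same pattern expressed directly with mpi4py (ocgis wraps this in its `vm` object; the sketch below bypasses that wrapper):

import tempfile

from mpi4py import MPI

comm = MPI.COMM_WORLD

wd = None
if comm.Get_rank() == 0:
    # Only rank 0 touches the filesystem.
    wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
# Every rank receives the same path.
wd = comm.bcast(wd, root=0)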
Example #48
0
    def write_chunks(self):
        """
        Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
        regridding weights for each subset if requested.
        """
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []
        src_slices = []
        index_path = self.create_full_path_from_template('index_file')

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

        ctr = 1
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='starting self.iter_src_grid_subsets',
                 level=logging.DEBUG)
        for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(
                yield_dst=True):
            ocgis_lh(
                logger=_LOCAL_LOGGER,
                msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                level=logging.DEBUG)

            src_path = self.create_full_path_from_template('src_template',
                                                           index=ctr)
            dst_path = self.create_full_path_from_template('dst_template',
                                                           index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template',
                                                           index=ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_path)
            dst_slices.append(dst_slc)
            src_slices.append(src_slc)

            # Only write destinations if an iterator is not provided.
            if self.iter_dst is None:
                zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
            else:
                zip_args = [[sub_src], [src_path]]

            cc = 1
            for target, path in zip(*zip_args):
                with vm.scoped_by_emptyable('field.write' + str(cc), target):
                    if not vm.is_null:
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='write_chunks:writing: {}'.format(path),
                                 level=logging.DEBUG)
                        field = Field(grid=target)
                        field.write(path)
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='write_chunks:finished writing: {}'.format(
                                path),
                            level=logging.DEBUG)
                cc += 1

            # Increment the counter outside of the loop to avoid counting empty subsets.
            ctr += 1

            # Generate an ESMF weights file if requested and at least one rank has data on it.
            if self.genweights and len(
                    vm.get_live_ranks_from_object(sub_src)) > 0:
                vm.barrier()
                ocgis_lh(logger=_LOCAL_LOGGER,
                         msg='write_chunks:writing esmf weights: {}'.format(
                             wgt_path),
                         level=logging.DEBUG)
                self.write_esmf_weights(src_path,
                                        dst_path,
                                        wgt_path,
                                        src_grid=sub_src,
                                        dst_grid=sub_dst)
                vm.barrier()

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0.
        gathered_src_grid_slice = vm.gather(src_slices)
        if vm.rank == 0:
            len_src_slices = len(src_slices)
            new_src_grid_slice = [None] * len_src_slices
            for idx in range(len_src_slices):
                for rank_src_grid_slice in gathered_src_grid_slice:
                    if rank_src_grid_slice[idx] is not None:
                        new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                        break
            src_slices = new_src_grid_slice

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = [
                    'source_filename', 'destination_filename',
                    'weights_filename'
                ]
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{
                    'esmf_role': 'grid_chunker_source'
                }, {
                    'esmf_role': grid_chunker_destination
                }, {
                    'esmf_role': 'grid_chunker_weights'
                }]

                vc = VariableCollection()

                grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_chunker_index)
                vidx.attrs['esmf_role'] = grid_chunker_index
                vidx.attrs['grid_chunker_source'] = 'source_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_chunker_weights'] = 'weights_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DST_GRID_SHAPE] = dst_global_shape

                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx],
                                 dimensions=dim,
                                 dtype=str,
                                 value=values[idx],
                                 attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source -----------------------------------------------------------------------------------------------
                self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE,
                                                       vidx, vc, src_slices,
                                                       dim, bounds_dimension)

                # Destination ------------------------------------------------------------------------------------------
                self.dst_grid._gc_create_index_bounds_(
                    RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                    bounds_dimension)

                vc.write(index_path)

        vm.barrier()
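The gather-and-collapse step above keeps, for each chunk index, the first non-None slice reported by any rank. The same reduction without the MPI plumbing:

def collapse_gathered(gathered):
    # gathered: one list per rank, all the same length; keep the first
    # non-None entry found for each position.
    n = len(gathered[0])
    collapsed = [None] * n
    for idx in range(n):
        for per_rank in gathered:
            if per_rank[idx] is not None:
                collapsed[idx] = per_rank[idx]
                break
    return collapsed

assert collapse_gathered([[None, 'b'], ['a', None]]) == ['a', 'b']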
Example #49
0
    def execute(self, coll, file_only=False):
        ## switch collection type based on the types of calculations present
        if self._check_calculation_members_(self.funcs, OcgCvArgFunction):
            klass = MultivariateCalcCollection
        elif self._check_calculation_members_(self.funcs, KeyedFunctionOutput):
            klass = KeyedOutputCalcCollection
        else:
            klass = CalcCollection
        ret = klass(coll, funcs=self.funcs)
        ocgis_lh(msg='returning collection of type {0}'.format(klass),
                 logger='calc.engine')

        ## group the variables. if grouping is None, calculations are performed
        ## on each element, taking advantage of array computations.
        if self.grouping is not None:
            ocgis_lh('setting temporal grouping(s)', 'calc.engine')
            for ds in coll.variables.itervalues():
                ds.temporal.set_grouping(self.grouping)

        ## iterate over functions
        for f in self.funcs:
            ocgis_lh('calculating: {0}'.format(f), logger='calc.engine')
            ## change behavior for multivariate functions
            if issubclass(f['ref'], OcgCvArgFunction) or (
                    isinstance(ret, MultivariateCalcCollection)
                    and f['ref'] == SampleSize):
                ## do not calculate sample size for multivariate calculations
                ## yet
                if f['ref'] == SampleSize:
                    ocgis_lh(
                        'sample size calculations not implemented for multivariate calculations yet',
                        'calc.engine',
                        level=logging.WARN)
                    continue
                ## cv-controlled multivariate functions require collecting
                ## data arrays before passing to function.
                kwds = f['kwds'].copy()
                ## reference the appropriate datasets to pass to the calculation
                keyed_datasets = {}
                for ii, key in enumerate(f['ref'].keys):
                    ## the name of the variable passed in the request
                    ## that should be mapped to the named argument
                    backref = kwds[key]
                    ## pull associated data
                    dref = coll.variables[backref]
                    ## map the key to a dataset
                    keyed_datasets.update({key: dref})
                    value, weights = self._get_value_weights_(
                        dref, file_only=file_only)
                    ## get the calculation groups and weights.
                    if ii == 0:
                        if self.grouping is None:
                            dgroups = None
                        else:
                            dgroups = dref.temporal.group.dgroups
                    ## update dict with properly referenced data
                    kwds.update({key: value})
                ## function object instance
                ref = f['ref'](agg=self.agg,
                               groups=dgroups,
                               kwds=kwds,
                               weights=weights,
                               dataset=keyed_datasets,
                               calc_name=f['name'],
                               file_only=file_only)
                calc = ref.calculate()
                ## store calculation value
                ret.calc[f['name']] = calc
            else:
                ## perform calculation on each variable
                for alias, var in coll.variables.iteritems():
                    if alias not in ret.calc:
                        ret.calc[alias] = OrderedDict()
                    value, weights = self._get_value_weights_(
                        var, file_only=file_only)
                    ## make the function instance
                    try:
                        ref = f['ref'](values=value,
                                       agg=self.agg,
                                       groups=var.temporal.group.dgroups,
                                       kwds=f['kwds'],
                                       weights=weights,
                                       dataset=var,
                                       calc_name=f['name'],
                                       file_only=file_only)
                    except AttributeError:
                        ## if there is no grouping, there is no need to calculate
                        ## sample size.
                        if self.grouping is None and f['ref'] == SampleSize:
                            break
                        elif self.grouping is None:
                            e = NotImplementedError(
                                'Univariate calculations must have a temporal grouping.'
                            )
                            ocgis_lh(exc=e, logger='calc.engine')
                        else:
                            raise
                    ## calculate the values
                    calc = ref.calculate()
                    ## store the values
                    ret.calc[alias][f['name']] = calc
        return ret
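
The class dispatch above hinges on a membership check over the requested functions. A hedged sketch of what that helper might look like, assuming ``funcs`` is the same sequence of calculation dictionaries iterated above (each carrying a ``'ref'`` class); the actual helper is not shown in this example:

    def _check_calculation_members_(self, funcs, klass):
        # True if any requested calculation references a subclass of ``klass``.
        return any(issubclass(f['ref'], klass) for f in funcs)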
Example #50
0
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    ocgis_lh(msg='entering reduce_reindex_coordinate_index', logger='geomc', level=logging.DEBUG)

    # Get the coordinate index values as a NumPy array.
    try:
        ocgis_lh(msg='calling cindex.get_value()', logger='geomc', level=logging.DEBUG)
        ocgis_lh(msg='cindex.has_allocated_value={}'.format(cindex.has_allocated_value), logger='geomc',
                 level=logging.DEBUG)
        ocgis_lh(msg='cindex.dimensions[0]={}'.format(cindex.dimensions[0]), logger='geomc', level=logging.DEBUG)
        cindex = cindex.get_value()
        ocgis_lh(msg='finished cindex.get_value()', logger='geomc', level=logging.DEBUG)
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Only work with 1D arrays.
    cindex = np.atleast_1d(cindex)
    # Used to return the coordinate index to the original shape of the incoming coordinate index.
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Create the unique coordinate index array.
    ocgis_lh(msg='calling create_unique_global_array', logger='geomc', level=logging.DEBUG)
    if vm.size > 1:
        u = np.array(create_unique_global_array(cindex))
    else:
        u = np.unique(cindex)
    ocgis_lh(msg='finished create_unique_global_array', logger='geomc', level=logging.DEBUG)

    # Synchronize the data type for the new coordinate index.
    lrank = vm.rank
    if lrank == 0:
        dtype = u.dtype
    else:
        dtype = None
    dtype = vm.bcast(dtype)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # Create the new coordinate index.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Create a hash for the new index. This is used to remap the old coordinate index.
    if has_u:
        uidx = {ii: jj for ii, jj in zip(u, new_u)}
    else:
        uidx = None

    vm.barrier()

    # Construct local bounds for the rank's unique values. This is used as a cheap index when ranks are looking for
    # index overlaps.
    if has_u:
        local_bounds = min(u), max(u)
    else:
        local_bounds = None
    # Put a copy for the bounds indexing on each rank.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about: those whose unique-value bounds overlap the local coordinate index.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank:
            continue
        if lb is not None:
            contains = lb[0] <= cindex
            contains = np.logical_and(lb[1] >= cindex, contains)
            if np.any(contains):
                overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.gather(overlaps)
    global_overlaps = vm.bcast(global_overlaps)
    destinations = [ii for ii, jj in enumerate(global_overlaps) if vm.rank in jj]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # vm.barrier_print('starting run_rr')
    # Fill the new coordinate indexing.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search,
                       tag_success)
    # vm.barrier_print('finished run_rr')

    # Return array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
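
A minimal usage sketch, assuming a serial (single-rank) run so the ``np.unique`` branch is taken:

    import numpy as np

    cindex = np.array([10, 12, 10, 15])
    new_cindex, unique = reduce_reindex_coordinate_index(cindex, start_index=0)
    # new_cindex -> array([0, 1, 0, 2]); unique -> array([10, 12, 15]). The
    # unique indices may be used to reduce an external coordinate storage
    # array to only the referenced coordinates.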
Example #51
0
File: base.py Project: imclab/ocgis
 def aggregate_spatial(self, fill):
     exc = NotImplementedError(
         'Spatial aggregation of raw input values not implemented for keyed output functions.'
     )
     ocgis_lh(exc=exc, logger='calc.library')
Example #52
0
    def execute(self):
        # check for a user-supplied output prefix
        prefix = self.ops.prefix

        # do directory management #

        # flag to indicate a directory was created. mostly a precaution to make sure the appropriate directory is
        # removed on failure.
        made_output_directory = False

        if self.ops.output_format in self._no_directory:
            # No output directory for some formats.
            outdir = None
        else:
            # A directory or a single output file is created for the other cases.
            if self.ops.add_auxiliary_files:
                # Auxiliary files require that a directory be created.
                outdir = os.path.join(self.ops.dir_output, prefix)
                # Create and/or remove the output directory.
                if vm.rank == 0:
                    if os.path.exists(outdir):
                        if env.OVERWRITE:
                            shutil.rmtree(outdir)
                        else:
                            raise IOError('The output directory exists but env.OVERWRITE is False: {0}'.format(outdir))
                    os.mkdir(outdir)
                # Block until output directory is created. Most often the zero rank manages writing, but this is not a
                # requirement.
                vm.Barrier()
                # On an exception, the output directory needs to be removed.
                made_output_directory = True
            else:
                # with no auxiliary files the output directory will do just fine
                outdir = self.ops.dir_output

        try:
            # configure logging ########################################################################################

            progress = self._get_progress_and_configure_logging_(outdir, prefix)

            # create local logger
            interpreter_log = ocgis_lh.get_logger('interpreter')

            ocgis_lh('Initializing...', interpreter_log)

            # set up environment #######################################################################################

            # run validation - doesn't do much now
            self.check()

            # do not perform vector wrapping for NetCDF output
            if self.ops.output_format == 'nc':
                ocgis_lh('"vector_wrap" set to False for netCDF output',
                         interpreter_log, level=logging.WARN)
                self.ops.vector_wrap = False

            # if the requested output format is "meta" then no operations are run and only the operations dictionary is
            # required to generate output.
            Converter = self.ops._get_object_(OutputFormat.name).get_converter_class()
            if issubclass(Converter, AbstractMetaConverter):
                ret = Converter(self.ops).write()
            # this is the standard request for other output types.
            else:
                # the operations object performs subsetting and calculations
                ocgis_lh('initializing subset', interpreter_log, level=logging.DEBUG)
                so = OperationsEngine(self.ops, progress=progress)
                # if there is no grouping on the output files, a single converter is needed
                if self.ops.output_grouping is None:
                    ocgis_lh('initializing converter', interpreter_log, level=logging.DEBUG)
                    conv = self._get_converter_(Converter, outdir, prefix, so)
                    ocgis_lh('starting converter write loop: {0}'.format(self.ops.output_format), interpreter_log,
                             level=logging.DEBUG)
                    ret = conv.write()
                else:
                    raise NotImplementedError

            ocgis_lh('Operations successful.', interpreter_log)

            return ret
        except:
            # The output directory needs to be removed if one was created. Shut down logging first to make sure there
            # is no file lock (Windows).
            ocgis_lh.shutdown()
            if vm.rank == 0 and made_output_directory:
                shutil.rmtree(outdir)
            raise
        finally:
            ocgis_lh.shutdown()

            if env.ADD_OPS_MPI_BARRIER:
                vm.Barrier()
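
From the caller's side, the directory policy above reduces to the ``env.OVERWRITE`` flag. A hedged usage sketch (the file path and request are illustrative):

    import ocgis

    ocgis.env.OVERWRITE = True  # allow an existing output directory to be replaced
    rd = ocgis.RequestDataset(uri='/tmp/tas.nc')
    ops = ocgis.OcgOperations(dataset=rd, dir_output='/tmp', prefix='run1')
    ret = ops.execute()  # re-running with the same prefix no longer raises IOError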
Example #53
0
    def _iter_collections_(self):
        """:rtype: :class:`ocgis.collection.base.AbstractCollection`"""

        # Multivariate calculations require datasets come in as a list with all variable inputs part of the same
        # sequence.
        if self._has_multivariate_calculations:
            itr_rd = [[rd for rd in self.ops.dataset]]
        # Otherwise, geometry processing expects single-element sequences.
        else:
            itr_rd = [[rd] for rd in self.ops.dataset]

        # Configure the progress object.
        self._progress.n_subsettables = len(itr_rd)
        self._progress.n_geometries = get_default_or_apply(self.ops.geom,
                                                           len,
                                                           default=1)
        self._progress.n_calculations = get_default_or_apply(self.ops.calc,
                                                             len,
                                                             default=0)

        # Some introductory logging.
        msg = '{0} dataset collection(s) to process.'.format(
            self._progress.n_subsettables)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self.ops.geom is None:
            msg = 'Entire spatial domain returned. No selection geometries requested.'
        else:
            msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
                self._progress.n_geometries)
        ocgis_lh(msg=msg, logger=self._subset_log)
        if self._progress.n_calculations == 0:
            msg = 'No calculations requested.'
        else:
            msg = 'The following calculations will be applied to each data collection: {0}.'. \
                format(', '.join([_['func'] for _ in self.ops.calc]))
        ocgis_lh(msg=msg, logger=self._subset_log)

        # Process the incoming datasets. Convert from request datasets to fields as needed.
        for rds in itr_rd:

            try:
                msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
            except AttributeError:
                # Field objects have no URIs. Multivariate calculations change how the request dataset iterator is
                # configured as well.
                msg = []
                for rd in rds:
                    try:
                        msg.append(rd.uri)
                    except AttributeError:
                        # Likely a field object which does have a name.
                        msg.append(rd.name)
                msg = 'Processing URI(s) / field names: {0}'.format(msg)
            ocgis_lh(msg=msg, logger=self._subset_log)

            for coll in self._process_subsettables_(rds):
                # If there are calculations, do those now and return a collection.
                if not vm.is_null and self.cengine is not None:
                    ocgis_lh('Starting calculations.', self._subset_log)
                    raise_if_empty(coll)

                    # Look for any temporal grouping optimizations.
                    if self.ops.optimizations is None:
                        tgds = None
                    else:
                        tgds = self.ops.optimizations.get('tgds')

                    # Execute the calculations.
                    coll = self.cengine.execute(coll,
                                                file_only=self.ops.file_only,
                                                tgds=tgds)

                    # If we need to spatially aggregate and calculations used raw values, update the collection
                    # fields and subset geometries.
                    if self.ops.aggregate and self.ops.calc_raw:
                        coll_to_itr = coll.copy()
                        for sfield, container in coll_to_itr.iter_fields(
                                yield_container=True):
                            sfield = _update_aggregation_wrapping_crs_(
                                self, None, sfield, container, None)
                            coll.add_field(sfield, container, force=True)
                else:
                    # If there are no calculations, mark progress to indicate a geometry has been completed.
                    self._progress.mark()

                # Conversion of groups.
                if self.ops.output_grouping is not None:
                    raise NotImplementedError
                else:
                    ocgis_lh('_iter_collections_ yielding',
                             self._subset_log,
                             level=logging.DEBUG)
                    yield coll
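
The progress configuration above leans on ``get_default_or_apply``. A hedged sketch of the semantics implied by its call sites (the real helper lives elsewhere in ocgis): apply the function when the target is not None, otherwise return the default.

    def get_default_or_apply(target, func, default=None):
        # e.g. get_default_or_apply(ops.geom, len, default=1)
        return default if target is None else func(target)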
Example #54
0
File: core.py Project: huard/ocgis
    def __init__(self,
                 uri=None,
                 variable=None,
                 units=None,
                 time_range=None,
                 time_region=None,
                 time_subset_func=None,
                 level_range=None,
                 conform_units_to=None,
                 crs='auto',
                 t_units=None,
                 t_calendar=None,
                 t_conform_units_to=None,
                 grid_abstraction='auto',
                 grid_is_isomorphic='auto',
                 dimension_map=None,
                 field_name=None,
                 driver=None,
                 regrid_source=True,
                 regrid_destination=False,
                 metadata=None,
                 format_time=True,
                 opened=None,
                 uid=None,
                 rename_variable=None,
                 predicate=None,
                 rotated_pole_priority=False,
                 driver_kwargs=None):
        self._is_init = True

        self._field_name = field_name
        self._level_range = None
        self._time_range = None
        self._time_region = None
        self._time_subset_func = None
        self._driver_kwargs = driver_kwargs

        if rename_variable is not None:
            rename_variable = get_tuple(rename_variable)
        self._rename_variable = rename_variable

        self.rotated_pole_priority = rotated_pole_priority
        self.predicate = predicate
        if dimension_map is not None and isinstance(dimension_map, dict):
            dimension_map = DimensionMap.from_dict(dimension_map)
        self._dimension_map = dimension_map

        self._metadata = deepcopy(metadata)
        self._uri = None
        self.uid = uid

        # This is an "open" file-like object that may be passed in-place of file location parameters.
        self._opened = opened
        if opened is not None and driver is None:
            msg = 'If "opened" is not None, then a "driver" must be provided.'
            ocgis_lh(logger='request',
                     exc=RequestValidationError('driver', msg))

        # Field creation options.
        self.format_time = format_time
        self.grid_abstraction = grid_abstraction
        self.grid_is_isomorphic = grid_is_isomorphic
        # Flag used for regridding to determine if the coordinate system was assigned during initialization.
        self._has_assigned_coordinate_system = crs != 'auto'

        if uri is None:
            # Fields may be created from pure metadata.
            if metadata is not None:
                # The default OCGIS driver is NetCDF.
                if driver is None:
                    driver = DriverKey.NETCDF_CF
            elif opened is None:
                ocgis_lh(logger='request',
                         exc=RequestValidationError('uri', 'Cannot be None'))
        else:
            self._uri = get_uri(uri)

        if driver is None:
            klass = get_autodiscovered_driver(uri)
        else:
            klass = get_driver(driver)
        self._driver = klass(self)

        if variable is not None:
            variable = get_tuple(variable)
        self._variable = variable

        self.time_range = time_range
        self.time_region = time_region
        self.time_subset_func = time_subset_func
        self.level_range = level_range

        self._crs = deepcopy(crs)

        self.regrid_source = regrid_source
        self.regrid_destination = regrid_destination

        self.units = units
        self.conform_units_to = conform_units_to

        self._is_init = False

        self._validate_time_subset_()

        # Update metadata for time variable.
        tvar = self.dimension_map.get_variable(DMK.TIME)
        if tvar is not None:
            m = self.metadata['variables'][tvar]
            if t_units is not None:
                m['attrs']['units'] = t_units
            if t_calendar is not None:
                m['attrs']['calendar'] = t_calendar
            if t_conform_units_to is not None:
                from ocgis.util.units import get_units_object
                t_calendar = m['attrs'].get(
                    'calendar', constants.DEFAULT_TEMPORAL_CALENDAR)
                t_conform_units_to = get_units_object(t_conform_units_to,
                                                      calendar=t_calendar)
                m['conform_units_to'] = t_conform_units_to
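
As the initializer shows, a plain dictionary passed as ``dimension_map`` is coerced through ``DimensionMap.from_dict``. A hedged construction sketch, assuming this is the ``RequestDataset`` initializer; the entry schema here is illustrative:

    rd = RequestDataset(uri='/tmp/tas.nc',
                        dimension_map={'time': {'variable': 'time'},
                                       'x': {'variable': 'lon'},
                                       'y': {'variable': 'lat'}})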
Example #55
0
def _update_aggregation_wrapping_crs_(obj, alias, sfield, subset_sdim,
                                      subset_ugid):
    raise_if_empty(sfield)

    ocgis_lh('entering _update_aggregation_wrapping_crs_',
             obj._subset_log,
             alias=alias,
             ugid=subset_ugid,
             level=logging.DEBUG)

    # Aggregate if requested.
    if obj.ops.aggregate:
        ocgis_lh('aggregate requested in _update_aggregation_wrapping_crs_',
                 obj._subset_log,
                 alias=alias,
                 ugid=subset_ugid,
                 level=logging.DEBUG)

        # There may be no geometries if we are working with a gridded dataset. Load the geometries if this is the case.
        sfield.set_abstraction_geom()

        ocgis_lh(
            'after sfield.set_abstraction_geom in _update_aggregation_wrapping_crs_',
            obj._subset_log,
            alias=alias,
            ugid=subset_ugid,
            level=logging.DEBUG)

        # Union the geometries and spatially average the data variables.
        # with vm.scoped(vm.get_live_ranks_from_object(sfield)):
        sfield = sfield.geom.get_unioned(spatial_average=sfield.data_variables)
        ocgis_lh(
            'after sfield.geom.get_unioned in _update_aggregation_wrapping_crs_',
            obj._subset_log,
            alias=alias,
            ugid=subset_ugid,
            level=logging.DEBUG)

        # None is returned for the non-root process. Check we are in parallel and create an empty field.
        if sfield is None:
            if vm.size == 1:
                raise ValueError(
                    'None should not be returned from get_unioned if running on a single processor.'
                )
            else:
                sfield = Field(is_empty=True)
        else:
            sfield = sfield.parent

        vm.create_subcomm_by_emptyable(SubcommName.SPATIAL_AVERAGE,
                                       sfield,
                                       is_current=True,
                                       clobber=True)

        if not vm.is_null and subset_sdim is not None and subset_sdim.geom is not None:
            # Add the unique geometry identifier variable. This should match the selection geometry's identifier.
            new_gid_variable_kwargs = dict(
                name=HeaderName.ID_GEOMETRY,
                value=subset_sdim.geom.ugid.get_value(),
                dimensions=sfield.geom.dimensions)
            dm = get_data_model(obj.ops)
            new_gid_variable = create_typed_variable_from_data_model(
                'int', data_model=dm, **new_gid_variable_kwargs)
            sfield.geom.set_ugid(new_gid_variable)

    if vm.is_null:
        ocgis_lh(msg='null communicator following spatial average. returning.',
                 logger=obj._subset_log,
                 level=logging.DEBUG)
        return sfield

    raise_if_empty(sfield)
    ocgis_lh(msg='before wrapped_state in _update_aggregation_wrapping_crs_',
             logger=obj._subset_log,
             level=logging.DEBUG)
    try:
        wrapped_state = sfield.wrapped_state
    except WrappedStateEvalTargetMissing:
        # If there is no target for wrapping evaluation, then consider this unknown.
        wrapped_state = WrappedState.UNKNOWN
    ocgis_lh(msg='after wrapped_state in _update_aggregation_wrapping_crs_',
             logger=obj._subset_log,
             level=logging.DEBUG)

    # Wrap the returned data.
    if not env.OPTIMIZE_FOR_CALC and not sfield.is_empty:
        if wrapped_state == WrappedState.UNWRAPPED:
            ocgis_lh('wrap target is empty: {}'.format(sfield.is_empty),
                     obj._subset_log,
                     level=logging.DEBUG)

            # There may be no geometries if we are working with a gridded dataset. Load the geometries if this
            # is the case.
            sfield.set_abstraction_geom()

            if obj.ops.output_format in constants.VECTOR_OUTPUT_FORMATS and obj.ops.vector_wrap:
                ocgis_lh('wrapping output geometries',
                         obj._subset_log,
                         alias=alias,
                         ugid=subset_ugid,
                         level=logging.DEBUG)

                # Copy the geometry array before wrapping as wrapping is performed in place. The original field may
                # need to be reused for additional subsets.
                geom = sfield.geom
                copied_geom = geom.get_value().copy()
                geom.set_value(copied_geom)
                geom.wrap()
                ocgis_lh('finished wrapping output geometries',
                         obj._subset_log,
                         alias=alias,
                         ugid=subset_ugid,
                         level=logging.DEBUG)

    # Transform back to rotated pole if necessary.
    original_rotated_pole_crs = obj._backtransform.get(
        constants.BackTransform.ROTATED_POLE)
    if original_rotated_pole_crs is not None:
        if not isinstance(obj.ops.output_crs, (Spherical, WGS84)):
            sfield.update_crs(original_rotated_pole_crs)

    # Update the coordinate system of the data output.
    if obj.ops.output_crs is not None:

        # If the geometry is not none, it may need to be projected to match the output coordinate system.
        if subset_sdim is not None and subset_sdim.crs != obj.ops.output_crs:
            subset_sdim.update_crs(obj.ops.output_crs)

        # Update the subsetted field's coordinate system.
        sfield = sfield.copy()
        sfield.update_crs(obj.ops.output_crs)

    # Wrap or unwrap the data if the coordinate system permits.
    _update_wrapping_(obj, sfield)

    ocgis_lh('leaving _update_aggregation_wrapping_crs_',
             obj._subset_log,
             level=logging.DEBUG)

    return sfield
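
The copy-before-wrap step above works because shapely geometries are immutable: wrapping rebinds elements of the geometry array rather than mutating them, so a shallow copy of the array is enough to protect the original field. A minimal illustration (geometries and coordinates are illustrative):

    import numpy as np
    from shapely.geometry import Point

    original = np.empty(1, dtype=object)
    original[0] = Point(200.0, 40.0)
    safe = original.copy()           # shallow copy: a new array of references
    safe[0] = Point(-160.0, 40.0)    # rebinding an element touches only the copy
    assert original[0].x == 200.0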
Example #56
0
    def write(self):
        ocgis_lh('starting write method', self._log, logging.DEBUG)

        # Indicates if user geometries should be written to file.
        write_ugeom = False

        # Path to the output object.
        f = {KeywordArgument.PATH: self.path}

        build = True
        for coll in self:
            # This will be changed to "write" if we are on the build loop.
            write_mode = MPIWriteMode.APPEND

            if build:
                # During a build loop, create the file and write the first series of records. Let the drivers determine
                # the appropriate write modes for handling parallelism.
                write_mode = None

                # Write the user geometries if selected and there is one present on the incoming collection.
                if self._add_ugeom and coll.has_container_geometries:
                    write_ugeom = True

                if write_ugeom:
                    if vm.rank == 0:
                        # The output file name for the user geometries.
                        ugid_shp_name = self.prefix + '_ugid.shp'
                        if self._add_ugeom_nest:
                            ugeom_fiona_path = os.path.join(
                                self._get_or_create_shp_folder_(),
                                ugid_shp_name)
                        else:
                            ugeom_fiona_path = os.path.join(
                                self.outdir, ugid_shp_name)
                    else:
                        ugeom_fiona_path = None

                build = False

            f[KeywordArgument.WRITE_MODE] = write_mode
            self._write_coll_(f, coll)

            if write_ugeom:
                with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                    if not vm.is_null:
                        for subset_field in list(coll.children.values()):
                            subset_field.write(ugeom_fiona_path,
                                               write_mode=write_mode,
                                               driver=DriverVector)

        # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
        ops = self.ops
        if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
            # Add OCGIS metadata output if requested.
            if self.add_meta:
                ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
                from ocgis.conv.meta import MetaOCGISConverter

                lines = MetaOCGISConverter(ops).write()
                out_path = os.path.join(
                    self.outdir,
                    self.prefix + '_' + MetaOCGISConverter._meta_filename)
                with open(out_path, 'w') as metafile:
                    metafile.write(lines)

            # Add the dataset descriptor file if requested.
            if self._add_did_file:
                ocgis_lh('writing dataset description (DID) file', 'conv',
                         logging.DEBUG)
                path = os.path.join(self.outdir, self.prefix + '_did.csv')
                _write_dataset_identifier_file_(path, ops)

            # Add source metadata if requested.
            if self._add_source_meta:
                ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
                path = os.path.join(self.outdir,
                                    self.prefix + '_source_metadata.txt')
                _write_source_meta_(path, ops)

        # Return the internal path unless overloaded by subclasses.
        ret = self._get_return_()

        return ret
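
The build/append loop above is a common converter pattern: the first collection creates the output and later collections append to it. Schematically (a hedged sketch; ``write_collection`` stands in for the driver write call and is not part of the API shown):

    build = True
    for coll in colls:
        # None lets the drivers choose create/parallel write modes; afterwards, append.
        write_mode = None if build else MPIWriteMode.APPEND
        write_collection(coll, write_mode=write_mode)
        build = False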
Example #57
0
 def value_datetime(self):
     if self._value_datetime is None:
         if self._get_optimized_('_value_datetime') is False:
             ocgis_lh('getting value_datetime', 'nc.dimension', logging.DEBUG)
             self._value_datetime = np.atleast_1d(self.get_datetime(self.value))
     return self._value_datetime
Example #58
0
File: nc.py Project: moghimis/ocgis
def create_dimension_map_entry(src, variables, strict=False, attr_name='axis'):
    """
    Create a dimension map entry dictionary by searching variable metadata using attribute constraints.

    :param src: The source information to use for constructing the entry. If ``src`` is a dictionary, it must have two
     entries. The key ``'value'`` corresponds to the string attribute value. The key ``'axis'`` is the representative
     axis to assign the source value (for example ``'X'`` or ``'Y'``).
    :type src: str | dict
    :param dict variables: The metadata entries for the group's variables.
    :param bool strict: If ``False``, do not use a strict interpretation of metadata. Allow some standard approaches for
     handling metadata exceptions.
    :param str attr_name: Name of the attribute to use for checking the attribute values from ``src``.
    :rtype: dict
    """
    if isinstance(src, dict):
        axis = src['axis']
        attr_value = src['value']
    else:
        axis = src
        attr_value = src

    axis_vars = []
    for variable in list(variables.values()):
        vattrs = variable.get('attrs', {})
        if vattrs.get(attr_name) == attr_value:
            if len(variable['dimensions']) > 0:
                axis_vars.append(variable['name'])

    # Try to find the variable by default names.
    if not strict and len(axis_vars) == 0:
        possible_names = CFName.get_axis_mapping().get(axis, [])
        for pn in possible_names:
            if pn in list(variables.keys()):
                axis_vars.append(variables[pn]['name'])

    if len(axis_vars) == 1:
        var_name = axis_vars[0]
        dims = list(variables[var_name]['dimensions'])

        if not strict:
            # Use default index positions for X/Y dimensions.
            if axis in ('X', 'Y') and len(dims) > 1:
                if axis == 'Y':
                    dims = [dims[0]]
                elif axis == 'X':
                    dims = [dims[1]]

        ret = {'variable': var_name, DimensionMapKey.DIMENSION: dims}
    elif len(axis_vars) > 1:
        msg = 'Multiple axis (axis="{}") possibilities found using variable(s) "{}". Use a dimension map to specify ' \
              'the appropriate coordinate dimensions.'
        ocgis_lh(msg.format(axis, axis_vars),
                 level=logging.WARN,
                 logger='ocgis.driver.nc',
                 force=True)
        ret = None
    else:
        ret = None
    return ret
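
A usage sketch with hypothetical metadata; the dictionary layout mirrors the access patterns above ('attrs', 'dimensions', 'name'):

    variables = {
        'lat': {'name': 'lat', 'dimensions': ('y',), 'attrs': {'axis': 'Y'}},
        'tas': {'name': 'tas', 'dimensions': ('time', 'y', 'x'), 'attrs': {}},
    }
    entry = create_dimension_map_entry('Y', variables)
    # entry -> {'variable': 'lat', DimensionMapKey.DIMENSION: ['y']}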
Example #59
0
    def iter_geoms(self, key=None, select_uid=None, path=None, load_geoms=True, as_field=False,
                   uid=None, select_sql_where=None, slc=None, union=False, data_model=None,
                   driver_kwargs=None):
        """
        See documentation for :class:`~ocgis.GeomCabinetIterator`.
        """

        # Get the path to the output shapefile.
        shp_path = self._get_path_by_key_or_direct_path_(key=key, path=path)

        # Get the source metadata.
        meta = self.get_meta(path=shp_path, driver_kwargs=driver_kwargs)

        if union:
            gic = GeomCabinetIterator(key=key, select_uid=select_uid, path=path, load_geoms=load_geoms, as_field=False,
                                      uid=uid, select_sql_where=select_sql_where, slc=slc, union=False,
                                      data_model=data_model, driver_kwargs=driver_kwargs)
            yld = Field.from_records(gic, meta['schema'], crs=meta['crs'], uid=uid, union=True, data_model=data_model)
            yield yld
        else:
            if slc is not None and (select_uid is not None or select_sql_where is not None):
                exc = ValueError('Slice is not allowed with other select statements.')
                ocgis_lh(exc=exc, logger='geom_cabinet')

            # Format the slice for iteration. We will get the features by index if a slice is provided.
            if slc is not None:
                slc = get_index_slice_for_iteration(slc)

            # Open the target geometry file.
            ds = ogr.Open(shp_path)
            try:
                # Return the features iterator.
                features = self._get_features_object_(ds, uid=uid, select_uid=select_uid,
                                                      select_sql_where=select_sql_where, driver_kwargs=driver_kwargs)

                # Using slicing, we will select the features individually from the object.
                if slc is None:
                    itr = features
                else:
                    # The geodatabase API requires iterating to reach the given location.
                    if self.get_gdal_driver(shp_path) == 'OpenFileGDB' or isinstance(slc, slice):
                        def _o_itr_(features_object, slice_start, slice_stop):
                            for ctr2, fb in enumerate(features_object):
                                # ... iterate until start is reached.
                                if ctr2 < slice_start:
                                    continue
                                # ... stop when the stop index is reached. use "return" rather
                                # than raising StopIteration inside the generator (PEP 479).
                                elif ctr2 == slice_stop:
                                    return
                                yield fb

                        itr = _o_itr_(features, slc.start, slc.stop)
                    else:
                        # Convert the slice index to an integer to avoid a type conflict in the GDAL layer.
                        itr = (features.GetFeature(int(idx)) for idx in slc)

                # Convert feature objects to record dictionaries.
                for ctr, feature in enumerate(itr):
                    if load_geoms:
                        yld = {'geom': wkb.loads(feature.geometry().ExportToWkb())}
                    else:
                        yld = {}
                    items = feature.items()
                    properties = OrderedDict([(key, items[key]) for key in feature.keys()])
                    yld.update({'properties': properties, 'meta': meta})

                    if ctr == 0:
                        uid, add_uid = get_uid_from_properties(properties, uid)
                        # The properties schema needs to be updated to account for the adding of a unique identifier.
                        if add_uid:
                            meta['schema']['properties'][uid] = 'int'

                    # Add the unique identifier if required
                    if add_uid:
                        properties[uid] = feature.GetFID()
                    # Ensure the unique identifier is an integer
                    else:
                        properties[uid] = int(properties[uid])

                    if as_field:
                        yld = Field.from_records([yld], schema=meta['schema'], crs=yld['meta']['crs'], uid=uid,
                                                 data_model=data_model)

                    yield yld
                try:
                    assert ctr >= 0
                except UnboundLocalError:
                    # occurs if no features were returned by the iterator. raise a clearer exception.
                    msg = 'No features returned from target data source. Were features appropriately selected?'
                    raise ValueError(msg)
            finally:
                # Close or destroy the data source object if it actually exists.
                if ds is not None:
                    ds.Destroy()
                    ds = None
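
A hedged usage sketch, assuming the containing class is ocgis's ``GeomCabinet`` and using an illustrative shapefile path:

    cabinet = GeomCabinet()
    # Select the second and third features by slice. Slices may not be combined
    # with select_uid or select_sql_where.
    for record in cabinet.iter_geoms(path='/tmp/states.shp', slc=slice(1, 3)):
        print(record['properties'])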
Example #60
0
def get_collection((so, geom, logger)):
    '''
    :type so: SubsetOperation
    :type geom: None, GeometryDataset, ShpDataset
    :rtype: AbstractCollection
    '''

    ## initialize the collection object to store the subsetted data.
    coll = RawCollection(ugeom=geom, ops=so.ops)
    ## perform the operations on each request dataset
    ocgis_lh('{0} request dataset(s) to process'.format(len(so.ops.dataset)),
             logger)
    ## reference the geometry ugid
    ugid = None if geom is None else geom.spatial.uid[0]
    for request_dataset in so.ops.dataset:
        ## reference the request dataset alias
        alias = request_dataset.alias
        ocgis_lh('processing',
                 logger,
                 level=logging.INFO,
                 alias=alias,
                 ugid=ugid)
        ## copy the geometry
        copy_geom = deepcopy(geom)
        ## reference the dataset object
        ods = request_dataset.ds
        ## return a slice or do the other operations
        if so.ops.slice is not None:
            ods = ods.__getitem__(so.ops.slice)
        ## other subsetting operations
        else:
            ## if a geometry is passed and the target dataset is 360 longitude,
            ## unwrap the passed geometry to match the spatial domain of the target
            ## dataset.
            if copy_geom is None:
                igeom = None
            else:
                ## check projections adjusting projection the selection geometry
                ## if necessary
                if type(ods.spatial.projection) != type(
                        copy_geom.spatial.projection):
                    msg = 'projecting selection geometry to match input projection: {0} to {1}'
                    msg = msg.format(
                        copy_geom.spatial.projection.__class__.__name__,
                        ods.spatial.projection.__class__.__name__)
                    ocgis_lh(msg, logger, alias=alias, ugid=ugid)
                    copy_geom.project(ods.spatial.projection)
                else:
                    ocgis_lh('projections match',
                             logger,
                             alias=alias,
                             ugid=ugid)
                ## unwrap the data if it is geographic and 360
                if type(ods.spatial.projection) == WGS84 and ods.spatial.is_360:
                    ocgis_lh(
                        'unwrapping selection geometry with axis={0}'.format(
                            ods.spatial.pm),
                        logger,
                        alias=alias,
                        ugid=ugid)
                    w = Wrapper(axis=ods.spatial.pm)
                    copy_geom.spatial.geom[0] = w.unwrap(
                        deepcopy(copy_geom.spatial.geom[0]))
                igeom = copy_geom.spatial.geom[0]
            ## perform the data subset
            try:
                ## pull the temporal subset which may be a range or region. if
                ## it is a snippet operation, set the temporal subset to None
                ## as a slice has already been applied. however, if a calculation
                ## is present leave the temporal subset alone.
                if so.ops.snippet and so.ops.calc is None:
                    temporal = None
                else:
                    temporal = request_dataset.time_range or request_dataset.time_region

                ocgis_lh('executing get_subset', logger, level=logging.DEBUG)
                ods = ods.get_subset(
                    spatial_operation=so.ops.spatial_operation,
                    igeom=igeom,
                    temporal=temporal,
                    level=request_dataset.level_range)

                ## for the case of time range and time region subset, apply the
                ## time region subset following the time range subset.
                if request_dataset.time_range is not None and request_dataset.time_region is not None:
                    ods._temporal = ods.temporal.subset(
                        request_dataset.time_region)

                ## aggregate the geometries and data if requested
                if so.ops.aggregate:
                    ocgis_lh(
                        'aggregating target geometries and area-weighting values',
                        logger,
                        alias=alias,
                        ugid=ugid)
                    ## the new geometry will have the same id as the passed
                    ## geometry. if it does not have one, simply give it a value
                    ## of 1 as it is the only geometry requested for subsetting.
                    try:
                        new_geom_id = copy_geom.spatial.uid[0]
                    except AttributeError:
                        new_geom_id = 1
                    ## do the aggregation in place.
                    clip_geom = None if copy_geom is None else copy_geom.spatial.geom[0]
                    ods.aggregate(new_geom_id=new_geom_id, clip_geom=clip_geom)
                ## wrap the returned data depending on the conditions of the
                ## operations.
                if not env.OPTIMIZE_FOR_CALC:
                    if type(ods.spatial.projection) == WGS84 and \
                       ods.spatial.is_360 and \
                       so.ops.output_format != 'nc' and \
                       so.ops.vector_wrap:
                        ocgis_lh('wrapping output geometries',
                                 logger,
                                 alias=alias,
                                 ugid=ugid)
                        ods.spatial.vector.wrap()
                        ocgis_lh('geometries wrapped',
                                 logger,
                                 alias=alias,
                                 ugid=ugid,
                                 level=logging.DEBUG)
                ## check for all masked values
                if env.OPTIMIZE_FOR_CALC is False and so.ops.file_only is False:
                    if ods.value.mask.all():
                        ## masked data may be okay depending on other operational
                        ## conditions.
                        if so.ops.snippet or so.ops.allow_empty:
                            if so.ops.snippet:
                                ocgis_lh(
                                    'all masked data encountered but allowed for snippet',
                                    logger,
                                    alias=alias,
                                    ugid=ugid,
                                    level=logging.WARN)
                            if so.ops.allow_empty:
                                ocgis_lh(
                                    'all masked data encountered but empty returns allowed',
                                    logger,
                                    alias=alias,
                                    ugid=ugid,
                                    level=logging.WARN)
                            pass
                        else:
                            ## if the geometry is also masked, it is an empty spatial
                            ## operation.
                            if ods.spatial.vector.geom.mask.all():
                                raise EmptyData
                            else:
                                ocgis_lh(None,
                                         logger,
                                         exc=MaskedDataError(),
                                         alias=alias,
                                         ugid=ugid)
            ## there may be no data returned - this may be real or could be an
            ## error. by default, empty returns are not allowed
            except EmptyData as ed:
                if so.ops.allow_empty:
                    if ed.origin == 'time':
                        msg = 'the time subset returned empty but empty returns are allowed'
                    else:
                        msg = 'the geometric operations returned empty but empty returns are allowed'
                    ocgis_lh(msg, logger, alias=alias, ugid=ugid)
                    continue
                else:
                    if ed.origin == 'time':
                        msg = 'empty temporal subset operation'
                    else:
                        msg = 'empty geometric operation'
                    ocgis_lh(msg,
                             logger,
                             exc=ExtentError(msg),
                             alias=alias,
                             ugid=ugid)
        ods.spatial._ugid = ugid
        coll.variables.update({request_dataset.alias: ods})

    ## if there are calculations, do those now and return a new type of collection
    if so.cengine is not None:
        ocgis_lh('performing computations', logger, alias=alias, ugid=ugid)
        coll = so.cengine.execute(coll, file_only=so.ops.file_only)

    ## conversion of groups.
    if so.ops.output_grouping is not None:
        raise NotImplementedError
    else:
        ocgis_lh('subset returning', logger, level=logging.INFO)
        return coll
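
The unwrap step above shifts a selection geometry into the 0-360 longitude domain of the target dataset. A hedged miniature, assuming the ``Wrapper`` class used above; the axis value and geometry are illustrative:

    from shapely.geometry import Point

    w = Wrapper(axis=180.0)
    unwrapped = w.unwrap(Point(-100.0, 40.0))
    # e.g. longitude -100.0 maps to 260.0 in the 0-360 domain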