예제 #1
0
파일: test_base.py 프로젝트: huard/ocgis
    def test_iter_spatial_decomposition(self):
        self.add_barrier = False
        if vm.size not in [1, 4]:
            raise SkipTest('vm.size not in [1, 4]')

        grid = create_gridxy_global(resolution=10.,
                                    wrapped=False,
                                    crs=Spherical())
        splits = (2, 3)
        actual = []
        for sub, slc in iter_spatial_decomposition(grid,
                                                   splits,
                                                   optimized_bbox_subset=True):
            root = vm.get_live_ranks_from_object(sub)[0]
            with vm.scoped_by_emptyable('test extent', sub):
                if vm.is_null:
                    extent_global = None
                else:
                    extent_global = sub.extent_global
            extent_global = vm.bcast(extent_global, root=root)
            actual.append(extent_global)

        desired = [(0.0, -90.0, 120.0, 0.0), (120.0, -90.0, 240.0, 0.0),
                   (240.0, -90.0, 360.0, 0.0), (0.0, 0.0, 120.0, 90.0),
                   (120.0, 0.0, 240.0, 90.0), (240.0, 0.0, 360.0, 90.0)]
        self.assertEqual(actual, desired)
예제 #2
0
파일: test_base.py 프로젝트: NCPP/ocgis
    def test_iter_spatial_decomposition(self):
        self.add_barrier = False
        if vm.size not in [1, 4]:
            raise SkipTest('vm.size not in [1, 4]')

        grid = create_gridxy_global(resolution=10., wrapped=False, crs=Spherical())
        splits = (2, 3)
        actual = []
        for sub, slc in iter_spatial_decomposition(grid, splits, optimized_bbox_subset=True):
            root = vm.get_live_ranks_from_object(sub)[0]
            with vm.scoped_by_emptyable('test extent', sub):
                if vm.is_null:
                    extent_global = None
                else:
                    extent_global = sub.extent_global
            extent_global = vm.bcast(extent_global, root=root)
            actual.append(extent_global)

        desired = [(0.0, -90.0, 120.0, 0.0), (120.0, -90.0, 240.0, 0.0), (240.0, -90.0, 360.0, 0.0),
                   (0.0, 0.0, 120.0, 90.0), (120.0, 0.0, 240.0, 90.0), (240.0, 0.0, 360.0, 90.0)]
        self.assertEqual(actual, desired)
예제 #3
0
    def write_chunks(self):
        """
        Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
        regridding weights for each subset if requested.
        """
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []
        src_slices = []
        index_path = self.create_full_path_from_template('index_file')

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

        ctr = 1
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='starting self.iter_src_grid_subsets',
                 level=logging.DEBUG)
        for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(
                yield_dst=True):
            ocgis_lh(
                logger=_LOCAL_LOGGER,
                msg='finished iteration {} for self.iter_src_grid_subsets'.
                format(ctr),
                level=logging.DEBUG)

            src_path = self.create_full_path_from_template('src_template',
                                                           index=ctr)
            dst_path = self.create_full_path_from_template('dst_template',
                                                           index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template',
                                                           index=ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_path)
            dst_slices.append(dst_slc)
            src_slices.append(src_slc)

            # Only write destinations if an iterator is not provided.
            if self.iter_dst is None:
                zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
            else:
                zip_args = [[sub_src], [src_path]]

            cc = 1
            for target, path in zip(*zip_args):
                with vm.scoped_by_emptyable('field.write' + str(cc), target):
                    if not vm.is_null:
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='write_chunks:writing: {}'.format(path),
                                 level=logging.DEBUG)
                        field = Field(grid=target)
                        field.write(path)
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='write_chunks:finished writing: {}'.format(
                                path),
                            level=logging.DEBUG)
                cc += 1

            # Increment the counter outside of the loop to avoid counting empty subsets.
            ctr += 1

            # Generate an ESMF weights file if requested and at least one rank has data on it.
            if self.genweights and len(
                    vm.get_live_ranks_from_object(sub_src)) > 0:
                vm.barrier()
                ocgis_lh(logger=_LOCAL_LOGGER,
                         msg='write_chunks:writing esmf weights: {}'.format(
                             wgt_path),
                         level=logging.DEBUG)
                self.write_esmf_weights(src_path,
                                        dst_path,
                                        wgt_path,
                                        src_grid=sub_src,
                                        dst_grid=sub_dst)
                vm.barrier()

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0.
        gathered_src_grid_slice = vm.gather(src_slices)
        if vm.rank == 0:
            len_src_slices = len(src_slices)
            new_src_grid_slice = [None] * len_src_slices
            for idx in range(len_src_slices):
                for rank_src_grid_slice in gathered_src_grid_slice:
                    if rank_src_grid_slice[idx] is not None:
                        new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                        break
            src_slices = new_src_grid_slice

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = [
                    'source_filename', 'destination_filename',
                    'weights_filename'
                ]
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{
                    'esmf_role': 'grid_chunker_source'
                }, {
                    'esmf_role': grid_chunker_destination
                }, {
                    'esmf_role': 'grid_chunker_weights'
                }]

                vc = VariableCollection()

                grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_chunker_index)
                vidx.attrs['esmf_role'] = grid_chunker_index
                vidx.attrs['grid_chunker_source'] = 'source_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_chunker_weights'] = 'weights_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[GridChunkerConstants.IndexFile.
                           NAME_DST_GRID_SHAPE] = dst_global_shape

                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx],
                                 dimensions=dim,
                                 dtype=str,
                                 value=values[idx],
                                 attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source -----------------------------------------------------------------------------------------------
                self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE,
                                                       vidx, vc, src_slices,
                                                       dim, bounds_dimension)

                # Destination ------------------------------------------------------------------------------------------
                self.dst_grid._gc_create_index_bounds_(
                    RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                    bounds_dimension)

                vc.write(index_path)

        vm.barrier()
예제 #4
0
    def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
        """
        Yield source grid subset using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
        :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
        """
        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self,
                                     yield_slice=yield_slice,
                                     yield_idx=yield_idx)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice,
                                                  yield_idx=yield_idx)

        # Loop over each destination grid subset.
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='starting "for yld in iter_dst"',
                 level=logging.DEBUG)
        for iter_dst_ctr, yld in enumerate(iter_dst, start=1):
            ocgis_lh(msg=["iter_dst_ctr", iter_dst_ctr], level=logging.DEBUG)
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            # All masked destinations are very problematic for ESMF
            with vm.scoped_by_emptyable('global mask', dst_grid_subset):
                if not vm.is_null:
                    if dst_grid_subset.has_mask_global:
                        if dst_grid_subset.has_mask and dst_grid_subset.has_masked_values:
                            all_masked = dst_grid_subset.get_mask().all()
                        else:
                            all_masked = False
                        all_masked_gather = vm.gather(all_masked)
                        if vm.rank == 0:
                            if all(all_masked_gather):
                                exc = ValueError(
                                    "Destination subset all masked")
                                try:
                                    raise exc
                                finally:
                                    vm.abort(exc=exc)

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    # Use the extent of the polygon for determining the bounding box. This ensures conservative
                    # regridding will be fully mapped.
                    if isinstance(dst_grid_subset,
                                  AbstractGeometryCoordinates):
                        target_grid = dst_grid_subset.parent.grid
                    else:
                        target_grid = dst_grid_subset

                    # Try to reduce the coordinates in the case of unstructured grid data. Ensure the data also has a
                    # coordinate index. SCRIP grid files, for example, do not have a coordinate index like UGRID.
                    if hasattr(
                            target_grid, 'reduce_global'
                    ) and Topology.POLYGON in target_grid.abstractions_available and target_grid.cindex is not None:
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='starting reduce_global for dst_grid_subset',
                            level=logging.DEBUG)
                        target_grid = target_grid.reduce_global()
                        ocgis_lh(
                            logger=_LOCAL_LOGGER,
                            msg='finished reduce_global for dst_grid_subset',
                            level=logging.DEBUG)

                    extent_global = target_grid.parent.attrs.get(
                        'extent_global')
                    if extent_global is None:
                        with grid_abstraction_scope(target_grid,
                                                    Topology.POLYGON):
                            extent_global = target_grid.extent_global
                            # HACK: Bad corner coordinates can lead to bad extents. In this case, the lower bound on the
                            #  x-coordinate is unreasonable and breaks wrapping code. Set to 0.0 which is a reasonable
                            #  lower x-coordate for unwrapped datasets.
                            if (isinstance(target_grid.crs, Spherical)) and \
                                    dst_grid_wrapped_state == WrappedState.UNWRAPPED and \
                                    extent_global[0] < 0.0:
                                e = list(extent_global)
                                e[0] = 0.0
                                extent_global = tuple(e)

                    if self.check_contains:
                        dst_box = box(*target_grid.extent_global)

                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds),
                             level=logging.DEBUG,
                             logger=_LOCAL_LOGGER)
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])
            sub_box = GeometryVariable.from_shapely(
                sub_box,
                is_bbox=True,
                wrapped_state=dst_grid_wrapped_state,
                crs=dst_grid_crs)

            # Prepare geometry to match coordinate system and wrapping of the subset target
            sub_box = sub_box.prepare(archetype=self.src_grid)
            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='prepared geometry',
                     level=logging.DEBUG)

            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='starting "self.src_grid.get_intersects"',
                     level=logging.DEBUG)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(
                sub_box,
                keep_touches=False,
                cascade=False,
                optimized_bbox_subset=self.optimized_bbox_subset,
                return_slice=True)
            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='finished "self.src_grid.get_intersects"',
                     level=logging.DEBUG)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset,
                       'reduce_global') and src_grid_subset.cindex is not None:
                # Only redistribute if we have one live rank.
                if self.redistribute and len(
                        vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='starting redistribute',
                             level=logging.DEBUG)
                    topology = src_grid_subset.abstractions_available[
                        Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[
                        Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='finished redistribute',
                             level=logging.DEBUG)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError(
                                'Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global'
                               ) and src_grid_subset.cindex is not None:
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='starting reduce_global',
                                 level=logging.DEBUG)
                        src_grid_subset = src_grid_subset.reduce_global()
                        ocgis_lh(logger=_LOCAL_LOGGER,
                                 msg='finished reduce_global',
                                 level=logging.DEBUG)
                else:
                    pass
                    # src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {
                        src_grid_subset.dimensions[ii].name: src_grid_slice[ii]
                        for ii in range(src_grid_subset.ndim)
                    }

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset,
                       dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld
예제 #5
0
def global_grid_shape(grid):
    with vm.scoped_by_emptyable('global grid shape', grid):
        if not vm.is_null:
            return grid.shape_global
예제 #6
0
파일: grid_chunker.py 프로젝트: huard/ocgis
    def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
        """
        Yield source grid subset using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
        :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
        """
        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self,
                                     yield_slice=yield_slice,
                                     yield_idx=yield_idx)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice,
                                                  yield_idx=yield_idx)

        # Loop over each destination grid subset.
        ocgis_lh(logger='grid_chunker',
                 msg='starting "for yld in iter_dst"',
                 level=logging.DEBUG)
        for yld in iter_dst:
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    # Use the extent of the polygon for determining the bounding box. This ensures conservative
                    # regridding will be fully mapped.
                    if isinstance(dst_grid_subset,
                                  AbstractGeometryCoordinates):
                        target_grid = dst_grid_subset.parent.grid
                    else:
                        target_grid = dst_grid_subset

                    extent_global = target_grid.parent.attrs.get(
                        'extent_global')
                    if extent_global is None:
                        with grid_abstraction_scope(target_grid,
                                                    Topology.POLYGON):
                            extent_global = target_grid.extent_global

                    if self.check_contains:
                        dst_box = box(*target_grid.extent_global)

                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds),
                             level=logging.DEBUG,
                             logger='grid_chunker')
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])

            sub_box = GeometryVariable.from_shapely(
                sub_box,
                is_bbox=True,
                wrapped_state=dst_grid_wrapped_state,
                crs=dst_grid_crs)
            ocgis_lh(logger='grid_chunker',
                     msg='starting "self.src_grid.get_intersects"',
                     level=logging.DEBUG)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(
                sub_box,
                keep_touches=False,
                cascade=False,
                optimized_bbox_subset=self.optimized_bbox_subset,
                return_slice=True)
            ocgis_lh(logger='grid_chunker',
                     msg='finished "self.src_grid.get_intersects"',
                     level=logging.DEBUG)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset,
                       'reduce_global') and src_grid_subset.cindex is not None:
                # Only redistribute if we have one live rank.
                if self.redistribute and len(
                        vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    ocgis_lh(logger='grid_chunker',
                             msg='starting redistribute',
                             level=logging.DEBUG)
                    topology = src_grid_subset.abstractions_available[
                        Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[
                        Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                    ocgis_lh(logger='grid_chunker',
                             msg='finished redistribute',
                             level=logging.DEBUG)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError(
                                'Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global'
                               ) and src_grid_subset.cindex is not None:
                        ocgis_lh(logger='grid_chunker',
                                 msg='starting reduce_global',
                                 level=logging.DEBUG)
                        src_grid_subset = src_grid_subset.reduce_global()
                        ocgis_lh(logger='grid_chunker',
                                 msg='finished reduce_global',
                                 level=logging.DEBUG)
                else:
                    pass
                    # src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {
                        src_grid_subset.dimensions[ii].name: src_grid_slice[ii]
                        for ii in range(src_grid_subset.ndim)
                    }

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset,
                       dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld
예제 #7
0
파일: grid_chunker.py 프로젝트: NCPP/ocgis
def global_grid_shape(grid):
    with vm.scoped_by_emptyable('global grid shape', grid):
        if not vm.is_null:
            return grid.shape_global
예제 #8
0
파일: grid_chunker.py 프로젝트: NCPP/ocgis
    def write_chunks(self):
        """
        Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
        regridding weights for each subset if requested.
        """
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []
        src_slices = []
        index_path = self.create_full_path_from_template('index_file')

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

        ctr = 1
        ocgis_lh(logger='grid_chunker', msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
        for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
            ocgis_lh(logger='grid_chunker', msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                     level=logging.DEBUG)

            src_path = self.create_full_path_from_template('src_template', index=ctr)
            dst_path = self.create_full_path_from_template('dst_template', index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_path)
            dst_slices.append(dst_slc)
            src_slices.append(src_slc)

            # Only write destinations if an iterator is not provided.
            if self.iter_dst is None:
                zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
            else:
                zip_args = [[sub_src], [src_path]]

            cc = 1
            for target, path in zip(*zip_args):
                with vm.scoped_by_emptyable('field.write' + str(cc), target):
                    if not vm.is_null:
                        ocgis_lh(logger='grid_chunker', msg='write_chunks:writing: {}'.format(path),
                                 level=logging.DEBUG)
                        field = Field(grid=target)
                        field.write(path)
                        ocgis_lh(logger='grid_chunker', msg='write_chunks:finished writing: {}'.format(path),
                                 level=logging.DEBUG)
                cc += 1

            # Increment the counter outside of the loop to avoid counting empty subsets.
            ctr += 1

            # Generate an ESMF weights file if requested and at least one rank has data on it.
            if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
                vm.barrier()
                self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src, dst_grid=sub_dst)
                vm.barrier()

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0.
        gathered_src_grid_slice = vm.gather(src_slices)
        if vm.rank == 0:
            len_src_slices = len(src_slices)
            new_src_grid_slice = [None] * len_src_slices
            for idx in range(len_src_slices):
                for rank_src_grid_slice in gathered_src_grid_slice:
                    if rank_src_grid_slice[idx] is not None:
                        new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                        break
            src_slices = new_src_grid_slice

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                vname = ['source_filename', 'destination_filename', 'weights_filename']
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{'esmf_role': 'grid_chunker_source'},
                         {'esmf_role': grid_chunker_destination},
                         {'esmf_role': 'grid_chunker_weights'}]

                vc = VariableCollection()

                grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_chunker_index)
                vidx.attrs['esmf_role'] = grid_chunker_index
                vidx.attrs['grid_chunker_source'] = 'source_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_chunker_weights'] = 'weights_filename'
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[GridChunkerConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

                vc.add_variable(vidx)

                for idx in range(len(vname)):
                    v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                    vc.add_variable(v)

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source -----------------------------------------------------------------------------------------------
                self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                       bounds_dimension)

                # Destination ------------------------------------------------------------------------------------------
                self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                       bounds_dimension)

                vc.write(index_path)

        vm.barrier()
예제 #9
0
파일: grid_chunker.py 프로젝트: NCPP/ocgis
    def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
        """
        Yield source grid subset using the extent of its associated destination grid subset.

        :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
        :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
        :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
        """
        if yield_dst:
            yield_slice = True
        else:
            yield_slice = False

        buffer_value = self.buffer_value

        dst_grid_wrapped_state = self.dst_grid.wrapped_state
        dst_grid_crs = self.dst_grid.crs

        # Use a destination grid iterator if provided.
        if self.iter_dst is not None:
            iter_dst = self.iter_dst(self, yield_slice=yield_slice, yield_idx=yield_idx)
        else:
            iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice, yield_idx=yield_idx)

        # Loop over each destination grid subset.
        ocgis_lh(logger='grid_chunker', msg='starting "for yld in iter_dst"', level=logging.DEBUG)
        for iter_dst_ctr, yld in enumerate(iter_dst, start=1):
            ocgis_lh(msg=["iter_dst_ctr", iter_dst_ctr], level=logging.DEBUG)
            if yield_slice:
                dst_grid_subset, dst_slice = yld
            else:
                dst_grid_subset = yld

            # All masked destinations are very problematic for ESMF
            with vm.scoped_by_emptyable('global mask', dst_grid_subset):
                if not vm.is_null:
                    if dst_grid_subset.has_mask_global:
                        if dst_grid_subset.has_mask and dst_grid_subset.has_masked_values:
                            all_masked = dst_grid_subset.get_mask().all()
                        else:
                            all_masked = False
                        all_masked_gather = vm.gather(all_masked)
                        if vm.rank == 0:
                            if all(all_masked_gather):
                                exc = ValueError("Destination subset all masked")
                                try:
                                    raise exc
                                finally:
                                    vm.abort(exc=exc)

            dst_box = None
            with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
                if not vm.is_null:
                    # Use the extent of the polygon for determining the bounding box. This ensures conservative
                    # regridding will be fully mapped.
                    if isinstance(dst_grid_subset, AbstractGeometryCoordinates):
                        target_grid = dst_grid_subset.parent.grid
                    else:
                        target_grid = dst_grid_subset

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(target_grid, 'reduce_global') and Topology.POLYGON in target_grid.abstractions_available:
                        ocgis_lh(logger='grid_chunker', msg='starting reduce_global for dst_grid_subset',
                                 level=logging.DEBUG)
                        target_grid = target_grid.reduce_global()
                        ocgis_lh(logger='grid_chunker', msg='finished reduce_global for dst_grid_subset',
                                 level=logging.DEBUG)

                    extent_global = target_grid.parent.attrs.get('extent_global')
                    if extent_global is None:
                        with grid_abstraction_scope(target_grid, Topology.POLYGON):
                            extent_global = target_grid.extent_global

                    if self.check_contains:
                        dst_box = box(*target_grid.extent_global)

                    sub_box = box(*extent_global)
                    if buffer_value is not None:
                        # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                        sub_box = sub_box.buffer(buffer_value).envelope

                    ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG, logger='grid_chunker')
                else:
                    sub_box, dst_box = [None, None]

            live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
            sub_box = vm.bcast(sub_box, root=live_ranks[0])

            if self.check_contains:
                dst_box = vm.bcast(dst_box, root=live_ranks[0])

            sub_box = GeometryVariable.from_shapely(sub_box, is_bbox=True, wrapped_state=dst_grid_wrapped_state,
                                                    crs=dst_grid_crs)
            ocgis_lh(logger='grid_chunker', msg='starting "self.src_grid.get_intersects"', level=logging.DEBUG)
            src_grid_subset, src_grid_slice = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False,
                                                                           optimized_bbox_subset=self.optimized_bbox_subset,
                                                                           return_slice=True)
            ocgis_lh(logger='grid_chunker', msg='finished "self.src_grid.get_intersects"', level=logging.DEBUG)

            # Reload the data using a new source index distribution.
            if hasattr(src_grid_subset, 'reduce_global') and src_grid_subset.cindex is not None:
                # Only redistribute if we have one live rank.
                if self.redistribute and len(vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                    ocgis_lh(logger='grid_chunker', msg='starting redistribute', level=logging.DEBUG)
                    topology = src_grid_subset.abstractions_available[Topology.POLYGON]
                    cindex = topology.cindex
                    redist_dimname = self.src_grid.abstractions_available[Topology.POLYGON].element_dim.name
                    if src_grid_subset.is_empty:
                        redist_dim = None
                    else:
                        redist_dim = topology.element_dim
                    redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                    ocgis_lh(logger='grid_chunker', msg='finished redistribute', level=logging.DEBUG)

            with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
                if not vm.is_null:
                    if not self.allow_masked:
                        gmask = src_grid_subset.get_mask()
                        if gmask is not None and gmask.any():
                            raise ValueError('Masked values in source grid subset.')

                    if self.check_contains:
                        src_box = box(*src_grid_subset.extent_global)
                        if not does_contain(src_box, dst_box):
                            raise ValueError('Contains check failed.')

                    # Try to reduce the coordinates in the case of unstructured grid data.
                    if hasattr(src_grid_subset, 'reduce_global') and src_grid_subset.cindex is not None:
                        ocgis_lh(logger='grid_chunker', msg='starting reduce_global', level=logging.DEBUG)
                        src_grid_subset = src_grid_subset.reduce_global()
                        ocgis_lh(logger='grid_chunker', msg='finished reduce_global', level=logging.DEBUG)
                else:
                    pass
                    # src_grid_subset = VariableCollection(is_empty=True)

                if src_grid_subset.is_empty:
                    src_grid_slice = None
                else:
                    src_grid_slice = {src_grid_subset.dimensions[ii].name: src_grid_slice[ii] for ii in
                                      range(src_grid_subset.ndim)}

            if yield_dst:
                yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
            else:
                yld = src_grid_subset, src_grid_slice

            yield yld