def _gc_iter_dst_grid_slices_(grid_chunker):
    # TODO: This method uses some global gathers which is not ideal.
    # Destination splitting works off center coordinates only.
    pgc = grid_chunker.dst_grid.abstractions_available['point']

    # Use the unique center values to break the grid into pieces. This ensures that nearby grid cells are close
    # spatially. If we just break the grid into pieces w/out using unique values, the points may be scattered which
    # does not optimize the spatial coverage of the source grid.
    center_lat = pgc.y.get_value()

    # ucenter_lat = np.unique(center_lat)
    ucenter_lat = create_unique_global_array(center_lat)

    ucenter_lat = vm.gather(ucenter_lat)
    if vm.rank == 0:
        ucenter_lat = hgather(ucenter_lat)
        ucenter_lat.sort()
        ucenter_splits = np.array_split(ucenter_lat, grid_chunker.nchunks_dst[0])
    else:
        ucenter_splits = [None] * grid_chunker.nchunks_dst[0]

    for ucenter_split in ucenter_splits:
        ucenter_split = vm.bcast(ucenter_split)
        select = np.zeros_like(center_lat, dtype=bool)
        for v in ucenter_split.flat:
            select = np.logical_or(select, center_lat == v)
        yield select
def test_system_masking_with_smm(self):
    """Test masking with sparse matrix multiplication."""
    from ocgis.regrid import RegridOperation

    grid = create_gridxy_global(with_bounds=False, crs=Spherical(), dist_dimname='x', resolution=5.0)
    src_field = create_exact_field(grid, 'exact', ntime=3)
    mask = src_field.grid.get_mask(create=True)
    mask[0:2, :] = True
    mask[:, -2:] = True
    mask[-2:, :] = True
    mask[:, 0:2] = True
    src_field.grid.set_mask(mask, cascade=True)
    src_field['exact'].set_value(src_field['exact'].mv().filled())

    dst_field = deepcopy(src_field)
    dst_field.remove_variable('exact')

    weights = self.get_temporary_file_path('weights.nc', collective=True)
    weights = vm.bcast(weights)

    ro = RegridOperation(src_field, dst_field, regrid_options={'weights_out': weights, 'split': False})
    _ = ro.execute()

    ro2 = RegridOperation(src_field, dst_field, regrid_options={'weights_in': weights, 'split': True})
    result = ro2.execute()

    actual = result['exact'].mv()
    desired = src_field['exact'].mv()
    self.assertNumpyAllClose(actual, desired)
def _get_field_write_target_(cls, field):
    """Collective!"""
    ocgis_lh(level=10, logger="driver.nc", msg="entering _get_field_write_target_")
    if field.crs is not None:
        field.crs.format_spatial_object(field)

    grid = field.grid
    if grid is not None:
        # If any grid pieces are masked, ensure the mask is created across all grids.
        has_mask = vm.gather(grid.has_mask)
        if vm.rank == 0:
            if any(has_mask):
                create_mask = True
            else:
                create_mask = False
        else:
            create_mask = None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        # Putting units on bounds for netCDF-CF can confuse some parsers.
        if grid.has_bounds:
            field = field.copy()
            field.x.bounds.attrs.pop('units', None)
            field.y.bounds.attrs.pop('units', None)

    # Remove the current coordinate system if this is a dummy coordinate system.
    if env.COORDSYS_ACTUAL is not None:
        field = field.copy()
        field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

    return field
def test_write_variable_collection_netcdf4_mpi(self):
    # TODO: TEST: Test writing a grouped netCDF file in parallel.
    self.add_barrier = False
    if not env.USE_NETCDF4_MPI:
        raise SkipTest('not env.USE_NETCDF4_MPI')

    path = self.create_rank_valued_netcdf()
    # if vm.rank == 0:
    #     self.ncdump(path, header_only=False)

    rd = RequestDataset(path, driver='netcdf')
    rd.metadata['dimensions']['dist_dim']['dist'] = True
    field = rd.get()
    # self.barrier_print(field['data'].get_value())

    if vm.rank == 0:
        actual_path = self.get_temporary_file_path('actual_mpi.nc')
    else:
        actual_path = None
    actual_path = vm.bcast(actual_path)

    # self.barrier_print('before field.write')
    field.write(actual_path)
    # self.barrier_print('after field.write')

    if vm.rank == 0:
        # self.ncdump(actual_path, header_only=False)
        self.assertNcEqual(actual_path, path)
def test_write_variable_fill_value_is_maintained(self):
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    dist = OcgDist()
    dim = dist.create_dimension('dim', 8, dist=True)
    dist.update_dimension_bounds()

    var = Variable(name='var', dimensions=dim, fill_value=2.)
    var.v()[0] = 1
    var.v()[1] = 2
    var.get_mask(create=True, check_value=True)

    if vm.rank == 0:
        path = self.get_temporary_file_path('foo.nc')
    else:
        path = None
    path = vm.bcast(path)

    var.parent.write(path)

    # if vm.rank == 0:
    #     self.ncdump(path, header_only=False)

    with vm.scoped('read test', [0]):
        if not vm.is_null:
            invar = RequestDataset(path).create_field()['var']
            self.assertEqual(invar.get_mask().sum(), 4)
            self.assertEqual(invar.fill_value, 2.)
def get_periodicity_parameters(grid):
    """
    Get characteristics of a grid's periodicity. This is only applicable for grids with a spherical coordinate
    system. There are two classifications:

    1. A grid is periodic (i.e. it has global coverage). Periodicity is determined only with the x/longitude
       dimension.
    2. A grid is non-periodic (i.e. it has regional coverage).

    Call is collective across the current VM.

    :param grid: :class:`~ocgis.Grid`
    :return: A dictionary containing periodicity parameters.
    :rtype: dict
    """
    # Check if the grid may be flagged as "periodic" by determining if its extent is global. Use the centroids and
    # the grid resolution to determine this.
    is_periodic = False
    col = grid.x.get_value()
    resolution = grid.resolution_x
    min_col, max_col = col.min(), col.max()
    # Work only with unwrapped coordinates.
    if min_col < 0:
        select = col < 0
        if select.any():
            max_col = np.max(col[col < 0]) + 360.
        select = col >= 0
        if select.any():
            min_col = np.min(col[col >= 0])
    # Check the min and max column values are within a tolerance (the grid resolution) of global (0 to 360) edges.
    if (0. - resolution) <= min_col <= (0. + resolution):
        min_periodic = True
    else:
        min_periodic = False
    if (360. - resolution) <= max_col <= (360. + resolution):
        max_periodic = True
    else:
        max_periodic = False

    # Determine global periodicity.
    min_periodic = vm.gather(min_periodic)
    max_periodic = vm.gather(max_periodic)
    if vm.rank == 0:
        min_periodic = any(min_periodic)
        max_periodic = any(max_periodic)
        if min_periodic and max_periodic:
            is_periodic = True
        else:
            is_periodic = False
    is_periodic = vm.bcast(is_periodic)

    # If the grid is periodic, set the appropriate parameters.
    if is_periodic:
        num_peri_dims = 1
        periodic_dim = 0
        pole_dim = 1
    else:
        num_peri_dims, pole_dim, periodic_dim = [None] * 3

    ret = {'num_peri_dims': num_peri_dims, 'pole_dim': pole_dim, 'periodic_dim': periodic_dim}
    return ret
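# Illustrative usage sketch for get_periodicity_parameters (not part of the library source). A coarse global grid
# should be detected as periodic along x/longitude. The helper import path is an assumption based on the tests in
# this section; run serially or under mpirun.
def _example_get_periodicity_parameters():
    from ocgis import Spherical
    from ocgis.test.base import create_gridxy_global  # Import path is an assumption.

    grid = create_gridxy_global(resolution=3.0, crs=Spherical())
    params = get_periodicity_parameters(grid)
    # Expected for a global grid: {'num_peri_dims': 1, 'pole_dim': 1, 'periodic_dim': 0}
    print(params)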
def get_wrapped_state(self, target):
    """
    Return the wrapped state of a field. This function only checks grid centroids and geometry exteriors.
    Bounds/corners on the grid are excluded.

    :param target: The field to evaluate.
    :type target: :class:`~ocgis.Field`
    """
    # TODO: Wrapped state should operate on the x-coordinate variable vectors or geometries only.
    # TODO: This should be a method on grids and geometry variables.
    from ocgis.collection.field import Field
    from ocgis.spatial.base import AbstractXYZSpatialContainer
    from ocgis import vm

    raise_if_empty(self)

    # If this is not a wrappable coordinate system, the wrapped state is undefined.
    if not self.is_wrappable:
        ret = None
    else:
        if isinstance(target, Field):
            grid = target.grid
            if grid is not None:
                target = grid
            else:
                target = target.geom

        if target is None:
            raise WrappedStateEvalTargetMissing
        elif target.is_empty:
            ret = None
        elif isinstance(target, AbstractXYZSpatialContainer):
            ret = self._get_wrapped_state_from_array_(target.x.get_value())
        else:
            stops = (WrappedState.WRAPPED, WrappedState.UNWRAPPED)
            ret = WrappedState.UNKNOWN
            geoms = target.get_masked_value().flat
            _is_masked = np.ma.is_masked
            _get_ws = self._get_wrapped_state_from_geometry_
            for geom in geoms:
                if not _is_masked(geom):
                    flag = _get_ws(geom)
                    if flag in stops:
                        ret = flag
                        break

    rets = vm.gather(ret)
    if vm.rank == 0:
        rets = set(rets)
        if WrappedState.WRAPPED in rets:
            ret = WrappedState.WRAPPED
        elif WrappedState.UNWRAPPED in rets:
            ret = WrappedState.UNWRAPPED
        else:
            ret = list(rets)[0]
    else:
        ret = None
    ret = vm.bcast(ret)

    return ret
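# Hypothetical usage sketch for get_wrapped_state. The method is collective, so every rank in the VM must call it.
# Both import paths are assumptions based on the tests in this section.
def _example_get_wrapped_state():
    from ocgis import Spherical
    from ocgis.test.base import create_gridxy_global  # Import path is an assumption.

    grid = create_gridxy_global(resolution=5.0, crs=Spherical())
    wrapped_state = Spherical().get_wrapped_state(grid)
    # A WrappedState enumeration member; WRAPPED or UNWRAPPED depending on the coordinate range.
    print(wrapped_state)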
def create_esmf_grid_fromfile(filename, grid, esmf_kwargs):
    """
    This call is collective across the VM and must be called by each rank. The underlying call to ESMF must use the
    global VM.
    """
    from ocgis import vm

    filetype = grid.driver.get_esmf_fileformat()
    klass = grid.driver.get_esmf_grid_class()

    if klass == ESMF.Grid:
        # Corners are only needed for conservative regridding.
        if esmf_kwargs.get('regrid_method') == ESMF.RegridMethod.BILINEAR:
            add_corner_stagger = False
        else:
            add_corner_stagger = True

        # If there is a spatial mask, pass this information to grid creation.
        root = vm.get_live_ranks_from_object(grid)[0]
        with vm.scoped_by_emptyable('masked values', grid):
            if not vm.is_null:
                if grid.has_masked_values_global:
                    add_mask = True
                    varname = grid.mask_variable.name
                else:
                    add_mask = False
                    varname = None
            else:
                varname, add_mask = [None] * 2
        varname = vm.bcast(varname, root=root)
        add_mask = vm.bcast(add_mask, root=root)

        ret = klass(filename=filename, filetype=filetype, add_corner_stagger=add_corner_stagger, is_sphere=False,
                    add_mask=add_mask, varname=varname)
    else:
        meshname = str(grid.dimension_map.get_variable(DMK.ATTRIBUTE_HOST))
        ret = klass(filename=filename, filetype=filetype, meshname=meshname)

    return ret
def test_write_variable_collection(self):
    if MPI_RANK == 0:
        path_in = self.get_temporary_file_path('foo.nc')
        path_out = self.get_temporary_file_path('foo_out.nc')
        with self.nc_scope(path_in, 'w') as ds:
            ds.createDimension('seven', 7)
            var = ds.createVariable('var_seven', float, dimensions=('seven',))
            var[:] = np.arange(7, dtype=float) + 10
            var.foo = 'bar'
    else:
        path_in, path_out = [None] * 2
    path_in = vm.bcast(path_in)
    path_out = vm.bcast(path_out)

    rd = RequestDataset(path_in)
    rd.metadata['dimensions']['seven']['dist'] = True
    driver = DriverNetcdf(rd)
    vc = driver.create_raw_field()
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path_out)

    if MPI_RANK == 0:
        self.assertNcEqual(path_in, path_out)
def create_rank_valued_netcdf(self):
    rank_size = 10
    size_global = vm.size_global
    with vm.scoped('write rank netcdf', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('dist_desired.nc')
            dim = Dimension('dist_dim', rank_size * size_global)
            var = Variable(name='data', dimensions=dim, attrs={'hi': 5})
            for rank in range(size_global):
                value = np.ones(rank_size) + (10 * (rank + 1))
                bounds = (rank_size * rank, rank_size * rank + rank_size)
                var.get_value()[bounds[0]: bounds[1]] = value
            var.parent.attrs = {'hi_dataset_level': 'whee'}
            var.write(path)
        else:
            path = None
    path = vm.bcast(path)
    return path
def test_system_raise_exception_subcommunicator(self):
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    raiser = Mock(side_effect=IndexError('oops'))

    with self.assertRaises(IndexError):
        e = None
        with vm.scoped('the sub which will raise', [2]):
            if not vm.is_null:
                try:
                    raiser()
                except IndexError as exc:
                    e = exc
        es = vm.gather(e)
        es = vm.bcast(es)
        for e in es:
            if e is not None:
                raise e
def test_system_cf_data_write_parallel(self):
    """Test a basic parallel CF data write operation."""
    if MPI_RANK == 0:
        path_out = self.get_temporary_file_path('foo.nc')
    else:
        path_out = None
    path_out = vm.bcast(path_out)

    rd = self.test_data.get_rd('cancm4_tas')
    rd.metadata['dimensions']['lat']['dist'] = True
    rd.metadata['dimensions']['lon']['dist'] = True
    field = rd.get()
    field.write(path_out, dataset_kwargs={'format': rd.metadata['file_format']})

    if MPI_RANK == 0:
        ignore_attributes = {'time_bnds': ['units', 'calendar'],
                             'lat_bnds': ['units'],
                             'lon_bnds': ['units'],
                             'tas': ['grid_mapping']}
        self.assertNcEqual(path_out, rd.uri, ignore_variables=['latitude_longitude'],
                           ignore_attributes=ignore_attributes)
def create_distributed_dimension(size, **kwargs):
    """
    Create a distributed dimension using a local size. Function is collective across the current VM.

    :param int size: The local size of the dimension. If ``0``, the created dimension will be empty.
    :param dict kwargs: Additional arguments to the creation of the dimension. A dimension name is required. Size,
     distribution, and empty keyword arguments are overloaded.
    :rtype: :class:`~ocgis.Dimension`
    """
    assert KeywordArgument.NAME in kwargs
    kwargs = kwargs.copy()
    dimension_name = kwargs.pop(KeywordArgument.NAME)

    size_global = vm.reduce(size, MPIOps.SUM)
    size_global = vm.bcast(size_global)

    tag = MPITag.CREATE_DIST_DIM

    # Cascade the local starting index from rank to rank so local bounds tile the global extent in rank order.
    if vm.rank == 0:
        start_idx = 0
    for idx, rank in enumerate(vm.ranks):
        dest_rank = rank + 1
        if dest_rank == vm.size:
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_idx + size, dest=dest_rank, tag=tag)
            elif vm.rank == dest_rank:
                start_idx = vm.comm.recv(source=rank, tag=tag)

    bounds_local = (start_idx, start_idx + size)

    is_empty = size == 0

    kwargs[KeywordArgument.SIZE] = size
    kwargs[KeywordArgument.DIST] = True
    kwargs[KeywordArgument.IS_EMPTY] = is_empty
    ret = Dimension(dimension_name, **kwargs)
    ret.bounds_global = (0, size_global)
    ret.bounds_local = bounds_local
    return ret
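# Minimal usage sketch for create_distributed_dimension (illustrative, not library source). Each rank contributes a
# local size and receives a dimension whose local bounds tile the global extent in rank order. Works serially and
# under mpirun.
def _example_create_distributed_dimension():
    from ocgis import vm

    local_size = 3 + vm.rank  # Ranks may contribute different local sizes.
    dim = create_distributed_dimension(local_size, name='example_dist_dim')
    # bounds_global is (0, sum of all local sizes); bounds_local is this rank's tile.
    print(vm.rank, dim.bounds_local, dim.bounds_global)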
def test_bcast(self):
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    vm = OcgVM()
    live_ranks = [1, 3, 5]
    vm.create_subcomm('tester', live_ranks, is_current=True)
    # vm.set_live_ranks(live_ranks)

    if vm.rank == 0:
        root_value = 101
    else:
        root_value = None

    if MPI_RANK in live_ranks:
        global_value = vm.bcast(root_value)
        self.assertEqual(global_value, 101)
    else:
        self.assertIsNone(root_value)

    vm.finalize()
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with
    zero elements will be returned. This call is collective across the current VM.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """
    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    local_bounds = min(unique_local), max(unique_local)
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == vm.rank:
            continue
        contains = []
        for lb2 in local_bounds:
            if lb[0] <= lb2 <= lb[1]:
                to_app = True
            else:
                to_app = False
            contains.append(to_app)
        if any(contains) or (local_bounds[0] <= lb[0] and local_bounds[1] >= lb[1]):
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    for o in overlaps:
        if vm.rank != o and vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0], unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            select_send_size = u_src.size
            _ = vm.comm.Isend([np.array([select_send_size], dtype=np_type), mpi_type], dest=o,
                              tag=tag_select_send_size)
            _ = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)

    # Receive and process conflicts to reduce the unique local values.
    if vm.rank != 0:
        for o in overlaps:
            if vm.rank != o and vm.rank > o:
                select_send_size = np.array([0], dtype=np_type)
                req_select_send_size = vm.comm.Irecv([select_send_size, mpi_type], source=o,
                                                     tag=tag_select_send_size)
                req_select_send_size.wait()
                select_send_size = select_send_size[0]

                u_src = np.zeros(select_send_size.astype(int), dtype=np_type)
                req = vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap)
                req.wait()

                utokeep = np.ones_like(unique_local, dtype=bool)
                for uidx, u in enumerate(unique_local.flat):
                    if u in u_src:
                        utokeep[uidx] = False
                unique_local = unique_local[utokeep]

    vm.barrier()
    return unique_local
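# Minimal usage sketch for create_unique_global_array (illustrative). Values duplicated across ranks are retained
# on exactly one rank, so the concatenation of the per-rank results is globally unique. Collective call; works
# serially and under mpirun.
def _example_create_unique_global_array():
    import numpy as np
    from ocgis import vm

    # Overlapping local values: rank 0 holds [0..4], rank 1 holds [3..7], etc.
    local = np.arange(5) + 3 * vm.rank
    unique_local = create_unique_global_array(local)
    print(vm.rank, unique_local)  # No value appears on more than one rank.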
dst_subset_filename = os.path.join(OUTDIR, 'dst_subset_{}.nc'.format(ctr))
if vm.rank == 0:
    print('creating subset:', subset_filename)

with vm.scoped_by_emptyable('grid subset', grid_sub):
    if not vm.is_null:
        extent_global = grid_sub.extent_global
        if vm.rank == 0:
            root = vm.rank_global
    else:
        extent_global = None

live_ranks = vm.get_live_ranks_from_object(grid_sub)
bbox = vm.bcast(extent_global, root=live_ranks[0])

vm.barrier()
if vm.rank == 0:
    print('starting bbox subset:', bbox)
vm.barrier()

has_subset = get_subset(bbox, subset_filename, 1)

vm.barrier()
if vm.rank == 0:
    print('finished bbox subset:', bbox)
vm.barrier()

has_subset = vm.gather(has_subset)
if vm.rank == 0:
def redistribute_by_src_idx(variable, dimname, dimension):
    """
    Redistribute values in ``variable`` using the source index associated with ``dimension``. This reloads the data
    from source and does not do an in-memory redistribution using MPI.

    This function is collective across the current :class:`~ocgis.OcgVM`.

    * Uses fancy indexing only.
    * Gathers all source indices to a single processor.

    :param variable: The variable to redistribute.
    :type variable: :class:`~ocgis.Variable`
    :param str dimname: The name of the dimension holding the source indices.
    :param dimension: The dimension object.
    :type dimension: :class:`~ocgis.Dimension`
    """
    from ocgis import SourcedVariable, Variable, vm
    from ocgis.variable.dimension import create_src_idx

    assert isinstance(variable, SourcedVariable)
    assert dimname is not None

    # If this is a serial operation just return. The rank should be fully autonomous in terms of its source
    # information.
    if vm.size == 1:
        return

    # There needs to be at least one rank to redistribute.
    live_ranks = vm.get_live_ranks_from_object(variable)
    if len(live_ranks) == 0:
        raise ValueError('There must be at least one rank to redistribute by source index.')

    # Remove relevant values from a variable.
    def _reset_variable_(target):
        target._is_empty = None
        target._mask = None
        target._value = None
        target._has_initialized_value = False

    # Gather the sliced dimensions. This dimension holds the source indices that are redistributed.
    dims_global = vm.gather(dimension)

    if vm.rank == 0:
        # Filter any none-type dimensions to handle currently empty ranks.
        dims_global = [d for d in dims_global if d is not None]
        # Convert any bounds-type source indices to fancy type.
        # TODO: Support bounds-type source indices.
        for d in dims_global:
            if d._src_idx_type == SourceIndexType.BOUNDS:
                d._src_idx = create_src_idx(*d._src_idx, si_type=SourceIndexType.FANCY)
        # Create variable to scatter that holds the new global source indices.
        global_src_idx = hgather([d._src_idx for d in dims_global])
        global_src_idx = Variable(name='global_src_idx', value=global_src_idx, dimensions=dimname)
        # The new size is also needed to create a regular distribution for the variable scatter.
        global_src_idx_size = global_src_idx.size
    else:
        global_src_idx, global_src_idx_size = [None] * 2

    # Build the new distribution based on the gathered source indices.
    global_src_idx_size = vm.bcast(global_src_idx_size)
    dest_dist = OcgDist()
    new_dim = dest_dist.create_dimension(dimname, global_src_idx_size, dist=True)
    dest_dist.update_dimension_bounds()

    # This variable holds the new source indices.
    new_rank_src_idx = variable_scatter(global_src_idx, dest_dist)

    if new_rank_src_idx.is_empty:
        # Support new empty ranks following the scatter.
        variable.convert_to_empty()
    else:
        # Reset the variable so everything can be loaded from source.
        _reset_variable_(variable)
        # Update the source index on the target dimension.
        new_dim._src_idx = new_rank_src_idx.get_value()
        # Add the dimension with the new source index to the collection.
        variable.parent.dimensions[dimname] = new_dim

    # All emptiness should be pushed back to the dimensions.
    variable.parent._is_empty = None
    for var in variable.parent.values():
        var._is_empty = None

    # Any variables that have a shared dimension should also be reset.
    for var in variable.parent.values():
        if dimname in var.dimension_names:
            if new_rank_src_idx.is_empty:
                var.convert_to_empty()
            else:
                _reset_variable_(var)
def variable_scatter(variable, dest_dist, root=0, strict=False):
    from ocgis import vm

    if variable is not None:
        raise_if_empty(variable)

    if vm.rank == root:
        if variable.dist:
            raise ValueError('Only variables with no prior distribution may be scattered.')
        if not dest_dist.has_updated_dimensions:
            raise ValueError('The destination distribution must have updated dimensions.')

    # Find the appropriate group for the dimensions.
    if vm.rank == root:
        group = variable.group
        # dimension_names = [dim.name for dim in variable.dimensions]
        dimension_names = variable.parent.dimensions.keys()
    else:
        group = None
        dimension_names = None

    # Depending on the strictness level, not all dimensions may be present in the distribution. This is allowed for
    # processes to more flexibly add undistributed dimensions. Distributed dimensions should be part of the
    # destination distribution already.
    not_in_dist = {}
    if vm.rank == root:
        for dest_dimension_name in dimension_names:
            try:
                _ = dest_dist.get_dimension(dest_dimension_name, group=group)
            except DimensionNotFound:
                if strict:
                    raise
                else:
                    not_in_dist[dest_dimension_name] = variable.parent.dimensions[dest_dimension_name]
    not_in_dist = vm.bcast(not_in_dist, root=root)

    # Synchronize the processes with the MPI distribution and the group containing the dimensions.
    dest_dist = vm.bcast(dest_dist, root=root)
    group = vm.bcast(group, root=root)
    # Need to convert the object to a list to be compatible with Python 3.
    if dimension_names is not None:
        dimension_names = list(dimension_names)
    dimension_names = vm.bcast(dimension_names, root=root)

    # These are the dimensions for the local process.
    dest_dimensions = [None] * len(dimension_names)
    for ii, dest_dimension_name in enumerate(dimension_names):
        try:
            d = dest_dist.get_dimension(dest_dimension_name, group=group)
        except DimensionNotFound:
            if strict:
                raise
            else:
                # Dimensions not in the distribution should have been received from the root process.
                d = not_in_dist[dest_dimension_name]
        dest_dimensions[ii] = d

    # Populate the local destination dimensions dictionary.
    dd_dict = OrderedDict()
    for d in dest_dimensions:
        dd_dict[d.name] = d

    # Slice the variables collecting the sequence to scatter to the MPI procs.
    if vm.rank == root:
        size = dest_dist.size
        if size > 1:
            slices = [None] * size
            # Get the slices needed to scatter the variables. These are essentially the local bounds on each
            # dimension.
            empty_ranks = dest_dist.get_empty_ranks()
            empty_variable = variable.copy()
            empty_variable.convert_to_empty()
            for current_rank in range(size):
                if current_rank in empty_ranks:
                    slices[current_rank] = None
                else:
                    current_dimensions = list(
                        dest_dist.get_group(group=group, rank=current_rank)['dimensions'].values())
                    slices[current_rank] = {dim.name: slice(*dim.bounds_local) for dim in current_dimensions
                                            if dim.name in variable.parent.dimensions}
            # Slice the variables. These sliced variables are the scatter targets.
            variables_to_scatter = [None] * size
            for idx, slc in enumerate(slices):
                if slc is None:
                    variables_to_scatter[idx] = empty_variable
                else:
                    variables_to_scatter[idx] = variable.parent[slc][variable.name]
        else:
            variables_to_scatter = [variable]
    else:
        variables_to_scatter = None

    # Scatter the variable across processes.
    scattered_variable = vm.scatter(variables_to_scatter, root=root)
    # Update the scattered variable collection dimensions with the destination dimensions on the process. Everything
    # should align shape-wise.
    scattered_variable.parent._dimensions = dd_dict

    return scattered_variable
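# Minimal usage sketch for variable_scatter (illustrative, mirroring the test fixtures in this section). Build an
# undistributed variable on the root rank, describe the target distribution with OcgDist, then scatter. The OcgDist
# import path is an assumption.
def _example_variable_scatter():
    import numpy as np
    from ocgis import Variable, vm
    from ocgis.vmachine.mpi import OcgDist  # Import path is an assumption.

    dist = OcgDist()
    dist.create_dimension('dim', 8, dist=True)
    dist.update_dimension_bounds()

    if vm.rank == 0:
        var = Variable(name='var', value=np.arange(8), dimensions='dim')
    else:
        var = None

    local_var = variable_scatter(var, dist)
    print(vm.rank, local_var.get_value())  # Each rank holds its slice of the original values.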
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    ocgis_lh(msg='entering reduce_reindex_coordinate_index', logger='geomc', level=logging.DEBUG)

    # Get the coordinate index values as a NumPy array.
    try:
        ocgis_lh(msg='calling cindex.get_value()', logger='geomc', level=logging.DEBUG)
        ocgis_lh(msg='cindex.has_allocated_value={}'.format(cindex.has_allocated_value), logger='geomc',
                 level=logging.DEBUG)
        ocgis_lh(msg='cindex.dimensions[0]={}'.format(cindex.dimensions[0]), logger='geomc', level=logging.DEBUG)
        cindex = cindex.get_value()
        ocgis_lh(msg='finished cindex.get_value()', logger='geomc', level=logging.DEBUG)
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Only work with 1D arrays.
    cindex = np.atleast_1d(cindex)
    # Used to return the coordinate index to the original shape of the incoming coordinate index.
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Create the unique coordinate index array.
    ocgis_lh(msg='calling create_unique_global_array', logger='geomc', level=logging.DEBUG)
    if vm.size > 1:
        u = np.array(create_unique_global_array(cindex))
    else:
        u = np.unique(cindex)
    ocgis_lh(msg='finished create_unique_global_array', logger='geomc', level=logging.DEBUG)

    # Synchronize the data type for the new coordinate index.
    lrank = vm.rank
    if lrank == 0:
        dtype = u.dtype
    else:
        dtype = None
    dtype = vm.bcast(dtype)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # Create the new coordinate index.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Create a hash for the new index. This is used to remap the old coordinate index.
    if has_u:
        uidx = {ii: jj for ii, jj in zip(u, new_u)}
    else:
        uidx = None

    vm.barrier()

    # Construct local bounds for the rank's unique values. This is used as a cheap index when ranks are looking for
    # index overlaps.
    if has_u:
        local_bounds = min(u), max(u)
    else:
        local_bounds = None

    # Put a copy of the bounds indexing on each rank.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank:
            continue
        if lb is not None:
            contains = lb[0] <= cindex
            contains = np.logical_and(lb[1] >= cindex, contains)
            if np.any(contains):
                overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.gather(overlaps)
    global_overlaps = vm.bcast(global_overlaps)
    destinations = [ii for ii, jj in enumerate(global_overlaps) if vm.rank in jj]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # vm.barrier_print('starting run_rr')
    # Fill the new coordinate indexing.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)
    # vm.barrier_print('finished run_rr')

    # Return the array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
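# Minimal usage sketch for reduce_reindex_coordinate_index (illustrative, serial case). A coordinate index
# referencing a sparse set of global coordinates is compacted to a 0-based index, and the unique original indices
# are returned for reducing the coordinate storage itself.
def _example_reduce_reindex_coordinate_index():
    import numpy as np

    cindex = np.array([10, 12, 10, 40, 12])
    new_cindex, u = reduce_reindex_coordinate_index(cindex, start_index=0)
    # Expected on a single rank: new_cindex == [0, 1, 0, 2, 1] and u == [10, 12, 40].
    print(new_cindex, u)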
def test_get_intersects(self):
    subset_geom = self.fixture_subset_geom()
    poly = self.fixture()

    # Scatter the polygon geometry coordinates for the parallel case ===============================================

    dist = OcgDist()
    for d in poly.parent.dimensions.values():
        d = d.copy()
        if d.name == poly.dimensions[0].name:
            d.dist = True
        dist.add_dimension(d)
    dist.update_dimension_bounds()

    poly.parent = variable_collection_scatter(poly.parent, dist)

    vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
    if vm.is_null:
        return

    poly.parent._validate_()

    for v in poly.parent.values():
        self.assertEqual(id(v.parent), id(poly.parent))
        self.assertEqual(len(v.parent), len(poly.parent))

    # ==============================================================================================================

    # p = os.path.join('/tmp/subset_geom.shp')
    # s = GeometryVariable.from_shapely(subset_geom)
    # s.write_vector(p)
    # p = os.path.join('/tmp/poly.shp')
    # s = poly.convert_to()
    # s.write_vector(p)

    sub = poly.get_intersects(subset_geom)
    vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
    if vm.is_null:
        return

    actual = []
    for g in sub.iter_geometries():
        if g[1] is not None:
            actual.append([g[1].centroid.x, g[1].centroid.y])

    desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
    actual = vm.gather(actual)
    if vm.rank == 0:
        gactual = []
        for a in actual:
            for ia in a:
                gactual.append(ia)
        self.assertEqual(gactual, desired)

    self.assertEqual(len(sub.parent), len(poly.parent))

    sub.parent._validate_()
    sub2 = sub.reduce_global()
    sub2.parent._validate_()

    # p = os.path.join('/tmp/sub.shp')
    # s = sub.convert_to()
    # s.write_vector(p)
    # p = os.path.join('/tmp/sub2.shp')
    # s = sub2.convert_to()
    # s.write_vector(p)

    # Gather then broadcast coordinates so all coordinates are available on each process.
    to_add = []
    for gather_target in [sub2.x, sub2.y]:
        gathered = variable_gather(gather_target.extract())
        gathered = vm.bcast(gathered)
        to_add.append(gathered)
    for t in to_add:
        sub2.parent.add_variable(t, force=True)

    for ctr, to_check in enumerate([sub, sub2]):
        actual = []
        for g in to_check.iter_geometries():
            if g[1] is not None:
                actual.append([g[1].centroid.x, g[1].centroid.y])

        desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
        actual = vm.gather(actual)
        if vm.rank == 0:
            gactual = []
            for a in actual:
                for ia in a:
                    gactual.append(ia)
            self.assertEqual(gactual, desired)
def test_write_esmf_weights(self):
    # Create source and destination fields. This is the identity test, so the source and destination fields are
    # equivalent.
    src_grid = create_gridxy_global(resolution=3.0, crs=Spherical())

    # Only test masking in serial to make indexing easier...just being lazy.
    if vm.size == 1:
        mask = src_grid.get_mask(create=True)
        mask[4, 5] = True
        mask[25, 27] = True
        src_grid.set_mask(mask)
        self.assertEqual(src_grid.get_mask().sum(), 2)

    src_field = create_exact_field(src_grid, 'foo', ntime=3)
    dst_field = deepcopy(src_field)

    # Write the fields to disk for use in global file reconstruction and testing.
    if vm.rank == 0:
        master_path = self.get_temporary_file_path('foo.nc')
        src_field_path = self.get_temporary_file_path('src_field.nc')
    else:
        master_path = None
        src_field_path = None
    master_path = vm.bcast(master_path)
    src_field_path = vm.bcast(src_field_path)
    assert not os.path.exists(master_path)
    dst_field.write(master_path)
    src_field.write(src_field_path)

    # Remove the destination data variable to test its creation and filling.
    dst_field.remove_variable('foo')

    # Chunk the fields and generate weights.
    paths = {'wd': self.current_dir_output}
    gc = GridChunker(src_field, dst_field, nchunks_dst=(2, 2), genweights=True, paths=paths,
                     esmf_kwargs={'regrid_method': 'BILINEAR'})
    gc.write_chunks()

    # This is the path to the index file describing how to reconstruct the grid file.
    index_path = os.path.join(self.current_dir_output, gc.paths['index_file'])

    # Execute the sparse matrix multiplication using weights read from file.
    gc.smm(index_path, paths['wd'])

    with vm.scoped('index and reconstruct', [0]):
        if not vm.is_null:
            # Reconstruct the global destination file.
            gc.insert_weighted(index_path, self.current_dir_output, master_path)

            # Load the actual values from file (destination).
            actual_field = RequestDataset(master_path).create_field()
            actual = actual_field.data_variables[0].mv()

            # Load the desired data from file (original values in the source field).
            desired = RequestDataset(src_field_path).create_field().data_variables[0].mv()

            if vm.size_global == 1:
                # Masking is only tested in serial.
                self.assertEqual(actual_field.grid.get_mask().sum(), 2)
            else:
                self.assertIsNone(actual_field.grid.get_mask())

            self.assertNumpyAll(actual, desired)
def variable_scatter(variable, dest_dist, root=0):
    from ocgis import vm

    if variable is not None:
        raise_if_empty(variable)

    if vm.rank == root:
        if variable.dist:
            raise ValueError('Only variables with no prior distribution may be scattered.')
        if not dest_dist.has_updated_dimensions:
            raise ValueError('The destination distribution must have updated dimensions.')

    # Find the appropriate group for the dimensions.
    if vm.rank == root:
        group = variable.group
        dimension_names = [dim.name for dim in variable.dimensions]
    else:
        group = None
        dimension_names = None

    # Synchronize the processes with the MPI distribution and the group containing the dimensions.
    dest_dist = vm.bcast(dest_dist)
    group = vm.bcast(group)
    dimension_names = vm.bcast(dimension_names)

    # These are the dimensions for the local process.
    dest_dimensions = dest_dist.get_dimensions(dimension_names, group=group)

    # barrier_print('dest_dimensions bounds_global: ', [(d.name, d.bounds_global) for d in dest_dimensions])

    # Slice the variables collecting the sequence to scatter to the MPI procs.
    if vm.rank == root:
        size = dest_dist.size
        if size > 1:
            slices = [None] * size
            # Get the slices needed to scatter the variables. These are essentially the local bounds on each
            # dimension.
            empty_ranks = dest_dist.get_empty_ranks()
            empty_variable = variable.copy()
            empty_variable.convert_to_empty()
            for current_rank in range(size):
                if current_rank in empty_ranks:
                    slices[current_rank] = None
                else:
                    current_dimensions = list(
                        dest_dist.get_group(group=group, rank=current_rank)['dimensions'].values())
                    slices[current_rank] = {dim.name: slice(*dim.bounds_local) for dim in current_dimensions
                                            if dim.name in variable.parent.dimensions}
            # Slice the variables. These sliced variables are the scatter targets.
            variables_to_scatter = [None] * size
            for idx, slc in enumerate(slices):
                if slc is None:
                    variables_to_scatter[idx] = empty_variable
                else:
                    variables_to_scatter[idx] = variable.parent[slc][variable.name]
        else:
            variables_to_scatter = [variable]
    else:
        variables_to_scatter = None

    # Scatter the variable across processes.
    scattered_variable = vm.scatter(variables_to_scatter, root=root)
    # Update the scattered variable dimensions with the destination dimensions on the process. Everything should
    # align shape-wise. If they don't, an exception will be raised.
    scattered_variable.set_dimensions(dest_dimensions, force=True)

    return scattered_variable
def get_distributed_slice(self, slc):
    """
    Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may be
    empty.

    :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
     processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a fancy
     ``slc`` is not manipulated or redistributed prior to slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    slc = get_formatted_slice(slc, 1)[0]
    is_fancy = not isinstance(slc, slice)

    if not is_fancy and slc == slice(None):
        ret = self.copy()
    # Use standard slicing for non-distributed dimensions.
    elif not self.dist:
        ret = self[slc]
    else:
        if is_fancy:
            local_slc = slc
        else:
            local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)

        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        if local_slc is None:
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        # Slice overlaps so do a slice on the dimension using the local slice.
        else:
            ret = self[local_slc]
            dimension_size = len(ret)

        assert dimension_size >= 0
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank == 0:
            sum_dimension_size = 0
            for ds in dimension_sizes:
                try:
                    sum_dimension_size += ds
                except TypeError:
                    pass
            bounds_global = (0, sum_dimension_size)
        else:
            bounds_global = None
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                if vm.rank == 0:
                    adjust = len(ret)
                else:
                    adjust = None
                adjust = vm.bcast(adjust)
                for current_rank in vm.ranks:
                    if vm.rank == current_rank:
                        if vm.rank != 0:
                            ret.bounds_local = [b + adjust for b in ret.bounds_local]
                            adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
    return ret
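# Minimal usage sketch for get_distributed_slice (illustrative). Slice a distributed dimension with global integer
# bounds; ranks whose local extent falls outside the slice come back empty. The OcgDist import path is an
# assumption.
def _example_get_distributed_slice():
    from ocgis.vmachine.mpi import OcgDist  # Import path is an assumption.

    dist = OcgDist()
    dim = dist.create_dimension('dim', 8, dist=True)
    dist.update_dimension_bounds()

    sub = dim.get_distributed_slice(slice(2, 6))  # Global indices 2 through 5.
    print(sub.is_empty, sub.bounds_global)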
def iter_src_grid_subsets(self, yield_dst=False):
    """
    Yield source grid subsets using the extents of their associated destination grid subsets.

    :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
    :return: A two-element tuple ``(<source grid subset>, <source grid slice>)`` if ``yield_dst`` is ``False``,
     otherwise a four-element tuple in the form ``(<source grid subset>, <source grid slice>, <destination grid
     subset>, <destination grid slice>)``.
    :rtype: tuple
    """
    if yield_dst:
        yield_slice = True
    else:
        yield_slice = False

    if self.buffer_value is None:
        try:
            if self.dst_grid_resolution is None:
                dst_grid_resolution = self.dst_grid.resolution
            else:
                dst_grid_resolution = self.dst_grid_resolution
            if self.src_grid_resolution is None:
                src_grid_resolution = self.src_grid.resolution
            else:
                src_grid_resolution = self.src_grid_resolution

            if dst_grid_resolution <= src_grid_resolution:
                target_resolution = dst_grid_resolution
            else:
                target_resolution = src_grid_resolution
            buffer_value = 2. * target_resolution
        except NotImplementedError:
            # Unstructured grids do not have an associated resolution.
            if isinstance(self.src_grid, GridUnstruct) or isinstance(self.dst_grid, GridUnstruct):
                buffer_value = None
            else:
                raise
    else:
        buffer_value = self.buffer_value

    dst_grid_wrapped_state = self.dst_grid.wrapped_state
    dst_grid_crs = self.dst_grid.crs

    # Use a destination grid iterator if provided.
    if self.iter_dst is not None:
        iter_dst = self.iter_dst(self, yield_slice=yield_slice)
    else:
        iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice)

    # Loop over each destination grid subset.
    for yld in iter_dst:
        if yield_slice:
            dst_grid_subset, dst_slice = yld
        else:
            dst_grid_subset = yld

        dst_box = None
        with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
            if not vm.is_null:
                if self.check_contains:
                    dst_box = box(*dst_grid_subset.extent_global)

                # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                extent_global = dst_grid_subset.parent.attrs.get('extent_global')
                if extent_global is None:
                    extent_global = dst_grid_subset.extent_global
                sub_box = box(*extent_global)
                if buffer_value is not None:
                    sub_box = sub_box.buffer(buffer_value).envelope

                ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG)
            else:
                sub_box, dst_box = [None, None]

        live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
        sub_box = vm.bcast(sub_box, root=live_ranks[0])

        if self.check_contains:
            dst_box = vm.bcast(dst_box, root=live_ranks[0])

        sub_box = GeometryVariable.from_shapely(sub_box, is_bbox=True, wrapped_state=dst_grid_wrapped_state,
                                                crs=dst_grid_crs)
        src_grid_subset, src_grid_slice = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False,
                                                                       optimized_bbox_subset=self.optimized_bbox_subset,
                                                                       return_slice=True)

    # Reload the data using a new source index distribution.
        if hasattr(src_grid_subset, 'reduce_global'):
            # Only redistribute if there is at least one live rank.
            if self.redistribute and len(vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                topology = src_grid_subset.abstractions_available[Topology.POLYGON]
                cindex = topology.cindex
                redist_dimname = self.src_grid.abstractions_available[Topology.POLYGON].element_dim.name
                if src_grid_subset.is_empty:
                    redist_dim = None
                else:
                    redist_dim = topology.element_dim
                redistribute_by_src_idx(cindex, redist_dimname, redist_dim)

        with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
            if not vm.is_null:
                if not self.allow_masked:
                    gmask = src_grid_subset.get_mask()
                    if gmask is not None and gmask.any():
                        raise ValueError('Masked values in source grid subset.')

                if self.check_contains:
                    src_box = box(*src_grid_subset.extent_global)
                    if not does_contain(src_box, dst_box):
                        raise ValueError('Contains check failed.')

                # Try to reduce the coordinates in the case of unstructured grid data.
                if hasattr(src_grid_subset, 'reduce_global'):
                    src_grid_subset = src_grid_subset.reduce_global()
            else:
                src_grid_subset = VariableCollection(is_empty=True)

            if src_grid_subset.is_empty:
                src_grid_slice = None
            else:
                src_grid_slice = {src_grid_subset.dimensions[ii].name: src_grid_slice[ii] for ii in
                                  range(src_grid_subset.ndim)}

        if yield_dst:
            yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
        else:
            yld = src_grid_subset, src_grid_slice

        yield yld
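# Illustrative sketch driving iter_src_grid_subsets through a GridChunker, following the pattern in
# test_write_esmf_weights above. The import paths for GridChunker and the field helpers are assumptions.
def _example_iter_src_grid_subsets():
    from ocgis import Spherical
    from ocgis.spatial.grid_chunker import GridChunker  # Import path is an assumption.
    from ocgis.test.base import create_exact_field, create_gridxy_global  # Import path is an assumption.

    src_field = create_exact_field(create_gridxy_global(resolution=3.0, crs=Spherical()), 'foo', ntime=3)
    dst_field = create_exact_field(create_gridxy_global(resolution=3.0, crs=Spherical()), 'foo', ntime=3)

    gc = GridChunker(src_field, dst_field, nchunks_dst=(2, 2))
    for src_sub, src_slc, dst_sub, dst_slc in gc.iter_src_grid_subsets(yield_dst=True):
        # Each destination chunk arrives with the buffered source subset covering it.
        print(type(src_sub).__name__, src_slc, dst_slc)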