def main():
    """
    Subset the grid read from ``IN_PATH`` with the ``BBOX`` bounding box and reduce the result globally.

    The grid is requested with a point abstraction, subset by intersection with the box, switched to a polygon
    abstraction, and then passed through ``reduce_global``. Coordinate index values and the reduced extent are
    printed with ``rank_print`` along the way. The function ends with a VM barrier, so every rank must call it.
    """
    rd = RequestDataset(IN_PATH, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()

    # A single parenthesized argument prints identically under Python 2 and Python 3, unlike the bare print
    # statement which is a syntax error on Python 3.
    print(field.shapes)

    sub = field.grid.get_intersects(box(*BBOX), optimized_bbox_subset=True).parent

    # NOTE(review): presumably this scope excludes ranks whose subset is empty; those ranks see ``vm.is_null``.
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            sub.grid_abstraction = GridAbstraction.POLYGON
            rank_print('sub', sub.grid.cindex.get_value())
            subr = sub.grid.reduce_global().parent
            rank_print('sub', subr.grid.cindex.get_value())
            rank_print(subr.grid.extent)

    vm.barrier()
def arange_from_dimension(dim, start=0, dtype=int, dist=True):
    """
    Create a sequential integer range similar to ``numpy.arange``. Call is collective across the current VM if
    ``dist=True`` (the default).

    When distributed, each rank receives its starting value from the previous rank and forwards its own stopping
    value to the next rank, so the ranges on consecutive ranks are contiguous.

    :param dim: The dimension to use for creating the range. Only its length is used.
    :type dim: :class:`~ocgis.Dimension`
    :param int start: The starting value for the range. With ``dist=True`` this is the starting value on the first
     rank only.
    :param dtype: The data type for the output array.
    :param bool dist: If ``True``, create range as a distributed array with a collective VM call. If ``False``,
     create the array locally.
    :rtype: :class:`numpy.ndarray`
    """
    local_size = len(dim)
    if dist:
        from ocgis import vm

        for rank in vm.ranks:
            dest_rank = rank + 1
            # The last rank has no successor to notify.
            if dest_rank == vm.size:
                break
            # A dedicated tag keeps the offset message from being matched against unrelated point-to-point
            # traffic on the same communicator.
            if vm.rank == rank:
                vm.comm.send(start + local_size, dest=dest_rank, tag=MPITag.ARANGE_FROM_DIMENSION)
            elif vm.rank == dest_rank:
                start = vm.comm.recv(source=rank, tag=MPITag.ARANGE_FROM_DIMENSION)
        vm.barrier()

    ret = np.arange(start, start + local_size, dtype=dtype)
    return ret
def test_system_grid_chunking(self):
    # Chunk an ESMF unstructured grid against a copy of itself into eight destination chunks with weight
    # generation, check each chunk and weight file on the rank that owns it, then merge the weights on rank 0.
    # Only runs with exactly four ranks.
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    def assert_weights_are_one(weight_path):
        # Every weight in the "S" variable is expected to be one.
        weights = Field.read(weight_path)['S'].v()
        self.assertAlmostEqual(weights.min(), 1.0)
        self.assertAlmostEqual(weights.max(), 1.0)

    path = self.path_esmf_unstruct
    rd_dst = RequestDataset(uri=path, driver=DriverESMFUnstruct, crs=Spherical(), grid_abstraction='point',
                            grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    # The working directory is created once and all ranks wait for it before chunking.
    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    vm.barrier()

    gc = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                     dst_grid_resolution=resolution, optimized_bbox_subset=True, paths={'wd': chunk_wd},
                     genweights=True)
    gc.write_chunks()

    # Distribute the eight chunk counters across ranks.
    dist = OcgDist()
    local_ctr = Dimension(name='ctr', size=8, dist=True)
    dist.add_dimension(local_ctr)
    dist.update_dimension_bounds()

    # Chunk files are numbered from one while the local bounds are zero-based.
    for chunk_id in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
        src_chunk = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_chunk = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        sf = Field.read(src_chunk, driver=DriverESMFUnstruct)
        df = Field.read(dst_chunk, driver=DriverESMFUnstruct)
        self.assertGreater(sf.grid.shape[0], df.grid.shape[0])

        assert_weights_are_one(os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id)))

    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_weights = self.get_temporary_file_path('merged_weights.nc')
            gc.create_merged_weight_file(merged_weights, strict=False)
            assert_weights_are_one(merged_weights)
def test(self):
    # Write the chunks from the grid chunker fixture and check the destination slices, the contents of each
    # chunk file, the summed destination data, and the index file. The gathers are collective, so every rank
    # runs the same sequence of calls.
    gs = self.fixture_grid_chunker()

    local_dst_sum = gs.dst_grid.parent['data'].get_value().sum()
    gathered_dst_sum = MPI_COMM.gather(local_dst_sum)
    if vm.rank == 0:
        desired_sum = np.sum(gathered_dst_sum)

    # Two rows by three columns of destination slices in row-major order.
    desired = [{'y': slice(y_start, y_start + 180, None), 'x': slice(x_start, x_start + 240, None)}
               for y_start in (0, 180) for x_start in (0, 240, 480)]
    self.assertEqual(list(gs.iter_dst_grid_slices()), desired)

    gs.write_chunks()

    if vm.rank == 0:
        rank_sums = []

    nchunks = gs.nchunks_dst[0] * gs.nchunks_dst[1]
    for ctr in range(1, nchunks + 1):
        src_path = gs.create_full_path_from_template('src_template', index=ctr)
        dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

        src_field = RequestDataset(src_path).get()
        dst_field = RequestDataset(dst_path).get()

        # The source chunk must spatially cover its destination chunk.
        src_envelope_global = box(*src_field.grid.extent_global)
        dst_envelope_global = box(*dst_field.grid.extent_global)
        self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

        for chunk_field in (src_field, dst_field):
            self.assertIn('data', get_variable_names(chunk_field.data_variables))

        chunk_sum = MPI_COMM.gather(dst_field['data'].get_value().sum())
        if MPI_RANK == 0:
            rank_sums.append(np.sum(chunk_sum))

    if vm.rank == 0:
        self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
        index_path = gs.create_full_path_from_template('index_file')
        self.assertTrue(os.path.exists(index_path))

    vm.barrier()

    index_path = gs.create_full_path_from_template('index_file')
    index_field = RequestDataset(index_path).get()
    self.assertTrue(len(list(index_field.keys())) > 2)
def _write_variable_collection_main_(cls, field, opened_or_path, write_mode, **kwargs):
    """
    Write a field to a vector GIS format through Fiona, one rank at a time.

    :param field: The field to write. It must have a geometry variable.
    :param opened_or_path: An already opened Fiona object or a path to write to.
    :param write_mode: The MPI write mode. A template write only creates the file; a fill write only appends.
    :param kwargs: Optional ``crs``, ``fiona_schema``, ``fiona_driver`` (default ``'ESRI Shapefile'``), and
     ``iter_kwargs`` overloads.
    :raises: ValueError
    """
    from ocgis.collection.field import Field

    if not isinstance(field, Field):
        raise ValueError('Only fields may be written to vector GIS formats.')

    fiona_crs = kwargs.get('crs')
    fiona_schema = kwargs.get('fiona_schema')
    fiona_driver = kwargs.get('fiona_driver', 'ESRI Shapefile')
    iter_kwargs = kwargs.pop('iter_kwargs', {})
    iter_kwargs[KeywordArgument.DRIVER] = cls

    # The geometry variable supplies the general geometry type, which may not be determinable from the record
    # iterator alone.
    geom_variable = field.geom
    if geom_variable is None:
        raise ValueError('A geometry variable is required for writing to vector GIS formats.')

    if cls.inquire_opened_state(opened_or_path):
        # An opened sink dictates its own schema, coordinate system, and driver.
        fiona_schema = opened_or_path.schema
        fiona_crs = opened_or_path.crs
        fiona_driver = opened_or_path.driver
    else:
        # Use overloaded values where given; otherwise derive them from the field at call-time.
        if fiona_crs is None and field.crs is not None:
            fiona_crs = field.crs.value
        _, archetype_record = next(field.iter(**iter_kwargs))
        archetype_record = format_record_for_fiona(fiona_driver, archetype_record)
        if fiona_schema is None:
            fiona_schema = get_fiona_schema(geom_variable.geom_type, archetype_record)

    # The Fiona GeoJSON driver does not support update, so it is written directly instead of appended to a
    # template.
    is_geojson = fiona_driver == 'GeoJSON'
    mode = 'w' if is_geojson else 'a'

    # Write the template file.
    if not is_geojson and vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, opened_or_path=opened_or_path, mode='w', driver=fiona_driver, crs=fiona_crs,
                          schema=fiona_schema):
            pass

    # Write data on each rank to the file. The barrier serializes the ranks.
    if write_mode != MPIWriteMode.TEMPLATE:
        for rank_to_write in vm.ranks:
            if vm.rank == rank_to_write:
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, driver=fiona_driver,
                                  crs=fiona_crs, schema=fiona_schema) as sink:
                    write_records_to_fiona(sink, field.iter(**iter_kwargs), fiona_driver)
            vm.barrier()
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with
    zero elements will be returned. This call is collective across the current VM.

    Each rank in turn sends its remaining unique values to every other rank, and the receiving ranks drop any
    value they also hold.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """
    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    from ocgis.vmachine.mpi import rank_print

    rank_print('starting np.unique')
    unique_local = np.unique(arr)
    rank_print('finished np.unique')

    rank_print('waiting at barrier1')
    vm.barrier()

    tag_unique_count = MPITag.UNIQUE_GLOBAL_COUNT
    tag_unique_check = MPITag.UNIQUE_GLOBAL_CHECK

    for root_rank in vm.ranks:
        rank_print('root_rank=', root_rank)
        is_root = vm.rank == root_rank

        # Every rank needs to know whether the root has anything to send.
        has_unique_local = vm.bcast(len(unique_local) != 0 if is_root else None, root=root_rank)
        if not has_unique_local:
            continue

        if is_root:
            other_ranks = [rank for rank in vm.ranks if rank != vm.rank]
            # Announce the number of values first, then send each value to every other rank.
            for rank in other_ranks:
                vm.comm.send(len(unique_local), dest=rank, tag=tag_unique_count)
            for u in unique_local:
                for rank in other_ranks:
                    vm.comm.send(u, dest=rank, tag=tag_unique_check)
        else:
            recv_count = vm.comm.recv(source=root_rank, tag=tag_unique_count)
            for _ in range(recv_count):
                u = vm.comm.recv(source=root_rank, tag=tag_unique_check)
                # The root keeps the value; this rank gives it up.
                if u in unique_local:
                    unique_local = unique_local[unique_local != u]

    rank_print('waiting at barrier 2')
    vm.barrier()

    return unique_local
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """
    Write a variable collection and its children to a netCDF dataset, one rank at a time.

    :param vc: The variable collection to write.
    :param opened_or_path: An already opened dataset or a path to write to.
    :param write_mode: The MPI write mode. Must not be ``None``.
    :param kwargs: Optional ``dataset_kwargs`` passed when opening the dataset and ``variable_kwargs`` passed to
     each variable write. All keyword arguments are forwarded to child writes.
    """
    assert write_mode is not None

    dataset_kwargs = kwargs.get('dataset_kwargs', {})
    variable_kwargs = kwargs.get('variable_kwargs', {})

    is_fill = write_mode == MPIWriteMode.FILL
    is_asynchronous = write_mode == MPIWriteMode.ASYNCHRONOUS

    # When filling a dataset, we use append mode.
    mode = 'a' if is_fill else 'w'

    # For an asynchronous write, treat everything like a single rank.
    possible_ranks = [0] if is_asynchronous else vm.ranks

    # Write the data on each rank. The barrier after each turn serializes the ranks.
    for rank_to_write in possible_ranks:
        # The template write only occurs on the first rank.
        skip_for_template = write_mode == MPIWriteMode.TEMPLATE and rank_to_write != 0
        if not skip_for_template and (is_asynchronous or vm.rank == rank_to_write):
            with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                # Global attributes already exist when filling data.
                if not is_fill:
                    vc.write_attributes_to_netcdf_object(dataset)

                for variable in get_variables_to_write(vc):
                    # Load the variable's data before orphaning. The variable needs its parent to know which
                    # group it is in.
                    variable.load()
                    # Orphaning is required as a variable will attempt to write its parent first.
                    with orphaned(variable, keep_dimensions=True):
                        variable.write(dataset, write_mode=write_mode, **variable_kwargs)

                # Recurse the children. Groups are created unless filling, in which case they already exist.
                for child in list(vc.children.values()):
                    if is_fill:
                        group = dataset.groups[child.name]
                    else:
                        group = nc.Group(dataset, child.name)
                    child.write(group, write_mode=write_mode, **kwargs)

                dataset.sync()
        vm.barrier()
def test_system_grid_chunking(self):
    # Chunk an ESMF unstructured grid against a copy of itself into eight destination chunks with weight
    # generation, check each chunk and weight file on the rank that owns it, then merge the weights on rank 0.
    # Only runs with exactly four ranks.
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    def check_unit_weights(weight_path):
        # Every weight in the "S" variable is expected to be one.
        weights = Field.read(weight_path)['S'].v()
        self.assertAlmostEqual(weights.min(), 1.0)
        self.assertAlmostEqual(weights.max(), 1.0)

    path = self.path_esmf_unstruct
    rd_dst = RequestDataset(uri=path, driver=DriverESMFUnstruct, crs=Spherical(), grid_abstraction='point',
                            grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    # The working directory is created once and all ranks wait for it before chunking.
    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    vm.barrier()

    gc = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                     dst_grid_resolution=resolution, optimized_bbox_subset=True, paths={'wd': chunk_wd},
                     genweights=True)
    gc.write_chunks()

    # Distribute the eight chunk counters across ranks.
    dist = OcgDist()
    local_ctr = Dimension(name='ctr', size=8, dist=True)
    dist.add_dimension(local_ctr)
    dist.update_dimension_bounds()

    # Chunk files are numbered from one while the local bounds are zero-based.
    for chunk_id in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
        src_chunk = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_chunk = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        sf = Field.read(src_chunk, driver=DriverESMFUnstruct)
        df = Field.read(dst_chunk, driver=DriverESMFUnstruct)

        # The source chunk is larger than its destination chunk, but by no more than 150 elements.
        self.assertLessEqual(sf.grid.shape[0] - df.grid.shape[0], 150)
        self.assertGreater(sf.grid.shape[0], df.grid.shape[0])

        check_unit_weights(os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id)))

    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_weights = self.get_temporary_file_path('merged_weights.nc')
            gc.create_merged_weight_file(merged_weights, strict=False)
            check_unit_weights(merged_weights)
def test_barrier(self):
    # Create a sub-communicator holding ranks 1 and 3 of a four-rank run and call a barrier on it from the live
    # ranks only. Only runs with exactly four ranks.
    if MPI_SIZE != 4:
        raise SkipTest('MPI_SIZE != 4')

    vm = OcgVM()
    # Finalize in a ``finally`` clause so the VM is still finalized on every rank when an assertion fails,
    # rather than being left open for the tests that follow.
    try:
        live_ranks = [1, 3]
        vm.create_subcomm('for barrier', live_ranks, is_current=True)

        if not vm.is_null:
            self.assertEqual(vm.size, 2)
        else:
            self.assertNotIn(MPI_RANK, live_ranks)

        # Ranks outside the sub-communicator must not enter its barrier.
        if MPI_RANK in live_ranks:
            vm.barrier()
    finally:
        vm.finalize()
def arange_from_dimension(dim, start=0, dtype=None, dist=True):
    """
    Build a sequential range like ``numpy.arange`` whose length matches a dimension. The call is collective
    across the current :class:`~ocgis.OcgVM` when ``dist=True`` (the default).

    When distributed, each rank receives its starting value from the previous rank and forwards its own stopping
    value to the next rank.

    :param dim: The dimension to use for creating the range. Only its length is used.
    :type dim: :class:`~ocgis.Dimension`
    :param int start: The starting value for the range.
    :param dtype: The data type for the output array. Defaults to ``env.NP_INT`` when ``None``.
    :param bool dist: If ``True``, create range as a distributed array with a collective VM call. If ``False``,
     create the array locally.
    :rtype: :class:`numpy.ndarray`
    """
    if dtype is None:
        from ocgis import env
        dtype = env.NP_INT

    local_size = len(dim)

    if dist:
        from ocgis import vm

        for sender in vm.ranks:
            receiver = sender + 1
            # The last rank has nobody to forward its stopping value to.
            if receiver == vm.size:
                break

            if vm.rank == sender:
                payload = np.array([start + local_size], dtype=dtype)
                vm.comm.Send([payload, vm.get_mpi_type(dtype)], dest=receiver, tag=MPITag.ARANGE_FROM_DIMENSION)
            elif vm.rank == receiver:
                payload = np.zeros(1, dtype=dtype)
                vm.comm.Recv([payload, vm.get_mpi_type(dtype)], source=sender, tag=MPITag.ARANGE_FROM_DIMENSION)
                start = payload[0]
        vm.barrier()

    return np.arange(start, start + local_size, dtype=dtype)
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """
    Write a variable collection to CSV, one rank at a time.

    Rank 0 writes the header row unless filling. Unless this is a template write, every rank then appends its
    records in rank order.

    :param vc: The variable collection to write. It must not be empty.
    :param opened_or_path: An already opened file object or a path to write to.
    :param write_mode: The MPI write mode.
    :param kwargs: May contain iterator keyword arguments under ``KeywordArgument.ITER_KWARGS``.
    """
    raise_if_empty(vc)

    iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})

    # The first record defines the column names.
    _, first_record = six.next(vc.iter(**iter_kwargs))
    fieldnames = list(first_record.keys())

    if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, opened_or_path, mode='w') as opened:
            csv.DictWriter(opened, fieldnames).writeheader()

    if write_mode != MPIWriteMode.TEMPLATE:
        for current_rank_write in vm.ranks:
            if vm.rank == current_rank_write:
                with driver_scope(cls, opened_or_path, mode='a') as opened:
                    writer = csv.DictWriter(opened, fieldnames)
                    writer.writerows(record for _, record in vc.iter(**iter_kwargs))
            # Serialize the appends so only one rank has the file open at a time.
            vm.barrier()
def write_subsets(self, src_template, dst_template, wgt_template, index_path):
    """
    Write grid subsets to netCDF files using the provided filename templates, followed by an index file
    describing them. The call ends with a VM barrier.

    The source and destination templates must contain the full file path with a single pair of curly braces
    where the combination counter is inserted.

    >>> template_example = '/path/to/data_{}.nc'

    :param str src_template: The template for the source subset file.
    :param str dst_template: The template for the destination subset file.
    :param str wgt_template: The template for the weight filename. This should not be a full path. The name is
     used when generating weight files.

    >>> wgt_template = 'esmf_weights_{}.nc'

    :param index_path: Path to the output indexing netCDF.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []

    subsets = self.iter_src_grid_subsets(yield_dst=True)
    for ctr, (sub_src, sub_dst, dst_slc) in enumerate(subsets, start=1):
        src_path = src_template.format(ctr)
        dst_path = dst_template.format(ctr)

        # Only the file names, not the directories, are recorded in the index.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_template.format(ctr))
        dst_slices.append(dst_slc)

        for target, path in ((sub_src, src_path), (sub_dst, dst_path)):
            # An empty grid is replaced by an empty field without a grid.
            is_empty = bool(target.is_empty)
            field = Field(grid=None if is_empty else target, is_empty=is_empty)

            ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
            with vm.scoped_by_emptyable('field.write', field):
                if not vm.is_null:
                    field.write(path)
            ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # The index file is written by the first rank only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            index_constants = GridSplitterConstants.IndexFile
            dim = Dimension('nfiles', len(src_filenames))
            bounds_dimension = Dimension(name='bounds', size=2)

            vc = VariableCollection()

            # The index variable's attributes name the variables holding each piece of information.
            grid_splitter_index = index_constants.NAME_INDEX_VARIABLE
            x_bounds = index_constants.NAME_X_BOUNDS_VARIABLE
            y_bounds = index_constants.NAME_Y_BOUNDS_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[index_constants.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            vidx.attrs[x_bounds] = x_bounds
            vidx.attrs[y_bounds] = y_bounds
            vc.add_variable(vidx)

            # One string variable per list of file names.
            filename_variables = [
                ('source_filename', src_filenames, {'esmf_role': 'grid_splitter_source'}),
                ('destination_filename', dst_filenames,
                 {'esmf_role': index_constants.NAME_DESTINATION_VARIABLE}),
                ('weights_filename', wgt_filenames, {'esmf_role': 'grid_splitter_weights'}),
            ]
            for vname, value, attrs in filename_variables:
                vc.add_variable(Variable(name=vname, dimensions=dim, dtype=str, value=value, attrs=attrs))

            # Start and stop of each destination slice along the x and y dimensions.
            xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension],
                          attrs={'esmf_role': 'x_split_bounds'}, dtype=int)
            yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension],
                          attrs={'esmf_role': 'y_split_bounds'}, dtype=int)
            x_name = self.dst_grid.x.dimensions[0].name
            y_name = self.dst_grid.y.dimensions[0].name
            for idx, slc in enumerate(dst_slices):
                xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop
            vc.add_variable(xb)
            vc.add_variable(yb)

            vc.write(index_path)

    vm.barrier()
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure. Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new
       indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    The call contains collective VM operations, so every rank in the current VM must make it.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This
     may also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    # Variables expose their values through ``get_value``. Anything without it is assumed to be a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        pass

    # Work on a flat copy and remember the incoming shape so it can be restored on return.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Unique coordinate indices with duplicates across ranks removed.
    u = np.array(create_unique_global_array(cindex))

    # The first rank's data type is used for the new coordinate index on all ranks.
    lrank = vm.rank
    dtype = vm.bcast(u.dtype if lrank == 0 else None)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # New sequential index values for the unique values held by this rank.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Map from old to new index. This is used to remap the old coordinate index.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # Bounds of the rank's unique values. This is used as a cheap index when ranks are looking for index
    # overlaps. Every rank gets a copy of all bounds.
    local_bounds = (min(u), max(u)) if has_u else None
    lb_global = vm.bcast(vm.gather(local_bounds))

    # Find the ranks whose unique bounds contain any of the local coordinate indices.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank or lb is None:
            continue
        within_bounds = np.logical_and(lb[1] >= cindex, lb[0] <= cindex)
        if np.any(within_bounds):
            overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.bcast(vm.gather(overlaps))
    destinations = [requester for requester, targets in enumerate(global_overlaps) if vm.rank in targets]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # Fill the new coordinate indexing. The first rank runs a different routine from the others.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search,
                    tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)

    # Return array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
def write_subsets(self):
    """
    Write grid subsets to netCDF files using the path templates held by this object, followed by an index file
    describing them. The call contains collective VM operations and ends with a barrier.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    src_slices = []
    index_path = self.create_full_path_from_template('index_file')

    subsets = self.iter_src_grid_subsets(yield_dst=True)
    # The counter numbers the output files starting from one.
    for ctr, (sub_src, src_slc, sub_dst, dst_slc) in enumerate(subsets, start=1):
        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        to_write = [(sub_src, src_path)]
        if self.iter_dst is None:
            to_write.append((sub_dst, dst_path))

        for target, path in to_write:
            with vm.scoped_by_emptyable('field.write', target):
                if not vm.is_null:
                    ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                    Field(grid=target).write(path)
                    ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0. For each subset, the first
    # slice that is not None across the ranks is kept.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        src_slices = [
            next((rank_slices[idx] for rank_slices in gathered_src_grid_slice if rank_slices[idx] is not None),
                 None)
            for idx in range(len(src_slices))
        ]

    with vm.scoped('index write', [0]):
        if not vm.is_null:
            index_constants = GridSplitterConstants.IndexFile
            dim = Dimension('nfiles', len(src_filenames))

            vc = VariableCollection()

            # The index variable's attributes name the variables holding each piece of information and carry the
            # global grid shapes.
            grid_splitter_index = index_constants.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[index_constants.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            vidx.attrs[index_constants.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[index_constants.NAME_DST_GRID_SHAPE] = dst_global_shape
            vc.add_variable(vidx)

            # One string variable per list of file names.
            filename_variables = [
                ('source_filename', src_filenames, {'esmf_role': 'grid_splitter_source'}),
                ('destination_filename', dst_filenames,
                 {'esmf_role': index_constants.NAME_DESTINATION_VARIABLE}),
                ('weights_filename', wgt_filenames, {'esmf_role': 'grid_splitter_weights'}),
            ]
            for vname, value, attrs in filename_variables:
                vc.add_variable(Variable(name=vname, dimensions=dim, dtype=str, value=value, attrs=attrs))

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source
            self.src_grid._gs_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)
            # Destination
            self.dst_grid._gs_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with
    zero elements will be returned. This call is collective across the current VM.

    Ranks exchange the bounds of their local unique values. Where the bounds of two ranks overlap, the lower
    rank sends its values falling inside the higher rank's bounds and the higher rank drops any value it
    receives. A value held by several ranks therefore remains on the lowest of those ranks only.

    :param arr: Input array for unique operation. It may be empty on some or all ranks.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """
    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    # A rank without values has no bounds. ``None`` marks it so it is never considered an overlap.
    if unique_local.size > 0:
        local_bounds = unique_local.min(), unique_local.max()
    else:
        local_bounds = None
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    if local_bounds is not None:
        lower, upper = local_bounds
        for rank, lb in enumerate(lb_global):
            if rank == vm.rank or lb is None:
                continue
            # Either a local bound falls inside the other rank's bounds or the local bounds enclose them.
            bound_inside = lb[0] <= lower <= lb[1] or lb[0] <= upper <= lb[1]
            encloses = lower <= lb[0] and upper >= lb[1]
            if bound_inside or encloses:
                overlaps.append(rank)

    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE

    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Send out the overlapping sources to higher ranks. The requests and their buffers are retained because a
    # send buffer must stay alive and unmodified until its non-blocking send has completed.
    pending_sends = []
    for o in overlaps:
        if vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0],
                                         unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            # NOTE(review): the count is transmitted using the data's type to match the receiving side; a narrow
            # data type could not represent a large count - confirm.
            size_buffer = np.array([u_src.size], dtype=np_type)
            pending_sends.append((vm.comm.Isend([size_buffer, mpi_type], dest=o, tag=tag_select_send_size),
                                  size_buffer))
            pending_sends.append((vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap), u_src))

    # Receive and process conflicts to reduce the unique local values. Values are only received from lower
    # ranks, so the first rank never receives.
    for o in overlaps:
        if vm.rank > o:
            select_send_size = np.array([0], dtype=np_type)
            vm.comm.Irecv([select_send_size, mpi_type], source=o, tag=tag_select_send_size).wait()

            u_src = np.zeros(int(select_send_size[0]), dtype=np_type)
            vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap).wait()

            # Drop every local value the lower rank also holds.
            unique_local = unique_local[np.isin(unique_local, u_src, invert=True)]

    # Complete the sends before their buffers go out of scope.
    for request, _ in pending_sends:
        request.wait()

    vm.barrier()

    return unique_local
def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure. Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new
       indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    The call contains collective VM operations, so every rank in the current VM must make it.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This
     may also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)
    # Caches the local re-indexing for the process.
    cache = {}
    # Increment the indexing values based on its presence in the cache.
    curr_idx = 0
    for idx, to_reindex in enumerate(u.flat):
        if to_reindex not in cache:
            cache[to_reindex] = curr_idx
            curr_idx += 1

    # MPI communication tags.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. If the local cache is empty, there is no
    # offsetting to move between tasks.
    if len(cache) > 0:
        offset = max(cache.values()) + 1
    else:
        offset = 0

    # Synchronize the processes with the appropriate local offset. Each rank sends its stopping index to the next
    # rank, which adopts it as its own starting index.
    for idx, rank in enumerate(vm.ranks):
        try:
            dest_rank = vm.ranks[idx + 1]
        except IndexError:
            # The last rank has no successor.
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_index + offset, dest=dest_rank, tag=tag_cache_create)
            elif vm.rank == dest_rank:
                offset_previous = vm.comm.recv(source=rank, tag=tag_cache_create)
                start_index = offset_previous
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache.
    # NOTE(review): flat positions are used to index ``new_cindex`` and ``cindex`` directly, which looks valid only
    # for one-dimensional input - confirm.
    is_missing = False
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        try:
            local_new_cindex = cache[to_reindex]
        except KeyError:
            is_missing = True
            is_missing_indices.append(idx)
        else:
            new_cindex[idx] = local_new_cindex + start_index

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            # Each rank takes a turn as the asking rank if it has missing values.
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if is_missing_rank:
                # The answering ranks need to know how many requests to expect.
                n_missing = vm.bcast(len(is_missing_indices), root=rank)
                if vm.rank == rank:
                    # Ask every other rank for each missing value. A reply of None means the rank does not hold it.
                    for imi in is_missing_indices:
                        for subrank in vm.ranks:
                            if vm.rank != subrank:
                                vm.comm.send(cindex[imi], dest=subrank, tag=tag_cache_get_recv)
                                new_cindex_element = vm.comm.recv(
                                    source=subrank, tag=tag_cache_get_send)
                                if new_cindex_element is not None:
                                    new_cindex[imi] = new_cindex_element
                else:
                    # Answer each request from the local cache, applying the local starting index to any hit.
                    for _ in range(n_missing):
                        curr_missing = vm.comm.recv(source=rank, tag=tag_cache_get_recv)
                        new_cindex_element = cache.get(curr_missing)
                        if new_cindex_element is not None:
                            new_cindex_element += start_index
                        vm.comm.send(new_cindex_element, dest=rank, tag=tag_cache_get_send)

    return new_cindex, u
if vm.rank == 0: print 'creating subset:', subset_filename with vm.scoped_by_emptyable('grid subset', grid_sub): if not vm.is_null: extent_global = grid_sub.extent_global if vm.rank == 0: root = vm.rank_global else: extent_global = None live_ranks = vm.get_live_ranks_from_object(grid_sub) bbox = vm.bcast(extent_global, root=live_ranks[0]) vm.barrier() if vm.rank == 0: print 'starting bbox subset:', bbox vm.barrier() has_subset = get_subset(bbox, subset_filename, 1) vm.barrier() if vm.rank == 0: print 'finished bbox subset:', bbox vm.barrier() has_subset = vm.gather(has_subset) if vm.rank == 0: if any(has_subset): has_subset = True
def test_system_converting_state_boundaries_shapefile(self):
    """
    System test converting the state boundaries shapefile geometries to geometry coordinates and writing the
    result with the ESMF unstructured NetCDF driver.

    Every combination of ``transform_to_crs`` and ``use_geometry_iterator`` is exercised. The written file is read
    back on rank 0 and its geometries are compared against the geometries gathered from all ranks.
    """
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
    keywords = {'transform_to_crs': [None, Spherical], 'use_geometry_iterator': [False, True]}
    # Coordinate sums collected for each successful conversion.
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        # A target coordinate system is only passed to the conversion when the geometry iterator is used.
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        # Without a transformation the converted coordinates are expected to stay in WGS84.
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries)
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)
        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            # The error is only tolerated when the geometry iterator is off and a target coordinate system was
            # requested. Any other combination re-raises the original error.
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        # Copy every variable except the geometry variable onto the converted object's parent.
        for v in list(field.values()):
            if v.name != field.geom.name:
                gc.parent.add_variable(v.extract(), force=True)

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        # Collect the source geometries from all ranks. Only rank 0 validates the written file.
        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)
            for dv in field.data_variables:
                self.assertIn(dv.name, infield)
            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            # Each iterated item is indexed as (index into the gathered geometries, geometry).
            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)
        vm.barrier()

    # Test coordinates have actually changed.
    # The x-coordinate sums must agree between conversions while the y-coordinate sums must differ.
    if not k.use_geometry_iterator:
        for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
            for lhs, rhs in itertools.combinations(to_test, 2):
                if ctr == 0:
                    self.assertAlmostEqual(lhs, rhs)
                else:
                    self.assertNotAlmostEqual(lhs, rhs)
def test(self):
    """
    Test the destination grid slices produced by the grid chunker fixture, then write the chunks and check that the
    ``'data'`` totals of the written destination chunks match the total of the original destination grid.
    """
    chunker = self.fixture_grid_chunker()

    # Total of the destination data across all ranks. Only the root rank holds the reduced value.
    local_total = chunker.dst_grid.parent['data'].get_value().sum()
    gathered_totals = MPI_COMM.gather(local_total)
    if vm.rank == 0:
        desired_sum = np.sum(gathered_totals)

    # Expected slices are the product of two row bands and three column bands.
    y_bands = (slice(0, 180, None), slice(180, 360, None))
    x_bands = (slice(0, 240, None), slice(240, 480, None), slice(480, 720, None))
    desired = [{'y': y_band, 'x': x_band} for y_band in y_bands for x_band in x_bands]
    actual = list(chunker.iter_dst_grid_slices())
    self.assertEqual(actual, desired)

    chunker.write_chunks()

    if vm.rank == 0:
        rank_sums = []

    chunk_count = chunker.nchunks_dst[0] * chunker.nchunks_dst[1]
    for ctr in range(1, chunk_count + 1):
        src_path = chunker.create_full_path_from_template('src_template', index=ctr)
        dst_path = chunker.create_full_path_from_template('dst_template', index=ctr)

        src_field = RequestDataset(src_path).get()
        dst_field = RequestDataset(dst_path).get()

        # The source chunk must spatially contain its destination chunk.
        src_envelope_global = box(*src_field.grid.extent_global)
        dst_envelope_global = box(*dst_field.grid.extent_global)
        self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

        # Both chunk files must carry the data variable.
        for chunk_field in (src_field, dst_field):
            self.assertIn('data', get_variable_names(chunk_field.data_variables))

        gathered_chunk_totals = MPI_COMM.gather(dst_field['data'].get_value().sum())
        if MPI_RANK == 0:
            rank_sums.append(np.sum(gathered_chunk_totals))

    if vm.rank == 0:
        self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
        index_path = chunker.create_full_path_from_template('index_file')
        self.assertTrue(os.path.exists(index_path))

    vm.barrier()

    # Every rank opens the index file once the root has confirmed it exists.
    index_path = chunker.create_full_path_from_template('index_file')
    index_field = RequestDataset(index_path).get()
    index_keys = list(index_field.keys())
    self.assertTrue(len(index_keys) > 2)
def get_distributed_slice(self, slc):
    """
    Return a shallow copy of this dimension sliced in parallel. A rank whose local bounds do not overlap the slice
    receives an empty dimension.

    :param slc: A :class:`slice`-like object or a fancy slice. A fancy slice is applied as-is: it must already be
     processor-local (integer indices must be local indices) and is never manipulated or redistributed before
     slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    slc = get_formatted_slice(slc, 1)[0]
    is_standard = isinstance(slc, slice)

    # A full slice only needs a copy.
    if is_standard and slc == slice(None):
        return self.copy()
    # Use standard slicing for non-distributed dimensions.
    if not self.dist:
        return self[slc]

    # Standard slices are translated from global to local indices. Fancy slices are used unchanged.
    if is_standard:
        local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
        if local_slc is not None:
            local_slc = slice(*local_slc)
    else:
        local_slc = slc

    if local_slc is None:
        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        ret = self.copy()
        ret.convert_to_empty()
        dimension_size = 0
    else:
        # Slice overlaps so do a slice on the dimension using the local slice.
        ret = self[local_slc]
        dimension_size = len(ret)
    assert dimension_size >= 0

    # The new global bounds span the summed local sizes. The sum is built on the root rank and broadcast.
    gathered_sizes = vm.gather(dimension_size)
    bounds_global = None
    if vm.rank == 0:
        total_size = 0
        for size in gathered_sizes:
            try:
                total_size += size
            except TypeError:
                # Entries that cannot be added are skipped.
                pass
        bounds_global = (0, total_size)
    bounds_global = vm.bcast(bounds_global)
    if not ret.is_empty:
        ret.bounds_global = bounds_global

    # Normalize the local bounds on live ranks.
    inner_live_ranks = get_nonempty_ranks(ret, vm)
    with vm.scoped('bounds normalization', inner_live_ranks):
        if not vm.is_null:
            # The running offset starts at the size held by the first live rank.
            adjust = vm.bcast(len(ret) if vm.rank == 0 else None)
            for current_rank in vm.ranks:
                if vm.rank == current_rank and vm.rank != 0:
                    ret.bounds_local = [bound + adjust for bound in ret.bounds_local]
                    adjust += len(ret)
                vm.barrier()
                adjust = vm.bcast(adjust, root=current_rank)
    return ret
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    A value held by more than one rank is dropped by the higher rank: each rank sends the values falling inside
    the bounds of every overlapping higher rank, and each receiving rank removes the values it is sent.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError if ``arr`` is ``None`` or contains no elements.
    """
    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    local_bounds = min(unique_local), max(unique_local)
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == vm.rank:
            continue
        has_bound_inside = any(lb[0] <= lb2 <= lb[1] for lb2 in local_bounds)
        has_rank_inside = local_bounds[0] <= lb[0] and local_bounds[1] >= lb[1]
        if has_bound_inside or has_rank_inside:
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Non-blocking sends require their buffers to stay alive until the request completes. Hold on to every request
    # together with its buffer and complete them once the local receives are finished.
    pending_sends = []
    for o in overlaps:
        # The local rank is never in "overlaps", so only the ordering needs checking.
        if vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0],
                                         unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            # The size travels with the same MPI type as the payload to match the receiving side.
            size_buffer = np.array([u_src.size], dtype=np_type)
            req_size = vm.comm.Isend([size_buffer, mpi_type], dest=o, tag=tag_select_send_size)
            req_data = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)
            pending_sends.append((req_size, size_buffer))
            pending_sends.append((req_data, u_src))

    # Receive and process conflicts to reduce the unique local values. Only lower ranks send to the local rank.
    for o in overlaps:
        if vm.rank > o:
            incoming_size = np.array([0], dtype=np_type)
            vm.comm.Irecv([incoming_size, mpi_type], source=o, tag=tag_select_send_size).wait()

            incoming = np.zeros(incoming_size[0].astype(int), dtype=np_type)
            vm.comm.Irecv([incoming, mpi_type], source=o, tag=tag_overlap).wait()

            # Drop every local value already claimed by the lower rank.
            unique_local = unique_local[np.isin(unique_local, incoming, invert=True)]

    # Complete the outstanding sends before leaving the collective call.
    for request, _ in pending_sends:
        request.wait()

    vm.barrier()
    return unique_local
def get_distributed_slice(self, slc):
    """
    Slice the dimension collectively across the ranks of the current VM. The result is a shallow copy and may be
    empty on ranks that the slice does not touch.

    :param slc: A :class:`slice`-like object or a fancy slice. Fancy slices must be processor-local; when they hold
     integer indices, those indices must be local. A fancy ``slc`` is not manipulated or redistributed prior to
     slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    formatted = get_formatted_slice(slc, 1)[0]
    is_fancy = not isinstance(formatted, slice)

    if not is_fancy and formatted == slice(None):
        # Nothing is removed by a full slice.
        sliced = self.copy()
    elif not self.dist:
        # Use standard slicing for non-distributed dimensions.
        sliced = self[formatted]
    else:
        local_slc = formatted
        if not is_fancy:
            # Convert the global start/stop pair to indices relative to the local bounds.
            local_slc = get_global_to_local_slice((formatted.start, formatted.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)

        if local_slc is not None:
            # Slice overlaps so do a slice on the dimension using the local slice.
            sliced = self[local_slc]
            local_size = len(sliced)
        else:
            # Slice does not overlap local bounds. The dimension is now empty with size 0.
            sliced = self.copy()
            sliced.convert_to_empty()
            local_size = 0
        assert local_size >= 0

        # Accumulate the global size on the root rank, then share the resulting bounds with every rank.
        all_sizes = vm.gather(local_size)
        if vm.rank == 0:
            global_size = 0
            for one_size in all_sizes:
                try:
                    global_size += one_size
                except TypeError:
                    # Ignore gathered entries that do not support addition.
                    pass
            new_bounds_global = (0, global_size)
        else:
            new_bounds_global = None
        new_bounds_global = vm.bcast(new_bounds_global)
        if not sliced.is_empty:
            sliced.bounds_global = new_bounds_global

        # Normalize the local bounds on live ranks.
        live_ranks = get_nonempty_ranks(sliced, vm)
        with vm.scoped('bounds normalization', live_ranks):
            if not vm.is_null:
                offset = None
                if vm.rank == 0:
                    offset = len(sliced)
                offset = vm.bcast(offset)
                for active_rank in vm.ranks:
                    is_shifting_rank = vm.rank == active_rank and vm.rank != 0
                    if is_shifting_rank:
                        sliced.bounds_local = [edge + offset for edge in sliced.bounds_local]
                        offset += len(sliced)
                    vm.barrier()
                    # Pass the updated running offset on to the remaining ranks.
                    offset = vm.bcast(offset, root=active_rank)
    return sliced
def test_system_converting_state_boundaries_shapefile(self):
    """
    System test converting the state boundaries shapefile geometries to geometry coordinates and writing the
    result with the ESMF unstructured NetCDF driver. Only the ``'UGID'`` and ``'ID'`` variables are requested.

    Every combination of ``transform_to_crs`` and ``use_geometry_iterator`` is exercised. The written file is read
    back on rank 0 and its geometries are compared against the geometries gathered from all ranks.
    """
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
    keywords = {'transform_to_crs': [None, Spherical], 'use_geometry_iterator': [False, True]}
    # Coordinate sums collected for each successful conversion.
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        # A target coordinate system is only passed to the conversion when the geometry iterator is used.
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        # Without a transformation the converted coordinates are expected to stay in WGS84.
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()
        self.assertEqual(len(field.data_variables), 2)

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)
        # The two requested data variables must still be present on the field and on the geometry's parent.
        self.assertEqual(len(field.data_variables), 2)
        self.assertEqual(len(field.geom.parent.data_variables), 2)
        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            # The error is only tolerated when the geometry iterator is off and a target coordinate system was
            # requested. Any other combination re-raises the original error.
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        # Collect the source geometries from all ranks. Only rank 0 validates the written file.
        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)
            for dv in field.data_variables:
                self.assertIn(dv.name, infield)
            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            # Each iterated item is indexed as (index into the gathered geometries, geometry).
            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)
        vm.barrier()

    # Test coordinates have actually changed.
    # The x-coordinate sums must agree between conversions while the y-coordinate sums must differ.
    if not k.use_geometry_iterator:
        for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
            for lhs, rhs in itertools.combinations(to_test, 2):
                if ctr == 0:
                    self.assertAlmostEqual(lhs, rhs)
                else:
                    self.assertNotAlmostEqual(lhs, rhs)
def _write_variable_collection_main_(cls, field, opened_or_path, write_mode, **kwargs):
    """
    Write the records of a field to a vector GIS target using Fiona. Ranks write one after another, separated by
    barriers.

    :param field: The field to write. It must have a geometry variable.
    :param opened_or_path: The write target. When ``cls.inquire_opened_state`` reports it as opened, its schema,
     coordinate system and driver replace any values given by keyword.
    :param write_mode: The MPI write mode. No template file is written for ``MPIWriteMode.FILL`` and no records are
     written for ``MPIWriteMode.TEMPLATE``.
    :param kwargs: Optional ``'crs'``, ``'fiona_schema'``, ``'fiona_driver'`` (defaults to ``'ESRI Shapefile'``)
     and ``'iter_kwargs'`` (keyword arguments passed to the field's record iterator).
    :raises: ValueError if ``field`` is not a field or has no geometry variable.
    """
    from ocgis.collection.field import Field

    if not isinstance(field, Field):
        raise ValueError('Only fields may be written to vector GIS formats.')

    crs = kwargs.get('crs')
    schema = kwargs.get('fiona_schema')
    driver_name = kwargs.get('fiona_driver', 'ESRI Shapefile')
    record_kwargs = kwargs.pop('iter_kwargs', {})
    record_kwargs[KeywordArgument.DRIVER] = cls

    # The geometry variable supplies the general geometry type, which the record iterator may not be able to
    # determine.
    geom_variable = field.geom
    if geom_variable is None:
        raise ValueError('A geometry variable is required for writing to vector GIS formats.')

    if cls.inquire_opened_state(opened_or_path):
        # An opened target dictates its own schema, coordinate system and driver.
        schema = opened_or_path.schema
        crs = opened_or_path.crs
        driver_name = opened_or_path.driver
    else:
        # Fill in values not overloaded by the caller using the field and its first record.
        if crs is None and field.crs is not None:
            crs = field.crs.value
        _, archetype_record = next(field.iter(**record_kwargs))
        archetype_record = format_record_for_fiona(driver_name, archetype_record)
        if schema is None:
            schema = get_fiona_schema(geom_variable.geom_type, archetype_record)

    # The Fiona GeoJSON driver does not support update.
    is_geojson = driver_name == 'GeoJSON'
    mode = 'w' if is_geojson else 'a'

    # Keyword arguments shared by every driver scope opened below.
    scope_kwargs = dict(opened_or_path=opened_or_path, driver=driver_name, crs=crs, schema=schema)

    # Write the template file by opening and closing the target once on the root rank.
    if not is_geojson and vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, mode='w', **scope_kwargs):
            pass

    # Write data on each rank to the file.
    if write_mode != MPIWriteMode.TEMPLATE:
        for rank_to_write in vm.ranks:
            if vm.rank == rank_to_write:
                with driver_scope(cls, mode=mode, **scope_kwargs) as sink:
                    write_records_to_fiona(sink, field.iter(**record_kwargs), driver_name)
            vm.barrier()