def test_get_mask_from_intersects(self):
    """Compare the intersects mask of a scattered point geometry variable with a fixed expectation.

    The geometry variable is created from a point grid on rank 0 and scattered with the ``x``
    dimension distributed. Each rank compares its local mask against the matching window of the
    global desired mask. Rank 0 additionally checks that pre-masked geometries keep their mask.
    """
    polygon_wkt = 'POLYGON((-98.26574367088608142 40.19952531645570559,-98.71764240506330168 39.54825949367089066,-99.26257911392406186 39.16281645569620906,-99.43536392405064817 38.64446202531645724,-98.78409810126584034 38.33876582278481493,-98.23916139240508016 37.71408227848101546,-97.77397151898735217 37.67420886075949937,-97.62776898734178133 38.15268987341772799,-98.39865506329114453 38.52484177215190186,-98.23916139240508016 39.33560126582278826,-97.73409810126582897 39.58813291139241386,-97.52143987341773368 40.27927215189873777,-97.52143987341773368 40.27927215189873777,-98.26574367088608142 40.19952531645570559))'
    poly = wkt.loads(polygon_wkt)
    desired_mask = np.array([[True, True, False, True],
                             [True, False, True, True],
                             [True, True, False, True]])

    # Only the x-dimension is flagged as distributed.
    dist = OcgDist()
    xdim = dist.create_dimension('x', 4, dist=True)
    ydim = dist.create_dimension('y', 3)
    dist.create_dimension('bounds', 2)
    dist.update_dimension_bounds()

    # Build the geometry variable on the root rank only, then scatter it.
    pa = None
    if MPI_RANK == 0:
        grid = Grid(x=self.get_variable_x(), y=self.get_variable_y(), abstraction='point', crs=WGS84())
        pa = get_geometry_variable(grid)
    pa = variable_scatter(pa, dist)

    vm.create_subcomm_by_emptyable('test_get_mask_from_intersects', pa, is_current=True)
    if vm.is_null:
        self.assertTrue(pa.is_empty)
        return

    # Exercise the spatial index only when the environment enables it.
    spatial_index_options = [False, True] if env.USE_SPATIAL_INDEX else [False]

    for k in self.iter_product_keywords({'use_spatial_index': spatial_index_options}):
        ret = pa.get_mask_from_intersects(poly, use_spatial_index=k.use_spatial_index)
        rows = slice(*ydim.bounds_local)
        columns = slice(*xdim.bounds_local)
        desired_mask_local = desired_mask[rows, columns]
        if MPI_RANK > 1:
            self.assertIsNone(ret)
        else:
            self.assertNumpyAll(desired_mask_local, ret)

        # This does not test a parallel operation.
        if MPI_RANK == 0:
            # Test pre-masked values in geometry are okay for intersects operation.
            points = [Point(1, 1), Point(2, 2), Point(3, 3)]
            masked_points = np.ma.array(points, mask=[False, True, False], dtype=object)
            pa2 = GeometryVariable(value=masked_points, dimensions='ngeom')
            res = pa2.get_mask_from_intersects(box(0, 0, 5, 5), use_spatial_index=k.use_spatial_index)
            self.assertNumpyAll(res, masked_points.mask)
def test_reduce_global(self):
    """Slice a scattered fixture, reduce it globally, and check the gathered index and coordinates.

    The reduced collection is also written to disk and read back to confirm the ``start_index``
    attribute on ``cindex``.
    """
    pt = self.fixture(cindex=self.fixture_cindex(1), start_index=1)
    self.assertEqual(pt.start_index, 1)

    # Copy every dimension into the distribution; only the element dimension is distributed.
    dist = OcgDist()
    for source_dim in pt.parent.dimensions.values():
        dim_copy = source_dim.copy()
        if dim_copy.name == self.fixture_element_dimension.name:
            dim_copy.dist = True
        dist.add_dimension(dim_copy)
    dist.update_dimension_bounds()

    scattered_parent = variable_collection_scatter(pt.parent, dist)

    vm.create_subcomm_by_emptyable('coordinate reduction', scattered_parent, is_current=True)
    if vm.is_null:
        return

    pt.parent = scattered_parent
    sub = pt.get_distributed_slice(slice(2, 5))

    vm.create_subcomm_by_emptyable('distributed slice', sub, is_current=True)
    if vm.is_null:
        return

    reduced = sub.reduce_global()

    gathered_cindex = variable_gather(reduced.cindex.extract())
    if vm.rank == 0:
        self.assertEqual(gathered_cindex.get_value().flatten().tolist(), [1, 2, 3])

    # Every rank takes part in the gather; only the root inspects the result.
    gathered_coords = []
    for coordinate_variable in reduced.coordinate_variables:
        gathered_coords.append(variable_gather(coordinate_variable.extract()))
    if vm.rank == 0:
        actual_coords = [c.get_value().tolist() for c in gathered_coords]
        desired = [[2.0, 3.0, 4.0], [8.0, 9.0, 10.0], [14.0, 15.0, 16.0]]
        self.assertEqual(actual_coords, desired)

    path = self.get_temporary_file_path('foo.nc')
    reduced.parent.write(path)

    field_from_disk = Field.read(path)
    self.assertEqual(field_from_disk['cindex'].attrs['start_index'], 1)
def test_reduce_reindex_coordinate_variables(self):
    """Check that a scattered coordinate index is reduced and reindexed consistently.

    After gathering on rank 0, indexing the reduced coordinates with the new index must reproduce
    the coordinates selected by the original global index.
    """
    self.add_barrier = False

    dist = OcgDist()
    dist.create_dimension('dim', 12, dist=True)
    dist.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    # The index variable exists on the root rank only before scattering.
    var_cindex = None
    if vm.rank == 0:
        var_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim')
    var_cindex = variable_scatter(var_cindex, dist)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coord_values = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
    coords = Variable(name='coords', value=coord_values, dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_variables(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    # A rank with no unique indices contributes an empty array to the gather.
    new_coords = coords[u_indices].get_value() if len(u_indices) > 0 else np.array([])

    coords_by_rank = vm.gather(new_coords)
    cindex_by_rank = vm.gather(new_cindex)
    if vm.rank != 0:
        return

    all_coords = hgather(coords_by_rank)
    all_cindex = hgather(cindex_by_rank)

    actual = all_coords[all_cindex]

    self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
    desired_new_coords = [11, 22, 44, 55, 66, 77]
    self.assertAsSetEqual(all_coords.tolist(), desired_new_coords)
    self.assertEqual(len(all_coords), len(desired_new_coords))

    self.assertNumpyAll(actual, desired)
def test_get_mask_from_intersects(self):
    """Compare the intersects mask of a scattered point geometry variable with a fixed expectation.

    The geometry variable is created from a point grid on rank 0 and scattered with the ``x``
    dimension distributed. Each rank compares its local mask against the matching window of the
    global desired mask. Rank 0 additionally checks that pre-masked geometries keep their mask.
    """
    polygon_wkt = 'POLYGON((-98.26574367088608142 40.19952531645570559,-98.71764240506330168 39.54825949367089066,-99.26257911392406186 39.16281645569620906,-99.43536392405064817 38.64446202531645724,-98.78409810126584034 38.33876582278481493,-98.23916139240508016 37.71408227848101546,-97.77397151898735217 37.67420886075949937,-97.62776898734178133 38.15268987341772799,-98.39865506329114453 38.52484177215190186,-98.23916139240508016 39.33560126582278826,-97.73409810126582897 39.58813291139241386,-97.52143987341773368 40.27927215189873777,-97.52143987341773368 40.27927215189873777,-98.26574367088608142 40.19952531645570559))'
    poly = wkt.loads(polygon_wkt)
    desired_mask = np.array([[True, True, False, True],
                             [True, False, True, True],
                             [True, True, False, True]])

    # Only the x-dimension is flagged as distributed.
    dist = OcgDist()
    xdim = dist.create_dimension('x', 4, dist=True)
    ydim = dist.create_dimension('y', 3)
    dist.create_dimension('bounds', 2)
    dist.update_dimension_bounds()

    # Build the geometry variable on the root rank only, then scatter it.
    pa = None
    if MPI_RANK == 0:
        grid = Grid(x=self.get_variable_x(), y=self.get_variable_y(), abstraction='point', crs=WGS84())
        pa = get_geometry_variable(grid)
    pa = variable_scatter(pa, dist)

    vm.create_subcomm_by_emptyable('test_get_mask_from_intersects', pa, is_current=True)
    if vm.is_null:
        self.assertTrue(pa.is_empty)
        return

    # Exercise the spatial index only when the environment enables it.
    spatial_index_options = [False, True] if env.USE_SPATIAL_INDEX else [False]

    for k in self.iter_product_keywords({'use_spatial_index': spatial_index_options}):
        ret = pa.get_mask_from_intersects(poly, use_spatial_index=k.use_spatial_index)
        rows = slice(*ydim.bounds_local)
        columns = slice(*xdim.bounds_local)
        desired_mask_local = desired_mask[rows, columns]
        if MPI_RANK > 1:
            self.assertIsNone(ret)
        else:
            self.assertNumpyAll(desired_mask_local, ret)

        # This does not test a parallel operation.
        if MPI_RANK == 0:
            # Test pre-masked values in geometry are okay for intersects operation.
            points = [Point(1, 1), Point(2, 2), Point(3, 3)]
            masked_points = np.ma.array(points, mask=[False, True, False], dtype=object)
            pa2 = GeometryVariable(value=masked_points, dimensions='ngeom')
            res = pa2.get_mask_from_intersects(box(0, 0, 5, 5), use_spatial_index=k.use_spatial_index)
            self.assertNumpyAll(res, masked_points.mask)
def test_reduce_global(self):
    """Slice a scattered fixture, reduce it globally, and check the gathered index and coordinates.

    The reduced collection is also written to disk and read back to confirm the ``start_index``
    attribute on ``cindex``.
    """
    pt = self.fixture(cindex=self.fixture_cindex(1), start_index=1)
    self.assertEqual(pt.start_index, 1)

    # Copy every dimension into the distribution; only the element dimension is distributed.
    dist = OcgDist()
    for source_dim in pt.parent.dimensions.values():
        dim_copy = source_dim.copy()
        if dim_copy.name == self.fixture_element_dimension.name:
            dim_copy.dist = True
        dist.add_dimension(dim_copy)
    dist.update_dimension_bounds()

    scattered_parent = variable_collection_scatter(pt.parent, dist)

    vm.create_subcomm_by_emptyable('coordinate reduction', scattered_parent, is_current=True)
    if vm.is_null:
        return

    pt.parent = scattered_parent
    sub = pt.get_distributed_slice(slice(2, 5))

    vm.create_subcomm_by_emptyable('distributed slice', sub, is_current=True)
    if vm.is_null:
        return

    reduced = sub.reduce_global()

    gathered_cindex = variable_gather(reduced.cindex.extract())
    if vm.rank == 0:
        self.assertEqual(gathered_cindex.get_value().flatten().tolist(), [1, 2, 3])

    # Every rank takes part in the gather; only the root inspects the result.
    gathered_coords = []
    for coordinate_variable in reduced.coordinate_variables:
        gathered_coords.append(variable_gather(coordinate_variable.extract()))
    if vm.rank == 0:
        actual_coords = [c.get_value().tolist() for c in gathered_coords]
        desired = [[2.0, 3.0, 4.0], [8.0, 9.0, 10.0], [14.0, 15.0, 16.0]]
        self.assertEqual(actual_coords, desired)

    path = self.get_temporary_file_path('foo.nc')
    reduced.parent.write(path)

    field_from_disk = Field.read(path)
    self.assertEqual(field_from_disk['cindex'].attrs['start_index'], 1)
def test_reduce_reindex_coordinate_index(self):
    """Check that a scattered coordinate index is reduced and reindexed consistently.

    After gathering on rank 0, indexing the reduced coordinates with the new index must reproduce
    the coordinates selected by the original global index.
    """
    dist = OcgDist()
    dist.create_dimension('dim', 12, dist=True)
    dist.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    # The index variable exists on the root rank only before scattering.
    var_cindex = None
    if vm.rank == 0:
        var_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim')
    var_cindex = variable_scatter(var_cindex, dist)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coord_values = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
    coords = Variable(name='coords', value=coord_values, dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_index(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    # A rank with no unique indices contributes an empty array to the gather.
    new_coords = coords[u_indices].get_value() if len(u_indices) > 0 else np.array([])

    coords_by_rank = vm.gather(new_coords)
    cindex_by_rank = vm.gather(new_cindex)
    if vm.rank != 0:
        return

    all_coords = hgather(coords_by_rank)
    all_cindex = hgather(cindex_by_rank)

    actual = all_coords[all_cindex]

    self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
    desired_new_coords = [11, 22, 44, 55, 66, 77]
    self.assertAsSetEqual(all_coords.tolist(), desired_new_coords)
    self.assertEqual(len(all_coords), len(desired_new_coords))

    self.assertNumpyAll(actual, desired)
def test_get_intersects(self):
    """Subset scattered polygon coordinates with a geometry and check the surviving centroids.

    The centroid comparison runs on the subset directly and again, for both the subset and its
    globally reduced form, once the reduced coordinates have been gathered and broadcast to all
    ranks.
    """
    desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]

    def assert_gathered_centroids(target):
        # Collect local centroids, gather them, and compare the flattened list on the root rank.
        # The gather is called by every rank in the current communicator.
        local = [[g[1].centroid.x, g[1].centroid.y] for g in target.iter_geometries() if g[1] is not None]
        by_rank = vm.gather(local)
        if vm.rank == 0:
            flattened = [pair for rank_pairs in by_rank for pair in rank_pairs]
            self.assertEqual(flattened, desired)

    subset_geom = self.fixture_subset_geom()
    poly = self.fixture()

    # Scatter the polygon geometry coordinates for the parallel case.
    dist = OcgDist()
    for d in poly.parent.dimensions.values():
        d = d.copy()
        if d.name == poly.dimensions[0].name:
            d.dist = True
        dist.add_dimension(d)
    dist.update_dimension_bounds()

    poly.parent = variable_collection_scatter(poly.parent, dist)

    vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
    if vm.is_null:
        return

    poly.parent._validate_()

    # Every scattered variable must share the single parent collection.
    for v in poly.parent.values():
        self.assertEqual(id(v.parent), id(poly.parent))
        self.assertEqual(len(v.parent), len(poly.parent))

    sub = poly.get_intersects(subset_geom)
    vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
    if vm.is_null:
        return

    assert_gathered_centroids(sub)

    self.assertEqual(len(sub.parent), len(poly.parent))

    sub.parent._validate_()
    sub2 = sub.reduce_global()
    sub2.parent._validate_()

    # Gather then broadcast coordinates so all coordinates are available on each process.
    to_add = []
    for gather_target in [sub2.x, sub2.y]:
        gathered = variable_gather(gather_target.extract())
        gathered = vm.bcast(gathered)
        to_add.append(gathered)
    for t in to_add:
        sub2.parent.add_variable(t, force=True)

    for to_check in [sub, sub2]:
        assert_gathered_centroids(to_check)
def test_get_intersects_state_boundaries(self):
    """Subset a global grid with the union of the state boundary geometries.

    The grid is tested without and with bounds. For each case the gathered mask shape, the
    returned slice, and the count of unmasked cells are compared with fixed values, and the
    subset grid is written to a temporary netCDF file.
    """
    path_shp = self.path_state_boundaries
    geoms = []
    with fiona.open(path_shp) as source:
        for record in source:
            geom = shape(record['geometry'])
            geoms.append(geom)

    gvar = GeometryVariable(value=geoms, dimensions='ngeom')
    gvar_sub = gvar.get_unioned()

    # The union may be None on some ranks; broadcast so every rank holds the subset geometry.
    if gvar_sub is not None:
        subset = gvar_sub.get_value().flatten()[0]
    else:
        subset = None
    subset = MPI_COMM.bcast(subset)
    resolution = 1.0

    for with_bounds in [False, True]:
        grid = self.get_gridxy_global(resolution=resolution, with_bounds=with_bounds)

        vm.create_subcomm_by_emptyable('global grid', grid, is_current=True)
        if not vm.is_null:
            res = grid.get_intersects(subset, return_slice=True)
            grid_sub, slc = res

            vm.create_subcomm_by_emptyable('grid subset', grid_sub, is_current=True)

            if not vm.is_null:
                mask = Variable('mask_after_subset', grid_sub.get_mask(), dimensions=grid_sub.dimensions)
                mask = variable_gather(mask)

                # Only the root computes the summary values; they are broadcast so that every
                # rank can run the assertions below.
                if vm.rank == 0:
                    mask_sum = np.invert(mask.get_value()).sum()
                    mask_shape = mask.shape
                else:
                    mask_sum = None
                    mask_shape = None
                mask_sum = vm.bcast(mask_sum)
                mask_shape = vm.bcast(mask_shape)

                if with_bounds:
                    self.assertEqual(mask_shape, (54, 113))
                    self.assertEqual(slc, (slice(108, 162, None), slice(1, 114, None)))
                    self.assertEqual(mask_sum, 1358)
                else:
                    # The global dimension bounds are only checked for a two-process run.
                    if MPI_SIZE == 2:
                        grid_bounds_global = [dim.bounds_global for dim in grid_sub.dimensions]
                        self.assertEqual(grid_bounds_global, [(0, 52), (0, 105)])
                    self.assertEqual(mask_shape, (52, 105))
                    self.assertEqual(slc, (slice(109, 161, None), slice(8, 113, None)))
                    self.assertEqual(mask_sum, 1087)

                # Share one output path from the root so all ranks write to the same file.
                if vm.rank == 0:
                    path = self.get_temporary_file_path('foo.nc')
                else:
                    path = None
                path = vm.bcast(path)
                field = Field(grid=grid_sub)
                field.write(path)

        # Reset the virtual machine between iterations and synchronize all ranks.
        vm.finalize()
        vm.__init__()
        MPI_COMM.Barrier()
def test_get_intersects_parallel(self):
    """Exercise ``Grid.get_intersects`` across keyword combinations and against a grid read from file.

    Three parts: an empty subset must raise ``EmptySubsetError``; each keyword combination must
    return the expected slice; and a grid built from file-backed variables must subset without
    its coordinate values having been loaded beforehand.

    :raises SkipTest: When running on Python 3.5.
    """
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    grid = self.get_gridxy()
    live_ranks = vm.get_live_ranks_from_object(grid)

    # Test with an empty subset.
    subset_geom = box(1000., 1000., 1100., 1100.)
    with vm.scoped('empty subset', live_ranks):
        if not vm.is_null:
            with self.assertRaises(EmptySubsetError):
                grid.get_intersects(subset_geom)

    # Test combinations.
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    # NOTE(review): "is_vectorized" is part of the product but is not read inside the loop below.
    keywords = dict(is_vectorized=[True, False], has_bounds=[False, True], use_bounds=[False, True],
                    keep_touches=[True, False])

    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        grid = self.get_gridxy()

        vm_name, _ = vm.create_subcomm_by_emptyable('grid testing', grid, is_current=True)
        if vm.is_null:
            # Release the subcommunicator before skipping this combination.
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        if k.has_bounds:
            grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
            self.assertTrue(grid.has_bounds)

        # Cannot use bounds with a point grid abstraction.
        if k.use_bounds and grid.abstraction == 'point':
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        grid_sub, slc = grid.get_intersects(subset_geom, keep_touches=k.keep_touches, use_bounds=k.use_bounds,
                                            return_slice=True)

        # The source grid must still have its bounds after the subset.
        if k.has_bounds:
            self.assertTrue(grid.has_bounds)

        # Test geometries are filled appropriately after allocation.
        if not grid_sub.is_empty:
            for t in grid_sub.get_abstraction_geometry().get_value().flat:
                self.assertIsInstance(t, BaseGeometry)

        self.assertIsInstance(grid_sub, Grid)
        # The expected slice depends on whether touching cells are kept and whether bounds are used.
        if k.keep_touches:
            if k.has_bounds and k.use_bounds:
                desired = (slice(0, 3, None), slice(0, 3, None))
            else:
                desired = (slice(1, 3, None), slice(1, 2, None))
        else:
            if k.has_bounds and k.use_bounds:
                desired = (slice(1, 3, None), slice(1, 2, None))
            else:
                desired = (slice(1, 2, None), slice(1, 2, None))
        if not grid.is_empty:
            self.assertEqual(grid.has_bounds, k.has_bounds)
            self.assertTrue(grid.is_vectorized)
        self.assertEqual(slc, desired)

        vm.free_subcomm(name=vm_name)
        vm.set_comm()

    # Test against a file.
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    # Share a single path from rank 0 with every rank.
    if MPI_RANK == 0:
        path_grid = self.get_temporary_file_path('grid.nc')
    else:
        path_grid = None
    path_grid = MPI_COMM.bcast(path_grid)

    grid_to_write = self.get_gridxy()
    with vm.scoped_by_emptyable('write', grid_to_write):
        if not vm.is_null:
            field = Field(grid=grid_to_write)
            field.write(path_grid, driver=DriverNetcdfCF)
    MPI_COMM.Barrier()

    rd = RequestDataset(uri=path_grid)
    x = SourcedVariable(name='x', request_dataset=rd)
    self.assertIsNone(x._value)
    y = SourcedVariable(name='y', request_dataset=rd)
    # Creating "y" must not have populated the value of "x".
    self.assertIsNone(x._value)
    self.assertIsNone(y._value)

    grid = Grid(x, y)

    # Building the grid must not populate the coordinate values either.
    for target in [grid._y_name, grid._x_name]:
        self.assertIsNone(grid.parent[target]._value)
    self.assertTrue(grid.is_vectorized)

    with vm.scoped_by_emptyable('intersects', grid):
        if not vm.is_null:
            sub, slc = grid.get_intersects(subset_geom, return_slice=True)

            self.assertEqual(slc, (slice(1, 3, None), slice(1, 2, None)))
            self.assertIsInstance(sub, Grid)

    # The file may be deleted before other ranks open.
    MPI_COMM.Barrier()
def _update_aggregation_wrapping_crs_(obj, alias, sfield, subset_sdim, subset_ugid):
    """
    Apply spatial aggregation, geometry wrapping, and coordinate system updates to a subsetted field.

    :param obj: Object supplying the operations (``obj.ops``), the subset logger (``obj._subset_log``), and the
     back-transform mapping (``obj._backtransform``).
    :param alias: Alias forwarded to log messages.
    :param sfield: The subsetted field to update. It is checked with ``raise_if_empty`` on entry.
    :param subset_sdim: The selection geometry field or ``None``. If present and it has a geometry, its unique
     identifier is assigned to the aggregated geometry. Its coordinate system is updated in-place when it differs
     from the output coordinate system.
    :param subset_ugid: Selection geometry identifier forwarded to log messages.
    :returns: The updated field. This is returned early if the current communicator is null following the
     aggregation step. It is a copy of the working field when an output coordinate system is set.
    :raises ValueError: If the union returns ``None`` when running with a communicator of size one.
    """
    raise_if_empty(sfield)

    ocgis_lh('entering _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias, ugid=subset_ugid,
             level=logging.DEBUG)

    # Aggregate if requested.
    if obj.ops.aggregate:
        ocgis_lh('aggregate requested in _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias,
                 ugid=subset_ugid, level=logging.DEBUG)

        # There may be no geometries if we are working with a gridded dataset. Load the geometries if this is the case.
        sfield.set_abstraction_geom()

        ocgis_lh('after sfield.set_abstraction_geom in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # Union the geometries and spatially average the data variables.
        sfield = sfield.geom.get_unioned(spatial_average=sfield.data_variables)

        ocgis_lh('after sfield.geom.get_unioned in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # None is returned for the non-root process. Check we are in parallel and create an empty field.
        if sfield is None:
            if vm.size == 1:
                raise ValueError('None should not be returned from get_unioned if running on a single processor.')
            else:
                sfield = Field(is_empty=True)
        else:
            # The union returns a geometry variable. Continue with the field that holds it.
            sfield = sfield.parent

        # Ranks holding an empty field after the spatial average are excluded from the new subcommunicator.
        vm.create_subcomm_by_emptyable(SubcommName.SPATIAL_AVERAGE, sfield, is_current=True, clobber=True)

        if not vm.is_null and subset_sdim is not None and subset_sdim.geom is not None:
            # Add the unique geometry identifier variable. This should match the selection geometry's identifier.
            new_gid_variable_kwargs = dict(name=HeaderName.ID_GEOMETRY, value=subset_sdim.geom.ugid.get_value(),
                                           dimensions=sfield.geom.dimensions)
            dm = get_data_model(obj.ops)
            new_gid_variable = create_typed_variable_from_data_model('int', data_model=dm, **new_gid_variable_kwargs)
            sfield.geom.set_ugid(new_gid_variable)

    if vm.is_null:
        ocgis_lh(msg='null communicator following spatial average. returning.', logger=obj._subset_log,
                 level=logging.DEBUG)
        return sfield

    raise_if_empty(sfield)

    ocgis_lh(msg='before wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)
    try:
        wrapped_state = sfield.wrapped_state
    except WrappedStateEvalTargetMissing:
        # If there is no target for wrapping evaluation, then consider this unknown.
        wrapped_state = WrappedState.UNKNOWN
    ocgis_lh(msg='after wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)

    # Wrap the returned data.
    if not env.OPTIMIZE_FOR_CALC and not sfield.is_empty:
        if wrapped_state == WrappedState.UNWRAPPED:
            ocgis_lh('wrap target is empty: {}'.format(sfield.is_empty), obj._subset_log, level=logging.DEBUG)

            # There may be no geometries if we are working with a gridded dataset. Load the geometries if this
            # is the case.
            sfield.set_abstraction_geom()

            if obj.ops.output_format in constants.VECTOR_OUTPUT_FORMATS and obj.ops.vector_wrap:
                ocgis_lh('wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

                # Copy the geometry values before wrapping as wrapping will be performed inplace. The original
                # field may need to be reused for additional subsets.
                geom = sfield.geom
                copied_geom = geom.get_value().copy()
                geom.set_value(copied_geom)
                # Some grids do not play nicely with wrapping. Bounds may be less than zero for an unwrapped grid.
                # Force wrapping if it is requested. Normally, when force is false there is a pass-through that will
                # leave the data untouched.
                geom.wrap(force=True)
                ocgis_lh('finished wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

    # Transform back to rotated pole if necessary.
    original_rotated_pole_crs = obj._backtransform.get(constants.BackTransform.ROTATED_POLE)
    if original_rotated_pole_crs is not None:
        # The back-transform is skipped when the requested output coordinate system is Spherical or WGS84.
        if not isinstance(obj.ops.output_crs, (Spherical, WGS84)):
            sfield.update_crs(original_rotated_pole_crs)

    # Update the coordinate system of the data output.
    if obj.ops.output_crs is not None:
        # If the geometry is not none, it may need to be projected to match the output coordinate system.
        if subset_sdim is not None and subset_sdim.crs != obj.ops.output_crs:
            subset_sdim.update_crs(obj.ops.output_crs)

        # Update the subsetted field's coordinate system. The update is applied to a copy of the field.
        sfield = sfield.copy()
        sfield.update_crs(obj.ops.output_crs)

    # Wrap or unwrap the data if the coordinate system permits.
    _update_wrapping_(obj, sfield)

    ocgis_lh('leaving _update_aggregation_wrapping_crs_', obj._subset_log, level=logging.DEBUG)

    return sfield
def _process_geometries_(self, itr, field, alias):
    """
    Spatially subset ``field`` with each selection geometry and yield one collection per geometry.

    This is a generator. For each element of ``itr``, a new collection is initialized, the field is subset
    (optionally regridded, aggregated, wrapped, and reprojected), and the collection is yielded.

    :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting. An element may be ``None``, in
     which case no spatial subset is applied.
    :type itr: [None] or [:class:`~ocgis.Field`, ...]
    :param :class:`ocgis.Field` field: The target field for operations.
    :param str alias: The request data alias currently being processed.
    :rtype: generator yielding :class:`~ocgis.SpatialCollection`
    """
    assert isinstance(field, Field)

    ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
    # Process each geometry.
    for subset_field in itr:
        # Initialize the collection storage.
        coll = self._get_initialized_collection_()
        if vm.is_null:
            # On a null communicator the field is passed through without further processing.
            sfield = field
        else:
            # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
            # subset target with wrapping, coordinate system conversion, etc.
            subset_field = deepcopy(subset_field)

            if self.ops.regrid_destination is not None:
                # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                # sources.
                subset_field_for_regridding = deepcopy(subset_field)

            # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
            # system.
            key = constants.BackTransform.ROTATED_POLE
            self._backtransform[key] = self._get_update_rotated_pole_state_(field, subset_field)

            # Check if the geometric abstraction is available on the field object.
            self._assert_abstraction_available_(field)

            # Return a slice or snippet if either of these are requested.
            field = self._get_slice_or_snippet_(field)

            # Choose the subset UGID value.
            if subset_field is None:
                msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                subset_ugid = None
            else:
                subset_ugid = subset_field.geom.ugid.get_value()[0]
                msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
            ocgis_lh(msg=msg, logger=self._subset_log)

            if subset_field is not None:
                # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                if subset_field.crs is not None and subset_field.crs != field.crs:
                    subset_field.update_crs(field.crs)
                # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                subset_field = self._get_buffered_subset_geometry_if_point_(field, subset_field)

            # If there is a selection geometry present, use it for the spatial subset. If not, all the field's data
            # is being returned.
            if subset_field is None:
                sfield = field
            else:
                sfield = self._get_spatially_subsetted_field_(alias, field, subset_field, subset_ugid)

            ocgis_lh(msg='after self._get_spatially_subsetted_field_', logger=self._subset_log,
                     level=logging.DEBUG)

            # Create the subcommunicator following the data subset to ensure non-empty communication.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET, sfield, is_current=True, clobber=True)

            if not vm.is_null:
                # NOTE(review): raise_if_empty is only reached when the field reports it is not empty - confirm
                # this guard is intended.
                if not sfield.is_empty and not self.ops.allow_empty:
                    raise_if_empty(sfield)

                # If the base size is being requested, bypass the rest of the operations.
                if not self._request_base_size_only:
                    # Perform regridding operations if requested.
                    if self.ops.regrid_destination is not None and sfield.regrid_source:
                        sfield = self._get_regridded_field_with_subset_(
                            sfield, subset_field_for_regridding=subset_field_for_regridding)
                    else:
                        ocgis_lh(msg='no regridding operations', logger=self._subset_log, level=logging.DEBUG)
                    # If empty returns are allowed, there may be an empty field.
                    if sfield is not None:
                        # Only update spatial stuff if there are no calculations and, if there are calculations,
                        # those calculations are not expecting raw values.
                        if self.ops.calc is None or (self.ops.calc is not None and not self.ops.calc_raw):
                            # Update spatial aggregation, wrapping, and coordinate systems.
                            sfield = _update_aggregation_wrapping_crs_(self, alias, sfield, subset_field,
                                                                       subset_ugid)
                            ocgis_lh('after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                     self._subset_log, level=logging.DEBUG)

        # Add the created field to the output collection with the selection geometry. A missing field is only
        # accepted when aggregation was requested.
        if sfield is None:
            assert self.ops.aggregate
        if sfield is not None:
            coll.add_field(sfield, subset_field)

        yield coll
def _process_subsettables_(self, rds):
    """
    Create the target field from the request datasets, apply non-spatial operations, and yield subset
    collections.

    This is a generator. Fields are loaded (or taken from optimizations), merged for multivariate calculations,
    and subset by time, level, etc. Collections produced by the spatial processing are then yielded one at a
    time after optional units conformance.

    :param rds: Sequence of :class:`~ocgis.RequestDataset` objects. :class:`~ocgis.Field` objects are also
     accepted as elements.
    :type rds: sequence
    :rtype: generator yielding :class:`ocgis.collection.base.AbstractCollection`
    :raises NotImplementedError: If an element cannot be loaded through ``get`` and ``format_time`` is false.
    :raises ValueError: If the created field is empty on a non-null communicator.
    """
    ocgis_lh(msg='entering _process_subsettables_', logger=self._subset_log, level=logging.DEBUG)

    # This is used to define the group of request datasets for things like logging and exceptions.
    try:
        alias = '_'.join([r.field_name for r in rds])
    except AttributeError:
        # Allow field objects which do not expose the "field_name" attribute.
        try:
            alias = '_'.join([r.name for r in rds])
        except TypeError:
            # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
            alias = None
    except NoDataVariablesFound:
        # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
        alias = None

    ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG)
    # Create the field object. Field objects may be passed directly to operations.
    # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
        ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG)
        field = [self.ops.optimizations['fields'][rd.field_name].copy() for rd in rds]
        has_field_optimizations = True
    else:
        # Indicates no field optimizations loaded.
        has_field_optimizations = False
    try:
        # No field optimizations and data should be loaded from source.
        if not has_field_optimizations:
            ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG)
            len_rds = len(rds)
            field = [None] * len_rds
            for ii in range(len_rds):
                rds_element = rds[ii]
                try:
                    field_object = rds_element.get(format_time=self.ops.format_time,
                                                   grid_abstraction=self.ops.abstraction)
                except (AttributeError, TypeError):
                    # Likely a field object which does not need to be loaded from source.
                    if not self.ops.format_time:
                        raise NotImplementedError
                    # Check that it is indeed a field before proceeding. Otherwise, re-raise the original error.
                    if not isinstance(rds_element, Field):
                        raise
                    field_object = rds_element

                field[ii] = field_object

        # Multivariate calculations require pulling variables across fields.
        if self._has_multivariate_calculations and len(field) > 1:
            for midx in range(1, len(field)):
                # Use the data variable tag if it is available. Otherwise, attempt to merge the fields raising
                # warning if the variable exists in the squashed field.
                if len(field[midx].data_variables) > 0:
                    vitr = field[midx].data_variables
                    is_data = True
                else:
                    vitr = list(field[midx].values())
                    is_data = False
                for mvar in vitr:
                    mvar = mvar.extract()
                    field[0].add_variable(mvar, is_data=is_data)
            new_field_name = '_'.join([str(f.name) for f in field])
            field[0].set_name(new_field_name)

        # The first field in the list is always the target for other operations.
        field = field[0]
        assert isinstance(field, Field)

        # Break out of operations if the rank is empty.
        vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET, field, is_current=True, clobber=True)

        if not vm.is_null:
            if not has_field_optimizations:
                if field.is_empty:
                    raise ValueError('No empty fields allowed.')

                # Time, level, etc. subsets.
                field = self._get_nonspatial_subset_(field)

                # Spatially reorder the data.
                ocgis_lh(msg='before spatial reorder', logger=self._subset_log, level=logging.DEBUG)
                if self.ops.spatial_reorder:
                    self._update_spatial_order_(field)

                # Extrapolate the spatial bounds if requested.
                # TODO: Rename "interpolate" to "extrapolate".
                if self.ops.interpolate_spatial_bounds:
                    self._update_bounds_extrapolation_(field)

    # This error is related to subsetting by time or level. Spatial subsetting occurs below.
    except EmptySubsetError as e:
        if self.ops.allow_empty:
            ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log,
                     level=logging.WARN)
            coll = self._get_initialized_collection_()
            name = '_'.join([rd.field_name for rd in rds])
            field = Field(name=name, is_empty=True)
            coll.add_field(field, None)
            # Yield the single empty collection and stop the generator.
            # NOTE(review): a "return" inside "finally" also discards any exception raised at the yield point -
            # confirm this is intended.
            try:
                yield coll
            finally:
                return
        else:
            # Raise an exception as empty subsets are not allowed.
            ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.field_name for rd in rds]),
                     logger=self._subset_log)

    # Set iterator based on presence of slice. Slice always overrides geometry.
    if self.ops.slice is not None:
        itr = [None]
    else:
        itr = [None] if self.ops.geom is None else self.ops.geom

    for coll in self._process_geometries_(itr, field, alias):
        # Conform units following the spatial subset.
        if not vm.is_null and self.ops.conform_units_to is not None:
            for to_conform in coll.iter_fields():
                for dv in to_conform.data_variables:
                    dv.cfunits_conform(self.ops.conform_units_to)
        ocgis_lh(msg='_process_subsettables_ yielding', logger=self._subset_log, level=logging.DEBUG)
        yield coll
def test_get_intersects(self):
    """Subset scattered polygon coordinates with a geometry and check the surviving centroids.

    The centroid comparison runs on the subset directly and again, for both the subset and its
    globally reduced form, once the reduced coordinates have been gathered and broadcast to all
    ranks.
    """
    self.add_barrier = False

    desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]

    def assert_gathered_centroids(target):
        # Collect local centroids, gather them, and compare the flattened list on the root rank.
        # The gather is called by every rank in the current communicator.
        local = [[g[1].centroid.x, g[1].centroid.y] for g in target.iter_geometries() if g[1] is not None]
        by_rank = vm.gather(local)
        if vm.rank == 0:
            flattened = [pair for rank_pairs in by_rank for pair in rank_pairs]
            self.assertEqual(flattened, desired)

    subset_geom = self.fixture_subset_geom()
    poly = self.fixture()

    # Scatter the polygon geometry coordinates for the parallel case.
    dist = OcgDist()
    for d in poly.parent.dimensions.values():
        d = d.copy()
        if d.name == poly.dimensions[0].name:
            d.dist = True
        dist.add_dimension(d)
    dist.update_dimension_bounds()

    poly.parent = variable_collection_scatter(poly.parent, dist)

    vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
    if vm.is_null:
        return

    poly.parent._validate_()

    # Every scattered variable must share the single parent collection.
    for v in poly.parent.values():
        self.assertEqual(id(v.parent), id(poly.parent))
        self.assertEqual(len(v.parent), len(poly.parent))

    sub = poly.get_intersects(subset_geom)
    vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
    if vm.is_null:
        return

    assert_gathered_centroids(sub)

    self.assertEqual(len(sub.parent), len(poly.parent))

    sub.parent._validate_()
    sub2 = sub.reduce_global()
    sub2.parent._validate_()

    # Gather then broadcast coordinates so all coordinates are available on each process.
    to_add = []
    for gather_target in [sub2.x, sub2.y]:
        gathered = variable_gather(gather_target.extract())
        gathered = vm.bcast(gathered)
        to_add.append(gathered)
    for t in to_add:
        sub2.parent.add_variable(t, force=True)

    for to_check in [sub, sub2]:
        assert_gathered_centroids(to_check)
def _update_aggregation_wrapping_crs_(obj, alias, sfield, subset_sdim, subset_ugid):
    """
    Apply spatial aggregation, geometry wrapping, and coordinate system updates to a subsetted field.

    :param obj: Object driving the operation. Its ``ops``, ``_subset_log`` and ``_backtransform`` attributes are
     read.
    :param alias: Alias passed through to the logger.
    :param sfield: The subsetted field to update. It must not be empty on entry.
    :param subset_sdim: The selection geometry field or ``None`` if there is no selection geometry.
    :param subset_ugid: Selection geometry identifier passed through to the logger.
    :returns: The updated field. With aggregation this is the parent of the unioned geometry (or an empty field on
     ranks where the union returned ``None``). With an output coordinate system this is a copy of the field.
    :raises ValueError: If the union returns ``None`` while running on a single process.
    """
    raise_if_empty(sfield)

    ocgis_lh('entering _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias, ugid=subset_ugid,
             level=logging.DEBUG)

    # Spatial aggregation.
    if obj.ops.aggregate:
        ocgis_lh('aggregate requested in _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias,
                 ugid=subset_ugid, level=logging.DEBUG)

        # Gridded datasets may not have geometries loaded yet.
        sfield.set_abstraction_geom()

        ocgis_lh('after sfield.set_abstraction_geom in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # Union the geometries while spatially averaging the data variables.
        unioned = sfield.geom.get_unioned(spatial_average=sfield.data_variables)
        ocgis_lh('after sfield.geom.get_unioned in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # Non-root processes receive None from the union. That is only legal when running in parallel, in which case
        # an empty field stands in for the result.
        if unioned is not None:
            sfield = unioned.parent
        elif vm.size == 1:
            raise ValueError('None should not be returned from get_unioned if running on a single processor.')
        else:
            sfield = Field(is_empty=True)

        vm.create_subcomm_by_emptyable(SubcommName.SPATIAL_AVERAGE, sfield, is_current=True, clobber=True)

        if not vm.is_null and subset_sdim is not None and subset_sdim.geom is not None:
            # The aggregated geometry takes on the identifier of the selection geometry.
            gid_value = subset_sdim.geom.ugid.get_value()
            gid_dimensions = sfield.geom.dimensions
            data_model = get_data_model(obj.ops)
            gid_variable = create_typed_variable_from_data_model('int', data_model=data_model,
                                                                 name=HeaderName.ID_GEOMETRY, value=gid_value,
                                                                 dimensions=gid_dimensions)
            sfield.geom.set_ugid(gid_variable)

    # Ranks outside the current communicator have nothing left to do.
    if vm.is_null:
        ocgis_lh(msg='null communicator following spatial average. returning.', logger=obj._subset_log,
                 level=logging.DEBUG)
        return sfield

    raise_if_empty(sfield)

    ocgis_lh(msg='before wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)
    try:
        wrapped_state = sfield.wrapped_state
    except WrappedStateEvalTargetMissing:
        # Without a target for wrapping evaluation the state is unknown.
        wrapped_state = WrappedState.UNKNOWN
    ocgis_lh(msg='after wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)

    # Wrap the returned data if it is currently unwrapped.
    if not env.OPTIMIZE_FOR_CALC and not sfield.is_empty and wrapped_state == WrappedState.UNWRAPPED:
        ocgis_lh('wrap target is empty: {}'.format(sfield.is_empty), obj._subset_log, level=logging.DEBUG)

        # Gridded datasets may not have geometries loaded yet.
        sfield.set_abstraction_geom()

        if obj.ops.output_format in constants.VECTOR_OUTPUT_FORMATS and obj.ops.vector_wrap:
            ocgis_lh('wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                     level=logging.DEBUG)

            # Wrapping happens in place, so swap in a copy of the geometry values first. The original field may be
            # reused for additional subsets.
            wrap_target = sfield.geom
            wrap_target.set_value(wrap_target.get_value().copy())
            # Force the wrap as bounds may be less than zero for an unwrapped grid. Without forcing, wrapping may
            # pass through and leave the data untouched.
            wrap_target.wrap(force=True)
            ocgis_lh('finished wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                     level=logging.DEBUG)

    # Transform back to rotated pole unless a spherical or WGS84 output was requested.
    rotated_pole_crs = obj._backtransform.get(constants.BackTransform.ROTATED_POLE)
    if rotated_pole_crs is not None and not isinstance(obj.ops.output_crs, (Spherical, WGS84)):
        sfield.update_crs(rotated_pole_crs)

    # Update the coordinate system of the data output.
    if obj.ops.output_crs is not None:
        # The selection geometry may need projecting to match the output coordinate system.
        if subset_sdim is not None and subset_sdim.crs != obj.ops.output_crs:
            subset_sdim.update_crs(obj.ops.output_crs)

        # Project a copy of the subsetted field.
        sfield = sfield.copy()
        sfield.update_crs(obj.ops.output_crs)

    # Wrap or unwrap the data if the coordinate system permits.
    _update_wrapping_(obj, sfield)

    ocgis_lh('leaving _update_aggregation_wrapping_crs_', obj._subset_log, level=logging.DEBUG)

    return sfield
def _process_geometries_(self, itr, field, alias):
    """
    Spatially subset ``field`` with each selection geometry in ``itr`` and yield one collection per geometry.

    For every element of ``itr`` a new collection is initialized. On ranks where the current communicator is null
    the field is passed through unchanged. Otherwise the selection geometry is copied and used to subset the field,
    followed by optional regridding and aggregation/wrapping/coordinate system updates.

    :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting. An element may be ``None`` in
     which case no spatial subset is applied.
    :type itr: [None] or [:class:`~ocgis.Field`, ...]
    :param :class:`ocgis.Field` field: The target field for operations.
    :param str alias: The request data alias currently being processed.
    :returns: A generator yielding the collection created for each element of ``itr``.
    :rtype: :class:`~ocgis.SpatialCollection`
    """
    assert isinstance(field, Field)

    ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
    # Process each geometry.
    for subset_field in itr:
        # Initialize the collection storage.
        coll = self._get_initialized_collection_()
        if vm.is_null:
            # Nothing to subset on a rank outside the current communicator. Pass the field through.
            sfield = field
        else:
            # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
            # subset target with wrapping, coordinate system conversion, etc.
            subset_field = deepcopy(subset_field)

            if self.ops.regrid_destination is not None:
                # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                # sources. This name is only bound when regridding is requested and is only read under the same
                # condition below.
                subset_field_for_regridding = deepcopy(subset_field)

            # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
            # system.
            key = constants.BackTransform.ROTATED_POLE
            self._backtransform[key] = self._get_update_rotated_pole_state_(field, subset_field)

            # Check if the geometric abstraction is available on the field object.
            self._assert_abstraction_available_(field)

            # Return a slice or snippet if either of these are requested. Note "field" is rebound here, so
            # following iterations start from this result.
            field = self._get_slice_or_snippet_(field)

            # Choose the subset UGID value.
            if subset_field is None:
                msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                subset_ugid = None
            else:
                subset_ugid = subset_field.geom.ugid.get_value()[0]
                msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
            ocgis_lh(msg=msg, logger=self._subset_log)

            if subset_field is not None:
                # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                if subset_field.crs is not None and subset_field.crs != field.crs:
                    subset_field.update_crs(field.crs)
                # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                subset_field = self._get_buffered_subset_geometry_if_point_(field, subset_field)

            # If there is a selection geometry present, use it for the spatial subset. If not, all the field's data
            # is being returned.
            if subset_field is None:
                sfield = field
            else:
                sfield = self._get_spatially_subsetted_field_(alias, field, subset_field, subset_ugid)

            ocgis_lh(msg='after self._get_spatially_subsetted_field_', logger=self._subset_log,
                     level=logging.DEBUG)

            # Create the subcommunicator following the data subset to ensure non-empty communication.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET, sfield, is_current=True, clobber=True)

            if not vm.is_null:
                if not sfield.is_empty and not self.ops.allow_empty:
                    raise_if_empty(sfield)

                # NOTE(review): the nesting of the steps below relative to the emptiness check above was
                # reconstructed from the comments - confirm against the upstream source.
                # If the base size is being requested, bypass the rest of the operations.
                if not self._request_base_size_only:
                    # Perform regridding operations if requested.
                    if self.ops.regrid_destination is not None and sfield.regrid_source:
                        sfield = self._get_regridded_field_with_subset_(sfield,
                                                                        subset_field_for_regridding=subset_field_for_regridding)
                    else:
                        ocgis_lh(msg='no regridding operations', logger=self._subset_log, level=logging.DEBUG)
                    # If empty returns are allowed, there may be an empty field.
                    if sfield is not None:
                        # Only update spatial stuff if there are no calculations and, if there are calculations,
                        # those calculations are not expecting raw values.
                        if self.ops.calc is None or (self.ops.calc is not None and not self.ops.calc_raw):
                            # Update spatial aggregation, wrapping, and coordinate systems.
                            sfield = _update_aggregation_wrapping_crs_(self, alias, sfield, subset_field,
                                                                       subset_ugid)
                            ocgis_lh('after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                     self._subset_log, level=logging.DEBUG)

        # Add the created field to the output collection with the selection geometry. A missing field is only
        # expected when aggregating.
        if sfield is None:
            assert self.ops.aggregate
        if sfield is not None:
            coll.add_field(sfield, subset_field)

        yield coll
def _process_subsettables_(self, rds):
    """
    Create the target field from ``rds``, apply the non-spatial operations, and yield the collections produced by
    spatially processing that field.

    The elements of ``rds`` are loaded into fields unless field optimizations are present on the operations. The
    first field is the target for all following operations. With multivariate calculations, variables from the
    remaining fields are merged into the first field.

    :param rds: Sequence of :class:`~ocgis.RequestDataset` objects. :class:`~ocgis.Field` objects are also accepted
     and used directly.
    :type rds: sequence
    :returns: A generator yielding collections. If the time or level subset is empty and empty returns are allowed,
     a single collection holding an empty field is yielded.
    :rtype: :class:`ocgis.collection.base.AbstractCollection`
    :raises NotImplementedError: If an element cannot be loaded through its ``get`` method and ``format_time`` is
     false on the operations.
    :raises ValueError: If the target field is empty on a rank with a non-null communicator and there are no field
     optimizations.
    """
    ocgis_lh(msg='entering _process_subsettables_', logger=self._subset_log, level=logging.DEBUG)

    # This is used to define the group of request datasets for things like logging and exceptions.
    try:
        alias = '_'.join([r.field_name for r in rds])
    except AttributeError:
        # Allow field objects which do not expose the "field_name" attribute.
        try:
            alias = '_'.join([r.name for r in rds])
        except TypeError:
            # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
            alias = None
    except NoDataVariablesFound:
        # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
        alias = None

    ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG)
    # Create the field object. Field objects may be passed directly to operations.
    # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
        ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG)
        field = [self.ops.optimizations['fields'][rd.field_name].copy() for rd in rds]
        has_field_optimizations = True
    else:
        # Indicates no field optimizations loaded.
        has_field_optimizations = False
    try:
        # No field optimizations and data should be loaded from source.
        if not has_field_optimizations:
            ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG)
            len_rds = len(rds)
            field = [None] * len_rds
            for ii in range(len_rds):
                rds_element = rds[ii]
                try:
                    field_object = rds_element.get(format_time=self.ops.format_time,
                                                   grid_abstraction=self.ops.abstraction)
                except (AttributeError, TypeError):
                    # Likely a field object which does not need to be loaded from source.
                    if not self.ops.format_time:
                        raise NotImplementedError
                    # Check that it is indeed a field before proceeding. Otherwise re-raise the original error.
                    if not isinstance(rds_element, Field):
                        raise
                    field_object = rds_element
                field[ii] = field_object

        # Multivariate calculations require pulling variables across fields.
        if self._has_multivariate_calculations and len(field) > 1:
            for midx in range(1, len(field)):
                # Use the data variable tag if it is available. Otherwise, attempt to merge the fields raising
                # warning if the variable exists in the squashed field.
                if len(field[midx].data_variables) > 0:
                    vitr = field[midx].data_variables
                    is_data = True
                else:
                    vitr = list(field[midx].values())
                    is_data = False
                for mvar in vitr:
                    mvar = mvar.extract()
                    field[0].add_variable(mvar, is_data=is_data)
            # The merged field is named after all of the fields contributing to it.
            new_field_name = '_'.join([str(f.name) for f in field])
            field[0].set_name(new_field_name)

        # The first field in the list is always the target for other operations.
        field = field[0]
        assert isinstance(field, Field)

        # Break out of operations if the rank is empty.
        vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET, field, is_current=True, clobber=True)
        if not vm.is_null:
            if not has_field_optimizations:
                if field.is_empty:
                    raise ValueError('No empty fields allowed.')

                # Time, level, etc. subsets.
                field = self._get_nonspatial_subset_(field)

                # Spatially reorder the data.
                ocgis_lh(msg='before spatial reorder', logger=self._subset_log, level=logging.DEBUG)
                if self.ops.spatial_reorder:
                    self._update_spatial_order_(field)

                # Extrapolate the spatial bounds if requested.
                # TODO: Rename "interpolate" to "extrapolate".
                if self.ops.interpolate_spatial_bounds:
                    self._update_bounds_extrapolation_(field)

    # This error is related to subsetting by time or level. Spatial subsetting occurs below.
    except EmptySubsetError as e:
        if self.ops.allow_empty:
            ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log,
                     level=logging.WARN)
            coll = self._get_initialized_collection_()
            name = '_'.join([rd.field_name for rd in rds])
            field = Field(name=name, is_empty=True)
            coll.add_field(field, None)
            # Yield the single empty collection and stop. The return in the finally clause ends the generator
            # whether it is resumed normally, closed, or has an exception thrown into it at the yield.
            try:
                yield coll
            finally:
                return
        else:
            # Raise an exception as empty subsets are not allowed.
            # NOTE(review): presumably ocgis_lh raises the exception passed as "exc" - confirm. Otherwise "field"
            # may be unbound or partially processed below.
            ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.field_name for rd in rds]),
                     logger=self._subset_log)

    # Set iterator based on presence of slice. Slice always overrides geometry.
    if self.ops.slice is not None:
        itr = [None]
    else:
        itr = [None] if self.ops.geom is None else self.ops.geom

    for coll in self._process_geometries_(itr, field, alias):
        # Conform units following the spatial subset.
        if not vm.is_null and self.ops.conform_units_to is not None:
            for to_conform in coll.iter_fields():
                for dv in to_conform.data_variables:
                    dv.cfunits_conform(self.ops.conform_units_to)
        ocgis_lh(msg='_process_subsettables_ yielding', logger=self._subset_log, level=logging.DEBUG)
        yield coll