def test_grid(self):
    """Verify grid mask propagation through the field property and dimension-map bounds updates."""
    # Test mask variable information is propagated through property.
    grid = self.get_gridxy(with_xy_bounds=True)
    self.assertTrue(grid.is_vectorized)
    self.assertTrue(grid.has_bounds)
    np.random.seed(1)
    value = np.random.rand(*grid.shape)
    select = value > 0.4
    mask_var = create_spatial_mask_variable('nonstandard', select, grid.dimensions)
    grid.set_mask(mask_var)
    field = Field(grid=grid)
    self.assertTrue(field.grid.has_bounds)
    # The non-standard mask variable name should be recorded in the dimension map.
    self.assertEqual(field.dimension_map.get_spatial_mask(), mask_var.name)
    self.assertNumpyAll(field.grid.get_mask(), mask_var.get_mask())

    # Test dimension map bounds are updated appropriately.
    dim = Dimension('count', 2)
    x = Variable(name='x', value=[1., 2.], dimensions=dim)
    y = Variable(name='y', value=[1., 2.], dimensions=dim)
    xb = Variable(name='xb', value=[[0., 1.5], [1.5, 2.5]], dimensions=[dim, 'bounds'])
    yb = Variable(name='yb', value=[[0., 1.5], [1.5, 2.5]], dimensions=[dim, 'bounds'])
    variables = [x, y, xb, yb]
    dmap = DimensionMap()
    dmap.set_variable(DMK.X, x, bounds=xb)
    dmap.set_variable(DMK.Y, y, bounds=yb)
    f = Field(dimension_map=dmap, variables=variables)
    self.assertTrue(f.grid.has_bounds)
def test_system_changing_field_name(self):
    """Renaming a child collection must not break lazy value loading or round-trip writes."""
    path1 = self.get_temporary_file_path('foo1.nc')
    path2 = self.get_temporary_file_path('foo2.nc')

    vc1 = VariableCollection(name='vc1')
    var1 = Variable('var1', value=[1, 2, 3], dimensions='three', parent=vc1)

    vc2 = VariableCollection(name='vc2')
    vc1.add_child(vc2)
    var2 = Variable('var2', value=[4, 5, 6, 7], dimensions='four', parent=vc2)

    vc1.write(path1)

    rd = RequestDataset(path1)
    # rd.inspect()
    nvc = rd.get_variable_collection()
    nvc2 = nvc.children['vc2']
    # Value is lazily loaded; it must still be None before access.
    self.assertIsNone(nvc2['var2']._value)
    self.assertEqual(nvc2.name, 'vc2')
    nvc2.set_name('extraordinary')
    # Renaming should not prevent the value from loading from the source file.
    self.assertIsNotNone(nvc2['var2'].get_value())
    self.assertEqual(nvc2['var2'].get_value().tolist(), [4, 5, 6, 7])

    nvc.write(path2)
    rd2 = RequestDataset(path2)
    # rd2.inspect()
    n2vc = rd2.get_variable_collection()
    self.assertEqual(n2vc.children[nvc2.name].name, nvc2.name)
def get_field(self, ntime=2, variable_name='foo', nrow=2, ncol=2):
    """Create random field where mean varies with radius and std with the angle around the center of the grid.

    :param int ntime: Number of time steps (daily, starting 2000-01-01).
    :param str variable_name: Name of the generated data variable.
    :param int nrow: Number of grid rows.
    :param int ncol: Number of grid columns.
    :return: Field with dimensions (realization, time, level, row, col).
    """
    np.random.seed(1)

    # Coordinates are centered on the grid origin.
    row = Variable(value=np.arange(nrow) - nrow / 2., name='row', dimensions='row')
    col = Variable(value=np.arange(ncol) - ncol / 2., name='col', dimensions='col')
    grid = Grid(col, row)
    x, y = grid.get_value_stacked()

    start = dt.datetime(2000, 1, 1)
    delta = dt.timedelta(days=1)
    value_temporal = [start + i * delta for i in range(ntime)]
    temporal = TemporalVariable(value=value_temporal, dimensions='time', name='time')

    nlevel = 1
    level = None

    nrlz = 1
    realization = None

    # Random noise scaled by angle (clipped away from zero) plus distance from center.
    value = np.random.rand(nrlz, ntime, nlevel, nrow, ncol) * np.arctan2(x, y).clip(.1) + np.hypot(x, y)
    variable = Variable(name=variable_name, value=value,
                        dimensions=['realization', 'time', 'level', 'row', 'col'])

    field = Field(grid=grid, time=temporal, is_data=variable, level=level, realization=realization)

    return field
def test_write_variable_collection_object_arrays(self):
    """Test writing variable length arrays in parallel."""
    # Create the source/desired files on rank 0 only.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path_actual = self.get_temporary_file_path('in.nc')
            path_desired = self.get_temporary_file_path('out.nc')

            value = [[1, 3, 5], [7, 9], [11]]
            v = Variable(name='objects', value=value, fill_value=4, dtype=ObjectType(int),
                         dimensions='values')
            v.write(path_desired)
        else:
            v, path_actual, path_desired = [None] * 3
    path_actual = MPI_COMM.bcast(path_actual)
    path_desired = MPI_COMM.bcast(path_desired)

    dest_mpi = OcgDist()
    dest_mpi.create_dimension('values', 3, dist=True)
    dest_mpi.update_dimension_bounds()

    scattered = variable_scatter(v, dest_mpi)
    outvc = VariableCollection(variables=[scattered])

    # Only ranks holding data participate in the write.
    with vm.scoped_by_emptyable('write', outvc):
        if not vm.is_null:
            outvc.write(path_actual)

    if MPI_RANK == 0:
        self.assertNcEqual(path_actual, path_desired)
def test_system_get_field_dimensioned_variables(self):
    """Test data is appropriately tagged to identify dimensioned variables."""
    path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    # data3 only spans time and should not be identified as a dimensioned data variable.
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    field = Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3])
    field.write(path)

    # Test dimensioned variables are read from a file with appropriate metadata.
    rd = RequestDataset(path)
    self.assertEqual(rd.variable, ('data1', 'data2'))
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data1', 'data2'))

    # Test dimensioned variables are overloaded.
    rd = RequestDataset(path, variable='data2')
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data2',))
def test_get_dist_default_distribution(self):
    """Test using default distributions defined by drivers."""
    # Write a small file on rank 0 only; broadcast the path afterwards.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('foo.nc')
            varx = Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'})
            vary = Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'})
            vc = VariableCollection(variables=[varx, vary])
            vc.write(path)
        else:
            path = None
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path)
    dist = rd.driver.dist

    # The driver's default distribution should mark the 'seven' (Y) dimension as distributed.
    distributed_dimension = dist.get_dimension('seven')
    self.assertTrue(distributed_dimension.dist)
def test_get_unioned(self):
    """Union a geometry variable and verify parent removal, CRS, UGID, and value handling."""
    # TODO: Test with an n-dimensional mask.

    ancillary = Variable('ancillary')
    pa = self.get_geometryvariable(parent=ancillary.parent, crs=WGS84())
    self.assertIn(ancillary.name, pa.parent)

    unioned = pa.get_unioned()
    new_uid = Variable('flower', value=[100], dimensions=unioned.dimensions)
    unioned.set_ugid(new_uid)

    # Parent should be removed from the unioned variable.
    self.assertNotIn(ancillary.name, unioned.parent)
    self.assertIn(ancillary.name, pa.parent)

    self.assertEqual(unioned.crs, WGS84())
    self.assertEqual(unioned.shape, (1,))
    desired = MultiPoint([[1.0, 2.0], [3.0, 4.0]])
    self.assertEqual(unioned.get_value()[0], desired)

    # The union collapses to a singleton dimension with no mask.
    self.assertEqual(len(unioned.dimensions[0]), 1)
    self.assertIsNone(unioned.get_mask())
    self.assertEqual(unioned.ugid.get_value()[0], 100)
    self.assertNotEqual(id(unioned), id(pa))
def test_get_unioned_spatial_average_differing_dimensions(self):
    """Spatial averaging should handle extra (time, level) dimensions on weighted variables."""
    pa = self.get_geometryvariable()

    to_weight = Variable(name='to_weight', dimensions=pa.dimensions, dtype=float)
    to_weight.get_value()[0] = 5.0
    to_weight.get_value()[1] = 10.0
    pa.parent.add_variable(to_weight)

    # Second variable carries leading time/level dimensions in addition to the geometry dimension.
    to_weight2 = Variable(name='to_weight2',
                          dimensions=[Dimension('time', 10), Dimension('level', 3), pa.dimensions[0]],
                          dtype=float)
    for time_idx in range(to_weight2.shape[0]):
        for level_idx in range(to_weight2.shape[1]):
            # Fill each (time, level) slice with a distinct constant across the geometry dimension.
            to_weight2.get_value()[time_idx, level_idx] = (time_idx + 2) + (level_idx + 2) ** (level_idx + 1)
    pa.parent.add_variable(to_weight2)

    unioned = pa.get_unioned(spatial_average=['to_weight', 'to_weight2'])
    actual = unioned.parent[to_weight2.name]

    # Geometry dimension collapses to size one; originals are untouched.
    self.assertEqual(actual.shape, (10, 3, 1))
    self.assertEqual(to_weight2.shape, (10, 3, 2))
    self.assertNumpyAll(actual.get_value(), to_weight2.get_value()[:, :, 0].reshape(10, 3, 1))
    self.assertEqual(actual.dimension_names, ('time', 'level', 'ocgis_geom_union'))
    # Average of 5.0 and 10.0 with equal weights.
    self.assertEqual(unioned.parent[to_weight.name].get_value()[0], 7.5)
def test_init(self):
    """Exercise OcgDist construction with nested collections and group-scoped dimensions."""
    ompi = OcgDist(size=2)
    self.assertEqual(len(ompi.mapping), 2)

    dim_x = Dimension('x', 5, dist=False)
    dim_y = Dimension('y', 11, dist=True)
    var_tas = Variable('tas', value=np.arange(0, 5 * 11).reshape(5, 11), dimensions=(dim_x, dim_y))
    thing = Variable('thing', value=np.arange(11) * 10, dimensions=(dim_y,))

    # Build a three-level collection hierarchy; 'hidden' lives in the deepest group.
    vc = VariableCollection(variables=[var_tas, thing])
    child = VariableCollection(name='younger')
    vc.add_child(child)
    childer = VariableCollection(name='youngest')
    child.add_child(childer)
    dim_six = Dimension('six', 6)
    hidden = Variable('hidden', value=[6, 7, 8, 9, 0, 10], dimensions=dim_six)
    childer.add_variable(hidden)

    ompi.add_dimensions([dim_x, dim_y])
    ompi.add_dimension(dim_six, group=hidden.group)
    ompi.add_variables([var_tas, thing])
    ompi.add_variable(hidden)

    var = ompi.get_variable(hidden)
    self.assertIsInstance(var, dict)
def test_variable_scatter(self):
    """Scatter a bounded, masked variable and verify local values, masks, bounds, and source indices."""
    var_value = np.arange(5, dtype=float) + 50
    var_mask = np.array([True, True, False, True, False])

    dest_dist = OcgDist()
    five = dest_dist.create_dimension('five', 5, src_idx=np.arange(5), dist=True)
    bounds = dest_dist.create_dimension('bounds', 2)
    dest_dist.update_dimension_bounds()

    if MPI_RANK == 0:
        local_dim = Dimension('local', 5, src_idx=np.arange(5))
        dim_src_idx = local_dim._src_idx.copy()

        var = Variable('the_five', value=var_value, mask=var_mask, dimensions=five.name)
        var.set_extrapolated_bounds('the_five_bounds', 'bounds')
        var_bounds_value = var.bounds.get_value()
    else:
        var, var_bounds_value, dim_src_idx = [None] * 3

    svar = variable_scatter(var, dest_dist)

    # Broadcast the reference values so all ranks can compare against their local slices.
    var_bounds_value = MPI_COMM.bcast(var_bounds_value)
    dim_src_idx = MPI_COMM.bcast(dim_src_idx)

    if MPI_RANK > 1:
        # Ranks beyond the distribution receive empty variables.
        self.assertIsNone(svar.get_value())
        self.assertTrue(svar.is_empty)
    else:
        dest_dim = dest_dist.get_dimension('five')
        self.assertNumpyAll(var_value[slice(*dest_dim.bounds_local)], svar.get_value())
        self.assertNumpyAll(var_mask[slice(*dest_dim.bounds_local)], svar.get_mask())
        self.assertNumpyAll(var_bounds_value[slice(*dest_dim.bounds_local)], svar.bounds.get_value())
        self.assertNumpyAll(dim_src_idx[slice(*dest_dim.bounds_local)], svar.dimensions[0]._src_idx)
        self.assertNumpyAll(dim_src_idx[slice(*dest_dim.bounds_local)], svar.bounds.dimensions[0]._src_idx)
def test_system_spatial_averaging_through_operations(self):
    """Spatially aggregate a field through operations and check the mean on the root rank."""
    data_name = 'data'

    # Write the source field on rank 0 only.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)

            data_value = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, data_value, grid.dimensions, float)
            data_value = data.get_value()

            field = Field(grid=grid, is_data=data)

            path = self.get_temporary_file_path('data.nc')
            field.write(path)
        else:
            data_value, path = None, None
    data_value = MPI_COMM.bcast(data_value)
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path, variable=data_name)

    ops = OcgOperations(dataset=rd, aggregate=True)
    ret = ops.execute()
    if ret is None:
        # Non-participating ranks receive no output.
        self.assertNotEqual(vm.rank, vm.root)
    else:
        out_field = ret.get_element()

        if MPI_RANK == 0:
            # The aggregate is the unweighted mean over the uniform grid.
            desired = data_value.mean()
            actual = out_field.data_variables[0].get_value()[0]
            self.assertEqual(actual, desired)
def test_system_dataset_identifiers_on_variables(self):
    """Test dataset identifiers make it to output variables for iteration."""
    paths = []
    variables = []
    # Create two small datasets with distinct coordinate and data variable names.
    for suffix in [1, 2]:
        path = self.get_temporary_file_path('foo{}.nc'.format(suffix))
        paths.append(path)
        x = Variable(name='x{}'.format(suffix), value=[2, 3], dimensions='x')
        y = Variable(name='y{}'.format(suffix), value=[4, 5, 6], dimensions='y')
        data_variable_name = 'data{}'.format(suffix)
        variables.append(data_variable_name)
        data = Variable(name=data_variable_name, value=np.arange(6).reshape(2, 3) + suffix,
                        dimensions=['x', 'y'])
        grid = Grid(x, y)
        field = Field(grid=grid, is_data=data)
        field.write(path)

    rds = [RequestDataset(uri=p, variable=dv) for p, dv in zip(paths, variables)]
    ops = OcgOperations(dataset=rds)
    rds_uids = [ds.uid for ds in ops.dataset]
    self.assertEqual(rds_uids, [1, 2])
    ret = ops.execute()

    for field in ret.iter_fields():
        self.assertFalse(field.grid.has_allocated_abstraction_geometry)
        for variable in list(field.values()):
            if isinstance(variable, CoordinateReferenceSystem):
                continue
            # Every non-CRS variable should carry its originating dataset identifier.
            self.assertIsNotNone(variable._request_dataset.uid)
            for row in variable.get_iter():
                self.assertIsNotNone(row[HeaderName.DATASET_IDENTIFER])
def get_wrap_field(crs=None, unwrapped=True):
    """Create a scattered test field with either wrapped or unwrapped longitude coordinates.

    :param crs: Optional coordinate system for the field.
    :param bool unwrapped: If `True`, use 0-360 style longitudes; otherwise use -180 to 180 values.
    :return: The scattered field (empty on non-root ranks before scatter).
    """
    ompi = OcgDist()
    ompi.create_dimension('x', 5, dist=False)
    ompi.create_dimension('y', 7, dist=True)
    ompi.create_dimension('time', size_current=4, dist=False)
    ompi.update_dimension_bounds()

    if MPI_RANK == 0:
        row = Variable(value=[-60, -40, -20, 0, 20, 40, 60], name='y', dimensions='y')
        if unwrapped:
            col_value = [1, 90, 180, 225, 270]
        else:
            col_value = [-170, -85, 0, 85, 170]
        col = Variable(value=col_value, name='x', dimensions='x')
        grid = Grid(col, row)

        # Data value equals the column index so wrapping reorders are observable.
        value = np.zeros((4, 7, 5))
        for col_idx in range(value.shape[-1]):
            value[:, :, col_idx] = col_idx

        time = TemporalVariable(name='time', value=[1, 2, 3, 4], dimensions='time')
        var = Variable(name='foo', value=value, dimensions=['time', 'y', 'x'])

        field = Field(grid=grid, is_data=var, crs=crs, time=time)
    else:
        field = None
    field = variable_collection_scatter(field, ompi)

    return field
def get_ocgis_field_from_esmf_field(efield, field=None):
    """
    Convert an ESMPy field to an OCGIS field, reusing OCGIS metadata stashed on the ESMF object.

    :param efield: The ESMPy field object to convert to an OCGIS field.
    :type efield: :class:`ESMF.Field`
    :param field: If provided, use this as the template field for OCGIS field creation.
    :type field: :class:`~ocgis.Field`
    :return: :class:`~ocgis.Field`
    """
    ometa = efield._ocgis
    dimnames = ometa.get('dimnames')
    dimnames_backref = ometa.get('dimnames_backref')

    # Prefer the original OCGIS grid if it was stored; otherwise rebuild from the ESMF grid.
    ogrid = ometa.get('ocgis_grid')
    if ogrid is None:
        ogrid = get_ocgis_grid_from_esmf_grid(efield.grid)

    ovar = None
    if dimnames is not None and efield.name is not None:
        ovar = Variable(name=efield.name, value=efield.data, dimensions=dimnames, dtype=efield.data.dtype)
        # Restore the original dimension order recorded before the ESMF conversion.
        broadcast_variable(ovar, get_dimension_names(dimnames_backref))
        ovar.set_dimensions(dimnames_backref, force=True)

    if field is None:
        field = Field(grid=ogrid)
    else:
        field.set_grid(ogrid)

    if ovar is not None:
        field.add_variable(ovar, is_data=True, force=True)

        # Propagate the grid mask onto the data variable.
        if ogrid.has_mask:
            field.grid.set_mask(ogrid.get_mask(), cascade=True)

    return field
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()

    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')

        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        vc = VariableCollection(variables=[t, extra, x, y, data])
    else:
        path, vc = [None] * 2

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, ompi)

    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, iter_kwargs={'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']},
                     driver=DriverCSV)

    if MPI_RANK == 0:
        # One header line plus 3*2*7*4 = 168 data rows.
        desired = 169
        with open(path, 'r') as f:
            lines = f.readlines()
        self.assertEqual(len(lines), desired)
def test_get_by_tag(self):
    """Variables registered under a tag should be retrievable through ``get_by_tag``."""
    # Build three named variables and tag two of them under 'other'.
    names = ('tas', 'tasmax', 'tasmin')
    members = [Variable(name=n) for n in names]
    tags = {'avg': ['tas'], 'other': ['tasmax', 'tasmin']}
    field = Field(variables=members, tags=tags)
    tagged = field.get_by_tag('other')
    self.assertAsSetEqual([v.name for v in tagged], tags['other'])
def test_redistribute_by_src_idx(self):
    """Redistribute a distributed variable by source index; ranks without a target slice become empty."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    dist = OcgDist()
    dim1 = dist.create_dimension('dim1', 5 * vm.size, dist=True)
    dim2 = dist.create_dimension('dim2', 2, dist=False)
    dist.update_dimension_bounds()

    rank_value = np.arange(5) + (10 * (vm.rank + 1))
    var1 = Variable(name='dvar1', value=rank_value, dimensions=dim1)
    var2 = Variable(name='dvar2', dimensions=[dim1, dim2])
    var1.parent.add_variable(var2)
    path = self.get_temporary_file_path('out.nc')
    var1.parent.write(path)

    # Global indices the redistribution should end up covering.
    desired_idx = np.array([1, 7, 9, 10, 14])
    vdesired_value = variable_gather(var1)
    if vm.rank == 0:
        desired_value = vdesired_value.get_value()[desired_idx]

    # Per-rank selections into the locally held data; ranks 3 (and any missing key) select nothing.
    desired_idx_ranks = {0: slice(1, 2), 1: [2, 4], 2: [0, 4]}

    rd = RequestDataset(path)
    rd.metadata['dimensions'][dim1.name]['dist'] = True
    field = rd.create_field()

    indvar = field[var1.name]
    field[var2.name].load()

    try:
        rank_slice = desired_idx_ranks[vm.rank]
    except KeyError:
        sub = Variable(is_empty=True)
    else:
        sub = indvar[rank_slice]

    self.barrier_print(sub.is_empty)

    redistribute_by_src_idx(indvar, dim1.name, sub.dimensions_dict.get(dim1.name))

    with vm.scoped_by_emptyable('gather for test', indvar):
        if vm.is_null:
            self.assertIn(vm.rank_global, [2, 3])
        else:
            self.assertIn(vm.rank_global, [0, 1])
            # Redistribution should reset cached state so values reload from source.
            for v in [indvar, indvar.parent[var2.name]]:
                self.assertIsNone(v._value)
                self.assertIsNone(v._mask)
                self.assertIsNone(v._is_empty)
                self.assertFalse(v._has_initialized_value)
            self.rank_print(indvar)
            actual_value = variable_gather(indvar)
            if vm.rank == 0:
                actual_value = actual_value.get_value()
                self.assertNumpyAll(actual_value, desired_value)
def test_system_properties(self):
    """Test field properties."""
    time = TemporalVariable(value=[20, 30, 40], dimensions=['the_time'], dtype=float, name='time')
    time_bounds = TemporalVariable(value=[[15, 25], [25, 35], [35, 45]], dimensions=['times', 'bounds'],
                                   dtype=float, name='time_bounds')
    other = Variable(value=[44, 55, 66], name='other', dimensions=['times_again'])
    x = Variable(value=[1, 2, 3], name='xc', dimensions=['x'])
    y = Variable(value=[10, 20, 30, 40], name='yc', dimensions=['y'])

    crs = CoordinateReferenceSystem(epsg=2136)
    f = self.get_ocgfield(variables=[time, time_bounds, other, x, y])
    f2 = deepcopy(f)

    self.assertIsNone(f.realization)
    self.assertIsNone(f.time)

    # Mapping the time variable makes it accessible through the field property.
    f.dimension_map.set_variable('time', time.name)
    self.assertNumpyAll(f.time.get_value(), time.get_value())
    self.assertEqual(f.time.attrs['axis'], 'T')
    self.assertIsNone(f.time.bounds)

    f.dimension_map.set_variable('time', 'time', bounds=time_bounds.name)
    self.assertNumpyAll(f.time.bounds.get_value(), time_bounds.get_value())
    self.assertIn('other', f.time.parent)

    # Register additional dimension aliases for the time entry, then slice.
    dims = f.dimension_map.get_dimension('time')
    dims += ['times', 'times_again', 'the_time']
    sub = f.get_field_slice({'time': slice(1, 2)})
    desired = OrderedDict([('time', (1,)), ('time_bounds', (1, 2)), ('other', (1,)), ('xc', (3,)),
                           ('yc', (4,))])
    self.assertEqual(sub.shapes, desired)
    self.assertIsNone(sub.grid)
    sub.dimension_map.set_variable('x', 'xc')
    sub.dimension_map.set_variable('y', 'yc')

    # Test writing to netCDF will load attributes.
    path = self.get_temporary_file_path('foo.nc')
    sub.write(path)
    with self.nc_scope(path) as ds:
        self.assertEqual(ds.variables[x.name].axis, 'X')
        self.assertEqual(ds.variables[y.name].axis, 'Y')

    self.assertEqual(sub.x.attrs['axis'], 'X')
    self.assertEqual(sub.y.attrs['axis'], 'Y')
    self.assertIsInstance(sub.grid, Grid)
    desired = OrderedDict([('time', (1,)), ('time_bounds', (1, 2)), ('other', (1,)), ('xc', (3,)),
                           ('yc', (4,))])
    self.assertEqual(sub.shapes, desired)

    # Test a subset.
    bbox = [1.5, 15, 2.5, 35]
    data = Variable(name='data', value=np.random.rand(3, 4), dimensions=['x', 'y'])
    f2.add_variable(data)
    f2.dimension_map.set_variable('x', 'xc')
    f2.dimension_map.set_variable('y', 'yc')
    bbox = box(*bbox)
    spatial_sub = f2.grid.get_intersects(bbox).parent
    desired = OrderedDict([('time', (3,)), ('time_bounds', (3, 2)), ('other', (3,)), ('xc', (1,)),
                           ('yc', (2,)), ('data', (1, 2)), ])
    self.assertEqual(spatial_sub.shapes, desired)
def test_iter_primary_mask(self):
    """Test iteration with an explicit primary mask variable and ``allow_masked=False``."""
    fully_masked = Variable('a', [1, 2, 3], 'b', mask=np.ones(3, dtype=bool))
    mask_source = Variable('c', [4, 5, 6], 'b', mask=[True, False, True])
    itr = Iterator(fully_masked, followers=[mask_source], primary_mask=mask_source,
                   allow_masked=False)
    records = list(itr)
    # The target's masked value is substituted with None in the first record.
    self.assertIsNone(records[0]['a'])
def test_remove_netcdf_attribute(self):
    """An attribute removed from a netCDF variable should be absent on re-read."""
    target_path = self.get_temporary_file_path('foo.nc')
    written = Variable(name='test', attrs={'remove_me': 10})
    written.write(target_path)

    remove_netcdf_attribute(target_path, written.name, 'remove_me')

    with self.nc_scope(target_path) as ds:
        ncvar = ds.variables[written.name]
        self.assertFalse(hasattr(ncvar, 'remove_me'))
def get_fill_sample_size_variable(archetype, file_only):
    """Return an int32 sample-size variable shaped like *archetype*.

    The value array is allocated unless this is a file-only operation.
    """
    metadata = OrderedDict([
        ('standard_name', constants.DEFAULT_SAMPLE_SIZE_STANDARD_NAME),
        ('long_name', constants.DEFAULT_SAMPLE_SIZE_LONG_NAME),
    ])
    fill_sample_size = Variable(name='n_{}'.format(archetype.name),
                                dimensions=archetype.dimensions,
                                attrs=metadata,
                                dtype=np.int32)
    # File-only operations never touch the value array, so skip allocation.
    if not file_only:
        fill_sample_size.allocate_value()
    return fill_sample_size
def test_system_masking(self):
    """Test how the mask is handled by the NetCDF python library."""
    unmasked = Variable(name='foo', value=[1, 2, 3], dimensions='three')
    self.assertIsNone(unmasked.get_mask())

    out_path = self.get_temporary_file_path('foo.nc')
    unmasked.parent.write(out_path)

    # A round trip through netCDF should not introduce a mask.
    reread = Field.read(out_path)['foo']
    self.assertIsNone(reread.get_mask())
def test_get_intersects_masking(self):
    """Test no mask is created if no geometries are masked."""
    xvar = Variable('x', [1, 2], 'x')
    yvar = Variable('y', [1, 2], 'y')
    target = Grid(xvar, yvar)

    self.assertIsNone(target.get_mask())
    subset = target.get_intersects(Point(1, 1))
    # Neither the source grid nor the subset should gain a mask.
    self.assertIsNone(target.get_mask())
    self.assertIsNone(subset.get_mask())
def test_get_unioned_spatial_average(self):
    """A spatially averaged variable should collapse to the union dimension and share it."""
    pa = self.get_geometryvariable()
    to_weight = Variable(name='to_weight', dimensions=pa.dimensions, dtype=float)
    to_weight.get_value()[:] = 5.0
    pa.parent.add_variable(to_weight)
    unioned = pa.get_unioned(spatial_average='to_weight')
    # Constant input averages to the same constant; the source variable is untouched.
    self.assertEqual(unioned.parent[to_weight.name].get_value().tolist(), [5.0])
    self.assertEqual(pa.parent[to_weight.name].get_value().shape, (2,))
    # The averaged variable shares the union's dimension object, not just its name.
    self.assertEqual(unioned.dimensions, unioned.parent[to_weight.name].dimensions)
    self.assertEqual(id(unioned.dimensions[0]), id(unioned.parent[to_weight.name].dimensions[0]))
def test_write_variable_collection_different_data_types(self):
    """Test multiple data types are handled by the shapefile write when melted is True."""
    int_var = Variable(name='an_int', value=[1, 2, 3], dtype=int, dimensions='three')
    float_var = Variable(name='a_float', value=[10., 20., 30.], dtype=float, dimensions='three')
    geom = GeometryVariable(name='points', value=[Point(1, 2), Point(3, 4), Point(5, 6)],
                            dimensions='three')
    target = Field(is_data=[int_var, float_var], geom=geom)
    self.assertEqual(len(target.data_variables), 2)

    out_path = self.get_temporary_file_path('foo.shp')
    target.write(out_path, driver='vector', iter_kwargs=dict(melted=True))
def get_fill_variable(self, archetype, name, dimensions, file_only=False, dtype=None, add_repeat_record=True,
                      add_repeat_record_archetype_name=True, variable_value=None):
    """
    Initialize a return variable for a calculation.

    :param archetype: An archetypical variable to use for the creation of the output variable.
    :type archetype: :class:`ocgis.Variable`
    :param str name: Name of the output variable.
    :param dimensions: Dimension tuple for the variable creation. The dimensions from `archetype` or not used
     because output dimensions are often different. Temporal grouping is an example of this.
    :type dimensions: tuple(:class:`ocgis.Dimension`, ...)
    :param bool file_only: If `True`, this is a file-only operation and no value should be allocated.
    :param type dtype: The data type for the output variable.
    :param bool add_repeat_record: If `True`, add a repeat record to the variable containing the calculation
     key.
    :param add_repeat_record_archetype_name: If `True`, add the `archetype` name repeat record.
    :param variable_value: If not `None`, use this as the variable value during initialization.
    :return: :class:`ocgis.Variable`
    """
    # If a default data type was provided at initialization, use this value otherwise use the data type from the
    # input value.
    if dtype is None:
        if self.dtype is None:
            dtype = archetype.dtype
        else:
            dtype = self.get_default_dtype()

    if self.fill_value is None:
        fill_value = archetype.fill_value
    else:
        fill_value = self.fill_value

    # Provide a default fill value for file only operations. This will guarantee variable value arrays are
    # initialized with a default fill value.
    if file_only and fill_value is None:
        fill_value = get_default_fill_value_from_dtype(dtype)

    attrs = OrderedDict()
    attrs['standard_name'] = self.standard_name
    attrs['long_name'] = self.long_name

    units = self.get_output_units(archetype)

    if add_repeat_record:
        repeat_record = [(HeaderName.CALCULATION_KEY, self.key)]
        if add_repeat_record_archetype_name:
            repeat_record.append((HeaderName.CALCULATION_SOURCE_VARIABLE, archetype.name))
    else:
        repeat_record = None

    fill = Variable(name=name, dimensions=dimensions, dtype=dtype, fill_value=fill_value, attrs=attrs,
                    units=units, repeat_record=repeat_record, value=variable_value)

    # Allocate only when a value is actually needed and none was supplied.
    if not file_only and variable_value is None:
        fill.allocate_value()

    return fill
def get_subset_field(self):
    """Build a point-of-interest field with geometry, grid code, and description data variables."""
    coordinate_system = self.crs
    geoms = self.geoms

    gridcode = Variable('gridcode', [110101, 12103], dimensions='ngeom')
    description = Variable('description', ['high point', 'low point'], dimensions='ngeom')
    dimension_map = {
        'geom': {'variable': 'geoms', DimensionMapKey.DIMENSION: ['ngeom']},
        'crs': {'variable': coordinate_system.name},
    }

    poi = Field(variables=[geoms, gridcode, description], dimension_map=dimension_map,
                is_data=[gridcode, description])
    # Tag the geometry with its unique identifier variable.
    geoms.set_ugid(gridcode)
    return poi
def test_iter_repeater(self):
    """A repeater key/value pair should be prepended to every record yielded by the iterator."""
    leader = Variable(name='var1', value=[1, 2, 3], dimensions='dim')
    follower = Variable(name='var2', value=[1, 2, 3], dimensions='dim')
    follower.get_value()[:] *= 9

    itr = Iterator(leader, followers=[follower], repeaters=[('i_am', 'a_repeater')])

    # Each record repeats the constant pair then carries the paired variable values.
    expected = [OrderedDict([('i_am', 'a_repeater'), ('var1', idx + 1), ('var2', (idx + 1) * 9)])
                for idx in range(3)]
    self.assertEqual(list(itr), expected)
def test_get_wrapped_state(self):
    """Wrapped-state detection across ranks for wrapped, unwrapped, and unknown coordinates."""
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    ompi = OcgDist()
    ompi.create_dimension('x', 5, dist=True)
    ompi.create_dimension('y', 1)
    ompi.update_dimension_bounds()

    values = [{'value': [-179, -90, 0, 90, 180], 'desired': WrappedState.WRAPPED},
              {'value': [0, 90, 180, 270, 360], 'desired': WrappedState.UNWRAPPED},
              {'value': [1, 2, 3, 4, 5], 'desired': WrappedState.UNKNOWN}]
    kwds = {'values': values, 'crs': [Spherical(), None]}

    for k in self.iter_product_keywords(kwds):
        # Copy the distribution so each combination starts clean.
        ompi = deepcopy(ompi)
        if MPI_RANK == 0:
            vx = Variable(name='x', value=k.values['value'], dimensions='x')
            vy = Variable(name='y', value=[0], dimensions='y')
        else:
            vx, vy = [None] * 2
        vx = variable_scatter(vx, ompi)
        vy = variable_scatter(vy, ompi)

        grid = Grid(vx, vy)
        field = Field(grid=grid, crs=k.crs)

        with vm.scoped_by_emptyable('wrap', field):
            if not vm.is_null:
                wrapped_state = field.wrapped_state
            else:
                wrapped_state = None

        if not field.is_empty:
            # Wrapped state is only defined when a coordinate system is present.
            if k.crs is None:
                self.assertIsNone(wrapped_state)
            else:
                self.assertIsNotNone(wrapped_state)

        if k.crs is None or field.is_empty:
            self.assertIsNone(wrapped_state)
        else:
            self.assertEqual(wrapped_state, k.values['desired'])
def test_renamed_dimensions_on_variables(self):
    """Dimension renames applied by the context manager should revert on exit."""
    vc = VariableCollection()
    vc.add_variable(Variable(name='ugid', value=[1, 2, 3], dimensions='ocgis_geom'))
    vc.add_variable(Variable(name='state', value=[20, 30, 40], dimensions='ocgis_geom'))

    with renamed_dimensions_on_variables(vc, {'geom': ['ocgis_geom']}):
        # Inside the context every member sees the renamed dimension.
        for member in list(vc.values()):
            self.assertEqual(member.dimensions[0].name, 'geom')

    # On exit the original dimension names are restored.
    for member in list(vc.values()):
        self.assertEqual(member.dimensions[0].name, 'ocgis_geom')
def test_resolution(self):
    """Grid resolution should be computed for standard and singleton-dimension grids."""
    for grid in self.get_iter_gridxy():
        self.assertEqual(grid.resolution, 1.)

    # Test resolution with a singleton dimension.
    xvals = Variable(name='x', value=[[1, 2, 3, 4]], dimensions=['first', 'second'])
    yvals = Variable(name='y', value=[[5, 6, 7, 8]], dimensions=['first', 'second'])
    singleton = Grid(xvals, yvals)
    self.assertEqual(singleton.resolution, 1)
def test_set_x(self):
    """Setting a new x-variable replaces the previous one; clearing removes everything."""
    f = Field()
    first = Variable('x', value=[1, 2], dimensions='xdim')
    f.set_x(first, 'xdim')

    second = Variable('x2', value=[3, 4], dimensions='xdim2')
    f.set_x(second, 'xdim2')
    # The original x-variable should have been removed from the field.
    self.assertNotIn(first.name, f)

    f.set_x(None, None)
    self.assertEqual(len(f), 0)
    self.assertIsNone(f.x)
def test_setitem(self):
    """Assigning one grid into another should transfer coordinate values and the mask."""
    grid = self.get_gridxy()

    self.assertNotIn('point', grid.parent)
    self.assertFalse(np.any(grid.get_mask()))

    grid2 = deepcopy(grid)
    # Overwrite coordinates with scalar broadcast values and fully mask the copy.
    grid2.x[:] = Variable(value=111, name='scalar111', dimensions=[])
    grid2.y[:] = Variable(value=222, name='scalar222', dimensions=[])
    grid2.set_mask(np.ones(grid2.shape))
    self.assertTrue(grid2.get_mask().all())

    grid[:, :] = grid2

    self.assertTrue(np.all(grid.get_mask()))
    self.assertEqual(grid.x.get_value().mean(), 111)
    self.assertEqual(grid.y.get_value().mean(), 222)
def test_iter_with_bounds(self):
    """Follower variables carrying bounds columns should appear in each iterator record."""
    source = Variable(name='bounded', value=[1, 2, 3, 4], dtype=float, dimensions='dim')
    source.set_extrapolated_bounds('the_bounds', 'bounds')

    bounds_value = source.bounds.get_value()
    lower = Variable(name='lower_bounds', value=bounds_value[:, 0], dimensions=source.dimensions)
    upper = Variable(name='upper_bounds', value=bounds_value[:, 1], dimensions=source.dimensions)

    records = list(Iterator(source, followers=[lower, upper]))

    self.assertEqual(len(records), source.shape[0])
    # Each record holds the value plus its lower and upper bound.
    self.assertEqual(len(records[0]), 3)
def test_reduce_reindex_coordinate_variables(self):
    """Reduce a distributed coordinate index and verify the reindexed values map back correctly."""
    self.add_barrier = False
    dist = OcgDist()
    dist.create_dimension('dim', 12, dist=True)
    dist.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    if vm.rank == 0:
        var_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim')
    else:
        var_cindex = None
    var_cindex = variable_scatter(var_cindex, dist)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return
    raise_if_empty(var_cindex)

    coords = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
    coords = Variable(name='coords', value=coords, dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_variables(var_cindex)

    # Coordinate values addressed by the original global index.
    desired = coords[global_cindex_arr].get_value()

    if len(u_indices) > 0:
        new_coords = coords[u_indices].get_value()
    else:
        new_coords = np.array([])
    gathered_new_coords = vm.gather(new_coords)
    gathered_new_cindex = vm.gather(new_cindex)
    if vm.rank == 0:
        gathered_new_coords = hgather(gathered_new_coords)
        gathered_new_cindex = hgather(gathered_new_cindex)

        actual = gathered_new_coords[gathered_new_cindex]

        self.assertAsSetEqual(gathered_new_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        desired_new_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(gathered_new_coords.tolist(), desired_new_coords)
        self.assertEqual(len(gathered_new_coords), len(desired_new_coords))

        # The reindexed coordinates must reproduce the original addressed values.
        self.assertNumpyAll(actual, desired)
def test_system_through_operations(self):
    """Test calculation through operations."""
    row = Variable(name='y', value=[1, 2, 3, 4], dimensions='y')
    col = Variable(name='x', value=[10, 11, 12], dimensions='x')
    grid = Grid(col, row)
    time = TemporalVariable(name='time', value=[1, 2], dimensions='time')
    data = Variable(name='data', dimensions=[time.dimensions[0]] + list(grid.dimensions))
    # Fill all cells with 1 on the first time step and 2 on the second.
    data.get_value()[0, :] = 1
    data.get_value()[1, :] = 2
    field = Field(grid=grid, time=time, is_data=data)

    calc = [{'func': 'sum', 'name': 'sum'}]
    ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True, aggregate=True)
    ret = ops.execute()

    actual = ret.get_element(variable_name='sum').get_masked_value().flatten()
    # 12 cells of 1 on day one and 12 cells of 2 on day two.
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
def test_iter_formatter(self):
    """A formatter may transform values and add columns; masked values arrive as None."""

    def _formatter_(name, value, mask):
        # Masked values are passed in as None and propagated unchanged.
        if value is None:
            modified_value = None
        else:
            modified_value = value * 1000
            value = str(value)
        ret = [(name, value), ('modified', modified_value)]
        return ret

    var = Variable(name='data', value=[1, 2, 3], mask=[False, True, False], dimensions='dim')
    itr = Iterator(var, formatter=_formatter_)

    as_list = list(itr)
    # The masked element (index 1) yields None.
    actual = as_list[1][var.name]
    self.assertIsNone(actual)
    # Unmasked values are stringified and also emitted scaled in the 'modified' column.
    self.assertEqual(as_list[2][var.name], str(var.get_value()[2]))
    self.assertEqual(as_list[0]['modified'], 1000)
def create_rank_valued_netcdf(self):
    """Write, on rank 0 only, a netCDF file whose data encodes rank blocks.

    Each simulated rank ``r`` contributes a block of ten values equal to
    ``1 + 10 * (r + 1)``. The path is broadcast so every rank returns it.

    :return: Path to the created netCDF file.
    :rtype: str
    """
    block = 10
    nranks = vm.size_global
    with vm.scoped('write rank netcdf', [0]):
        if vm.is_null:
            path = None
        else:
            path = self.get_temporary_file_path('dist_desired.nc')
            dim = Dimension('dist_dim', block * nranks)
            var = Variable(name='data', dimensions=dim, attrs={'hi': 5})
            for rr in range(nranks):
                start = block * rr
                var.get_value()[start: start + block] = np.ones(block) + (10 * (rr + 1))
            var.parent.attrs = {'hi_dataset_level': 'whee'}
            var.write(path)
    return vm.bcast(path)
def test_variable_collection_scatter(self):
    """Test scattering a variable collection over a distribution.

    Exercises three variable kinds: a distributed variable ('holds_five'),
    a non-distributed variable ('all_in'), and a dimensionless variable
    ('i_could_be_a_coordinate_system'). Assertions branch on MPI rank.
    """
    dest_mpi = OcgDist()
    five = dest_mpi.create_dimension('five', 5, dist=True)
    ten = dest_mpi.create_dimension('ten', 10)
    dest_mpi.create_variable(name='five', dimensions=five)
    dest_mpi.create_variable(name='all_in', dimensions=ten)
    dest_mpi.create_variable(name='i_could_be_a_coordinate_system')
    dest_mpi.update_dimension_bounds()

    # Only rank 0 holds the source collection to scatter.
    if MPI_RANK == 0:
        var = Variable('holds_five', np.arange(5), dimensions='five')
        var_empty = Variable('i_could_be_a_coordinate_system', attrs={'reality': 'im_not'})
        var_not_dist = Variable('all_in', value=np.arange(10) + 10, dimensions='ten')
        vc = VariableCollection(variables=[var, var_empty, var_not_dist])
    else:
        vc = None

    svc = variable_collection_scatter(vc, dest_mpi)

    # Attributes survive the scatter on every rank.
    self.assertEqual(svc['i_could_be_a_coordinate_system'].attrs['reality'], 'im_not')

    # The distributed dimension only spans the first two ranks; higher
    # ranks receive an empty collection.
    if MPI_RANK < 2:
        self.assertFalse(svc['all_in'].is_empty)
        self.assertNumpyAll(svc['all_in'].get_value(), np.arange(10) + 10)
        self.assertFalse(svc.is_empty)
        self.assertFalse(svc['i_could_be_a_coordinate_system'].is_empty)
    else:
        self.assertTrue(svc['all_in'].is_empty)
        self.assertTrue(svc.is_empty)
        self.assertTrue(svc['i_could_be_a_coordinate_system'].is_empty)

    # Scattering must not mutate the source collection on rank 0.
    if MPI_RANK == 0:
        self.assertNumpyAll(var.get_value(), vc[var.name].get_value())

    actual = svc['holds_five'].get_value()
    if MPI_SIZE == 2:
        # Expected split of the 5-element distributed dimension over 2 ranks.
        desired = {0: np.arange(3), 1: np.arange(3, 5)}
        self.assertNumpyAll(actual, desired[MPI_RANK])

    actual = svc['holds_five'].is_empty
    if MPI_RANK > 1:
        self.assertTrue(actual)
    else:
        self.assertFalse(actual)
def test_write_variable_fill_value_is_maintained(self):
    """Test a variable's fill value and derived mask survive a parallel write.

    Requires exactly four ranks: an 8-element distributed dimension gives
    each rank two elements, one of which matches the fill value.
    """
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    dist = OcgDist()
    dim = dist.create_dimension('dim', 8, dist=True)
    dist.update_dimension_bounds()

    var = Variable(name='var', dimensions=dim, fill_value=2.)
    var.v()[0] = 1
    var.v()[1] = 2
    # check_value=True masks elements equal to the fill value (the value 2).
    var.get_mask(create=True, check_value=True)

    if vm.rank == 0:
        path = self.get_temporary_file_path('foo.nc')
    else:
        path = None
    path = vm.bcast(path)

    var.parent.write(path)

    # if vm.rank == 0:
    #     self.ncdump(path, header_only=False)

    # Read back on a single rank: 4 ranks x 1 masked element each = 4.
    with vm.scoped('read test', [0]):
        if not vm.is_null:
            invar = RequestDataset(path).create_field()['var']
            self.assertEqual(invar.get_mask().sum(), 4)
            self.assertEqual(invar.fill_value, 2.)
def test_init(self):
    """Test grid construction with and without extrapolated bounds."""
    # Without bounds the grid archetype carries no bounds.
    yvar = Variable(value=[2, 3], name='row', dimensions='y')
    xvar = Variable(value=[4, 5], name='col', dimensions='x')
    no_bounds_grid = Grid(xvar, yvar)
    self.assertIsNone(no_bounds_grid.archetype.bounds)

    # With extrapolated bounds the abstraction becomes polygon-based.
    yvar = Variable(value=[2, 3], name='row', dimensions='y')
    yvar.set_extrapolated_bounds('row_bounds', 'bounds')
    xvar = Variable(value=[4, 5], name='col', dimensions='x')
    xvar.set_extrapolated_bounds('col_bounds', 'bounds')
    bounded_grid = Grid(y=yvar, x=xvar)
    self.assertEqual(bounded_grid.abstraction, 'polygon')

    geom = get_geometry_variable(bounded_grid)
    self.assertEqual(geom.geom_type, 'Polygon')
def test_write_variable(self):
    """Test writing variables to netCDF and reading them back."""
    # A scalar (dimensionless) variable round-trips its value.
    scalar_path = self.get_temporary_file_path('foo.nc')
    scalar = Variable(name='height', value=10.0, dimensions=[])
    scalar.write(scalar_path)
    readback = SourcedVariable(name='height', request_dataset=RequestDataset(scalar_path))
    self.assertEqual(readback.get_value(), scalar.get_value())

    # Test mask persists after write.
    masked = Variable(name='the_mask',
                      value=[1, 2, 3, 4],
                      mask=[False, True, True, False],
                      dimensions='ephemeral',
                      fill_value=222)
    masked_path = self.get_temporary_file_path('foo.nc')
    masked.write(masked_path)
    source = RequestDataset(masked_path, driver=DriverNetcdf)
    sourced = SourcedVariable(name='the_mask', request_dataset=source)
    # Masked elements come back as the fill value; the mask itself is intact.
    self.assertEqual(sourced.get_value().tolist(), [1, 222, 222, 4])
    self.assertNumpyAll(sourced.get_mask(), masked.get_mask())
def test_system_parallel_write_ndvariable(self):
    """Test a parallel vector GIS write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()

    # Build the full field on rank 0 only; it is scattered afterwards.
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.shp')

        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        dimension_map = {'x': {'variable': 'x', 'bounds': 'x_bounds'},
                         'y': {'variable': 'y', 'bounds': 'y_bounds'},
                         'time': {'variable': 'time', 'bounds': 'the_time_bounds'}}

        vc = Field(variables=[t, extra, x, y, data], dimension_map=dimension_map, is_data='data')
        vc.set_abstraction_geom()
    else:
        path, vc = [None] * 2

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, ompi)

    # Only ranks holding data participate in the vector write.
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, driver=DriverVector)

    MPI_COMM.Barrier()

    # 3 * 2 * 7 * 4 = 168 records expected across all ranks.
    desired = 168
    rd = RequestDataset(path, driver=DriverVector)
    sizes = MPI_COMM.gather(rd.get().geom.shape[0])
    if MPI_RANK == 0:
        self.assertEqual(sum(sizes), desired)
def test_get_unioned_spatial_average_differing_dimensions(self):
    """Test spatial averaging during a union with differing variable ranks."""
    pa = self.get_geometryvariable()

    # One-dimensional weighted variable aligned with the geometry dimension.
    weights_1d = Variable(name='to_weight', dimensions=pa.dimensions, dtype=float)
    weights_1d.get_value()[0] = 5.0
    weights_1d.get_value()[1] = 10.0
    pa.parent.add_variable(weights_1d)

    # Three-dimensional variable: identical values along the geometry axis so
    # the spatial average equals the first geometry slice.
    weights_3d = Variable(name='to_weight2',
                          dimensions=[Dimension('time', 10), Dimension('level', 3), pa.dimensions[0]],
                          dtype=float)
    for ti in range(weights_3d.shape[0]):
        for li in range(weights_3d.shape[1]):
            weights_3d.get_value()[ti, li] = (ti + 2) + (li + 2) ** (li + 1)
    pa.parent.add_variable(weights_3d)

    unioned = pa.get_unioned(spatial_average=['to_weight', 'to_weight2'])

    averaged = unioned.parent[weights_3d.name]
    # Geometry dimension collapses to size one; the source is untouched.
    self.assertEqual(averaged.shape, (10, 3, 1))
    self.assertEqual(weights_3d.shape, (10, 3, 2))
    self.assertNumpyAll(averaged.get_value(),
                        weights_3d.get_value()[:, :, 0].reshape(10, 3, 1))
    self.assertEqual(averaged.dimension_names, ('time', 'level', 'ocgis_geom_union'))
    # Equal-area average of 5.0 and 10.0.
    self.assertEqual(unioned.parent[weights_1d.name].get_value()[0], 7.5)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()

    # Build the full collection on rank 0 only; it is scattered afterwards.
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')

        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        vc = VariableCollection(variables=[t, extra, x, y, data])
    else:
        path, vc = [None] * 2

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, ompi)

    # Only ranks holding data participate in the CSV write.
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, iter_kwargs={'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']},
                     driver=DriverCSV)

    if MPI_RANK == 0:
        # 3 * 2 * 7 * 4 = 168 data rows plus one header line.
        desired = 169
        with open(path, 'r') as f:
            lines = f.readlines()
        self.assertEqual(len(lines), desired)
def get_ocgis_grid_from_esmf_grid(egrid):
    """
    Create an OCGIS grid from an ESMF grid.

    :param egrid: The input ESMF grid to convert to an OCGIS grid. Expected to carry OCGIS bookkeeping in its
     ``_ocgis`` attribute (dimension map and dimension names).
    :type egrid: :class:`ESMF.Grid`
    :return: :class:`~ocgis.Grid`
    """
    dmap = egrid._ocgis['dimension_map']
    # ESMF dimension names and the original (back-reference) OCGIS ordering.
    edims = list(egrid._ocgis['dimnames'])
    odims = egrid._ocgis['dimnames_backref']

    coords = egrid.coords[ESMF.StaggerLoc.CENTER]
    var_x = Variable(name=dmap.get_variable(DMK.X), value=coords[0], dimensions=edims)
    var_y = Variable(name=dmap.get_variable(DMK.Y), value=coords[1], dimensions=edims)

    # Build OCGIS corners array if corners are present on the ESMF grid object.
    has_corners = esmf_grid_has_corners(egrid)
    if has_corners:
        corner = egrid.coords[ESMF.StaggerLoc.CORNER]
        if egrid.periodic_dim == 0:
            # A periodic first dimension drops the closing corner column on the
            # ESMF side; re-append it by wrapping back to the first column.
            xcorner = np.zeros([corner[0].shape[0] + 1, corner[0].shape[1]], dtype=corner[0].dtype)
            xcorner[0:corner[0].shape[0], :] = corner[0]
            xcorner[-1, :] = corner[0][0, :]

            ycorner = np.zeros([corner[1].shape[0] + 1, corner[1].shape[1]], dtype=corner[1].dtype)
            ycorner[0:corner[1].shape[0], :] = corner[1]
            ycorner[-1, :] = corner[1][0, :]
        else:
            xcorner = corner[0]
            ycorner = corner[1]
        ocorner_x = create_ocgis_corners_from_esmf_corners(xcorner)
        ocorner_y = create_ocgis_corners_from_esmf_corners(ycorner)

        cdims = deepcopy(edims)
        cdims.append(constants.DEFAULT_NAME_CORNERS_DIMENSION)
        vocorner_x = Variable(name=dmap.get_bounds(DMK.X), value=ocorner_x, dimensions=cdims)
        vocorner_y = Variable(name=dmap.get_bounds(DMK.Y), value=ocorner_y, dimensions=cdims)

    crs = get_crs_from_esmf(egrid)

    ogrid = Grid(x=var_x, y=var_y, crs=crs)

    # Does the grid have a mask?
    has_mask = False
    if egrid.mask is not None:
        if egrid.mask[ESMF.StaggerLoc.CENTER] is not None:
            has_mask = True
    if has_mask:
        # if there is a mask, update the grid values
        # NOTE: ESMF uses nonzero == unmasked; OCGIS uses True == masked, hence the inversion.
        egrid_mask = egrid.mask[ESMF.StaggerLoc.CENTER]
        egrid_mask = np.invert(egrid_mask.astype(bool))
        ogrid.set_mask(egrid_mask)

    ogrid.parent.dimension_map = dmap

    # Restore the original OCGIS dimension ordering if ESMF reordered it.
    if tuple(odims) != tuple(edims):
        broadcast_variable(var_x, odims)
        broadcast_variable(var_y, odims)
        if has_corners:
            broadcast_variable(vocorner_x, list(odims) + [constants.DEFAULT_NAME_CORNERS_DIMENSION])
            broadcast_variable(vocorner_y, list(odims) + [constants.DEFAULT_NAME_CORNERS_DIMENSION])

    # Attach bounds after any reordering so they share the final dimensions.
    if has_corners:
        var_x.set_bounds(vocorner_x)
        var_y.set_bounds(vocorner_y)

    return ogrid
def write_chunks(self):
    """
    Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
    regridding weights for each subset if requested.

    Runs collectively: all ranks iterate the subsets; the index file is written by rank 0 only.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    src_slices = []
    index_path = self.create_full_path_from_template('index_file')

    # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))

    ctr = 1
    ocgis_lh(logger='grid_chunker', msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
    for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
        ocgis_lh(logger='grid_chunker', msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                 level=logging.DEBUG)

        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        # Only the basename is recorded for source/destination; weights keep the full path.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        if self.iter_dst is None:
            zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
        else:
            zip_args = [[sub_src], [src_path]]

        cc = 1
        for target, path in zip(*zip_args):
            # Scope to non-empty ranks so empty subsets do not attempt a write.
            with vm.scoped_by_emptyable('field.write' + str(cc), target):
                if not vm.is_null:
                    ocgis_lh(logger='grid_chunker', msg='write_chunks:writing: {}'.format(path),
                             level=logging.DEBUG)
                    field = Field(grid=target)
                    field.write(path)
                    ocgis_lh(logger='grid_chunker', msg='write_chunks:finished writing: {}'.format(path),
                             level=logging.DEBUG)
            cc += 1

        # Increment the counter outside of the loop to avoid counting empty subsets.
        ctr += 1

        # Generate an ESMF weights file if requested and at least one rank has data on it.
        if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
            vm.barrier()
            self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src, dst_grid=sub_dst)
            vm.barrier()

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        len_src_slices = len(src_slices)
        new_src_grid_slice = [None] * len_src_slices
        for idx in range(len_src_slices):
            # Take the first non-None slice for this chunk from any rank.
            for rank_src_grid_slice in gathered_src_grid_slice:
                if rank_src_grid_slice[idx] is not None:
                    new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                    break
        src_slices = new_src_grid_slice

    # The index file is written by rank 0 only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_chunker_source'},
                     {'esmf_role': grid_chunker_destination},
                     {'esmf_role': 'grid_chunker_weights'}]

            vc = VariableCollection()

            # The index variable is dimensionless; its attributes point at the
            # filename variables and record global grid shapes.
            grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_chunker_index)
            vidx.attrs['esmf_role'] = grid_chunker_index
            vidx.attrs['grid_chunker_source'] = 'source_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_chunker_weights'] = 'weights_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source -----------------------------------------------------------------------------------------------
            self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)
            # Destination ------------------------------------------------------------------------------------------
            self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def _convert_to_ugrid_(field): """ Takes field data out of the OCGIS unstructured format (similar to UGRID) converting to the format expected by ESMF Unstructured metadata. """ # The driver for the current field must be NetCDF UGRID to ensure interpretability. assert field.dimension_map.get_driver() == DriverKey.NETCDF_UGRID grid = field.grid # Three-dimensional data is not supported. assert not grid.has_z # Number of coordinate dimension. This will be 3 for three-dimensional data. coord_dim = Dimension('coordDim', 2) # Transform ragged array to one-dimensional array. ############################################################# cindex = grid.cindex elements = cindex.get_value() num_element_conn_data = [e.shape[0] for e in elements.flat] length_connection_count = sum(num_element_conn_data) esmf_element_conn = np.zeros(length_connection_count, dtype=elements[0].dtype) start = 0 tag_start_index = MPITag.START_INDEX # Collapse the ragged element index array into a single dimensioned vector. This communication block finds the # size for the new array. ###################################################################################### if vm.size > 1: max_index = max([ii.max() for ii in elements.flat]) if vm.rank == 0: vm.comm.isend(max_index + 1, dest=1, tag=tag_start_index) adjust = 0 else: adjust = vm.comm.irecv(source=vm.rank - 1, tag=tag_start_index) adjust = adjust.wait() if vm.rank != vm.size - 1: vm.comm.isend(max_index + 1 + adjust, dest=vm.rank + 1, tag=tag_start_index) # Fill the new vector for the element connectivity. ############################################################ for ii in elements.flat: if vm.size > 1: if grid.archetype.has_multi: mbv = cindex.attrs[OcgisConvention.Name.MULTI_BREAK_VALUE] replace_breaks = np.where(ii == mbv)[0] else: replace_breaks = [] ii = ii + adjust if len(replace_breaks) > 0: ii[replace_breaks] = mbv esmf_element_conn[start: start + ii.shape[0]] = ii start += ii.shape[0] # Create the new data representation. 
########################################################################## connection_count = create_distributed_dimension(esmf_element_conn.size, name='connectionCount') esmf_element_conn_var = Variable(name='elementConn', value=esmf_element_conn, dimensions=connection_count, dtype=np.int32) esmf_element_conn_var.attrs[CFName.LONG_NAME] = 'Node indices that define the element connectivity.' mbv = cindex.attrs.get(OcgisConvention.Name.MULTI_BREAK_VALUE) if mbv is not None: esmf_element_conn_var.attrs['polygon_break_value'] = mbv esmf_element_conn_var.attrs['start_index'] = grid.start_index ret = VariableCollection(variables=field.copy().values(), force=True) # Rename the element count dimension. original_name = ret[cindex.name].dimensions[0].name ret.rename_dimension(original_name, 'elementCount') # Add the element-node connectivity variable to the collection. ret.add_variable(esmf_element_conn_var) num_element_conn = Variable(name='numElementConn', value=num_element_conn_data, dimensions=cindex.dimensions[0], attrs={CFName.LONG_NAME: 'Number of nodes per element.'}, dtype=np.int32) ret.add_variable(num_element_conn) # Check that the node count dimension is appropriately named. gn_name = grid.node_dim.name if gn_name != 'nodeCount': ret.dimensions[gn_name] = ret.dimensions[gn_name].copy() ret.rename_dimension(gn_name, 'nodeCount') node_coords = Variable(name='nodeCoords', dimensions=(ret.dimensions['nodeCount'], coord_dim)) node_coords.units = 'degrees' node_coords.attrs[CFName.LONG_NAME] = 'Node coordinate values indexed by element connectivity.' node_coords.attrs['coordinates'] = 'x y' fill = node_coords.get_value() fill[:, 0] = grid.x.get_value() fill[:, 1] = grid.y.get_value() ret.pop(grid.x.name) ret.pop(grid.y.name) ret.add_variable(node_coords) ret.attrs['gridType'] = 'unstructured' ret.attrs['version'] = '0.9' # Remove the coordinate index, this does not matter. if field.grid.cindex is not None: ret.remove_variable(field.grid.cindex.name) return ret
def from_records(cls, records, schema=None, crs=UNINITIALIZED, uid=None, union=False, data_model=None):
    """
    Create a :class:`~ocgis.Field` from Fiona-like records.

    :param records: A sequence of records returned from an Fiona file object.
    :type records: `sequence` of :class:`dict`
    :param schema: A Fiona-like schema dictionary. If ``None`` and any records properties are ``None``, then this
     must be provided.
    :type schema: dict

    >>> schema = {'geometry': 'Point', 'properties': {'UGID': 'int', 'NAME': 'str:4'}}

    :param crs: If :attr:`ocgis.constants.UNINITIALIZED`, default to :attr:`ocgis.env.DEFAULT_COORDSYS`.
    :type crs: :class:`dict` | :class:`~ocgis.variable.crs.AbstractCoordinateReferenceSystem`
    :param str uid: If provided, use this attribute name as the unique identifier. Otherwise search for
     :attr:`env.DEFAULT_GEOM_UID` and, if not present, construct a 1-based identifier with this name.
    :param bool union: If ``True``, union the geometries from records yielding a single geometry with a unique
     identifier value of ``1``.
    :param str data_model: See :meth:`~ocgis.driver.nc.create_typed_variable_from_data_model`.
    :returns: Field object constructed from records.
    :rtype: :class:`~ocgis.Field`
    """
    if uid is None:
        uid = env.DEFAULT_GEOM_UID

    if isinstance(crs, dict):
        crs = CoordinateReferenceSystem(value=crs)
    elif crs == UNINITIALIZED:
        crs = env.DEFAULT_COORDSYS

    if union:
        deque_geoms = None
        deque_uid = [1]
    else:
        # Holds geometry objects.
        deque_geoms = deque()
        # Holds unique identifiers.
        deque_uid = deque()

    build = True
    for ctr, record in enumerate(records, start=1):

        # Get the geometry from a keyword present on the input dictionary or construct from the coordinates
        # sequence.
        try:
            current_geom = record['geom']
        except KeyError:
            current_geom = shape(record['geometry'])

        if union:
            # Accumulate a single unioned geometry instead of a sequence.
            if build:
                deque_geoms = current_geom
            else:
                deque_geoms = deque_geoms.union(current_geom)
        else:
            deque_geoms.append(current_geom)

        # Set up the properties array
        if build:
            build = False

            if uid in record['properties']:
                has_uid = True
            else:
                has_uid = False

        # The geometry unique identifier may be present as a property. Otherwise the enumeration counter is used
        # for the identifier.
        if not union:
            if has_uid:
                to_append = int(record['properties'][uid])
            else:
                to_append = ctr
            deque_uid.append(to_append)

    # If we are unioning, the target geometry is not yet a sequence.
    if union:
        deque_geoms = [deque_geoms]

    # Dimension for the outgoing field.
    if union:
        size = 1
    else:
        size = ctr
    dim = Dimension(name=DimensionName.GEOMETRY_DIMENSION, size=size)

    # Set default geometry type if no schema is provided.
    if schema is None:
        geom_type = 'auto'
    else:
        geom_type = schema['geometry']

    geom = GeometryVariable(value=deque_geoms, geom_type=geom_type, dimensions=dim)
    uid = create_typed_variable_from_data_model('int', data_model=data_model, name=uid, value=deque_uid,
                                                dimensions=dim)
    geom.set_ugid(uid)

    field = Field(geom=geom, crs=crs)

    # All records from a unioned geometry are not relevant.
    if not union:
        from ocgis.driver.vector import get_dtype_from_fiona_type, get_fiona_type_from_pydata

        if schema is None:
            has_schema = False
        else:
            has_schema = True

        for idx, record in enumerate(records):
            # Derive a schema from the first record when one was not supplied.
            if idx == 0 and not has_schema:
                schema = {'properties': OrderedDict()}
                for k, v in list(record['properties'].items()):
                    schema['properties'][k] = get_fiona_type_from_pydata(v)
            # Create one field variable per (non-uid) property on the first pass.
            if idx == 0:
                for k, v in list(schema['properties'].items()):
                    if k == uid.name:
                        continue
                    dtype = get_dtype_from_fiona_type(v, data_model=data_model)
                    var = Variable(name=k, dtype=dtype, dimensions=dim)
                    if v.startswith('str:'):
                        var.set_string_max_length_global(value=int(v.split(':')[1]))
                    field.add_variable(var)
            for k, v in list(record['properties'].items()):
                if k == uid.name:
                    continue
                if v is None:
                    # Mask the value if it is None. NULLs are allowed in OGR Vector files, but they do not
                    # translate well to Python. Strings are generally okay but floats/ints case problems.
                    field[k].get_mask(create=True)[idx] = v
                else:
                    # Set the associated field value.
                    field[k].get_value()[idx] = v

    data_variables = [uid.name]
    if not union:
        data_variables += [k for k in list(schema['properties'].keys()) if k != uid.name]
    field.append_to_tags(TagName.DATA_VARIABLES, data_variables, create=True)

    return field
def regrid_field(source, destination, regrid_method='auto', value_mask=None, split=True, fill_value=None,
                 weights_in=None, weights_out=None):
    """
    Regrid ``source`` data to match the grid of ``destination``.

    :param source: The source field.
    :type source: :class:`ocgis.Field`
    :param destination: The destination field.
    :type destination: :class:`ocgis.Field`
    :param regrid_method: See :func:`~ocgis.regrid.base.create_esmf_grid`.
    :param value_mask: See :func:`~ocgis.regrid.base.iter_esmf_fields`.
    :type value_mask: :class:`numpy.ndarray`
    :param bool split: See :func:`~ocgis.regrid.base.iter_esmf_fields`.
    :param fill_value: Destination fill value used to fill the destination field before regridding. If ``None``,
     then the default fill value for the destination field data type will be used.
    :type fill_value: int | float
    :rtype: :class:`ocgis.Field`
    :param weights_in: Optional path to an input weights file. The route handle will be created from weights in
     this file. Assumes a SCRIP-like structure for the input weight file.
    :type weights_in: str
    :param weights_out: Optional path to an output weight file. Does NOT do any regridding - just writes the
     weights.
    :type weights_out: str
    """
    # This function runs a series of asserts to make sure the sources and destination are compatible.
    check_fields_for_regridding(source, destination, regrid_method=regrid_method)

    dst_grid = destination.grid  # Reference the destination grid
    # Spatial coordinate dimensions for the destination grid
    dst_spatial_coordinate_dimensions = OrderedDict([(dim.name, dim) for dim in dst_grid.dimensions])
    # Spatial coordinate dimensions for the source grid
    src_spatial_coordinate_dimensions = OrderedDict([(dim.name, dim) for dim in source.grid.dimensions])

    try:
        archetype = source.data_variables[0]  # Reference an archetype data variable.
    except IndexError:
        # There may be no data variables. Use the grid as reference instead.
        archetype = source.grid

    # Extra dimensions (like time or level) to iterate over or use for ndbounds depending on the split protocol
    extra_dimensions = OrderedDict([(dim.name, dim) for dim in archetype.dimensions
                                    if dim.name not in dst_spatial_coordinate_dimensions
                                    and dim.name not in src_spatial_coordinate_dimensions])

    # If there are no extra dimensions, then there is no need to split fields.
    if len(extra_dimensions) == 0:
        split = False

    if split:
        # There are no extra, ungridded dimensions for ESMF to use.
        ndbounds = None
    else:
        # These are the extra, ungridded dimensions for ESMF to use (ndbounds).
        ndbounds = [len(dim) for dim in extra_dimensions.values()]
        ndbounds.reverse()  # Fortran order is used by ESMF

    # Regrid each source.
    ocgis_lh(logger='iter_regridded_fields', msg='starting source regrid loop', level=logging.DEBUG)
    build = True  # Flag for first loop
    fills = {}  # Holds destination field fill variables.

    # TODO: OPTIMIZE: The source and destination field objects should be reused and refilled when split=False
    # Main field iterator for use in the regridding loop
    for variable_name, src_efield, current_slice in iter_esmf_fields(source, regrid_method=regrid_method,
                                                                     value_mask=value_mask, split=split):
        # We need to generate new variables given the change in shape
        if variable_name not in fills:
            # Create the destination data variable dimensions. These are a combination of the extra dimensions and
            # spatial coordinate dimensions.
            if len(extra_dimensions) > 0:
                new_dimensions = list(extra_dimensions.values())
            else:
                new_dimensions = []
            new_dimensions += list(dst_grid.dimensions)
            # Reverse the dimensions for the creation as we are working in Fortran ordering with ESMF.
            new_dimensions.reverse()

            # Create the destination fill variable and cache it
            source_variable = source[variable_name]
            new_variable = Variable(name=variable_name, dimensions=new_dimensions, dtype=source_variable.dtype,
                                    fill_value=source_variable.fill_value, attrs=source_variable.attrs)
            fills[variable_name] = new_variable

        # Only build the ESMF/OCGIS destination grids and fields once.
        if build:
            # Build the destination grid once.
            ocgis_lh(logger='iter_regridded_fields', msg='before create_esmf_grid', level=logging.DEBUG)
            esmf_destination_grid = create_esmf_grid(destination.grid, regrid_method=regrid_method,
                                                    value_mask=value_mask)

            # Check for corners on the destination grid. If they exist, conservative regridding is possible.
            if regrid_method == 'auto':
                if esmf_grid_has_corners(esmf_destination_grid) and esmf_grid_has_corners(src_efield.grid):
                    regrid_method = ESMF.RegridMethod.CONSERVE
                else:
                    regrid_method = None

            # Prepare the regridded sourced field. This amounts to exchanging the grids between the objects.
            regridded_source = source.copy()
            regridded_source.grid.extract(clean_break=True)
            regridded_source.set_grid(destination.grid.extract())

        # Destination ESMF field
        dst_efield = ESMF.Field(esmf_destination_grid, name='destination', ndbounds=ndbounds)
        fill_variable = fills[variable_name]  # Reference the destination data variable object
        if fill_value is None:
            fv = fill_variable.fill_value  # The fill value used for the variable data type
        else:
            fv = fill_value
        dst_efield.data.fill(fv)  # Fill the ESMF destination field with that fill value to help track masks

        # Construct the regrid object. Weight generation actually occurs in this call.
        ocgis_lh(logger='iter_regridded_fields', msg='before ESMF.Regrid', level=logging.DEBUG)
        if build:
            # Only create the regrid object once. It may be reused if split=True.
            if weights_in is None:
                if weights_out is None:
                    create_rh = False
                else:
                    create_rh = True
                # Create the weights and ESMF route handle from the grids
                regrid = ESMF.Regrid(src_efield, dst_efield, unmapped_action=ESMF.UnmappedAction.IGNORE,
                                     regrid_method=regrid_method, src_mask_values=[0], dst_mask_values=[0],
                                     filename=weights_out, create_rh=create_rh)
            else:
                # Create ESMF route handle with weights read from file
                regrid = ESMF.RegridFromFile(src_efield, dst_efield, weights_in)
            build = False
        ocgis_lh(logger='iter_regridded_fields', msg='after ESMF.Regrid', level=logging.DEBUG)

        # If we are just writing the weights file, bail out after it is written.
        if weights_out is not None:
            destroy_esmf_objects([regrid, src_efield, dst_efield, esmf_destination_grid])
            return

        # Perform the regrid operation. "zero_region" only fills values involved with regridding.
        ocgis_lh(logger='iter_regridded_fields', msg='before regrid', level=logging.DEBUG)
        regridded_esmf_field = regrid(src_efield, dst_efield, zero_region=ESMF.Region.SELECT)
        e_data = regridded_esmf_field.data  # Regridded data values

        # These are the unmapped values coming out of the ESMF regrid operation.
        unmapped_mask = e_data[:] == fv

        # If all data is masked, raise an exception.
        if unmapped_mask.all():
            # Destroy ESMF objects.
            destroy_esmf_objects([regrid, dst_efield, esmf_destination_grid])
            msg = 'All regridded elements are masked. Do the input spatial extents overlap?'
            raise RegriddingError(msg)

        if current_slice is not None:
            # Create an OCGIS variable to use for setting on the destination. We want to use label-based slicing
            # since arbitrary dimensions are possible with the extra dimensions. First, set defaults for the
            # spatial coordinate slices.
            for k in dst_spatial_coordinate_dimensions.keys():
                current_slice[k] = slice(None)
            # The spatial coordinate dimension names for ESMF in Fortran order
            e_data_dimensions = deepcopy(list(dst_spatial_coordinate_dimensions.keys()))
            e_data_dimensions.reverse()
            # The extra dimension names for ESMF in Fortran order
            e_data_dimensions_extra = deepcopy(list(extra_dimensions.keys()))
            e_data_dimensions_extra.reverse()
            # Wrap the ESMF data in an OCGIS variable
            e_data_var = Variable(name='e_data', value=e_data, dimensions=e_data_dimensions, mask=unmapped_mask)
            # Expand the new variable's dimension to account for the extra dimensions
            reshape_dims = list(e_data_var.dimensions) + [Dimension(name=n, size=1)
                                                          for n in e_data_dimensions_extra]
            e_data_var.reshape(reshape_dims)
            # Set the destination fill variable with the ESMF regridded data
            fill_variable[current_slice] = e_data_var
        else:
            # ESMF and OCGIS dimensions align at this point, so just insert the data
            fill_variable.v()[:] = e_data

    # Create a new variable collection and add the variables to the output field.
    for v in list(fills.values()):
        regridded_source.add_variable(v, is_data=True, force=True)

    # Destroy ESMF objects.
    if weights_out is None:
        destroy_esmf_objects([regrid, dst_efield, src_efield, esmf_destination_grid])
    else:
        destroy_esmf_objects([dst_efield, src_efield, esmf_destination_grid])

    # Broadcast ESMF (Fortran) ordering to Python (C) ordering.
    dst_names = [dim.name for dim in new_dimensions]
    dst_names.reverse()
    for data_variable in regridded_source.data_variables:
        broadcast_variable(data_variable, dst_names)

    return regridded_source