def test_system_with_time_data(self):
    """Test writing data with a time dimension."""
    out_path = self.get_temporary_file_path('what.shp')

    time_var = TemporalVariable(value=[1.5, 2.5], name='time', dimensions='time')
    geom_var = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom', dimensions='time')
    dmap = {'time': {'variable': 'time'}, 'geom': {'variable': 'geom'}}
    out_field = Field(variables=[time_var, geom_var], dimension_map=dmap)
    out_field.write(out_path, iter_kwargs={'variable': 'time'}, driver=DriverVector)

    in_field = RequestDataset(uri=out_path).get()
    actual = in_field['TIME'].get_value().tolist()
    # netcdftime worthlessness: the year may be rendered padded or unpadded, so
    # accept either formatting of the same two timestamps.
    candidates = [['0001-01-02 12:00:00', '0001-01-03 12:00:00'],
                  ['1-01-02 12:00:00', '1-01-03 12:00:00']]
    self.assertTrue(any(candidate == actual for candidate in candidates))
def test_system_dataset_identifiers_on_variables(self):
    """Test dataset identifiers make it to output variables for iteration."""
    paths = []
    variables = []
    for suffix in [1, 2]:
        nc_path = self.get_temporary_file_path('foo{}.nc'.format(suffix))
        paths.append(nc_path)
        x = Variable(name='x{}'.format(suffix), value=[2, 3], dimensions='x')
        y = Variable(name='y{}'.format(suffix), value=[4, 5, 6], dimensions='y')
        data_variable_name = 'data{}'.format(suffix)
        variables.append(data_variable_name)
        data = Variable(name=data_variable_name, value=np.arange(6).reshape(2, 3) + suffix,
                        dimensions=['x', 'y'])
        Field(grid=Grid(x, y), is_data=data).write(nc_path)

    rds = [RequestDataset(uri=p, variable=dv) for p, dv in zip(paths, variables)]
    ops = OcgOperations(dataset=rds)
    # Unique identifiers are assigned sequentially by the operations object.
    self.assertEqual([ds.uid for ds in ops.dataset], [1, 2])

    ret = ops.execute()
    for field in ret.iter_fields():
        self.assertFalse(field.grid.has_allocated_abstraction_geometry)
        for variable in list(field.values()):
            if isinstance(variable, CoordinateReferenceSystem):
                continue
            self.assertIsNotNone(variable._request_dataset.uid)
            for row in variable.get_iter():
                self.assertIsNotNone(row[HeaderName.DATASET_IDENTIFER])
def test_system_spatial_averaging_through_operations(self):
    """Aggregate a field spatially via OcgOperations; result equals the data mean."""
    data_name = 'data'

    # Only rank 0 creates and writes the source file.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)
            data_value = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, data_value, grid.dimensions, float)
            data_value = data.get_value()
            field = Field(grid=grid, is_data=data)
            path = self.get_temporary_file_path('data.nc')
            field.write(path)
        else:
            data_value, path = None, None
    # Share the reference data and the file path with all ranks.
    data_value = MPI_COMM.bcast(data_value)
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path, variable=data_name)

    ops = OcgOperations(dataset=rd, aggregate=True)
    ret = ops.execute()
    if ret is None:
        # Non-root ranks receive no return value from the operation.
        self.assertNotEqual(vm.rank, vm.root)
    else:
        out_field = ret.get_element()

        if MPI_RANK == 0:
            # The single aggregated value must equal the mean of the source data.
            desired = data_value.mean()
            actual = out_field.data_variables[0].get_value()[0]
            self.assertEqual(actual, desired)
def test_system_create_field_dimensioned_variables(self):
    """Test data is appropriately tagged to identify dimensioned variables."""
    nc_path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3]).write(nc_path)

    # Dimensioned variables should be discovered from file metadata.
    request = RequestDataset(nc_path)
    self.assertEqual(request.variable, ('data1', 'data2'))
    names = get_variable_names(request.get().data_variables)
    self.assertEqual(names, ('data1', 'data2'))

    # An explicit variable selection overloads the discovered set.
    request = RequestDataset(nc_path, variable='data2')
    names = get_variable_names(request.get().data_variables)
    self.assertEqual(names, ('data2',))
def test_system_get_field_dimensioned_variables(self):
    """Test data is appropriately tagged to identify dimensioned variables."""
    # NOTE(review): this duplicates test_system_create_field_dimensioned_variables;
    # consider removing one of the two.
    path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    field = Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3])
    field.write(path)

    # Dimensioned variables should be read from file metadata.
    rd = RequestDataset(path)
    self.assertEqual(rd.variable, ('data1', 'data2'))
    self.assertEqual(get_variable_names(rd.get().data_variables), ('data1', 'data2'))

    # Overloading the variable restricts the dimensioned set.
    rd = RequestDataset(path, variable='data2')
    self.assertEqual(get_variable_names(rd.get().data_variables), ('data2',))
def test_write_variable_collection_different_data_types(self):
    """Test multiple data types are handled by the shapefile write when melted is True."""
    int_var = Variable(name='an_int', value=[1, 2, 3], dtype=int, dimensions='three')
    float_var = Variable(name='a_float', value=[10., 20., 30.], dtype=float, dimensions='three')
    points = GeometryVariable(name='points', value=[Point(1, 2), Point(3, 4), Point(5, 6)],
                              dimensions='three')

    field = Field(is_data=[int_var, float_var], geom=points)
    self.assertEqual(len(field.data_variables), 2)

    shp_path = self.get_temporary_file_path('foo.shp')
    field.write(shp_path, driver='vector', iter_kwargs={'melted': True})
def test_system_conform_units(self):
    """Test conforming units on data read from shapefile."""
    shp_path = self.get_temporary_file_path('temps.shp')
    geom_var = GeometryVariable(value=[Point(1, 2), Point(3, 4)], dimensions='g', name='geom')
    temp_var = Variable(name='temp', value=[10., 20.], dimensions='g')
    Field(variables=[geom_var, temp_var], geom=geom_var, is_data=temp_var).write(
        shp_path, driver=DriverVector)

    # Declare the on-disk units and request a conversion on read.
    conformed = RequestDataset(shp_path, units='celsius', variable='temp',
                               conform_units_to='fahrenheit').get()
    # 10C -> 50F and 20C -> 68F.
    self.assertNumpyAllClose(conformed['temp'].get_value(), np.array([50., 68.]))
def test_system_parallel_write_ndvariable(self):
    """Test a parallel vector GIS write with a n-dimensional variable."""
    # Declare the distribution: only 'y' is split across ranks.
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()

    if MPI_RANK == 0:
        # Rank 0 constructs the full field; it is scattered to the other ranks below.
        path = self.get_temporary_file_path('foo.shp')

        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        dimension_map = {'x': {'variable': 'x', 'bounds': 'x_bounds'},
                         'y': {'variable': 'y', 'bounds': 'y_bounds'},
                         'time': {'variable': 'time', 'bounds': 'the_time_bounds'}}

        vc = Field(variables=[t, extra, x, y, data], dimension_map=dimension_map, is_data='data')
        vc.set_abstraction_geom()
    else:
        path, vc = [None] * 2
    path = MPI_COMM.bcast(path)
    # Distribute pieces of the field according to the declared distribution.
    vc = variable_collection_scatter(vc, ompi)

    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, driver=DriverVector)

    MPI_COMM.Barrier()

    # 3 time * 2 extra * 7 y * 4 x = 168 flattened records in the vector output.
    desired = 168
    rd = RequestDataset(path, driver=DriverVector)
    sizes = MPI_COMM.gather(rd.get().geom.shape[0])
    if MPI_RANK == 0:
        self.assertEqual(sum(sizes), desired)
def test_get_dimension_map_2d_spatial_coordinates(self):
    """Expanded (2-d) coordinates should still slice and map to their dimensions."""
    grid = create_gridxy_global()
    grid.expand()
    path = self.get_temporary_file_path('foo.nc')
    original = Field(grid=grid)
    original.write(path)

    loaded = RequestDataset(path).get()
    sub = loaded.get_field_slice({'y': 10, 'x': 5})
    self.assertEqual(sub.grid.x.shape, (1, 1))

    self.assertEqual(original.dimension_map.get_dimension(DimensionMapKey.Y), ['y'])
    self.assertEqual(original.dimension_map.get_dimension(DimensionMapKey.X), ['x'])
def test_create_dimension_map_2d_spatial_coordinates(self):
    """Dimension map entries for expanded 2-d coordinates list the coordinate dimensions."""
    # NOTE(review): duplicates test_get_dimension_map_2d_spatial_coordinates.
    grid = create_gridxy_global()
    grid.expand()
    nc_path = self.get_temporary_file_path('foo.nc')
    written = Field(grid=grid)
    written.write(nc_path)

    field = RequestDataset(nc_path).get()
    sub = field.get_field_slice({'y': 10, 'x': 5})
    self.assertEqual(sub.grid.x.shape, (1, 1))

    for key, expected in [(DimensionMapKey.Y, ['y']), (DimensionMapKey.X, ['x'])]:
        self.assertEqual(written.dimension_map.get_dimension(key), expected)
def test_system_with_time_data(self):
    """Test writing data with a time dimension."""
    path = self.get_temporary_file_path('what.shp')
    tvar = TemporalVariable(value=[1.5, 2.5], name='time', dimensions='time')
    gvar = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom', dimensions='time')
    field = Field(variables=[tvar, gvar],
                  dimension_map={'time': {'variable': 'time'}, 'geom': {'variable': 'geom'}})
    field.write(path, iter_kwargs={'variable': 'time'}, driver=DriverVector)

    actual = RequestDataset(uri=path).get()['TIME'].get_value().tolist()
    # netcdftime worthlessness: year formatting varies, so accept either rendering.
    poss = [['0001-01-02 12:00:00', '0001-01-03 12:00:00'],
            ['1-01-02 12:00:00', '1-01-03 12:00:00']]
    self.assertTrue(any(p == actual for p in poss))
def test_write_parallel(self):
    """Test writing by selective rank."""
    # Only meaningful for serial runs or a three-rank communicator.
    if MPI_SIZE != 3 and MPI_SIZE != 1:
        raise SkipTest('MPI_SIZE != 1 or 3')

    ranks = list(range(MPI_SIZE))

    for base_rank in ranks:
        for driver in [DriverCSV, DriverVector, DriverNetcdf]:
            if MPI_RANK == 0:
                path = self.get_temporary_file_path('{}-{}.{}'.format(
                    driver.key, base_rank, driver.common_extension))
            else:
                path = None
            path = MPI_COMM.bcast(path)

            # Scope the VM to a single rank; only that rank writes and reads back.
            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    geom = GeometryVariable(
                        value=[Point(1, 2), Point(3, 4)], name='geom', dimensions='geom')
                    data = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=geom)
                    field.add_variable(data, is_data=True)

                    # The write target must be a file, never a directory.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    # Round-trip: read the data back in and check its size.
                    rd = RequestDataset(path, driver=driver)
                    in_field = rd.get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)
            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def test_get_distributed_slice(self):
    """Repeated distributed slicing must leave global dimension bounds untouched."""
    with vm.scoped('grid write', [0]):
        if MPI_RANK == 0:
            xvar = Variable('x', list(range(768)), 'x', float)
            yvar = Variable('y', list(range(768)), 'y', float)
            path = self.get_temporary_file_path('grid.nc')
            Field(grid=Grid(xvar, yvar)).write(path)
        else:
            path = None
    path = vm.bcast(path)

    grid = RequestDataset(path).get().grid
    bounds_before = deepcopy([dim.bounds_global for dim in grid.dimensions])
    for _ in range(10):
        grid.get_distributed_slice([slice(73, 157), slice(305, 386)])
    bounds_after = [dim.bounds_global for dim in grid.dimensions]
    self.assertEqual(bounds_before, bounds_after)
def test_get_intersects_one_rank_with_mask(self):
    """Test mask is created if one rank has a spatial mask."""
    if MPI_SIZE != 2:
        raise SkipTest('MPI_SIZE != 2')

    # Each rank owns half of the distributed 'x' coordinate values.
    if MPI_RANK == 0:
        value = [1, 2]
    else:
        value = [3, 4]

    ompi = OcgDist()
    xdim = ompi.create_dimension('x', 4, dist=True)
    ydim = ompi.create_dimension('y', 5, dist=False)
    ompi.update_dimension_bounds()

    x = Variable('x', value=value, dimensions=xdim)
    y = Variable('y', value=[1, 2, 3, 4, 5], dimensions=ydim)
    grid = Grid(x, y)

    # Irregular polygon that intersects only part of the grid, producing a mask.
    wkt_geom = 'Polygon ((0.72993630573248502 5.22484076433120936, 0.70318471337579691 0.67707006369426814, 2.70063694267515952 0.69490445859872629, 2.59363057324840796 2.54076433121019107, 4.52866242038216527 2.51401273885350296, 4.40382165605095466 5.34968152866241908, 0.72993630573248502 5.22484076433120936))'
    subset_geom = wkt.loads(wkt_geom)

    sub = grid.get_intersects(subset_geom)

    path = self.get_temporary_file_path('foo.nc')

    field = Field(grid=sub)
    field.write(path)

    # Read the result back on rank 0 only and count masked elements.
    with vm.scoped('mask count', [0]):
        if not vm.is_null:
            rd = RequestDataset(path)
            out_field = rd.get()
            target = out_field[out_field.grid._mask_name].get_value()
            select = target != 0
            self.assertEqual(select.sum(), 4)
def test_write_parallel(self):
    """Test writing by selective rank."""
    # Only meaningful for serial runs or a three-rank communicator.
    if MPI_SIZE != 3 and MPI_SIZE != 1:
        raise SkipTest('MPI_SIZE != 1 or 3')

    ranks = list(range(MPI_SIZE))

    for base_rank in ranks:
        for driver in [
                DriverCSV,
                DriverVector,
                DriverNetcdf
        ]:
            if MPI_RANK == 0:
                path = self.get_temporary_file_path('{}-{}.{}'.format(driver.key, base_rank,
                                                                      driver.common_extension))
            else:
                path = None
            path = MPI_COMM.bcast(path)

            # Scope the VM to a single rank; only that rank writes and reads back.
            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    geom = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom',
                                            dimensions='geom')
                    data = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=geom)
                    field.add_variable(data, is_data=True)

                    # The write target must be a file, never a directory.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    # Round-trip: read the data back in and check its size.
                    rd = RequestDataset(path, driver=driver)
                    in_field = rd.get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)
            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def test_write_variable_collection(self):
    """Exercise vector (shapefile) writes: error cases, 2-d geometry, subsets, append."""
    # Attempt to write without a geometry variable.
    v = Variable('a', value=[1, 2], dimensions='bb')
    field = Field(variables=v)
    path = self.get_temporary_file_path('out.shp')
    with self.assertRaises(ValueError):
        field.write(path, driver=DriverVector)

    # Test writing a field with two-dimensional geometry storage.
    value = [Point(1, 2), Point(3, 4), Point(5, 6), Point(6, 7), Point(8, 9), Point(10, 11)]
    gvar = GeometryVariable(value=value, name='points', dimensions='ngeoms')
    # Reshape the flat geometry vector into a 2-d (lat, lon) layout.
    gvar.reshape([Dimension('lat', 2), Dimension('lon', 3)])
    var1 = Variable(name='dummy', value=[6, 7, 8], dimensions=['a'])
    var2 = Variable(name='some_lats', value=[41, 41], dimensions=['lat'])
    var3 = Variable(name='some_lons', value=[0, 90, 280], dimensions=['lon'])
    var4 = Variable(name='data', value=np.random.rand(4, 3, 2), dimensions=['time', 'lon', 'lat'])
    field = Field(variables=[var1, var2, var3, var4], geom=gvar, is_data=['data'])
    path = self.get_temporary_file_path('2d.shp')
    field.write(path, iter_kwargs={'followers': ['some_lats', 'some_lons']}, driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertTrue(len(read) > 2)
    self.assertEqual(list(read.keys()),
                     ['data', 'some_lats', 'some_lons', constants.VariableName.GEOMETRY_VARIABLE])

    # Test writing a subset of the variables.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1])
    field.write(path, variable_names=['keep'], driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertNotIn('remove', read)

    # Test using append.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1, var2])
    # Write one record at a time; the first write creates, the rest append.
    for idx in range(3):
        sub = field[{'points': idx}]
        if idx == 0:
            write_mode = MPIWriteMode.WRITE
        else:
            write_mode = MPIWriteMode.APPEND
        sub.write(path, write_mode=write_mode, driver=DriverVector)
        self.assertOGRFileLength(path, idx + 1)
def test_write_variable_collection(self):
    """Exercise vector (shapefile) writes: error cases, 2-d geometry, subsets, append."""
    # Attempt to write without a geometry variable.
    v = Variable('a', value=[1, 2], dimensions='bb')
    field = Field(variables=v)
    path = self.get_temporary_file_path('out.shp')
    with self.assertRaises(ValueError):
        field.write(path, driver=DriverVector)

    # Test writing a field with two-dimensional geometry storage.
    value = [Point(1, 2), Point(3, 4), Point(5, 6), Point(6, 7), Point(8, 9), Point(10, 11)]
    gvar = GeometryVariable(value=value, name='points', dimensions='ngeoms')
    # Reshape the flat geometry vector into a 2-d (lat, lon) layout.
    gvar.reshape([Dimension('lat', 2), Dimension('lon', 3)])
    var1 = Variable(name='dummy', value=[6, 7, 8], dimensions=['a'])
    var2 = Variable(name='some_lats', value=[41, 41], dimensions=['lat'])
    var3 = Variable(name='some_lons', value=[0, 90, 280], dimensions=['lon'])
    var4 = Variable(name='data', value=np.random.rand(4, 3, 2), dimensions=['time', 'lon', 'lat'])
    field = Field(variables=[var1, var2, var3, var4], geom=gvar, is_data=['data'])
    path = self.get_temporary_file_path('2d.shp')
    field.write(path, iter_kwargs={'followers': ['some_lats', 'some_lons']}, driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertTrue(len(read) > 2)
    self.assertEqual(list(read.keys()),
                     ['data', 'some_lats', 'some_lons', constants.DimensionName.GEOMETRY_DIMENSION])

    # Test writing a subset of the variables.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1])
    field.write(path, variable_names=['keep'], driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertNotIn('remove', read)

    # Test using append.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1, var2])
    # Write one record at a time; the first write creates, the rest append.
    for idx in range(3):
        sub = field[{'points': idx}]
        if idx == 0:
            write_mode = MPIWriteMode.WRITE
        else:
            write_mode = MPIWriteMode.APPEND
        sub.write(path, write_mode=write_mode, driver=DriverVector)
        self.assertOGRFileLength(path, idx + 1)
def write_vector(self, *args, **kwargs):
    """Write this object to a vector GIS file.

    All arguments are forwarded to :meth:`Field.write` with the driver keyword
    forced to :class:`DriverVector`.
    """
    # NOTE(review): imports are kept function-local as in the original —
    # presumably to avoid a circular import at module load; confirm before hoisting.
    from ocgis.collection.field import Field
    from ocgis.driver.vector import DriverVector

    kwargs[KeywordArgument.DRIVER] = DriverVector
    Field(geom=self, crs=self.crs).write(*args, **kwargs)
def write_chunks(self):
    """
    Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
    regridding weights for each subset if requested.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    src_slices = []
    index_path = self.create_full_path_from_template('index_file')
    # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))
    ctr = 1
    ocgis_lh(logger='grid_chunker', msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
    for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
        ocgis_lh(logger='grid_chunker', msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                 level=logging.DEBUG)

        # Resolve the per-chunk output paths from the templates.
        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        # Index file records bare filenames for src/dst but the full weight path.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        if self.iter_dst is None:
            zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
        else:
            zip_args = [[sub_src], [src_path]]

        cc = 1
        for target, path in zip(*zip_args):
            # Scope the VM to ranks that actually hold data for this subset.
            with vm.scoped_by_emptyable('field.write' + str(cc), target):
                if not vm.is_null:
                    ocgis_lh(logger='grid_chunker', msg='write_chunks:writing: {}'.format(path),
                             level=logging.DEBUG)
                    field = Field(grid=target)
                    field.write(path)
                    ocgis_lh(logger='grid_chunker', msg='write_chunks:finished writing: {}'.format(path),
                             level=logging.DEBUG)
            cc += 1

        # Increment the counter outside of the loop to avoid counting empty subsets.
        ctr += 1

        # Generate an ESMF weights file if requested and at least one rank has data on it.
        if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
            vm.barrier()
            self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src, dst_grid=sub_dst)
            vm.barrier()

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        len_src_slices = len(src_slices)
        new_src_grid_slice = [None] * len_src_slices
        for idx in range(len_src_slices):
            # Take the first non-None slice reported by any rank for this chunk.
            for rank_src_grid_slice in gathered_src_grid_slice:
                if rank_src_grid_slice[idx] is not None:
                    new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                    break
        src_slices = new_src_grid_slice

    # The index file itself is written by rank 0 only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_chunker_source'},
                     {'esmf_role': grid_chunker_destination},
                     {'esmf_role': 'grid_chunker_weights'}]

            vc = VariableCollection()

            # The dimensionless index variable carries bookkeeping metadata in its attrs.
            grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_chunker_index)
            vidx.attrs['esmf_role'] = grid_chunker_index
            vidx.attrs['grid_chunker_source'] = 'source_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_chunker_weights'] = 'weights_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx],
                             attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source -----------------------------------------------------------------------------------------

            self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)

            # Destination ------------------------------------------------------------------------------------

            self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def test_write(self):
    """Round-trip grid writes: basic, CRS-tagged, 2-d coordinates, and bounded grids."""
    # Test writing a basic grid.
    path = self.get_temporary_file_path('foo.nc')
    x = Variable(name='x', value=[1, 2], dimensions='x')
    y = Variable(name='y', value=[3, 4, 5, 6, 7], dimensions='y')
    dmap = {'x': {'variable': 'x'}, 'y': {'variable': 'y'}}
    field = Field(variables=[x, y], dimension_map=dmap)
    desired_value_stacked = field.grid.get_value_stacked()
    self.assertEqual(field.grid.parent['x'].get_value().shape, (2, ))
    self.assertTrue(field.grid.is_vectorized)
    field.write(path)

    # Vectorization must survive the write/read round trip.
    out_field = RequestDataset(path).get()
    self.assertTrue(out_field.grid.is_vectorized)
    actual_value_stacked = out_field.grid.get_value_stacked()
    self.assertNumpyAll(actual_value_stacked, desired_value_stacked)

    # Test another grid.
    grid = self.get_gridxy(crs=WGS84())
    self.assertTrue(grid.is_vectorized)
    field = Field(grid=grid)
    self.assertTrue(field.grid.is_vectorized)
    path = self.get_temporary_file_path('out.nc')
    with self.nc_scope(path, 'w') as ds:
        field.write(ds)
    # Writing must not alter the in-memory grid's vectorized state.
    self.assertTrue(field.grid.is_vectorized)
    with self.nc_scope(path) as ds:
        self.assertNumpyAll(ds.variables[grid.x.name][:], grid.x.get_value())
        var = ds.variables[grid.y.name]
        self.assertNumpyAll(var[:], grid.y.get_value())
        self.assertEqual(var.axis, 'Y')
        # The coordinate system variable must be written too.
        self.assertIn(grid.crs.name, ds.variables)

    # Test with 2-d x and y arrays.
    grid = self.get_gridxy(with_2d_variables=True)
    field = Field(grid=grid)
    path = self.get_temporary_file_path('out.nc')
    field.grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
    with self.nc_scope(path, 'w') as ds:
        field.write(ds)
    # self.ncdump(path)
    with self.nc_scope(path) as ds:
        var = ds.variables['y']
        self.assertNumpyAll(var[:], grid.y.get_value())

    # Test writing a vectorized grid with corners.
    grid = self.get_gridxy()
    field = Field(grid=grid)
    self.assertIsNotNone(field.grid.dimensions)
    self.assertFalse(field.grid.has_bounds)
    field.grid.set_extrapolated_bounds('xbnds', 'ybnds', 'corners')
    self.assertTrue(field.grid.is_vectorized)
    path = self.get_temporary_file_path('out.nc')
    with self.nc_scope(path, 'w') as ds:
        field.write(ds)
    # self.ncdump(path)
    with self.nc_scope(path, 'r') as ds:
        # Bounded 1-d coordinates keep their own single dimension on disk.
        self.assertEqual(['ydim'], [d for d in ds.variables['y'].dimensions])
        self.assertEqual(['xdim'], [d for d in ds.variables['x'].dimensions])
def write_chunks(self):
    """
    Write grid subsets to netCDF files using the provided filename templates. This will also generate ESMF
    regridding weights for each subset if requested.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    src_slices = []
    index_path = self.create_full_path_from_template('index_file')
    # nzeros = len(str(reduce(lambda x, y: x * y, self.nchunks_dst)))
    ctr = 1
    ocgis_lh(logger=_LOCAL_LOGGER, msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
    for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                 level=logging.DEBUG)

        # Resolve the per-chunk output paths from the templates.
        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        # Index file records bare filenames for src/dst but the full weight path.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        if self.iter_dst is None:
            zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
        else:
            zip_args = [[sub_src], [src_path]]

        cc = 1
        for target, path in zip(*zip_args):
            # Scope the VM to ranks that actually hold data for this subset.
            with vm.scoped_by_emptyable('field.write' + str(cc), target):
                if not vm.is_null:
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='write_chunks:writing: {}'.format(path),
                             level=logging.DEBUG)
                    field = Field(grid=target)
                    field.write(path)
                    ocgis_lh(logger=_LOCAL_LOGGER,
                             msg='write_chunks:finished writing: {}'.format(path),
                             level=logging.DEBUG)
            cc += 1

        # Increment the counter outside of the loop to avoid counting empty subsets.
        ctr += 1

        # Generate an ESMF weights file if requested and at least one rank has data on it.
        if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
            vm.barrier()
            ocgis_lh(logger=_LOCAL_LOGGER,
                     msg='write_chunks:writing esmf weights: {}'.format(wgt_path),
                     level=logging.DEBUG)
            self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src,
                                    dst_grid=sub_dst)
            vm.barrier()

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        len_src_slices = len(src_slices)
        new_src_grid_slice = [None] * len_src_slices
        for idx in range(len_src_slices):
            # Take the first non-None slice reported by any rank for this chunk.
            for rank_src_grid_slice in gathered_src_grid_slice:
                if rank_src_grid_slice[idx] is not None:
                    new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                    break
        src_slices = new_src_grid_slice

    # The index file itself is written by rank 0 only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_chunker_destination = GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_chunker_source'},
                     {'esmf_role': grid_chunker_destination},
                     {'esmf_role': 'grid_chunker_weights'}]

            vc = VariableCollection()

            # The dimensionless index variable carries bookkeeping metadata in its attrs.
            grid_chunker_index = GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_chunker_index)
            vidx.attrs['esmf_role'] = grid_chunker_index
            vidx.attrs['grid_chunker_source'] = 'source_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_chunker_weights'] = 'weights_filename'
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[GridChunkerConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx],
                             attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source -----------------------------------------------------------------------------------------

            self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)

            # Destination ------------------------------------------------------------------------------------

            self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices,
                                                   dim, bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def write_subsets(self, src_template, dst_template, wgt_template, index_path):
    """
    Write grid subsets to netCDF files using the provided filename templates. The template must contain the
    full file path with a single curly-bracer pair to insert the combination counter. ``wgt_template`` should
    not be a full path. This name is used when generating weight files.

    >>> template_example = '/path/to/data_{}.nc'

    :param str src_template: The template for the source subset file.
    :param str dst_template: The template for the destination subset file.
    :param str wgt_template: The template for the weight filename.

    >>> wgt_template = 'esmf_weights_{}.nc'

    :param index_path: Path to the output indexing netCDF.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))
    for ctr, (sub_src, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True),
                                                      start=1):
        # padded = create_zero_padded_integer(ctr, nzeros)

        # Resolve the per-subset output paths from the templates.
        src_path = src_template.format(ctr)
        dst_path = dst_template.format(ctr)
        wgt_filename = wgt_template.format(ctr)

        # Index file records bare filenames for src/dst; weights keep the template name.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_filename)
        dst_slices.append(dst_slc)

        for target, path in zip([sub_src, sub_dst], [src_path, dst_path]):
            # An empty subset still produces a placeholder (empty) field.
            if target.is_empty:
                is_empty = True
                target = None
            else:
                is_empty = False
            field = Field(grid=target, is_empty=is_empty)
            ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
            # Only ranks that hold data for this field participate in the write.
            with vm.scoped_by_emptyable('field.write', field):
                if not vm.is_null:
                    field.write(path)
            ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # The index file itself is written by rank 0 only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_splitter_source'},
                     {'esmf_role': grid_splitter_destination},
                     {'esmf_role': 'grid_splitter_weights'}]

            vc = VariableCollection()

            # The dimensionless index variable carries bookkeeping metadata in its attrs.
            grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            x_bounds = GridSplitterConstants.IndexFile.NAME_X_BOUNDS_VARIABLE
            vidx.attrs[x_bounds] = x_bounds
            y_bounds = GridSplitterConstants.IndexFile.NAME_Y_BOUNDS_VARIABLE
            vidx.attrs[y_bounds] = y_bounds
            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx],
                             attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            # Bounds variables record the start/stop of each destination slice.
            xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension],
                          attrs={'esmf_role': 'x_split_bounds'}, dtype=int)
            yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension],
                          attrs={'esmf_role': 'y_split_bounds'}, dtype=int)

            x_name = self.dst_grid.x.dimensions[0].name
            y_name = self.dst_grid.y.dimensions[0].name
            for idx, slc in enumerate(dst_slices):
                xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop

            vc.add_variable(xb)
            vc.add_variable(yb)

            vc.write(index_path)

    vm.barrier()
def _write_coll_(self, f, coll, add_geom_uid=True):
    """Write the collection as CSV plus a companion UGID-GID shapefile linking records to geometries.

    :param f: Converter state mapping; ``f[KeywordArgument.WRITE_MODE]`` supplies the shapefile write mode.
    :param coll: The spatial collection to write.
    :param bool add_geom_uid: Passed through to the parent CSV writer.
    """
    ocgis_lh(msg='entering _write_coll_ in {}'.format(self.__class__), logger='csv-shp.converter',
             level=logging.DEBUG)

    # Load the geometries. The geometry identifier is needed for the data write.
    for field, container in coll.iter_fields(yield_container=True):
        field.set_abstraction_geom(create_ugid=True)

    # Write the output CSV file.
    ocgis_lh(msg='before CsvShapefileConverter super call in {}'.format(self.__class__),
             logger='csv-shp.converter', level=logging.DEBUG)
    super(CsvShapefileConverter, self)._write_coll_(f, coll, add_geom_uid=add_geom_uid)
    ocgis_lh(msg='after CsvShapefileConverter super call in {}'.format(self.__class__),
             logger='csv-shp.converter', level=logging.DEBUG)

    # The output geometry identifier shapefile path. Determined on rank 0 and broadcast so every
    # rank writes to the same file.
    if vm.rank == 0:
        fiona_path = os.path.join(self._get_or_create_shp_folder_(), self.prefix + '_gid.shp')
    else:
        fiona_path = None
    fiona_path = vm.bcast(fiona_path)

    if self.ops.aggregate:
        ocgis_lh('creating a UGID-GID shapefile is not necessary for aggregated data. use UGID shapefile.',
                 'conv.csv-shp', logging.WARN)
    else:
        # Write the geometries for each container/field combination.
        for field, container in coll.iter_fields(yield_container=True):
            # The container may be empty. Only add the unique geometry identifier if the container has an
            # associated geometry.
            if container.geom is not None:
                ugid_var = Variable(name=container.geom.ugid.name, dimensions=field.geom.dimensions,
                                    dtype=constants.DEFAULT_NP_INT)
                ugid_var.get_value()[:] = container.geom.ugid.get_value()[0]

            # Extract the variable components of the geometry file.
            geom = field.geom.copy()
            geom = geom.extract()
            if field.crs is not None:
                crs = field.crs.copy()
                crs = crs.extract()
            else:
                crs = None

            # If the dataset geometry identifier is not present, create it.
            gid = field[HeaderName.ID_GEOMETRY].copy()
            gid = gid.extract()

            # Construct the field to write.
            field_to_write = Field(geom=geom, crs=crs, uid=field.uid)
            if container.geom is not None:
                field_to_write.add_variable(ugid_var, is_data=True)
            field_to_write.add_variable(gid, is_data=True)

            # Maintain the field/dataset unique identifier if there is one. Mutating 'gid' here is
            # intentional: 'add_variable' above stores a reference, so the repeat record is written.
            if field.uid is not None:
                if gid.repeat_record is None:
                    rr = []
                else:
                    rr = list(gid.repeat_record)
                rr.append((HeaderName.DATASET_IDENTIFER, field.uid))
                gid.repeat_record = rr

            # Write the field.
            field_to_write.write(fiona_path, write_mode=f[KeywordArgument.WRITE_MODE], driver=DriverKey.VECTOR)
def test_get_intersects_state_boundaries(self): path_shp = self.path_state_boundaries geoms = [] with fiona.open(path_shp) as source: for record in source: geom = shape(record['geometry']) geoms.append(geom) gvar = GeometryVariable(value=geoms, dimensions='ngeom') gvar_sub = gvar.get_unioned() if gvar_sub is not None: subset = gvar_sub.get_value().flatten()[0] else: subset = None subset = MPI_COMM.bcast(subset) resolution = 1.0 for with_bounds in [False, True]: grid = self.get_gridxy_global(resolution=resolution, with_bounds=with_bounds) vm.create_subcomm_by_emptyable('global grid', grid, is_current=True) if not vm.is_null: res = grid.get_intersects(subset, return_slice=True) grid_sub, slc = res vm.create_subcomm_by_emptyable('grid subset', grid_sub, is_current=True) if not vm.is_null: mask = Variable('mask_after_subset', grid_sub.get_mask(), dimensions=grid_sub.dimensions) mask = variable_gather(mask) if vm.rank == 0: mask_sum = np.invert(mask.get_value()).sum() mask_shape = mask.shape else: mask_sum = None mask_shape = None mask_sum = vm.bcast(mask_sum) mask_shape = vm.bcast(mask_shape) if with_bounds: self.assertEqual(mask_shape, (54, 113)) self.assertEqual( slc, (slice(108, 162, None), slice(1, 114, None))) self.assertEqual(mask_sum, 1358) else: if MPI_SIZE == 2: grid_bounds_global = [ dim.bounds_global for dim in grid_sub.dimensions ] self.assertEqual(grid_bounds_global, [(0, 52), (0, 105)]) self.assertEqual(mask_shape, (52, 105)) self.assertEqual( slc, (slice(109, 161, None), slice(8, 113, None))) self.assertEqual(mask_sum, 1087) if vm.rank == 0: path = self.get_temporary_file_path('foo.nc') else: path = None path = vm.bcast(path) field = Field(grid=grid_sub) field.write(path) vm.finalize() vm.__init__() MPI_COMM.Barrier()
def test_get_intersects_parallel(self):
    """Test parallel grid intersects across keyword combinations and against a file-based grid."""
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    grid = self.get_gridxy()

    live_ranks = vm.get_live_ranks_from_object(grid)

    # Test with an empty subset.
    subset_geom = box(1000., 1000., 1100., 1100.)
    with vm.scoped('empty subset', live_ranks):
        if not vm.is_null:
            with self.assertRaises(EmptySubsetError):
                grid.get_intersects(subset_geom)

    # Test combinations.
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    keywords = dict(is_vectorized=[True, False], has_bounds=[False, True], use_bounds=[False, True],
                    keep_touches=[True, False])

    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        grid = self.get_gridxy()

        # Scope the VM to non-empty ranks; free the subcommunicator before skipping or looping.
        vm_name, _ = vm.create_subcomm_by_emptyable('grid testing', grid, is_current=True)
        if vm.is_null:
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        if k.has_bounds:
            grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
            self.assertTrue(grid.has_bounds)

        # Cannot use bounds with a point grid abstraction.
        if k.use_bounds and grid.abstraction == 'point':
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        grid_sub, slc = grid.get_intersects(subset_geom, keep_touches=k.keep_touches, use_bounds=k.use_bounds,
                                            return_slice=True)

        if k.has_bounds:
            self.assertTrue(grid.has_bounds)

        # Test geometries are filled appropriately after allocation.
        if not grid_sub.is_empty:
            for t in grid_sub.get_abstraction_geometry().get_value().flat:
                self.assertIsInstance(t, BaseGeometry)

        self.assertIsInstance(grid_sub, Grid)
        # Expected subset slice depends on whether bounds participate and touch behavior.
        if k.keep_touches:
            if k.has_bounds and k.use_bounds:
                desired = (slice(0, 3, None), slice(0, 3, None))
            else:
                desired = (slice(1, 3, None), slice(1, 2, None))
        else:
            if k.has_bounds and k.use_bounds:
                desired = (slice(1, 3, None), slice(1, 2, None))
            else:
                desired = (slice(1, 2, None), slice(1, 2, None))
        if not grid.is_empty:
            self.assertEqual(grid.has_bounds, k.has_bounds)
            self.assertTrue(grid.is_vectorized)
        self.assertEqual(slc, desired)

        vm.free_subcomm(name=vm_name)
        vm.set_comm()

    # Test against a file. #########################################################################

    subset_geom = box(101.5, 40.5, 102.5, 42.)

    # Rank 0 chooses the path; broadcast so all ranks target the same file.
    if MPI_RANK == 0:
        path_grid = self.get_temporary_file_path('grid.nc')
    else:
        path_grid = None
    path_grid = MPI_COMM.bcast(path_grid)

    grid_to_write = self.get_gridxy()
    with vm.scoped_by_emptyable('write', grid_to_write):
        if not vm.is_null:
            field = Field(grid=grid_to_write)
            field.write(path_grid, driver=DriverNetcdfCF)
    MPI_COMM.Barrier()

    rd = RequestDataset(uri=path_grid)
    x = SourcedVariable(name='x', request_dataset=rd)
    self.assertIsNone(x._value)
    y = SourcedVariable(name='y', request_dataset=rd)
    # NOTE(review): the assertion on x._value below duplicates the one above — presumably the
    # intent was only to re-check after constructing y; confirm and remove one if so.
    self.assertIsNone(x._value)
    self.assertIsNone(y._value)

    grid = Grid(x, y)

    # Values must remain unloaded (lazy) until the intersects operation requires them.
    for target in [grid._y_name, grid._x_name]:
        self.assertIsNone(grid.parent[target]._value)
    self.assertTrue(grid.is_vectorized)

    with vm.scoped_by_emptyable('intersects', grid):
        if not vm.is_null:
            sub, slc = grid.get_intersects(subset_geom, return_slice=True)

            self.assertEqual(slc, (slice(1, 3, None), slice(1, 2, None)))
            self.assertIsInstance(sub, Grid)

    # The file may be deleted before other ranks open.
    MPI_COMM.Barrier()
def test_write(self):
    """Test grid writing: vectorized round-trip, CRS grid, 2-d coordinates, and corners."""

    # A basic vectorized grid round-trips through a netCDF file.
    basic_path = self.get_temporary_file_path('foo.nc')
    xvar = Variable(name='x', value=[1, 2], dimensions='x')
    yvar = Variable(name='y', value=[3, 4, 5, 6, 7], dimensions='y')
    dimension_map = {'x': {'variable': 'x'}, 'y': {'variable': 'y'}}
    field = Field(variables=[xvar, yvar], dimension_map=dimension_map)
    desired_value_stacked = field.grid.get_value_stacked()
    self.assertEqual(field.grid.parent['x'].get_value().shape, (2,))
    self.assertTrue(field.grid.is_vectorized)
    field.write(basic_path)

    out_field = RequestDataset(basic_path).get()
    self.assertTrue(out_field.grid.is_vectorized)
    self.assertNumpyAll(out_field.grid.get_value_stacked(), desired_value_stacked)

    # A grid with a coordinate system stays vectorized when written to an open dataset.
    grid = self.get_gridxy(crs=WGS84())
    self.assertTrue(grid.is_vectorized)
    field = Field(grid=grid)
    self.assertTrue(field.grid.is_vectorized)
    crs_path = self.get_temporary_file_path('out.nc')
    with self.nc_scope(crs_path, 'w') as sink:
        field.write(sink)
    self.assertTrue(field.grid.is_vectorized)
    with self.nc_scope(crs_path) as source:
        # Masked vs plain value accessor depends on the installed netCDF4 version.
        expected_x = grid.x.mv() if atleast_ncver("1.4") else grid.x.v()
        self.assertNumpyAll(source.variables[grid.x.name][:], expected_x)
        yvar_out = source.variables[grid.y.name]
        expected_y = grid.y.mv() if atleast_ncver("1.4") else grid.y.v()
        self.assertNumpyAll(yvar_out[:], expected_y)
        self.assertEqual(yvar_out.axis, 'Y')
        self.assertIn(grid.crs.name, source.variables)

    # Two-dimensional x and y coordinate variables also write correctly.
    grid = self.get_gridxy(with_2d_variables=True)
    field = Field(grid=grid)
    twod_path = self.get_temporary_file_path('out.nc')
    field.grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
    with self.nc_scope(twod_path, 'w') as sink:
        field.write(sink)
    with self.nc_scope(twod_path) as source:
        yvar_out = source.variables['y']
        expected_y = grid.y.mv() if atleast_ncver("1.4") else grid.y.v()
        self.assertNumpyAll(yvar_out[:], expected_y)

    # A vectorized grid with corners keeps its one-dimensional coordinate dimensions on write.
    grid = self.get_gridxy()
    field = Field(grid=grid)
    self.assertIsNotNone(field.grid.dimensions)
    self.assertFalse(field.grid.has_bounds)
    field.grid.set_extrapolated_bounds('xbnds', 'ybnds', 'corners')
    self.assertTrue(field.grid.is_vectorized)
    corners_path = self.get_temporary_file_path('out.nc')
    with self.nc_scope(corners_path, 'w') as sink:
        field.write(sink)
    with self.nc_scope(corners_path, 'r') as source:
        self.assertEqual(['ydim'], [d for d in source.variables['y'].dimensions])
        self.assertEqual(['xdim'], [d for d in source.variables['x'].dimensions])