def test_system_changing_field_name(self):
    """Renaming a child variable collection must survive lazy loading and a write/read round trip."""
    path1 = self.get_temporary_file_path('foo1.nc')
    path2 = self.get_temporary_file_path('foo2.nc')
    vc1 = VariableCollection(name='vc1')
    var1 = Variable('var1', value=[1, 2, 3], dimensions='three', parent=vc1)
    vc2 = VariableCollection(name='vc2')
    vc1.add_child(vc2)
    var2 = Variable('var2', value=[4, 5, 6, 7], dimensions='four', parent=vc2)
    vc1.write(path1)
    rd = RequestDataset(path1)
    # rd.inspect()
    nvc = rd.get_variable_collection()
    nvc2 = nvc.children['vc2']
    # Values are loaded lazily from source, so none should be allocated yet.
    self.assertIsNone(nvc2['var2']._value)
    self.assertEqual(nvc2.name, 'vc2')
    nvc2.set_name('extraordinary')
    # After the rename the data must still be readable from the source file.
    self.assertIsNotNone(nvc2['var2'].get_value())
    self.assertEqual(nvc2['var2'].get_value().tolist(), [4, 5, 6, 7])
    nvc.write(path2)
    rd2 = RequestDataset(path2)
    # rd2.inspect()
    n2vc = rd2.get_variable_collection()
    # The renamed child should round-trip through the second file.
    self.assertEqual(n2vc.children[nvc2.name].name, nvc2.name)
def test_insert_weighted(self):
    """Weighted insertion into a zeroed destination should restore the original data sums."""
    gs = self.get_grid_splitter()

    dst_master_path = self.get_temporary_file_path('out.nc')
    gs.dst_grid.parent.write(dst_master_path)

    # Record the original data sums, then zero the data variables on disk.
    dst_master = RequestDataset(dst_master_path).get()
    desired_sums = {}
    for data_variable in dst_master.data_variables:
        dv_sum = data_variable.get_value().sum()
        desired_sums[data_variable.name] = dv_sum
        self.assertNotEqual(dv_sum, 0)
        data_variable.get_value()[:] = 0
    dst_master.write(dst_master_path, write_mode=MPIWriteMode.FILL)

    # Confirm the file now holds only zeros before the insertion.
    dst_master = RequestDataset(dst_master_path).get()
    for data_variable in dst_master.data_variables:
        self.assertEqual(data_variable.get_value().sum(), 0)

    gs.write_subsets()

    index_path = gs.create_full_path_from_template('index_file')
    gs.insert_weighted(index_path, self.current_dir_output, dst_master_path)

    # The inserted data should reproduce the original sums.
    actual_sums = {}
    dst_master_inserted = RequestDataset(dst_master_path).get()
    for data_variable in dst_master_inserted.data_variables:
        dv_value = data_variable.get_value()
        dv_sum = dv_value.sum()
        actual_sums[data_variable.name] = dv_sum
    for k, v in list(actual_sums.items()):
        self.assertAlmostEqual(v, desired_sums[k])
def assertWeightFilesEquivalent(self, global_weights_filename, merged_weights_filename):
    """Assert a merged weight file is equivalent to its globally generated counterpart."""
    merged = RequestDataset(merged_weights_filename).get()
    globalf = RequestDataset(global_weights_filename).get()

    # Row (source) and column (destination) index sets must match.
    merged_row = merged['row'].get_value()
    global_row = globalf['row'].get_value()
    self.assertAsSetEqual(merged_row, global_row)
    merged_col = merged['col'].get_value()
    global_col = globalf['col'].get_value()
    self.assertAsSetEqual(merged_col, global_col)

    # Total weight mass must be identical.
    merged_weights = merged['S'].get_value()
    global_weights = globalf['S'].get_value()
    self.assertEqual(merged_weights.sum(), global_weights.sum())

    # Per source element, destination columns must match and weight sums must agree
    # to near machine precision.
    differences = []
    for src_index in np.unique(merged_row).flat:
        merged_select = np.where(merged_row == src_index)[0]
        global_select = np.where(global_row == src_index)[0]
        self.assertAsSetEqual(merged_col[merged_select], global_col[global_select])
        differences.append(merged_weights[merged_select].sum() - global_weights[global_select].sum())
    differences = np.abs(differences)
    self.assertLess(differences.max(), 1e-14)
def test(self):
    """Multi-request calculations against a basis field should yield the expected differences."""
    data_paths = [self.write_field_data(name) for name in ('data1', 'data2')]
    basis_path = self.write_field_data('basis_var')

    march = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
    mrd = MultiRequestDataset([RequestDataset(p, time_range=march) for p in data_paths])

    august = [datetime(2000, 8, 1), datetime(2000, 8, 31)]
    basis_field = RequestDataset(basis_path, time_range=august).get()

    calc = [{'func': 'mfpf',
             'name': 'output_mfpf',
             'kwds': {'reference': ('data1', 'data2'), 'basis': basis_field}}]
    ret = OcgOperations(dataset=mrd, calc=calc).execute()

    actual_field = ret.get_element()
    self.assertEqual(get_variable_names(actual_field.data_variables),
                     ('diff_data1_basis_var', 'diff_data2_basis_var'))
    # Both difference variables should sum to the same known value.
    for data_variable in actual_field.data_variables:
        self.assertAlmostEqual(data_variable.get_value().sum(), 7.8071042497325145)
def test_system_with_distributed_dimensions_from_file_shapefile(self):
    """Test a distributed read from file."""
    path = self.path_state_boundaries

    # These are the desired values, read serially on the root rank only.
    with vm.scoped('desired data write', [0]):
        if not vm.is_null:
            rd_desired = RequestDataset(uri=path, driver=DriverVector)
            var_desired = SourcedVariable(name='STATE_NAME', request_dataset=rd_desired)
            value_desired = var_desired.get_value().tolist()
            self.assertEqual(len(value_desired), 51)

    rd = RequestDataset(uri=path, driver=DriverVector)
    fvar = SourcedVariable(name='STATE_NAME', request_dataset=rd)
    self.assertEqual(len(rd.driver.dist.get_group()['dimensions']), 1)

    # The geometry dimension should be distributed across ranks.
    self.assertTrue(fvar.dimensions[0].dist)
    self.assertIsNotNone(fvar.get_value())
    if MPI_SIZE > 1:
        # With at least two ranks no rank holds more than half (51 -> max 26).
        self.assertLessEqual(fvar.shape[0], 26)

    # Gathering all local pieces on the root rank should reproduce the serial read.
    values = MPI_COMM.gather(fvar.get_value())
    if MPI_RANK == 0:
        values = hgather(values)
        self.assertEqual(values.tolist(), value_desired)
    else:
        self.assertIsNone(values)
def test_system_get_field_dimensioned_variables(self):
    """Variables spanning the coordinate dimensions should be auto-tagged as data variables."""
    path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    field = Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3])
    field.write(path)

    # Only the fully dimensioned variables should be identified from file metadata;
    # 'data3' lacks the spatial dimensions and is excluded.
    rd = RequestDataset(path)
    self.assertEqual(rd.variable, ('data1', 'data2'))
    self.assertEqual(get_variable_names(rd.get().data_variables), ('data1', 'data2'))

    # An explicit variable overload restricts the tagged set.
    rd = RequestDataset(path, variable='data2')
    self.assertEqual(get_variable_names(rd.get().data_variables), ('data2',))
def run_system_splitting_unstructured(self, genweights):
    """Exercise chunking of an unstructured point source grid onto a structured destination."""
    env.CLOBBER_UNITS_ON_BOUNDS = False

    ufile = self.get_temporary_file_path('ugrid.nc')
    self.fixture_regular_ugrid_file(ufile, 10.)

    src_rd = RequestDataset(ufile, driver=DriverNetcdfUGRID, grid_abstraction='point')
    # src_rd.inspect()
    src_grid = src_rd.get().grid
    self.assertEqual(src_grid.abstraction, 'point')

    dst_grid = self.get_gridxy_global(resolution=20., crs=Spherical())
    chunker = GridChunker(src_grid, dst_grid, (3, 3), check_contains=False, src_grid_resolution=10.,
                          paths=self.fixture_paths, genweights=genweights, use_spatial_decomp=True)
    chunker.write_chunks()

    # The first source chunk should carry the global source index identifier.
    first_chunk_path = chunker.create_full_path_from_template('src_template', index=1)
    first_chunk = RequestDataset(first_chunk_path).get()
    self.assertIn(GridChunkerConstants.IndexFile.NAME_SRCIDX_GUID, first_chunk)
def test_create_merged_weight_file_unstructured(self):
    """Merged per-chunk ESMF weights should equal a globally generated weight file."""
    self.remove_dir = False

    ufile = self.get_temporary_file_path('ugrid.nc')
    resolution = 10.
    self.fixture_regular_ugrid_file(ufile, resolution, crs=Spherical())
    src_grid = RequestDataset(ufile, driver=DriverNetcdfUGRID, grid_abstraction='point').get().grid
    self.assertEqual(src_grid.abstraction, 'point')
    dst_grid = self.get_gridxy_global(resolution=20., crs=Spherical())
    dst_path = self.get_temporary_file_path('dst.nc')
    dst_grid.parent.write(dst_path)

    gs = GridSplitter(src_grid, dst_grid, (3, 3), check_contains=False, src_grid_resolution=10.,
                      paths=self.fixture_paths)
    gs.write_subsets()

    # Load the grid splitter index file ----------------------------------------------------------------------------

    index_path = gs.create_full_path_from_template('index_file')
    ifile = RequestDataset(uri=index_path).get()
    ifile.load()
    gidx = ifile[GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE].attrs
    source_filename = ifile[gidx[GridSplitterConstants.IndexFile.NAME_SOURCE_VARIABLE]]
    sv = source_filename.join_string_value()
    destination_filename = ifile[gidx[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE]]
    dv = destination_filename.join_string_value()

    # Create weight files for each subset --------------------------------------------------------------------------

    for ii, sfn in enumerate(sv):
        esp = os.path.join(self.current_dir_output, sfn)
        edp = os.path.join(self.current_dir_output, dv[ii])
        ewp = gs.create_full_path_from_template('wgt_template', index=ii + 1)
        # Run the external ESMF weight generator for the chunk pair.
        cmd = ['ESMF_RegridWeightGen', '-s', esp, '--src_type', 'UGRID', '--src_meshname',
               VariableName.UGRID_HOST_VARIABLE, '-d', edp, '--dst_type', 'GRIDSPEC', '-w', ewp,
               '--method', 'conserve', '-r', '--no_log']
        subprocess.check_call(cmd)

    # Merge weight files -------------------------------------------------------------------------------------------

    mwf = self.get_temporary_file_path('merged_weight_file.nc')
    gs.create_merged_weight_file(mwf)

    # Generate a global weight file using ESMF ---------------------------------------------------------------------

    global_weights_filename = self.get_temporary_file_path('global_weights.nc')
    cmd = ['ESMF_RegridWeightGen', '-s', ufile, '--src_type', 'UGRID', '-d', dst_path, '--dst_type',
           'GRIDSPEC', '-w', global_weights_filename, '--method', 'conserve', '--weight-only',
           '--no_log', '--src_meshname', VariableName.UGRID_HOST_VARIABLE]
    subprocess.check_call(cmd)

    # Test merged and global weight files are equivalent -----------------------------------------------------------

    self.assertWeightFilesEquivalent(global_weights_filename, mwf)
def test_compute_2d_grid(self):
    """Tiled computation over a 2D grid should produce the expected output shape."""
    rd = RequestDataset(self.get_path_to_2d_grid_netcdf())
    ops = ocgis.OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}],
                              calc_grouping=['month'], output_format='nc',
                              add_auxiliary_files=False, geom=[33.7, -35.9, 109.1, 9.4])
    # Execute using three tiles.
    out_path = compute(ops, 3, verbose=False)
    out_field = RequestDataset(out_path).get()
    self.assertEqual(out_field['mean'].shape, (4, 17, 28))
def test(self):
    """Destination subsets should tile the global grid and preserve the global data sum."""
    gs = self.get_grid_splitter()

    # Gather the global destination data sum for later comparison.
    desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
    desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
    if MPI_RANK == 0:
        desired_sum = np.sum(desired_dst_grid_sum)

    # The (3, 2)-style splitting should produce these exact destination slices.
    desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
               {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
               {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
               {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
               {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
               {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
    actual = list(gs.iter_dst_grid_slices())
    self.assertEqual(actual, desired)

    gs.write_subsets()

    if MPI_RANK == 0:
        rank_sums = []

    for ctr in range(1, gs.nsplits_dst[0] * gs.nsplits_dst[1] + 1):
        src_path = gs.create_full_path_from_template('src_template', index=ctr)
        dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

        src_field = RequestDataset(src_path).get()
        dst_field = RequestDataset(dst_path).get()

        # The source subset envelope must spatially contain the destination subset envelope.
        src_envelope_global = box(*src_field.grid.extent_global)
        dst_envelope_global = box(*dst_field.grid.extent_global)
        self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

        actual = get_variable_names(src_field.data_variables)
        self.assertIn('data', actual)

        actual = get_variable_names(dst_field.data_variables)
        self.assertIn('data', actual)

        # Accumulate per-subset sums on the root rank.
        actual_data_sum = dst_field['data'].get_value().sum()
        actual_data_sum = MPI_COMM.gather(actual_data_sum)
        if MPI_RANK == 0:
            actual_data_sum = np.sum(actual_data_sum)
            rank_sums.append(actual_data_sum)

    if MPI_RANK == 0:
        # Summing across all subsets should reproduce the global data sum.
        self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
        index_path = gs.create_full_path_from_template('index_file')
        self.assertTrue(os.path.exists(index_path))

    MPI_COMM.Barrier()

    # The index file should contain more than just the two filename variables.
    index_path = gs.create_full_path_from_template('index_file')
    index_field = RequestDataset(index_path).get()
    self.assertTrue(len(list(index_field.keys())) > 2)
def test_esmf(self):
    """Regridding a dataset onto an identical destination should reproduce the source values."""
    rd1 = RequestDataset(**self.get_dataset())
    rd2 = deepcopy(rd1)
    ret = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc').execute()
    regridded = RequestDataset(ret).get().data_variables[0].get_value()
    original = rd1.get().data_variables[0].get_value()
    self.assertNumpyAllClose(regridded, original)
def test_shapefile_through_operations(self):
    """A shapefile should round-trip through operations, gaining a geometry identifier."""
    path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    rd = RequestDataset(path)
    source_field = rd.get()

    ret = OcgOperations(dataset=rd, output_format='shp').execute()

    round_tripped = RequestDataset(ret).get()
    # Output keys are the input keys plus the geometry identifier.
    self.assertAsSetEqual(list(source_field.keys()) + [HeaderName.ID_GEOMETRY],
                          list(round_tripped.keys()))
    self.assertEqual((51,), round_tripped.data_variables[0].shape)
def test_write_variable_collection(self):
    """Vector writes require a geometry and must honor variable subsetting and append mode."""
    # Attempt to write without a geometry variable.
    v = Variable('a', value=[1, 2], dimensions='bb')
    field = Field(variables=v)
    path = self.get_temporary_file_path('out.shp')
    with self.assertRaises(ValueError):
        field.write(path, driver=DriverVector)

    # Test writing a field with two-dimensional geometry storage.
    value = [Point(1, 2), Point(3, 4), Point(5, 6), Point(6, 7), Point(8, 9), Point(10, 11)]
    gvar = GeometryVariable(value=value, name='points', dimensions='ngeoms')
    gvar.reshape([Dimension('lat', 2), Dimension('lon', 3)])
    var1 = Variable(name='dummy', value=[6, 7, 8], dimensions=['a'])
    var2 = Variable(name='some_lats', value=[41, 41], dimensions=['lat'])
    var3 = Variable(name='some_lons', value=[0, 90, 280], dimensions=['lon'])
    var4 = Variable(name='data', value=np.random.rand(4, 3, 2), dimensions=['time', 'lon', 'lat'])
    field = Field(variables=[var1, var2, var3, var4], geom=gvar, is_data=['data'])
    path = self.get_temporary_file_path('2d.shp')
    field.write(path, iter_kwargs={'followers': ['some_lats', 'some_lons']}, driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertTrue(len(read) > 2)
    self.assertEqual(list(read.keys()),
                     ['data', 'some_lats', 'some_lons', constants.DimensionName.GEOMETRY_DIMENSION])

    # Test writing a subset of the variables; 'remove' should not be written.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1])
    field.write(path, variable_names=['keep'], driver=DriverVector)
    read = RequestDataset(uri=path).get()
    self.assertNotIn('remove', read)

    # Test using append. Each single-point slice write should grow the file by one record.
    path = self.get_temporary_file_path('limited.shp')
    value = [Point(1, 2), Point(3, 4), Point(5, 6)]
    gvar = GeometryVariable(value=value, name='points', dimensions='points')
    var1 = Variable('keep', value=[1, 2, 3], dimensions='points')
    var2 = Variable('remove', value=[4, 5, 6], dimensions='points')
    field = Field(variables=[var1, var2], geom=gvar, is_data=[var1, var2])
    for idx in range(3):
        sub = field[{'points': idx}]
        if idx == 0:
            write_mode = MPIWriteMode.WRITE
        else:
            write_mode = MPIWriteMode.APPEND
        sub.write(path, write_mode=write_mode, driver=DriverVector)
        self.assertOGRFileLength(path, idx + 1)
def test_system_geometry_identifer_added(self):
    """Test geometry identifier is added for linked dataset geometry formats."""
    field = self.get_field()
    gid_name = HeaderName.ID_GEOMETRY
    self.assertNotIn(gid_name, field)

    ops = OcgOperations(dataset=field, output_format=constants.OutputFormatName.CSV_SHAPEFILE)
    ret = ops.execute()

    # The CSV component gains the geometry identifier...
    csv_keys = list(RequestDataset(ret).get().keys())
    self.assertIn(gid_name, csv_keys)

    # ...as does the companion shapefile.
    shp_path = os.path.join(ops.dir_output, ops.prefix, 'shp', ops.prefix + '_gid.shp')
    shp_keys = list(RequestDataset(shp_path).get().keys())
    self.assertIn(gid_name, shp_keys)
def test_chunked_rwg_spatial_subset(self):
    """The chunked-rwg CLI with a spatial subset should produce subset and weight files."""
    env.CLOBBER_UNITS_ON_BOUNDS = False

    src_grid = create_gridxy_global(crs=Spherical())
    src_field = create_exact_field(src_grid, 'foo')

    xvar = Variable(name='x', value=[-90., -80.], dimensions='xdim')
    yvar = Variable(name='y', value=[40., 50.], dimensions='ydim')
    dst_grid = Grid(x=xvar, y=yvar, crs=Spherical())

    # Create the file paths on the root rank only and broadcast so all ranks agree.
    if ocgis.vm.rank == 0:
        source = self.get_temporary_file_path('source.nc')
    else:
        source = None
    source = ocgis.vm.bcast(source)
    src_field.write(source)

    if ocgis.vm.rank == 0:
        destination = self.get_temporary_file_path('destination.nc')
    else:
        destination = None
    destination = ocgis.vm.bcast(destination)
    dst_grid.parent.write(destination)

    wd = os.path.join(self.current_dir_output, 'chunks')
    weight = os.path.join(self.current_dir_output, 'weights.nc')
    spatial_subset = os.path.join(self.current_dir_output, 'spatial_subset.nc')

    runner = CliRunner()
    cli_args = ['chunked-rwg', '--source', source, '--destination', destination, '--wd', wd,
                '--spatial_subset', '--spatial_subset_path', spatial_subset, '--weight', weight,
                '--esmf_regrid_method', 'BILINEAR', '--persist']
    result = runner.invoke(ocli, args=cli_args, catch_exceptions=False)
    self.assertEqual(result.exit_code, 0)

    # The spatial subset should be centered on the small destination grid.
    actual = RequestDataset(uri=spatial_subset).create_field()
    actual_ymean = actual.grid.get_value_stacked()[0].mean()
    actual_xmean = actual.grid.get_value_stacked()[1].mean()
    self.assertEqual(actual_ymean, 45.)
    self.assertEqual(actual_xmean, -85.)
    self.assertEqual(actual.grid.shape, (14, 14))

    # The weight file should exist and record provenance in its history attribute.
    self.assertTrue(os.path.exists(weight))
    actual = RequestDataset(weight, driver='netcdf').create_field()
    self.assertIn('history', actual.attrs)
def get_rd(self, key, kwds=None):
    """Return a request dataset for *key*, overlaying the entry's URI and variable onto *kwds*."""
    entry = self[key]
    if kwds is None:
        kwds = {}
    # The URI and variable always come from the registry entry, overriding any overloads.
    kwds.update({'uri': self.get_uri(key), 'variable': entry['variable']})
    return RequestDataset(**kwds)
def test_system_multiple_netcdf_files(self):
    """Test subsetting multiple netCDF files and returning a spatial collection."""
    grid = create_gridxy_global(resolution=3.0)
    # Renamed from 'vars' to avoid shadowing the builtin vars().
    variable_names = ['ocgis_example_tasmin', 'ocgis_example_tas', 'ocgis_example_tasmax']
    paths = [self.get_temporary_file_path('{}.nc'.format(ii)) for ii in variable_names]
    geom_select_uid = [16, 23]
    field_names = ['tasmin', 'tas', 'tasmax']

    # Fill each file's data variable with a distinct constant: 10, 20, 30.
    for ctr, (path, var) in enumerate(zip(paths, variable_names), start=1):
        field = create_exact_field(grid.copy(), var, ntime=3)
        field.data_variables[0].get_value()[:] = 10 * ctr
        field.write(path)

    rds = [RequestDataset(uri=uri, variable=var, field_name=field_name)
           for uri, var, field_name in zip(paths, variable_names, field_names)]
    ops = OcgOperations(dataset=rds, spatial_operation='clip', aggregate=True,
                        geom=self.path_state_boundaries, geom_select_uid=geom_select_uid)
    ret = ops.execute()

    self.assertAsSetEqual(ret.children.keys(), geom_select_uid)
    for geom_uid in geom_select_uid:
        actual = ret.children[geom_uid].children.keys()
        self.assertAsSetEqual(actual, field_names)
        for idx, field_name in enumerate(field_names):
            actual = ret.get_element(container_ugid=geom_uid, field_name=field_names[idx],
                                     variable_name=variable_names[idx])
            actual = actual.get_value()
            # Each field was filled with (idx + 1) * 10 above.
            actual = actual == (idx + 1) * 10
            self.assertTrue(np.all(actual))
def test_get_dist_default_distribution(self):
    """Test using default distributions defined by drivers."""
    # Write the source file on the root rank only, then broadcast its path.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('foo.nc')
            varx = Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'})
            vary = Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'})
            vc = VariableCollection(variables=[varx, vary])
            vc.write(path)
        else:
            path = None
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path)
    dist = rd.driver.dist
    # The 'seven' dimension should be distributed by the driver's default behavior.
    distributed_dimension = dist.get_dimension('seven')
    self.assertTrue(distributed_dimension.dist)
def test_system_through_operations(self):
    """A mock field function should run through operations and survive a netCDF round trip."""
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}])
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variable = actual_field['my_mff']
    self.assertEqual(actual_variable.attrs['long_name'], MockFieldFunction.long_name)
    self.assertEqual(actual_variable.get_value().tolist(), self.desired_value)
    # The source data variable should be dropped from the calculation output.
    self.assertNotIn('data', list(actual_field.keys()))

    # Test writing output to netCDF.
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}],
                        output_format='nc')
    ret = ops.execute()
    nc_field = RequestDataset(ret).get()
    self.assertEqual(nc_field['my_mff'].get_value().tolist(), self.desired_value)
def test_system_splitting_unstructured(self):
    """Grid splitting should handle an unstructured point-abstraction source grid."""
    ufile = self.get_temporary_file_path('ugrid.nc')
    self.fixture_regular_ugrid_file(ufile, 10.)

    src_grid = RequestDataset(ufile, driver=DriverNetcdfUGRID, grid_abstraction='point').get().grid
    self.assertEqual(src_grid.abstraction, 'point')
    dst_grid = self.get_gridxy_global(resolution=20.)

    splitter = GridSplitter(src_grid, dst_grid, (3, 3), check_contains=False,
                            src_grid_resolution=10., paths=self.fixture_paths)
    splitter.write_subsets()

    # The first source subset should carry the global source index identifier.
    subset_path = splitter.create_full_path_from_template('src_template', index=1)
    subset = RequestDataset(subset_path).get()
    self.assertIn(GridSplitterConstants.IndexFile.NAME_SRCIDX_GUID, subset)
def test(self): raise SkipTest('benchmarking only') # development laptop: 8 procs: 139 seconds ocgis.env.VERBOSE = True uri = '/home/benkoziol/l/data/bekozi-work/lisa-rensi-nwm/nwm.t00z.analysis_assim.terrain_rt.tm00.conus.nc_georeferenced.nc' # dimension_map = {'time': {'variable': 'time', DimensionMapKey.DIMS: ['time']}, # 'x': {'variable': 'x', DimensionMapKey.DIMS: ['x']}, # 'y': {'variable': 'y', DimensionMapKey.DIMS: ['y']}, # 'crs': {'variable': 'ProjectionCoordinateSystem'}} dimension_map = { 'time': { 'variable': 'time' }, 'x': { 'variable': 'x' }, 'y': { 'variable': 'y' }, 'crs': { 'variable': 'ProjectionCoordinateSystem' } } rd = RequestDataset(uri, dimension_map=dimension_map) # barrier_print(rd.dist.get_dimension('x').bounds_local) # barrier_print(rd.dist.get_dimension('y').bounds_local) # tkk # field = rd.get() # None ops = OcgOperations(dataset=rd, geom='state_boundaries', geom_select_uid=[16]) ret = ops.execute()
def test_system_spatial_averaging_through_operations(self):
    """Spatial aggregation through operations should yield the mean of the source data."""
    data_name = 'data'

    # Write the source file on the root rank only, then broadcast the data and path.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)

            data_value = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, data_value, grid.dimensions, float)
            data_value = data.get_value()

            field = Field(grid=grid, is_data=data)

            path = self.get_temporary_file_path('data.nc')
            field.write(path)
        else:
            data_value, path = None, None
    data_value = MPI_COMM.bcast(data_value)
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path, variable=data_name)

    ops = OcgOperations(dataset=rd, aggregate=True)
    ret = ops.execute()
    if ret is None:
        # Only the root rank receives a return value.
        self.assertNotEqual(vm.rank, vm.root)
    else:
        out_field = ret.get_element()

        if MPI_RANK == 0:
            # The single aggregated value equals the mean of the source data.
            desired = data_value.mean()
            actual = out_field.data_variables[0].get_value()[0]
            self.assertEqual(actual, desired)
def test_system_with_time_data(self):
    """Test writing data with a time dimension."""
    path = self.get_temporary_file_path('what.shp')
    t = TemporalVariable(value=[1.5, 2.5], name='time', dimensions='time')
    geom = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom', dimensions='time')
    dimension_map = {'time': {'variable': 'time'}, 'geom': {'variable': 'geom'}}
    field = Field(variables=[t, geom], dimension_map=dimension_map)
    field.write(path, iter_kwargs={'variable': 'time'}, driver=DriverVector)

    field2 = RequestDataset(uri=path).get()

    # netcdftime worthlessness
    poss = [['0001-01-02 12:00:00', '0001-01-03 12:00:00'],
            ['1-01-02 12:00:00', '1-01-03 12:00:00']]
    actual = field2['TIME'].get_value().tolist()
    self.assertTrue(any(p == actual for p in poss))
def test_system_dataset_identifiers_on_variables(self):
    """Test dataset identifiers make it to output variables for iteration."""
    paths = []
    variables = []
    # Write two small files, each with uniquely named coordinates and data variable.
    for suffix in [1, 2]:
        path = self.get_temporary_file_path('foo{}.nc'.format(suffix))
        paths.append(path)
        x = Variable(name='x{}'.format(suffix), value=[2, 3], dimensions='x')
        y = Variable(name='y{}'.format(suffix), value=[4, 5, 6], dimensions='y')
        data_variable_name = 'data{}'.format(suffix)
        variables.append(data_variable_name)
        data = Variable(name=data_variable_name, value=np.arange(6).reshape(2, 3) + suffix,
                        dimensions=['x', 'y'])
        grid = Grid(x, y)
        field = Field(grid=grid, is_data=data)
        field.write(path)

    rds = [RequestDataset(uri=p, variable=dv) for p, dv in zip(paths, variables)]
    ops = OcgOperations(dataset=rds)
    # Request datasets should be assigned sequential unique identifiers.
    rds_uids = [ds.uid for ds in ops.dataset]
    self.assertEqual(rds_uids, [1, 2])
    ret = ops.execute()

    for field in ret.iter_fields():
        self.assertFalse(field.grid.has_allocated_abstraction_geometry)
        for variable in list(field.values()):
            if isinstance(variable, CoordinateReferenceSystem):
                continue
            # Every non-CRS variable carries its originating dataset identifier,
            # and the identifier appears in each iterated output row.
            self.assertIsNotNone(variable._request_dataset.uid)
            for row in variable.get_iter():
                self.assertIsNotNone(row[HeaderName.DATASET_IDENTIFER])
def test_write_variable_collection(self):
    """A distributed variable collection write should reproduce its serial source file."""
    # Create the source file on the root rank only, then broadcast the paths.
    if MPI_RANK == 0:
        path_in = self.get_temporary_file_path('foo.nc')
        path_out = self.get_temporary_file_path('foo_out.nc')
        with self.nc_scope(path_in, 'w') as ds:
            ds.createDimension('seven', 7)
            var = ds.createVariable('var_seven', float, dimensions=('seven',))
            var[:] = np.arange(7, dtype=float) + 10
            var.foo = 'bar'
    else:
        path_in, path_out = [None] * 2
    path_in = MPI_COMM.bcast(path_in)
    path_out = MPI_COMM.bcast(path_out)

    rd = RequestDataset(path_in)
    # Force a distributed read along the 'seven' dimension.
    rd.metadata['dimensions']['seven']['dist'] = True
    driver = DriverNetcdf(rd)
    vc = driver.get_variable_collection()
    # Only ranks holding a non-empty piece participate in the write.
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path_out)

    if MPI_RANK == 0:
        self.assertNcEqual(path_in, path_out)
def run_op(resource, calc, options):
    """Create an OCGIS operation, launch it and return the results."""
    from os.path import abspath, curdir
    from ocgis import OcgOperations, RequestDataset, env
    import uuid

    LOGGER.info('Start ocgis module call function')

    # Prepare the environment: allow overwrites and isolate output under a unique prefix.
    env.OVERWRITE = True
    prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    # A key of 'resource' signals an unnamed variable.
    datasets = [RequestDataset(val, variable=None if key == 'resource' else key)
                for key, val in resource.items()]

    ops = OcgOperations(dataset=datasets, calc=calc, calc_grouping=options['calc_grouping'],
                        dir_output=abspath(curdir), prefix=prefix,
                        add_auxiliary_files=False, output_format='nc')
    return ops.execute()
def __iter__(self):
    """Yield copied elements, coercing dicts/ESMF fields to OCGIS objects and assigning UIDs."""
    # Types that represent a single element and must not be iterated directly.
    non_iterables = [AbstractRequestObject, dict, Field]
    if env.USE_ESMF:
        from ocgis.regrid.base import ESMF
        non_iterables.append(ESMF.Field)

    # Wrap a single non-iterable element so it may be iterated uniformly.
    if isinstance(self._value, tuple(non_iterables)):
        to_itr = [self._value]
    else:
        to_itr = self._value

    for uid, element in enumerate(to_itr, start=1):
        # Dicts are keyword arguments for a request dataset.
        if isinstance(element, dict):
            element = RequestDataset(**element)

        # ESMF fields are converted to OCGIS fields.
        if env.USE_ESMF and isinstance(element, ESMF.Field):
            from ocgis.regrid.base import get_ocgis_field_from_esmf_field
            element = get_ocgis_field_from_esmf_field(element)

        # Yield a copy so the stored value is never mutated; fall back to a shallow
        # copy for objects without a copy() method.
        try:
            element = element.copy()
        except AttributeError:
            element = copy(element)

        if element.uid is None:
            element.uid = uid
            # TODO: Remove me once the driver does not accept request datasets at initialization.
            # Try to change the driver UID.
            try:
                element.driver.rd.uid = uid
            except AttributeError:
                # The field driver does not keep a copy of the request dataset.
                if hasattr(element.driver, 'rd'):
                    raise

        yield element
def test_mfdataset_to_nc(self):
    """A multi-file dataset calculation should write the expected temporal values to netCDF."""
    rd = self.test_data.get_rd('maurer_2010_pr')
    ops = OcgOperations(dataset=rd, output_format='nc',
                        calc=[{'func': 'mean', 'name': 'my_mean'}], calc_grouping=['year'],
                        geom='state_boundaries', select_ugid=[23])
    out_path = ops.execute()
    out_field = RequestDataset(out_path, 'my_mean').get()
    self.assertNumpyAll(out_field.temporal.get_value(), np.array([18444., 18809.]))
def test_init_metadata_only(self):
    """A request dataset constructed from metadata alone should yield a usable field."""
    metadata = {'variables': {'foo': {}}}
    rd = RequestDataset(metadata=metadata)
    # With no URI, the driver falls back to the default netCDF-CF key.
    self.assertEqual(rd.driver.key, DriverKey.NETCDF_CF)
    self.assertIsNone(rd.uri)
    self.assertEqual(rd.metadata, metadata)
    self.assertIn('foo', rd.create_field().keys())
def read(cls, *args, **kwargs):
    """
    Read data from disk. `args` and `kwargs` are passed to a request dataset object and `create_field` is
    called. See :class:`~ocgis.RequestDataset` documentation.
    """
    from ocgis import RequestDataset

    rd = RequestDataset(*args, **kwargs)
    return rd.create_field()