def test_system_subset(self):
    """Subset by geometry and confirm an output coordinate system change leaves the data values untouched.

    Runs once with rotated pole priority off and once with it on. The grid coordinates are expected to differ by
    more than one unit everywhere when the priority is on, and nowhere when it is off.
    """

    for rpp in (False, True):
        rd = self.fixture_rd(**{KeywordArgument.ROTATED_POLE_PRIORITY: rpp})

        def run_subset(**extra):
            # Keyword values are rebuilt on every call so the two runs share only the request dataset.
            operations = OcgOperations(dataset=rd, output_format=constants.OutputFormatName.OCGIS, snippet=True,
                                       geom='world_countries', select_ugid=[69], **extra)
            return operations.execute()

        native = run_subset()

        # The subset geometry should mask at least part of the grid.
        self.assertTrue(native.get_element().grid.get_mask().any())

        # Same subset, with the output moved to a spherical coordinate system.
        transformed = run_subset(output_crs=Spherical())

        # The data values do not depend on the coordinate transformation.
        native_value = native.get_element(variable_name='pr').get_value()
        transformed_value = transformed.get_element(variable_name='pr').get_value()
        self.assertNumpyAll(native_value, transformed_value)

        # The grid coordinates either all move by more than one unit or none of them do.
        native_coords = native.get_element().grid.get_value_stacked()
        transformed_coords = transformed.get_element().grid.get_value_stacked()
        moved = np.abs(native_coords - transformed_coords) > 1
        if rpp:
            self.assertTrue(moved.all())
        else:
            self.assertFalse(moved.any())
def test_system_subset(self):
    """Subset by geometry and confirm a spherical output coordinate system changes coordinates but not data."""

    def run_subset(**extra):
        # Keyword values are rebuilt on every call so the two runs share only the request dataset.
        operations = OcgOperations(dataset=self.rd, output_format=constants.OutputFormatName.OCGIS, snippet=True,
                                   geom='world_countries', select_ugid=[69], **extra)
        return operations.execute()

    native = run_subset()

    # The subset geometry should mask at least part of the grid.
    self.assertTrue(native.get_element().grid.get_mask().any())

    # Same subset, with the output moved to a spherical coordinate system.
    transformed = run_subset(output_crs=Spherical())

    # The data values do not depend on the coordinate transformation.
    native_value = native.get_element(variable_name='pr').get_value()
    transformed_value = transformed.get_element(variable_name='pr').get_value()
    self.assertNumpyAll(native_value, transformed_value)

    # Every grid coordinate is expected to move by more than one unit.
    native_coords = native.get_element().grid.get_value_stacked()
    transformed_coords = transformed.get_element().grid.get_value_stacked()
    moved = np.abs(native_coords - transformed_coords) > 1
    self.assertTrue(moved.all())
def test_clip_aggregate(self):
    """Run a clip against two state boundaries for a single year and write a CSV-shapefile.

    The test passes when the operations finish; there is no assertion on the output.
    """

    # This geometry was hanging.
    rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'year': [2003]}})
    # The field previously loaded here was never used; the operations load the data themselves.
    ops = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[14, 16], aggregate=False,
                        spatial_operation='clip', output_format=constants.OutputFormatName.CSV_SHAPEFILE)
    ops.execute()
def test_calculate_operations(self):
    """Check the ICCLIM ``TG`` calculation equals the OCGIS mean for month and month/year groupings."""

    rd = self.test_data.get_rd('cancm4_tas')
    slc = [None, None, None, [0, 10], [0, 10]]
    icclim_calc = [{'func': 'icclim_TG', 'name': 'TG'}]
    ocgis_calc = [{'func': 'mean', 'name': 'mean'}]

    for grouping in [['month'], ['month', 'year']]:
        from_ocgis = OcgOperations(calc=ocgis_calc, calc_grouping=grouping, slice=slc, dataset=rd).execute()
        from_icclim = OcgOperations(calc=icclim_calc, calc_grouping=grouping, slice=slc, dataset=rd).execute()

        desired = from_ocgis.get_element(variable_name='mean').get_masked_value()
        actual = from_icclim.get_element(variable_name='TG').get_masked_value()
        self.assertNumpyAll(desired, actual)
def test_system_dataset_as_field_from_file(self):
    """Check a field passed as the dataset yields the same subset values as the request dataset it came from."""

    rd = self.test_data.get_rd('cancm4_tas')
    subset = dict(snippet=True, geom='state_boundaries', select_ugid=[23])

    # First pass: hand the operations an already loaded field.
    from_field = OcgOperations(dataset=rd.get(), **subset).execute().get_element(container_ugid=23)
    self.assertEqual(from_field.data_variables[0].shape, (1, 3, 3))

    # Second pass: hand the operations the request dataset itself.
    from_rd = OcgOperations(dataset=rd, **subset).execute().get_element(container_ugid=23)

    self.assertNumpyAll(from_field['tas'].get_value(), from_rd['tas'].get_value())
def test_seasonal_calc_grouping(self):
    """Check the ICCLIM ``TG`` calculation equals the OCGIS mean for a unique December-February season grouping."""

    rd = self.test_data.get_rd('cancm4_tas')
    slc = [None, [0, 600], None, [0, 10], [0, 10]]
    season = [[12, 1, 2], 'unique']
    shared = dict(calc_grouping=season, slice=slc, dataset=rd)

    from_ocgis = OcgOperations(calc=[{'func': 'mean', 'name': 'mean'}], **shared).execute()
    from_icclim = OcgOperations(calc=[{'func': 'icclim_TG', 'name': 'TG'}], **shared).execute()

    desired = from_ocgis.get_element(variable_name='mean').get_masked_value()
    actual = from_icclim.get_element(variable_name='TG').get_masked_value()
    self.assertNumpyAll(desired, actual)
def test_system_dataset_identifiers_on_variables(self):
    """Check every output variable, and every row iterated from it, carries a dataset identifier."""

    paths = []
    variables = []
    # Write two small files, each holding one uniquely named data variable.
    for suffix in (1, 2):
        path = self.get_temporary_file_path('foo{}.nc'.format(suffix))
        data_variable_name = 'data{}'.format(suffix)
        paths.append(path)
        variables.append(data_variable_name)

        x = Variable(name='x{}'.format(suffix), value=[2, 3], dimensions='x')
        y = Variable(name='y{}'.format(suffix), value=[4, 5, 6], dimensions='y')
        data = Variable(name=data_variable_name, value=np.arange(6).reshape(2, 3) + suffix, dimensions=['x', 'y'])
        Field(grid=Grid(x, y), is_data=data).write(path)

    rds = [RequestDataset(uri=p, variable=dv) for p, dv in zip(paths, variables)]
    ops = OcgOperations(dataset=rds)
    self.assertEqual([ds.uid for ds in ops.dataset], [1, 2])

    ret = ops.execute()

    for out_field in ret.iter_fields():
        self.assertFalse(out_field.grid.has_allocated_abstraction_geometry)
        for variable in list(out_field.values()):
            # Coordinate systems are skipped; only the remaining variables are checked.
            if not isinstance(variable, CoordinateReferenceSystem):
                self.assertIsNotNone(variable._request_dataset.uid)
                for row in variable.get_iter():
                    self.assertIsNotNone(row[HeaderName.DATASET_IDENTIFER])
def test_keyword_spatial_operations_bounding_box(self):
    """Check a four-element bounding box given as ``geom`` subsets the data to the expected shape."""

    bounding_box = [-80, 22.5, 50, 70.0]
    rd = self.test_data.get_rd('subset_test_slp')
    subset_field = OcgOperations(dataset=rd, geom=bounding_box).execute().get_element()
    self.assertEqual(subset_field.data_variables[0].shape, (365, 20, 144))
def test_mfdataset_to_nc(self):
    """Write a yearly mean of the ``maurer_2010_pr`` dataset to netCDF and check the output time values."""

    rd = self.test_data.get_rd('maurer_2010_pr')
    yearly_mean = [{'func': 'mean', 'name': 'my_mean'}]
    ops = OcgOperations(dataset=rd, output_format='nc', calc=yearly_mean, calc_grouping=['year'],
                        geom='state_boundaries', select_ugid=[23])
    path_out = ops.execute()

    # Read the written file back and compare its time coordinate.
    out_field = RequestDataset(path_out, 'my_mean').get()
    self.assertNumpyAll(out_field.temporal.get_value(), np.array([18444., 18809.]))
def test(self):
    """Benchmark a state boundary subset against a local file. Always skipped; kept for manual benchmarking."""

    raise SkipTest('benchmarking only')

    # Nothing below runs while the skip above is in place.
    # development laptop: 8 procs: 139 seconds
    ocgis.env.VERBOSE = True

    uri = '/home/benkoziol/l/data/bekozi-work/lisa-rensi-nwm/nwm.t00z.analysis_assim.terrain_rt.tm00.conus.nc_georeferenced.nc'
    variable_by_entry = (('time', 'time'), ('x', 'x'), ('y', 'y'), ('crs', 'ProjectionCoordinateSystem'))
    dimension_map = {entry: {'variable': variable} for entry, variable in variable_by_entry}

    rd = RequestDataset(uri, dimension_map=dimension_map)
    ops = OcgOperations(dataset=rd, geom='state_boundaries', geom_select_uid=[16])
    ops.execute()
def test_system_spatial_averaging_through_operations(self):
    """Check ``aggregate=True`` returns the mean of a small grid written by a single rank."""

    data_name = 'data'

    # Only the rank inside the 'write' scope creates the file; the others start with placeholders.
    with vm.scoped('write', [0]):
        if vm.is_null:
            data_value, path = None, None
        else:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)
            raw_value = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, raw_value, grid.dimensions, float)
            # Use the value as stored on the variable for the later comparison.
            data_value = data.get_value()
            path = self.get_temporary_file_path('data.nc')
            Field(grid=grid, is_data=data).write(path)

    # Share the values and the file location with every rank.
    data_value = MPI_COMM.bcast(data_value)
    path = MPI_COMM.bcast(path)

    rd = RequestDataset(path, variable=data_name)
    ret = OcgOperations(dataset=rd, aggregate=True).execute()

    if ret is not None:
        out_field = ret.get_element()
        if MPI_RANK == 0:
            desired = data_value.mean()
            actual = out_field.data_variables[0].get_value()[0]
            self.assertEqual(actual, desired)
    else:
        # A missing return value is only acceptable away from the root rank.
        self.assertNotEqual(vm.rank, vm.root)
def test_system_multiple_netcdf_files(self):
    """Test subsetting multiple netCDF files and returning a spatial collection.

    Three files are written, each filled with a constant (10, 20, 30) that identifies it. After a clipped,
    aggregated subset by two geometries, every geometry must hold all three fields with their constants intact.
    """

    grid = create_gridxy_global(resolution=3.0)
    # Named ``variable_names`` rather than ``vars`` so the builtin is not shadowed.
    variable_names = ['ocgis_example_tasmin', 'ocgis_example_tas', 'ocgis_example_tasmax']
    field_names = ['tasmin', 'tas', 'tasmax']
    paths = [self.get_temporary_file_path('{}.nc'.format(name)) for name in variable_names]
    geom_select_uid = [16, 23]

    for ctr, (path, variable_name) in enumerate(zip(paths, variable_names), start=1):
        field = create_exact_field(grid.copy(), variable_name, ntime=3)
        field.data_variables[0].get_value()[:] = 10 * ctr
        field.write(path)

    rds = [RequestDataset(uri=uri, variable=variable_name, field_name=field_name)
           for uri, variable_name, field_name in zip(paths, variable_names, field_names)]
    ops = OcgOperations(dataset=rds, spatial_operation='clip', aggregate=True, geom=self.path_state_boundaries,
                        geom_select_uid=geom_select_uid)
    ret = ops.execute()

    self.assertAsSetEqual(ret.children.keys(), geom_select_uid)
    for geom_uid in geom_select_uid:
        self.assertAsSetEqual(ret.children[geom_uid].children.keys(), field_names)
        # Walk the field and variable names in lockstep; the counter reproduces each file's fill constant.
        for ctr, (field_name, variable_name) in enumerate(zip(field_names, variable_names), start=1):
            element = ret.get_element(container_ugid=geom_uid, field_name=field_name,
                                      variable_name=variable_name)
            self.assertTrue(np.all(element.get_value() == ctr * 10))
def test_system_regridding_crs(self):
    """Regrid a spherical source onto a WGS84 copy of itself and check the data survives within 1e-5."""

    dest_crs = WGS84()

    grid_spherical = self.get_gridxy_global(resolution=10.0, wrapped=False, crs=Spherical())
    self.assertEqual(grid_spherical.crs, Spherical())

    coords = grid_spherical.get_value_stacked()
    data_value = self.get_exact_field_value(coords[1], coords[0])
    desired = data_value.copy()
    data_var = Variable(name='data_src', value=data_value, dimensions=grid_spherical.dimensions)

    source = Field(grid=grid_spherical, is_data=data_var, crs=grid_spherical.crs)
    self.assertEqual(source.crs, Spherical())

    # The destination is the source moved to the other coordinate system.
    destination = deepcopy(source)
    destination.update_crs(dest_crs)

    # The coordinate system update should shift the y-coordinates by a known maximum amount.
    source_expanded = deepcopy(source.grid)
    source_expanded.expand()
    y_shift = np.abs(destination.y.get_value() - source_expanded.y.get_value())
    self.assertAlmostEqual(y_shift.max(), 0.19231511439)

    for output_crs in [None, WGS84()]:
        ret = OcgOperations(dataset=source, regrid_destination=destination, output_crs=output_crs).execute()
        regridded = ret.get_element(variable_name=data_var.name)

        # Without an output coordinate system the result stays spherical.
        if output_crs is not None:
            self.assertEqual(regridded.parent.crs, WGS84())
        else:
            self.assertEqual(regridded.parent.crs, Spherical())

        error = np.abs(regridded.get_value() - desired)
        self.assertTrue(error.max() < 1e-5)
def test_system_process_geometries(self):
    """Subset a small grid by two polygons while converting the output to an equal-area projection.

    The mean of the stacked grid coordinates of each subset is compared to a stored value per geometry.
    """

    wkts = [
        'POLYGON((-105.21347987288135073 40.21514830508475313,-104.39928495762711691 40.21514830508475313,-104.3192002118643984 39.5677966101694949,-102.37047139830508513 39.61451271186440692,-102.12354343220337682 37.51896186440677639,-105.16009004237288593 37.51896186440677639,-105.21347987288135073 40.21514830508475313))',
        'POLYGON((-104.15235699152542281 39.02722457627118757,-103.71189088983049942 39.44099576271186436,-102.71750529661017026 39.28082627118644155,-102.35712394067796538 37.63908898305084705,-104.13900953389830306 37.63241525423728717,-104.15235699152542281 39.02722457627118757))',
    ]
    geom = [{'geom': wkt.loads(text), 'properties': {'UGID': ugid}} for ugid, text in enumerate(wkts)]

    # Four-by-four grid: rows share a latitude, columns share a longitude.
    lat_value = [[lat] * 4 for lat in (37.0, 38.0, 39.0, 40.0)]
    lon_value = [[-105.0, -104.0, -103.0, -102.0] for _ in range(4)]

    output_crs = CoordinateReferenceSystem(
        value={'a': 6370997, 'lon_0': -100, 'y_0': 0, 'no_defs': True, 'proj': 'laea', 'x_0': 0, 'units': 'm',
               'b': 6370997, 'lat_0': 45})

    x = Variable('x', lon_value, dimensions=['lat', 'lon'])
    y = Variable('y', lat_value, dimensions=['lat', 'lon'])
    source = Field(grid=Grid(x, y), crs=Spherical())

    ret = OcgOperations(dataset=source, geom=geom, output_crs=output_crs).execute()

    expected = {0: -502052.79407259845, 1: -510391.37909706926}
    for out_field, container in ret.iter_fields(yield_container=True):
        ugid = container.geom.ugid.get_value()[0]
        self.assertAlmostEqual(out_field.grid.get_value_stacked().mean(), expected[ugid])
def test_calculation_operations_to_nc(self):
    """Write the ICCLIM ``SU`` index to netCDF and check the global and variable attributes of the output."""

    rd = self.test_data.get_rd('cancm4_tasmax_2011')
    slc = [None, None, None, [0, 10], [0, 10]]
    ops_ocgis = OcgOperations(calc=[{'func': 'icclim_SU', 'name': 'SU'}], calc_grouping=['month'], slice=slc,
                              dataset=rd, output_format='nc')
    ret = ops_ocgis.execute()

    # The expected JSON dump of the source attributes. The literal used to break across a physical line inside
    # its quotes, which is not valid for a single-quoted string. It is written as two adjacent literals that
    # Python joins at compile time.
    # NOTE(review): the two halves are joined exactly at the former line break - confirm against the fixture.
    source_attributes = (
        '{"institution": "CCCma (Canadian Centre for Climate Modelling and Analysis, Victoria, BC, Canada)", "institute_id": "CCCma", "experiment_id": "decadal2010", "source": "CanCM4 2010 atmosphere: CanAM4 (AGCM15i, T63L35) ocean: CanOM4 (OGCM4.0, 256x192L40) sea ice: CanSIM1 (Cavitating Fluid, T63 Gaussian Grid) land: CLASS2.7", "model_id": "CanCM4", "forcing": "GHG,Oz,SA,BC,OC,LU,Sl,Vl (GHG includes CO2,CH4,N2O,CFC11,effective CFC12)", "parent_experiment_id": "N/A", "parent_experiment_rip": "N/A", "branch_time": 0.0, "contact": "*****@*****.**", "references": "http://www.cccma.ec.gc.ca/models", "initialization_method": 1, "physics_version": 1, "tracking_id": "64384802-3f0f-4ab4-b569-697bd5430854", "branch_time_YMDH": "2011:01:01:00", "CCCma_runid": "DHFP1B_E002_I2011_M01", "CCCma_parent_runid": "DHFP1_E002", "CCCma_data_licence": "1) GRANT OF LICENCE - The Government of Canada (Environment Canada) is the \\nowner of all intellectual property rights (including copyright) that may exist in this Data \\nproduct. You (as \\"The Licensee\\") are hereby granted a non-exclusive, non-assignable, \\nnon-transferable unrestricted licence to use this data product for any purpose including \\nthe right to share these data with others and to make value-added and derivative \\nproducts from it. '
        'This licence is not a sale of any or all of the owner\'s rights.\\n2) NO WARRANTY - This Data product is provided \\"as-is\\"; it has not been designed or \\nprepared to meet the Licensee\'s particular requirements. Environment Canada makes no \\nwarranty, either express or implied, including but not limited to, warranties of \\nmerchantability and fitness for a particular purpose. In no event will Environment Canada \\nbe liable for any indirect, special, consequential or other damages attributed to the \\nLicensee\'s use of the Data product.", "product": "output", "experiment": "10- or 30-year run initialized in year 2010", "frequency": "day", "creation_date": "2012-03-28T15:32:08Z", "history": "2012-03-28T15:32:08Z CMOR rewrote data to comply with CF standards and CMIP5 requirements.", "Conventions": "CF-1.4", "project_id": "CMIP5", "table_id": "Table day (28 March 2011) f9d6cfec5981bb8be1801b35a81002f0", "title": "CanCM4 model output prepared for CMIP5 10- or 30-year run initialized in year 2010", "parent_experiment": "N/A", "modeling_realm": "atmos", "realization": 2, "cmor_version": "2.8.0"}'
    )

    with nc_scope(ret) as ds:
        actual_global = deepcopy(ds.__dict__)

        # The history attribute is checked by content rather than by fixed character offsets, so the check does
        # not depend on the length of whatever text precedes the calculation note.
        history = actual_global.pop('history')
        self.assertIn(' Calculation of SU indice (monthly climatology) from 2011-1-1 to 2020-12-31.', history)

        desired_global = OrderedDict([
            ('source_data_global_attributes', source_attributes),
            ('title', 'ECA heat indice SU'),
            ('references', 'ATBD of the ECA indices calculation (http://eca.knmi.nl/documents/atbd.pdf)'),
            ('institution', 'Climate impact portal (http://climate4impact.eu)'),
            ('comment', ' ')])
        self.assertDictEqual(actual_global, desired_global)

        var = ds.variables['SU']
        actual_attrs = dict(var.__dict__)
        # The fill value is not part of the comparison.
        actual_attrs.pop('_FillValue', None)
        desired_attrs = {'units': 'days',
                         'standard_name': AbstractIcclimFunction.standard_name,
                         'long_name': 'Summer days (number of days where daily maximum temperature > 25 degrees)',
                         'grid_mapping': 'latitude_longitude'}
        self.assertEqual(actual_attrs, desired_attrs)
def test_bad_time_dimension(self):
    """Run every output format with ``format_time=False`` and check the time values are left as numbers."""

    formats = constants.OutputFormatName
    for output_format in [formats.OCGIS, formats.CSV, formats.CSV_SHAPEFILE, formats.SHAPEFILE, formats.NETCDF]:
        dataset = self.test_data.get_rd('snippet_seasonalbias')
        ops = OcgOperations(dataset=dataset, output_format=output_format, format_time=False,
                            prefix=output_format)
        ret = ops.execute()

        if output_format == formats.OCGIS:
            temporal = ret.get_element().temporal
            self.assertFalse(temporal.format_time)
            desired_centroids = np.array([-712208.5, -712117., -712025., -711933.5])
            self.assertNumpyAll(temporal.value_numtime.data, desired_centroids)
            desired_bounds = np.array([[-712254., -712163.], [-712163., -712071.], [-712071., -711979.],
                                       [-711979., -711888.]])
            self.assertNumpyAll(temporal.bounds.value_numtime.data, desired_bounds)

        if output_format == formats.CSV:
            with open(ret) as f:
                for row in DictReader(f):
                    # The date part columns stay empty and the time column holds a number.
                    self.assertTrue(all([row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]))
                    self.assertTrue(float(row['TIME']) < -50000)

        if output_format == formats.NETCDF:
            ignore_attributes = {'global': ['history'],
                                 'bounds_time': ['calendar', 'units'],
                                 'bias': ['_FillValue', 'grid_mapping', 'units'],
                                 'latitude': ['standard_name', 'units'],
                                 'longitude': ['standard_name', 'units']}
            self.assertNcEqual(ret, dataset.uri, check_types=False, ignore_attributes=ignore_attributes,
                               ignore_variables=['latitude_longitude'])
def test_keyword_calc_grouping_seasonal_with_year(self):
    """Check a January-March season grouped by year gives the expected output shape for a mean."""

    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}], calc_grouping=[[1, 2, 3], 'year'],
                        geom='state_boundaries', select_ugid=[25])
    mean_variable = ops.execute().get_element(variable_name='mean')
    self.assertEqual(mean_variable.shape, (10, 4, 4))
def test_system_calculation_variable_naming(self):
    """Check a calculation may reuse the name of the data variable it is computed from."""

    field = self.get_field()
    source_name = field.data_variables[0].name
    ops = OcgOperations(dataset=field, calc=[{'func': 'mean', 'name': source_name}], calc_grouping=['month'])
    out_field = ops.execute().get_element()
    self.assertIn(field.data_variables[0].name, out_field)
def test_system_user_geometry_identifier_typed_appropriately(self):
    """Check the user geometry identifier is ``int32`` when the ``NETCDF3_64BIT_OFFSET`` data model is requested."""

    source = create_exact_field(create_gridxy_global(resolution=3.0), 'foo', crs=Spherical())
    ops = OcgOperations(dataset=source, output_format_options={'data_model': 'NETCDF3_64BIT_OFFSET'},
                        geom=[-100, 30, -90, 40], aggregate=True)
    collection = ops.execute()
    subset = collection[1]
    self.assertEqual(subset.geom.ugid.dtype, np.int32)
def test_system_field_is_untouched(self):
    """Check a field passed through operations with no subset or calculation keeps the same keys."""

    field = self.get_field()
    # The geometry identifier must not be present before the operations run.
    self.assertNotIn(HeaderName.ID_GEOMETRY, field)

    ret = OcgOperations(dataset=field, output_format=constants.OutputFormatName.OCGIS).execute()
    out_field = ret.get_element()
    self.assertEqual(list(field.keys()), list(out_field.keys()))
def test_system_date_groups_all(self):
    """Check the ``'all'`` calculation grouping equals a mean over the full time axis of the plain subset."""

    rd = self.test_data.get_rd('cancm4_tasmax_2011')
    subset = dict(geom='state_boundaries', select_ugid=[25])

    with_calc = OcgOperations(dataset=rd, calc=[{'func': 'mean', 'name': 'mean'}], calc_grouping='all',
                              **subset).execute()
    without_calc = OcgOperations(dataset=rd, **subset).execute()

    calculated = with_calc.get_element(container_ugid=25, field_name='tasmax')['mean']
    source = without_calc.get_element(container_ugid=25, field_name='tasmax')

    self.assertEqual(source['tasmax'].shape, (3650, 4, 4))
    self.assertEqual(calculated.shape, (1, 4, 4))

    source_value = source['tasmax'].get_masked_value()
    desired = np.ma.mean(source_value, axis=0).reshape(1, 4, 4).astype(source_value.dtype)
    # NumPy does not update the fill value type in "astype". Set this manually.
    desired.fill_value = source_value.fill_value

    self.assertNumpyAll(desired, calculated.get_masked_value())
def test_system_spatial_wrapping_and_reorder(self):
    """Run every combination of wrapping option, coordinate system, wrapped state and reorder flag.

    For each combination the wrapped state of the output field is compared to the state implied by the inputs.

    NOTE(review): the source arrived with its indentation stripped. The nesting below is reconstructed from the
    statements themselves; the placement of the two reorder checks under the ``is_empty`` guard should be
    confirmed against the original file.
    """
    # Skipped on Python 3.5 only.
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    keywords = {'spatial_wrapping': list(SpatialWrapping.iter_possible()),
                'crs': [None, Spherical(), CoordinateReferenceSystem(epsg=2136)],
                'unwrapped': [True, False],
                'spatial_reorder': [False, True]}
    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        field = self.get_wrap_field(crs=k.crs, unwrapped=k.unwrapped)

        ops = OcgOperations(dataset=field, spatial_wrapping=k.spatial_wrapping, spatial_reorder=k.spatial_reorder)
        ret = ops.execute()

        actual_field = ret.get_element()

        # The wrapped state is only read where the scoped VM is not null; elsewhere it is recorded as None.
        with vm.scoped_by_emptyable('wrapped state', actual_field):
            if not vm.is_null:
                actual = actual_field.wrapped_state
            else:
                actual = None
        actual_x = actual_field.grid.x.get_value()

        if not actual_field.is_empty:
            self.assertLessEqual(actual_x.max(), 360.)
            if k.spatial_reorder and k.unwrapped and k.spatial_wrapping == 'wrap' and k.crs == Spherical():
                actual_data_value = actual_field.data_variables[0].get_value()
                # Every row of every time slice is expected to hold the same reordered sequence.
                desired_reordered = [None] * actual_data_value.shape[1]
                for idx in range(actual_data_value.shape[1]):
                    desired_reordered[idx] = [3.0, 4.0, 0.0, 1.0, 2.0]
                for tidx in range(actual_data_value.shape[0]):
                    time_data_value = actual_data_value[tidx]
                    self.assertEqual(time_data_value.tolist(), desired_reordered)

            # With a reorder and no wrapping change, the first x-coordinate is below the last.
            if k.spatial_reorder and not k.unwrapped and not k.spatial_wrapping:
                self.assertTrue(actual_x[0] < actual_x[-1])

        # No wrapped state is expected unless the coordinate system is spherical.
        if actual is None or k.crs != Spherical():
            desired = None
        else:
            p = k.spatial_wrapping
            if p is None:
                # No wrapping requested: the state follows the input field.
                if k.unwrapped:
                    desired = WrappedState.UNWRAPPED
                else:
                    desired = WrappedState.WRAPPED
            elif p == 'wrap':
                desired = WrappedState.WRAPPED
            else:
                desired = WrappedState.UNWRAPPED

        self.assertEqual(actual, desired)
def test_keyword_regrid_destination_to_nc(self):
    """Regrid a dataset onto itself, write netCDF, and check the output keeps grid bounds and masked values."""

    source = self.test_data.get_rd('cancm4_tas')
    destination = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=source, regrid_destination=destination, output_format='nc', snippet=True,
                        geom='state_boundaries', select_ugid=[25])
    path_out = ops.execute()

    out_field = ocgis.RequestDataset(path_out).get()
    self.assertTrue(out_field.grid.has_bounds)
    self.assertTrue(np.any(out_field.data_variables[0].get_mask()))
def test_system_subset_shp(self):
    """Check a shapefile subset holds more than 2000 records with and without an output coordinate system."""

    crs_options = [None, Spherical()]
    for ii, output_crs in enumerate(crs_options):
        # The loop index is used as the prefix so each run gets its own output name.
        ops = OcgOperations(dataset=self.rd, output_format=constants.OutputFormatName.SHAPEFILE, snippet=True,
                            geom='world_countries', select_ugid=[69], output_crs=output_crs, prefix=str(ii))
        path_out = ops.execute()

        with fiona.open(path_out) as source:
            records = list(source)
        self.assertTrue(len(records) > 2000)
def test_system_subset_shp(self):
    """Check a shapefile subset holds more than 2000 records with and without an output coordinate system."""

    crs_options = [None, Spherical()]
    for ii, output_crs in enumerate(crs_options):
        # A new request dataset is built per run; the loop index gives each run its own output name.
        ops = OcgOperations(dataset=self.fixture_rd(), output_format=constants.OutputFormatName.SHAPEFILE,
                            snippet=True, geom='world_countries', select_ugid=[69], output_crs=output_crs,
                            prefix=str(ii))
        path_out = ops.execute()

        with fiona.open(path_out) as source:
            records = list(source)
        self.assertTrue(len(records) > 2000)
def test_system_geometry_identifier_typed_appropriately(self):
    """Check the geometry identifier is ``int32`` for the ``NETCDF3_64BIT_OFFSET`` data model.

    The data model is supplied first through the output format options and then through a file written with
    that data model.
    """

    bounding_box = [-100, 30, -90, 40]

    # Case 1: the data model comes from the output format options.
    options = {'data_model': 'NETCDF3_64BIT_OFFSET'}
    source = create_exact_field(create_gridxy_global(resolution=3.0), 'foo', crs=Spherical())
    ops = OcgOperations(dataset=source, output_format_options=options, geom=bounding_box, aggregate=True)
    subset = ops.execute().get_element(container_ugid=1)
    self.assertEqual(subset.geom.ugid.dtype, np.int32)

    # Test data model is retrieved appropriately from file.
    options = {'data_model': 'NETCDF3_64BIT_OFFSET'}
    source = create_exact_field(create_gridxy_global(resolution=3.0), 'foo', crs=Spherical())
    path_out = OcgOperations(dataset=source, output_format_options=options, output_format='nc').execute()

    ops = OcgOperations(dataset=RequestDataset(uri=path_out), geom=bounding_box, aggregate=True)
    subset = ops.execute().get_element(container_ugid=1)
    self.assertEqual(subset.geom.ugid.dtype, np.int32)
def test_keyword_time_subset_func(self):
    """Check a time subset function keeping June only is honoured for in-memory and netCDF output."""

    def _func_(value, bounds=None):
        # Positions, in flattened order, of the time values that fall in June.
        return [ii for ii, v in enumerate(value.flat) if v.month == 6]

    # In-memory output.
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20])
    out_field = ops.execute().get_element()
    for v in out_field.temporal.value_datetime:
        self.assertEqual(v.month, 6)

    # netCDF output, read back from disk.
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20],
                        output_format=constants.OutputFormatName.NETCDF)
    rd_out = RequestDataset(ops.execute())
    for v in rd_out.get().temporal.value_datetime:
        self.assertEqual(v.month, 6)
def test_keyword_conform_units_to(self):
    """Check ``conform_units_to`` changes the values of every dataset when two datasets are given."""

    first = self.test_data.get_rd('cancm4_tas')
    second = self.test_data.get_rd('cancm4_tas')
    # The second dataset gets its own field name so the two can be told apart.
    second._field_name = 'foo'

    ret = OcgOperations(dataset=[first, second], conform_units_to='celsius', snippet=True).execute()

    # Sum of the first time slice in the original units.
    first_slice = first.get().get_field_slice({'time': 0})
    original_sum = first_slice.data_variables[0].get_value().sum()

    for out_field in ret.iter_fields():
        converted_sum = out_field.data_variables[0].get_value().sum()
        self.assertFalse(np.isclose(converted_sum - original_sum, 0))
def test_system_geometry_identifer_added(self):
    """Check the geometry identifier appears in both the CSV and the shapefile of a CSV-shapefile output."""

    gid_name = HeaderName.ID_GEOMETRY
    field = self.get_field()
    self.assertNotIn(gid_name, field)

    ops = OcgOperations(dataset=field, output_format=constants.OutputFormatName.CSV_SHAPEFILE)
    csv_path = ops.execute()

    csv_keys = list(RequestDataset(csv_path).get().keys())
    self.assertIn(gid_name, csv_keys)

    # The linked shapefile lives in the "shp" folder of the output directory.
    shp_path = os.path.join(ops.dir_output, ops.prefix, 'shp', ops.prefix + '_gid.shp')
    shp_keys = list(RequestDataset(shp_path).get().keys())
    self.assertIn(gid_name, shp_keys)
def test_system_netcdf_output_format(self):
    """Check the ``data_model`` output format option controls the data model of the written netCDF file."""

    path = self.get_temporary_file_path('foo.nc')
    Variable('vec', value=[1, 2, 3, 4, 5], dimensions='dvec', dtype=np.int32).write(path)

    # The file written directly from the variable uses the NETCDF4 data model.
    with self.nc_scope(path, 'r') as ds:
        self.assertEqual(ds.data_model, 'NETCDF4')

    ops = OcgOperations(dataset=RequestDataset(uri=path), prefix='converted', output_format='nc',
                        output_format_options={'data_model': 'NETCDF4_CLASSIC'})
    converted_path = ops.execute()

    with self.nc_scope(converted_path, 'r') as ds:
        self.assertEqual(ds.data_model, 'NETCDF4_CLASSIC')
def test_system_scalar_level_dimension(self):
    """Check the ``height`` variable metadata survives a round trip through netCDF output."""

    rd = self.test_data.get_rd('cancm4_tas')
    desired = rd.metadata['variables']['height']

    path_out = OcgOperations(dataset=rd, output_format='nc', snippet=True).execute()
    actual = RequestDataset(uri=path_out).metadata['variables']['height']

    # Not worried about order of attributes.
    for metadata in (desired, actual):
        metadata['attrs'] = dict(metadata['attrs'])

    self.assertDictEqual(actual, desired)
def test_system_concatenating_files(self):
    """Write one file per time step, read them as one dataset, and check values and times match the source."""

    field = self.get_field(ntime=5, nrlz=0, nlevel=0)

    paths = []
    for tidx in range(field.time.shape[0]):
        time_slice = field.get_field_slice({'time': tidx})
        path = self.get_temporary_file_path('time_subset_{}.nc'.format(tidx))
        paths.append(path)
        time_slice.write(path, dataset_kwargs={'format': 'NETCDF4_CLASSIC'})

    # A single request dataset over all the paths is written back out as one netCDF file.
    path_out = OcgOperations(dataset=RequestDataset(paths), output_format='nc').execute()

    combined = RequestDataset(path_out).get()
    combined_value = combined.data_variables[0].get_value()
    self.assertNumpyAll(combined_value, field.data_variables[0].get_value())
    self.assertNumpyAll(combined.time.value_numtime, field.time.value_numtime)
def test_system_merge_geometries_across_shapefiles(self):
    """Union three state geometries and check subsetting a global field by the union changes the grid shape."""

    state_names = ('Nebraska', 'South Dakota', 'North Dakota')
    gci = GeomCabinetIterator(path=self.path_state_boundaries)
    geoms_to_union = [row['geom'] for row in gci if row['properties']['STATE_NAME'] in state_names]
    self.assertEqual(len(geoms_to_union), 3)

    unioned = cascaded_union(geoms_to_union)

    field = create_exact_field(create_gridxy_global(), 'data', crs=WGS84())
    original_shape = field.grid.shape

    ret = OcgOperations(dataset=field, geom=unioned).execute()
    self.assertNotEqual(ret.get_element().grid.shape, original_shape)
def run_standard_operations(self, calc, capture=False, output_format=None):
    """Run ``calc`` through every combination of aggregation, calculation grouping and output format.

    NOTE(review): the source arrived with its indentation stripped; the nesting below is reconstructed from the
    statements themselves and should be confirmed against the original file.

    :param calc: Calculation definitions passed to ``OcgOperations``. The ``'name'`` of the first entry is used
     to look up the output variable.
    :param bool capture: If ``True``, exceptions other than ``ValueError`` and ``AssertionError`` are collected
     and returned instead of raised.
    :param output_format: Optional iterable of output formats used in place of the default three.
    :returns: One dictionary per captured exception, with keys ``'exception'`` and ``'parms'``.
    :rtype: list
    """
    _aggregate = [False, True]
    _calc_grouping = [['month'], ['month', 'year'], 'all']
    _output_format = output_format or [constants.OutputFormatName.OCGIS, constants.OutputFormatName.CSV_SHAPEFILE,
                                       constants.OutputFormatName.NETCDF]
    captured = []
    for ii, tup in enumerate(itertools.product(_aggregate, _calc_grouping, _output_format)):
        # From here on "output_format" is the format of the current combination, not the parameter.
        aggregate, calc_grouping, output_format = tup
        # Aggregated netCDF output is not exercised.
        if aggregate is True and output_format == constants.OutputFormatName.NETCDF:
            continue
        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'year': [2001, 2002]}})
        try:
            ops = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[25], calc=calc,
                                calc_grouping=calc_grouping, output_format=output_format, aggregate=aggregate,
                                prefix=('standard_ops_' + str(ii)))
            ret = ops.execute()
            # Shapes are only checked for the in-memory output format.
            if output_format == constants.OutputFormatName.OCGIS:
                refv = ret.get_element(variable_name=calc[0]['name'], container_ugid=25)
                ref = refv.get_value()
                if aggregate:
                    space_shape = [1]
                else:
                    space_shape = [4, 4]
                if calc_grouping == ['month']:
                    shp1 = [12]
                elif calc_grouping == 'all':
                    # This is raised inside the "try", so with capture enabled it lands in "captured".
                    raise NotImplementedError('calc_grouping all')
                else:
                    shp1 = [24]
                test_shape = shp1 + space_shape
                self.assertEqual(ref.shape, tuple(test_shape))
                if not aggregate:
                    # Ensure the geometry mask is appropriately updated by the function.
                    self.assertTrue(refv.get_mask()[0, 0, 0])
        except ValueError:
            raise
        except AssertionError:
            raise
        except Exception as e:
            # Any other exception is recorded with its parameters when capturing, otherwise raised again.
            if capture:
                parms = dict(aggregate=aggregate, calc_grouping=calc_grouping, output_format=output_format)
                captured.append({'exception': e, 'parms': parms})
            else:
                raise
    return captured
def test_bad_time_dimension(self):
    """Run every output format with ``format_time=False`` and check the time values are left as numbers.

    A ``RuntimeError`` mentioning an HDF error skips the test; any other ``RuntimeError`` is raised again.
    """

    formats = constants.OutputFormatName
    try:
        for output_format in [formats.OCGIS, formats.CSV, formats.CSV_SHAPEFILE, formats.SHAPEFILE,
                              formats.NETCDF]:
            dataset = self.test_data.get_rd('snippet_seasonalbias')
            ops = OcgOperations(dataset=dataset, output_format=output_format, format_time=False,
                                prefix=output_format)
            ret = ops.execute()

            if output_format == formats.OCGIS:
                temporal = ret.get_element().temporal
                self.assertFalse(temporal.format_time)
                desired_centroids = np.array([-712208.5, -712117., -712025., -711933.5])
                self.assertNumpyAll(temporal.value_numtime.data, desired_centroids)
                desired_bounds = np.array([[-712254., -712163.], [-712163., -712071.], [-712071., -711979.],
                                           [-711979., -711888.]])
                self.assertNumpyAll(temporal.bounds.value_numtime.data, desired_bounds)

            if output_format == formats.CSV:
                with open(ret) as f:
                    for row in DictReader(f):
                        # The date part columns stay empty and the time column holds a number.
                        self.assertTrue(all([row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]))
                        self.assertTrue(float(row['TIME']) < -50000)

            if output_format == formats.NETCDF:
                ignore_attributes = {'global': ['history'],
                                     'bounds_time': ['calendar', 'units'],
                                     'bias': ['_FillValue', 'grid_mapping', 'units'],
                                     'latitude': ['standard_name', 'units'],
                                     'longitude': ['standard_name', 'units']}
                self.assertNcEqual(ret, dataset.uri, check_types=False, ignore_attributes=ignore_attributes,
                                   ignore_variables=['latitude_longitude'])
    except RuntimeError as e:
        if "HDF error" not in str(e):
            raise
        raise SkipTest('HDF sometimes has trouble reading the dataset')
def test_calculation_operations_to_nc(self):
    """Write an ``icclim_TG`` monthly calculation to NetCDF and check the attributes of the output file.

    Checked are the global ``history`` and ``title`` attributes, the JSON string stored under the global attribute
    named by ``AbstractIcclimFunction._global_attribute_source_name`` (it must equal the expected literal and be
    loadable as JSON), and the attributes of the ``TG`` variable.
    """
    # Expected JSON serialization of the source attributes.
    expected_source_json = '{"institution": "CCCma (Canadian Centre for Climate Modelling and Analysis, Victoria, BC, Canada)", "institute_id": "CCCma", "experiment_id": "decadal2000", "source": "CanCM4 2010 atmosphere: CanAM4 (AGCM15i, T63L35) ocean: CanOM4 (OGCM4.0, 256x192L40) sea ice: CanSIM1 (Cavitating Fluid, T63 Gaussian Grid) land: CLASS2.7", "model_id": "CanCM4", "forcing": "GHG,Oz,SA,BC,OC,LU,Sl,Vl (GHG includes CO2,CH4,N2O,CFC11,effective CFC12)", "parent_experiment_id": "N/A", "parent_experiment_rip": "N/A", "branch_time": 0.0, "contact": "*****@*****.**", "references": "http://www.cccma.ec.gc.ca/models", "initialization_method": 1, "physics_version": 1, "tracking_id": "fac7bd83-dd7a-425b-b4dc-b5ab2e915939", "branch_time_YMDH": "2001:01:01:00", "CCCma_runid": "DHFP1B_E002_I2001_M01", "CCCma_parent_runid": "DHFP1_E002", "CCCma_data_licence": "1) GRANT OF LICENCE - The Government of Canada (Environment Canada) is the \\nowner of all intellectual property rights (including copyright) that may exist in this Data \\nproduct. You (as \\"The Licensee\\") are hereby granted a non-exclusive, non-assignable, \\nnon-transferable unrestricted licence to use this data product for any purpose including \\nthe right to share these data with others and to make value-added and derivative \\nproducts from it. This licence is not a sale of any or all of the owner\'s rights.\\n2) NO WARRANTY - This Data product is provided \\"as-is\\"; it has not been designed or \\nprepared to meet the Licensee\'s particular requirements. Environment Canada makes no \\nwarranty, either express or implied, including but not limited to, warranties of \\nmerchantability and fitness for a particular purpose. In no event will Environment Canada \\nbe liable for any indirect, special, consequential or other damages attributed to the \\nLicensee\'s use of the Data product.", "product": "output", "experiment": "10- or 30-year run initialized in year 2000", "frequency": "day", "creation_date": "2011-05-08T01:01:51Z", "history": "2011-05-08T01:01:51Z CMOR rewrote data to comply with CF standards and CMIP5 requirements.", "Conventions": "CF-1.4", "project_id": "CMIP5", "table_id": "Table day (28 March 2011) f9d6cfec5981bb8be1801b35a81002f0", "title": "CanCM4 model output prepared for CMIP5 10- or 30-year run initialized in year 2000", "parent_experiment": "N/A", "modeling_realm": "atmos", "realization": 2, "cmor_version": "2.5.4"}'
    expected_variable_attrs = {'units': 'K',
                               'grid_mapping': 'latitude_longitude',
                               'standard_name': AbstractIcclimFunction.standard_name,
                               'long_name': 'Mean of daily mean temperature'}

    dataset = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(calc=[{'func': 'icclim_TG', 'name': 'TG'}], calc_grouping=['month'],
                        slice=[None, None, None, [0, 10], [0, 10]], dataset=dataset, output_format='nc')
    path = ops.execute()

    with nc_scope(path) as ds:
        self.assertIn('Calculation of TG indice (monthly climatology)', ds.history)
        self.assertEqual(ds.title, 'ECA temperature indice TG')
        tg = ds.variables['TG']

        # Check the JSON serialization.
        source_json = ds.__dict__[AbstractIcclimFunction._global_attribute_source_name]
        self.assertEqual(source_json, expected_source_json)

        # The stored string must load as JSON; the parsed result itself is not inspected.
        json.loads(source_json)

        self.assertEqual(dict(tg.__dict__), expected_variable_attrs)
def test_calculation_operations_bad_units(self):
    """Executing ``icclim_SU`` grouped by month and year on ``daymet_tmax`` must raise ``UnitsValidationError``."""
    dataset = self.test_data.get_rd('daymet_tmax')
    ops = OcgOperations(calc=[{'func': 'icclim_SU', 'name': 'SU'}], calc_grouping=['month', 'year'],
                        dataset=dataset)
    # The operations object is built outside the assertion; only execution is expected to fail.
    self.assertRaises(UnitsValidationError, ops.execute)
def test_system_date_groups(self):
    """Check the datetime values of the output time dimension for a range of ``calc_grouping`` settings.

    A ``mean`` calculation is run on the ``cancm4_tasmax_2011`` dataset, subset to ``state_boundaries`` UGID 25,
    for month, year, month/year, day, month/day, year/day and month/day/year groupings.
    """
    calc = [{'func': 'mean', 'name': 'mean'}]

    def grouped_time(dataset, grouping):
        # Run the calculation with ``grouping`` and return the time dimension for container UGID 25.
        ops = OcgOperations(dataset=dataset, calc=calc, calc_grouping=grouping, geom='state_boundaries',
                            select_ugid=[25])
        return ops.execute().get_element(container_ugid=25).time

    rd = self.test_data.get_rd('cancm4_tasmax_2011')

    # Grouping by month.
    rdt = grouped_time(rd, ['month']).value_datetime
    self.assertTrue(np.all(rdt == np.array([dt(2011, month, 16) for month in range(1, 13)])))

    # Grouping by year.
    rdt = grouped_time(rd, ['year']).value_datetime
    self.assertTrue(np.all(rdt == [dt(year, 7, 1) for year in range(2011, 2021)]))

    # Grouping by month and year.
    rdt = grouped_time(rd, ['month', 'year']).value_datetime
    year_months = itertools.product(range(2011, 2021), range(1, 13))
    self.assertTrue(np.all(rdt == [dt(year, month, 16) for year, month in year_months]))

    # Grouping by day.
    rdt = grouped_time(rd, ['day']).value_datetime
    self.assertTrue(np.all(rdt == [dt(2011, 1, day, 12) for day in range(1, 32)]))

    # Grouping by month and day: only two positions are spot-checked.
    rdt = grouped_time(rd, ['month', 'day']).value_datetime
    self.assertEqual(rdt[0], dt(2011, 1, 1, 12))
    self.assertEqual(rdt[12], dt(2011, 1, 13, 12))

    # Grouping by year and day: only the first value is checked.
    rdt = grouped_time(rd, ['year', 'day']).value_datetime
    self.assertEqual(rdt[0], dt(2011, constants.CALC_YEAR_CENTROID_MONTH, 1, 12))

    # Grouping by month, day and year on a request limited to January 2011.
    rd = self.test_data.get_rd('cancm4_tasmax_2011', kwds={'time_region': {'month': [1], 'year': [2011]}})
    field = rd.get()
    ref = grouped_time(rd, ['month', 'day', 'year'])
    rdt = ref.value_datetime
    # NOTE(review): ``rdt`` was just read from ``ref.value_datetime``, so this compares the attribute with itself.
    # Confirm the intended comparison target before tightening it.
    self.assertTrue(np.all(rdt == ref.value_datetime))
    self.assertTrue(np.all(ref.bounds.value_datetime == field.time.bounds.value_datetime))