def test_mfdataset_to_nc(self):
    """Write a yearly mean to netCDF and check the time values read back from the output."""
    request = self.test_data.get_rd('maurer_2010_pr')
    mean_calc = [{'func': 'mean', 'name': 'my_mean'}]
    ops = OcgOperations(
        dataset=request,
        output_format='nc',
        calc=mean_calc,
        calc_grouping=['year'],
        geom='state_boundaries',
        select_ugid=[23],
    )
    path = ops.execute()

    # Load the written output back as a field.
    field = RequestDataset(path, 'my_mean_pr').get()
    expected_time = np.array([18444., 18809.])
    self.assertNumpyAll(field.temporal.value, expected_time)
def test_keyword_output_format_nc_2d_flexible_mesh_ugrid(self):
    """Check the 'nMesh2_face' dimension length of the UGRID flexible mesh output."""
    request = self.test_data.get_rd('cancm4_tas')
    ugrid_format = constants.OUTPUT_FORMAT_NETCDF_UGRID_2D_FLEXIBLE_MESH
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[25],
        output_format=ugrid_format,
    )
    path = ops.execute()
    with self.nc_scope(path) as ds:
        n_faces = len(ds.dimensions['nMesh2_face'])
        self.assertEqual(n_faces, 13)
def test_bad_time_dimension(self):
    """Run every listed output format with ``format_time=False`` and check the raw time values."""
    ocgis.env.DIR_DATA = '/usr/local/climate_data'
    uri = 'seasonalbias.nc'
    variable = 'bias'
    formats = ['numpy', 'csv', 'csv+', 'shp', 'nc']

    for output_format in formats:
        dataset = RequestDataset(uri=uri, variable=variable)
        ops = OcgOperations(
            dataset=dataset,
            output_format=output_format,
            format_time=False,
            prefix=output_format,
        )
        ret = ops.execute()

        if output_format == 'numpy':
            temporal = ret[1]['bias'].temporal
            expected_value = np.array([-712208.5, -712117., -712025., -711933.5])
            self.assertNumpyAll(temporal.value, expected_value)
            expected_bounds = np.array([
                [-712254., -712163.],
                [-712163., -712071.],
                [-712071., -711979.],
                [-711979., -711888.],
            ])
            self.assertNumpyAll(ret[1]['bias'].temporal.bounds, expected_bounds)
        elif output_format == 'csv':
            with open(ret) as f:
                for row in DictReader(f):
                    # Date-part columns must be empty when time is not formatted.
                    blanks = [row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]
                    self.assertTrue(all(blanks))
                    self.assertTrue(float(row['TIME']) < -50000)
        elif output_format == 'nc':
            self.assertNcEqual(dataset.uri, ret, check_types=False)
def test_differing_projections(self):
    """Expect ``ValueError`` when executing over 'daymet_tmax' and 'cancm4_tas' together."""
    daymet = self.test_data.get_rd('daymet_tmax')
    cancm4 = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=[daymet, cancm4], snippet=True)
    self.assertRaises(ValueError, ops.execute)
def test_keyword_output_format_esmpy(self):
    """Test with the ESMPy output format.

    Each combination of ``as_field`` and ``with_slice`` is executed and the
    shape of the returned ``ESMF.Field`` is checked against the shape
    expected for that exact combination.
    """
    import ESMF

    # todo: test spatial subsetting
    # todo: test calculations
    full_slice = [None, None, None, [0, 10], [0, 10]]
    kwds = dict(as_field=[False, True], with_slice=[True, False])
    for k in self.iter_product_keywords(kwds):
        rd = self.test_data.get_rd('cancm4_tas')
        if k.as_field:
            rd = rd.get()

        # Pick the slice per iteration without overwriting the template.
        # Rebinding the template to None would make every later
        # ``with_slice=True`` combination run unsliced.
        slc = full_slice if k.with_slice else None

        ops = OcgOperations(dataset=rd, output_format='esmpy', slice=slc)
        ret = ops.execute()
        self.assertIsInstance(ret, ESMF.Field)

        # Assert the shape that matches the requested combination rather
        # than accepting either shape through an exception fallback.
        if k.with_slice:
            expected_shape = (1, 3650, 1, 10, 10)
        else:
            expected_shape = (1, 3650, 1, 64, 128)
        self.assertEqual(ret.shape, expected_shape)
def test_calculation(self):
    """Check the keys and array shapes of yearly 'mean' and 'std' calculations.

    The operation is aggregated and clipped to ``self.california``; the
    calculation outputs for the 'Prcp' and 'tasmax' variables are verified.
    """
    calc = [{'func': 'mean', 'name': 'mean'}, {'func': 'std', 'name': 'std'}]
    calc_grouping = ['year']
    kwds = {
        'aggregate': True,
        'spatial_operation': 'clip',
        'calc': calc,
        'calc_grouping': calc_grouping,
        'output_format': 'numpy',
        'geom': self.california,
        'dataset': self.dataset,
        'snippet': False,
    }
    ops = OcgOperations(**kwds)
    ret = ops.execute()

    # (variable name, expected length of the first array dimension)
    expectations = [('Prcp', 1), ('tasmax', 10)]
    for variable, first_dim in expectations:
        ref = ret[25].calc[variable]
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias.
        self.assertEqual(ref.keys(), ['mean', 'std', 'n'])
        for value in ref.itervalues():
            self.assertEqual(value.shape, (first_dim, 1, 1, 1))
def test_to_netcdf(self):
    """Write a time-region subset of 'narccap_rotated_pole' to netCDF and check its temporal extent."""
    time_region = {'month': [12], 'year': [1982]}
    request = self.test_data.get_rd('narccap_rotated_pole', kwds=dict(time_region=time_region))

    # it does not care about slices or no geometries
    path = OcgOperations(dataset=request, output_format='nc').execute()

    written = ocgis.RequestDataset(uri=path, variable='tas')
    extent = written.get().temporal.extent
    self.assertEqual(extent, (5444.0, 5474.875))
def test_process_geometries(self):
    """Test multiple geometries with a coordinate system update work as expected."""
    wkt_first = 'POLYGON((-105.21347987288135073 40.21514830508475313,-104.39928495762711691 40.21514830508475313,-104.3192002118643984 39.5677966101694949,-102.37047139830508513 39.61451271186440692,-102.12354343220337682 37.51896186440677639,-105.16009004237288593 37.51896186440677639,-105.21347987288135073 40.21514830508475313))'
    wkt_second = 'POLYGON((-104.15235699152542281 39.02722457627118757,-103.71189088983049942 39.44099576271186436,-102.71750529661017026 39.28082627118644155,-102.35712394067796538 37.63908898305084705,-104.13900953389830306 37.63241525423728717,-104.15235699152542281 39.02722457627118757))'
    geom = []
    for ugid, text in enumerate([wkt_first, wkt_second]):
        geom.append({'geom': wkt.loads(text), 'properties': {'UGID': ugid}})

    # Two 4x4 planes: the first is constant along each row, the second
    # repeats the same row four times.
    first_plane = [[v] * 4 for v in (37.0, 38.0, 39.0, 40.0)]
    second_plane = [[-105.0, -104.0, -103.0, -102.0] for _ in range(4)]
    grid_value = np.ma.array([first_plane, second_plane], mask=False)

    crs_parameters = {
        'a': 6370997,
        'lon_0': -100,
        'y_0': 0,
        'no_defs': True,
        'proj': 'laea',
        'x_0': 0,
        'units': 'm',
        'b': 6370997,
        'lat_0': 45,
    }
    output_crs = CoordinateReferenceSystem(value=crs_parameters)

    grid = SpatialGridDimension(value=grid_value)
    sdim = SpatialDimension(grid=grid, crs=WGS84())
    field = Field(spatial=sdim)

    ops = OcgOperations(dataset=field, geom=geom, output_crs=output_crs)
    ret = ops.execute()

    # Expected mean of the output grid values, keyed by geometry identifier.
    expected = {0: -502052.79407259845, 1: -510391.37909706926}
    for ugid, field_dict in ret.iteritems():
        for out_field in field_dict.itervalues():
            actual_mean = out_field.spatial.grid.value.data.mean()
            self.assertAlmostEqual(actual_mean, expected[ugid])
def test_calculate(self):
    """Run the 'sfwe' calculation grouped by month and write the result to netCDF."""
    sfwe = {'func': 'sfwe', 'name': 'sfwe', 'kwds': {'tas': 'tas', 'pr': 'pr'}}
    time_range = [dt(1990, 1, 1), dt(1990, 3, 31)]

    # Both dataset definitions are updated in place with the time range.
    rds = [self.maurer_pr, self.maurer_tas]
    for definition_ in rds:
        definition_.update({'time_range': time_range})

    ops = OcgOperations(
        dataset=rds,
        geom='state_boundaries',
        select_ugid=[16],
        calc=[sfwe],
        calc_grouping=['month'],
        output_format='nc',
    )
    ops.execute()
def test_keyword_spatial_operations_bounding_box(self):
    """Subset 'subset_test_slp' with a four-element bounding box and check the field shape."""
    bounding_box = [-80, 22.5, 50, 70.0]
    request = self.test_data.get_rd('subset_test_slp')
    ret = OcgOperations(dataset=request, geom=bounding_box).execute()
    slp_field = ret[1]['slp']
    self.assertEqual(slp_field.shape, (1, 365, 1, 18, 143))
def test_get_base_request_size_multifile(self):
    """Check the base request size reported for two request datasets."""
    rds = [
        self.test_data.get_rd('cancm4_tas'),
        self.test_data.get_rd('narccap_pr_wrfg_ncep'),
    ]
    ops = OcgOperations(dataset=rds)
    size = ops.get_base_request_size()

    float64 = dtype('float64')
    float32 = dtype('float32')
    # Entry reported for the 'level' and 'realization' keys.
    no_data = {'kb': 0.0, 'shape': None, 'dtype': None}

    expected_pr = {
        'level': dict(no_data),
        'temporal': {'kb': 228.25, 'shape': (29216,), 'dtype': float64},
        'value': {'kb': 1666909.75, 'shape': (1, 29216, 1, 109, 134), 'dtype': float32},
        'realization': dict(no_data),
        'col': {'kb': 1.046875, 'shape': (134,), 'dtype': float64},
        'row': {'kb': 0.8515625, 'shape': (109,), 'dtype': float64},
    }
    expected_tas = {
        'level': dict(no_data),
        'temporal': {'kb': 28.515625, 'shape': (3650,), 'dtype': float64},
        'value': {'kb': 116800.0, 'shape': (1, 3650, 1, 64, 128), 'dtype': float32},
        'realization': dict(no_data),
        'col': {'kb': 1.0, 'shape': (128,), 'dtype': float64},
        'row': {'kb': 0.5, 'shape': (64,), 'dtype': float64},
    }
    expected = {
        'variables': {'pr': expected_pr, 'tas': expected_tas},
        'total': 1783969.9140625,
    }
    self.assertEqual(expected, size)
def test_calculate(self):
    """Run 'qed_dynamic_percentile_threshold' over every combination of the parameters below."""
    ocgis.env.DIR_BIN = '/home/local/WX/ben.koziol/links/ocgis/bin/QED_2013_dynamic_percentiles'

    percentiles = [90, 92.5, 95, 97.5]
    operations = ['gt', 'gte', 'lt', 'lte']
    # Only the monthly grouping is enabled; ['month', 'year'] and ['year'] are disabled.
    calc_groupings = [['month']]
    uris_variables = [
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmax_daily.1971-2000.nc',
         'tasmax'],
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmin_daily.1971-2000.nc',
         'tasmin'],
    ]
    # The 'us_counties' selection ([2416, 1335]) is disabled.
    geoms_select_ugids = [
        ['qed_city_centroids', None],
        ['state_boundaries', [39]],
    ]

    combinations = itertools.product(percentiles, operations, calc_groupings, uris_variables,
                                     geoms_select_ugids)
    for tup in combinations:
        print(tup)
        percentile, operation, calc_grouping, uri_variable, geom_select_ugid = tup
        uri, variable = uri_variable[0], uri_variable[1]
        geom, select_ugid = geom_select_ugid[0], geom_select_ugid[1]

        dataset = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1990], 'month': [6, 7, 8]},
        }
        calc = [{
            'func': 'qed_dynamic_percentile_threshold',
            'kwds': {'operation': operation, 'percentile': percentile},
            'name': 'dp',
        }]
        ops = OcgOperations(dataset=dataset, geom=geom, select_ugid=select_ugid, calc=calc,
                            calc_grouping=calc_grouping, output_format='numpy')
        ops.execute()
def test_real_data(self):
    """Run the 'freq_duration' calculation for several output formats and check numpy and csv+ output."""
    uri = 'Maurer02new_OBS_tasmax_daily.1971-2000.nc'
    variable = 'tasmax'
    ocgis.env.DIR_DATA = '/usr/local/climate_data'

    for output_format in ['numpy', 'csv+', 'shp', 'csv']:
        dataset = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1991], 'month': [7]},
        }
        calc = [{
            'name': 'Frequency Duration',
            'func': 'freq_duration',
            'kwds': {'threshold': 15.0, 'operation': 'gte'},
        }]
        headers = ['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable', 'calc_key', 'value']
        ops = OcgOperations(dataset=dataset, output_format=output_format, prefix=output_format,
                            calc=calc, calc_grouping=['month', 'year'], geom='us_counties',
                            select_ugid=[2778], aggregate=True, calc_raw=False,
                            spatial_operation='clip', headers=headers)
        ret = ops.execute()

        if output_format == 'numpy':
            ref = ret[2778]['tasmax'].variables['Frequency Duration_tasmax'].value
            self.assertEqual(ref.compressed()[0].shape, (2,))
        elif output_format == 'csv+':
            # Expected rows, compared pairwise with the rows read from the CSV file.
            real = [
                {'COUNT': '1', 'UGID': '2778', 'DID': '1', 'CALC_KEY': 'freq_duration', 'MONTH': '7',
                 'DURATION': '7', 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax', 'DAY': '16'},
                {'COUNT': '1', 'UGID': '2778', 'DID': '1', 'CALC_KEY': 'freq_duration', 'MONTH': '7',
                 'DURATION': '23', 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax', 'DAY': '16'},
            ]
            with open(ret, 'r') as f:
                rows = list(csv.DictReader(f))
            for actual_row, expected_row in zip(rows, real):
                self.assertDictEqual(actual_row, expected_row)
def test_get_base_request_size_multifile_with_geom(self):
    """Check the base request size for two request datasets subset by one selection geometry."""
    rds = [
        self.test_data.get_rd('cancm4_tas'),
        self.test_data.get_rd('narccap_pr_wrfg_ncep'),
    ]
    ops = OcgOperations(dataset=rds, geom='state_boundaries', select_ugid=[23])
    size = ops.get_base_request_size()

    float64 = dtype('float64')
    float32 = dtype('float32')
    # Entry reported for the 'level' and 'realization' keys.
    no_data = {'kb': 0.0, 'shape': None, 'dtype': None}

    expected_pr = {
        'level': dict(no_data),
        'temporal': {'kb': 228.25, 'shape': (29216,), 'dtype': float64},
        'value': {'kb': 21341.375, 'shape': (1, 29216, 1, 17, 11), 'dtype': float32},
        'realization': dict(no_data),
        'col': {'kb': 0.0859375, 'shape': (11,), 'dtype': float64},
        'row': {'kb': 0.1328125, 'shape': (17,), 'dtype': float64},
    }
    expected_tas = {
        'level': dict(no_data),
        'temporal': {'kb': 28.515625, 'shape': (3650,), 'dtype': float64},
        'value': {'kb': 171.09375, 'shape': (1, 3650, 1, 4, 3), 'dtype': float32},
        'realization': dict(no_data),
        'col': {'kb': 0.0234375, 'shape': (3,), 'dtype': float64},
        'row': {'kb': 0.03125, 'shape': (4,), 'dtype': float64},
    }
    expected = {
        'variables': {'pr': expected_pr, 'tas': expected_tas},
        'total': 21769.5078125,
    }
    self.assertEqual(size, expected)
def test_real_data(self):
    """Test calculations on real data.

    Runs the 'freq_duration' calculation for each output format, then checks
    the numpy result shape and the rows of the CSV-shapefile output.
    """
    rd = self.test_data.get_rd('maurer_2010_concatenated_tasmax',
                               kwds={'time_region': {'year': [1991], 'month': [7]}})
    output_formats = [constants.OUTPUT_FORMAT_NUMPY,
                      constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
                      constants.OUTPUT_FORMAT_SHAPEFILE,
                      constants.OUTPUT_FORMAT_CSV]
    for output_format in output_formats:
        ops = OcgOperations(dataset=rd, output_format=output_format, prefix=output_format,
                            calc=[{'name': 'Frequency Duration',
                                   'func': 'freq_duration',
                                   'kwds': {'threshold': 15.0, 'operation': 'gte'}}],
                            calc_grouping=['month', 'year'],
                            geom='us_counties', select_ugid=[2778], aggregate=True,
                            calc_raw=False, spatial_operation='clip',
                            headers=['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable',
                                     'calc_key', 'value'],
                            melted=True)
        ret = ops.execute()

        # Compare against the same constant the loop iterates over, so this
        # branch cannot be silently skipped if the constant's value differs
        # from the string literal.
        if output_format == constants.OUTPUT_FORMAT_NUMPY:
            ref = ret[2778]['tasmax'].variables['Frequency Duration'].value
            self.assertEqual(ref.compressed()[0].shape, (2,))

        if output_format == constants.OUTPUT_FORMAT_CSV_SHAPEFILE:
            # Expected rows, compared pairwise with the rows read from the file.
            real = [{'COUNT': '1', 'UGID': '2778', 'DID': '1', 'CALC_KEY': 'freq_duration',
                     'MONTH': '7', 'DURATION': '7', 'GID': '2778', 'YEAR': '1991',
                     'VARIABLE': 'tasmax', 'DAY': '16'},
                    {'COUNT': '1', 'UGID': '2778', 'DID': '1', 'CALC_KEY': 'freq_duration',
                     'MONTH': '7', 'DURATION': '23', 'GID': '2778', 'YEAR': '1991',
                     'VARIABLE': 'tasmax', 'DAY': '16'}]
            with open(ret, 'r') as f:
                reader = csv.DictReader(f)
                rows = list(reader)
            for row, real_row in zip(rows, real):
                self.assertDictEqual(row, real_row)
def test_rotated_pole_clip_aggregate(self):
    """Clip and aggregate 'narccap_rotated_pole' to one geometry and check the 'tas' shape."""
    time_region = {'month': [12], 'year': [1982]}
    request = self.test_data.get_rd('narccap_rotated_pole', kwds=dict(time_region=time_region))
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[16],
        spatial_operation='clip',
        aggregate=True,
        output_format='numpy',
    )
    collection = ops.execute()
    tas = collection.gvu(16, 'tas')
    self.assertEqual(tas.shape, (1, 248, 1, 1, 1))
def test_operations_two_steps(self):
    """Compute a dynamic kernel percentile threshold using a precomputed daily percentile basis.

    The basis is computed first from the full dataset, then passed to the
    calculation through its keyword arguments. Results are produced in both
    netCDF and numpy formats and checked for consistent shapes.
    """
    ## get the request dataset to use as the basis for the percentiles
    uri = self.test_data.get_uri('cancm4_tas')
    variable = 'tas'
    rd = RequestDataset(uri=uri, variable=variable)
    ## this is the underlying OCGIS dataset object
    nc_basis = rd.get()

    ## NOTE: if you want to subset the basis by time, this step is necessary
    # nc_basis = nc_basis.get_between('temporal',datetime.datetime(2001,1,1),datetime.datetime(2003,12,31,23,59))

    ## these are the values to use when calculating the percentile basis. it
    ## may be good to wrap this in a function to have memory freed after the
    ## percentile structure array is computed.
    all_values = nc_basis.variables[variable].value
    ## these are the datetime objects used for window creation
    temporal = nc_basis.temporal.value_datetime
    ## additional parameters for calculating the basis
    percentile = 10
    width = 5
    ## get the structure array
    from ocgis.calc.library.index.dynamic_kernel_percentile import DynamicDailyKernelPercentileThreshold
    daily_percentile = DynamicDailyKernelPercentileThreshold.get_daily_percentile(all_values, temporal,
                                                                                  percentile, width)

    ## perform the calculation using the precomputed basis. in this case,
    ## the basis and target datasets are the same, so the RequestDataset is
    ## reused.
    calc_grouping = ['month', 'year']
    kwds = {'percentile': percentile, 'width': width, 'operation': 'lt',
            'daily_percentile': daily_percentile}
    calc = [{'func': 'dynamic_kernel_percentile_threshold', 'name': 'tg10p', 'kwds': kwds}]
    ops = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc, output_format='nc')
    ret = ops.execute()

    ## if we want to return the values as a three-dimensional numpy array the
    ## method below will do this. note the interface arrangement for the next
    ## release will alter this slightly.
    ops = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc, output_format='numpy')
    arrs = ops.execute()

    ## reference the returned numpy data. the first key is the geometry identifier.
    ## 1 in this case as this is the default for no selection geometry. the second
    ## key is the request dataset alias and the third is the calculation name.
    ## the variable name is appended to the end of the calculation to maintain
    ## a unique identifier.
    tg10p = arrs[1]['tas'].variables['tg10p'].value
    ## if we want the date information for the temporal groups date attributes
    date_parts = arrs[1]['tas'].temporal.date_parts
    # Use a unittest assertion instead of a bare ``assert`` so the check is
    # not stripped under ``python -O`` and reports both values on failure.
    self.assertEqual(date_parts.shape[0], tg10p.shape[1])
    ## these are the representative datetime objects
    rep_dt = arrs[1]['tas'].temporal.value_datetime
    ## and these are the lower and upper time bounds on the date groups
    bin_bounds = arrs[1]['tas'].temporal.bounds_datetime

    ## confirm we have values for each month and year (12*10)
    ret_ds = nc.Dataset(ret)
    try:
        self.assertEqual(ret_ds.variables['tg10p'].shape, (120, 64, 128))
    finally:
        ret_ds.close()
def test_clip_aggregate(self):
    """Execute a non-aggregated clip for two selection geometries (regression: this geometry was hanging)."""
    request = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'year': [2003]}})
    csv_shp = constants.OUTPUT_FORMAT_CSV_SHAPEFILE
    OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[14, 16],
        aggregate=False,
        spatial_operation='clip',
        output_format=csv_shp,
    ).execute()
def test_HeatIndex(self):
    """Run the 'heat_index' calculation and check the returned variables and melted rows."""
    ds = [self.tasmax, self.rhsmax]
    heat_index = {
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }
    calc = [heat_index]

    # Both dataset definitions receive the same time range (set in place).
    time_range = [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]
    for definition_ in ds:
        definition_['time_range'] = time_range

    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()

    ref = ret[1]
    self.assertEqual(ref.variables.keys(), ['tasmax', 'rhsmax', 'heat_index'])
    hi = ref.variables['heat_index']
    self.assertEqual(hi.value.shape, (365, 1, 64, 128))

    # Inspect the first 1000 melted rows only.
    it = MeltedIterator(ret[1], mode='calc')
    null_keys = ['vid', 'var_name', 'did', 'uri']
    for ii, row in enumerate(it.iter_rows()):
        if ii >= 1000:
            break
        if ii == 0:
            self.assertEqual(row['value'], None)
        for key in null_keys:
            self.assertEqual(row[key], None)

    ops = OcgOperations(dataset=ds, calc=calc, output_format='numpy', snippet=True)
    ops.execute()
def test_bad_time_dimension(self):
    """Test output when the time dimension is not formatted."""
    formats = [
        constants.OUTPUT_FORMAT_NUMPY,
        constants.OUTPUT_FORMAT_CSV,
        constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
        constants.OUTPUT_FORMAT_SHAPEFILE,
        constants.OUTPUT_FORMAT_NETCDF,
    ]
    for output_format in formats:
        dataset = self.test_data.get_rd('snippet_seasonalbias')
        ops = OcgOperations(dataset=dataset, output_format=output_format, format_time=False,
                            prefix=output_format)
        ret = ops.execute()

        if output_format == constants.OUTPUT_FORMAT_NUMPY:
            self.assertFalse(ret[1]['bias'].temporal.format_time)
            expected_value = np.array([-712208.5, -712117., -712025., -711933.5])
            self.assertNumpyAll(ret[1]['bias'].temporal.value, expected_value)
            expected_bounds = np.array([
                [-712254., -712163.],
                [-712163., -712071.],
                [-712071., -711979.],
                [-711979., -711888.],
            ])
            self.assertNumpyAll(ret[1]['bias'].temporal.bounds, expected_bounds)

        if output_format == constants.OUTPUT_FORMAT_CSV:
            with open(ret) as f:
                for row in DictReader(f):
                    # Date-part columns must be empty when time is not formatted.
                    blanks = [row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]
                    self.assertTrue(all(blanks))
                    self.assertTrue(float(row['TIME']) < -50000)

        if output_format == constants.OUTPUT_FORMAT_NETCDF:
            ignore_attributes = {
                'global': ['history'],
                'bounds_time': ['calendar', 'units'],
                'bias': ['_FillValue', 'grid_mapping', 'units'],
            }
            self.assertNcEqual(ret, dataset.uri, check_types=False,
                               ignore_attributes=ignore_attributes,
                               ignore_variables=['latitude_longitude'])
def test_differing_projections(self):
    """Executing over 'daymet_tmax' and 'cancm4_tas' together must raise ``ValueError``."""
    datasets = [
        self.test_data.get_rd('daymet_tmax'),
        self.test_data.get_rd('cancm4_tas'),
    ]
    ops = OcgOperations(dataset=datasets, snippet=True)
    with self.assertRaises(ValueError):
        ops.execute()
def test_heat_index(self):
    """Run 'heat_index' on full arrays and with monthly grouping over one selection geometry."""
    ocgis.env.OVERWRITE = True
    kwds = {'time_range': [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]}
    ds = [
        self.test_data.get_rd('cancm4_tasmax_2011', kwds=kwds),
        self.test_data.get_rd('cancm4_rhsmax', kwds=kwds),
    ]
    calc = [{
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }]
    select_ugid = [25]
    alias = 'tasmax_rhsmax'

    ## operations on entire data arrays
    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()
    ref = ret[1]
    self.assertEqual(ref.keys(), [alias])
    self.assertEqual(ref[alias].variables.keys(), ['heat_index'])
    hi = ref[alias].variables['heat_index'].value
    self.assertEqual(hi.shape, (1, 365, 1, 64, 128))

    ## confirm no masked geometries
    point_mask = ref[alias].spatial.geom.point.value.mask
    self.assertFalse(point_mask.any())
    ## confirm some masked data in calculation output
    self.assertTrue(hi.mask.any())

    # try temporal grouping
    ops = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month'], geom='state_boundaries',
                        select_ugid=select_ugid)
    ret = ops.execute()
    grouped = ret[25][alias].variables['heat_index'].value
    self.assertEqual(grouped.shape, (1, 12, 1, 5, 4))
def test_calc_grouping_seasonal_with_year(self):
    """Check the output shape of a mean grouped by a month list combined with 'year'."""
    grouping = [[1, 2, 3], 'year']
    mean_calc = [{'func': 'mean', 'name': 'mean'}]
    request = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(
        dataset=request,
        calc=mean_calc,
        calc_grouping=grouping,
        geom='state_boundaries',
        select_ugid=[25],
    )
    ret = ops.execute()
    self.assertEqual(ret[25]['tas'].shape, (1, 10, 1, 5, 4))
def test_null_parms(self):
    """Check default values on operations built from datasets without ranges."""
    ops = OcgOperations(dataset=self.datasets_no_range)
    self.assertEqual(ops.geom, None)
    self.assertEqual(len(ops.dataset), 3)
    for request in ops.dataset.itervalues():
        for range_name in ('time_range', 'level_range'):
            self.assertEqual(getattr(request, range_name), None)
    # The string representation must be computable without raising.
    ops.__repr__()
def test_csv_conversion(self):
    """Convert to CSV without a geometry, then with a user polygon."""
    plain_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv')
    self.get_ret(plain_ops)

    ## test with a geometry to check writing of user-geometry overview shapefile
    polygon = make_poly((38, 39), (-104, -103))
    geom_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv', geom=polygon)
    geom_ops.execute()
def test_clip_aggregate(self):
    """Execute a non-aggregated clip to 'csv+' output (regression: this geometry was hanging)."""
    request = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'year': [2003]}})
    options = dict(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[14, 16],
        aggregate=False,
        spatial_operation='clip',
        output_format='csv+',
    )
    OcgOperations(**options).execute()
def test_null_parms(self):
    """Check default values on operations built from datasets without ranges."""
    ops = OcgOperations(dataset=self.datasets_no_range)
    self.assertEqual(ops.geom, None)
    self.assertEqual(len(ops.dataset), 3)
    for request in ops.dataset:
        self.assertEqual(request.time_range, None)
        self.assertEqual(request.level_range, None)
    # The string representation must be computable without raising.
    ops.__repr__()
def test_to_netcdf_with_slice(self):
    """Write a sliced 'narccap_rotated_pole' request to netCDF and check the shape read back."""
    request = self.test_data.get_rd('narccap_rotated_pole')
    slc = [None, [0, 10], None, [0, 10], [0, 10]]
    path = OcgOperations(dataset=request, output_format='nc', slice=slc, prefix='slice').execute()
    written = ocgis.RequestDataset(uri=path, variable='tas')
    self.assertEqual(written.get().shape, (1, 10, 1, 10, 10))
def test_geom_string(self):
    """Check the ``geom`` attribute after assigning shapefile keys, ``None`` and a bounding box."""
    ops = OcgOperations(dataset=self.datasets, geom='state_boundaries')
    n_states = len(list(ops.geom))
    self.assertEqual(n_states, 51)

    ops.geom = None
    self.assertEqual(ops.geom, None)

    ops.geom = 'mi_watersheds'
    n_watersheds = len(list(ops.geom))
    self.assertEqual(n_watersheds, 60)

    ops.geom = [-120, 40, -110, 50]
    bounds = ops.geom[0]['geom'].bounds
    self.assertEqual(bounds, (-120.0, 40.0, -110.0, 50.0))
def test_point_shapefile_subset(self):
    """Subset by 'qed_city_centroids' for several output formats; numpy must return four entries."""
    for fmt in ['numpy', 'nc', 'csv', 'csv+']:
        request = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=request, geom='qed_city_centroids', output_format=fmt,
                            prefix=fmt)
        ret = ops.execute()
        if fmt == 'numpy':
            self.assertEqual(len(ret), 4)
def test_daymet(self):
    """Execute a snippet subset of 'daymet_tmax' for one selection geometry."""
    request = self.test_data.get_rd('daymet_tmax')
    options = {
        'dataset': request,
        'geom': 'state_boundaries',
        'snippet': True,
        'select_ugid': [32],
        'output_format': 'numpy',
    }
    OcgOperations(**options).execute()
def test_get_meta(self):
    """The metadata string must be long and multi-line, with and without a calculation."""
    variants = [
        {},
        {'calc': [{'func': 'mean', 'name': 'my_mean'}]},
    ]
    for extra in variants:
        ops = OcgOperations(dataset=self.datasets, **extra)
        meta = ops.get_meta()
        self.assertTrue(len(meta) > 100)
        self.assertTrue('\n' in meta)
def test_clip_aggregate(self):
    """Clip and aggregate 'narccap_rotated_pole'; check the output CRS, shape and mean."""
    time_region = {'month': [12], 'year': [1982]}
    request = self.test_data.get_rd('narccap_rotated_pole', kwds=dict(time_region=time_region))
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[16],
        spatial_operation='clip',
        aggregate=True,
        output_format='numpy',
    )

    # the output CRS should be automatically updated for this operation
    self.assertEqual(ops.output_crs, CFWGS84())

    tas = ops.execute().gvu(16, 'tas')
    self.assertEqual(tas.shape, (1, 248, 1, 1, 1))
    self.assertAlmostEqual(tas.mean(), 269.83051915322579)
def get_collection(self, aggregate=False):
    """Execute a subset of 'cancm4_tas' for one geometry and return the result.

    :param bool aggregate: If ``True``, use the 'clip' spatial operation and
     aggregate; otherwise use 'intersects'.
    :returns: The object returned by ``OcgOperations.execute``.
    """
    spatial_operation = 'clip' if aggregate else 'intersects'
    request = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[25],
        spatial_operation=spatial_operation,
        aggregate=aggregate,
    )
    return ops.execute()
def test_csv_conversion(self):
    """Convert to CSV without a geometry, then with a user polygon, allowing overwrite."""
    ocgis.env.OVERWRITE = True

    plain_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv')
    self.get_ret(plain_ops)

    ## test with a geometry to check writing of user-geometry overview shapefile
    polygon = make_poly((38, 39), (-104, -103))
    geom_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv', geom=polygon)
    geom_ops.execute()
def test_point_shapefile_subset(self):
    """Subset by 'qed_city_centroids' for several output formats; numpy must return four entries."""
    formats = ('numpy', 'nc', 'csv', 'csv+')
    for fmt in formats:
        request = self.test_data.get_rd('cancm4_tas')
        result = OcgOperations(
            dataset=request,
            geom='qed_city_centroids',
            output_format=fmt,
            prefix=fmt,
        ).execute()
        if fmt != 'numpy':
            continue
        self.assertEqual(len(result), 4)
def test_daymet(self):
    """Execute a snippet subset of 'daymet_tmax' for one selection geometry."""
    request = self.test_data.get_rd('daymet_tmax')
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        snippet=True,
        select_ugid=[32],
        output_format='numpy',
    )
    ops.execute()
def get_collection(self, aggregate=False):
    """Execute a subset of 'cancm4_tas' for geometry 25 and return that geometry's entry.

    :param bool aggregate: If ``True``, use the 'clip' spatial operation and
     aggregate; otherwise use 'intersects'.
    :returns: The value stored under key ``25`` of the executed operations' result.
    """
    spatial_operation = 'clip' if aggregate else 'intersects'
    request = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[25],
        spatial_operation=spatial_operation,
        aggregate=aggregate,
    )
    result = ops.execute()
    return result[25]
def test_calculate(self):
    """Run 'qed_dynamic_percentile_threshold' over every combination of the parameters below."""
    ocgis.env.DIR_BIN = '/home/local/WX/ben.koziol/links/ocgis/bin/QED_2013_dynamic_percentiles'

    percentiles = [90, 92.5, 95, 97.5]
    operations = ['gt', 'gte', 'lt', 'lte']
    # Only the monthly grouping is enabled; ['month', 'year'] and ['year'] are disabled.
    calc_groupings = [['month']]
    uris_variables = [
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmax_daily.1971-2000.nc',
         'tasmax'],
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmin_daily.1971-2000.nc',
         'tasmin'],
    ]
    # The 'us_counties' selection ([2416, 1335]) is disabled.
    geoms_select_ugids = [
        ['qed_city_centroids', None],
        ['state_boundaries', [39]],
    ]

    for tup in itertools.product(percentiles, operations, calc_groupings, uris_variables,
                                 geoms_select_ugids):
        print(tup)
        percentile, operation, calc_grouping, uri_variable, geom_select_ugid = tup

        request = {
            'uri': uri_variable[0],
            'variable': uri_variable[1],
            'time_region': {'year': [1990], 'month': [6, 7, 8]},
        }
        threshold_calc = {
            'func': 'qed_dynamic_percentile_threshold',
            'kwds': {'operation': operation, 'percentile': percentile},
            'name': 'dp',
        }
        ops = OcgOperations(
            dataset=request,
            geom=geom_select_ugid[0],
            select_ugid=geom_select_ugid[1],
            calc=[threshold_calc],
            calc_grouping=calc_grouping,
            output_format='numpy',
        )
        ops.execute()
def test_clip_aggregate(self):
    """Execute a non-aggregated clip to 'csv+' output (regression: this geometry was hanging)."""
    year_filter = {'time_region': {'year': [2003]}}
    request = self.test_data.get_rd('cancm4_tas', kwds=year_filter)
    ops = OcgOperations(
        dataset=request,
        geom='state_boundaries',
        select_ugid=[14, 16],
        aggregate=False,
        spatial_operation='clip',
        output_format='csv+',
    )
    ops.execute()
def test_calc_grouping(self):
    """Check ``CalcGrouping`` value normalization and grouping validation in ``OcgOperations``.

    First, ``definition.CalcGrouping`` is expected to store ``None`` as-is,
    a list as a tuple, and a bare string as a one-element tuple. Second,
    every combination of the candidate groupings is passed to
    ``OcgOperations``; a ``DefinitionValidationError`` is tolerated only when
    the combination contains an element outside day/month/year.
    """
    for cg in [None, ['day', 'month'], 'day']:
        if cg is None:
            expected = None
        elif isinstance(cg, str):
            # ``tuple('day')`` would split the string into characters, so a
            # bare string is wrapped explicitly.
            expected = (cg,)
        else:
            expected = tuple(cg)
        obj = definition.CalcGrouping(cg)
        self.assertEqual(obj.value, expected)

    ## only month, year, and day combinations are currently supported
    rd = self.test_data.get_rd('cancm4_tas')
    calcs = [None, [{'func': 'mean', 'name': 'mean'}]]
    acceptable = ['day', 'month', 'year']
    candidates = ['day', 'month', 'year', 'hour', 'minute']
    for calc in calcs:
        for length in [1, 2, 3, 4, 5]:
            for combo in itertools.combinations(candidates, length):
                try:
                    OcgOperations(dataset=rd, calc=calc, calc_grouping=combo)
                except DefinitionValidationError:
                    # A validation error is a failure only when every
                    # element of the combination is supported.
                    if all(c in acceptable for c in combo):
                        raise
def get_ops(self, kwds=None, time_range=None, level_range=None):
    """Build an ``OcgOperations`` object for the dataset from ``self.get_dataset``.

    :param dict kwds: Optional keyword arguments for ``OcgOperations``. The
     mapping is copied, so neither the caller's dictionary nor a shared
     default is modified. 'output_format' defaults to 'numpy' when absent;
     'dataset' is always overwritten.
    :param time_range: Passed through to ``self.get_dataset``.
    :param level_range: Passed through to ``self.get_dataset``.
    :returns: The constructed ``OcgOperations`` instance.
    """
    dataset = self.get_dataset(time_range, level_range)
    # Work on a private copy: mutating a ``{}`` default would leak the
    # 'dataset' and 'output_format' entries into later calls.
    kwds = dict(kwds) if kwds else {}
    kwds.setdefault('output_format', 'numpy')
    kwds['dataset'] = dataset
    return OcgOperations(**kwds)
def run_standard_operations(self, calc, capture=False, output_format=None):
    """Execute ``calc`` over combinations of aggregation, temporal grouping and output format.

    :param calc: Calculation definition passed to ``OcgOperations``; the
     'name' of its first element is used to look up the numpy result.
    :param bool capture: If ``True``, exceptions are collected and returned
     instead of being raised.
    :param output_format: Optional sequence of output formats. Defaults to
     ``['numpy', 'csv+', 'nc']`` when falsy.
    :returns: A list of dictionaries with keys 'exception' and 'parms', one
     per captured failure.
    """
    aggregate_options = [False, True]
    grouping_options = [['month'], ['month', 'year']]
    format_options = output_format or ['numpy', 'csv+', 'nc']
    captured = []

    combinations = itertools.product(aggregate_options, grouping_options, format_options)
    for ii, (aggregate, calc_grouping, fmt) in enumerate(combinations):
        # The aggregated netCDF combination is not exercised.
        if aggregate is True and fmt == 'nc':
            continue
        rd = self.test_data.get_rd('cancm4_tas', kwds={'time_region': {'year': [2001, 2002]}})
        try:
            ops = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[25],
                                calc=calc, calc_grouping=calc_grouping, output_format=fmt,
                                aggregate=aggregate, prefix=('standard_ops_' + str(ii)))
            ret = ops.execute()
            if fmt == 'numpy':
                ref = ret[25].calc['tas'][calc[0]['name']]
                space_shape = [1, 1] if aggregate else [5, 4]
                # Expected length of the first dimension for the grouping used.
                first_dim = [12] if calc_grouping == ['month'] else [24]
                test_shape = first_dim + [1] + space_shape
                self.assertEqual(ref.shape, tuple(test_shape))
                if not aggregate:
                    self.assertTrue(np.ma.is_masked(ref[0, 0, 0, 0]))
        except Exception as e:
            if not capture:
                raise
            parms = dict(aggregate=aggregate, calc_grouping=calc_grouping, output_format=fmt)
            captured.append({'exception': e, 'parms': parms})
    return captured
def test_same_variable_name(self):
    """Identical names or aliases must raise ``KeyError``; distinct aliases must succeed."""
    # Two unmodified copies collide.
    duplicates = [self.cancm4.copy(), self.cancm4.copy()]
    with self.assertRaises(KeyError):
        OcgOperations(dataset=duplicates)

    # The same alias on both copies collides as well.
    for request in duplicates:
        request.alias = 'foo'
    with self.assertRaises(KeyError):
        OcgOperations(dataset=duplicates)

    # A unique alias on the first copy resolves the collision.
    distinct = [self.cancm4.copy(), self.cancm4.copy()]
    distinct[0].alias = 'foo_var'
    ret = OcgOperations(dataset=distinct, snippet=True).execute()
    self.assertEqual(ret[1].variables.keys(), ['foo_var', 'tasmax'])
    values = ret[1].variables.values()
    first, second = values[0], values[1]
    self.assertTrue(np.all(first.value == second.value))
def test_HeatIndex_keyed_output(self):
    """Skipped test for keyed output of the 'heat_index' calculation."""
    # The test is skipped unconditionally; the code below does not run.
    raise SkipTest

    ds = [
        self.test_data.get_rd('cancm4_tasmax_2011'),
        self.test_data.get_rd('cancm4_rhsmax'),
    ]
    heat_index = {
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }
    calc = [heat_index]
    ops = OcgOperations(dataset=ds, calc=calc, snippet=False, output_format='numpy')
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()

    it = KeyedIterator(ret[1], mode='calc')
    for ii, row in enumerate(it.iter_rows(ret[1])):
        if ii >= 1000:
            break
        self.assertEqual(row['cid'], 1)
        self.assertEqual(row['tgid'], None)
        self.assertNotEqual(row['tid'], None)

    ops = OcgOperations(dataset=ds, calc=calc, snippet=True, output_format='keyed')
    ops.execute()
def get_ops(self, kwds=None):
    """Build snippet ``OcgOperations`` for ``self.dataset`` subset by ``self.california``.

    :param dict kwds: Optional mapping of attribute names to values. Each
     pair is applied to the operations object with ``setattr`` after
     construction. ``None`` (the default) applies nothing.
    :returns: The configured ``OcgOperations`` instance.
    """
    geom = self.california
    ops = OcgOperations(dataset=self.dataset, snippet=True, geom=geom, output_format='numpy')
    # ``None`` replaces the mutable ``{}`` default, which would be shared
    # across calls.
    for k, v in (kwds or {}).iteritems():
        setattr(ops, k, v)
    return ops
def test_keyed_conversion(self):
    """Skipped test for the 'keyed' output format with and without a calculation."""
    # The test is skipped unconditionally; the code below does not run.
    raise SkipTest

    calc_options = [None, [{'func': 'mean', 'name': 'my_mean'}]]
    group = ['month', 'year']
    for calc in calc_options:
        ops = OcgOperations(dataset=self.get_dataset(), output_format='keyed', calc=calc,
                            calc_grouping=group)
        self.get_ret(ops)
def test_bad_time_dimension(self):
    """Run several output formats with ``format_time=False`` and check the numpy time values."""
    ocgis.env.DIR_DATA = '/usr/local/climate_data'
    uri = 'seasonalbias.nc'
    variable = 'bias'

    for output_format in ['csv', 'csv+', 'shp', 'numpy']:
        request = {'uri': uri, 'variable': variable}
        ops = OcgOperations(dataset=request, output_format=output_format, format_time=False,
                            prefix=output_format)
        ret = ops.execute()

        if output_format != 'numpy':
            continue

        expected_value = np.array([-712208.5, -712117., -712025., -711933.5])
        self.assertNumpyAll(ret[1].variables['bias'].temporal.value, expected_value)
        expected_bounds = np.array([
            [-712254., -712163.],
            [-712163., -712071.],
            [-712071., -711979.],
            [-711979., -711888.],
        ])
        self.assertNumpyAll(ret[1].variables['bias'].temporal.bounds, expected_bounds)
def test_frequency_duration_real_data(self):
    """Execute the 'freq_duration' calculation for the 'csv+', 'shp' and 'csv' output formats."""
    uri = 'Maurer02new_OBS_tasmax_daily.1971-2000.nc'
    variable = 'tasmax'
    ocgis.env.DIR_DATA = '/usr/local/climate_data'

    for output_format in ['csv+', 'shp', 'csv']:
        request = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1991], 'month': [7]},
        }
        freq_duration = {
            'name': 'Frequency Duration',
            'func': 'freq_duration',
            'kwds': {'threshold': 25.0, 'operation': 'gte'},
        }
        headers = ['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable', 'calc_name', 'value']
        ops = OcgOperations(dataset=request, output_format=output_format, prefix=output_format,
                            calc=[freq_duration], calc_grouping=['month', 'year'],
                            geom='us_counties', select_ugid=[2778], aggregate=True,
                            calc_raw=False, spatial_operation='clip', headers=headers)
        ops.execute()
def test_high_res(self):
    """Convert generated ``NcSpatial(0.5, ...)`` data to 'nc' as a snippet.

    The data file comes from ``self.make_data`` and is subset by
    ``self.nebraska`` once per spatial abstraction ('point', 'polygon').
    Only successful execution is checked; nothing is asserted on the output.
    """
    ocgis.env.OVERWRITE = True
    nc_spatial = NcSpatial(0.5, (-90.0, 90.0), (0.0, 360.0))
    path = self.make_data(nc_spatial)
    dataset = {'uri': path, 'variable': 'foo'}
    geom = self.nebraska

    # The former per-iteration ``interface`` dictionary was never read and
    # has been removed; the abstraction goes straight to the operations.
    for s_abstraction in ['point', 'polygon']:
        ops = OcgOperations(dataset=dataset,
                            output_format='nc',
                            geom=geom,
                            snippet=True,
                            abstraction=s_abstraction)
        OcgInterpreter(ops).execute()
def test_shp_conversion(self):
    """Run the shapefile conversion without a calculation and with a mean."""
    ocgis.env.OVERWRITE = True
    grouping = ['month', 'year']
    mean_calc = [{'func': 'mean', 'name': 'my_mean'}]

    # first pass has no calculation, second pass computes the mean
    for c in (None, mean_calc):
        ops = OcgOperations(dataset=self.get_dataset(),
                            output_format='shp',
                            calc_grouping=grouping,
                            calc=c)
        ret = self.get_ret(ops)
def test_differing_projections(self):
    """Mix datasets with different projections in a single request.

    The default (numpy) output is expected to execute; the same request
    with ``output_format='csv+'`` is expected to raise ``ValueError``.
    """
    datasets = [self.test_data.get_rd('daymet_tmax'),
                self.test_data.get_rd('cancm4_tas')]

    ## for numpy formats, different projections are allowed.
    numpy_ops = OcgOperations(dataset=datasets, snippet=True)
    ret = numpy_ops.execute()

    ## it is not okay for other formats. construction stays inside the
    ## context manager so an error from either step is accepted.
    with self.assertRaises(ValueError):
        csv_ops = OcgOperations(dataset=datasets,
                                snippet=True,
                                output_format='csv+')
        csv_ops.execute()
def test_low_res(self):
    """Convert generated ``NcSpatial(5.0, ...)`` data to a shapefile.

    The data file comes from ``self.make_data`` and is subset by
    ``self.nebraska`` once per spatial abstraction ('point', 'polygon').
    Only successful execution is checked; nothing is asserted on the output.
    """
    ocgis.env.OVERWRITE = True
    nc_spatial = NcSpatial(5.0, (-90.0, 90.0), (0.0, 360.0))
    path = self.make_data(nc_spatial)
    dataset = {'uri': path, 'variable': 'foo'}
    geom = self.nebraska

    # The inspection result is not used below. The call itself is kept
    # because constructing ``Inspect`` may touch the file -- TODO confirm
    # whether it can be dropped entirely.
    Inspect(dataset['uri'], dataset['variable'])

    # The former per-iteration ``interface`` dictionary was never read and
    # has been removed; the abstraction goes straight to the operations.
    for s_abstraction in ['point', 'polygon']:
        ops = OcgOperations(dataset=dataset,
                            output_format='shp',
                            geom=geom,
                            abstraction=s_abstraction)
        OcgInterpreter(ops).execute()
def test_geom_string(self):
    """Check the forms accepted by the ``geom`` parameter.

    Covers a named shapefile key at construction, ``None``, a second
    shapefile key, and a bounding box given both as a pipe-delimited string
    and as a list of numbers.
    """
    ops = OcgOperations(dataset=self.datasets, geom='state_boundaries')
    self.assertEqual(len(ops.geom), 51)

    ops.geom = None
    self.assertEqual(ops.geom, None)

    ops.geom = 'mi_watersheds'
    self.assertEqual(len(ops.geom), 60)

    # both bounding-box spellings must yield the same single geometry
    expected_bounds = (-120.0, 40.0, -110.0, 50.0)
    for bbox in ('-120|40|-110|50', [-120, 40, -110, 50]):
        ops.geom = bbox
        self.assertEqual(ops.geom.spatial.geom[0].bounds, expected_bounds)
def _get_operations_(request):
    """Build an ``OcgOperations`` object from a request's query string.

    :param request: Object exposing ``META['QUERY_STRING']``.
    :returns: The result of ``OcgOperations.parse_query`` on the reduced
        query dictionary.
    """
    ## parse the raw query string into a dictionary
    raw_query = parse_qs(request.META['QUERY_STRING'])
    ## pull together possible multiple arguments for the dataset request
    reduced_query = reduce_query(raw_query)
    ## construct the operations object
    return OcgOperations.parse_query(reduced_query)
def test_get_meta(self):
    """``get_meta`` returns a long, multi-line string with or without a calc."""
    mean_calc = [{'func': 'mean', 'name': 'my_mean'}]

    # first scenario: plain request; second: request with a calculation
    for extra_kwds in ({}, {'calc': mean_calc}):
        ops = OcgOperations(dataset=self.datasets, **extra_kwds)
        meta = ops.get_meta()
        self.assertTrue(len(meta) > 100)
        self.assertTrue('\n' in meta)
def get_ret(self, ops=None, kwds=None, shp=False, time_range=None,
            level_range=None):
    """Execute operations and optionally run an extra shapefile conversion.

    :param ops: Operations object to execute. When ``None``, one is built
        with ``self.get_ops`` from ``kwds``, ``time_range`` and
        ``level_range``.
    :param kwds: Optional mapping of operations keyword arguments. ``None``
        (the default) is treated as an empty mapping.
    :param shp: When true (or when ``self.return_shp`` is true), a second
        operations object is built from ``kwds`` alone with
        ``output_format='shp'`` and executed; its result is discarded.
    :param time_range: Forwarded to ``self.get_ops`` when ``ops`` is ``None``.
    :param level_range: Forwarded to ``self.get_ops`` when ``ops`` is ``None``.
    :returns: The result of executing ``ops`` through ``OcgInterpreter``.
    """
    # A ``None`` sentinel replaces the former ``{}`` default, which was a
    # single dict object shared by every call. ``get_ops`` still receives
    # an empty dict when no keywords are supplied.
    kwds = {} if kwds is None else kwds

    if ops is None:
        ops = self.get_ops(kwds,
                           time_range=time_range,
                           level_range=level_range)
    # the operations are stored on the instance before execution
    self.ops = ops
    ret = OcgInterpreter(ops).execute()

    if shp or self.return_shp:
        # work on a copy so the caller's mapping is left untouched
        shp_kwds = kwds.copy()
        shp_kwds.update({'output_format': 'shp'})
        OcgInterpreter(OcgOperations(**shp_kwds)).execute()

    return ret
def test_keyed(self):
    """Multi-variable request checked for id usage, then run as 'keyed'.

    The test is unconditionally skipped; the statements after the ``raise``
    never run.
    """
    raise SkipTest

    # NOTE: ``ds`` is the same list object as ``self.dataset``, so the
    # append below extends the instance attribute as well.
    ds = self.dataset
    ds.append(self.tasmin.copy())

    numpy_ops = OcgOperations(dataset=ds,
                              geom=self.california,
                              output_format='numpy')
    ret = numpy_ops.execute()
    variables = ret[25].variables
    self.assertEqual(variables['tasmax']._use_for_id, ['gid', 'tid'])
    self.assertEqual(variables['tasmin']._use_for_id, [])

    keyed_ops = OcgOperations(dataset=ds,
                              geom=self.california,
                              output_format='keyed',
                              snippet=True)
    ret = keyed_ops.execute()
def test_heat_index(self):
    """Heat index on full arrays, snippets, CSV output and monthly groups."""
    ocgis.env.OVERWRITE = True

    kwds = {'time_range': [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]}
    ds = [self.test_data.get_rd(key, kwds=kwds)
          for key in ('cancm4_tasmax_2011', 'cancm4_rhsmax')]
    calc = [{
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }]
    geom = 'state_boundaries'
    select_ugid = [25]

    ## operations on entire data arrays
    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()
    ref = ret[1]
    self.assertEqual(ref.variables.keys(), ['tasmax', 'rhsmax'])
    self.assertEqual(ref.calc.keys(), ['heat_index'])
    hi = ref.calc['heat_index']
    self.assertEqual(hi.shape, (365, 1, 64, 128))
    ## confirm no masked geometries
    self.assertFalse(ref._archetype.spatial.vector.geom.mask.any())
    ## confirm some masked data in calculation output
    self.assertTrue(hi.mask.any())

    ## snippet-based testing
    ops = OcgOperations(dataset=ds,
                        calc=calc,
                        snippet=True,
                        geom=geom,
                        select_ugid=select_ugid)
    ret = ops.execute()
    self.assertEqual(ret[25].calc['heat_index'].shape, (1, 1, 5, 4))

    ## snippet to csv; only successful execution is checked
    ops = OcgOperations(dataset=ds,
                        calc=calc,
                        snippet=True,
                        output_format='csv')
    ret = ops.execute()

    ## try temporal grouping
    ops = OcgOperations(dataset=ds,
                        calc=calc,
                        calc_grouping=['month'],
                        geom='state_boundaries',
                        select_ugid=select_ugid)
    ret = ops.execute()
    self.assertEqual(ret[25].calc['heat_index'].shape, (12, 1, 5, 4))

    ## grouped snippet to csv; only successful execution is checked
    grouped_csv_ops = OcgOperations(dataset=ds,
                                    calc=calc,
                                    calc_grouping=['month'],
                                    output_format='csv',
                                    snippet=True)
    ret = grouped_csv_ops.execute()
def test_date_groups(self):
    """Check representative datetimes for each temporal grouping.

    Every case computes a mean of ``tasmax`` for the 'state_boundaries'
    geometry with UGID 25 and inspects the grouped temporal dimension.
    """
    calc = [{'func': 'mean', 'name': 'mean'}]
    rd = self.test_data.get_rd('cancm4_tasmax_2011')

    def grouped_temporal(dataset, calc_grouping):
        # run the request and hand back the tasmax temporal dimension
        ops = OcgOperations(dataset=dataset,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom='state_boundaries',
                            select_ugid=[25])
        ret = ops.execute()
        return ret[25].variables['tasmax'].temporal

    ## monthly groups
    rdt = grouped_temporal(rd, ['month']).group.representative_datetime
    self.assertTrue(
        np.all(rdt == np.array(
            [dt(2011, month, 16) for month in range(1, 13)])))

    ## yearly groups
    rdt = grouped_temporal(rd, ['year']).group.representative_datetime
    self.assertTrue(
        np.all(rdt == [dt(year, 7, 1) for year in range(2011, 2021)]))

    ## one group per month of each year
    rdt = grouped_temporal(
        rd, ['month', 'year']).group.representative_datetime
    self.assertTrue(
        np.all(rdt == [
            dt(year, month, 16)
            for year, month in itertools.product(range(2011, 2021),
                                                 range(1, 13))
        ]))

    ## day-of-month groups
    rdt = grouped_temporal(rd, ['day']).group.representative_datetime
    self.assertTrue(
        np.all(rdt == [dt(2011, 1, day, 12) for day in range(1, 32)]))

    ## month and day groups
    rdt = grouped_temporal(
        rd, ['month', 'day']).group.representative_datetime
    self.assertEqual(rdt[0], dt(2011, 1, 1, 12))
    self.assertEqual(rdt[12], dt(2011, 1, 13, 12))

    ## year and day groups
    rdt = grouped_temporal(
        rd, ['year', 'day']).group.representative_datetime
    self.assertEqual(rdt[0], dt(2011, 1, 1, 12))

    ## with month, day and year all grouped on a January 2011 request, the
    ## groups are compared directly against the source time values
    rd = self.test_data.get_rd(
        'cancm4_tasmax_2011',
        kwds={'time_region': {'month': [1], 'year': [2011]}})
    ref = grouped_temporal(rd, ['month', 'day', 'year'])
    rdt = ref.group.representative_datetime
    self.assertTrue(np.all(rdt == ref.value_datetime))
    self.assertTrue(np.all(ref.bounds_datetime == ref.group.bounds))