def test_HeatIndex_keyed_output(self):
    """Check identifier columns of heat index rows, then run keyed output.

    The test is disabled: ``SkipTest`` is raised unconditionally on the
    first statement, so nothing after it executes.
    """
    raise SkipTest

    datasets = [
        self.test_data.get_rd('cancm4_tasmax_2011'),
        self.test_data.get_rd('cancm4_rhsmax'),
    ]
    heat_index_kwds = {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'}
    calc = [{'func': 'heat_index', 'name': 'heat_index', 'kwds': heat_index_kwds}]

    ops = OcgOperations(dataset=datasets, calc=calc, snippet=False,
                        output_format='numpy')
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()

    keyed = KeyedIterator(ret[1], mode='calc')
    for idx, row in enumerate(keyed.iter_rows(ret[1])):
        # Only the first 1000 rows are inspected.
        if idx >= 1000:
            break
        self.assertEqual(row['cid'], 1)
        self.assertEqual(row['tgid'], None)
        self.assertNotEqual(row['tid'], None)

    ops = OcgOperations(dataset=datasets, calc=calc, snippet=True,
                        output_format='keyed')
    ops.execute()
def test_HeatIndex(self):
    """Run the heat index calculation for 2011 and inspect melted rows."""
    datasets = [self.tasmax, self.rhsmax]
    calc = [{
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }]

    # Both dataset dictionaries receive the same time range list object.
    time_range = [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]
    for dataset in datasets:
        dataset['time_range'] = time_range

    ops = OcgOperations(dataset=datasets, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()

    coll = ret[1]
    self.assertEqual(coll.variables.keys(), ['tasmax', 'rhsmax', 'heat_index'])
    heat_index = coll.variables['heat_index']
    self.assertEqual(heat_index.value.shape, (365, 1, 64, 128))

    # Only the first 1000 melted rows are checked.
    null_keys = ['vid', 'var_name', 'did', 'uri']
    melted = MeltedIterator(ret[1], mode='calc')
    for idx, row in enumerate(melted.iter_rows()):
        if idx >= 1000:
            break
        if idx == 0:
            self.assertEqual(row['value'], None)
        for key in null_keys:
            self.assertEqual(row[key], None)

    ops = OcgOperations(dataset=datasets, calc=calc, output_format='numpy',
                        snippet=True)
    ret = ops.execute()
def test_clip_aggregate(self):
    """Clip two state geometries without aggregation (this request used to hang)."""
    time_kwds = {'time_region': {'year': [2003]}}
    rd = self.test_data.get_rd('cancm4_tas', kwds=time_kwds)
    ops_kwds = {
        'dataset': rd,
        'geom': 'state_boundaries',
        'select_ugid': [14, 16],
        'aggregate': False,
        'spatial_operation': 'clip',
        'output_format': constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
    }
    OcgOperations(**ops_kwds).execute()
def test_differing_projections(self):
    """Executing the two request datasets together must raise ValueError."""
    request_datasets = [
        self.test_data.get_rd('daymet_tmax'),
        self.test_data.get_rd('cancm4_tas'),
    ]
    ops = OcgOperations(dataset=request_datasets, snippet=True)
    # Construction succeeds; the error is expected only on execution.
    with self.assertRaises(ValueError):
        ops.execute()
def test_HeatIndex(self):
    """Exercise the heat index calculation: full arrays, snippet, CSV and monthly grouping."""
    kwds = {'time_range': [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]}
    ds = [
        self.test_data.get_rd('cancm4_tasmax_2011', kwds=kwds),
        self.test_data.get_rd('cancm4_rhsmax', kwds=kwds),
    ]
    calc = [{
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }]

    # Operations on entire data arrays.
    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    coll = ops.execute()[1]
    self.assertEqual(coll.variables.keys(), ['tasmax', 'rhsmax'])
    self.assertEqual(coll.calc.keys(), ['heat_index'])
    heat_index = coll.calc['heat_index']
    self.assertEqual(heat_index.shape, (365, 1, 64, 128))

    # No geometry may be masked, but some calculated values must be.
    self.assertFalse(coll._archetype.spatial.vector.geom.mask.any())
    self.assertTrue(heat_index.mask.any())

    # Snippet-based testing.
    ret = OcgOperations(dataset=ds, calc=calc, snippet=True).execute()
    self.assertEqual(ret[1].calc['heat_index'].shape, (1, 1, 64, 128))
    ret = OcgOperations(dataset=ds, calc=calc, snippet=True,
                        output_format='csv').execute()

    # Temporal grouping by month.
    ret = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month']).execute()
    self.assertEqual(ret[1].calc['heat_index'].shape, (12, 1, 64, 128))
    ret = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month'],
                        output_format='csv', snippet=True).execute()
def test_operations_two_steps(self):
    """Precompute a daily percentile basis, then use it in a calculation.

    The basis is computed from the same request dataset that is later passed
    to the operations, first with netCDF output and then with numpy output.
    """
    from ocgis.calc.library.index.dynamic_kernel_percentile import DynamicDailyKernelPercentileThreshold

    # Request dataset used as the basis for the percentiles.
    variable = 'tas'
    rd = RequestDataset(uri=self.test_data.get_uri('cancm4_tas'), variable=variable)

    # Underlying dataset object. To subset the basis by time, filter it here
    # before reading the values.
    nc_basis = rd.get()

    # Values used for the percentile basis and the datetime objects used for
    # window creation.
    all_values = nc_basis.variables[variable].value
    temporal = nc_basis.temporal.value_datetime

    # Additional parameters for calculating the basis.
    percentile = 10
    width = 5

    # Structure array holding the daily percentiles.
    daily_percentile = DynamicDailyKernelPercentileThreshold.get_daily_percentile(
        all_values, temporal, percentile, width)

    # Perform the calculation with the precomputed basis; the basis and target
    # datasets are the same, so the request dataset is reused.
    calc_grouping = ['month', 'year']
    kwds = {
        'percentile': percentile,
        'width': width,
        'operation': 'lt',
        'daily_percentile': daily_percentile,
    }
    calc = [{'func': 'dynamic_kernel_percentile_threshold', 'name': 'tg10p', 'kwds': kwds}]
    ret = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc,
                        output_format='nc').execute()

    # Same request, returned as numpy arrays instead of a file path.
    arrs = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc,
                         output_format='numpy').execute()

    # First key: geometry identifier (1 when no selection geometry is given).
    # Second key: request dataset alias.
    tg10p = arrs[1]['tas'].variables['tg10p'].value

    # Date attributes of the temporal groups.
    date_parts = arrs[1]['tas'].temporal.date_parts
    assert (date_parts.shape[0] == tg10p.shape[1])

    # Representative datetimes and the lower/upper bounds of the date groups.
    rep_dt = arrs[1]['tas'].temporal.value_datetime
    bin_bounds = arrs[1]['tas'].temporal.bounds_datetime

    # Confirm there are values for each month and year (12 * 10).
    ret_ds = nc.Dataset(ret)
    try:
        self.assertEqual(ret_ds.variables['tg10p'].shape, (120, 64, 128))
    finally:
        ret_ds.close()
def test_differing_projections(self):
    """Expect ValueError when the daymet and cancm4 datasets are executed together."""
    daymet = self.test_data.get_rd('daymet_tmax')
    cancm4 = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=[daymet, cancm4], snippet=True)
    with self.assertRaises(ValueError):
        ops.execute()
def test_heat_index(self):
    """Check heat index output shapes and masks, with and without monthly grouping."""
    ocgis.env.OVERWRITE = True

    kwds = {'time_range': [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]}
    ds = [
        self.test_data.get_rd('cancm4_tasmax_2011', kwds=kwds),
        self.test_data.get_rd('cancm4_rhsmax', kwds=kwds),
    ]
    calc = [{
        'func': 'heat_index',
        'name': 'heat_index',
        'kwds': {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'},
    }]
    select_ugid = [25]
    field_key = 'tasmax_rhsmax'

    # Operations on entire data arrays.
    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ref = ops.execute()[1]
    self.assertEqual(ref.keys(), [field_key])
    self.assertEqual(ref[field_key].variables.keys(), ['heat_index'])
    hi = ref[field_key].variables['heat_index'].value
    self.assertEqual(hi.shape, (1, 365, 1, 64, 128))

    # No geometry may be masked, but some calculated values must be.
    self.assertFalse(ref[field_key].spatial.geom.point.value.mask.any())
    self.assertTrue(hi.mask.any())

    # Temporal grouping combined with a geometry subset.
    ops = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month'],
                        geom='state_boundaries', select_ugid=select_ugid)
    ret = ops.execute()
    grouped = ret[25][field_key].variables['heat_index'].value
    self.assertEqual(grouped.shape, (1, 12, 1, 5, 4))
def test_daymet(self):
    """Run a snippet subset of the daymet dataset for one state geometry."""
    ops_kwds = dict(
        dataset=self.test_data.get_rd('daymet_tmax'),
        geom='state_boundaries',
        snippet=True,
        select_ugid=[32],
        output_format='numpy',
    )
    OcgOperations(**ops_kwds).execute()
def test_keyword_dataset_esmf(self):
    """Execute operations on an ESMF field once per possible output format."""
    efield = self.get_esmf_field()
    for output_format in OutputFormat.iter_possible():
        # The output format doubles as the output prefix.
        ops = OcgOperations(dataset=efield, output_format=output_format,
                            prefix=output_format)
        ops.execute()
    # NOTE(review): a bare ``raise`` with no active exception always errors,
    # so this test cannot pass as written -- presumably a marker that the
    # test is unfinished; confirm before relying on it.
    raise
def test_geometries_not_duplicated_with_equivalent_ugid(self):
    """Equivalent geometries must appear only once in the output UGID shapefile."""
    request_datasets = [
        self.test_data.get_rd('cancm4_tas'),
        self.test_data.get_rd('cancm4_tasmax_2011'),
    ]
    ops = OcgOperations(dataset=request_datasets, geom='state_boundaries',
                        select_ugid=[16],
                        output_format=constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
                        snippet=True)
    ops.execute()

    shp_name = ops.prefix + '_ugid.shp'
    path_shp = os.path.join(self.current_dir_output, ops.prefix, 'shp', shp_name)
    with fiona.open(path_shp) as source:
        records = list(source)
        self.assertEqual(len(records), 1)
def test_daymet(self):
    """Execute a numpy-format snippet request on daymet data for one state."""
    request_dataset = self.test_data.get_rd('daymet_tmax')
    ops = OcgOperations(
        dataset=request_dataset,
        geom='state_boundaries',
        snippet=True,
        select_ugid=[32],
        output_format='numpy',
    )
    ops.execute()
def get_does_intersect(request_dataset, geom):
    '''
    Report whether a snippet request for the dataset and geometry executes
    without raising ``ExtentError``.

    :param :class:`ocgis.RequestDataset` request_dataset:
    :param shapely.geometry geom:
    :returns: ``True`` if execution succeeds, ``False`` on ``ExtentError``.
    :rtype: bool
    '''
    ops = OcgOperations(dataset=request_dataset, geom=geom, snippet=True)
    try:
        ops.execute()
    except ExtentError:
        return False
    return True
def test_HeatIndex(self):
    """Validate heat index results for full arrays, snippets, CSV and monthly groups."""
    year_2011 = [dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)]
    kwds = {'time_range': year_2011}
    tasmax = self.test_data.get_rd('cancm4_tasmax_2011', kwds=kwds)
    rhsmax = self.test_data.get_rd('cancm4_rhsmax', kwds=kwds)
    ds = [tasmax, rhsmax]
    calc_kwds = {'tas': 'tasmax', 'rhs': 'rhsmax', 'units': 'k'}
    calc = [{'func': 'heat_index', 'name': 'heat_index', 'kwds': calc_kwds}]

    # Operations on entire data arrays.
    ops = OcgOperations(dataset=ds, calc=calc)
    self.assertEqual(ops.calc_grouping, None)
    ret = ops.execute()
    ref = ret[1]
    self.assertEqual(ref.variables.keys(), ['tasmax', 'rhsmax'])
    self.assertEqual(ref.calc.keys(), ['heat_index'])
    hi = ref.calc['heat_index']
    self.assertEqual(hi.shape, (365, 1, 64, 128))

    # Confirm no masked geometries.
    geom_mask = ref._archetype.spatial.vector.geom.mask
    self.assertFalse(geom_mask.any())
    # Confirm some masked data in the calculation output.
    self.assertTrue(hi.mask.any())

    # Snippet-based testing.
    ops = OcgOperations(dataset=ds, calc=calc, snippet=True)
    ret = ops.execute()
    self.assertEqual(ret[1].calc['heat_index'].shape, (1, 1, 64, 128))
    ops = OcgOperations(dataset=ds, calc=calc, snippet=True, output_format='csv')
    ret = ops.execute()

    # Temporal grouping.
    ops = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month'])
    ret = ops.execute()
    self.assertEqual(ret[1].calc['heat_index'].shape, (12, 1, 64, 128))
    ops = OcgOperations(dataset=ds, calc=calc, calc_grouping=['month'],
                        output_format='csv', snippet=True)
    ret = ops.execute()
def test_differing_projections(self):
    """Default output executes for the two datasets; 'csv+' must raise ValueError."""
    request_datasets = [
        self.test_data.get_rd('daymet_tmax'),
        self.test_data.get_rd('cancm4_tas'),
    ]

    # With the default output format the request is expected to execute.
    ret = OcgOperations(dataset=request_datasets, snippet=True).execute()

    # Construction and execution are both inside the context manager, so the
    # error may come from either step.
    with self.assertRaises(ValueError):
        ops = OcgOperations(dataset=request_datasets, snippet=True,
                            output_format='csv+')
        ops.execute()
def test_geometries_different_ugid(self):
    """Equivalent geometries with different UGID values must both be written."""
    geoms = list(ShpCabinetIterator(key='state_boundaries', select_uid=[16]))
    # Duplicate the selected geometry and give the copy its own identifier.
    duplicate = deepcopy(geoms[0])
    geoms.append(duplicate)
    geoms[1]['properties']['UGID'] = 17

    request_datasets = [
        self.test_data.get_rd('cancm4_tas'),
        self.test_data.get_rd('cancm4_tasmax_2011'),
    ]
    ops = OcgOperations(dataset=request_datasets, geom=geoms,
                        output_format=constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
                        snippet=True)
    ops.execute()

    shp_name = ops.prefix + '_ugid.shp'
    path_shp = os.path.join(self.current_dir_output, ops.prefix, 'shp', shp_name)
    with fiona.open(path_shp) as source:
        records = list(source)
        self.assertEqual(len(records), 2)
def test_calculate_operations(self):
    """Compare 'icclim_TG' output against 'mean' output for two groupings."""
    rd = self.test_data.get_rd('cancm4_tas')
    slc = [None, None, None, [0, 10], [0, 10]]
    calc_icclim = [{'func': 'icclim_TG', 'name': 'TG'}]
    calc_ocgis = [{'func': 'mean', 'name': 'mean'}]

    for grouping in (['month'], ['month', 'year']):
        ret_ocgis = OcgOperations(calc=calc_ocgis, calc_grouping=grouping,
                                  slice=slc, dataset=rd).execute()
        ret_icclim = OcgOperations(calc=calc_icclim, calc_grouping=grouping,
                                   slice=slc, dataset=rd).execute()
        mean_value = ret_ocgis[1]['tas'].variables['mean'].value
        tg_value = ret_icclim[1]['tas'].variables['TG'].value
        self.assertNumpyAll(mean_value, tg_value)
def test_differing_projections(self):
    """Run both datasets with default output, then expect ValueError for 'csv+'."""
    daymet = self.test_data.get_rd('daymet_tmax')
    cancm4 = self.test_data.get_rd('cancm4_tas')
    shared_kwds = {'dataset': [daymet, cancm4], 'snippet': True}

    # Default output format: execution is expected to succeed.
    ops = OcgOperations(**shared_kwds)
    ret = ops.execute()

    # Any other format is expected to be rejected.
    with self.assertRaises(ValueError):
        ops = OcgOperations(output_format='csv+', **shared_kwds)
        ops.execute()
def test_dataset_as_field_from_file(self):
    """Passing a field as the dataset must give the same values as its request dataset."""
    rd = self.test_data.get_rd('cancm4_tas')
    subset_kwds = dict(snippet=True, geom='state_boundaries', select_ugid=[23])

    # First pass: the dataset argument is the loaded field.
    field = rd.get()
    ret = OcgOperations(dataset=field, **subset_kwds).execute()
    field_out_from_field = ret[23]['tas']
    self.assertEqual(field_out_from_field.shape, (1, 1, 1, 4, 3))

    # Second pass: the dataset argument is the request dataset itself.
    ret = OcgOperations(dataset=rd, **subset_kwds).execute()
    field_out_from_rd = ret[23]['tas']

    self.assertNumpyAll(field_out_from_field.variables['tas'].value,
                        field_out_from_rd.variables['tas'].value)
def test_calculate(self):
    """Run the 'sfwe' calculation grouped by month and written to netCDF."""
    calc = [{
        'func': 'sfwe',
        'name': 'sfwe',
        'kwds': {'tas': 'tas', 'pr': 'pr'},
    }]
    time_range = [dt(1990, 1, 1), dt(1990, 3, 31)]

    # The dataset definitions stored on the test case are updated in place.
    rds = []
    for dataset in (self.maurer_pr, self.maurer_tas):
        dataset.update({'time_range': time_range})
        rds.append(dataset)

    ops = OcgOperations(dataset=rds, geom='state_boundaries', select_ugid=[16],
                        calc=calc, calc_grouping=['month'], output_format='nc')
    ret = ops.execute()
def test_mfdataset_to_nc(self):
    """Write a yearly mean to netCDF and check the time values read back."""
    rd = self.test_data.get_rd('maurer_2010_pr')
    calc = [{'func': 'mean', 'name': 'my_mean'}]
    ops = OcgOperations(dataset=rd, output_format='nc', calc=calc,
                        calc_grouping=['year'], geom='state_boundaries',
                        select_ugid=[23])
    path = ops.execute()

    # Reload the written file and compare its temporal values.
    field = RequestDataset(path, 'my_mean_pr').get()
    expected = np.array([18444., 18809.])
    self.assertNumpyAll(field.temporal.value, expected)
def test_to_netcdf(self):
    """Write the rotated pole dataset to netCDF and check the temporal extent."""
    time_region = {'month': [12], 'year': [1982]}
    rd = self.test_data.get_rd('narccap_rotated_pole',
                               kwds=dict(time_region=time_region))

    # No slice or geometry is supplied for this conversion.
    path = OcgOperations(dataset=rd, output_format='nc').execute()

    rd2 = ocgis.RequestDataset(uri=path, variable='tas')
    extent = rd2.get().temporal.extent
    self.assertEqual(extent, (5444.0, 5474.875))
def test_real_data(self):
    """Run 'freq_duration' on Maurer tasmax data for several output formats."""
    uri = 'Maurer02new_OBS_tasmax_daily.1971-2000.nc'
    variable = 'tasmax'
    ocgis.env.DIR_DATA = '/usr/local/climate_data'

    for output_format in ['numpy', 'csv+', 'shp', 'csv']:
        # The request pieces are rebuilt on every pass so no object is shared
        # between operations.
        dataset = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1991], 'month': [7]},
        }
        calc = [{
            'name': 'Frequency Duration',
            'func': 'freq_duration',
            'kwds': {'threshold': 15.0, 'operation': 'gte'},
        }]
        headers = ['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable',
                   'calc_key', 'value']
        ops = OcgOperations(dataset=dataset, output_format=output_format,
                            prefix=output_format, calc=calc,
                            calc_grouping=['month', 'year'], geom='us_counties',
                            select_ugid=[2778], aggregate=True, calc_raw=False,
                            spatial_operation='clip', headers=headers)
        ret = ops.execute()

        if output_format == 'numpy':
            ref = ret[2778]['tasmax'].variables['Frequency Duration_tasmax'].value
            self.assertEqual(ref.compressed()[0].shape, (2,))

        if output_format == 'csv+':
            real = [
                {'COUNT': '1', 'UGID': '2778', 'DID': '1',
                 'CALC_KEY': 'freq_duration', 'MONTH': '7', 'DURATION': '7',
                 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax',
                 'DAY': '16'},
                {'COUNT': '1', 'UGID': '2778', 'DID': '1',
                 'CALC_KEY': 'freq_duration', 'MONTH': '7', 'DURATION': '23',
                 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax',
                 'DAY': '16'},
            ]
            with open(ret, 'r') as f:
                rows = list(csv.DictReader(f))
            for row, real_row in zip(rows, real):
                self.assertDictEqual(row, real_row)
def test_real_data(self):
    """Run the frequency duration calculation on real data in four output formats."""
    time_region = {'year': [1991], 'month': [7]}
    rd = self.test_data.get_rd('maurer_2010_concatenated_tasmax',
                               kwds={'time_region': time_region})
    output_formats = [
        constants.OUTPUT_FORMAT_NUMPY,
        constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
        constants.OUTPUT_FORMAT_SHAPEFILE,
        constants.OUTPUT_FORMAT_CSV,
    ]

    for output_format in output_formats:
        calc = [{
            'name': 'Frequency Duration',
            'func': 'freq_duration',
            'kwds': {'threshold': 15.0, 'operation': 'gte'},
        }]
        headers = ['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable',
                   'calc_key', 'value']
        ops = OcgOperations(dataset=rd, output_format=output_format,
                            prefix=output_format, calc=calc,
                            calc_grouping=['month', 'year'], geom='us_counties',
                            select_ugid=[2778], aggregate=True, calc_raw=False,
                            spatial_operation='clip', headers=headers,
                            melted=True)
        ret = ops.execute()

        if output_format == 'numpy':
            ref = ret[2778]['tasmax'].variables['Frequency Duration'].value
            self.assertEqual(ref.compressed()[0].shape, (2,))

        if output_format == constants.OUTPUT_FORMAT_CSV_SHAPEFILE:
            real = [
                {'COUNT': '1', 'UGID': '2778', 'DID': '1',
                 'CALC_KEY': 'freq_duration', 'MONTH': '7', 'DURATION': '7',
                 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax',
                 'DAY': '16'},
                {'COUNT': '1', 'UGID': '2778', 'DID': '1',
                 'CALC_KEY': 'freq_duration', 'MONTH': '7', 'DURATION': '23',
                 'GID': '2778', 'YEAR': '1991', 'VARIABLE': 'tasmax',
                 'DAY': '16'},
            ]
            with open(ret, 'r') as f:
                rows = list(csv.DictReader(f))
            # Rows are compared pairwise with the expected records.
            for row, real_row in zip(rows, real):
                self.assertDictEqual(row, real_row)
def test_calculate(self):
    """Run the dynamic percentile threshold over a grid of parameters.

    Every combination of percentile, operation, grouping, dataset and
    geometry is executed; results are not asserted on.
    """
    ocgis.env.DIR_BIN = '/home/local/WX/ben.koziol/links/ocgis/bin/QED_2013_dynamic_percentiles'
    percentiles = [90, 92.5, 95, 97.5]
    operations = ['gt', 'gte', 'lt', 'lte']
    calc_groupings = [['month']]
    uris_variables = [
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmax_daily.1971-2000.nc',
         'tasmax'],
        ['/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmin_daily.1971-2000.nc',
         'tasmin'],
    ]
    geoms_select_ugids = [
        ['qed_city_centroids', None],
        ['state_boundaries', [39]],
    ]

    combinations = itertools.product(percentiles, operations, calc_groupings,
                                     uris_variables, geoms_select_ugids)
    for tup in combinations:
        print(tup)
        percentile, operation, calc_grouping, uri_variable, geom_select_ugid = tup
        uri, variable = uri_variable[0], uri_variable[1]
        geom, select_ugid = geom_select_ugid[0], geom_select_ugid[1]
        dataset = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1990], 'month': [6, 7, 8]},
        }
        calc = [{
            'func': 'qed_dynamic_percentile_threshold',
            'kwds': {'operation': operation, 'percentile': percentile},
            'name': 'dp',
        }]
        ops = OcgOperations(dataset=dataset, geom=geom, select_ugid=select_ugid,
                            calc=calc, calc_grouping=calc_grouping,
                            output_format='numpy')
        ret = ops.execute()
def test_keyword_spatial_operations_bounding_box(self):
    """Subset with a four-value list as the geometry and check the field shape."""
    bounding_box = [-80, 22.5, 50, 70.0]
    rd = self.test_data.get_rd('subset_test_slp')
    ret = OcgOperations(dataset=rd, geom=bounding_box).execute()
    self.assertEqual(ret[1]['slp'].shape, (1, 365, 1, 18, 143))
def test_bad_time_dimension(self):
    """Run with ``format_time=False`` for each output format and check the time output."""
    ocgis.env.DIR_DATA = '/usr/local/climate_data'
    uri = 'seasonalbias.nc'
    variable = 'bias'

    for output_format in ['numpy', 'csv', 'csv+', 'shp', 'nc']:
        dataset = RequestDataset(uri=uri, variable=variable)
        ops = OcgOperations(dataset=dataset, output_format=output_format,
                            format_time=False, prefix=output_format)
        ret = ops.execute()

        if output_format == 'numpy':
            temporal = ret[1]['bias'].temporal
            self.assertNumpyAll(
                temporal.value,
                np.array([-712208.5, -712117., -712025., -711933.5]))
            self.assertNumpyAll(
                ret[1]['bias'].temporal.bounds,
                np.array([[-712254., -712163.],
                          [-712163., -712071.],
                          [-712071., -711979.],
                          [-711979., -711888.]]))
        elif output_format == 'csv':
            with open(ret) as f:
                for row in DictReader(f):
                    # Date part columns must be empty; the time column holds
                    # a numeric value.
                    date_parts_empty = [row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]
                    self.assertTrue(all(date_parts_empty))
                    self.assertTrue(float(row['TIME']) < -50000)
        elif output_format == 'nc':
            self.assertNcEqual(dataset.uri, ret, check_types=False)
def test_keyword_output_format_esmpy(self):
    """Test the ESMPy output format with and without a slice.

    Each combination of ``as_field`` (dataset passed as a loaded field or as
    a request dataset) and ``with_slice`` is executed. The returned object
    must be an ``ESMF.Field`` whose shape matches the requested slice.
    """
    import ESMF

    # todo: test spatial subsetting
    # todo: test calculations

    slc = [None, None, None, [0, 10], [0, 10]]
    sliced_shape = (1, 3650, 1, 10, 10)
    full_shape = (1, 3650, 1, 64, 128)

    kwds = dict(as_field=[False, True], with_slice=[True, False])
    for k in self.iter_product_keywords(kwds):
        rd = self.test_data.get_rd('cancm4_tas')
        if k.as_field:
            rd = rd.get()

        # Use a per-iteration name: rebinding ``slc`` to None here would leave
        # every later ``with_slice=True`` iteration without a slice.
        current_slice = slc if k.with_slice else None

        ops = OcgOperations(dataset=rd, output_format='esmpy', slice=current_slice)
        ret = ops.execute()
        self.assertIsInstance(ret, ESMF.Field)

        # Assert the exact shape expected for this combination rather than
        # falling back to the other shape after a failed assertion.
        expected_shape = sliced_shape if k.with_slice else full_shape
        self.assertEqual(ret.shape, expected_shape)
def test_process_geometries(self):
    """Multiple geometries combined with an output CRS update give the expected grid means."""
    a = 'POLYGON((-105.21347987288135073 40.21514830508475313,-104.39928495762711691 40.21514830508475313,-104.3192002118643984 39.5677966101694949,-102.37047139830508513 39.61451271186440692,-102.12354343220337682 37.51896186440677639,-105.16009004237288593 37.51896186440677639,-105.21347987288135073 40.21514830508475313))'
    b = 'POLYGON((-104.15235699152542281 39.02722457627118757,-103.71189088983049942 39.44099576271186436,-102.71750529661017026 39.28082627118644155,-102.35712394067796538 37.63908898305084705,-104.13900953389830306 37.63241525423728717,-104.15235699152542281 39.02722457627118757))'

    # One geometry dictionary per polygon; UGID is its position in the list.
    geom = []
    for ugid, polygon_wkt in enumerate([a, b]):
        geom.append({'geom': wkt.loads(polygon_wkt), 'properties': {'UGID': ugid}})

    # 4x4 grid: the first array repeats a value along each row, the second
    # repeats the same row four times.
    first_axis = [[value] * 4 for value in (37.0, 38.0, 39.0, 40.0)]
    second_axis = [[-105.0, -104.0, -103.0, -102.0] for _ in range(4)]
    grid_value = np.ma.array([first_axis, second_axis], mask=False)

    crs_value = {'a': 6370997, 'lon_0': -100, 'y_0': 0, 'no_defs': True,
                 'proj': 'laea', 'x_0': 0, 'units': 'm', 'b': 6370997,
                 'lat_0': 45}
    output_crs = CoordinateReferenceSystem(value=crs_value)

    grid = SpatialGridDimension(value=grid_value)
    sdim = SpatialDimension(grid=grid, crs=WGS84())
    field = Field(spatial=sdim)

    ret = OcgOperations(dataset=field, geom=geom, output_crs=output_crs).execute()

    expected = {0: -502052.79407259845, 1: -510391.37909706926}
    for ugid, field_dict in ret.iteritems():
        for subset_field in field_dict.itervalues():
            actual_mean = subset_field.spatial.grid.value.data.mean()
            self.assertAlmostEqual(actual_mean, expected[ugid])
def test_calculation(self):
    """Check yearly mean/std output keys and shapes for an aggregated clip.

    Each variable's calculation dictionary is expected to hold 'mean', 'std'
    and 'n' entries.
    """
    calc = [{'func': 'mean', 'name': 'mean'},
            {'func': 'std', 'name': 'std'}]
    calc_grouping = ['year']
    kwds = {
        'aggregate': True,
        'spatial_operation': 'clip',
        'calc': calc,
        'calc_grouping': calc_grouping,
        'output_format': 'numpy',
        'geom': self.california,
        'dataset': self.dataset,
        'snippet': False,
    }
    ops = OcgOperations(**kwds)
    ret = ops.execute()

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias.
    ref = ret[25].calc['Prcp']
    self.assertEqual(ref.keys(), ['mean', 'std', 'n'])
    for value in ref.itervalues():
        self.assertEqual(value.shape, (1, 1, 1, 1))

    ref = ret[25].calc['tasmax']
    self.assertEqual(ref.keys(), ['mean', 'std', 'n'])
    for value in ref.itervalues():
        self.assertEqual(value.shape, (10, 1, 1, 1))
def test_keyed(self):
    """Check identifier usage per variable, then run the keyed output format.

    The test is disabled: ``SkipTest`` is raised unconditionally on the
    first statement, so nothing after it executes.
    """
    raise SkipTest

    # The dataset list stored on the test case is extended in place.
    datasets = self.dataset
    datasets.append(self.tasmin.copy())

    ret = OcgOperations(dataset=datasets, geom=self.california,
                        output_format='numpy').execute()
    variables = ret[25].variables
    self.assertEqual(variables['tasmax']._use_for_id, ['gid', 'tid'])
    self.assertEqual(variables['tasmin']._use_for_id, [])

    ops = OcgOperations(dataset=datasets, geom=self.california,
                        output_format='keyed', snippet=True)
    ret = ops.execute()
def test_keyed(self):
    """Check ``_use_for_id`` on two variables and run the keyed converter.

    The test is skipped unconditionally with an explanatory message; the
    remaining statements are unreachable.
    """
    raise SkipTest('keyed format currently deprecated')

    ds = self.dataset
    tasmin_copy = self.tasmin.copy()
    ds.append(tasmin_copy)

    numpy_ops = OcgOperations(dataset=ds, geom=self.california,
                              output_format='numpy')
    ret = numpy_ops.execute()
    ref = ret[25].variables
    self.assertEqual(ref['tasmax']._use_for_id, ['gid', 'tid'])
    self.assertEqual(ref['tasmin']._use_for_id, [])

    keyed_ops = OcgOperations(dataset=ds, geom=self.california,
                              output_format='keyed', snippet=True)
    ret = keyed_ops.execute()
def test_bad_time_dimension(self):
    """Run each output format with ``format_time=False`` and check the time output."""
    output_formats = [
        constants.OUTPUT_FORMAT_NUMPY,
        constants.OUTPUT_FORMAT_CSV,
        constants.OUTPUT_FORMAT_CSV_SHAPEFILE,
        constants.OUTPUT_FORMAT_SHAPEFILE,
        constants.OUTPUT_FORMAT_NETCDF,
    ]

    for output_format in output_formats:
        dataset = self.test_data.get_rd('snippet_seasonalbias')
        ops = OcgOperations(dataset=dataset, output_format=output_format,
                            format_time=False, prefix=output_format)
        ret = ops.execute()

        if output_format == constants.OUTPUT_FORMAT_NUMPY:
            self.assertFalse(ret[1]['bias'].temporal.format_time)
            expected_value = np.array([-712208.5, -712117., -712025., -711933.5])
            self.assertNumpyAll(ret[1]['bias'].temporal.value, expected_value)
            expected_bounds = np.array([[-712254., -712163.],
                                        [-712163., -712071.],
                                        [-712071., -711979.],
                                        [-711979., -711888.]])
            self.assertNumpyAll(ret[1]['bias'].temporal.bounds, expected_bounds)

        if output_format == constants.OUTPUT_FORMAT_CSV:
            with open(ret) as f:
                for row in DictReader(f):
                    # Date part columns must be empty; the time column holds
                    # a numeric value.
                    date_parts_empty = [row[k] == '' for k in ['YEAR', 'MONTH', 'DAY']]
                    self.assertTrue(all(date_parts_empty))
                    self.assertTrue(float(row['TIME']) < -50000)

        if output_format == constants.OUTPUT_FORMAT_NETCDF:
            ignore_attributes = {
                'global': ['history'],
                'bounds_time': ['calendar', 'units'],
                'bias': ['_FillValue', 'grid_mapping', 'units'],
            }
            self.assertNcEqual(ret, dataset.uri, check_types=False,
                               ignore_attributes=ignore_attributes,
                               ignore_variables=['latitude_longitude'])
def test_rotated_pole_clip_aggregate(self):
    """Clip and aggregate the rotated pole dataset to one state geometry."""
    time_region = {'month': [12], 'year': [1982]}
    rd = self.test_data.get_rd('narccap_rotated_pole',
                               kwds=dict(time_region=time_region))
    ops = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[16],
                        spatial_operation='clip', aggregate=True,
                        output_format='numpy')
    coll = ops.execute()
    tas = coll.gvu(16, 'tas')
    self.assertEqual(tas.shape, (1, 248, 1, 1, 1))
def test_keyword_output_format_nc_2d_flexible_mesh_ugrid(self):
    """Write the flexible mesh UGRID format and check the face dimension size."""
    ops = OcgOperations(
        dataset=self.test_data.get_rd('cancm4_tas'),
        geom='state_boundaries',
        select_ugid=[25],
        output_format=constants.OUTPUT_FORMAT_NETCDF_UGRID_2D_FLEXIBLE_MESH,
    )
    path = ops.execute()
    with self.nc_scope(path) as ds:
        face_count = len(ds.dimensions['nMesh2_face'])
        self.assertEqual(face_count, 13)
def test_calc_grouping_seasonal_with_year(self):
    """Group by a list of months combined with 'year' and check the output shape."""
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(
        dataset=rd,
        calc=[{'func': 'mean', 'name': 'mean'}],
        calc_grouping=[[1, 2, 3], 'year'],
        geom='state_boundaries',
        select_ugid=[25],
    )
    ret = ops.execute()
    self.assertEqual(ret[25]['tas'].shape, (1, 10, 1, 5, 4))
def test_csv_conversion(self):
    """Convert to CSV without a geometry and again with a polygon geometry."""
    plain_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv')
    ret = self.get_ret(plain_ops)

    # A geometry is supplied to exercise writing of the user-geometry
    # overview shapefile.
    polygon = make_poly((38, 39), (-104, -103))
    geom_ops = OcgOperations(dataset=self.get_dataset(), output_format='csv',
                             geom=polygon)
    ret = geom_ops.execute()
def test_geometries_not_duplicated_with_equivalent_ugid(self):
    """Equivalent geometries must appear once in both the UGID shapefile and CSV."""
    request_datasets = [
        self.test_data.get_rd('cancm4_tas'),
        self.test_data.get_rd('cancm4_tasmax_2011'),
    ]
    ops = OcgOperations(dataset=request_datasets, geom='state_boundaries',
                        select_ugid=[16], output_format='csv+', snippet=True)
    ops.execute()

    # Both overview files are written into the 'shp' folder of the output.
    path_shp = os.path.join(self._test_dir, ops.prefix, 'shp', ops.prefix + '_ugid.shp')
    path_csv = os.path.join(self._test_dir, ops.prefix, 'shp', ops.prefix + '_ugid.csv')

    with fiona.open(path_shp) as source:
        records = list(source)
        self.assertEqual(len(records), 1)

    with open(path_csv) as source:
        rows_csv = list(csv.DictReader(source))
    self.assertEqual(len(rows_csv), 1)
def test_point_shapefile_subset(self):
    """Subset by a point shapefile for several output formats."""
    for output_format in ('numpy', 'nc', 'csv', 'csv+'):
        rd = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=rd, geom='qed_city_centroids',
                            output_format=output_format, prefix=output_format)
        ret = ops.execute()
        # Only the numpy output is inspected.
        if output_format != 'numpy':
            continue
        self.assertEqual(len(ret), 4)
def test_csv_conversion(self):
    """Run the CSV conversion with and without a user geometry."""
    ocgis.env.OVERWRITE = True

    ops = OcgOperations(dataset=self.get_dataset(), output_format='csv')
    ret = self.get_ret(ops)

    # With a geometry, to check writing of the user-geometry overview
    # shapefile.
    row_bounds, col_bounds = (38, 39), (-104, -103)
    geom = make_poly(row_bounds, col_bounds)
    ops = OcgOperations(dataset=self.get_dataset(), output_format='csv', geom=geom)
    ret = ops.execute()
def get_collection(self, aggregate=False):
    """Execute a subset for geometry 25 and return its entry from the result.

    The spatial operation is 'clip' when ``aggregate`` is truthy and
    'intersects' otherwise.
    """
    spatial_operation = 'clip' if aggregate else 'intersects'
    ops = OcgOperations(
        dataset=self.test_data.get_rd('cancm4_tas'),
        geom='state_boundaries',
        select_ugid=[25],
        spatial_operation=spatial_operation,
        aggregate=aggregate,
    )
    return ops.execute()[25]
def test_calculate(self):
    """Execute 'qed_dynamic_percentile_threshold' for every parameter combination."""
    ocgis.env.DIR_BIN = '/home/local/WX/ben.koziol/links/ocgis/bin/QED_2013_dynamic_percentiles'

    percentiles = [90, 92.5, 95, 97.5]
    operations = ['gt', 'gte', 'lt', 'lte']
    calc_groupings = [
        ['month'],
    ]
    uris_variables = [
        [
            '/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmax_daily.1971-2000.nc',
            'tasmax',
        ],
        [
            '/home/local/WX/ben.koziol/climate_data/maurer/2010-concatenated/Maurer02new_OBS_tasmin_daily.1971-2000.nc',
            'tasmin',
        ],
    ]
    geoms_select_ugids = [
        ['qed_city_centroids', None],
        ['state_boundaries', [39]],
    ]

    for tup in itertools.product(percentiles, operations, calc_groupings,
                                 uris_variables, geoms_select_ugids):
        print(tup)
        percentile, operation, calc_grouping, uri_variable, geom_select_ugid = tup

        # Summer months of 1990 only.
        dataset = {
            'uri': uri_variable[0],
            'variable': uri_variable[1],
            'time_region': {'year': [1990], 'month': [6, 7, 8]},
        }
        threshold_kwds = {'operation': operation, 'percentile': percentile}
        calc = [{
            'func': 'qed_dynamic_percentile_threshold',
            'kwds': threshold_kwds,
            'name': 'dp',
        }]
        ops = OcgOperations(dataset=dataset,
                            geom=geom_select_ugid[0],
                            select_ugid=geom_select_ugid[1],
                            calc=calc,
                            calc_grouping=calc_grouping,
                            output_format='numpy')
        ret = ops.execute()
def test_clip_aggregate(self):
    """Clip two state geometries without aggregation (this request used to hang)."""
    rd = self.test_data.get_rd('cancm4_tas',
                               kwds={'time_region': {'year': [2003]}})
    ops = OcgOperations(
        dataset=rd,
        geom='state_boundaries',
        select_ugid=[14, 16],
        aggregate=False,
        spatial_operation='clip',
        output_format='csv+',
    )
    ret = ops.execute()
def run_standard_operations(self, calc, capture=False, output_format=None):
    """Run ``calc`` across aggregate, grouping and output format combinations.

    :param calc: Calculation definitions passed to the operations; the name
     of the first one is used to look up the numpy result.
    :param bool capture: If ``True``, exceptions are collected instead of
     being re-raised.
    :param output_format: Output formats to iterate over. A falsy value
     selects 'numpy', 'csv+' and 'nc'.
    :returns: One dictionary per captured exception, with keys 'exception'
     and 'parms'.
    :rtype: list
    """
    aggregates = [False, True]
    groupings = [['month'], ['month', 'year']]
    formats = output_format or ['numpy', 'csv+', 'nc']

    captured = []
    combinations = itertools.product(aggregates, groupings, formats)
    for ii, (aggregate, calc_grouping, fmt) in enumerate(combinations):
        # The aggregated netCDF combination is not run.
        if aggregate and fmt == 'nc':
            continue

        time_kwds = {'time_region': {'year': [2001, 2002]}}
        rd = self.test_data.get_rd('cancm4_tas', kwds=time_kwds)
        try:
            ops = OcgOperations(dataset=rd, geom='state_boundaries',
                                select_ugid=[25], calc=calc,
                                calc_grouping=calc_grouping, output_format=fmt,
                                aggregate=aggregate,
                                prefix=('standard_ops_' + str(ii)))
            ret = ops.execute()
            if fmt == 'numpy':
                ref = ret[25].calc['tas'][calc[0]['name']]
                space_shape = [1, 1] if aggregate else [5, 4]
                # 12 monthly groups, or 24 month/year groups for two years.
                time_shape = [12] if calc_grouping == ['month'] else [24]
                test_shape = time_shape + [1] + space_shape
                self.assertEqual(ref.shape, tuple(test_shape))
                if not aggregate:
                    self.assertTrue(np.ma.is_masked(ref[0, 0, 0, 0]))
        except Exception as e:
            if not capture:
                raise
            parms = dict(aggregate=aggregate, calc_grouping=calc_grouping,
                         output_format=fmt)
            captured.append({'exception': e, 'parms': parms})
    return captured
def test_same_variable_name(self):
    """Check alias handling when the same dataset is requested twice.

    Building ``OcgOperations`` from two copies of ``self.cancm4`` must raise
    ``KeyError`` both when the aliases are left untouched and when both are
    set to 'foo'. Once only the first copy is re-aliased to 'foo_var', the
    operation runs and both variables are expected to hold equal values.
    """
    ds = [self.cancm4.copy(), self.cancm4.copy()]
    with self.assertRaises(KeyError):
        OcgOperations(dataset=ds)

    # Identical explicit aliases are rejected as well.
    ds[0].alias = 'foo'
    ds[1].alias = 'foo'
    with self.assertRaises(KeyError):
        OcgOperations(dataset=ds)

    ds = [self.cancm4.copy(), self.cancm4.copy()]
    ds[0].alias = 'foo_var'
    ops = OcgOperations(dataset=ds, snippet=True)
    ret = ops.execute()

    variables = ret[1].variables
    # keys()/values() may be views rather than lists (Python 3 mappings), so
    # materialise them before comparing against a list or indexing.
    self.assertEqual(list(variables.keys()), ['foo_var', 'tasmax'])
    values = list(variables.values())
    self.assertTrue(np.all(values[0].value == values[1].value))
def test_bad_time_dimension(self):
    """Run 'seasonalbias.nc' with ``format_time=False`` in several formats.

    Each of 'csv', 'csv+', 'shp' and 'numpy' must execute. For the numpy
    output, the temporal values and bounds of the 'bias' variable are
    compared against the expected numeric values.
    """
    ocgis.env.DIR_DATA = '/usr/local/climate_data'
    uri = 'seasonalbias.nc'
    variable = 'bias'

    for fmt in ['csv', 'csv+', 'shp', 'numpy']:
        dataset = {'uri': uri, 'variable': variable}
        ops = OcgOperations(dataset=dataset,
                            output_format=fmt,
                            format_time=False,
                            prefix=fmt)
        ret = ops.execute()
        # Only the in-memory output can be inspected directly.
        if fmt != 'numpy':
            continue

        expected_value = np.array([-712208.5, -712117., -712025., -711933.5])
        self.assertNumpyAll(ret[1].variables['bias'].temporal.value,
                            expected_value)

        expected_bounds = np.array([[-712254., -712163.],
                                    [-712163., -712071.],
                                    [-712071., -711979.],
                                    [-711979., -711888.]])
        self.assertNumpyAll(ret[1].variables['bias'].temporal.bounds,
                            expected_bounds)
def test_frequency_duration_real_data(self):
    """Run 'freq_duration' on the Maurer tasmax file for July 1991.

    The calculation (threshold 25.0, operation 'gte') is clipped and
    aggregated to 'us_counties' UGID 2778 and written as 'csv+', 'shp' and
    'csv'. Nothing is asserted on the output; each format only has to
    execute.
    """
    uri = 'Maurer02new_OBS_tasmax_daily.1971-2000.nc'
    variable = 'tasmax'
    ocgis.env.DIR_DATA = '/usr/local/climate_data'

    for fmt in ['csv+', 'shp', 'csv']:
        dataset = {
            'uri': uri,
            'variable': variable,
            'time_region': {'year': [1991], 'month': [7]},
        }
        calc = [{
            'name': 'Frequency Duration',
            'func': 'freq_duration',
            'kwds': {'threshold': 25.0, 'operation': 'gte'},
        }]
        headers = ['did', 'ugid', 'gid', 'year', 'month', 'day', 'variable',
                   'calc_name', 'value']
        OcgOperations(dataset=dataset,
                      output_format=fmt,
                      prefix=fmt,
                      calc=calc,
                      calc_grouping=['month', 'year'],
                      geom='us_counties',
                      select_ugid=[2778],
                      aggregate=True,
                      calc_raw=False,
                      spatial_operation='clip',
                      headers=headers).execute()
def test_same_projection(self):
    """Execute a snippet over two request datasets built from one file.

    Both datasets read 'tmax' from the 'daymet_tmax' URI and differ only in
    alias ('tmax1' and 'tmax2'); the test passes when execution completes.
    """
    uri = self.test_data.get_rd('daymet_tmax').uri
    datasets = [
        RequestDataset(uri=uri, variable='tmax', alias=alias)
        for alias in ('tmax1', 'tmax2')
    ]
    OcgOperations(dataset=datasets, snippet=True).execute()
def test_date_groups(self):
    """Check the representative datetimes produced by each calc grouping.

    A 'mean' calculation on 'cancm4_tasmax_2011' (state_boundaries UGID 25)
    is run with several groupings, and the resulting temporal group's
    ``representative_datetime`` is compared against the expected dates.
    """
    calc = [{'func': 'mean', 'name': 'mean'}]

    def grouped_temporal(request_dataset, calc_grouping):
        # Execute the grouped mean and return the 'tasmax' temporal object.
        ops = OcgOperations(dataset=request_dataset,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom='state_boundaries',
                            select_ugid=[25])
        return ops.execute()[25].variables['tasmax'].temporal

    rd = self.test_data.get_rd('cancm4_tasmax_2011')

    # Month only: the 16th of each month of 2011.
    rdt = grouped_temporal(rd, ['month']).group.representative_datetime
    expected = np.array([dt(2011, month, 16) for month in range(1, 13)])
    self.assertTrue(np.all(rdt == expected))

    # Year only: July 1st of each year from 2011 through 2020.
    rdt = grouped_temporal(rd, ['year']).group.representative_datetime
    expected = [dt(year, 7, 1) for year in range(2011, 2021)]
    self.assertTrue(np.all(rdt == expected))

    # Month and year: the 16th of every month in every year.
    rdt = grouped_temporal(rd, ['month', 'year']).group.representative_datetime
    year_months = itertools.product(range(2011, 2021), range(1, 13))
    expected = [dt(year, month, 16) for year, month in year_months]
    self.assertTrue(np.all(rdt == expected))

    # Day only: noon on each day number, placed in January 2011.
    rdt = grouped_temporal(rd, ['day']).group.representative_datetime
    expected = [dt(2011, 1, day, 12) for day in range(1, 32)]
    self.assertTrue(np.all(rdt == expected))

    # Month and day: spot-check the first and thirteenth entries.
    rdt = grouped_temporal(rd, ['month', 'day']).group.representative_datetime
    self.assertEqual(rdt[0], dt(2011, 1, 1, 12))
    self.assertEqual(rdt[12], dt(2011, 1, 13, 12))

    # Year and day: spot-check the first entry.
    rdt = grouped_temporal(rd, ['year', 'day']).group.representative_datetime
    self.assertEqual(rdt[0], dt(2011, 1, 1, 12))

    # Grouping by month, day and year on January 2011 only: the grouped
    # datetimes and bounds are expected to equal the source ones.
    rd = self.test_data.get_rd(
        'cancm4_tasmax_2011',
        kwds={'time_region': {'month': [1], 'year': [2011]}})
    ref = grouped_temporal(rd, ['month', 'day', 'year'])
    rdt = ref.group.representative_datetime
    self.assertTrue(np.all(rdt == ref.value_datetime))
    self.assertTrue(np.all(ref.bounds_datetime == ref.group.bounds))
def run():
    """Execute a 'keyed' output operation on the CanCM4 tasmax decadal file."""
    uri = '/usr/local/climate_data/CanCM4/tasmax_day_CanCM4_decadal2000_r2i1p1_20010101-20101231.nc'
    request = {'uri': uri, 'variable': 'tasmax'}
    OcgOperations(dataset=request, output_format='keyed').execute()