def test_system_through_operations(self):
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}])
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variable = actual_field['my_mff']
    self.assertEqual(actual_variable.attrs['long_name'], MockFieldFunction.long_name)
    self.assertEqual(actual_variable.get_value().tolist(), self.desired_value)
    self.assertNotIn('data', list(actual_field.keys()))

    # Test writing output to netCDF.
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}],
                        output_format='nc')
    ret = ops.execute()
    actual_field = RequestDataset(ret).get()
    self.assertEqual(actual_field['my_mff'].get_value().tolist(), self.desired_value)
def test_sql_where_through_operations(self):
    """Test using a SQL where statement to select some geometries."""
    states = ("Wisconsin", "Vermont")
    s = 'STATE_NAME in {0}'.format(states)
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True)
    ret = ops.execute()
    self.assertEqual(len(ret), 2)
    self.assertEqual(ret.keys(), [8, 10])
    for v in ret.properties.itervalues():
        self.assertIn(v['STATE_NAME'], states)

    # Make sure the SQL select has preference over the UID selection.
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True,
                        geom_select_uid=[500, 600, 700])
    ret = ops.execute()
    self.assertEqual(len(ret), 2)
    for v in ret.properties.itervalues():
        self.assertIn(v['STATE_NAME'], states)

    # Test the possible interaction with geom_uid.
    path = self.get_shapefile_path_with_no_ugid()
    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s)
    ret = ops.execute()
    self.assertEqual(ret.keys(), [1, 2])

    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s, geom_uid='ID')
    ret = ops.execute()
    self.assertEqual(ret.keys(), [13, 15])
def test_time_region(self):
    uri = 'C:/testclip/WSI_OCGIS_abdu.1979.nc'
    shp = 'C:/testclip/state.shp'
    # The collections below are accessed by the 'forcalc' alias, so the request
    # dataset is created with that variable name.
    rd = RequestDataset(uri=uri, variable='forcalc')
    calc = [{'func': 'sum', 'name': 'sum'}]

    ops_one = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                            aggregate=True, calc_grouping='day', prefix='calc',
                            geom_select_sql_where='STATE_NAME="Alabama"')
    ret_one_month = ops_one.execute()

    ops_two = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [2]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                            aggregate=True, calc_grouping='day', prefix='calc',
                            geom_select_sql_where='STATE_NAME="Alabama"')
    ret_two_month = ops_two.execute()

    ops_original = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1, 2]},
                                 spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                                 aggregate=True, calc_grouping='day', prefix='calc',
                                 geom_select_sql_where='STATE_NAME="Alabama"')
    ret_original = ops_original.execute()
    desired = ret_original[1]['forcalc'].variables['sum'].value  # 11.580645161290322

    ops_no_time_region = OcgOperations(dataset=rd, output_format='numpy', spatial_operation='clip',
                                       geom=shp, calc=calc, calc_raw=True, aggregate=True,
                                       calc_grouping='day', prefix='calc',
                                       geom_select_sql_where='STATE_NAME="Alabama"')
    ret_no_time_region = ops_no_time_region.execute()
    field = ret_no_time_region[1]['forcalc']
    indices = []
    for idx in range(field.temporal.shape[0]):
        the_time = field.temporal.value_datetime[idx]
        if the_time.month in [1, 2]:
            indices.append(idx)
    var_sub = field.variables['sum'][:, indices, :, :, :]
    actual = var_sub.value
    self.assertNumpyAll(actual, desired)
def test_system_through_operations(self):
    calc = [{'func': MockMultiParamFunction.key, 'name': 'my_mvp', 'kwds': self.parms_for_test}]
    ops = OcgOperations(dataset=self.fields_for_ops_test, calc=calc)
    ret = ops.execute()
    actual_variable = ret.get_element(variable_name='my_mvp')
    self.assertEqual(actual_variable.get_value().tolist(), self.desired_value)

    ops = OcgOperations(dataset=self.fields_for_ops_test, calc=calc, output_format='nc')
    ret = ops.execute()
    actual = RequestDataset(ret).get()['my_mvp']
    self.assertEqual(actual.get_value().tolist(), self.desired_value)
def test_disjoint_polygons(self):
    """Test mesh regridding with the source destination containing disjoint polygons."""
    ESMF.Manager(debug=True)
    self.set_debug(True)

    path_shp = os.path.join(self.path_bin, 'three_polygons', 'three_polygons.shp')
    path_out_nc = self.get_temporary_file_path('ugrid.nc')
    path_source_nc = self.get_temporary_file_path('source.nc')
    mesh_name = 'mesh'

    self.log.debug('creating source netcdf')
    row = np.linspace(-1, 1, 10)
    col = np.linspace(-1, 1, 10)
    self.create_source_netcdf_data(path_source_nc, row=row, col=col)
    ops = OcgOperations(dataset={'uri': path_source_nc}, output_format='shp', snippet=True,
                        prefix='source_shp', dir_output=self.path_current_tmp)
    ops.execute()

    self.log.debug('creating ugrid file: {}'.format(path_out_nc))
    gm = GeometryManager('SPECIAL', path=path_shp)
    geoms = [r['geom'] for r in gm.iter_records()]
    mp = MultiPolygon(geoms)
    # mp = box(-0.25, -0.25, 0.25, 0.25)
    records = [{'geom': mp, 'properties': {'UGID': 123}}]
    gm = GeometryManager('UGID', records=records, allow_multipart=True)
    fm = get_flexible_mesh(gm, mesh_name, False, False)
    fm.save_as_netcdf(path_out_nc, kwargs_dataset={'format': 'NETCDF3_CLASSIC'})

    self.log.debug('getting source field')
    srcgrid = ESMF.Grid(filename=path_source_nc, filetype=ESMF.FileFormat.GRIDSPEC,
                        coord_names=['longitude', 'latitude'], add_corner_stagger=True)
    srcfield = get_field_src(srcgrid, path_source_nc, 'pr')

    self.log.debug('getting destination grid')
    dstgrid = ESMF.Mesh(filename=path_out_nc, filetype=ESMF.FileFormat.UGRID, meshname=mesh_name)

    self.log.debug('getting destination field')
    dstfield = ESMF.Field(dstgrid, "dstfield", meshloc=ESMF.MeshLoc.ELEMENT,
                          ndbounds=[srcfield.data.shape[0]])

    self.log.debug('creating regrid object')
    regrid = ESMF.Regrid(srcfield, dstfield, regrid_method=ESMF.RegridMethod.CONSERVE,
                         unmapped_action=ESMF.UnmappedAction.ERROR)

    # With "zero_region" only weighted data will be touched.
    self.log.debug('executing regrid')
    dstfield = regrid(srcfield, dstfield, zero_region=ESMF.Region.SELECT)
    self.assertEqual(dstfield.data.shape, (366, 1))
    print dstfield.data
    self.log.debug('success')
def test(self):
    import logbook

    log = logbook.Logger(name='combos', level=logbook.INFO)
    for key, dataset in self.iter_dataset():
        # if key != 'qed_2013_TNn_annual_min': continue

        # These datasets have only one time element.
        if key in ('qed_2013_TNn_annual_min',
                   'qed_2013_TasMin_seasonal_max_of_seasonal_means',
                   'qed_2013_climatology_Tas_annual_max_of_annual_means',
                   'qed_2013_maurer02v2_median_txxmmedm_january_1971-2000',
                   'qed_2013_maurer02v2_median_txxmmedm_february_1971-2000',
                   'qed_2013_maurer02v2_median_txxmmedm_march_1971-2000',
                   'snippet_maurer_dtr',
                   'snippet_seasonalbias'):
            slc = None
        else:
            slc = [None, [10, 20], None, None, None]

        # This dataset has different data types on the bounds for the coordinate
        # variables. They currently get cast by the software.
        if key == 'maurer_bcca_1991':
            check_types = False
        else:
            check_types = True

        log.debug('processing: {0} ({1})'.format(key, dataset.__class__.__name__))
        ops = OcgOperations(dataset=dataset, output_format='nc', prefix='nc1', slice=slc)
        try:
            log.debug('initial write...')
            ret1 = ops.execute()
        except ValueError:
            # Realization dimensions may not be written to netCDF yet.
            if key == 'cmip3_extraction':
                continue
            else:
                raise
        else:
            try:
                ops2 = OcgOperations(dataset={'uri': ret1}, output_format='nc', prefix='nc2')
                log.debug('second write...')
                ret2 = ops2.execute()
                log.debug('comparing...')
                self.assertNcEqual(ret1, ret2, ignore_attributes={'global': ['history']},
                                   check_types=check_types)
            finally:
                for path in [ret1, ret2]:
                    folder = os.path.split(path)[0]
                    shutil.rmtree(folder)
    log.debug('success')
def test_execute_directory(self):
    """Test that the output directory is removed appropriately following an operations failure."""
    kwds = dict(add_auxiliary_files=[True, False])
    rd = self.test_data.get_rd('cancm4_tas')

    # This geometry is outside the domain and will result in an exception.
    geom = [1000, 1000, 1100, 1100]

    for k in itr_products_keywords(kwds, as_namedtuple=True):
        ops = OcgOperations(dataset=rd, output_format='csv',
                            add_auxiliary_files=k.add_auxiliary_files, geom=geom)
        try:
            ops.execute()
        except ExtentError:
            contents = os.listdir(self.current_dir_output)
            self.assertEqual(len(contents), 0)
def test1d(self):
    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', dir='b')

    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    reference = ocgis.RequestDataset(p1, time_range=ref_range).get()

    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    candidate = ocgis.RequestDataset(p3, time_range=cand_range)

    calc = [{'func': 'dissimilarity', 'name': 'output_1d',
             'kwds': {'target': reference, 'candidate': ('v1',)}}]

    ops = OcgOperations(dataset=candidate, calc=calc)
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual_variables[0], 'dissimilarity')
    dist = actual_field['dissimilarity']
    self.assertEqual(dist.shape, (1, 1, 2, 2))
def test(self):
    path1 = self.write_field_data('data1')
    path2 = self.write_field_data('data2')
    path3 = self.write_field_data('basis_var')

    time_range = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
    rds = [RequestDataset(p, time_range=time_range) for p in [path1, path2]]
    mrd = MultiRequestDataset(rds)

    basis = RequestDataset(path3, time_range=[datetime(2000, 8, 1), datetime(2000, 8, 31)])
    basis_field = basis.get()

    calc = [{'func': 'mfpf', 'name': 'output_mfpf',
             'kwds': {'reference': ('data1', 'data2'), 'basis': basis_field}}]
    ops = OcgOperations(dataset=mrd, calc=calc)
    ret = ops.execute()

    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual_variables, ('diff_data1_basis_var', 'diff_data2_basis_var'))
    sums = [v.get_value().sum() for v in actual_field.data_variables]
    for s in sums:
        self.assertAlmostEqual(s, 7.8071042497325145)
def run_op(resource, calc, options):
    """Create an OCGIS operation, launch it and return the results."""
    from os.path import abspath, curdir
    from ocgis import OcgOperations, RequestDataset, env
    import uuid

    LOGGER.info('Start ocgis module call function')

    # Prepare the environment.
    env.OVERWRITE = True
    dir_output = abspath(curdir)
    prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    rd = [RequestDataset(val, variable=key if key != 'resource' else None)
          for key, val in resource.items()]

    ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=options['calc_grouping'],
                        dir_output=dir_output, prefix=prefix, add_auxiliary_files=False,
                        output_format='nc')
    return ops.execute()
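# A minimal usage sketch for run_op() above. The file path, variable name, and
# calculation below are hypothetical placeholders, not values from the original code.
resource = {'tas': '/path/to/tas_day.nc'}  # hypothetical variable-to-file mapping
calc = [{'func': 'mean', 'name': 'tas_mean'}]
options = {'calc_grouping': ['month']}
path_nc = run_op(resource, calc, options)  # path of the netCDF file written by ops.execute()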
def __iter__(self):
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        kwds = {}
        for val in values:
            kwds.update(val)
        if not self.ops_only:
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    yld = (ii, ops)
                else:
                    ret = ops.execute()
                    yld = (ii, ops, ret)
                yield yld
            except Exception as e:
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
def test_subset_with_shapefile_no_ugid(self):
    """Test a subset operation using a shapefile without a UGID attribute."""
    output_format = [constants.OUTPUT_FORMAT_NUMPY, constants.OUTPUT_FORMAT_CSV_SHAPEFILE]
    geom = self.get_shapefile_path_with_no_ugid()
    geom_select_uid = [8, 11]
    geom_uid = 'ID'
    rd = self.test_data.get_rd('cancm4_tas')
    for of in output_format:
        ops = OcgOperations(dataset=rd, geom=geom, geom_select_uid=geom_select_uid,
                            geom_uid=geom_uid, snippet=True, output_format=of)
        self.assertEqual(len(ops.geom), 2)
        ret = ops.execute()
        if of == constants.OUTPUT_FORMAT_NUMPY:
            for element in geom_select_uid:
                self.assertIn(element, ret)
            self.assertEqual(ret.properties[8].dtype.names,
                             ('STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'))
        else:
            with open(ret) as f:
                reader = DictReader(f)
                row = reader.next()
                self.assertIn(geom_uid, row.keys())
                self.assertNotIn(env.DEFAULT_GEOM_UID, row.keys())
            shp_path = os.path.split(ret)[0]
            shp_path = os.path.join(shp_path, 'shp', '{0}_gid.shp'.format(ops.prefix))
            with fiona.open(shp_path) as source:
                record = source.next()
                self.assertIn(geom_uid, record['properties'])
                self.assertNotIn(env.DEFAULT_GEOM_UID, record['properties'])
def test_esmf(self):
    rd1 = RequestDataset(**self.get_dataset())
    rd2 = deepcopy(rd1)
    ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
    ret = ops.execute()

    ignore_attributes = {'time_bnds': ['units', 'calendar'], 'global': ['history'],
                         'foo': ['grid_mapping']}
    ignore_variables = ['latitude_longitude']
    self.assertNcEqual(ret, rd1.uri, ignore_attributes=ignore_attributes,
                       ignore_variables=ignore_variables)
def test_system_through_operations(self):
    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd)
    ret = ops.execute()
    field = ret.get_element()
    actual = get_variable_names(field.data_variables)
    self.assertEqual(actual, self.f_variable_names)

    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd, output_format='nc')
    ret = ops.execute()
    actual_field = RequestDataset(ret).get()
    actual = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual, self.f_variable_names)

    actual_diff = actual_field.data_variables[1].get_value() - \
        actual_field.data_variables[0].get_value()
    self.assertAlmostEqual(actual_diff.mean(), 1.0)
def test_esmf(self):
    rd1 = RequestDataset(**self.get_dataset())
    rd2 = deepcopy(rd1)
    ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
    ret = ops.execute()

    actual_value = RequestDataset(ret).get().data_variables[0].get_value()
    desired_value = rd1.get().data_variables[0].get_value()
    self.assertNumpyAllClose(actual_value, desired_value)
def test_shapefile_through_operations(self):
    path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    rd = RequestDataset(path)
    field = rd.get()
    ops = OcgOperations(dataset=rd, output_format='shp')
    ret = ops.execute()

    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    self.assertAsSetEqual(list(field.keys()) + [HeaderName.ID_GEOMETRY], list(field2.keys()))
    self.assertEqual((51,), field2.data_variables[0].shape)
def test_write(self):
    # Test melted format.
    for melted in [False, True]:
        kwargs_ops = dict(melted=melted)
        kwargs_conv = dict(outdir=tempfile.mkdtemp(dir=self.current_dir_output))
        conv = self.get(kwargs_ops=kwargs_ops, kwargs_conv=kwargs_conv)
        csv_path = conv.write()
        self.assertTrue(os.path.exists(csv_path))
        self.assertEqual(conv._ugid_gid_store,
                         {1: {18: [5988, 5989, 5990, 6116, 6117, 6118], 15: [5992, 6119, 6120]}})

        shp_path = os.path.split(csv_path)[0]
        shp_path = os.path.join(shp_path, 'shp')
        shp_path_gid = os.path.join(shp_path, 'foo_gid.shp')
        target = RequestDataset(shp_path_gid).get()
        self.assertEqual(target.shape[-1], 9)
        shp_path_ugid = os.path.join(shp_path, 'foo_ugid.shp')
        target = RequestDataset(shp_path_ugid).get()
        self.assertEqual(target.shape[-1], 2)

    # Test aggregating the selection geometry.
    rd1 = self.test_data.get_rd('cancm4_tasmax_2011')
    rd2 = self.test_data.get_rd('maurer_bccr_1950')
    keywords = dict(agg_selection=[True, False])
    for k in self.iter_product_keywords(keywords):
        ops = OcgOperations(dataset=[rd1, rd2], snippet=True, output_format='csv-shp',
                            geom='state_boundaries', agg_selection=k.agg_selection,
                            select_ugid=[32, 47], prefix=str(k.agg_selection))
        ret = ops.execute()
        directory = os.path.split(ret)[0]

        path_ugid = os.path.join(directory, 'shp', '{0}_ugid.shp'.format(ops.prefix))
        with fiona.open(path_ugid) as source:
            records = list(source)
        if k.agg_selection:
            uids = [1]
        else:
            uids = [32, 47]
        self.assertEqual([r['properties'][env.DEFAULT_GEOM_UID] for r in records], uids)

        path_gid = os.path.join(directory, 'shp', '{0}_gid.shp'.format(ops.prefix))
        with fiona.open(path_gid) as source:
            uid = [r['properties'][env.DEFAULT_GEOM_UID] for r in source]
        if k.agg_selection:
            self.assertAsSetEqual(uid, [1])
        else:
            uid = np.array(uid)
            self.assertEqual(np.sum(uid == 32), 1915)
            self.assertEqual(np.sum(uid == 47), 923)

        meta = os.path.join(os.path.split(ret)[0], '{0}_source_metadata.txt'.format(ops.prefix))
        with open(meta, 'r') as f:
            lines = f.readlines()
        self.assertTrue(len(lines) > 50)
def test_system_many_request_datasets(self):
    """Test numerous request datasets."""
    rd_base = self.test_data.get_rd('cancm4_tas')
    geom = [-74.0, 40.0, -72.0, 42.0]
    rds = [deepcopy(rd_base) for _ in range(500)]
    for rd in rds:
        ops = OcgOperations(dataset=rd, geom=geom, snippet=True)
        ret = ops.execute()
        actual = ret.get_element(variable_name='tas').shape
        self.assertEqual(actual, (1, 2, 1))
def test_shapefile_through_operations(self):
    path = ShpCabinet().get_shp_path('state_boundaries')
    rd = RequestDataset(path)
    field = rd.get()
    self.assertIsNone(field.spatial.properties)
    ops = OcgOperations(dataset=rd, output_format='shp')
    ret = ops.execute()

    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
    self.assertEqual(field.shape, field2.shape)
def test_shapefile_through_operations_subset(self):
    path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    rd = RequestDataset(path)
    field = rd.get()
    self.assertIsNone(field.spatial.properties)
    ops = OcgOperations(dataset=rd, output_format='shp', geom=path, select_ugid=[15])
    ret = ops.execute()

    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
    self.assertEqual(tuple([1] * 5), field2.shape)
def test_sql_where_through_operations(self):
    """Test using a SQL where statement to select some geometries."""
    states = ("Wisconsin", "Vermont")
    s = 'STATE_NAME in {0}'.format(states)
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True)
    ret = ops.execute()
    self.assertEqual(len(ret.children), 2)
    self.assertEqual(list(ret.children.keys()), [8, 10])
    for v in ret.properties.values():
        self.assertIn(v['STATE_NAME'], states)

    # Make sure the SQL select has preference over the UID selection.
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True,
                        geom_select_uid=[500, 600, 700])
    ret = ops.execute()
    self.assertEqual(len(ret.children), 2)
    for v in ret.properties.values():
        self.assertIn(v['STATE_NAME'], states)

    # Test the possible interaction with geom_uid.
    path = self.get_shapefile_path_with_no_ugid()
    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s)
    ret = ops.execute()
    self.assertEqual(list(ret.children.keys()), [7, 9])

    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s, geom_uid='ID')
    ret = ops.execute()
    self.assertEqual(list(ret.children.keys()), [13, 15])
def doCalc(species):
    print 'Working on %s' % (species)

    # Directory holding climate data.
    DATA_DIR = 'G:/WSI data verification/dataverification'

    # Data returns will overwrite in this case. Use with caution!!
    env.OVERWRITE = True
    env.DIR_SHPCABINET = DATA_DIR
    env.DIR_OUTPUT = DATA_DIR

    # Always start with a snippet (if there are no calculations!).
    SNIPPET = False
    # yearstr = str(year)

    # Filename to variable name mapping.
    uri = ('G:/WSI data verification/dataverification/' + species +
           '/WSI_OCGIS_' + species + '.1979_2013.nc')
    shp = 'G:/WSI data verification/dataverification/duckzone.shp'

    # RequestDatasetCollection #######################################################
    rdc = RequestDataset(uri, 'forcalc')

    # Return daily sum.
    calc = [{'func': 'sum', 'name': 'sum'}]

    # Write to Shapefile #############################################################
    prefix = 'WSI_DZ_' + species
    # print('returning shapefile for ' + species)
    ops = OcgOperations(dataset=rdc, output_format='shp',
                        time_region={'month': [1, 2, 3, 4, 9, 10, 11, 12]},
                        spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                        aggregate=True, calc_grouping=['day', 'month', 'year'], prefix=prefix)
    ops.execute()
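# Hedged usage sketch for doCalc() above: the species code is a placeholder taken from
# the file naming pattern seen elsewhere in this collection ('abdu').
doCalc('abdu')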
def test_system_through_operations(self):
    """Test calculation through operations."""
    row = Variable(name='y', value=[1, 2, 3, 4], dimensions='y')
    col = Variable(name='x', value=[10, 11, 12], dimensions='x')
    grid = Grid(col, row)
    time = TemporalVariable(name='time', value=[1, 2], dimensions='time')
    data = Variable(name='data', dimensions=[time.dimensions[0]] + list(grid.dimensions))
    data.get_value()[0, :] = 1
    data.get_value()[1, :] = 2
    field = Field(grid=grid, time=time, is_data=data)

    calc = [{'func': 'sum', 'name': 'sum'}]
    ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True,
                        aggregate=True)
    ret = ops.execute()
    actual = ret.get_element(variable_name='sum').get_masked_value().flatten()
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
def test_full(self):
    """Compute the dissimilarity with all metrics."""
    from flyingpigeon import dissimilarity
    from matplotlib import pyplot as plt

    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p2 = self.write_field_data('v2', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', ncol=11, nrow=10, dir='c')
    p4 = self.write_field_data('v2', ncol=11, nrow=10, dir='c')

    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    ref = [ocgis.RequestDataset(p, time_range=ref_range) for p in [p1, p2]]
    reference = ocgis.MultiRequestDataset(ref)
    reference = reference.get()

    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    can = [ocgis.RequestDataset(p, time_range=cand_range) for p in [p3, p4]]
    candidate = ocgis.MultiRequestDataset(can)

    fig, axes = plt.subplots(2, 3)
    for i, dist in enumerate(dissimilarity.__all__):
        calc = [{'func': 'dissimilarity', 'name': 'output_mfpf',
                 'kwds': {'target': reference, 'candidate': ('v1', 'v2'), 'dist': dist}}]
        ops = OcgOperations(dataset=candidate, calc=calc)
        ret = ops.execute()
        out_field = ret.get_element()
        var_name = get_variable_names(out_field.data_variables)[0]
        out = out_field[var_name].get_value()[0, 0]
        axes.flat[i].imshow(out)
        axes.flat[i].set_title(dist)

    path = os.path.join(test_output_path, 'test_spatial_analog_metrics.png')
    plt.savefig(path)
    plt.close()
def test_subset_with_shapefile_no_ugid(self):
    """Test a subset operation using a shapefile without a UGID attribute."""
    output_format = [constants.OutputFormatName.OCGIS, constants.OutputFormatName.CSV_SHAPEFILE]
    geom = self.get_shapefile_path_with_no_ugid()
    geom_select_uid = [8, 11]
    geom_uid = 'ID'
    rd = self.test_data.get_rd('cancm4_tas')
    for of in output_format:
        ops = OcgOperations(dataset=rd, geom=geom, geom_select_uid=geom_select_uid,
                            geom_uid=geom_uid, snippet=True, output_format=of)
        self.assertEqual(len(ops.geom), 2)
        ret = ops.execute()
        if of == constants.OutputFormatName.OCGIS:
            for element in geom_select_uid:
                self.assertIn(element, ret.children)
            self.assertAsSetEqual(list(ret.properties[8].keys()),
                                  ['STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'])
        else:
            with open(ret) as f:
                reader = DictReader(f)
                row = next(reader)
                self.assertIn(geom_uid, list(row.keys()))
                self.assertNotIn(env.DEFAULT_GEOM_UID, list(row.keys()))
            shp_path = os.path.split(ret)[0]
            shp_path = os.path.join(shp_path, 'shp', '{0}_gid.shp'.format(ops.prefix))
            with fiona.open(shp_path) as source:
                record = next(source)
                self.assertIn(geom_uid, record['properties'])
                self.assertNotIn(env.DEFAULT_GEOM_UID, record['properties'])
def __iter__(self):
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        if self.n_only:
            yield ii
            continue
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        yield ii
        kwds = {}
        for val in values:
            # Check for environmental parameters.
            if val.keys()[0].isupper():
                setattr(env, val.keys()[0], val.values()[0])
            else:
                kwds.update(val)
        if not self.ops_only:
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    pass
                else:
                    ret = ops.execute()
            except Exception as e:
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
            env.reset()
def test_build(self):
    path = self.get_shapefile_path_with_no_ugid()
    keywords = dict(geom_uid=['ID', None])
    rd = self.test_data.get_rd('cancm4_tas')
    for k in self.iter_product_keywords(keywords):
        if k.geom_uid is None:
            geom_select_uid = None
        else:
            geom_select_uid = [8]
        ops = OcgOperations(dataset=rd, geom=path, geom_uid=k.geom_uid,
                            geom_select_uid=geom_select_uid, snippet=True)
        coll = ops.execute()
        conv = CsvShapefileConverter([coll], outdir=self.current_dir_output, prefix='foo',
                                     overwrite=True, ops=ops)
        ret = conv._build_(coll)
        if k.geom_uid is None:
            actual = env.DEFAULT_GEOM_UID
        else:
            actual = k.geom_uid
        actual = [constants.HEADERS.ID_DATASET.upper(), actual,
                  constants.HEADERS.ID_GEOMETRY.upper()]
        self.assertEqual(actual, ret['fiona_object'].meta['schema']['properties'].keys())
def _handler(self, request, response):
    try:
        ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
        ocgis.env.OVERWRITE = True
        nc_files = archiveextract(resource=rename_complexinputs(request.inputs['resource']))

        rd = RequestDataset(nc_files)
        rd.dimension_map.set_bounds('time', None)

        if nc_files[0][-3:] == '.nc':
            out_prefix = nc_files[0][:-3] + '_merged'
        else:
            out_prefix = nc_files[0] + '_merged'

        ops = OcgOperations(dataset=rd, output_format='nc', prefix=out_prefix)
        ret = ops.execute()

        response.outputs['output'].file = ret
        response.outputs['output'].output_format = Format('application/x-netcdf')
        return response
    except:
        raise Exception(traceback.format_exc())
def merge(resource, dir_output=None, historical_concatination=False):
    """
    Returns a list of paths to sorted and merged netCDF files. Files are sorted according
    to filename (in DRS convention) and appropriate netCDF files are merged.

    :param resource: list of netCDF file paths; an equal domain and the DRS name
                     convention are required for merging
    :param historical_concatination: concatenation of historical files to RCP scenarios
                                     (default = False)
    :param dir_output: path to the directory for output
    """
    from os.path import curdir, basename, join
    from os import rename
    # from tempfile import mkdtemp
    import utils
    from ocgis import RequestDataset, OcgOperations

    # if type(resource) == list:
    #     resource = {'merge': resource}

    res_dic = utils.sort_by_filename(resource, historical_concatination=historical_concatination)
    merged_files = []
    if dir_output is None:
        dir_output = curdir
    for key in res_dic:
        if len(res_dic[key]) > 1:
            ncs = res_dic[key]
            var = key.split('_')[0]
            rd = RequestDataset(uri=ncs, variable=var)
            ops = OcgOperations(dataset=rd, prefix=key, output_format='nc',
                                dir_output=dir_output, add_auxiliary_files=False)
            m_file = ops.execute()
            var = key.split('_')[0]
            merged_files.append(utils.drs_filename(m_file, variable=var))
        else:
            bn = basename(res_dic[key][0])
            newname = str(join(dir_output, bn))
            # Rename using the full source path so this works regardless of the
            # current working directory.
            rename(res_dic[key][0], newname)
            merged_files.append(newname)
    return merged_files
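# Hedged usage sketch for merge() above; the DRS-style file names are hypothetical.
nc_files = ['tas_EUR-11_historical_r1i1p1_19710101-19801231.nc',
            'tas_EUR-11_historical_r1i1p1_19810101-19901231.nc']
merged = merge(nc_files, dir_output='/tmp')  # one merged file per DRS group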
def test_ocgis_average():
    v1 = TESTDATA['cmip3_tasmin_sresa2_da_nc'][6:]
    v2 = TESTDATA['cmip3_tasmax_sresa2_da_nc'][6:]
    rd1 = RequestDataset(v1)
    rd2 = RequestDataset(v2)
    ops = OcgOperations([rd1, rd2],
                        calc=[{'func': 'average', 'name': 'tas',
                               'kwds': {'v1': 'tasmin', 'v2': 'tasmax'}}])
    ret = ops.execute()
    t = ret.get_element()['tas'][0, :, :].get_value()
    t1 = rd1.get_field()['tasmin'][0, :, :].get_value()
    t2 = rd2.get_field()['tasmax'][0, :, :].get_value()
    # aaae is assumed to be numpy.testing.assert_array_almost_equal.
    aaae(np.mean([t1, t2], axis=0), t)
def test_calculate_operations(self):
    """Test calculation through operations."""
    row = VectorDimension(value=[1, 2, 3, 4])
    col = VectorDimension(value=[10, 11, 12])
    grid = SpatialGridDimension(row=row, col=col)
    spatial = SpatialDimension(grid=grid)
    time = TemporalDimension(value=[1, 2])
    field = Field(spatial=spatial, temporal=time)
    data = np.zeros((1, 2, 1, 4, 3), dtype=float)
    data[:, 0, :] = 1
    data[:, 1, :] = 2
    var = Variable(value=data, name='data')
    field.variables.add_variable(var)

    calc = [{'func': 'sum', 'name': 'sum'}]
    ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True,
                        aggregate=True)
    ret = ops.execute()
    actual = ret[1]['data'].variables['sum'].value.flatten()
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
# Compute a custom percentile basis using ICCLIM.

# Path to CF climate dataset. This example uses the same file for indice and percentile
# basis calculation.
in_file = '/path/to/cf_data.nc'

# Subset the input dataset to return the desired base period for the percentile basis.
variable = 'tas'
years = range(2001, 2003)  # A custom date range may be required for your data.
time_region = {'year': years}
rd = RequestDataset(uri=in_file, variable=variable)
field = rd.create_field()
field = field.time.get_time_region(time_region).parent

# Calculate the percentile basis. The data values must be a three-dimensional array.
arr = field[variable].get_masked_value().squeeze()  # Field data to use for the calculation.
dt_arr = field.temporal.value_datetime  # An array of datetime objects.
percentile = 90
window_width = 5
# ICCLIM requires the calendar and units for the calculation.
t_calendar, t_units = field.time.calendar, field.time.units
percentile_dict = IcclimTG90p.get_percentile_dict(arr, dt_arr, percentile, window_width,
                                                  t_calendar, t_units)

########################################################################################

# Calculate the indice using the custom percentile basis. Depending on the size of the
# data, this computation may take some time...
calc = [{'func': 'icclim_TG90p', 'name': 'TG90p', 'kwds': {'percentile_dict': percentile_dict}}]
calc_grouping = 'month'

# Returns data as an in-memory spatial collection.
ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping)
coll = ops.execute()
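# Optional follow-up sketch (not part of the original script): write the indice to a
# netCDF file instead of keeping it in memory; the prefix below is a hypothetical name.
ops_nc = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping,
                       output_format='nc', prefix='tg90p_custom_basis')
path_nc = ops_nc.execute()  # Returns the path to the written file.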
def test_combinatorial_projection_with_geometries(self):
    # self.get_ret(kwds={'output_format': 'shp', 'prefix': 'as_polygon'})
    # self.get_ret(kwds={'output_format': 'shp', 'prefix': 'as_point', 'abstraction': 'point'})

    features = [
        {'NAME': 'a',
         'wkt': 'POLYGON((-105.020430 40.073118,-105.810753 39.327957,-105.660215 38.831183,-104.907527 38.763441,-104.004301 38.816129,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-104.327957 40.201075,-104.327957 40.201075,-105.020430 40.073118))'},
        {'NAME': 'b',
         'wkt': 'POLYGON((-102.212903 39.004301,-102.905376 38.906452,-103.311828 37.694624,-103.326882 37.295699,-103.898925 37.220430,-103.846237 36.746237,-102.619355 37.107527,-102.634409 37.724731,-101.874194 37.882796,-102.212903 39.004301))'},
        {'NAME': 'c',
         'wkt': 'POLYGON((-105.336559 37.175269,-104.945161 37.303226,-104.726882 37.175269,-104.696774 36.844086,-105.043011 36.693548,-105.283871 36.640860,-105.336559 37.175269))'},
        {'NAME': 'd',
         'wkt': 'POLYGON((-102.318280 39.741935,-103.650538 39.779570,-103.620430 39.448387,-103.349462 39.433333,-103.078495 39.606452,-102.325806 39.613978,-102.325806 39.613978,-102.333333 39.741935,-102.318280 39.741935))'},
    ]

    for filename in ['polygon', 'point']:
        if filename == 'point':
            geometry = 'Point'
            to_write = deepcopy(features)
            for feature in to_write:
                geom = wkt.loads(feature['wkt'])
                feature['wkt'] = geom.centroid.wkt
        else:
            to_write = features
            geometry = 'Polygon'
        path = os.path.join(self.current_dir_output, 'ab_{0}.shp'.format(filename))
        with FionaMaker(path, geometry=geometry) as fm:
            fm.write(to_write)

    no_bounds_nc = SimpleNcNoBounds()
    no_bounds_nc.write()
    no_bounds_uri = os.path.join(env.DIR_OUTPUT, no_bounds_nc.filename)

    no_level_nc = SimpleNcNoLevel()
    no_level_nc.write()
    no_level_uri = os.path.join(env.DIR_OUTPUT, no_level_nc.filename)

    ocgis.env.DIR_SHPCABINET = self.current_dir_output
    # ocgis.env.DEBUG = True
    # ocgis.env.VERBOSE = True

    aggregate = [False, True]
    spatial_operation = ['intersects', 'clip']
    epsg = [2163, 4326, None]
    output_format = [constants.OUTPUT_FORMAT_NETCDF, constants.OUTPUT_FORMAT_SHAPEFILE,
                     constants.OUTPUT_FORMAT_CSV_SHAPEFILE]
    abstraction = ['polygon', 'point', None]
    dataset = [self.get_dataset(),
               {'uri': no_bounds_uri, 'variable': 'foo'},
               {'uri': no_level_uri, 'variable': 'foo'}]
    geom = ['ab_polygon', 'ab_point']
    calc = [None, [{'func': 'mean', 'name': 'my_mean'}]]
    calc_grouping = ['month']

    args = (aggregate, spatial_operation, epsg, output_format, abstraction, geom, calc, dataset)
    for ii, tup in enumerate(itertools.product(*args)):
        a, s, e, o, ab, g, c, d = tup

        if os.path.split(d['uri'])[1] == 'test_simple_spatial_no_bounds_01.nc':
            unbounded = True
        else:
            unbounded = False

        if o == constants.OUTPUT_FORMAT_NETCDF and e == 4326:
            output_crs = CFWGS84()
        else:
            output_crs = CoordinateReferenceSystem(epsg=e) if e is not None else None

        kwds = dict(aggregate=a, spatial_operation=s, output_format=o, output_crs=output_crs,
                    geom=g, abstraction=ab, dataset=d, prefix=str(ii), calc=c,
                    calc_grouping=calc_grouping)

        try:
            ops = OcgOperations(**kwds)
            ret = ops.execute()
        except DefinitionValidationError:
            if o == constants.OUTPUT_FORMAT_NETCDF:
                if e not in [4326, None]:
                    continue
                if s == 'clip':
                    continue
            else:
                raise
        except ExtentError:
            if unbounded or ab == 'point':
                continue
            else:
                raise
        except ValueError:
            if unbounded and ab == 'polygon':
                continue

        if o == constants.OUTPUT_FORMAT_SHAPEFILE:
            ugid_path = os.path.join(self.current_dir_output, ops.prefix,
                                     ops.prefix + '_ugid.shp')
        else:
            ugid_path = os.path.join(self.current_dir_output, ops.prefix,
                                     constants.OUTPUT_FORMAT_SHAPEFILE,
                                     ops.prefix + '_ugid.shp')

        if o != constants.OUTPUT_FORMAT_NETCDF:
            with fiona.open(ugid_path, 'r') as f:
                if e:
                    second = output_crs
                else:
                    second = CoordinateReferenceSystem(epsg=4326)
                self.assertEqual(CoordinateReferenceSystem(value=f.meta['crs']), second)

        if o == constants.OUTPUT_FORMAT_SHAPEFILE:
            with fiona.open(ret, 'r') as f:
                if a and ab == 'point':
                    second = 'MultiPoint'
                elif ab is None:
                    field = RequestDataset(uri=d['uri'], variable='foo').get()
                    second = field.spatial.geom.get_highest_order_abstraction().geom_type
                else:
                    second = ab.title()

                if second in ['Polygon', 'MultiPolygon']:
                    second = ['Polygon', 'MultiPolygon']
                elif second in ['Point', 'MultiPoint']:
                    second = ['Point', 'MultiPoint']

                self.assertTrue(f.meta['schema']['geometry'] in second)
def test_calc_sample_size(self):
    rd1 = self.get_dataset()
    rd1['alias'] = 'var1'
    rd2 = self.get_dataset()
    rd2['alias'] = 'var2'

    dataset = [
        # RequestDatasetCollection([rd1]),
        RequestDatasetCollection([rd1, rd2])
    ]
    calc_sample_size = [
        True,
        # False
    ]
    calc = [
        [{'func': 'mean', 'name': 'mean'}, {'func': 'max', 'name': 'max'}],
        # [{'func': 'ln', 'name': 'ln'}],
        # None,
        # [{'func': 'divide', 'name': 'divide', 'kwds': {'arr1': 'var1', 'arr2': 'var2'}}]
    ]
    calc_grouping = [
        # None,
        ['month'],
        # ['month', 'year']
    ]
    output_format = ['numpy']

    for ii, tup in enumerate(itertools.product(dataset, calc_sample_size, calc,
                                               calc_grouping, output_format)):
        kwds = dict(zip(['dataset', 'calc_sample_size', 'calc', 'calc_grouping',
                         'output_format'], tup))
        kwds['prefix'] = str(ii)

        try:
            ops = OcgOperations(**kwds)
        except DefinitionValidationError:
            if kwds['calc'] is not None:
                ## set functions require a temporal grouping otherwise the calculation
                ## is meaningless
                if kwds['calc'][0]['func'] == 'mean' and kwds['calc_grouping'] is None:
                    continue
                ## multivariate functions may not be implemented with sample size = True
                elif kwds['calc_sample_size'] and kwds['calc'][0]['func'] == 'divide':
                    continue
                ## multivariate functions require the correct number of variables
                elif kwds['calc'][0]['func'] == 'divide' and len(kwds['dataset']) == 1:
                    continue
                ## only one request dataset may be written to netCDF at this time
                elif kwds['output_format'] == 'nc' and len(kwds['dataset']) == 2:
                    continue
                else:
                    raise
            ## only one request dataset may be written to netCDF at this time
            elif kwds['output_format'] == 'nc' and len(ops.dataset) == 2:
                continue
            else:
                raise

        ret = ops.execute()

        if kwds['output_format'] == 'nc':
            if kwds['calc_sample_size'] and kwds['calc_grouping']:
                if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                    with self.nc_scope(ret) as ds:
                        self.assertEqual(sum([v.startswith('n_') for v in ds.variables.keys()]), 2)
                        self.assertEqual(ds.variables['n_max'][:].mean(), 30.5)

        if kwds['output_format'] == 'csv':
            if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                with open(ret, 'r') as f:
                    reader = DictReader(f)
                    alias_set = set([row['CALC_ALIAS'] for row in reader])
                if len(kwds['dataset']) == 1:
                    if kwds['calc_sample_size']:
                        self.assertEqual(alias_set, set(['max', 'n_max', 'n_mean', 'mean']))
                    else:
                        self.assertEqual(alias_set, set(['max', 'mean']))
                else:
                    if kwds['calc_sample_size']:
                        self.assertEqual(alias_set, set(['max_var1', 'n_max_var1', 'n_mean_var1',
                                                         'mean_var1', 'max_var2', 'n_max_var2',
                                                         'n_mean_var2', 'mean_var2']))
                    else:
                        self.assertEqual(alias_set, set(['max_var1', 'mean_var1',
                                                         'max_var2', 'mean_var2']))
def call(resource=[], variable=None, dimension_map=None, calc=None, calc_grouping=None,
         conform_units_to=None, memory_limit=None, prefix=None, regrid_destination=None,
         regrid_options='bil', level_range=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None, spatial_wrapping=None,
         time_region=None, time_range=None, dir_output=curdir, output_format='nc'):
    '''
    OCGIS operation call.

    :param resource:
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for the calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to:
    :param memory_limit: limit the amount of data to be loaded into memory at once;
                         if None (default), free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file path to a netCDF file with the grid for the output file
    :param geom: name of a shapefile stored in the birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param regrid_destination: file containing the target grid (griddes.txt or netCDF file)
    :param regrid_options: methods for regridding:
                           'bil' = Bilinear interpolation
                           'bic' = Bicubic interpolation
                           'dis' = Distance-weighted average remapping
                           'nn' = nearest neighbour
                           'con' = First-order conservative remapping
                           'laf' = largest area fraction remapping
    :param search_radius_mult: search radius for point geometries. All included grid boxes
                               will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for the appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets,
                             options: None (default), 'wrap', 'unwrap'
    :param time_region: select a single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: output directory (default: curdir)
    :param output_format:
    :return: output file path
    '''
    logger.info('Start ocgis module call function')
    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute
    import uuid

    # Prepare the environment.
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output

    if geom != None:
        spatial_reorder = True
        spatial_wrapping = 'wrap'
    else:
        spatial_reorder = False
        spatial_wrapping = None

    if prefix == None:
        prefix = str(uuid.uuid1())
        env.PREFIX = prefix

    if output_format_options == False:
        output_format_options = None
    elif output_format_options == True:
        output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                                 'variable_kwargs': {'zlib': True, 'complevel': 9}}
    else:
        logger.info('output_format_options are set to %s ' % (output_format_options))

    if type(resource) != list:
        resource = list([resource])

    # Execute ocgis.
    logger.info('Execute ocgis module call function')

    if has_Lambert_Conformal(resource) == True and not geom == None:
        logger.debug('input has Lambert_Conformal projection and can not be subsetted with geom')
        output = None
    else:
        try:
            rd = RequestDataset(resource, variable=variable, level_range=level_range,
                                dimension_map=dimension_map, conform_units_to=conform_units_to,
                                time_region=time_region, time_range=time_range)
            ops = OcgOperations(dataset=rd,
                                output_format_options=output_format_options,
                                spatial_wrapping=spatial_wrapping,
                                spatial_reorder=spatial_reorder,
                                # options=options,
                                calc=calc,
                                calc_grouping=calc_grouping,
                                geom=geom,
                                output_format=output_format,
                                prefix=prefix,
                                search_radius_mult=search_radius_mult,
                                select_nearest=select_nearest,
                                select_ugid=select_ugid,
                                add_auxiliary_files=False)
            logger.info('OcgOperations set')
        except Exception as e:
            logger.debug('failed to setup OcgOperations')
            raise

        # Check the memory load.
        from numpy import sqrt
        from flyingpigeon.utils import FreeMemory

        if memory_limit == None:
            f = FreeMemory()
            mem_kb = f.user_free
            mem_mb = mem_kb / 1024.
            mem_limit = mem_mb / 2.  # set limit to half of the free memory
        else:
            mem_limit = memory_limit

        if mem_limit >= 1024. * 4:
            mem_limit = 1024. * 4  # 475.0 MB for openDAP

        data_kb = ops.get_base_request_size()['total']
        data_mb = data_kb / 1024.

        if variable == None:
            variable = rd.variable
            logger.info('%s as variable detected' % (variable))

        # data_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
        logger.info('data_mb = %s ; memory_limit = %s ' % (data_mb, mem_limit))

    if data_mb <= mem_limit:  # input is smaller than half of the free memory size
            try:
                logger.info('ocgis module call as ops.execute()')
                geom_file = ops.execute()
            except Exception as e:
                logger.debug('failed to execute ocgis operation')
                raise
    else:
            ##########################
            # calculation of chunk size
            ##########################
            size = ops.get_base_request_size()
            nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
            element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                                   size['variables'][variable]['value']['shape'])
            element_in_mb = element_in_kb / 1024.
            tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size

            try:
                logger.info('ocgis module call compute with chunks')
                print 'ocgis module call compute with chunks'
                if calc == None:
                    calc = '%s=%s*1' % (variable, variable)
                    logger.info('calc set to = %s ' % calc)
                ops = OcgOperations(dataset=rd,
                                    output_format_options=output_format_options,
                                    spatial_wrapping=spatial_wrapping,
                                    # options=options,
                                    calc=calc,
                                    calc_grouping=calc_grouping,
                                    geom=geom,
                                    output_format=output_format,
                                    prefix=prefix,
                                    search_radius_mult=search_radius_mult,
                                    select_nearest=select_nearest,
                                    select_ugid=select_ugid,
                                    add_auxiliary_files=False)
                geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
            except Exception as e:
                logger.debug('failed to compute ocgis with chunks')
                raise

    logger.info('Succeeded with ocgis module call function')

    ############################################
    # remapping according to regrid information
    ############################################
    if not regrid_destination == None:
        try:
            from tempfile import mkstemp
            from cdo import Cdo
            cdo = Cdo()
            output = '%s.nc' % uuid.uuid1()
            remap = 'remap%s' % regrid_options
            call = [op for op in dir(cdo) if remap in op]
            # Call the matching cdo remap operator directly instead of building and
            # exec'ing a command string (behaviour unchanged).
            output = getattr(cdo, str(call[0]))(regrid_destination, input=geom_file, output=output)
        except Exception as e:
            logger.debug('failed to remap')
            raise
    else:
        output = geom_file
    return output
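# Hedged usage sketch for call() above; the input path and the shapefile name with its
# polygon UGID are hypothetical placeholders.
out_nc = call(resource='/path/to/tas_day.nc', variable='tas',
              calc=[{'func': 'mean', 'name': 'tas_mean'}], calc_grouping=['yr'],
              geom='continents', select_ugid=[8], output_format='nc')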
def robustness_stats(resources, time_range=[None, None], dir_output=None, variable=None):
    """
    Calculate the spatial mean and corresponding standard deviation for an ensemble of
    consistent datasets containing one variable. If a time range is given, the statistical
    values are calculated only in the desired time period.

    :param resources: str or list of str containing the netCDF file paths
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: path to folder to store output files (default: curdir)
    :param variable: variable name contained in the netCDF file. If not set, the variable
                     name gets detected

    :return netCDF files: out_ensmean.nc, out_ensstd.nc
    """
    from ocgis import OcgOperations, RequestDataset, env
    env.OVERWRITE = True

    if variable is None:
        variable = get_variable(resources[0])

    out_means = []
    for resource in resources:
        rd = RequestDataset(resource, variable)
        prefix = basename(resource).replace('.nc', '')
        LOGGER.debug('processing mean of {}'.format(prefix))
        calc = [{'func': 'median', 'name': variable}]  # {'func': 'median', 'name': 'monthly_median'}
        ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=['all'], output_format='nc',
                            prefix='median_' + prefix, time_range=time_range,
                            dir_output=dir_output)
        out_means.append(ops.execute())

    # nc_out = call(resource=resources, calc=[{'func': 'mean', 'name': 'ens_mean'}],
    #               calc_grouping='all', time_region=time_region,
    #               dir_output=dir_output, output_format='nc')

    # Read the values into a numpy array.
    for i, out_mean in enumerate(out_means):
        if i == 0:
            ds = Dataset(out_mean)
            var = ds[variable][:]
            dims = [len(out_means), var[:].shape[-2], var[:].shape[-1]]
            vals = np.empty(dims)
            vals[i, :, :] = np.squeeze(var[:])
            ds.close()
        else:
            ds = Dataset(out_mean)
            vals[i, :, :] = np.squeeze(ds[variable][:])
            ds.close()

    # Calculate the median and standard deviation.
    val_median = np.nanmedian(vals, axis=0)
    val_std = np.nanstd(vals, axis=0)

    # Prepare the output files by copying...
    ensmean_file = 'ensmean_{}_{}_{}.nc'.format(variable,
                                                dt.strftime(time_range[0], '%Y-%m-%d'),
                                                dt.strftime(time_range[1], '%Y-%m-%d'))
    out_ensmean = copyfile(out_means[0], join(dir_output, ensmean_file))

    ensstd_file = 'ensstd_{}_{}_{}.nc'.format(variable,
                                              dt.strftime(time_range[0], '%Y-%m-%d'),
                                              dt.strftime(time_range[1], '%Y-%m-%d'))
    out_ensstd = copyfile(out_means[0], join(dir_output, ensstd_file))

    # Write the values to the files.
    ds_median = Dataset(out_ensmean, mode='a')
    ds_median[variable][:] = val_median
    ds_median.close()

    ds_std = Dataset(out_ensstd, mode='a')
    ds_std[variable][:] = val_std
    ds_std.close()

    LOGGER.info('processing the overall ensemble statistical mean ')

    # prefix = 'ensmean_tg-mean_{}-{}'.format(dt.strftime(time_range[0], '%Y-%m-%d'),
    #                                         dt.strftime(time_range[1], '%Y-%m-%d'))
    # rd = RequestDataset(out_means, var)
    # calc = [{'func': 'mean', 'name': 'mean'}]  # {'func': 'median', 'name': 'monthly_median'}
    # ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=['all'],
    #                     output_format=output_format, prefix='mean_' + prefix,
    #                     time_range=time_range)
    # ensmean = ops.execute()

    return out_ensmean, out_ensstd
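# Hedged usage sketch for robustness_stats() above; file paths are placeholders. A
# concrete time_range is required because it is used to build the output file names.
from datetime import datetime
ensemble = ['/data/tas_model-a.nc', '/data/tas_model-b.nc', '/data/tas_model-c.nc']
out_ensmean, out_ensstd = robustness_stats(ensemble,
                                           time_range=[datetime(1971, 1, 1),
                                                       datetime(2000, 12, 31)],
                                           dir_output='/tmp')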
def call(resource=[], variable=None, dimension_map=None, agg_selection=True, calc=None,
         calc_grouping=None, conform_units_to=None, crs=None, memory_limit=None, prefix=None,
         regrid_destination=None, regrid_options='bil', level_range=None,
         # cdover='python',
         geom=None, output_format_options=None, search_radius_mult=2., select_nearest=False,
         select_ugid=None, spatial_wrapping=None, t_calendar=None, time_region=None,
         time_range=None, dir_output=None, output_format='nc'):
    """
    Call OCGIS operation.

    :param resource: Input netCDF file.
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param agg_selection: For aggregation in case of multiple polygon geoms
    :param calc: ocgis calc syntax for the calculation operation
    :param calc_grouping: time aggregate grouping
    :param cdover: OUTDATED use py-cdo ('python', by default) or cdo from the system ('system')
    :param conform_units_to:
    :param crs: coordinate reference system
    :param memory_limit: limit the amount of data to be loaded into memory at once \
        if None (default) free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file path to a netCDF file with the grid for the output file
    :param geom: name of a shapefile stored in the birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param regrid_destination: file containing the target grid (griddes.txt or netCDF file)
    :param regrid_options: methods for regridding:
        'bil' = Bilinear interpolation
        'bic' = Bicubic interpolation
        'dis' = Distance-weighted average remapping
        'nn' = nearest neighbour
        'con' = First-order conservative remapping
        'laf' = largest area fraction remapping
    :param search_radius_mult: search radius for point geometries. All included grid boxes
        will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for the appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets,
        options: None (default), 'wrap', 'unwrap'
    :param time_region: select a single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: path to folder to store output files (default: curdir)
    :param output_format: format in which results will be returned

    :return: output file path
    """
    LOGGER.info('Start ocgis module call function')
    from ocgis import OcgOperations, RequestDataset, env, DimensionMap, crs
    from ocgis.util.large_array import compute
    from datetime import datetime as dt
    from datetime import date as dd
    from datetime import time as dt_time
    import uuid

    # Prepare the environment.
    env.OVERWRITE = True

    if dir_output is None:
        dir_output = abspath(curdir)

    # Check the time_range format.
    if time_range is not None:
        try:
            LOGGER.debug('time_range type= %s , %s ' % (type(time_range[0]), type(time_range[1])))
            LOGGER.debug('time_range= %s , %s ' % (time_range[0], time_range[1]))
            # if type(time_range[0] is 'datetime.date'):
            if (isinstance(time_range[0], dd) and not isinstance(time_range[0], dt)):
                time_range = [dt.combine(time_range[0], dt.min.time()),
                              dt.combine(time_range[1], dt.min.time())]
                # time_range = [dt.combine(time_range[0], dt_time(12, 0)),
                #               dt.combine(time_range[1], dt_time(12, 0))]
            LOGGER.debug('time_range changed to type= %s , %s '
                         % (type(time_range[0]), type(time_range[1])))
            LOGGER.debug('time_range changed to= %s , %s ' % (time_range[0], time_range[1]))
        except Exception as ex:
            LOGGER.exception('failed to convert data to datetime {}'.format(ex))

    if spatial_wrapping == 'wrap':
        spatial_reorder = True
    else:
        spatial_reorder = False
    LOGGER.debug('spatial_reorder: %s and spatial_wrapping: %s ' % (spatial_reorder, spatial_wrapping))

    if prefix is None:
        prefix = str(uuid.uuid1())
        env.PREFIX = prefix

    # if output_format_options is False:
    #     output_format_options = None
    # elif output_format_options is True:
    #     output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
    #                              'variable_kwargs': {'zlib': True, 'complevel': 9}}
    # else:
    if output_format_options is not None:
        LOGGER.info('output_format_options are set to %s ' % (output_format_options))

    if type(resource) != list:
        resource = list([resource])

    # Execute ocgis.
    LOGGER.info('Execute ocgis module call function')
    try:
        LOGGER.debug('call module dir_output = %s ' % abspath(dir_output))
        rd = RequestDataset(resource, variable=variable, level_range=level_range,
                            dimension_map=dimension_map, conform_units_to=conform_units_to,
                            time_region=time_region, t_calendar=t_calendar, time_range=time_range)
        from ocgis.constants import DimensionMapKey
        rd.dimension_map.set_bounds(DimensionMapKey.TIME, None)
        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            dir_output=dir_output,
                            spatial_wrapping=spatial_wrapping,
                            spatial_reorder=spatial_reorder,
                            # regrid_destination=rd_regrid,
                            # options=options,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            agg_selection=agg_selection,
                            output_format=output_format,
                            prefix=prefix,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        LOGGER.info('OcgOperations set')
    except Exception as ex:
        LOGGER.exception('failed to setup OcgOperations: {}'.format(ex))
        return None

    # TODO: include comparison of dataload to available memory.
    dataload = 1
    available_memory = 2

    try:
        if dataload < available_memory:  # compare dataload to free_memory
            LOGGER.info('ocgis module call as ops.execute()')
            geom_file = ops.execute()
        else:
            # LOGGER.info('ocgis module call as compute(ops)')
            # TODO: estimate the right tile_dimension
            tile_dimension = 10  # default
            LOGGER.info('Not enough memory for data load, ocgis module call compute in chunks')
            geom_file = compute(ops, tile_dimension=tile_dimension, verbose=True)
    except Exception as ex:
        LOGGER.exception('failed to execute ocgis operation : {}'.format(ex))
        return None

    return geom_file
def _handler(self, request, response):
    ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
    ocgis.env.OVERWRITE = True

    tic = dt.now()
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('Execution started at : {}'.format(tic), 1)

    ######################################
    # Read inputs
    ######################################
    try:
        candidate = archiveextract(resource=rename_complexinputs(request.inputs['candidate']))
        target = archiveextract(resource=rename_complexinputs(request.inputs['target']))
        location = request.inputs['location'][0].data
        indices = [el.data for el in request.inputs['indices']]
        dist = request.inputs['dist'][0].data
        dateStartCandidate = request.inputs['dateStartCandidate'][0].data
        dateEndCandidate = request.inputs['dateEndCandidate'][0].data
        dateStartTarget = request.inputs['dateStartTarget'][0].data
        dateEndTarget = request.inputs['dateEndTarget'][0].data
    except Exception as ex:
        msg = 'Failed to read input parameter {}'.format(ex)
        LOGGER.error(msg)
        raise Exception(msg)

    response.update_status('Input parameters ingested', 2)

    ######################################
    # Process inputs
    ######################################
    try:
        point = Point(*map(float, location.split(',')))
        dateStartCandidate = dt.strptime(dateStartCandidate, '%Y-%m-%d')
        dateEndCandidate = dt.strptime(dateEndCandidate, '%Y-%m-%d')
        dateStartTarget = dt.strptime(dateStartTarget, '%Y-%m-%d')
        dateEndTarget = dt.strptime(dateEndTarget, '%Y-%m-%d')
    except Exception as ex:
        msg = 'failed to process inputs {}'.format(ex)
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("init took {}".format(dt.now() - tic))
    response.update_status('Processed input parameters', 3)

    ######################################
    # Extract target time series
    ######################################
    savetarget = False
    try:
        # Using `call` creates a netCDF file in the tmp directory.
        # Here we keep this stuff in memory.
        if savetarget:
            prefix = 'target_ts'
            target_ts = call(resource=target, geom=point, variable=indices,
                             time_range=[dateStartTarget, dateEndTarget],
                             select_nearest=True, prefix=prefix)
            # target_ts = [get_values(prefix + '.nc', ind) for ind in indices]
        else:
            trd = RequestDataset(target, variable=indices,
                                 time_range=[dateStartTarget, dateEndTarget])
            op = OcgOperations(trd, geom=point, select_nearest=True, search_radius_mult=1.75)
            out = op.execute()
            target_ts = out.get_element()
    except Exception as ex:
        msg = 'Target extraction failed {}'.format(ex)
        LOGGER.debug(msg)
        raise Exception(msg)

    response.update_status('Extracted target series', 5)

    ######################################
    # Compute dissimilarity metric
    ######################################
    response.update_status('Computing spatial analog', 6)
    try:
        output = call(resource=candidate,
                      calc=[{'func': 'dissimilarity',
                             'name': 'spatial_analog',
                             'kwds': {'dist': dist,
                                      'target': target_ts,
                                      'candidate': indices}}],
                      time_range=[dateStartCandidate, dateEndCandidate])
    except Exception as ex:
        msg = 'Spatial analog failed: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    add_metadata(output,
                 dist=dist,
                 indices=",".join(indices),
                 target_location=location,
                 candidate_time_range="{},{}".format(dateStartCandidate, dateEndCandidate),
                 target_time_range="{},{}".format(dateStartTarget, dateEndTarget))

    response.update_status('Computed spatial analog', 95)
    response.outputs['output_netcdf'].file = output

    response.update_status('Execution completed', 100)
    LOGGER.debug("Total execution took {}".format(dt.now() - tic))
    return response
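# A standalone sketch of the dissimilarity step above, run through OcgOperations
# directly rather than the `call` wrapper. The file path, variable names and the
# 'seuclidean' distance are hypothetical placeholders; `target_ts` is a field
# extracted at a point, as in _handler.
rd = RequestDataset('/tmp/candidate_indices.nc', variable=['meantemp', 'totalpr'])
ops = OcgOperations(dataset=rd,
                    calc=[{'func': 'dissimilarity',
                           'name': 'spatial_analog',
                           'kwds': {'dist': 'seuclidean',
                                    'target': target_ts,
                                    'candidate': ['meantemp', 'totalpr']}}],
                    output_format='nc')
path = ops.execute()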
def get_segetalflora(resource=[], dir_output=".", culture_type="fallow",
                     climate_type=2, region=None, dimension_map=None):
    """productive worker for segetalflora jobs

    :param resource: list of tas netCDF files. (Any time aggregation is possible)
    :param culture_type: type of culture. Possible values are:
                         'fallow', 'intensive', 'extensive' (default: 'fallow')
    :param climate_type: type of climate: number 1 to 7 or 'all' (default: 2)
    :param region: region for the subset. If None (default), the values will be
                   calculated for Europe
    """
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import calc_grouping, sort_by_filename
    import os
    from os import remove
    from tempfile import mkstemp
    from ocgis import RequestDataset, OcgOperations

    if not os.path.exists(dir_output):
        os.makedirs(dir_output)
    os.chdir(dir_output)

    if region is None:
        region = "Europe"

    if not isinstance(culture_type, list):
        culture_type = [culture_type]
    if not isinstance(climate_type, list):
        climate_type = [climate_type]

    ncs = sort_by_filename(resource)
    print("%s experiments found" % len(ncs))
    print("keys: %s" % ncs.keys())

    # generate the output folder structure:
    dir_netCDF = "netCDF"
    dir_ascii = "ascii"
    dir_netCDF_tas = dir_netCDF + "/tas"
    dir_ascii_tas = dir_ascii + "/tas"

    if not os.path.exists(dir_netCDF):
        os.makedirs(dir_netCDF)
    if not os.path.exists(dir_ascii):
        os.makedirs(dir_ascii)
    if not os.path.exists(dir_netCDF_tas):
        os.makedirs(dir_netCDF_tas)
    if not os.path.exists(dir_ascii_tas):
        os.makedirs(dir_ascii_tas)

    tas_files = []
    for key in ncs.keys():
        try:
            print("process %s" % key)
            calc = [{"func": "mean", "name": "tas"}]
            calc_group = calc_grouping("yr")
            prefix = key.replace(key.split("_")[7], "yr")
            if not os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                nc_tas = clipping(resource=ncs[key], variable="tas", calc=calc,
                                  dimension_map=dimension_map, calc_grouping=calc_group,
                                  prefix=prefix, polygons="Europe",
                                  dir_output=dir_netCDF_tas)[0]
                print("clipping done for %s" % key)
                if os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                    tas_files.append(prefix)
                else:
                    print("clipping failed for %s: no output file exists" % key)
            else:
                print("netCDF file already exists %s" % key)
                nc_tas = os.path.join(dir_netCDF_tas, prefix + ".nc")
        except Exception as e:
            print("clipping failed for %s: %s" % (key, e))

        try:
            asc_tas = os.path.join(dir_ascii_tas, prefix + ".asc")
            if not os.path.exists(asc_tas):
                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_tas, tmp)
                print(cmd)
                os.system(cmd)
                print("translation to ascii done")
                remove_rows(tmp, asc_tas)
                remove(tmp)
                print("rows with missing values removed")
            else:
                print("tas ascii already exists")
            plot_ascii(asc_tas)
        except Exception as e:
            print("translation to ascii failed %s: %s" % (key, e))
            if os.path.exists(tmp):
                remove(tmp)

    tas_files = [os.path.join(dir_netCDF_tas, nc) for nc in os.listdir(dir_netCDF_tas)]

    outputs = []
    for name in tas_files:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    # was `if type(calc) != None:`, which is always True
                    if calc is not None:
                        try:
                            var = "sf%s%s" % (cult, climat)
                            # `.strip(".nc")` strips characters, not the suffix;
                            # replace the extension instead.
                            prefix = os.path.basename(name).replace("tas", var).replace(".nc", "")
                            infile = name
                            dir_sf = os.path.join(dir_netCDF, var)
                            if not os.path.exists(dir_sf):
                                os.makedirs(dir_sf)
                            if os.path.exists(os.path.join(dir_sf, prefix + ".nc")):
                                nc_sf = os.path.join(dir_sf, prefix + ".nc")
                                print("netCDF file already exists: %s %s" % (dir_sf, prefix))
                            else:
                                rd = RequestDataset(name, variable="tas", dimension_map=dimension_map)
                                op = OcgOperations(dataset=rd, calc=calc, prefix=prefix,
                                                   output_format="nc", dir_output=dir_sf,
                                                   add_auxiliary_files=False)
                                nc_sf = op.execute()
                                print("segetalflora done for %s" % prefix)
                                outputs.append(prefix)
                            dir_ascii_sf = os.path.join(dir_ascii, var)
                            if not os.path.exists(dir_ascii_sf):
                                os.makedirs(dir_ascii_sf)
                            asc_sf = os.path.join(dir_ascii_sf, prefix + ".asc")
                            if not os.path.exists(asc_sf):
                                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_sf, tmp)
                                os.system(cmd)
                                print("translation to ascii done")
                                remove_rows(tmp, asc_sf)
                                remove(tmp)
                                print("rows with missing values removed")
                            else:
                                print("ascii file already exists")
                            plot_ascii(asc_sf)
                        except Exception as e:
                            print("failed for ascii file: %s %s" % (name, e))
                            if os.path.exists(tmp):
                                remove(tmp)
                    else:
                        print("NO EQUATION found for %s %s" % (cult, climat))
                except Exception as e:
                    print("Segetal flora failed: %s" % e)
    return outputs
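# A minimal usage sketch for get_segetalflora with a hypothetical input file.
# Each (culture_type, climate_type) combination yields one output prefix.
tas_ncs = ['/data/tas_EUR-44_day_19700101-19991231.nc']  # hypothetical
results = get_segetalflora(resource=tas_ncs,
                           dir_output='/tmp/segetalflora',
                           culture_type=['fallow', 'intensive'],
                           climate_type=[2, 3])
print(results)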
os.path.join(p, "{0}_{1}_{2}_se.nc".format(variable, simulation, month)))

filenames = []
for p in filepaths:
    f = os.path.split(p)[-1].split('.')[0]
    filenames.append(f)

# Return only the first time slice ("snippet").
SNIPPET = True

# Data returns won't overwrite in this case.
env.OVERWRITE = False

# where to find the shapefiles
# ocgis.env.DIR_GEOMCABINET = os.path.join(os.getcwd(), os.path.split(ocgis.test.__file__)[0], 'bin')
ocgis.env.DIR_GEOMCABINET = os.path.join(os.getcwd(), "shapefiles")

rds = [RequestDataset(uri=uri, variable=variable, field_name=field_name)
       for uri, var, field_name in zip(filepaths, filenames, var_nc)]

ops = OcgOperations(dataset=rds, spatial_operation='clip', aggregate=True,
                    snippet=SNIPPET, geom='prov_la_p_geo83_f', geom_select_uid=[1])
ret = ops.execute()
# assert len(ret.geoms) == 51
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, regrid_destination=None, regrid_options='bil',
         level_range=None, geom=None, output_format_options=None,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         spatial_wrapping=None, t_calendar=None, time_region=None,
         time_range=None, dir_output=None, output_format='nc'):
    '''
    ocgis operation call

    :param resource:
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to:
    :param memory_limit: limit the amount of data to be loaded into memory at once;
        if None (default), free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file containing the target grid (griddes.txt or netCDF file)
    :param regrid_options: methods for regridding:
        'bil' = Bilinear interpolation
        'bic' = Bicubic interpolation
        'dis' = Distance-weighted average remapping
        'nn'  = nearest neighbour
        'con' = First-order conservative remapping
        'laf' = largest area fraction remapping
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param search_radius_mult: search radius for point geometries. All included grid boxes will be returned
    :param select_nearest: nearest-neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets,
        options: None (default), 'wrap', 'unwrap'
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: output directory (default: curdir)
    :param output_format:
    :return: output file path
    '''
    LOGGER.info('Start ocgis module call function')

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute
    from datetime import datetime as dt
    from datetime import date as dd
    import uuid

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True

    if dir_output is None:
        dir_output = abspath(curdir)

    # Check the time_range format: coerce datetime.date to datetime.datetime.
    # (The original test `type(time_range[0] is 'datetime.date')` was always
    # truthy; isinstance is the correct check.)
    if time_range is not None:
        try:
            LOGGER.debug('time_range type= %s , %s' % (type(time_range[0]), type(time_range[1])))
            if isinstance(time_range[0], dd) and not isinstance(time_range[0], dt):
                time_range = [dt.combine(time_range[0], dt.min.time()),
                              dt.combine(time_range[1], dt.min.time())]
            LOGGER.debug('time_range changed to type= %s , %s' % (type(time_range[0]), type(time_range[1])))
        except Exception:
            LOGGER.exception('failed to convert data to datetime')

    if spatial_wrapping == 'wrap':
        spatial_reorder = True
    else:
        spatial_reorder = False
    LOGGER.debug('spatial_reorder: %s and spatial_wrapping: %s' % (spatial_reorder, spatial_wrapping))

    if prefix is None:
        prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    if output_format_options is not None:
        LOGGER.info('output_format_options are set to %s' % output_format_options)

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    LOGGER.info('Execute ocgis module call function')
    # NOTE: inputs with a Lambert conformal projection cannot be processed with
    # ocgis (https://github.com/NCPP/ocgis/issues/424).
    try:
        LOGGER.debug('call module curdir = %s' % abspath(curdir))
        rd = RequestDataset(resource,
                            variable=variable,
                            level_range=level_range,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region,
                            t_calendar=t_calendar,
                            time_range=time_range)
        # from ocgis.constants import DimensionMapKey
        # rd.dimension_map.set_bounds(DimensionMapKey.TIME, None)
        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            dir_output=dir_output,
                            spatial_wrapping=spatial_wrapping,
                            spatial_reorder=spatial_reorder,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            output_format=output_format,
                            prefix=prefix,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        LOGGER.info('OcgOperations set')
    except Exception:
        LOGGER.exception('failed to setup OcgOperations')
        return None

    try:
        LOGGER.info('ocgis module call as ops.execute()')
        geom_file = ops.execute()
    except Exception:
        LOGGER.exception('failed to execute ocgis operation')
        return None

    # The memory-aware chunked execution path (compare the request size against
    # free memory and fall back to ocgis.util.large_array.compute) was disabled
    # here; see the chunked variant of `call` further below.

    ############################################
    # remapping according to regrid information
    ############################################
    if regrid_destination is not None:
        try:
            from cdo import Cdo
            cdo = Cdo()
            output = '%s.nc' % uuid.uuid1()
            remap = 'remap%s' % regrid_options
            # Resolve the matching cdo remap operator and call it directly
            # instead of building a command string for exec.
            operator = [op for op in dir(cdo) if remap in op][0]
            output = getattr(cdo, operator)(regrid_destination, input=geom_file, output=output)
        except Exception:
            LOGGER.exception('failed to remap')
            raise
    else:
        output = geom_file

    return output
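# A minimal sketch of the regridding path above, assuming a CDO grid
# description file is available. File names are hypothetical.
out = call(resource='/tmp/tas_day.nc',             # hypothetical input
           variable='tas',
           regrid_destination='/tmp/griddes.txt',  # hypothetical target grid description
           regrid_options='con')                   # first-order conservative remapping
print(out)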
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         time_region=None, time_range=None, dir_output=None, output_format='nc'):
    '''
    ocgis operation call

    :param resource:
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to:
    :param memory_limit: limit the amount of data to be loaded into memory at once;
        if None (default), free memory is detected by birdhouse
    :param prefix:
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param search_radius_mult: search radius for point geometries. All included grid boxes will be returned
    :param select_nearest: nearest-neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param time_region:
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output:
    :param output_format:
    :return: output file path
    '''
    print('start ocgis module')
    logger.info('Start ocgis module call function')

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output
    env.PREFIX = prefix

    if output_format_options is False:
        output_format_options = None
    elif output_format_options is True:
        output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                                 'variable_kwargs': {'zlib': True, 'complevel': 9}}
    else:
        logger.info('output_format_options are set to %s' % output_format_options)

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    logger.info('Execute ocgis module call function')
    try:
        rd = RequestDataset(resource, variable=variable,
                            dimension_map=dimension_map,
                            conform_units_to=conform_units_to,
                            time_region=time_region)
        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            output_format=output_format,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        logger.info('OcgOperations set')
    except Exception:
        logger.debug('failed to setup OcgOperations')
        raise

    # check memory load
    from numpy import sqrt
    from functools import reduce
    from flyingpigeon.utils import FreeMemory

    if memory_limit is None:
        f = FreeMemory()
        mem_kb = f.user_free
        mem_mb = mem_kb / 1024.
        mem_limit = mem_mb / 2.  # set limit to half of the free memory
    else:
        mem_limit = memory_limit

    if mem_limit >= 1024. * 4:
        mem_limit = 1024. * 4  # 475.0 MB for openDAP

    data_kb = ops.get_base_request_size()['total']
    data_mb = data_kb / 1024.

    if variable is None:
        variable = rd.variable
        logger.info('%s detected as variable' % variable)

    logger.info('data_mb = %s ; memory_limit = %s' % (data_mb, mem_limit))

    if data_mb <= mem_limit:  # input is smaller than half of the free memory
        logger.info('ocgis module call as ops.execute()')
        try:
            geom_file = ops.execute()
        except Exception:
            logger.debug('failed to execute ocgis operation')
            raise
    else:
        ###########################
        # calculation of chunk size
        ###########################
        size = ops.get_base_request_size()
        nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
        element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                               size['variables'][variable]['value']['shape'])
        element_in_mb = element_in_kb / 1024.
        tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size

        try:
            logger.info('tile_dim = %s; calc = %s' % (tile_dim, calc))
            if calc is None:
                # compute() requires a calculation; fall back to an identity calc.
                calc = '%s=%s*1' % (variable, variable)
                logger.info('calc set to = %s' % calc)
            ops = OcgOperations(dataset=rd,
                                output_format_options=output_format_options,
                                calc=calc,
                                output_format=output_format,  # 'nc' is necessary for chunked execution
                                select_ugid=select_ugid,
                                geom=geom,
                                add_auxiliary_files=False)
            geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
        except Exception:
            logger.debug('failed to compute ocgis operation')
            raise

    logger.info('Succeeded with ocgis module call function')
    return geom_file
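# A worked sketch of the chunk-size estimate used above; the numbers are
# illustrative only. With mem_limit = 2048 MB, 3650 time steps and an element
# size of 0.004 MB, the maximum spatial tile dimension comes out at ~11, so
# compute() would process the grid in 11x11 spatial tiles.
from math import sqrt

mem_limit = 2048.            # MB, hypothetical
nb_time_coordinates = 3650   # time steps, hypothetical
element_in_mb = 0.004        # MB per grid element, hypothetical
tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates))
print(int(tile_dim))         # -> 11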
env.DIR_SHPCABINET = '/Users/ryan.okuinghttons/netCDFfiles/shapefiles/ocgis_data/shp'

## RequestDatasetCollection ####################################################

rdc = RequestDatasetCollection([RequestDataset(os.path.join(DATA_DIR, NCS), 'tas')])

## Return In-Memory ############################################################

## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...).
## A key in the returned dictionary corresponds to a geometry "ugid" with the
## value of type OcgCollection.
print('returning numpy...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True,
                    snippet=SNIPPET, geom='state_boundaries')
path = ops.execute()

## Write to Shapefile ##########################################################

print('returning shapefile...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True,
                    snippet=SNIPPET, geom='state_boundaries', output_format='shp')
path = ops.execute()

## Write All Data to Keyed Format ##############################################

## Without the snippet, we are writing all data to the linked CSV-Shapefile
## output format. The operation will take considerably longer.
print('returning csv+...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True,
                    snippet=False, geom='state_boundaries', output_format='csv+')
# calc = '%s=%s*1' % (variable, variable)
rd = RequestDataset(ncs)
ops = OcgOperations(rd,
                    # time_range=time_range,
                    calc='%s=%s*1' % ('tas', 'tas'),
                    # level_range=level_range,
                    geom=bbox,
                    output_format='nc',
                    prefix='ocgis_module_optimisation',
                    dir_output='/home/nils/data/',
                    add_auxiliary_files=False)

shnip = dt.now()
geom = ops.execute()
shnap = dt.now()
duration = (shnap - shnip).total_seconds()
print("operation performed with execute in {} sec.".format(duration))
print(geom)

tile_dimension = 5  # default
shnip = dt.now()
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)
shnap = dt.now()
duration = (shnap - shnip).total_seconds()
print("operation performed with compute in {} sec.".format(duration))
print(geom)
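# A small follow-up sketch: execute() and compute() should yield equivalent
# values, so the two output files can be compared along the lines of the tests
# above. Assumes the paths from the two timing runs were kept in separate
# (hypothetical) variables instead of both being assigned to `geom`.
field_a = RequestDataset(geom_execute).get()
field_b = RequestDataset(geom_compute).get()
assert field_a['tas'].get_value().tolist() == field_b['tas'].get_value().tolist()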
## Colorado in WGS84 latitude/longitude coordinates.
BBOX = [-109.1, 36.9, -102.0, 41.0]

## Construct RequestDataset Object #############################################

## This object will be reused so just build it once.
rd = RequestDataset(URI_TAS, VAR_TAS)

## Returning NumPy Data Objects ################################################

## The NumPy data type return is the default. Only the geometry and
## RequestDataset are required (except "snippet" of course...). See the
## documentation for the OcgCollection object to understand the return
## structure.
ret = OcgOperations(dataset=rd, geom=BBOX, snippet=SNIPPET).execute()

## Returning Converted Files ###################################################

## At this time, the software will create named temporary directories inside
## env.DIR_OUTPUT. This is to avoid the confusing process of managing overwrites
## etc. The support for managing output files will be improved in future
## releases. The returned value is the absolute path to the file or folder
## depending on the requested format.
output_formats = ['shp', 'csv', 'keyed']
for output_format in output_formats:
    prefix = output_format
    ops = OcgOperations(dataset=rd, geom=BBOX, snippet=SNIPPET,
                        output_format=output_format, prefix=prefix)
    ret = ops.execute()
# Data returns will overwrite in this case. Use with caution!!
env.OVERWRITE = True

# RequestDatasetCollection #############################################################################################

rdc = RequestDatasetCollection([RequestDataset(os.path.join(DATA_DIR, uri), var)
                                for uri, var in NCS.items()])

# Return In-Memory #####################################################################################################

# Data is returned as a dictionary-like object (SpatialCollection) with 51 keys (don't forget Puerto Rico...). A key in
# the returned dictionary corresponds to a geometry "ugid" with the value of type OcgCollection.
print('returning numpy...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET,
                    geom='state_boundaries')
ret = ops.execute()

# Return a SpatialCollection, but only for a target state in a U.S. state boundaries shapefile. In this case, the UGID
# attribute value of 23 is associated with Nebraska.
print('returning numpy for a state...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET,
                    geom='state_boundaries', geom_select_uid=[23])
ret = ops.execute()

# Write to Shapefile ###################################################################################################

print('returning shapefile...')
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET,
                    geom='state_boundaries', output_format='shp')
path = ops.execute()
# Subset the input dataset to return the desired base period for the percentile basis.
variable = 'tas'
years = range(1971, 2001)
time_region = {'year': years}
rd = RequestDataset(uri=in_file, variable=variable)
field = rd.get()
field.get_time_region(time_region)

# Calculate the percentile basis. The data values must be a three-dimensional array.
arr = field.variables[variable].value.squeeze()
dt_arr = field.temporal.value_datetime
percentile = 90
window_width = 5
percentile_dict = IcclimTG90p.get_percentile_dict(arr, dt_arr, percentile, window_width)

########################################################################################################################
# Calculate the indice using the custom percentile basis.

calc = [{'func': 'icclim_TG90p', 'name': 'TG90p', 'kwds': {'percentile_dict': percentile_dict}}]
calc_grouping = 'month'
ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping)
coll = ops.execute()
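# A short follow-up sketch: the return value is a collection, so the computed
# monthly TG90p values can be read back from it. The variable name 'TG90p'
# matches the 'name' entry in the calc definition above.
out_field = coll.get_element()
print(out_field['TG90p'].get_value().shape)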
rdc = RequestDatasetCollection([RequestDataset(os.path.join(DATA_DIR, uri), var)
                                for uri, var in NCS.items()])

## Return In-Memory ############################################################

## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...).
## A key in the returned dictionary corresponds to a geometry "ugid" with the
## value of type OcgCollection.
ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True,
                    snippet=SNIPPET, geom='state_boundaries')
ret = ops.execute()

## Write to Shapefile ##########################################################

ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True,
                    snippet=SNIPPET, geom='state_boundaries', output_format='shp')
path = ops.execute()

## Write All Data to Keyed Format ##############################################

## Without the snippet, we are writing all data to the linked CSV files. The
## operation will take considerably longer.
def call(resource=[], variable=None, dimension_map=None, calc=None,
         calc_grouping=None, conform_units_to=None, memory_limit=None,
         prefix=None, regrid_destination=None, regrid_options='bil',
         level_range=None, geom=None, output_format_options=False,
         search_radius_mult=2., select_nearest=False, select_ugid=None,
         spatial_wrapping=None, t_calendar=None, time_region=None,
         time_range=None, dir_output=curdir, output_format='nc'):
    '''
    ocgis operation call

    :param resource:
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to:
    :param memory_limit: limit the amount of data to be loaded into memory at once;
        if None (default), free memory is detected by birdhouse
    :param level_range: subset of given levels
    :param prefix: string for the file base name
    :param regrid_destination: file containing the target grid (griddes.txt or netCDF file)
    :param regrid_options: methods for regridding:
        'bil' = Bilinear interpolation
        'bic' = Bicubic interpolation
        'dis' = Distance-weighted average remapping
        'nn'  = nearest neighbour
        'con' = First-order conservative remapping
        'laf' = largest area fraction remapping
    :param geom: name of shapefile stored in birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param search_radius_mult: search radius for point geometries. All included grid boxes will be returned
    :param select_nearest: nearest-neighbour selection for point geometries
    :param select_ugid: ugid for appropriate polygons
    :param spatial_wrapping: how to handle coordinates in case of subsets,
        options: None (default), 'wrap', 'unwrap'
    :param time_region: select single month
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: output directory (default: curdir)
    :param output_format:
    :return: output file path
    '''
    logger.info('Start ocgis module call function')

    from ocgis import OcgOperations, RequestDataset, env
    from ocgis.util.large_array import compute
    import uuid

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output

    if geom is not None:
        spatial_reorder = True
        spatial_wrapping = 'wrap'
    else:
        spatial_reorder = False
        spatial_wrapping = None

    if prefix is None:
        prefix = str(uuid.uuid1())
    env.PREFIX = prefix

    if output_format_options is False:
        output_format_options = None
    elif output_format_options is True:
        output_format_options = {'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                                 'variable_kwargs': {'zlib': True, 'complevel': 9}}
    else:
        logger.info('output_format_options are set to %s' % output_format_options)

    if not isinstance(resource, list):
        resource = [resource]

    # execute ocgis
    logger.info('Execute ocgis module call function')

    if has_Lambert_Conformal(resource) and geom is not None:
        logger.debug('input has a Lambert conformal projection and cannot be subsetted with geom')
        output = None
    else:
        try:
            rd = RequestDataset(resource, variable=variable, level_range=level_range,
                                dimension_map=dimension_map, conform_units_to=conform_units_to,
                                time_region=time_region, t_calendar=t_calendar,
                                time_range=time_range)
            ops = OcgOperations(dataset=rd,
                                output_format_options=output_format_options,
                                spatial_wrapping=spatial_wrapping,
                                spatial_reorder=spatial_reorder,
                                calc=calc,
                                calc_grouping=calc_grouping,
                                geom=geom,
                                output_format=output_format,
                                prefix=prefix,
                                search_radius_mult=search_radius_mult,
                                select_nearest=select_nearest,
                                select_ugid=select_ugid,
                                add_auxiliary_files=False)
            logger.info('OcgOperations set')
        except Exception:
            logger.debug('failed to setup OcgOperations')
            raise

        # compare the data load with the available free memory
        try:
            from numpy import sqrt
            from functools import reduce
            from flyingpigeon.utils import FreeMemory

            if memory_limit is None:
                f = FreeMemory()
                mem_kb = f.user_free
                mem_mb = mem_kb / 1024.
                mem_limit = mem_mb / 2.  # set limit to half of the free memory
            else:
                mem_limit = memory_limit

            if mem_limit >= 1024. * 4:
                mem_limit = 1024. * 4  # 475.0 MB for openDAP

            data_kb = ops.get_base_request_size()['total']
            data_mb = data_kb / 1024.

            if variable is None:
                variable = rd.variable
                logger.info('%s detected as variable' % variable)
            logger.info('data_mb = %s ; memory_limit = %s' % (data_mb, mem_limit))
        except Exception as e:
            logger.debug('failed to compare dataload with free memory %s' % e)
            raise

        if data_mb <= mem_limit:  # input is smaller than half of the free memory
            try:
                logger.info('ocgis module call as ops.execute()')
                geom_file = ops.execute()
            except Exception:
                logger.debug('failed to execute ocgis operation')
                raise
        else:
            ###########################
            # calculation of chunk size
            ###########################
            try:
                size = ops.get_base_request_size()
                nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
                element_in_kb = size['total'] / reduce(lambda x, y: x * y,
                                                       size['variables'][variable]['value']['shape'])
                element_in_mb = element_in_kb / 1024.
                tile_dim = sqrt(mem_limit / (element_in_mb * nb_time_coordinates_rd))  # maximum chunk size

                logger.info('ocgis module call compute with chunks')
                if calc is None:
                    calc = '%s=%s*1' % (variable, variable)
                    logger.info('calc set to = %s' % calc)
                ops = OcgOperations(dataset=rd,
                                    output_format_options=output_format_options,
                                    spatial_wrapping=spatial_wrapping,
                                    spatial_reorder=spatial_reorder,
                                    calc=calc,
                                    calc_grouping=calc_grouping,
                                    geom=geom,
                                    output_format=output_format,
                                    prefix=prefix,
                                    search_radius_mult=search_radius_mult,
                                    select_nearest=select_nearest,
                                    select_ugid=select_ugid,
                                    add_auxiliary_files=False)
                geom_file = compute(ops, tile_dimension=int(tile_dim), verbose=True)
            except Exception:
                logger.debug('failed to compute ocgis with chunks')
                raise

        logger.info('Succeeded with ocgis module call function')

        ############################################
        # remapping according to regrid information
        ############################################
        if regrid_destination is not None:
            try:
                from cdo import Cdo
                cdo = Cdo()
                output = '%s.nc' % uuid.uuid1()
                remap = 'remap%s' % regrid_options
                # Resolve the matching cdo remap operator and call it directly
                # instead of building a command string for exec.
                operator = [op for op in dir(cdo) if remap in op][0]
                output = getattr(cdo, operator)(regrid_destination, input=geom_file, output=output)
            except Exception:
                logger.debug('failed to remap')
                raise
        else:
            output = geom_file

    return output