def test_system_regridding_different_fields_variable_regrid_targets(self):
    """Test with a request dataset having regrid_source as False."""
    destination = self.test_data.get_rd('cancm4_tas')
    source_static = self.test_data.get_rd('maurer_2010_tas',
                                          kwds={'time_region': {'year': [1990], 'month': [2]}})
    source_static.regrid_source = False

    # A second source sharing the data but flagged for regridding under a distinct field name.
    source_regridded = deepcopy(source_static)
    source_regridded.regrid_source = True
    source_regridded._field_name = 'maurer2'

    ops = ocgis.OcgOperations(dataset=[source_static, source_regridded],
                              regrid_destination=destination,
                              geom='state_boundaries',
                              select_ugid=[25])
    collections = list(OperationsEngine(ops))
    self.assertEqual(len(collections), 2)

    # Only the regridded field should carry the destination's (coarser) spatial shape.
    expected_shapes = {'tas': (28, 77, 83), 'maurer2': (28, 4, 4)}
    for collection in collections:
        for melted in collection.iter_melted(tag=TagName.DATA_VARIABLES):
            field_name = melted['field'].name
            if field_name not in expected_shapes:
                raise NotImplementedError
            self.assertEqual(melted['variable'].shape, expected_shapes[field_name])
def test_system_regridding_same_field(self):
    """Test regridding operations with same field used to regrid the source."""
    destination_rd = self.test_data.get_rd('cancm4_tas')
    # Exercise both a request dataset and a realized field as the regrid destination.
    keywords = dict(regrid_destination=[destination_rd, destination_rd.get()],
                    geom=['state_boundaries'])

    for combo in itr_products_keywords(keywords, as_namedtuple=True):
        source_a = self.test_data.get_rd('cancm4_tas')
        source_b = self.test_data.get_rd('cancm4_tas', kwds={'field_name': 'tas2'})
        ops = ocgis.OcgOperations(dataset=[source_a, source_b],
                                  geom=combo.geom,
                                  regrid_destination=combo.regrid_destination,
                                  time_region={'month': [1], 'year': [2002]},
                                  select_ugid=[25, 41])
        collections = list(OperationsEngine(ops))
        # Two fields by two selection geometries.
        self.assertEqual(len(collections), 4)
        for collection in collections:
            for melted in collection.iter_melted(tag=TagName.DATA_VARIABLES):
                field = melted['field']
                self.assertEqual(field.crs, env.DEFAULT_COORDSYS)
                self.assertTrue(melted['variable'].get_value().mean() > 100)
                # Subsetting should introduce masked elements on both grid and data.
                self.assertTrue(np.any(field.grid.get_mask()))
                self.assertTrue(np.any(melted['variable'].get_mask()))
def test_get_converter(self):
    """Test converter construction for several output formats and option pass-through."""
    rd = self.test_data.get_rd('cancm4_tas')
    outdir = self.current_dir_output
    prefix = 'foo'

    ops = OcgOperations(dataset=rd)
    so = OperationsEngine(ops)
    converter = OcgInterpreter(ops)._get_converter_(NumpyConverter, outdir, prefix, so)
    self.assertIsInstance(converter, NumpyConverter)

    # Melted state should be registered by the converter.
    ops = OcgOperations(dataset=rd, melted=True,
                        output_format=constants.OutputFormatName.SHAPEFILE)
    converter = OcgInterpreter(ops)._get_converter_(ShpConverter, outdir, prefix, so)
    self.assertIsInstance(converter, ShpConverter)
    self.assertTrue(converter.melted)

    # Output format options should reach the underlying converter.
    opts = {'data_model': 'foo'}
    ops = OcgOperations(dataset=rd, output_format='nc', output_format_options=opts)
    converter = OcgInterpreter(ops)._get_converter_(NcConverter, outdir, prefix, so)
    self.assertDictEqual(converter.options, opts)
def test_init(self):
    """Test engine construction across base-size-only and progress keyword combinations."""
    base_size_flags = [True, False]
    progress_objects = [None, ProgressOcgOperations()]
    for base_only, progress in itertools.product(base_size_flags, progress_objects):
        engine = OperationsEngine(self.get_operations(),
                                  request_base_size_only=base_only,
                                  progress=progress)
        for index, collection in enumerate(engine):
            self.assertIsInstance(collection, SpatialCollection)
            # Exactly one collection is expected from the iteration.
            self.assertEqual(index, 0)
def test_system_regridding_same_field_value_mask(self):
    """Test with a value mask."""
    source = self.test_data.get_rd('cancm4_tas')
    destination = self.test_data.get_rd('cancm4_tas', kwds={'field_name': 'tas2'})

    # Mask a single cell on the destination grid.
    mask = np.zeros(destination.get().grid.shape, dtype=bool)
    mask[30, 45] = True

    ops = ocgis.OcgOperations(dataset=source,
                              regrid_destination=destination,
                              snippet=True,
                              regrid_options={'value_mask': mask})
    collections = list(OperationsEngine(ops))
    masked_count = collections[0].get_element(variable_name='tas').get_mask().sum()
    # The single masked cell should propagate to the regridded output.
    self.assertEqual(1, masked_count)
def test_system_regridding_same_field_bad_bounds_without_corners(self):
    """Test bad bounds may be regridded with_corners as False."""
    from ESMF.api.constants import RegridMethod

    rd = self.test_data.get_rd('cancm4_tas')
    # Bilinear regridding does not require corner coordinates.
    ops = ocgis.OcgOperations(dataset=rd,
                              regrid_destination=rd,
                              snippet=True,
                              regrid_options={'regrid_method': RegridMethod.BILINEAR})
    collections = list(OperationsEngine(ops))
    for collection in collections:
        for melted in collection.iter_melted():
            for data_variable in melted['field'].data_variables:
                self.assertGreater(data_variable.get_value().sum(), 100)
def test_system_regridding_different_fields_requiring_wrapping(self):
    """Test with fields requiring wrapping."""
    destination = self.test_data.get_rd('cancm4_tas')
    source = self.test_data.get_rd('maurer_2010_tas')
    ops = ocgis.OcgOperations(dataset=source,
                              regrid_destination=destination,
                              geom='state_boundaries',
                              select_ugid=[25],
                              time_region={'month': [2], 'year': [1990]})
    collections = list(OperationsEngine(ops))
    self.assertEqual(len(collections), 1)
    # All output variables should carry the destination's spatial shape.
    for collection in collections:
        for melted in collection.iter_melted(tag=TagName.DATA_VARIABLES):
            self.assertEqual(melted['variable'].shape, (28, 4, 4))
def get_base_request_size(self):
    """
    Return the estimated request size in kilobytes. This is the estimated size of the requested data not the
    returned data product.

    :returns: Dictionary containing sizes of variables. Format is:
     ``dict['field'][<field name>][<variable name>]``.
    :rtype: dict
    :raises DefinitionValidationError: If a regrid destination is set on the operations object.

    >>> ops = OcgOperations(...)
    >>> ret = ops.get_base_request_size()
    {'field': {'tas': {u'height': {'dtype': dtype('float64'), 'kb': 0.0, 'shape': ()},
                       u'lat': {'dtype': dtype('float64'), 'kb': 0.5, 'shape': (64,)},
                       u'lat_bnds': {'dtype': dtype('float64'), 'kb': 1.0, 'shape': (64, 2)},
                       'latitude_longitude': {'dtype': None, 'kb': 0.0, 'shape': (0,)},
                       u'lon': {'dtype': dtype('float64'), 'kb': 1.0, 'shape': (128,)},
                       u'lon_bnds': {'dtype': dtype('float64'), 'kb': 2.0, 'shape': (128, 2)},
                       'tas': {'dtype': dtype('float32'), 'kb': 116800.0, 'shape': (3650, 64, 128)},
                       u'time': {'dtype': dtype('float64'), 'kb': 28.515625, 'shape': (3650,)},
                       u'time_bnds': {'dtype': dtype('float64'), 'kb': 57.03125, 'shape': (3650, 2)}}},
     'total': 116890.046875}
    """
    # Sizes cannot be estimated from the source descriptors once regridding is involved.
    if self.regrid_destination is not None:
        msg = 'Base request size not supported with a regrid destination.'
        raise DefinitionValidationError(RegridDestination, msg)

    def _get_kb_(dtype, elements):
        # Size in kilobytes of "elements" items of "dtype". A one-element array is used to resolve the
        # per-item byte size so "dtype" may be anything numpy accepts (including None).
        nbytes = np.array([1], dtype=dtype).nbytes
        return float((elements * nbytes) / 1024.0)

    def _get_zero_or_kb_(var):
        # Summarize a single variable: data type, shape, and size in kilobytes.
        return {'dtype': var.dtype,
                'shape': var.shape,
                'kb': _get_kb_(var.dtype, var.size)}

    # Run the subset against a copy so this operations object is not modified. The engine only collects
    # metadata in base-size-only mode.
    ops_size = deepcopy(self)
    subset = OperationsEngine(ops_size, request_base_size_only=True)

    ret = Dict()
    for coll in subset:
        for row in coll.iter_melted():
            field = row['field']
            curr = ret.field[field.name] = {}
            for variable in field.values():
                curr[variable.name] = _get_zero_or_kb_(variable)

    # Accumulate the total size across all fields and variables.
    total = 0.0
    for v in ret.values():
        for v2 in v.values():
            for v3 in v2.values():
                total += float(v3['kb'])
    ret['total'] = total

    return ret
def get_subset_operation(self):
    """Build an operations engine using the canonical geometry dictionaries fixture."""
    geometries = TestGeom.get_geometry_dictionaries()
    request = self.test_data.get_rd('cancm4_tas')
    operations = ocgis.OcgOperations(dataset=request, geom=geometries, select_nearest=True)
    return OperationsEngine(operations)
def execute(self):
    """
    Run the operations and write output in the requested format.

    :returns: The converter's write product (e.g. a path to the output or an in-memory collection depending
     on the requested output format).
    :raises IOError: If the output directory exists and :attr:`env.OVERWRITE` is ``False``.
    :raises NotImplementedError: If output grouping is requested (not supported).
    """
    # Check for a user-supplied output prefix.
    prefix = self.ops.prefix

    # Do directory management.
    #
    # Flag to indicate a directory was made. Mostly a precaution to make sure the appropriate directory is
    # removed on failure.
    made_output_directory = False

    if self.ops.output_format in self._no_directory:
        # No output directory for some formats.
        outdir = None
    else:
        # Directories or a single output file(s) is created for the other cases.
        if self.ops.add_auxiliary_files:
            # Auxiliary files require that a directory be created.
            outdir = os.path.join(self.ops.dir_output, prefix)
            # Create and/or remove the output directory.
            if vm.rank == 0:
                if os.path.exists(outdir):
                    if env.OVERWRITE:
                        shutil.rmtree(outdir)
                    else:
                        raise IOError('The output directory exists but env.OVERWRITE is False: {0}'.format(outdir))
                os.mkdir(outdir)
            # Block until the output directory is created. Most often the zero rank manages writing, but this
            # is not a requirement.
            vm.Barrier()
            # On an exception, the output directory needs to be removed.
            made_output_directory = True
        else:
            # With no auxiliary files the output directory will do just fine.
            outdir = self.ops.dir_output

    try:
        # Configure logging.
        progress = self._get_progress_and_configure_logging_(outdir, prefix)

        # Create a local logger.
        interpreter_log = ocgis_lh.get_logger('interpreter')

        ocgis_lh('Initializing...', interpreter_log)

        # Set up environment. Run validation - doesn't do much now.
        self.check()

        # Do not perform vector wrapping for netCDF output.
        if self.ops.output_format == 'nc':
            ocgis_lh('"vector_wrap" set to False for netCDF output', interpreter_log, level=logging.WARN)
            self.ops.vector_wrap = False

        # If the requested output format is "meta" then no operations are run and only the operations
        # dictionary is required to generate output.
        Converter = self.ops._get_object_(OutputFormat.name).get_converter_class()
        if issubclass(Converter, AbstractMetaConverter):
            ret = Converter(self.ops).write()
        # This is the standard request for other output types.
        else:
            # The operations object performs subsetting and calculations.
            ocgis_lh('initializing subset', interpreter_log, level=logging.DEBUG)
            so = OperationsEngine(self.ops, progress=progress)
            # If there is no grouping on the output files, a single converter is needed.
            if self.ops.output_grouping is None:
                ocgis_lh('initializing converter', interpreter_log, level=logging.DEBUG)
                conv = self._get_converter_(Converter, outdir, prefix, so)
                ocgis_lh('starting converter write loop: {0}'.format(self.ops.output_format), interpreter_log,
                         level=logging.DEBUG)
                ret = conv.write()
            else:
                raise NotImplementedError

        # The original called "str.format" with an argument on a message containing no placeholders; the
        # argument was silently ignored, so the no-op call is removed. The emitted text is unchanged.
        ocgis_lh('Operations successful.', interpreter_log)

        return ret
    except:
        # The output directory needs to be removed if one was created. Shutdown logging before to make sure
        # there is no file lock (Windows). Bare except is intentional so cleanup also runs for
        # KeyboardInterrupt/SystemExit; the exception is always re-raised.
        ocgis_lh.shutdown()
        if vm.rank == 0 and made_output_directory:
            shutil.rmtree(outdir)
        raise
    finally:
        ocgis_lh.shutdown()
        if env.ADD_OPS_MPI_BARRIER:
            vm.Barrier()