def test_system_units_validation_equal_units(self):
    """Heat index executes successfully when inputs carry the required units."""
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='fahrenheit', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)
    # Orphaning is required so the variable may be moved between fields.
    with orphaned(field_rhs['rhsmax']):
        field.add_variable(field_rhs['rhsmax'], is_data=True)
    # Use a set literal instead of set([...]) for consistency with the
    # wrong-units sibling test.
    self.assertEqual(set(get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))),
                     {'tasmax', 'rhsmax'})
    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    vc = hi.execute()
    self.assertIsInstance(vc, VariableCollection)
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """Write a variable collection to a netCDF target, coordinating MPI ranks.

    :param vc: The variable collection to write.
    :param opened_or_path: An already-opened dataset object or a path to open.
    :param write_mode: An ``MPIWriteMode`` value; must not be ``None``.
    :param kwargs: May contain ``dataset_kwargs`` (passed to the dataset open)
     and ``variable_kwargs`` (passed to each variable write). Remaining kwargs
     are forwarded to child-group writes.
    """
    assert write_mode is not None

    dataset_kwargs = kwargs.get('dataset_kwargs', {})
    variable_kwargs = kwargs.get('variable_kwargs', {})

    # When filling a dataset, we use append mode.
    if write_mode == MPIWriteMode.FILL:
        mode = 'a'
    else:
        mode = 'w'

    # For an asynchronous write, treat everything like a single rank.
    if write_mode == MPIWriteMode.ASYNCHRONOUS:
        possible_ranks = [0]
    else:
        possible_ranks = vm.ranks

    # Write the data on each rank in turn. (The previous enumerate index was
    # unused and has been removed.)
    for rank_to_write in possible_ranks:
        # The template write only occurs on the first rank.
        if write_mode == MPIWriteMode.TEMPLATE and rank_to_write != 0:
            pass
        # If this is not a template write, fill the data.
        elif write_mode == MPIWriteMode.ASYNCHRONOUS or vm.rank == rank_to_write:
            with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                # Write global attributes if we are not filling data.
                if write_mode != MPIWriteMode.FILL:
                    vc.write_attributes_to_netcdf_object(dataset)
                # This is the main variable write loop.
                variables_to_write = get_variables_to_write(vc)
                for variable in variables_to_write:
                    # Load the variable's data before orphaning. The variable needs its parent to know
                    # which group it is in.
                    variable.load()
                    # Call the individual variable write method in fill mode. Orphaning is required as a
                    # variable will attempt to write its parent first.
                    with orphaned(variable, keep_dimensions=True):
                        variable.write(dataset, write_mode=write_mode, **variable_kwargs)
                # Recurse the children.
                for child in list(vc.children.values()):
                    if write_mode != MPIWriteMode.FILL:
                        group = nc.Group(dataset, child.name)
                    else:
                        group = dataset.groups[child.name]
                    child.write(group, write_mode=write_mode, **kwargs)
                dataset.sync()
        # Keep ranks in lockstep so writes do not interleave.
        vm.barrier()
def test_calculate(self):
    """IcclimETR with a monthly grouping yields one output slice per month."""
    rd_tasmin = self.test_data.get_rd('cancm4_tasmin_2001')
    rd_tasmax = self.test_data.get_rd('cancm4_tasmax_2001')
    field = rd_tasmin.get()
    field_tasmax = rd_tasmax.get()
    # Orphan the tasmax variable so it may be attached to the tasmin field.
    with orphaned(field_tasmax['tasmax']):
        field.add_variable(field_tasmax['tasmax'], is_data=True)
    slc = {'time': slice(0, 600), 'y': slice(25, 50), 'x': slice(25, 50)}
    field = field.get_field_slice(slc)
    tgd = field.temporal.get_grouping(['month'])
    dtr = IcclimETR(field=field, tgd=tgd)
    ret = dtr.execute()
    # Twelve monthly groups over the 25x25 spatial subset.
    self.assertEqual(ret['icclim_ETR'].get_value().shape, (12, 25, 25))
def test_calculation_file_only_two_variables(self):
    """A file-only evaluation must not compute values for the output variable."""
    rd = self.test_data.get_rd('cancm4_tas')
    rd2 = self.test_data.get_rd('cancm4_tasmax_2001')
    field = rd.get()
    field2 = rd2.get()
    # Orphaning is required so the variable may be moved between fields.
    with orphaned(field2['tasmax']):
        field.add_variable(field2['tasmax'], is_data=True)
    field = field.get_field_slice({'time': slice(0, 10)})
    expr = 'foo=log(1000*(tasmax-tas))/3'
    ef = EvalFunction(expr=expr, field=field, file_only=True)
    ret = ef.execute()
    # assertIsNone gives a clearer failure message than assertEqual(..., None).
    self.assertIsNone(ret['foo']._value)
def test_system_units_validation_wrong_units(self):
    """Heat index must reject temperature data that is not in Fahrenheit."""
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='kelvin', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)
    rhs_variable = field_rhs['rhsmax']
    # Orphan the variable so it may be attached to the temperature field.
    with orphaned(rhs_variable):
        field.add_variable(rhs_variable, is_data=True)
    data_names = get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))
    self.assertEqual(set(data_names), {'tasmax', 'rhsmax'})
    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    # Kelvin input violates the unit requirement and must raise.
    with self.assertRaises(UnitsValidationError):
        hi.execute()
def test_system_units_validation_equal_units(self):
    """Heat index executes when the inputs already carry the required units."""
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='fahrenheit', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)
    # Orphaning is required so the variable may be moved between fields.
    with orphaned(field_rhs['rhsmax']):
        field.add_variable(field_rhs['rhsmax'], is_data=True)
    # Set literal replaces set([...]) for consistency with the wrong-units test.
    self.assertEqual(
        set(get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))),
        {'tasmax', 'rhsmax'})
    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    vc = hi.execute()
    self.assertIsInstance(vc, VariableCollection)
def test_system_units_validation_wrong_units(self):
    """Executing the heat index with Kelvin input must raise a units error."""
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='kelvin', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)
    humidity = field_rhs['rhsmax']
    # Orphan the humidity variable before attaching it to the other field.
    with orphaned(humidity):
        field.add_variable(humidity, is_data=True)
    tagged = field.get_by_tag(TagName.DATA_VARIABLES)
    self.assertEqual(set(get_variable_names(tagged)), {'tasmax', 'rhsmax'})
    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    with self.assertRaises(UnitsValidationError):
        hi.execute()
def test_calculation_two_variables_exp_only(self):
    """Evaluate an expression over two variables and verify the computed values."""
    rd = self.test_data.get_rd('cancm4_tas')
    rd2 = self.test_data.get_rd('cancm4_tasmax_2001')
    field = rd.get()
    other_field = rd2.get()
    # Orphan the tasmax variable so it may join the tas field.
    with orphaned(other_field['tasmax']):
        field.add_variable(other_field['tasmax'], is_data=True)
    field = field.get_field_slice({'time': slice(0, 10)})
    expr = 'foo=log(1000*(tasmax-tas))/3'
    ef = EvalFunction(expr=expr, field=field)
    ret = ef.execute()
    self.assertEqual(list(ret.keys()), ['foo'])
    # Recompute the expression directly with numpy for comparison.
    tas_value = field['tas'].get_value()
    tasmax_value = field['tasmax'].get_value()
    expected = np.log(1000 * (tasmax_value - tas_value)) / 3
    self.assertNumpyAll(ret['foo'].get_value(), expected)
def test_with_eval_function_two_variables(self):
    """Engine executes an eval function that references two data variables."""
    funcs = [{'func': 'tas_out=tas+tas2', 'ref': EvalFunction}]
    engine = self.get_engine(funcs=funcs, grouping=None)
    rd = self.test_data.get_rd('cancm4_tas')
    rd2 = self.test_data.get_rd('cancm4_tas', kwds={'rename_variable': 'tas2'})
    field = rd.get()
    second_field = rd2.get()
    # Orphan the renamed variable before attaching it.
    with orphaned(second_field['tas2']):
        field.add_variable(second_field['tas2'], is_data=True)
    slc = {'time': slice(0, 100), 'y': slice(0, 10), 'x': slice(0, 10)}
    field = field.get_field_slice(slc)
    desired = SpatialCollection()
    desired.add_field(field, None)
    # Keep an untouched copy so inputs may be compared after execution.
    actual = deepcopy(desired)
    engine.execute(desired)
    tas_out = desired.get_element(variable_name='tas_out').get_value()
    tas = actual.get_element(variable_name='tas').get_value()
    tas2 = actual.get_element(variable_name='tas2').get_value()
    self.assertNumpyAll(tas_out, tas + tas2)
def execute(self, coll, file_only=False, tgds=None):
    """Run the engine's calculations against every field in a spatial collection.

    :param coll: The target :class:`~ocgis.SpatialCollection`. Modified in place
     and returned.
    :param bool file_only: If ``True``, calculations are initialized but no
     values are computed (output variables must have no value).
    :param dict tgds: Optional mapping of
     ``{'field_alias': TemporalGroupDimension, ...}`` overriding the engine's
     cached temporal group dimensions.
    """
    from ocgis import VariableCollection

    # Select which dictionary will hold the temporal group dimensions.
    if tgds is None:
        tgds_to_use = self._tgds
        tgds_overloaded = False
    else:
        tgds_to_use = tgds
        tgds_overloaded = True

    # Group the variables. If grouping is None, calculations are performed on each element.
    if self.grouping is not None:
        ocgis_lh('Setting temporal groups: {0}'.format(self.grouping), 'calc.engine')
        for field in coll.iter_fields():
            if tgds_overloaded:
                # Overloaded groupings must cover every field.
                assert field.name in tgds_to_use
            else:
                # Lazily compute and cache the grouping per field.
                if field.name not in tgds_to_use:
                    tgds_to_use[field.name] = field.time.get_grouping(self.grouping)

    # Iterate over functions. Each (ugid, field) pair gets its own output field.
    for ugid, container in list(coll.children.items()):
        for field_name, field in list(container.children.items()):
            new_temporal = tgds_to_use.get(field_name)
            if new_temporal is not None:
                # Copy to avoid mutating the cached temporal group dimension.
                new_temporal = new_temporal.copy()
            # If the engine has a grouping, ensure it is equivalent to the new temporal dimension.
            if self.grouping is not None:
                try:
                    compare = set(new_temporal.grouping) == set(self.grouping)
                # Types may be unhashable, compare directly.
                except TypeError:
                    compare = new_temporal.grouping == self.grouping
                if not compare:
                    msg = 'Engine temporal grouping and field temporal grouping are not equivalent. Perhaps ' \
                          'optimizations are incorrect?'
                    ocgis_lh(logger='calc.engine', exc=ValueError(msg))
            out_vc = VariableCollection()
            for f in self.funcs:
                try:
                    ocgis_lh('Calculating: {0}'.format(f['func']), logger='calc.engine')
                    # Initialize the function.
                    function = f['ref'](alias=f['name'], dtype=None, field=field, file_only=file_only, vc=out_vc,
                                        parms=f['kwds'], tgd=new_temporal, calc_sample_size=self.calc_sample_size,
                                        meta_attrs=f.get('meta_attrs'),
                                        spatial_aggregation=self.spatial_aggregation)
                    # Allow a calculation to create a temporal aggregation after initialization.
                    if new_temporal is None and function.tgd is not None:
                        new_temporal = function.tgd.extract()
                except KeyError:
                    # Likely an eval function which does not have the name key.
                    function = EvalFunction(field=field, file_only=file_only, vc=out_vc,
                                            expr=self.funcs[0]['func'],
                                            meta_attrs=self.funcs[0].get('meta_attrs'))
                ocgis_lh('calculation initialized', logger='calc.engine', level=logging.DEBUG)
                # Return the variable collection from the calculations.
                out_vc = function.execute()
                for dv in out_vc.values():
                    # Any outgoing variables from a calculation must have an associated data type.
                    try:
                        assert dv.dtype is not None
                    except AssertionError:
                        assert isinstance(dv.dtype, np.dtype)
                    # If this is a file only operation, there should be no computed values.
                    if file_only:
                        assert dv._value is None
                ocgis_lh('calculation finished', logger='calc.engine', level=logging.DEBUG)
                # Try to mark progress. Okay if it is not there.
                try:
                    self._progress.mark()
                except AttributeError:
                    pass
            out_field = function.field.copy()
            function_tag = function.tag
            # Format the returned field. Doing things like removing original data variables and modifying the
            # time dimension if necessary. Field functions handle all field modifications on their own, so bypass
            # in that case.
            if new_temporal is not None:
                new_temporal = new_temporal.extract()
            format_return_field(function_tag, out_field, new_temporal=new_temporal)
            # Add the calculation variables.
            for variable in list(out_vc.values()):
                # Orphaning is required before moving the variable to the output field.
                with orphaned(variable):
                    out_field.add_variable(variable)
            # Tag the calculation data as data variables.
            out_field.append_to_tags(function_tag, list(out_vc.keys()))
            coll.children[ugid].children[field_name] = out_field
    return coll