def test_get_iter(self):
    """Exercise get_iter with strings, numpy arrays, custom iterables, and the dtype keyword."""
    # A string is treated as a scalar: yielded whole, not character-by-character.
    self.assertEqual(list(get_iter('hi')), ['hi'])

    # A numpy array is likewise yielded as a single element.
    arr = np.array([5, 6, 7])
    self.assertNumpyAll(list(get_iter(arr))[0], np.array([5, 6, 7]))

    ## test dtype ##################################################################################################

    class FooIterable(object):
        def __init__(self):
            self.value = [4, 5, 6]

        def __iter__(self):
            for item in self.value:
                yield item

    target = FooIterable()
    # Without a dtype, the object's own iterator is used.
    self.assertEqual(list(get_iter(target)), [4, 5, 6])
    # With a matching dtype (bare or inside a tuple) the object is yielded whole.
    for the_dtype in (FooIterable, (FooIterable, list)):
        res = list(get_iter(target, dtype=the_dtype))
        self.assertIsInstance(res, list)
        self.assertEqual(len(res), 1)
        self.assertIsInstance(res[0], FooIterable)
def _open_(uri, mode='r', **kwargs):
    """
    Open a netCDF dataset, optionally descending into nested groups.

    :param uri: A single path/URL (string) or a sequence of paths (opened as a multi-file dataset).
    :param str mode: netCDF open mode, e.g. ``'r'`` or ``'w'``.
    :keyword group_indexing: Optional group name or sequence of group names to index into.
    :keyword vm: Optional virtual machine override; defaults to the module-level ``vm``.
    :rtype: object
    """
    # Copy so popped keys do not leak into the nc.Dataset/nc.MFDataset call.
    kwargs = kwargs.copy()
    group_indexing = kwargs.pop('group_indexing', None)
    lvm = kwargs.pop('vm', vm)
    if isinstance(uri, six.string_types):
        # Open the dataset in parallel if we want to use the netCDF MPI capability. It may not be available even in
        # parallel.
        if mode == 'w' and lvm.size > 1:
            # NOTE(review): parallel/comm defaults are applied only for the NETCDF4 format — confirm
            # callers never request parallel writes with other formats.
            if kwargs.get('format', 'NETCDF4') == 'NETCDF4':
                if kwargs.get('parallel') is None and env.USE_NETCDF4_MPI:
                    kwargs['parallel'] = True
                if kwargs.get('parallel') and kwargs.get('comm') is None:
                    kwargs['comm'] = lvm.comm
        ret = nc.Dataset(uri, mode=mode, **kwargs)
        # tdk:FIX: this should be enabled for MFDataset as well. see https://github.com/Unidata/netcdf4-python/issues/809#issuecomment-435144221
        # netcdf4 >= 1.4.0 always returns masked arrays. This is inefficient and is turned off by default by ocgis.
        if hasattr(ret, 'set_always_mask'):
            ret.set_always_mask(False)
    else:
        # Non-string URIs are treated as a sequence of files.
        ret = nc.MFDataset(uri, **kwargs)
    if group_indexing is not None:
        # Walk down the group hierarchy one name at a time.
        for group_name in get_iter(group_indexing):
            ret = ret.groups[group_name]
    return ret
def conform_units_to(self):
    """Return the ``conform_units_to`` metadata entry for each request variable (scalar when single)."""
    targets = [self.metadata['variables'][name].get('conform_units_to')
               for name in get_iter(self.variable)]
    return get_first_or_tuple(targets)
def add_variable(self, name_or_variable, force=False, dimensions=None, group=None):
    """
    Add a variable to the distribution mapping.

    :param name_or_variable: The variable or variable name to add to the distribution.
    :type name_or_variable: :class:`~ocgis.new_interface.variable.Variable`
    :param force: If ``True``, overwrite any variables with the same name.
    :param sequence dimensions: A sequence of dimension names if ``name_or_variable`` is a name. Otherwise, dimensions
     are pulled from the variable object.
    :param group: Optional group identifier; for variable objects, defaults to the variable's group.
    :raises: ValueError
    """
    # Imported locally, presumably to avoid a circular import at module load — TODO confirm.
    from ocgis.variable.base import Variable
    from ocgis.variable.dimension import Dimension

    if isinstance(name_or_variable, Variable):
        # Pull the name, group, and dimensions directly from the variable object.
        group = group or name_or_variable.group
        name = name_or_variable.name
        dimensions = name_or_variable.dimensions
    else:
        name = name_or_variable
        dimensions = list(get_iter(dimensions, dtype=(str, Dimension)))
    # Normalize dimensions to a tuple of names.
    if dimensions is not None and len(dimensions) > 0:
        if isinstance(dimensions[0], Dimension):
            dimensions = [dim.name for dim in dimensions]
    else:
        dimensions = []
    dimensions = tuple(dimensions)
    # Register the variable in the target group on every rank.
    for rank_home in self.ranks:
        the_group = self._create_or_get_group_(group, rank_home)
        if not force and name in the_group['variables']:
            msg = 'Variable with name "{}" already in group "{}" and "force=False".'
            raise ValueError(msg.format(name, group))
        else:
            the_group['variables'][name] = {'dimensions': dimensions}
def test_unwrap(self):
    """Test different geometry types are appropriately unwrapped."""
    wrapper = Wrapper()
    path = tempfile.mkdtemp()
    # Fix: dict.iteritems() exists only on Python 2; items() is valid on both Python 2 and 3.
    for desc, geom in self.possible.items():
        unwrapped = wrapper.unwrap(geom)
        if desc in self.actual_unwrapped:
            self.assertTrue(self.actual_unwrapped[desc].almost_equals(unwrapped, decimal=5))
        try:
            self.assertEqual(type(geom), type(unwrapped))
        except AssertionError:
            if desc == 'axis_polygon':
                # by necessity of being split on the axis, this will come out as a multipolygon
                self.assertIsInstance(unwrapped, MultiPolygon)
            else:
                raise
        # Unwrapped coordinates should be in the [0, 360] longitude domain.
        self.assertFalse(np.any(np.array(unwrapped) < 0.0))
        if isinstance(unwrapped, (MultiPolygon, Polygon)):
            it = get_iter(unwrapped)
            for polygon in it:
                self.assertFalse(np.any(np.array(polygon.exterior) > 360.0))
        else:
            self.assertFalse(np.any(np.array(unwrapped) > 360.0))
def _open_(uri, mode='r', **kwargs):
    """
    :rtype: object
    """
    # Work on a copy so popped options do not reach the netCDF constructors.
    opts = kwargs.copy()
    group_indexing = opts.pop('group_indexing', None)
    the_vm = opts.pop('vm', vm)
    if isinstance(uri, six.string_types):
        # Open the dataset in parallel if we want to use the netCDF MPI capability. It may not be
        # available even in parallel.
        if mode == 'w' and the_vm.size > 1:
            if opts.get('format', 'NETCDF4') == 'NETCDF4':
                if opts.get('parallel') is None and env.USE_NETCDF4_MPI:
                    opts['parallel'] = True
                if opts.get('parallel') and opts.get('comm') is None:
                    opts['comm'] = the_vm.comm
        ret = nc.Dataset(uri, mode=mode, **opts)
    else:
        # A non-string URI is a sequence of files opened as a multi-file dataset.
        ret = nc.MFDataset(uri, **opts)
    if group_indexing is not None:
        # Descend into nested groups following the indexing sequence.
        for gname in get_iter(group_indexing):
            ret = ret.groups[gname]
    return ret
def test_init(self):
    """Collection length mirrors the number of targets passed to the constructor."""
    for k in self.iter_keywords():
        rdc = RequestDatasetCollection(target=k.target)
        if k.target is None:
            self.assertEqual(len(rdc), 0)
        else:
            expected = list(get_iter(k.target, dtype=(dict, RequestDataset, Field)))
            self.assertEqual(len(rdc), len(expected))
            self.assertTrue(len(rdc) >= 1)
def __init__(self, target=None):
    """Initialize the collection, optionally updating it with one or more targets."""
    super(RequestDatasetCollection, self).__init__()
    self._unique_id_store = []
    if target is None:
        return
    for element in get_iter(target, dtype=(dict, RequestDataset, Field)):
        self.update(element)
def __init__(self, request_datasets=None):
    """Initialize the collection, optionally updating it with one or more request datasets."""
    super(RequestDatasetCollection, self).__init__()
    self._did = []
    if request_datasets is None:
        return
    for rd in get_iter(request_datasets, dtype=(dict, RequestDataset)):
        self.update(rd)
def __init__(self, **kwds):
    """Initialize storage and add any variables supplied via the ``variables`` keyword."""
    self._uid_ctr = 1
    self._storage = OrderedDict()
    incoming = kwds.pop('variables', None)
    if incoming is not None:
        for v in get_iter(incoming, dtype=Variable):
            self.add_variable(v)
def get_request_dataset_iterable_attribute(obj, attr):
    """Collect ``attr`` from each request dataset on ``obj``, flattening nested sequences into a tuple."""
    gathered = [getattr(rd, attr) for rd in obj.request_datasets]
    flat = []
    for value in get_iter(gathered):
        # Strings and None are scalars; anything else is a sequence to splice in.
        if value is None or isinstance(value, six.string_types):
            flat.append(value)
        else:
            flat.extend(value)
    return tuple(flat)
def _get_module_available_(self):
    """Return ``True`` if at least one of the candidate modules can be imported."""
    availability = []
    for module_name in get_iter(self.module_names):
        try:
            import_module(module_name)
        except ImportError:
            availability.append(False)
        else:
            availability.append(True)
    return any(availability)
def metadata(self):
    # Merge metadata across all request datasets: the first dataset's metadata is copied as the
    # base and subsequent datasets contribute only their variable entries.
    # NOTE(review): if ``self.request_datasets`` is empty, ``ret`` is never bound and the return
    # raises UnboundLocalError — presumably at least one dataset is guaranteed upstream; confirm.
    build = True
    for rd in self.request_datasets:
        if build:
            ret = rd.metadata.copy()
            build = False
        else:
            for variable_name in get_iter(rd.variable):
                ret['variables'][variable_name] = rd.metadata['variables'][variable_name]
    return ret
def get_dimension_names(target):
    """Return a tuple of dimension names from a name/:class:`Dimension` or a sequence of them."""
    from ocgis.variable.dimension import Dimension

    # Objects expose a ``name`` attribute; plain strings pass through unchanged.
    names = [getattr(obj, 'name', obj) for obj in get_iter(target, dtype=(str, Dimension))]
    return tuple(names)
def get_variable_names(target):
    """Return a tuple of variable names from a name/:class:`Variable` or a sequence of them."""
    from ocgis.variable.base import Variable

    # Objects expose a ``name`` attribute; plain strings pass through unchanged.
    names = [getattr(obj, 'name', obj) for obj in get_iter(target, dtype=(str, Variable))]
    return tuple(names)
def test_init(self):
    """Collection length mirrors the number of request datasets passed to the constructor."""
    rd1 = self.test_data.get_rd('cancm4_tas')
    rd2 = self.test_data.get_rd('cancm4_rhs')
    candidates = [None, rd1, [rd1], [rd1, rd2], {'uri': rd1.uri, 'variable': rd1.variable}]
    for k in itr_products_keywords(dict(request_datasets=candidates), as_namedtuple=True):
        rdc = RequestDatasetCollection(request_datasets=k.request_datasets)
        if k.request_datasets is None:
            self.assertEqual(len(rdc), 0)
        else:
            expected = len(list(get_iter(k.request_datasets, dtype=(dict, RequestDataset))))
            self.assertEqual(len(rdc), expected)
def size(self):
    """
    :returns: Size of test data in bytes.
    :rtype: int
    """
    nbytes = 0
    for key in self.keys():
        uri = self.get_uri(key)
        # A URI may be a single path or a sequence of paths.
        nbytes += sum(os.path.getsize(p) for p in get_iter(uri))
    return nbytes
def test_get_iter_dtype(self):
    """Test the dtype is properly used when determining how to iterate over elements."""

    class foo(object):
        pass

    data = {'hi': 'there'}
    # A dict dtype — bare or inside a tuple — yields the mapping whole.
    for the_dtype in [dict, (dict,), (dict, foo)]:
        self.assertDictEqual(list(get_iter(data, dtype=the_dtype))[0], data)

    ## a foo object should also be okay
    f = foo()
    self.assertEqual(list(get_iter(f, dtype=foo)), [f])

    ## if no dtype is passed, then the builtin iterator of the element will be used
    self.assertEqual(list(get_iter(data)), ['hi'])
def field_name(self):
    """Default field name derived from renamed variables; explicit ``_field_name`` wins."""
    if self._field_name is not None:
        return self._field_name
    # Use renamed variables for field names. Often there is a single variable in the request. This
    # ensures unique field names if renamed variables are unique.
    candidates = list(get_iter(self.rename_variable))
    if len(candidates) > 1:
        msg = 'No default "field_name" based on variables name possible with multiple data variables: {}. ' \
              'Using default field name: {}.'.format(self.variable, constants.MiscName.DEFAULT_FIELD_NAME)
        ocgis_lh(msg=msg, level=logging.WARN)
        return constants.MiscName.DEFAULT_FIELD_NAME
    return candidates[0]
def open(self):
    # Open the request's URI as a netCDF dataset. nc.Dataset raises TypeError for non-string
    # (sequence) URIs, in which case a multi-file dataset is attempted instead.
    try:
        ret = nc.Dataset(self.rd.uri, "r")
    except TypeError:
        try:
            ret = nc.MFDataset(self.rd.uri)
        except KeyError as e:
            # it is possible the variable is not in one of the data URIs. check for this to raise a cleaner error.
            for uri in get_iter(self.rd.uri):
                ds = nc.Dataset(uri, "r")
                try:
                    for variable in get_iter(self.rd.variable):
                        try:
                            ds.variables[variable]
                        except KeyError:
                            msg = 'The variable "{0}" was not found in URI "{1}".'.format(variable, uri)
                            raise KeyError(msg)
                finally:
                    # Always release the per-URI handle, even when re-raising.
                    ds.close()
            # if all variables were found, raise the other error
            raise e
    return ret
def _get_autodiscovered_driver_(cls, uri):
    """
    :param str uri: The target URI containing data for which to choose a driver.
    :returns: The correct driver for opening the ``uri``.
    :rtype: :class:`ocgis.api.request.driver.base.AbstractDriver`
    :raises: RequestValidationError
    """
    for element in get_iter(uri):
        # Fix: dict.itervalues() exists only on Python 2; values() is valid on both Python 2 and 3.
        for driver in cls._Drivers.values():
            for pattern in driver.extensions:
                # First extension pattern matching any URI element selects the driver.
                if re.match(pattern, element) is not None:
                    return driver
    msg = 'Driver not found for URI: {0}'.format(uri)
    raise RequestValidationError('driver/uri', msg)
def _open_(uri, mode='r', **kwargs):
    """
    :rtype: object
    """
    group_indexing = kwargs.pop('group_indexing', None)
    # A string URI opens a single dataset; a sequence opens a multi-file dataset.
    if isinstance(uri, six.string_types):
        dataset = nc.Dataset(uri, mode=mode, **kwargs)
    else:
        dataset = nc.MFDataset(uri, **kwargs)
    if group_indexing is not None:
        # Descend through nested groups one name at a time.
        for name in get_iter(group_indexing):
            dataset = dataset.groups[name]
    return dataset
def validate(cls, ops):
    # Validate operations for a multivariate calculation: sample size is unsupported, and every
    # required variable must be supplied via keyword arguments that map to real field names.
    if ops.calc_sample_size:
        from ocgis.ops.parms.definition import CalcSampleSize
        exc = DefinitionValidationError(
            CalcSampleSize,
            'Multivariate functions do not calculate sample size at this time.')
        ocgis_lh(exc=exc, logger='calc.base')

    # ensure the required variables are present
    should_raise = False
    for c in ops.calc:
        if c['func'] == cls.key:
            kwds = c['kwds']

            # Check the required variables are keyword arguments.
            if not len(set(kwds.keys()).intersection(set(cls.required_variables))) >= 2:
                should_raise = True
                break

            # Ensure the mapped aliases exist.
            fnames = []
            for d in ops.dataset:
                try:
                    for r in get_iter(d.rename_variable):
                        fnames.append(r)
                except AttributeError:
                    # Fields do not have a rename variable attribute.
                    fnames += list(d.keys())
            # Every required variable's mapped alias must be a known field name.
            for xx in cls.required_variables:
                to_check = kwds[xx]
                if to_check not in fnames:
                    should_raise = True
                    break

    if should_raise:
        from ocgis.ops.parms.definition import Calc
        msg = 'These field names are missing for multivariate function "{0}": {1}.'
        exc = DefinitionValidationError(Calc, msg.format(cls.__name__, cls.required_variables))
        ocgis_lh(exc=exc, logger='calc.base')
def test_unwrap(self):
    """Test different geometry types are appropriately unwrapped."""
    wrapper = GeometryWrapper()
    # path = tempfile.mkdtemp()
    for desc, geom in self.possible.items():
        unwrapped = wrapper.unwrap(geom)
        if desc in self.actual_unwrapped:
            try:
                self.assertTrue(self.actual_unwrapped[desc].almost_equals(unwrapped, decimal=5))
            except:
                # Debug helpers: dump the geometries to shapefiles for visual inspection.
                # gv = GeometryVariable.from_shapely(geom, crs=Spherical())
                # gv.write_vector(os.path.join('/tmp/tw-geom.shp'))
                # gv = GeometryVariable.from_shapely(unwrapped, crs=Spherical())
                # gv.write_vector(os.path.join('/tmp/tw-unwrapped.shp'))
                # gv = GeometryVariable.from_shapely(self.actual_unwrapped[desc], crs=Spherical())
                # gv.write_vector(os.path.join('/tmp/tw-actual-unwrapped.shp'))
                # print(unwrapped.wkt)
                raise
        try:
            self.assertEqual(type(geom), type(unwrapped))
        except AssertionError:
            if desc == 'axis_polygon':
                # by necessity of being split on the axis, this will come out as a multipolygon
                self.assertIsInstance(unwrapped, MultiPolygon)
            else:
                raise
        # Unwrapped coordinates should be non-negative and no greater than 360 degrees.
        if isinstance(unwrapped, Polygon):
            coords = np.array(unwrapped.exterior)
        elif isinstance(unwrapped, MultiPolygon):
            coords = np.array([np.array(u.exterior).min() for u in unwrapped])
        else:
            coords = np.array(unwrapped)
        self.assertFalse(np.any(coords < 0.0))
        if isinstance(unwrapped, (MultiPolygon, Polygon)):
            it = get_iter(unwrapped)
            for polygon in it:
                self.assertFalse(np.any(np.array(polygon.exterior) > 360.0))
        else:
            self.assertFalse(np.any(np.array(unwrapped) > 360.0))
def _write_dataset_identifier_file_(path, ops):
    # Write a CSV mapping each dataset identifier (DID) to the metadata of its data variables.
    from ocgis.conv.csv_ import OcgDialect

    rows = []
    headers = ['DID', 'VARIABLE', 'STANDARD_NAME', 'LONG_NAME', 'UNITS', 'URI', 'GROUP']
    with open(path, 'w') as f:
        writer = csv.DictWriter(f, headers, dialect=OcgDialect)
        writer.writeheader()
        # writer.writerow(headers)
        for element in ops.dataset:
            row_template = {'DID': element.uid}
            if element.has_data_variables:
                # Request datasets expose ``variable``; fields (no such attribute) use tags.
                try:
                    itr = get_iter(element.variable)
                except AttributeError:
                    itr = element.get_by_tag(TagName.DATA_VARIABLES)
                for idx, variable in enumerate(itr):
                    row = row_template.copy()
                    # Variable objects carry their own metadata; bare names require a lookup in the
                    # element's metadata dictionary.
                    try:
                        attrs = variable.attrs
                        units = variable.units
                        group = variable.group
                        uri = None
                        variable_name = variable.name
                    except AttributeError:
                        attrs = element.metadata['variables'][variable]['attrs']
                        units = get_tuple(element.units)[idx]
                        group = None
                        uri = element.uri
                        variable_name = variable
                    row['STANDARD_NAME'] = attrs.get('standard_name')
                    row['LONG_NAME'] = attrs.get('long_name')
                    row['UNITS'] = units
                    row['GROUP'] = group
                    row['URI'] = uri
                    row['VARIABLE'] = variable_name
                    rows.append(row)
        writer.writerows(rows)
def validate(cls, ops):
    # Validate operations for a multivariate calculation: sample size is unsupported, and every
    # required variable must be supplied via keyword arguments that map to real field names.
    if ops.calc_sample_size:
        from ocgis.ops.parms.definition import CalcSampleSize
        exc = DefinitionValidationError(CalcSampleSize,
                                        'Multivariate functions do not calculate sample size at this time.')
        ocgis_lh(exc=exc, logger='calc.base')

    # ensure the required variables are present
    should_raise = False
    for c in ops.calc:
        if c['func'] == cls.key:
            kwds = c['kwds']

            # Check the required variables are keyword arguments.
            if not len(set(kwds.keys()).intersection(set(cls.required_variables))) >= 2:
                should_raise = True
                break

            # Ensure the mapped aliases exist.
            fnames = []
            for d in ops.dataset:
                try:
                    for r in get_iter(d.rename_variable):
                        fnames.append(r)
                except AttributeError:
                    # Fields do not have a rename variable attribute.
                    fnames += list(d.keys())
            # Every required variable's mapped alias must be a known field name.
            for xx in cls.required_variables:
                to_check = kwds[xx]
                if to_check not in fnames:
                    should_raise = True
                    break

    if should_raise:
        from ocgis.ops.parms.definition import Calc
        msg = 'These field names are missing for multivariate function "{0}": {1}.'
        exc = DefinitionValidationError(Calc, msg.format(cls.__name__, cls.required_variables))
        ocgis_lh(exc=exc, logger='calc.base')
def get_autodiscovered_driver(uri):
    """
    :param str uri: The target URI containing data for which to choose a driver.
    :returns: The correct driver for opening the ``uri``.
    :rtype: :class:`ocgis.api.request.driver.base.AbstractDriver`
    :raises: RequestValidationError
    """
    # Collect every registered driver whose extension pattern matches any URI element.
    possible = []
    for element in get_iter(uri):
        for driver in driver_registry.drivers:
            for pattern in driver.extensions:
                if re.match(pattern, element) is not None:
                    possible.append(driver)

    exc_msg = None
    ret = None
    if len(possible) == 0:
        exc_msg = 'Driver not found for URI: {0}'.format(uri)
    elif len(possible) == 1:
        ret = possible[0]
    else:
        # Multiple matches: fall back to priority drivers; the choice is unambiguous only when all
        # priority candidates share a single key.
        sub_possible = []
        for p in possible:
            if p._priority is True:
                sub_possible.append(p)
        sub_possible_keys = [sp.key for sp in sub_possible]
        if len(set(sub_possible_keys)) == 1:
            ret = sub_possible[0]
        else:
            exc_msg = 'More than one possible driver matched URI: {}'.format(uri)

    if exc_msg is None:
        return ret
    else:
        ocgis_lh(logger='request', exc=RequestValidationError('driver/uri', exc_msg))
def _write_dataset_identifier_file_(path, ops):
    # Write a CSV mapping each dataset identifier (DID) to the metadata of its data variables.
    from ocgis.conv.csv_ import OcgDialect

    rows = []
    headers = ['DID', 'VARIABLE', 'STANDARD_NAME', 'LONG_NAME', 'UNITS', 'URI', 'GROUP']
    with open(path, 'w') as f:
        writer = csv.DictWriter(f, headers, dialect=OcgDialect)
        writer.writeheader()
        # writer.writerow(headers)
        for element in ops.dataset:
            row_template = {'DID': element.uid}
            if element.has_data_variables:
                # Request datasets expose ``variable``; fields (no such attribute) use tags.
                try:
                    itr = get_iter(element.variable)
                except AttributeError:
                    itr = element.get_by_tag(TagName.DATA_VARIABLES)
                for idx, variable in enumerate(itr):
                    row = row_template.copy()
                    # Variable objects carry their own metadata; bare names require a lookup in the
                    # element's metadata dictionary.
                    try:
                        attrs = variable.attrs
                        units = variable.units
                        group = variable.group
                        uri = None
                        variable_name = variable.name
                    except AttributeError:
                        attrs = element.metadata['variables'][variable]['attrs']
                        units = get_tuple(element.units)[idx]
                        group = None
                        uri = element.uri
                        variable_name = variable
                    row['STANDARD_NAME'] = attrs.get('standard_name')
                    row['LONG_NAME'] = attrs.get('long_name')
                    row['UNITS'] = units
                    row['GROUP'] = group
                    row['URI'] = uri
                    row['VARIABLE'] = variable_name
                    rows.append(row)
        writer.writerows(rows)
def get_autodiscovered_driver(uri):
    """
    :param str uri: The target URI containing data for which to choose a driver.
    :returns: The correct driver for opening the ``uri``.
    :rtype: :class:`ocgis.api.request.driver.base.AbstractDriver`
    :raises: RequestValidationError
    """
    # Gather every registered driver whose extension pattern matches any element of the URI.
    matches = [d
               for element in get_iter(uri)
               for d in driver_registry.drivers
               for pattern in d.extensions
               if re.match(pattern, element) is not None]

    error = None
    chosen = None
    if not matches:
        error = 'Driver not found for URI: {0}'.format(uri)
    elif len(matches) == 1:
        chosen = matches[0]
    else:
        # Break ties using priority drivers; the choice is unambiguous only when the priority
        # candidates share exactly one key.
        prioritized = [m for m in matches if m._priority is True]
        if len(set(p.key for p in prioritized)) == 1:
            chosen = prioritized[0]
        else:
            error = 'More than one possible driver matched URI: {}'.format(uri)

    if error is not None:
        ocgis_lh(logger='request', exc=RequestValidationError('driver/uri', error))
    return chosen
def rename_variable_map(self):
    """Map each source variable name to its renamed counterpart."""
    pairs = zip(get_iter(self.variable), get_iter(self.rename_variable))
    return {source: renamed for source, renamed in pairs}
def get_fuzzy_names(cls):
    """Return the grid mapping name plus any fuzzy aliases as a tuple."""
    names = list(get_iter(cls.grid_mapping_name))
    if cls._fuzzy_grid_mapping_names is not None:
        names.extend(get_iter(cls._fuzzy_grid_mapping_names))
    return tuple(names)
def get_is_none(value):
    """Return ``True`` if every element of ``value`` (scalar or sequence) is ``None``."""
    for element in get_iter(value):
        if element is not None:
            return False
    return True
def __init__(self, request_datasets=None):
    """
    :param request_datasets: Zero or more request datasets to add to the collection.
    """
    # Fix: the original default was a mutable list literal ([]), which is a single object shared
    # across every call. Use the None sentinel instead; behavior for all existing callers is
    # unchanged (an empty/absent argument still produces an empty collection).
    self._s = OrderedDict()
    self._did = []
    if request_datasets is not None:
        for rd in get_iter(request_datasets):
            self.update(rd)
def __init__(self, **kwargs):
    # Field constructor: consume field-specific keyword arguments, forward the remainder to
    # ``VariableCollection.__init__``, then wire up the dimension map, grid, tags, and coordinate
    # variables.
    kwargs = kwargs.copy()
    dimension_map = kwargs.pop('dimension_map', None)

    # Flag updated by driver to indicate if the coordinate system is assigned or implied.
    self._has_assigned_coordinate_system = False
    # Flag to indicate if this is a regrid destination.
    self.regrid_destination = kwargs.pop('regrid_destination', False)
    # Flag to indicate if this is a regrid source.
    self.regrid_source = kwargs.pop('regrid_source', True)

    # Other incoming data objects may have a coordinate system which should be used.
    crs = kwargs.pop(KeywordArgument.CRS, 'auto')

    # Add grid variable metadata to dimension map.
    grid = kwargs.pop(KeywordArgument.GRID, 'auto')

    # Configure the driver.
    driver = kwargs.pop(KeywordArgument.DRIVER, 'auto')

    # Extract standard coordinate variables from the field keyword arguments.
    k = (DimensionMapKey.GEOM, DimensionMapKey.REALIZATION, DimensionMapKey.TIME, DimensionMapKey.LEVEL)
    s = OrderedDict()
    for ii in k:
        s[ii] = kwargs.pop(ii, None)

    # 'auto' defers the choice; an explicit None is invalid.
    grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION, 'auto')
    if grid_abstraction is None:
        raise ValueError("'{}' may not be None.".format(KeywordArgument.GRID_ABSTRACTION))

    grid_is_isomorphic = kwargs.pop('grid_is_isomorphic', 'auto')
    if grid_is_isomorphic is None:
        raise ValueError("'{}' may not be None.".format('grid_is_isomorphic'))

    # TODO: This should maybe be part of the dimension map? Time variables are not dependent on fields.
    self.format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)

    # Use tags to set data variables.
    is_data = kwargs.pop(KeywordArgument.IS_DATA, [])

    VariableCollection.__init__(self, **kwargs)

    # Deep-copy so the caller's dimension map (object or dict) is never mutated.
    dimension_map = deepcopy(dimension_map)
    if dimension_map is None:
        dimension_map = DimensionMap()
    elif isinstance(dimension_map, dict):
        dimension_map = DimensionMap.from_dict(dimension_map)
    self.dimension_map = dimension_map

    self.set_grid(grid, crs=crs)
    if driver != 'auto':
        self.dimension_map.set_driver(driver)
    if grid_abstraction != 'auto':
        self.dimension_map.set_grid_abstraction(grid_abstraction)
    if grid_is_isomorphic != 'auto':
        self.dimension_map.set_property(DMK.IS_ISOMORPHIC, grid_is_isomorphic)

    # Append the data variable tagged variable names.
    is_data = list(get_iter(is_data, dtype=Variable))
    is_data_variable_names = get_variable_names(is_data)
    for idvn in is_data_variable_names:
        self.append_to_tags(TagName.DATA_VARIABLES, idvn, create=True)
    # Add any variable objects not already present in the collection.
    for idx, dvn in enumerate(is_data_variable_names):
        if dvn not in self:
            if isinstance(is_data[idx], Variable):
                self.add_variable(is_data[idx])

    # Configure the field updating the dimension map in the process.
    cvar = s[DimensionMapKey.REALIZATION]
    if cvar is not None:
        self.set_realization(cvar)
    cvar = s[DimensionMapKey.TIME]
    if cvar is not None:
        self.set_time(cvar)
    cvar = s[DimensionMapKey.LEVEL]
    if cvar is not None:
        self.set_level(cvar)
    cvar = s[DimensionMapKey.GEOM]
    if cvar is not None:
        self.set_geom(cvar, crs=crs)
    if crs != 'auto':
        self.set_crs(crs)
def test_init_variable_not_none(self):
    """Collection keys match the aliases of the supplied variables, in order."""
    variables = [self.get_variable(), [self.get_variable(), self.get_variable('tas_foo2')]]
    for v in variables:
        vc = VariableCollection(variables=v)
        # Fix: wrap keys() in list() — under Python 3, keys() returns a view object which never
        # compares equal to a list; list(...) is also valid under Python 2.
        self.assertEqual(list(vc.keys()), [iv.alias for iv in get_iter(v, dtype=Variable)])
def test_get_iter_numpy(self):
    """Test entire NumPy array returned versus its individual elements."""
    data = np.array([1, 2, 3, 4])
    yielded = list(get_iter(data))
    self.assertNumpyAll(yielded[0], data)
def test_get_iter_str(self):
    """Test whole string returned as opposed to its immutable elements."""
    self.assertEqual(list(get_iter('hi')), ['hi'])
def rename_variable(self):
    """Renamed variable name(s); falls back to ``variable`` when no rename is set."""
    if self._rename_variable is None:
        return self.variable
    return get_first_or_tuple(list(get_iter(self._rename_variable)))
def units(self):
    """Units attribute for each request variable, scalar when there is only one."""
    collected = [self.metadata['variables'][name]['attrs'].get('units')
                 for name in get_iter(self.variable)]
    return get_first_or_tuple(collected)
def __init__(self, variables=None):
    """Initialize the collection, optionally adding an initial variable or sequence of variables."""
    super(VariableCollection, self).__init__()
    if variables is None:
        return
    for v in get_iter(variables, dtype=Variable):
        self.add_variable(v)