def test_extract_nc4_encoding(self):
    """Check how ``_extract_nc4_encoding`` treats unrecognised encodings.

    With ``raise_on_invalid=True`` an unknown key must raise a ValueError
    whose message matches 'unexpected encoding'; with the default arguments
    a 'chunking' entry must result in an empty encoding dict.
    """
    # strict mode: the unknown key 'foo' is rejected
    unknown_key = xray.Variable(('x', ), [1, 2, 3], {}, {'foo': 'bar'})
    with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
        _extract_nc4_encoding(unknown_key, raise_on_invalid=True)

    # default mode: 'chunking' does not show up in the extracted encoding
    chunked = xray.Variable(('x', ), [1, 2, 3], {}, {'chunking': (2, 1)})
    extracted = _extract_nc4_encoding(chunked)
    self.assertEqual({}, extracted)
def encode_cf_variable(var):
    """Convert a Variable into a Variable suitable for saving as a netCDF
    variable.

    The following steps are applied in order:

    1. Datetime data (``datetime64`` dtype, or an object array whose first
       element is a ``datetime``) is converted with ``encode_cf_datetime``,
       consuming the 'units' and 'calendar' encoding entries and storing the
       resulting units/calendar as attributes.  Any other object array is
       cast to the dtype of its first element.
    2. If 'add_offset' and/or 'scale_factor' are in the encoding, the data
       is converted to float, the offset is subtracted, the result is
       divided by the scale factor, and both values are copied into the
       attributes.
    3. If '_FillValue' is in the encoding, NaNs in the data are replaced by
       it.  A NaN fill value is simply recorded as an attribute.
    4. If 'dtype' is in the encoding (and is not an object dtype), the data
       is cast back to it, rounding first for integer dtypes.

    Parameters
    ----------
    var : Variable
        Object exposing ``dimensions``, ``values``, ``attrs`` and
        ``encoding``.  Neither its attribute/encoding dicts nor its values
        are modified.

    Returns
    -------
    xray.Variable
        New variable holding the encoded data, the updated attributes and
        the remaining encoding.
    """
    dimensions = var.dimensions
    data = var.values
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()
    # Becomes True once ``data`` is known to be a fresh array, i.e. one that
    # can be written to without touching the caller's values.
    owns_data = False

    # NOTE: ``data.reshape(-1)[0]`` raises IndexError for an empty object
    # array; that pre-existing behavior is left unchanged here.
    if (np.issubdtype(data.dtype, np.datetime64)
            or (data.dtype.kind == 'O'
                and isinstance(data.reshape(-1)[0], datetime))):
        # encode datetime arrays into numeric arrays
        (data, units, calendar) = encode_cf_datetime(
            data, encoding.pop('units', None), encoding.pop('calendar', None))
        attributes['units'] = units
        attributes['calendar'] = calendar
    elif data.dtype.kind == 'O':
        # Occasionally, one will end up with variables with dtype=object
        # (likely because they were created from pandas objects which don't
        # maintain dtype careful). This code makes a best effort attempt to
        # encode them into a dtype that NETCDF can handle by inspecting the
        # dtype of the first element.
        dtype = np.array(data.reshape(-1)[0]).dtype
        # N.B. the "astype" call below will fail if data cannot be cast to the
        # type of its first element (which is probably the only sensible thing
        # to do).
        data = np.asarray(data).astype(dtype)
        owns_data = True  # astype() copies by default

    def get_to(source, dest, k):
        """Copy ``source[k]`` into ``dest[k]`` and return the value."""
        v = source.get(k)
        dest[k] = v
        return v

    # unscale/mask
    if any(k in encoding for k in ['add_offset', 'scale_factor']):
        data = np.array(data, dtype=float, copy=True)
        owns_data = True
        if 'add_offset' in encoding:
            data -= get_to(encoding, attributes, 'add_offset')
        if 'scale_factor' in encoding:
            data /= get_to(encoding, attributes, 'scale_factor')

    # replace NaN with the fill value
    if '_FillValue' in encoding:
        fill_value = encoding['_FillValue']
        # Compare by value rather than identity: ``float('nan')`` or
        # ``np.float32('nan')`` are NaN fill values too but are not the
        # ``np.nan`` object itself.
        fill_is_nan = (fill_value is np.nan
                       or (isinstance(fill_value, (float, np.floating))
                           and np.isnan(fill_value)))
        if fill_is_nan:
            attributes['_FillValue'] = fill_value
        else:
            nans = np.isnan(data)
            # the attribute is only written when there is something to fill
            if nans.any():
                if not owns_data:
                    # ``data`` may still be the caller's own array; copy it
                    # so that filling does not alter ``var.values``.
                    data = np.array(data, copy=True)
                    owns_data = True
                data[nans] = get_to(encoding, attributes, '_FillValue')

    # restore original dtype
    if 'dtype' in encoding and encoding['dtype'].kind != 'O':
        # ``np.integer`` covers every signed and unsigned integer width;
        # the builtin ``int`` only matches the platform default integer on
        # current NumPy releases, which silently skipped the rounding.
        if np.issubdtype(encoding['dtype'], np.integer):
            data = data.round()
        data = data.astype(encoding['dtype'])

    return xray.Variable(dimensions, data, attributes, encoding=encoding)
def test_unary(self):
    """``xu.cos`` of an all-zero input must be identical to that input + 1.

    Exercised for a Python scalar, a numpy array, a Variable, a DataArray
    and a Dataset.
    """
    zero_inputs = [
        0,
        np.zeros(2),
        xray.Variable(['x'], [0, 0]),
        xray.DataArray([0, 0], dims='x'),
        xray.Dataset({'y': ('x', [0, 0])}),
    ]
    for operand in zero_inputs:
        expected = operand + 1
        self.assertIdentical(expected, xu.cos(operand))
def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
                       decode_times=True):
    """Build a lazily decoded Variable from ``var``.

    Parameters
    ----------
    var : object
        Anything accepted by ``xray.variable.as_variable``.
    concat_characters : bool, optional
        If true and the data has dtype kind 'S' with itemsize 1, wrap the
        data in ``CharToStringArray`` and drop the last dimension.
    mask_and_scale : bool, optional
        If true, move '_FillValue', 'scale_factor' and 'add_offset' from the
        attributes to the encoding and, when any of them is usable, wrap the
        data in ``MaskedAndScaledArray``.
    decode_times : bool, optional
        If true and the 'units' attribute contains 'since', move 'units' and
        'calendar' to the encoding and wrap the data in
        ``DecodedCFDatetimeArray``.

    Returns
    -------
    xray.Variable
        New variable whose data is wrapped in ``indexing.LazilyIndexedArray``.

    Raises
    ------
    ValueError
        If the encoding already holds a 'dtype' different from the data's
        dtype, or if a key being moved from the attributes already exists
        in the encoding.
    """
    var = xray.variable.as_variable(var)
    # read _data instead of data so as not to trigger loading data
    data = var._data
    dimensions = var.dimensions
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    def pop_to(source, dest, k):
        """Pop key ``k`` from ``source`` and store it in ``dest``.

        A missing key or a ``None`` value is not stored.  Raises ValueError
        if ``k`` is already present in ``dest``.  Returns the popped value.
        """
        v = source.pop(k, None)
        if v is None:
            return v
        if k in dest:
            raise ValueError("Failed hard to prevent overwriting key %s" % k)
        dest[k] = v
        return v

    # remember the on-disk dtype, refusing to clobber a conflicting one
    if 'dtype' in encoding and data.dtype != encoding['dtype']:
        raise ValueError("Refused to overwrite dtype")
    encoding['dtype'] = data.dtype

    is_char_array = (concat_characters
                     and data.dtype.kind == 'S'
                     and data.dtype.itemsize == 1)
    if is_char_array:
        dimensions = dimensions[:-1]
        data = CharToStringArray(data)

    if mask_and_scale:
        fill_value = pop_to(attributes, encoding, '_FillValue')
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')
        # a NaN fill value needs no masking of its own
        has_fill = fill_value is not None and not np.isnan(fill_value)
        if has_fill or scale_factor is not None or add_offset is not None:
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset)

    if (decode_times
            and 'units' in attributes
            and 'since' in attributes['units']):
        units = pop_to(attributes, encoding, 'units')
        calendar = pop_to(attributes, encoding, 'calendar')
        data = DecodedCFDatetimeArray(data, units, calendar)

    lazy_data = indexing.LazilyIndexedArray(data)
    return xray.Variable(dimensions, lazy_data, attributes, encoding=encoding)
def test_binary(self):
    """``xu.maximum`` of all-zero inputs and inputs + 1 must equal ``t2 + 1``.

    Every pair ``(t1, t2)`` is taken from the list below with ``t2`` at or
    after ``t1``, and each of the four argument arrangements is checked.
    """
    zero_inputs = [
        0,
        np.zeros(2),
        xray.Variable(['x'], [0, 0]),
        xray.DataArray([0, 0], dims='x'),
        xray.Dataset({'y': ('x', [0, 0])}),
    ]
    pairs = ((t1, t2)
             for start, t1 in enumerate(zero_inputs)
             for t2 in zero_inputs[start:])
    for t1, t2 in pairs:
        # incremented operand second, then first, for both operand orders
        self.assertIdentical(t2 + 1, xu.maximum(t1, t2 + 1))
        self.assertIdentical(t2 + 1, xu.maximum(t2, t1 + 1))
        self.assertIdentical(t2 + 1, xu.maximum(t1 + 1, t2))
        self.assertIdentical(t2 + 1, xu.maximum(t2 + 1, t1))
def _iter_grouped_shortcut(self):
    """Fast version of `_iter_grouped` that yields Variables without
    metadata.

    One ``xray.Variable`` is yielded per entry of ``self.group_indices``,
    built directly from the values sliced along ``self.group_dim``.
    """
    from .variable import as_variable
    array = as_variable(self.obj)

    # work out the dimensions of each yielded piece
    scalar_groups = isinstance(self.group_indices[0], (int, np.integer))
    if scalar_groups:
        # integer indexing squeezes group_dim out of the result
        dims = tuple(name for name in array.dimensions
                     if name != self.group_dim)
    else:
        dims = array.dimensions

    # take everything along every axis except the grouped one, whose entry
    # is overwritten for each group below
    selection = [slice(None) for _ in range(array.ndim)]
    axis = array.get_axis_num(self.group_dim)
    for group in self.group_indices:
        selection[axis] = group
        piece = array.values[tuple(selection)]
        yield xray.Variable(dims, piece)
def open_store_variable(self, var):
    """Wrap a backend variable in an ``xray.Variable`` with lazy data.

    Automatic masking/scaling is switched off on ``var``, its attributes are
    copied into an ordered dict, and the filter, chunking and
    'least_significant_digit' settings are collected into the encoding.
    """
    var.set_auto_maskandscale(False)
    dimensions = var.dimensions
    data = indexing.LazilyIndexedArray(NetCDF4ArrayWrapper(var))

    attributes = OrderedDict()
    for attr_name in var.ncattrs():
        attributes[attr_name] = var.getncattr(attr_name)

    # netCDF4 specific encoding; save _FillValue for later
    encoding = {}

    filter_settings = var.filters()
    if filter_settings is not None:
        encoding.update(filter_settings)

    chunk_settings = var.chunking()
    if chunk_settings is not None:
        is_contiguous = bool(chunk_settings == 'contiguous')
        encoding['contiguous'] = is_contiguous
        encoding['chunksizes'] = (None if is_contiguous
                                  else tuple(chunk_settings))

    # TODO: figure out how to round-trip "endian-ness" without raising
    # warnings from netCDF4
    # encoding['endian'] = var.endian()

    # stored as encoding rather than as a regular attribute
    lsd = attributes.pop('least_significant_digit', None)
    encoding['least_significant_digit'] = lsd

    return xray.Variable(dimensions, data, attributes, encoding)
def test_extract_h5nc_encoding(self):
    """An unsupported encoding key must raise 'unexpected encoding'."""
    # not supported with h5netcdf (yet)
    # NOTE(review): the key below is spelled 'least_sigificant_digit' and
    # the call goes to _extract_nc4_encoding although the test is named
    # after h5netcdf -- confirm both are intentional.
    unsupported = {'least_sigificant_digit': 2}
    var = xray.Variable(('x', ), [1, 2, 3], {}, unsupported)
    with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
        _extract_nc4_encoding(var, raise_on_invalid=True)
def open_store_variable(self, var):
    """Return an ``xray.Variable`` built from ``var``.

    The dimensions and data are taken as-is; the attributes are passed
    through ``_decode_values`` first.
    """
    dimensions = var.dimensions
    data = var.data
    attributes = _decode_values(var._attributes)
    return xray.Variable(dimensions, data, attributes)
def open_store_variable(self, var):
    """Return an ``xray.Variable`` whose data is read lazily from ``var``.

    The data is ``var`` wrapped in ``PydapArrayWrapper`` and then in
    ``indexing.LazilyIndexedArray``; dimensions and attributes are taken
    from ``var`` unchanged.
    """
    wrapped = PydapArrayWrapper(var)
    lazy_data = indexing.LazilyIndexedArray(wrapped)
    return xray.Variable(var.dimensions, lazy_data, var.attributes)