Beispiel #1
0
    def test_extract_nc4_encoding(self):
        # An encoding key that is not recognised must raise when
        # raise_on_invalid=True is requested.
        bad_encoding = {'foo': 'bar'}
        rejected = xray.Variable(('x', ), [1, 2, 3], {}, bad_encoding)
        with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
            _extract_nc4_encoding(rejected, raise_on_invalid=True)

        # Without raise_on_invalid, a 'chunking' entry is expected to yield
        # an empty encoding dict.
        chunk_encoding = {'chunking': (2, 1)}
        tolerated = xray.Variable(('x', ), [1, 2, 3], {}, chunk_encoding)
        extracted = _extract_nc4_encoding(tolerated)
        self.assertEqual({}, extracted)
Beispiel #2
0
def encode_cf_variable(var):
    """Convert a Variable into a Variable suitable for saving as a netCDF
    variable.

    The following steps are applied, in this order:

    1. datetime data is converted to numbers with ``encode_cf_datetime``
       (the resulting ``units`` and ``calendar`` are stored in the
       attributes); any other object-dtype array is cast to the dtype of its
       first element.
    2. If the encoding holds ``add_offset`` and/or ``scale_factor``, the data
       is converted to float, the offset is subtracted and the scale divided
       out; both values are copied into the attributes.
    3. If the encoding holds ``_FillValue``, it is copied into the
       attributes and, unless the fill value is itself NaN, NaN entries of
       floating point data are replaced by it.
    4. If the encoding holds a non-object ``dtype``, the data is cast to it,
       rounding first when the target is an integer type.

    Parameters
    ----------
    var : Variable
        Object providing ``dimensions``, ``values``, ``attrs`` and
        ``encoding``.

    Returns
    -------
    xray.Variable
        A new variable built from the encoded data, the updated copy of the
        attributes and the copy of the encoding. The NaN replacement works
        on a copy, so this function does not write into ``var.values``.
    """
    dimensions = var.dimensions
    data = var.values
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    # NOTE(review): ``data.reshape(-1)[0]`` below assumes object arrays are
    # non-empty -- confirm that callers never pass empty object arrays.
    if (np.issubdtype(data.dtype, np.datetime64)
            or (data.dtype.kind == 'O'
                and isinstance(data.reshape(-1)[0], datetime))):
        # encode datetime arrays into numeric arrays
        (data, units,
         calendar) = encode_cf_datetime(data, encoding.pop('units', None),
                                        encoding.pop('calendar', None))
        attributes['units'] = units
        attributes['calendar'] = calendar
    elif data.dtype.kind == 'O':
        # Occasionally, one will end up with variables with dtype=object
        # (likely because they were created from pandas objects which don't
        # maintain dtype carefully). This code makes a best effort attempt to
        # encode them into a dtype that netCDF can handle by inspecting the
        # dtype of the first element.
        dtype = np.array(data.reshape(-1)[0]).dtype
        # N.B. the "astype" call below will fail if data cannot be cast to the
        # type of its first element (which is probably the only sensible thing
        # to do).
        data = np.asarray(data).astype(dtype)

    def get_to(source, dest, k):
        """Copy ``source[k]`` into ``dest`` (keeping it in ``source``) and
        return the value."""
        v = source.get(k)
        dest[k] = v
        return v

    # unscale/mask
    if any(k in encoding for k in ['add_offset', 'scale_factor']):
        data = np.array(data, dtype=float, copy=True)
        if 'add_offset' in encoding:
            data -= get_to(encoding, attributes, 'add_offset')
        if 'scale_factor' in encoding:
            data /= get_to(encoding, attributes, 'scale_factor')

    # replace NaN with the fill value
    if '_FillValue' in encoding:
        # Always record the fill value, even when the data currently holds
        # no NaN, so that it is not lost when the variable is written.
        fill_value = get_to(encoding, attributes, '_FillValue')
        # Test by value rather than ``is np.nan``: float('nan') and
        # np.float32('nan') are NaN too but are different objects.
        fill_is_nan = (isinstance(fill_value, (float, np.floating))
                       and np.isnan(fill_value))
        # Only floating point / complex arrays can contain NaN; np.isnan
        # raises TypeError for e.g. string data.
        if not fill_is_nan and data.dtype.kind in 'fc':
            nans = np.isnan(data)
            if nans.any():
                # ``data`` may still be the array owned by ``var``; copy
                # before assigning so the caller's values stay untouched.
                data = data.copy()
                data[nans] = fill_value

    # restore original dtype
    if 'dtype' in encoding and encoding['dtype'].kind != 'O':
        # np.integer covers every signed and unsigned integer dtype; the
        # builtin ``int`` only stands for the platform default integer.
        if np.issubdtype(encoding['dtype'], np.integer):
            data = data.round()
        data = data.astype(encoding['dtype'])

    return xray.Variable(dimensions, data, attributes, encoding=encoding)
Beispiel #3
0
 def test_unary(self):
     # Every operand below is zero-valued; the test expects ``operand + 1``
     # and ``xu.cos(operand)`` to be identical for each of them.
     zero_operands = [
         0,
         np.zeros(2),
         xray.Variable(['x'], [0, 0]),
         xray.DataArray([0, 0], dims='x'),
         xray.Dataset({'y': ('x', [0, 0])}),
     ]
     for operand in zero_operands:
         expected = operand + 1
         actual = xu.cos(operand)
         self.assertIdentical(expected, actual)
Beispiel #4
0
def decode_cf_variable(var,
                       concat_characters=True,
                       mask_and_scale=True,
                       decode_times=True):
    """Build a decoded ``xray.Variable`` from ``var``.

    Depending on the flags, the underlying data is wrapped in
    ``CharToStringArray`` (single-byte string data; the last dimension is
    dropped), ``MaskedAndScaledArray`` (``_FillValue`` / ``scale_factor`` /
    ``add_offset`` attributes) and ``DecodedCFDatetimeArray`` (``units``
    attribute containing ``'since'``). Attributes consumed by a decoding
    step are moved into the encoding of the returned variable, which also
    records the dtype of the undecoded data under ``'dtype'``.

    Raises
    ------
    ValueError
        If the encoding already holds a ``dtype`` different from the data's
        dtype, or if an attribute to be moved already exists in the
        encoding.
    """
    def pop_to(source, dest, k):
        """
        Move key k from source into dest and return its value.  A missing
        key or a None value leaves dest untouched; an existing k in dest
        raises ValueError.
        """
        value = source.pop(k, None)
        if value is None:
            return value
        if k in dest:
            raise ValueError("Failed hard to prevent overwriting key %s" %
                             k)
        dest[k] = value
        return value

    var = xray.variable.as_variable(var)
    # go through _data rather than data so that nothing gets loaded here
    data = var._data
    dimensions = var.dimensions
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    if 'dtype' in encoding and data.dtype != encoding['dtype']:
        raise ValueError("Refused to overwrite dtype")
    encoding['dtype'] = data.dtype

    if (concat_characters and data.dtype.kind == 'S'
            and data.dtype.itemsize == 1):
        dimensions = dimensions[:-1]
        data = CharToStringArray(data)

    if mask_and_scale:
        fill_value, scale_factor, add_offset = [
            pop_to(attributes, encoding, key)
            for key in ('_FillValue', 'scale_factor', 'add_offset')]
        has_real_fill = fill_value is not None and not np.isnan(fill_value)
        if (has_real_fill or scale_factor is not None
                or add_offset is not None):
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset)

    if (decode_times and 'units' in attributes
            and 'since' in attributes['units']):
        units = pop_to(attributes, encoding, 'units')
        calendar = pop_to(attributes, encoding, 'calendar')
        data = DecodedCFDatetimeArray(data, units, calendar)

    lazy_data = indexing.LazilyIndexedArray(data)
    return xray.Variable(dimensions, lazy_data, attributes,
                         encoding=encoding)
Beispiel #5
0
 def test_binary(self):
     # Every operand below is zero-valued. For each pair (t1, t2), with t2
     # taken from t1's position onwards in the list, the maximum of one
     # operand and the other operand plus one is expected to be identical
     # to ``t2 + 1`` regardless of argument order.
     operands = [
         0,
         np.zeros(2),
         xray.Variable(['x'], [0, 0]),
         xray.DataArray([0, 0], dims='x'),
         xray.Dataset({'y': ('x', [0, 0])}),
     ]
     for start, t1 in enumerate(operands):
         for t2 in operands[start:]:
             argument_pairs = (
                 (t1, t2 + 1),
                 (t2, t1 + 1),
                 (t1 + 1, t2),
                 (t2 + 1, t1),
             )
             for left, right in argument_pairs:
                 self.assertIdentical(t2 + 1, xu.maximum(left, right))
Beispiel #6
0
    def _iter_grouped_shortcut(self):
        """Yield one ``xray.Variable`` per entry of ``self.group_indices``.

        Quicker counterpart of `_iter_grouped`: each yielded Variable is
        constructed from dimension names and sliced values only, so it
        carries no metadata.
        """
        from .variable import as_variable
        variable = as_variable(self.obj)

        # Work out the dimension names of each yielded piece: indexing with
        # a single integer removes group_dim, anything else keeps all of them
        first_group = self.group_indices[0]
        if isinstance(first_group, (int, np.integer)):
            dims = tuple(name for name in variable.dimensions
                         if name != self.group_dim)
        else:
            dims = variable.dimensions

        # Take everything along every axis except the grouped one
        take_all = [slice(None)] * variable.ndim
        axis = variable.get_axis_num(self.group_dim)
        for group in self.group_indices:
            key = list(take_all)
            key[axis] = group
            yield xray.Variable(dims, variable.values[tuple(key)])
Beispiel #7
0
 def open_store_variable(self, var):
     """Wrap the store variable ``var`` in an ``xray.Variable``.

     The data is accessed lazily through ``NetCDF4ArrayWrapper``; the
     variable's filters, chunking and ``least_significant_digit`` are
     recorded in the encoding instead of the attributes.
     """
     # switch off automatic masking/scaling on the underlying variable
     var.set_auto_maskandscale(False)
     dims = var.dimensions
     lazy_data = indexing.LazilyIndexedArray(NetCDF4ArrayWrapper(var))

     attrs = OrderedDict()
     for attr_name in var.ncattrs():
         attrs[attr_name] = var.getncattr(attr_name)

     # netCDF4 specific encoding; _FillValue is left in attrs for later
     encoding = {}
     filter_settings = var.filters()
     if filter_settings is not None:
         encoding.update(filter_settings)

     chunk_info = var.chunking()
     if chunk_info is not None:
         is_contiguous = chunk_info == 'contiguous'
         encoding['contiguous'] = True if is_contiguous else False
         encoding['chunksizes'] = (None if is_contiguous
                                   else tuple(chunk_info))

     # TODO: figure out how to round-trip "endian-ness" without raising
     # warnings from netCDF4
     # encoding['endian'] = var.endian()
     lsd = attrs.pop('least_significant_digit', None)
     encoding['least_significant_digit'] = lsd
     return xray.Variable(dims, lazy_data, attrs, encoding)
Beispiel #8
0
 def test_extract_h5nc_encoding(self):
     # not supported with h5netcdf (yet)
     # NOTE(review): the encoding key below is spelled
     # 'least_sigificant_digit' (missing an 'n') and the helper under test
     # is _extract_nc4_encoding, so the expected error may stem from the
     # unrecognised key rather than from h5netcdf -- confirm the intent.
     unsupported = {'least_sigificant_digit': 2}
     variable = xray.Variable(('x', ), [1, 2, 3], {}, unsupported)
     with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
         _extract_nc4_encoding(variable, raise_on_invalid=True)
Beispiel #9
0
 def open_store_variable(self, var):
     """Return an ``xray.Variable`` with the dimensions and data of ``var``
     and its ``_attributes`` passed through ``_decode_values``."""
     dims = var.dimensions
     values = var.data
     attrs = _decode_values(var._attributes)
     return xray.Variable(dims, values, attrs)
Beispiel #10
0
 def open_store_variable(self, var):
     """Return an ``xray.Variable`` whose data is ``var`` wrapped in
     ``PydapArrayWrapper`` and ``indexing.LazilyIndexedArray``."""
     wrapped = PydapArrayWrapper(var)
     lazy_data = indexing.LazilyIndexedArray(wrapped)
     return xray.Variable(var.dimensions, lazy_data, var.attributes)