Example #1
def _decode_datetime_cf(data_array, decode_times, decode_timedelta):
    """
    Decode the datetimes based on CF conventions
    """
    if decode_timedelta is None:
        decode_timedelta = decode_times

    for coord in data_array.coords:
        time_var = None
        if decode_times and "since" in data_array[coord].attrs.get("units", ""):
            time_var = times.CFDatetimeCoder(use_cftime=True).decode(
                as_variable(data_array[coord]), name=coord
            )
        elif (
            decode_timedelta
            and data_array[coord].attrs.get("units") in times.TIME_UNITS
        ):
            time_var = times.CFTimedeltaCoder().decode(
                as_variable(data_array[coord]), name=coord
            )
        if time_var is not None:
            dimensions, data, attributes, encoding = variables.unpack_for_decoding(
                time_var
            )
            data_array = data_array.assign_coords(
                {
                    coord: IndexVariable(
                        dims=dimensions,
                        data=data,
                        attrs=attributes,
                        encoding=encoding,
                    )
                }
            )
    return data_array
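A minimal usage sketch for the helper above, assuming it lives in a module that already imports the xarray internals it references (times, variables, as_variable, IndexVariable); the coordinate values and units here are illustrative only.

import numpy as np
import xarray as xr

# Hypothetical DataArray with a raw, CF-encoded "time" coordinate.
da = xr.DataArray(
    np.random.rand(3),
    dims="time",
    coords={"time": ("time", [0, 1, 2], {"units": "days since 2000-01-01"})},
)

decoded = _decode_datetime_cf(da, decode_times=True, decode_timedelta=None)
# The "time" coordinate now holds cftime datetimes (use_cftime=True above).
print(decoded["time"].values)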
Example #2
    def to_xarray_variable(self, value):
        """Convert the input value to an `xarray.Variable` object.

        Parameters
        ----------
        value : object
            The input value can be in the form of a single value,
            an array-like, a ``(dims, data[, attrs])`` tuple, another
            `xarray.Variable` object or a `xarray.DataArray` object.
            If None, the `default_value` attribute is used to set the value.

        Returns
        -------
        variable : `xarray.Variable`
            An xarray Variable object with data corresponding to the input
            (or default) value and with attributes ('description' and other
            key:value pairs found in `Variable.attrs`).

        """
        if value is None:
            value = self.default_value

        # In cases where value is a 1-d array without a dimension name, the
        # dimension name is set to 'this_variable' and has to be renamed
        # later to the name of the variable in a process/model.
        xr_variable = as_variable(value, name='this_variable')

        xr_variable.attrs.update(self.attrs)
        if self.description:
            xr_variable.attrs['description'] = self.description

        return xr_variable
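The accepted input forms listed in the docstring mirror what xarray's as_variable itself handles; a rough sketch of those conversions (import path shown for recent xarray versions and may differ across releases):

import numpy as np
from xarray import DataArray, Variable
from xarray.core.variable import as_variable

as_variable(('x', np.arange(3)))                  # (dims, data) tuple
as_variable(('x', np.arange(3), {'units': 'm'}))  # (dims, data, attrs) tuple
as_variable(Variable('x', np.arange(3)))          # an existing Variable
as_variable(DataArray(np.arange(3), dims='x'))    # a DataArray
as_variable(0.0, name='scalar')                   # a single (scalar) value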
Example #3
def _calc_concat_dims_coords(dims):
    """
    Infer the dimension names and 1d coordinate variables (if appropriate)
    for concatenating along the new dimensions. Based on the function
    _calc_concat_dim_coord in xarray, but updated to support multiple dims.
    """
    dimensions = []
    coordinates = []

    for dim in dims:
        if isinstance(dim, str):
            coord = None
        elif not hasattr(dim, 'dims'):
            # dim is not a DataArray or IndexVariable
            dim_name = getattr(dim, "name", None)
            if dim_name is None:
                dim_name = "concat_dim"
            coord = IndexVariable(dim_name, dim)
            dim = dim_name
        elif not hasattr(dim, 'name'):
            coord = as_variable(dim).to_index_variable()
            (dim, ) = coord.dims
        else:
            coord = dim
            (dim, ) = coord.dims

        dimensions.append(dim)
        coordinates.append(coord)

    return dimensions, coordinates
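A rough usage sketch, assuming pandas and the xarray names used above (IndexVariable, as_variable) are importable in the surrounding module; the dimension names are made up.

import pandas as pd

new_dim = pd.Index([10, 20, 30], name='ensemble')
dims, coords = _calc_concat_dims_coords(['time', new_dim])
# dims   -> ['time', 'ensemble']
# coords -> [None, IndexVariable('ensemble', ...)] built from the pandas Index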
Example #4
def enforce_cf_variable(var, mask_and_scale=True):
    """ Given a Variable constructed from GEOS-Chem output, enforce
    CF-compliant metadata and formatting.

    Until a bug with lazily-loaded data and masking/scaling is resolved in
    xarray, you have the option to manually mask and scale the data here.

    Parameters
    ----------
    var : xarray.Variable
        A variable holding information decoded from GEOS-Chem output.
    mask_and_scale : bool
        Flag to scale and mask the data, given the unit conversions provided.

    Returns
    -------
    out : xarray.Variable
        The original variable processed to conform to CF standards

    .. note::

        This method borrows heavily from the ideas in ``xarray.decode_cf_variable``

    """
    var = as_variable(var)
    data = var._data  # avoid loading by accessing _data instead of data
    dims = var.dims
    attrs = var.attrs.copy()
    encoding = var.encoding.copy()
    orig_dtype = data.dtype

    # Process masking/scaling attributes. We only expect a "scale" value
    # for the units with this output.
    if 'scale' in attrs:
        scale = attrs.pop('scale')
        attrs['scale_factor'] = scale
        encoding['scale_factor'] = scale

        # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually
        #       handle masking/scaling
        if mask_and_scale:
            data = scale*data

    # Process units
    # TODO: How do we want to handle parts-per-* units? These are not part of
    #       the udunits standard, and the CF conventions suggest using units
    #       like 1e-6 for parts-per-million. But we potentially mix mass and
    #       volume/molar mixing ratios in GEOS-Chem output, so we need a way
    #       to handle that edge case.
    if 'unit' in attrs:
        unit = attrs.pop('unit')
        unit = get_cfcompliant_units(unit)
        attrs['units'] = unit

    # TODO: Once the xr.decode_cf bug is fixed, we won't need to manually
    #       handle masking/scaling
    return Variable(dims, data, attrs, encoding=encoding)
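A small sketch of exercising the function above; get_cfcompliant_units is project-specific, so the resulting 'units' string is whatever that helper returns for the raw unit.

import numpy as np
from xarray import Variable

raw = Variable(('lev',), np.array([1.0e9, 2.0e9]),
               attrs={'scale': 1.0e-9, 'unit': 'ppbv'})

out = enforce_cf_variable(raw, mask_and_scale=True)
# out.attrs now carries 'scale_factor' and a CF-style 'units' entry, and the
# data have been multiplied by the scale factor.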
Example #5
 def assertVariableNotEqual(self, v1, v2):
     self.assertFalse(as_variable(v1).equals(v2))
Example #6
 def assertVariableIdentical(self, v1, v2):
     assert as_variable(v1).identical(v2), (v1, v2)
Example #7
 def assertVariableEqual(self, v1, v2):
     assert as_variable(v1).equals(v2), (v1, v2)
Example #8
def _infer_coords_and_dims(shape, coords, dims):
    """All the logic for creating a new DataArray

    Note: Copied with minor modifications from xarray.variable.py version 0.9.6
    as it was not part of the xarray public API.

    """

    if (coords is not None and not is_dict_like(coords)
            and len(coords) != len(shape)):
        raise ValueError('coords is not dict-like, but it has %s items, '
                         'which does not match the %s dimensions of the '
                         'data' % (len(coords), len(shape)))

    if isinstance(dims, six.string_types):
        dims = (dims, )

    if dims is None:
        dims = ['dim_%s' % n for n in range(len(shape))]
        if coords is not None and len(coords) == len(shape):
            # try to infer dimensions from coords
            if is_dict_like(coords):
                raise TypeError(
                    'inferring DataArray dimensions from dictionary '
                    'like ``coords`` has been deprecated. Use an '
                    'explicit list of ``dims`` instead.')
            else:
                for n, (dim, coord) in enumerate(zip(dims, coords)):
                    coord = as_variable(coord,
                                        name=dims[n]).to_index_variable()
                    dims[n] = coord.name
        dims = tuple(dims)
    else:
        for d in dims:
            if not isinstance(d, six.string_types):
                raise TypeError('dimension %s is not a string' % d)

    new_coords = OrderedDict()

    if is_dict_like(coords):
        for k, v in coords.items():
            new_coords[k] = as_variable(v, name=k)
    elif coords is not None:
        for dim, coord in zip(dims, coords):
            var = as_variable(coord, name=dim)
            var.dims = (dim, )
            new_coords[dim] = var

    sizes = dict(zip(dims, shape))
    for k, v in new_coords.items():
        if any(d not in dims for d in v.dims):
            raise ValueError('coordinate %s has dimensions %s, but these '
                             'are not a subset of the DataArray '
                             'dimensions %s' % (k, v.dims, dims))

        for d, s in zip(v.dims, v.shape):
            if s != sizes[d]:
                raise ValueError('conflicting sizes for dimension %r: '
                                 'length %s on the data but length %s on '
                                 'coordinate %r' % (d, sizes[d], s, k))

    assert_unique_multiindex_level_names(new_coords)

    return new_coords, dims
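A quick sketch of the coordinate/dimension inference above (assuming the six, OrderedDict and xarray helper imports used by the surrounding module); the names and sizes are arbitrary.

import numpy as np

shape = (2, 3)
coords = {'x': [10, 20], 'y': ('y', np.arange(3), {'units': 'm'})}
new_coords, dims = _infer_coords_and_dims(shape, coords, dims=('x', 'y'))
# dims -> ('x', 'y'); new_coords maps each name to an xarray Variable whose
# sizes have been checked against the data shape.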
Example #9
def decode_cf_variable(var,
                       concat_characters=True,
                       mask_and_scale=True,
                       decode_times=True,
                       decode_endianness=True):
    """
    Decodes a variable which may hold CF encoded information.

    This includes variables that have been masked and scaled, which
    hold CF style time variables (this is almost always the case if
    the dataset has been serialized) and which have strings encoded
    as character arrays.

    Parameters
    ----------
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale : bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.
    decode_endianness : bool
        Decode arrays from non-native to native endianness.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var
    """
    # use _data instead of data so as not to trigger loading data
    var = as_variable(var)
    data = var._data
    dimensions = var.dims
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    original_dtype = data.dtype

    if concat_characters:
        if (data.dtype.kind == 'S' and data.dtype.itemsize == 1
                and data.shape[-1] != 0):
            dimensions = dimensions[:-1]
            data = CharToStringArray(data)

    if mask_and_scale:
        if 'missing_value' in attributes:
            # missing_value is deprecated, but we still want to support it as
            # an alias for _FillValue.
            if ('_FillValue' in attributes and not utils.equivalent(
                    attributes['_FillValue'], attributes['missing_value'])):
                raise ValueError(
                    "Discovered conflicting _FillValue "
                    "and missing_value. Consider "
                    "opening the offending dataset using "
                    "decode_cf=False, correcting the attributes "
                    "and decoding explicitly using "
                    "xarray.conventions.decode_cf(ds)")
            attributes['_FillValue'] = attributes.pop('missing_value')

        fill_value = np.array(pop_to(attributes, encoding, '_FillValue'))
        if fill_value.size > 1:
            warnings.warn("variable has multiple fill values {0}, decoding "
                          "all values to NaN.".format(str(fill_value)),
                          RuntimeWarning,
                          stacklevel=3)
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')
        if ((fill_value is not None and not np.any(pd.isnull(fill_value)))
                or scale_factor is not None or add_offset is not None):
            if fill_value.dtype.kind in ['U', 'S']:
                dtype = object
            else:
                dtype = float
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset, dtype)

    if decode_times and 'units' in attributes:
        if 'since' in attributes['units']:
            # datetime
            units = pop_to(attributes, encoding, 'units')
            calendar = pop_to(attributes, encoding, 'calendar')
            data = DecodedCFDatetimeArray(data, units, calendar)
        elif attributes['units'] in TIME_UNITS:
            # timedelta
            units = pop_to(attributes, encoding, 'units')
            data = DecodedCFTimedeltaArray(data, units)

    if decode_endianness and not data.dtype.isnative:
        # do this last, so it's only done if we didn't already unmask/scale
        data = NativeEndiannessArray(data)
        original_dtype = data.dtype

    if 'dtype' in encoding:
        if original_dtype != encoding['dtype']:
            warnings.warn("CF decoding is overwriting dtype")
    else:
        encoding['dtype'] = original_dtype

    if 'dtype' in attributes and attributes['dtype'] == 'bool':
        del attributes['dtype']
        data = BoolTypeArray(data)

    return Variable(dimensions,
                    indexing.LazilyIndexedArray(data),
                    attributes,
                    encoding=encoding)
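A minimal sketch of calling the function above on a hand-built variable, assuming the conventions-module names it relies on (MaskedAndScaledArray, DecodedCFDatetimeArray, pop_to, ...) are in scope; the fill value and time units are illustrative.

import numpy as np
from xarray import Variable

raw = Variable(('time',), np.array([0, 6, -999], dtype='int16'),
               attrs={'units': 'hours since 2000-01-01',
                      '_FillValue': -999, 'scale_factor': 1.0})

decoded = decode_cf_variable(raw)
# The fill value is masked and the integers are decoded to datetimes lazily,
# only once the data is actually accessed.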
Example #10
    def test_as_variable(self):
        data = np.arange(10)
        expected = Variable("x", data)

        self.assertVariableIdentical(expected, as_variable(expected))

        ds = Dataset({"x": expected})
        self.assertVariableIdentical(expected, as_variable(ds["x"]))
        self.assertNotIsInstance(ds["x"], Variable)
        self.assertIsInstance(as_variable(ds["x"]), Variable)

        FakeVariable = namedtuple("FakeVariable", "values dims")
        fake_xarray = FakeVariable(expected.values, expected.dims)
        self.assertVariableIdentical(expected, as_variable(fake_xarray))

        xarray_tuple = (expected.dims, expected.values)
        self.assertVariableIdentical(expected, as_variable(xarray_tuple))

        with self.assertRaisesRegexp(TypeError, "tuples to convert"):
            as_variable(tuple(data))
        with self.assertRaisesRegexp(TypeError, "without an explicit list of dimensions"):
            as_variable(data)

        actual = as_variable(data, name="x")
        self.assertVariableIdentical(expected, actual)
        self.assertIsInstance(actual, Coordinate)

        actual = as_variable(0)
        expected = Variable([], 0)
        self.assertVariableIdentical(expected, actual)
Example #11
    def test_as_variable(self):
        data = np.arange(10)
        expected = Variable('x', data)

        self.assertVariableIdentical(expected, as_variable(expected))

        ds = Dataset({'x': expected})
        self.assertVariableIdentical(expected, as_variable(ds['x']))
        self.assertNotIsInstance(ds['x'], Variable)
        self.assertIsInstance(as_variable(ds['x']), Variable)
        self.assertIsInstance(as_variable(ds['x'], strict=False), DataArray)

        FakeVariable = namedtuple('FakeVariable', 'values dims')
        fake_xarray = FakeVariable(expected.values, expected.dims)
        self.assertVariableIdentical(expected, as_variable(fake_xarray))

        xarray_tuple = (expected.dims, expected.values)
        self.assertVariableIdentical(expected, as_variable(xarray_tuple))

        with self.assertRaisesRegexp(TypeError, 'cannot convert arg'):
            as_variable(tuple(data))
        with self.assertRaisesRegexp(TypeError, 'cannot infer .+ dimensions'):
            as_variable(data)

        actual = as_variable(data, key='x')
        self.assertVariableIdentical(expected, actual)

        actual = as_variable(0)
        expected = Variable([], 0)
        self.assertVariableIdentical(expected, actual)
Example #12
def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
                       decode_times=True, decode_endianness=True):
    """
    Decodes a variable which may hold CF encoded information.

    This includes variables that have been masked and scaled, which
    hold CF style time variables (this is almost always the case if
    the dataset has been serialized) and which have strings encoded
    as character arrays.

    Parameters
    ----------
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale : bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.
    decode_endianness : bool
        Decode arrays from non-native to native endianness.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var
    """
    # use _data instead of data so as not to trigger loading data
    var = as_variable(var)
    data = var._data
    dimensions = var.dims
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    original_dtype = data.dtype

    if concat_characters:
        if data.dtype.kind == 'S' and data.dtype.itemsize == 1 and data.shape[-1] != 0:
            dimensions = dimensions[:-1]
            data = CharToStringArray(data)

    if mask_and_scale:
        if 'missing_value' in attributes:
            # missing_value is deprecated, but we still want to support it as
            # an alias for _FillValue.
            if ('_FillValue' in attributes and
                not utils.equivalent(attributes['_FillValue'],
                                     attributes['missing_value'])):
                raise ValueError("Discovered conflicting _FillValue "
                                 "and missing_value.  Considering "
                                 "opening the offending dataset using "
                                 "decode_cf=False, corrected the attributes",
                                 "and decoding explicitly using "
                                 "xarray.conventions.decode_cf(ds)")
            attributes['_FillValue'] = attributes.pop('missing_value')

        fill_value = np.array(pop_to(attributes, encoding, '_FillValue'))
        if fill_value.size > 1:
            warnings.warn("variable has multiple fill values {0}, decoding "
                          "all values to NaN.".format(str(fill_value)),
                          RuntimeWarning, stacklevel=3)
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')
        if ((fill_value is not None and not np.any(pd.isnull(fill_value))) or
                scale_factor is not None or add_offset is not None):
            if fill_value.dtype.kind in ['U', 'S']:
                dtype = object
            else:
                dtype = float
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset, dtype)

    if decode_times and 'units' in attributes:
        if 'since' in attributes['units']:
            # datetime
            units = pop_to(attributes, encoding, 'units')
            calendar = pop_to(attributes, encoding, 'calendar')
            data = DecodedCFDatetimeArray(data, units, calendar)
        elif attributes['units'] in TIME_UNITS:
            # timedelta
            units = pop_to(attributes, encoding, 'units')
            data = DecodedCFTimedeltaArray(data, units)

    if decode_endianness and not data.dtype.isnative:
        # do this last, so it's only done if we didn't already unmask/scale
        data = NativeEndiannessArray(data)
        original_dtype = data.dtype

    if 'dtype' in encoding:
        if original_dtype != encoding['dtype']:
            warnings.warn("CF decoding is overwriting dtype")
    else:
        encoding['dtype'] = original_dtype

    if 'dtype' in attributes and attributes['dtype'] == 'bool':
        del attributes['dtype']
        data = BoolTypeArray(data)

    return Variable(dimensions, indexing.LazilyIndexedArray(data),
                    attributes, encoding=encoding)
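For the timedelta branch, plain time units without 'since' go through DecodedCFTimedeltaArray; a small sketch under the same assumptions as above:

import numpy as np
from xarray import Variable

raw = Variable(('t',), np.array([30, 60, 90]), attrs={'units': 'minutes'})
decoded = decode_cf_variable(raw)
# The values come back as np.timedelta64 durations when accessed.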
Example #13
    def test_as_variable(self):
        data = np.arange(10)
        expected = Variable('x', data)

        self.assertVariableIdentical(expected, as_variable(expected))

        ds = Dataset({'x': expected})
        self.assertVariableIdentical(expected, as_variable(ds['x']))
        self.assertNotIsInstance(ds['x'], Variable)
        self.assertIsInstance(as_variable(ds['x']), Variable)
        self.assertIsInstance(as_variable(ds['x'], strict=False), DataArray)

        FakeVariable = namedtuple('FakeVariable', 'values dims')
        fake_xarray = FakeVariable(expected.values, expected.dims)
        self.assertVariableIdentical(expected, as_variable(fake_xarray))

        xarray_tuple = (expected.dims, expected.values)
        self.assertVariableIdentical(expected, as_variable(xarray_tuple))

        with self.assertRaisesRegexp(TypeError, 'cannot convert arg'):
            as_variable(tuple(data))
        with self.assertRaisesRegexp(TypeError, 'cannot infer .+ dimensions'):
            as_variable(data)

        actual = as_variable(data, key='x')
        self.assertVariableIdentical(expected, actual)

        actual = as_variable(0)
        expected = Variable([], 0)
        self.assertVariableIdentical(expected, actual)
Example #14
    def test_as_variable(self):
        data = np.arange(10)
        expected = Variable('x', data)

        self.assertVariableIdentical(expected, as_variable(expected))

        ds = Dataset({'x': expected})
        self.assertVariableIdentical(expected, as_variable(ds['x']))
        self.assertNotIsInstance(ds['x'], Variable)
        self.assertIsInstance(as_variable(ds['x']), Variable)

        FakeVariable = namedtuple('FakeVariable', 'values dims')
        fake_xarray = FakeVariable(expected.values, expected.dims)
        self.assertVariableIdentical(expected, as_variable(fake_xarray))

        xarray_tuple = (expected.dims, expected.values)
        self.assertVariableIdentical(expected, as_variable(xarray_tuple))

        with self.assertRaisesRegexp(TypeError, 'tuples to convert'):
            as_variable(tuple(data))
        with self.assertRaisesRegexp(TypeError,
                                     'without an explicit list of dimensions'):
            as_variable(data)

        actual = as_variable(data, name='x')
        self.assertVariableIdentical(expected, actual)
        self.assertIsInstance(actual, IndexVariable)

        actual = as_variable(0)
        expected = Variable([], 0)
        self.assertVariableIdentical(expected, actual)
Example #15
 def assertVariableNotEqual(self, v1, v2):
     self.assertFalse(as_variable(v1).equals(v2))
Example #16
 def assertVariableIdentical(self, v1, v2):
     assert as_variable(v1).identical(v2), (v1, v2)
Example #17
 def assertVariableEqual(self, v1, v2):
     assert as_variable(v1).equals(v2), (v1, v2)