Esempio n. 1
    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        Takes the same arguments as an ordered dictionary: a mapping or an
        iterable of (name, value) pairs, and/or keyword arguments.

        Scalar values are wrapped into a DimArray; any other value that is
        not a DimArray raises TypeError. All values go through `align_axes`
        before being stored under their key.
        """
        # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING
        assert not {'axes', 'keys'}.issubset(kwargs.keys())

        # check input arguments: same init as odict
        kwargs = odict(*args, **kwargs)

        # Basic initialization
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Materialize as lists: values are replaced by position below, which
        # dictionary views (Python 3) do not support.
        keys = list(kwargs.keys())
        values = list(kwargs.values())

        # Check everything is a DimArray (scalars are converted)
        for i, (key, value) in enumerate(zip(keys, values)):
            if isinstance(value, DimArray):
                continue
            if not np.isscalar(value):
                raise TypeError(
                    "A Dataset can only store DimArray instances, got {}: {}"
                    .format(key, value))
            values[i] = DimArray(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value
Esempio n. 2
    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        Takes the same arguments as an ordered dictionary: a mapping or an
        iterable of (name, value) pairs, and/or keyword arguments.

        Scalar values are wrapped with `self._constructor`; any other value
        that is not a DimArray raises TypeError. All values go through
        `align_axes` before being stored under their key.
        """
        # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING
        assert not {'axes', 'keys'}.issubset(kwargs.keys())

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization
        self.axes = Axes()

        # initialize an ordered dictionary
        # (the listing read `super(...).__init__() = odict()`, which is not
        # valid Python: a call cannot be assigned to)
        super(Dataset, self).__init__()

        # Materialize as lists: values are replaced by position below, which
        # dictionary views (Python 3) do not support.
        keys = list(data.keys())
        values = list(data.values())

        # Check everything is a DimArray (scalars are converted)
        for i, (key, value) in enumerate(zip(keys, values)):
            if isinstance(value, DimArray):
                continue
            if not np.isscalar(value):
                raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))
            values[i] = self._constructor(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value
Esempio n. 3
class Dataset(odict):
    """ Container for a set of aligned objects

    Ordered dictionary of DimArray instances. The axes of the stored
    variables are gathered in `self.axes`; `__setitem__` refuses a variable
    carrying an axis that has the name of an existing axis but does not
    compare equal to it.

    NOTE(review): this listing had lost its docstring terminators, its
    decorators and several statements (`else:` / `continue` branches, axis
    bookkeeping). They are restored below; restorations that could not be
    cross-checked against another copy carry their own NOTE(review).
    """
    _metadata = MetadataDesc(exclude=['axes'])

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        Takes the same arguments as an ordered dictionary: a mapping or an
        iterable of (name, value) pairs, and/or keyword arguments.

        Scalar values are wrapped into a DimArray; any other value that is
        not a DimArray raises TypeError. All values go through `align_axes`
        before being stored under their key.
        """
        # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING
        assert not {'axes', 'keys'}.issubset(kwargs.keys())

        # check input arguments: same init as odict
        kwargs = odict(*args, **kwargs)

        # Basic initialization
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Materialize as lists: values are replaced by position below, which
        # dictionary views (Python 3) do not support.
        keys = list(kwargs.keys())
        values = list(kwargs.values())

        # Check everything is a DimArray (scalars are converted)
        for i, (key, value) in enumerate(zip(keys, values)):
            if isinstance(value, DimArray):
                continue
            if not np.isscalar(value):
                raise TypeError(
                    "A Dataset can only store DimArray instances, got {}: {}"
                    .format(key, value))
            values[i] = DimArray(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    @property
    def dims(self):
        """ list of dimensions contained in the Dataset, consistently with DimArray's `dims`
        """
        # Must be a property: the rest of the class uses `x in self.dims`
        # and `self.dims.index(...)`.
        return [ax.name for ax in self.axes]

    def __repr__(self):
        """ string representation

        One header line, the representation of the axes, then one line per
        variable showing its dimensions (or its value when it has none).
        """
        header = "Dataset of %s variables" % (len(self))
        if len(self) == 1:
            header = header.replace('variables', 'variable')

        # The listing computed `header` and the axes representation without
        # ever adding them to the output; they are emitted here.
        lines = [header, repr(self.axes)]
        for nm in self.keys():
            repr_dims = repr(self[nm].dims)
            if repr_dims == "()":
                # zero-dimensional variable: show its value instead
                repr_dims = self[nm].values
            lines.append("{}: {}".format(nm, repr_dims))
        return "\n".join(lines)

    def __delitem__(self, item):
        """ remove a variable, and drop the axes no remaining variable uses
        """
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)

        # update axes
        for ax in axes:
            still_used = any(ax.name in self[k].dims for k in self)
            if not still_used:
                # NOTE(review): this statement was missing from the listing;
                # assumes Axes supports list-style `remove` -- confirm.
                self.axes.remove(ax)

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        Scalars are wrapped into a DimArray, other non-DimArray values
        raise TypeError. An axis of `val` whose name already exists in the
        Dataset must compare equal to the existing axis, otherwise
        ValueError is raised; an axis with a new name is appended to
        `self.axes`.

        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        dimensions: 'time'
        0 / time (3): 0 to 2
        array([0, 1, 2])
        """
        if not isinstance(val, DimArray):
            if not np.isscalar(val):
                raise TypeError("can only append DimArray instances")
            val = DimArray(val)

        # Check dimensions
        known = [ax.name for ax in self.axes]
        for axis in val.axes:

            # Check dimensions if already existing axis
            if axis.name in known:
                existing = self.axes[axis.name]
                if not axis == existing:
                    raise ValueError("axes values do not match, align data first.\
                            \nDataset: {}, \nGot: {}".format(existing, axis))

            # Append new axis
            else:
                # NOTE(review): statement missing from the listing; restored
                # from the comment above and the `__init__` remark that
                # assignment updates self.axes -- confirm.
                self.axes.append(axis)
                known.append(axis.name)

        # update name = key
        # NOTE(review): the statement under this comment was missing from
        # the listing; presumably the variable is renamed after its key.
        val.name = key
        super(Dataset, self).__setitem__(key, val)

    def write_nc(self, f, *args, **kwargs):
        """ Saves dataset in netCDF file.

        f : netCDF file name.

        understood keyword arguments:
        zlib : Enable zlib compression if True. Default is False (no compression).
        complevel : integer between 1 and 9 describing the level of compression desired. Ignored if zlib=False.
        mode : File creation mode. Default is 'w-'. Set to 'w' to overwrite any existing file.
        format : netCDF file format. Default is 'NETCDF4'.

        See the netCDF4-python module documentation for more information about the use
        of keyword arguments to write_nc.
        """
        # NOTE(review): the module path was missing from the listing
        # (`import as ncio`); presumably dimarray.io.nc -- confirm.
        import dimarray.io.nc as ncio
        ncio.write_dataset(f, self, *args, **kwargs)

    write = write_nc

    @classmethod
    def read_nc(cls, f, *args, **kwargs):
        """ Read a dataset from the netCDF file `f`; extra arguments are
        forwarded to the reader.
        """
        # NOTE(review): module path missing from the listing, see write_nc.
        import dimarray.io.nc as ncio
        return ncio.read_dataset(f, *args, **kwargs)

    read = read_nc

    def to_array(self, axis=None, keys=None, _constructor=None):
        """ Convert to DimArray

        axis  : name of the new leading axis that indexes the variables.
                By default "unnamed", or "unnamed_<i>" with the first free
                <i> when "unnamed" is already a dimension of the Dataset.
        keys  : variables to include, by default all of them
        _constructor : class called as `_constructor(data, axes)` to build
                the result, by default DimArray

        Raises ValueError if an explicit `axis` name already exists.
        """
        dims = self.dims

        if axis is None:
            axis = "unnamed"
            if axis in dims:
                i = 1
                while "unnamed_{}".format(i) in dims:
                    i += 1
                axis = "unnamed_{}".format(i)

        if axis in dims:
            raise ValueError("please provide an axis name which does not \
                    already exist in Dataset")

        if keys is None:
            # a list, so that it can be iterated repeatedly and indexed
            keys = list(self.keys())

        # align all variables to the same dimensions and stack them into
        # one numpy array
        data = np.array([
            self[k].reshape(dims).broadcast(self.axes).values for k in keys
        ])

        # determine axes
        axes = [Axis(keys, axis)] + self.axes

        if _constructor is None:
            _constructor = DimArray
        return _constructor(data, axes)

    def take(self, indices, axis=0, raise_error=True, **kwargs):
        """ analogous to DimArray's take, but for each DimArray of the Dataset

        indices: scalar, or array-like, or slice
        axis: axis name (str)
        raise_error: raise an error if a variable does not have the desired dimension
        **kwargs: arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Variables lacking `axis` are kept unchanged when raise_error=False.

        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        a: 1.0
        b: nan
        """
        assert isinstance(axis, str), "axis must be a string"
        ii = self.axes[axis].loc(indices, **kwargs)
        newdata = self.copy()  # copy the dict
        for k in self.keys():
            if axis not in self[k].dims:
                if raise_error:
                    raise ValueError(
                        "{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged"
                        .format(k, axis))
                continue  # leave this variable as it is
            a = self[k].take(ii, axis=axis, indexing='position')
            if not isinstance(a, DimArray):
                a = DimArray(a)
            # bypass Dataset.__setitem__ and its axis checks: the shared
            # axis is updated once, below
            odict.__setitem__(newdata, k, a)

        # update the axis
        newaxis = self.axes[axis][ii]
        if not isinstance(axis, int):
            axis = self.dims.index(axis)  # axis is int

        if not isinstance(newaxis, Axis):
            # remove if axis collapsed
            del newdata.axes[axis]
        else:
            # otherwise update
            newdata.axes[axis] = newaxis

        return newdata

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Apply a function on every Dataset variable. 

        funcname is the name of a DimArray method, called with *args and
        **kwargs. If the 'axis=' parameter is passed, only the variables
        with the required axis are called; the others are returned
        unchanged.
        """
        axis = kwargs.pop('axis', None)
        if axis is not None:
            axis = self.axes[axis].name  # accept a position or a name
        kwargs['axis'] = axis

        d = odict(self)
        for k in self.keys():
            if axis is not None and axis not in self[k].dims:
                continue
            d[k] = getattr(self[k], funcname)(*args, **kwargs)

        return Dataset(d)

    def mean(self, axis=0, **kwargs):
        """ Apply transformation on every variable of the Dataset

        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        dimensions: 'items'
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        return self._apply_dimarray_axis('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Call `std` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('std', axis=axis, **kwargs)

    def var(self, axis=0, **kwargs):
        """ Call `var` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('var', axis=axis, **kwargs)

    def median(self, axis=0, **kwargs):
        """ Call `median` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('median', axis=axis, **kwargs)

    def __getattr__(self, att):
        """ allow access of dimensions

        Returns the values of the axis named `att` as a numpy array, and
        raises AttributeError for any other name.
        """
        # `self.dims` reads `self.axes`: looking either of them up here
        # while `axes` is not set yet would recurse without end.
        if att not in ('axes', 'dims') and att in self.dims:
            return self.axes[att].values  # return numpy array

        raise AttributeError("{} object has no attribute {}".format(
            self.__class__.__name__, att))

    def to_dict(self):
        """ export to dict
        """
        return dict(self)

    def to_odict(self):
        """ export to ordered dict
        """
        return odict(self)
Esempio n. 4
class Dataset(odict):
    """ Container for a set of aligned objects

    Ordered dictionary of DimArray instances. The axes of the stored
    variables are gathered in `self.axes`; `__setitem__` refuses a variable
    carrying an axis that has the name of an existing axis but does not
    compare equal to it.

    NOTE(review): this listing had lost its docstring terminators, its
    decorators and several statements (`else:` / `continue` branches, a
    loop increment, axis bookkeeping). They are restored below;
    restorations that could not be cross-checked against another copy
    carry their own NOTE(review).
    """
    _metadata = MetadataDesc(exclude=['axes'])

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        Takes the same arguments as an ordered dictionary: a mapping or an
        iterable of (name, value) pairs, and/or keyword arguments.

        Scalar values are wrapped into a DimArray; any other value that is
        not a DimArray raises TypeError. All values go through `align_axes`
        before being stored under their key.
        """
        # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING
        assert not {'axes', 'keys'}.issubset(kwargs.keys())

        # check input arguments: same init as odict
        kwargs = odict(*args, **kwargs)

        # Basic initialization
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Materialize as lists: values are replaced by position below, which
        # dictionary views (Python 3) do not support.
        keys = list(kwargs.keys())
        values = list(kwargs.values())

        # Check everything is a DimArray (scalars are converted)
        for i, (key, value) in enumerate(zip(keys, values)):
            if isinstance(value, DimArray):
                continue
            if not np.isscalar(value):
                raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))
            values[i] = DimArray(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    @property
    def dims(self):
        """ list of dimensions contained in the Dataset, consistently with DimArray's `dims`
        """
        # Must be a property: the rest of the class uses `x in self.dims`
        # and `self.dims.index(...)`.
        return [ax.name for ax in self.axes]

    def __repr__(self):
        """ string representation

        One header line, the representation of the axes, then one line per
        variable showing its dimensions (or its value when it has none).
        """
        header = "Dataset of %s variables" % (len(self))
        if len(self) == 1:
            header = header.replace('variables', 'variable')

        # The listing computed `header` and the axes representation without
        # ever adding them to the output; they are emitted here.
        lines = [header, repr(self.axes)]
        for nm in self.keys():
            repr_dims = repr(self[nm].dims)
            if repr_dims == "()":
                # zero-dimensional variable: show its value instead
                repr_dims = self[nm].values
            lines.append("{}: {}".format(nm, repr_dims))
        return "\n".join(lines)

    def __delitem__(self, item):
        """ remove a variable, and drop the axes no remaining variable uses
        """
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)

        # update axes
        for ax in axes:
            still_used = any(ax.name in self[k].dims for k in self)
            if not still_used:
                # NOTE(review): this statement was missing from the listing;
                # assumes Axes supports list-style `remove` -- confirm.
                self.axes.remove(ax)

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        Scalars are wrapped into a DimArray, other non-DimArray values
        raise TypeError. An axis of `val` whose name already exists in the
        Dataset must compare equal to the existing axis, otherwise
        ValueError is raised; an axis with a new name is appended to
        `self.axes`.

        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        dimensions: 'time'
        0 / time (3): 0 to 2
        array([0, 1, 2])
        """
        if not isinstance(val, DimArray):
            if not np.isscalar(val):
                raise TypeError("can only append DimArray instances")
            val = DimArray(val)

        # Check dimensions
        known = [ax.name for ax in self.axes]
        for axis in val.axes:

            # Check dimensions if already existing axis
            if axis.name in known:
                existing = self.axes[axis.name]
                if not axis == existing:
                    raise ValueError("axes values do not match, align data first.\
                            \nDataset: {}, \nGot: {}".format(existing, axis))

            # Append new axis
            else:
                # NOTE(review): statement missing from the listing; restored
                # from the comment above and the `__init__` remark that
                # assignment updates self.axes -- confirm.
                self.axes.append(axis)
                known.append(axis.name)

        # update name = key
        # NOTE(review): the statement under this comment was missing from
        # the listing; presumably the variable is renamed after its key.
        val.name = key
        super(Dataset, self).__setitem__(key, val)

    def write_nc(self, f, *args, **kwargs):
        """ Saves dataset in netCDF file.

        f : netCDF file name.

        understood keyword arguments:
        zlib : Enable zlib compression if True. Default is False (no compression).
        complevel : integer between 1 and 9 describing the level of compression desired. Ignored if zlib=False.
        mode : File creation mode. Default is 'w-'. Set to 'w' to overwrite any existing file.
        format : netCDF file format. Default is 'NETCDF4'.

        See the netCDF4-python module documentation for more information about the use
        of keyword arguments to write_nc.
        """
        # NOTE(review): the module path was missing from the listing
        # (`import as ncio`); presumably dimarray.io.nc -- confirm.
        import dimarray.io.nc as ncio
        ncio.write_dataset(f, self, *args, **kwargs)

    write = write_nc

    @classmethod
    def read_nc(cls, f, *args, **kwargs):
        """ Read a dataset from the netCDF file `f`; extra arguments are
        forwarded to the reader.
        """
        # NOTE(review): module path missing from the listing, see write_nc.
        import dimarray.io.nc as ncio
        return ncio.read_dataset(f, *args, **kwargs)

    read = read_nc

    def to_array(self, axis=None, keys=None, _constructor=None):
        """ Convert to DimArray

        axis  : name of the new leading axis that indexes the variables.
                By default "unnamed", or "unnamed_<i>" with the first free
                <i> when "unnamed" is already a dimension of the Dataset.
        keys  : variables to include, by default all of them
        _constructor : class called as `_constructor(data, axes)` to build
                the result, by default DimArray

        Raises ValueError if an explicit `axis` name already exists.
        """
        dims = self.dims

        if axis is None:
            axis = "unnamed"
            if axis in dims:
                i = 1
                while "unnamed_{}".format(i) in dims:
                    # the increment was missing from the listing, leaving
                    # the loop without a body
                    i += 1
                axis = "unnamed_{}".format(i)

        if axis in dims:
            raise ValueError("please provide an axis name which does not \
                    already exist in Dataset")

        if keys is None:
            # a list, so that it can be iterated repeatedly and indexed
            keys = list(self.keys())

        # align all variables to the same dimensions and stack them into
        # one numpy array
        data = np.array([
            self[k].reshape(dims).broadcast(self.axes).values for k in keys
        ])

        # determine axes
        axes = [Axis(keys, axis)] + self.axes

        if _constructor is None:
            _constructor = DimArray
        return _constructor(data, axes)

    def take(self, indices, axis=0, raise_error=True, **kwargs):
        """ analogous to DimArray's take, but for each DimArray of the Dataset

        indices: scalar, or array-like, or slice
        axis: axis name (str)
        raise_error: raise an error if a variable does not have the desired dimension
        **kwargs: arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Variables lacking `axis` are kept unchanged when raise_error=False.

        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        a: 1.0
        b: nan
        """
        assert isinstance(axis, str), "axis must be a string"
        ii = self.axes[axis].loc(indices, **kwargs)
        newdata = self.copy()  # copy the dict
        for k in self.keys():
            if axis not in self[k].dims:
                if raise_error:
                    raise ValueError("{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged".format(k, axis))
                continue  # leave this variable as it is
            a = self[k].take(ii, axis=axis, indexing='position')
            if not isinstance(a, DimArray):
                a = DimArray(a)
            # bypass Dataset.__setitem__ and its axis checks: the shared
            # axis is updated once, below
            odict.__setitem__(newdata, k, a)

        # update the axis
        newaxis = self.axes[axis][ii]
        if not isinstance(axis, int):
            axis = self.dims.index(axis)  # axis is int

        if not isinstance(newaxis, Axis):
            # remove if axis collapsed
            del newdata.axes[axis]
        else:
            # otherwise update
            newdata.axes[axis] = newaxis

        return newdata

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Apply a function on every Dataset variable. 

        funcname is the name of a DimArray method, called with *args and
        **kwargs. If the 'axis=' parameter is passed, only the variables
        with the required axis are called; the others are returned
        unchanged.
        """
        axis = kwargs.pop('axis', None)
        if axis is not None:
            axis = self.axes[axis].name  # accept a position or a name
        kwargs['axis'] = axis

        d = odict(self)
        for k in self.keys():
            if axis is not None and axis not in self[k].dims:
                continue
            d[k] = getattr(self[k], funcname)(*args, **kwargs)

        return Dataset(d)

    def mean(self, axis=0, **kwargs):
        """ Apply transformation on every variable of the Dataset

        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        dimensions: 'items'
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        return self._apply_dimarray_axis('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Call `std` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('std', axis=axis, **kwargs)

    def var(self, axis=0, **kwargs):
        """ Call `var` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('var', axis=axis, **kwargs)

    def median(self, axis=0, **kwargs):
        """ Call `median` on every variable that has `axis` (see `mean`)
        """
        return self._apply_dimarray_axis('median', axis=axis, **kwargs)

    def __getattr__(self, att):
        """ allow access of dimensions

        Returns the values of the axis named `att` as a numpy array, and
        raises AttributeError for any other name.
        """
        # `self.dims` reads `self.axes`: looking either of them up here
        # while `axes` is not set yet would recurse without end.
        if att not in ('axes', 'dims') and att in self.dims:
            return self.axes[att].values  # return numpy array

        raise AttributeError("{} object has no attribute {}".format(self.__class__.__name__, att))

    def to_dict(self):
        """ export to dict
        """
        return dict(self)

    def to_odict(self):
        """ export to ordered dict
        """
        return odict(self)
Esempio n. 5
class Dataset(odict, MetadataBase):
    """ Container for a set of aligned objects
    def axes(self):
        return self._axes

    def axes(self, axes):
        self._axes = axes

    _constructor = DimArray

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        data  : dict of DimArrays or list of named DimArrays or Axes object
        keys  : keys to order data if provided as dict, or to name data if list
        assert not {'axes','keys'}.issubset(kwargs.keys()) # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__() = odict()

        values = data.values()
        keys = data.keys()

        # Check everything is a DimArray
        #for key, value in zip(keys, values):
        for i, key in enumerate(keys):
            value = values[i]
            if not isinstance(value, DimArray):
                if np.isscalar(value):
                    values[i] = self._constructor(value)
                    raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    def dims(self):
        """ tuple of dimensions contained in the Dataset, consistently with DimArray's `dims`
        return tuple([ for ax in self.axes])

    def dims(self, newdims):
        """ rename all axis names at once
        if not np.iterable(newdims): 
            raise TypeError("new dims must be iterable")
        if not len(newdims) == len(self.axes):
            raise ValueError("dimension mistmatch")

        # update every element's dimension
        for i, newname in enumerate(newdims):
            oldname = self.axes[i].name
            self.axes[i].name = newname

            # axes in individual items will be updated automatically 
            # since they are all references of the central axes

    def _repr(self, metadata=True):
        """ string representation
        lines = []
        header = "Dataset of %s variables" % (len(self))
        if len(self) == 1: header = header.replace('variables','variable')

        # display single variables
        for nm in self.keys():
            v = self[nm]
            repr_dims = repr(v.dims)
            if repr_dims == "()": repr_dims = v.values
            vlines = []
            if metadata and len(v._metadata()) > 0:
            lines.append(nm+': '+"\n".join(vlines))

        if metadata and len(self._metadata()) > 0:
            lines.append('//global attributes:')

        return "\n".join(lines)

    def summary(self):
        return self._repr(metadata=True)

    def __repr__(self):
        return self._repr(metadata=False)

    # overload dictionary methods
    def __delitem__(self, item):
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)
        #del super(Dataset, self)[item]

        # update axes
        for ax in axes:
            found = False
            for k in self:
                if in self[k].dims:
                    found = True
            if not found:

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        0 / time (3): 0 to 2
        array([0, 1, 2])
        >>> ds['ya'] = a.values  # also accepts numpy array if shape matches
        >>> ds['ya']
        dimarray: 3 non-null elements (0 null)
        0 / time (3): 0 to 2
        array([0, 1, 2])
        if not isinstance(val, DimArray):
            if np.isscalar(val):
                val = self._constructor(val)
            elif hasattr(val, '__array__'):
                if np.shape(val) == tuple([ax.size for ax in self.axes]):
                    val = self._constructor(val, axes=self.axes) # make a dimarray with same axes
                    raise ValueError("array_like shape does not match, use DimArray if dimensions vary within the dataset")
                raise TypeError("can only append DimArray instances")

        # shallow copy of the DimArray so that its axes attribute can be 
        # modified without affecting the original array
        val = copy.copy(val)  
        val.axes = copy.deepcopy(val.axes)

        # Check dimensions
        # make sure axes match those of the dataset
        for i, newaxis in enumerate(val.axes):

            # Check dimensions if already existing axis
            if in [ for ax in self.axes]:
                existing_axis = self.axes[]
                if not newaxis == existing_axis:
                    raise ValueError("axes values do not match, align data first.\
                            \nDataset: {}, \nGot: {}".format(existing_axis, newaxis))

                # assign the Dataset axis : they all must share the same axis
                val.axes[i] = existing_axis

            # Append new axis

            assert val.axes[i] is self.axes[]

        super(Dataset, self).__setitem__(key, val)

        # now just checking 
        test_internal = super(Dataset, self).__getitem__(key)
        for ax in test_internal.axes:
            assert self.axes[] is ax

    def copy(self):
        ds2 = super(Dataset, self).copy() # odict method, copy axes but not metadata
        return ds2

    def __eq__(self, other):
        """ test equality but bypass annoying numpy's __eq__ method
        return isinstance(other, Dataset) and self.keys() == other.keys() \
                and self.axes == other.axes \
                and np.all([np.all(self[k] == other[k]) for k in self.keys()])

    def write_nc(self, f, *args, **kwargs):
        """ Save dataset in netCDF file.

        If you see this documentation, it means netCDF4 is not installed on your system 
        and you will not be able to use this functionality.
        import as ncio
        ncio._write_dataset(f, self, *args, **kwargs)

    write = write_nc

    def read_nc(cls, f, *args, **kwargs):
        """ Read dataset from netCDF file.

        If you see this documentation, it means netCDF4 is not installed on your system 
        and you will not be able to use this functionality.
        import as ncio
        return ncio._read_dataset(f, *args, **kwargs)

    #read = read_nc

    def to_array(self, axis=None, keys=None):
        """ Convert to DimArray

        axis  : axis name, by default "unnamed"
        #if names is not None or dims is not None:
        #    return self.subset(names=names, dims=dims).to_array()

        if axis is None:
            axis = "unnamed"
            if axis in self.dims:
                i = 1
                while "unnamed_{}".format(i) in self.dims:
                axis = "unnamed_{}".format(i)

        if axis in self.dims:
            raise ValueError("please provide an axis name which does not \
                    already exist in Dataset")

        if keys is None:
            keys = self.keys()

        # align all variables to the same dimensions
        data = odict()

        for k in keys:
            data[k] = self[k].reshape(self.dims).broadcast(self.axes)

        # make it a numpy array
        data = [data[k].values for k in keys]
        data = np.array(data)

        # determine axes
        axes = [Axis(keys, axis)] + self.axes 

        return self._constructor(data, axes)

    def take(self, indices, axis=0, raise_error=False, **kwargs):
        """ analogous to DimArray's take, but for each DimArray of the Dataset

        The indices are first located on the axes shared by the Dataset, then
        every variable is indexed by position along the axes it actually has.

        Parameters
        ----------
        indices : scalar, or array-like, or slice, or dict of {axis name: indices}
        axis : axis name (str), forwarded to the axis locator of `self.axes`
        raise_error : raise an error if a variable does not have the desired dimension;
            if False (the default) such a variable is kept unchanged
        **kwargs : arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Returns
        -------
        new instance of the same class as `self` holding the indexed variables

        Raises
        ------
        ValueError : if `raise_error` is True and a variable lacks an indexed dimension

        Examples
        --------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        a: 1.0
        b: nan
        >>> ds['c'] = DimArray([[1,2],[11,22],[111,222],[3,4]], axes=[('time', [1950,1951,1952,1953]),('item',['a','b'])])
        >>> ds.take({'time':1950})
        Dataset of 3 variables
        0 / item (2): a to b
        a: 1.0
        b: nan
        c: ('item',)
        >>> ds.take({'time':1950})['c']
        dimarray: 2 non-null elements (0 null)
        0 / item (2): a to b
        array([1, 2])
        >>> ds.take({'item':'b'})
        Dataset of 3 variables
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        c: ('time',)
        """
        # first find the index for the shared axes
        located = self.axes.loc(indices, axis=axis, **kwargs)
        kw_indices = {self.axes[i].name: ind for i, ind in enumerate(located)}

        # then apply take in 'position' mode
        newdata = self.__class__()
        # loop over variables
        for k in self.keys():
            v = self[k]
            # loop over axes to index on (distinct name: do not shadow the `axis` parameter)
            for dim, ind in kw_indices.items():
                if np.ndim(v) == 0 or dim not in v.dims:
                    if raise_error:
                        raise ValueError("{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged".format(k, dim))
                    # the variable does not have this dimension: keep it unchanged
                    continue
                # slice along one axis
                v = v.take({dim: ind}, indexing='position')
            newdata[k] = v

        return newdata

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Apply a function on every Dataset variable.

        If the 'axis=' parameter is passed, only the variables with the required axis are called.
        The other variables are returned as they are.

        Parameters
        ----------
        funcname : name (str) of the method to call on each variable
        *args : positional arguments passed to that method
        **kwargs : keyword arguments passed to that method; `axis` (if not None)
            is looked up in `self.axes` and replaced by the axis name

        Returns
        -------
        Dataset built from the transformed variables
        """
        axis = kwargs.pop('axis', None)
        if axis is not None:
            # normalize to the axis name so it can be compared to each variable's dims
            axis = self.axes[axis].name
        kwargs['axis'] = axis

        # start from the current variables, so that skipped ones are kept
        d = self.to_odict()
        for k in self.keys():
            if axis is not None and axis not in self[k].dims:
                # this variable does not have the required axis: leave it untouched
                continue
            d[k] = getattr(self[k], funcname)(*args, **kwargs)

        return Dataset(d)

    def mean(self, axis=0, **kwargs):
        """ Apply transformation on every variable of the Dataset

        Delegates to `_apply_dimarray_axis` with the method name 'mean'.

        Parameters
        ----------
        axis : axis along which to apply the transformation (default 0);
            the examples below pass an axis name
        **kwargs : passed unchanged to `_apply_dimarray_axis`

        Returns
        -------
        result of `_apply_dimarray_axis`

        Examples
        --------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        return self._apply_dimarray_axis('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Apply `std` along an axis on every variable of the Dataset

        Thin wrapper: calls `_apply_dimarray_axis` with the method name 'std',
        passing `axis` and any extra keyword arguments unchanged.
        """
        return self._apply_dimarray_axis(
            'std',
            axis=axis,
            **kwargs
        )
    def var(self, axis=0, **kwargs):
        """ Apply `var` along an axis on every variable of the Dataset

        Thin wrapper: calls `_apply_dimarray_axis` with the method name 'var',
        passing `axis` and any extra keyword arguments unchanged.
        """
        return self._apply_dimarray_axis(
            'var',
            axis=axis,
            **kwargs
        )
    def median(self, axis=0, **kwargs):
        """ Apply `median` along an axis on every variable of the Dataset

        Thin wrapper: calls `_apply_dimarray_axis` with the method name 'median',
        passing `axis` and any extra keyword arguments unchanged.
        """
        return self._apply_dimarray_axis(
            'median',
            axis=axis,
            **kwargs
        )
    def sum(self, axis=0, **kwargs):
        """ Apply `sum` along an axis on every variable of the Dataset

        Thin wrapper: calls `_apply_dimarray_axis` with the method name 'sum',
        passing `axis` and any extra keyword arguments unchanged.
        """
        return self._apply_dimarray_axis(
            'sum',
            axis=axis,
            **kwargs
        )

    def __getattr__(self, att):
        """ allow access of dimensions

        Called when normal attribute lookup fails. If `att` is the name of one
        of the Dataset's dimensions, the values of the matching axis are returned.

        Parameters
        ----------
        att : attribute name (str)

        Returns
        -------
        the `values` of the axis named `att`

        Raises
        ------
        AttributeError : if `att` is not a dimension of the Dataset
        """
        # check for dimensions
        if att in self.dims:
            ax = self.axes[att]
            return ax.values  # the axis values
        else:
            # must raise (not silently return None) so that hasattr/getattr
            # with a default behave correctly for unknown attributes
            raise AttributeError("{} object has no attribute {}".format(self.__class__.__name__, att))

    def to_dict(self):
        """ export to dict

        Returns
        -------
        plain dict mapping each key of the Dataset to the item stored under it
        """
        return {nm: self[nm] for nm in self.keys()}

    def to_odict(self):
        """ export to ordered dict

        Returns
        -------
        `odict` mapping each key of the Dataset to the item stored under it,
        in the order given by `self.keys()`
        """
        return odict((nm, self[nm]) for nm in self.keys())

    def set_axis(self, values=None, axis=0, inplace=False, **kwargs):
        """ (re)set axis values and attributes in all dimarrays present in the dataset

        Parameters
        ----------
        values : new axis values, passed to the `set_axis` method of `self.axes`
        axis : axis to modify (default 0); the example below passes an axis name
        inplace : if True, modify this Dataset and return None;
            otherwise (default) work on a copy and return it
        **kwargs : passed unchanged to the `set_axis` method of `self.axes`

        Returns
        -------
        Dataset instance, or None if inplace is True

        Examples
        --------
        >>> ds = Dataset()
        >>> ds['a'] = da.zeros(shape=(3,))  # some dimarray with dimension 'x0'
        >>> ds['b'] = da.zeros(shape=(3,4)) # dimensions 'x0', 'x1'
        >>> ds.set_axis(['a','b','c'], axis='x0')
        Dataset of 2 variables
        0 / x0 (3): a to c
        1 / x1 (4): 0 to 3
        a: ('x0',)
        b: ('x0', 'x1')
        """
        # truthiness test (rather than `is False`) so that falsy flags such as
        # numpy.bool_(False) also select the copy-and-return behaviour
        target = self if inplace else self.copy()

        # update the main axis instance
        # NOTE(review): the variables themselves are not re-assigned here;
        # presumably assigning `axes` propagates to them -- confirm.
        target.axes = target.axes.set_axis(values, axis, inplace=False, **kwargs)

        if not inplace:
            return target

    def reset_axis(self, axis=0, inplace=False, **kwargs):
        """ reset axis values and attributes in all dimarrays present in the dataset

        In the example below, the labels of the reset axis are replaced by
        the integers 0 to 2.

        Parameters
        ----------
        axis : axis to reset (default 0); the example below passes an axis name
        inplace : if True, modify this Dataset and return None;
            otherwise (default) work on a copy and return it
        **kwargs : passed unchanged to the `reset_axis` method of `self.axes`

        Returns
        -------
        Dataset instance, or None if inplace is True

        Examples
        --------
        >>> ds = Dataset()
        >>> ds['a'] = da.zeros(axes=[['a','b','c']])  # some dimarray with dimension 'x0'
        >>> ds['b'] = da.zeros(axes=[['a','b','c'], [11,22,33,44]]) # dimensions 'x0', 'x1'
        >>> ds.reset_axis(axis='x0')
        Dataset of 2 variables
        0 / x0 (3): 0 to 2
        1 / x1 (4): 11 to 44
        a: ('x0',)
        b: ('x0', 'x1')
        """
        # truthiness test (rather than `is False`) so that falsy flags such as
        # numpy.bool_(False) also select the copy-and-return behaviour
        target = self if inplace else self.copy()

        # update the main axis instance
        # NOTE(review): the variables themselves are not re-assigned here;
        # presumably assigning `axes` propagates to them -- confirm.
        target.axes = target.axes.reset_axis(axis, inplace=False, **kwargs)

        if not inplace:
            return target