예제 #1
0
    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        parameters:
        -----------
        *args, **kwargs : same arguments as an ordered dictionary; they are
            used to build the name -> value mapping of the variables to store.

        Scalars (as per `np.isscalar`) are wrapped into a DimArray, any other
        value which is not a DimArray raises a TypeError. All values are passed
        through `align_axes` before being stored via item assignment.
        """
        assert not {'axes', 'keys'}.issubset(
            kwargs.keys()
        )  # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization (kept before the odict initializer, as originally)
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Check everything is a DimArray. The values are collected in a real
        # list: dictionary views cannot be indexed or assigned to on Python 3.
        keys = list(data.keys())
        values = []
        for key in keys:
            value = data[key]
            if not isinstance(value, DimArray):
                if not np.isscalar(value):
                    raise TypeError(
                        "A Dataset can only store DimArray instances, got {}: {}"
                        .format(key, value))
                value = DimArray(value)
            values.append(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value
예제 #2
0
    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        parameters:
        -----------
        *args, **kwargs : same arguments as an ordered dictionary; they are
            used to build the name -> value mapping of the variables to store.

        Scalars (as per `np.isscalar`) are wrapped with `self._constructor`,
        any other value which is not a DimArray raises a TypeError. All values
        are passed through `align_axes` before being stored via item assignment.
        """
        assert not {'axes','keys'}.issubset(kwargs.keys()) # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization (kept before the odict initializer, as originally)
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Check everything is a DimArray. The values are collected in a real
        # list: dictionary views cannot be indexed or assigned to on Python 3.
        keys = list(data.keys())
        values = []
        for key in keys:
            value = data[key]
            if not isinstance(value, DimArray):
                if not np.isscalar(value):
                    raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))
                value = self._constructor(value)
            values.append(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value
예제 #3
0
class Dataset(odict):
    """ Container for a set of aligned objects

    Ordered dictionary whose values are DimArray instances. The axes of the
    stored arrays are collected in the `axes` attribute; item assignment
    checks that the axes of a new array agree with those already present.
    """

    _metadata = MetadataDesc(exclude=['axes'])

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        parameters:
        -----------
        *args, **kwargs : same arguments as an ordered dictionary; they are
            used to build the name -> value mapping of the variables to store.

        Scalars (as per `np.isscalar`) are wrapped into a DimArray, any other
        value which is not a DimArray raises a TypeError. All values are passed
        through `align_axes` before being stored via item assignment.
        """
        assert not {'axes', 'keys'}.issubset(
            kwargs.keys()
        )  # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization (kept before the odict initializer, as
        # originally: __getattr__ below looks at the axes)
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Check everything is a DimArray. The values are collected in a real
        # list: dictionary views cannot be indexed or assigned to on Python 3.
        keys = list(data.keys())
        values = []
        for key in keys:
            value = data[key]
            if not isinstance(value, DimArray):
                if not np.isscalar(value):
                    raise TypeError(
                        "A Dataset can only store DimArray instances, got {}: {}"
                        .format(key, value))
                value = DimArray(value)
            values.append(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    @property
    def dims(self):
        """ list of dimensions contained in the Dataset, consistently with DimArray's `dims`
        """
        return [ax.name for ax in self.axes]

    def __repr__(self):
        """ string representation: header, axes, then one line per variable
        """
        lines = []
        header = "Dataset of %s variables" % (len(self))
        if len(self) == 1:
            header = header.replace('variables', 'variable')
        lines.append(header)
        lines.append(repr(self.axes))
        for nm in self.keys():
            repr_dims = repr(self[nm].dims)
            # variables without dimension are displayed by value
            if repr_dims == "()":
                repr_dims = self[nm].values
            lines.append("{}: {}".format(nm, repr_dims))
        return "\n".join(lines)

    def __delitem__(self, item):
        """ remove a variable, and drop the axes no remaining variable uses
        """
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)

        # update axes
        for ax in axes:
            # any() stops at the first variable which still has this dimension
            if not any(ax.name in self[k].dims for k in self):
                self.axes.remove(ax)

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        A scalar is converted to a DimArray; any other object which is not a
        DimArray raises a TypeError. A ValueError is raised if one axis of
        `val` has the name of an existing axis of the Dataset but does not
        compare equal to it. The `name` attribute of `val` is set to `key`.

        Tests:
        -----
        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        dimensions: 
        <BLANKLINE>
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        dimensions: 'time'
        0 / time (3): 0 to 2
        array([0, 1, 2])
        """
        if not isinstance(val, DimArray):
            if np.isscalar(val):
                val = DimArray(val)
            else:
                raise TypeError("can only append DimArray instances")

        # Check dimensions of already existing axes first, so that a mismatch
        # does not leave newly appended axes behind in self.axes
        known = self.dims
        for axis in val.axes:
            if axis.name in known and not axis == self.axes[axis.name]:
                raise ValueError(
                    "axes values do not match, align data first."
                    "\nDataset: {}, \nGot: {}".format(
                        self.axes[axis.name], axis))

        # Append new axes
        for axis in val.axes:
            if axis.name not in self.dims:
                self.axes.append(axis)

        # update name
        val.name = key
        super(Dataset, self).__setitem__(key, val)

    def write_nc(self, f, *args, **kwargs):
        """ Saves dataset in netCDF file.
        
        parameters:
        -----------
        f : netCDF file name.
        
        understood keyword arguments:
        zlib : Enable zlib compression if True. Default is False (no compression).
        complevel : integer between 1 and 9 describing the level of compression desired. Ignored if zlib=False.
        mode : File creation mode. Default is 'w-'. Set to 'w' to overwrite any existing file.
        format : netCDF file format. Default is 'NETCDF4'.
        
        See the netCDF4-python module documentation for more information about the use
        of keyword arguments to write_nc.
        """
        # NOTE(review): presumably resolved as a package-local `io` module
        # (Python 2 implicit relative import) -- confirm before porting
        import io.nc as ncio
        ncio.write_dataset(f, self, *args, **kwargs)

    write = write_nc

    @classmethod
    def read_nc(cls, f, *args, **kwargs):
        """ Read a dataset from a netCDF file; all arguments are forwarded to
        `read_dataset` of the `io.nc` module.
        """
        # NOTE(review): same implicit relative import as in write_nc
        import io.nc as ncio
        return ncio.read_dataset(f, *args, **kwargs)

    read = read_nc

    def to_array(self, axis=None, keys=None, _constructor=None):
        """ Convert to DimArray

        parameters:
        -----------
        axis  : name of the new axis holding the keys, by default "unnamed"
            (or "unnamed_1", "unnamed_2"... if already in use)
        keys  : variables to include, by default all of them
        _constructor : class of the returned object, by default DimArray

        A ValueError is raised if `axis` is already a dimension of the Dataset.
        """
        if axis is None:
            axis = "unnamed"
            if axis in self.dims:
                i = 1
                while "unnamed_{}".format(i) in self.dims:
                    i += 1
                axis = "unnamed_{}".format(i)

        if axis in self.dims:
            raise ValueError("please provide an axis name which does not "
                             "already exist in Dataset")

        # keys are used twice below (data and new axis): store them in a list
        # so that one-shot iterables and dictionary views work as well
        keys = list(self.keys() if keys is None else keys)

        # align all variables to the same dimensions, as a numpy array
        data = np.array(
            [self[k].reshape(self.dims).broadcast(self.axes).values
             for k in keys])

        # determine axes
        axes = [Axis(keys, axis)] + self.axes

        if _constructor is None:
            _constructor = DimArray
        return _constructor(data, axes)

    def take(self, indices, axis=0, raise_error=True, **kwargs):
        """ analogous to DimArray's take, but for each DimArray of the Dataset

        parameters:
        -----------
        indices: scalar, or array-like, or slice
        axis: axis name (str) or axis position (int)
        raise_error: raise an error if a variable does not have the desired dimension
        **kwargs: arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Examples:
        ---------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        dimensions: 
        <BLANKLINE>
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        dimensions: 
        <BLANKLINE>
        a: 1.0
        b: nan
        """
        # work with the axis name, whatever was provided: the default axis=0
        # used to be rejected by a string-only assertion
        axis = self.axes[axis].name
        ii = self.axes[axis].loc(indices, **kwargs)
        newdata = self.copy()  # copy the dict
        for k in self.keys():
            if axis not in self[k].dims:
                if raise_error:
                    raise ValueError(
                        "{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged"
                        .format(k, axis))
                continue
            a = self[k].take(ii, axis=axis, indexing='position')
            if not isinstance(a, DimArray):
                a = DimArray(a)
            # plain dictionary assignment: the axes are updated once, below
            odict.__setitem__(newdata, k, a)

        # update the axis
        newaxis = self.axes[axis][ii]
        pos = self.dims.index(axis)

        # remove if axis collapsed
        if not isinstance(newaxis, Axis):
            del newdata.axes[pos]

        # otherwise update
        else:
            newdata.axes[pos] = newaxis

        return newdata

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Apply a method on every Dataset variable. 
        
        If the 'axis=' parameter is passed, only the variables with the required
        axis are transformed, the other ones are kept unchanged.

        parameters:
        -----------
        funcname : name of the DimArray method to call
        *args, **kwargs : passed to the method
        """
        axis = kwargs.pop('axis', None)
        if axis is not None:
            axis = self.axes[axis].name  # position or name ==> name
        kwargs['axis'] = axis

        d = odict(self)
        for k in self.keys():
            if axis is not None and axis not in self[k].dims:
                continue
            d[k] = getattr(self[k], funcname)(*args, **kwargs)

        return Dataset(d)

    def mean(self, axis=0, **kwargs):
        """ Apply transformation on every variable of the Dataset

        Examples:
        ---------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        dimensions: 'items'
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        return self._apply_dimarray_axis('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Apply `std` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('std', axis=axis, **kwargs)

    def var(self, axis=0, **kwargs):
        """ Apply `var` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('var', axis=axis, **kwargs)

    def median(self, axis=0, **kwargs):
        """ Apply `median` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('median', axis=axis, **kwargs)

    def __getattr__(self, att):
        """ allow access of dimensions: return the values of the axis named `att`
        """
        # The axes are read from the instance dictionary: going through
        # self.axes or self.dims would call __getattr__ again, without end,
        # as long as the `axes` attribute is not set.
        axes = self.__dict__.get('axes')
        if axes is not None and att in [ax.name for ax in axes]:
            return axes[att].values  # return numpy array

        raise AttributeError("{} object has no attribute {}".format(
            self.__class__.__name__, att))

    def to_dict(self):
        """ export to dict
        """
        return dict(self)

    def to_odict(self):
        """ export to ordered dict
        """
        return odict(self)
예제 #4
0
파일: dataset.py 프로젝트: koenvo/dimarray
class Dataset(odict):
    """ Container for a set of aligned objects

    Ordered dictionary whose values are DimArray instances. The axes of the
    stored arrays are collected in the `axes` attribute; item assignment
    checks that the axes of a new array agree with those already present.
    """

    _metadata = MetadataDesc(exclude=['axes'])

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        parameters:
        -----------
        *args, **kwargs : same arguments as an ordered dictionary; they are
            used to build the name -> value mapping of the variables to store.

        Scalars (as per `np.isscalar`) are wrapped into a DimArray, any other
        value which is not a DimArray raises a TypeError. All values are passed
        through `align_axes` before being stored via item assignment.
        """
        assert not {'axes','keys'}.issubset(kwargs.keys()) # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization (kept before the odict initializer, as
        # originally: __getattr__ below looks at the axes)
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Check everything is a DimArray. The values are collected in a real
        # list: dictionary views cannot be indexed or assigned to on Python 3.
        keys = list(data.keys())
        values = []
        for key in keys:
            value = data[key]
            if not isinstance(value, DimArray):
                if not np.isscalar(value):
                    raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))
                value = DimArray(value)
            values.append(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    @property
    def dims(self):
        """ list of dimensions contained in the Dataset, consistently with DimArray's `dims`
        """
        return [ax.name for ax in self.axes]

    def __repr__(self):
        """ string representation: header, axes, then one line per variable
        """
        lines = []
        header = "Dataset of %s variables" % (len(self))
        if len(self) == 1:
            header = header.replace('variables','variable')
        lines.append(header)
        lines.append(repr(self.axes))
        for nm in self.keys():
            repr_dims = repr(self[nm].dims)
            # variables without dimension are displayed by value
            if repr_dims == "()":
                repr_dims = self[nm].values
            lines.append("{}: {}".format(nm,repr_dims))
        return "\n".join(lines)

    def __delitem__(self, item):
        """ remove a variable, and drop the axes no remaining variable uses
        """
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)

        # update axes
        for ax in axes:
            # any() stops at the first variable which still has this dimension
            if not any(ax.name in self[k].dims for k in self):
                self.axes.remove(ax)

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        A scalar is converted to a DimArray; any other object which is not a
        DimArray raises a TypeError. A ValueError is raised if one axis of
        `val` has the name of an existing axis of the Dataset but does not
        compare equal to it. The `name` attribute of `val` is set to `key`.

        Tests:
        -----
        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        dimensions: 
        <BLANKLINE>
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        dimensions: 'time'
        0 / time (3): 0 to 2
        array([0, 1, 2])
        """
        if not isinstance(val, DimArray):
            if np.isscalar(val):
                val = DimArray(val)
            else:
                raise TypeError("can only append DimArray instances")

        # Check dimensions of already existing axes first, so that a mismatch
        # does not leave newly appended axes behind in self.axes
        known = self.dims
        for axis in val.axes:
            if axis.name in known and not axis == self.axes[axis.name]:
                raise ValueError("axes values do not match, align data first."
                        "\nDataset: {}, \nGot: {}".format(self.axes[axis.name], axis))

        # Append new axes
        for axis in val.axes:
            if axis.name not in self.dims:
                self.axes.append(axis)

        # update name
        val.name = key
        super(Dataset, self).__setitem__(key, val)

    def write_nc(self, f, *args, **kwargs):
        """ Saves dataset in netCDF file.
        
        parameters:
        -----------
        f : netCDF file name.
        
        understood keyword arguments:
        zlib : Enable zlib compression if True. Default is False (no compression).
        complevel : integer between 1 and 9 describing the level of compression desired. Ignored if zlib=False.
        mode : File creation mode. Default is 'w-'. Set to 'w' to overwrite any existing file.
        format : netCDF file format. Default is 'NETCDF4'.
        
        See the netCDF4-python module documentation for more information about the use
        of keyword arguments to write_nc.
        """
        # NOTE(review): presumably resolved as a package-local `io` module
        # (Python 2 implicit relative import) -- confirm before porting
        import io.nc as ncio
        ncio.write_dataset(f, self, *args, **kwargs)

    write = write_nc

    @classmethod
    def read_nc(cls, f, *args, **kwargs):
        """ Read a dataset from a netCDF file; all arguments are forwarded to
        `read_dataset` of the `io.nc` module.
        """
        # NOTE(review): same implicit relative import as in write_nc
        import io.nc as ncio
        return ncio.read_dataset(f, *args, **kwargs)

    read = read_nc

    def to_array(self, axis=None, keys=None, _constructor=None):
        """ Convert to DimArray

        parameters:
        -----------
        axis  : name of the new axis holding the keys, by default "unnamed"
            (or "unnamed_1", "unnamed_2"... if already in use)
        keys  : variables to include, by default all of them
        _constructor : class of the returned object, by default DimArray

        A ValueError is raised if `axis` is already a dimension of the Dataset.
        """
        if axis is None:
            axis = "unnamed"
            if axis in self.dims:
                i = 1
                while "unnamed_{}".format(i) in self.dims:
                    i += 1
                axis = "unnamed_{}".format(i)

        if axis in self.dims:
            raise ValueError("please provide an axis name which does not "
                             "already exist in Dataset")

        # keys are used twice below (data and new axis): store them in a list
        # so that one-shot iterables and dictionary views work as well
        keys = list(self.keys() if keys is None else keys)

        # align all variables to the same dimensions, as a numpy array
        data = np.array(
            [self[k].reshape(self.dims).broadcast(self.axes).values
             for k in keys])

        # determine axes
        axes = [Axis(keys, axis)] + self.axes

        if _constructor is None:
            _constructor = DimArray
        return _constructor(data, axes)

    def take(self, indices, axis=0, raise_error=True, **kwargs):
        """ analogous to DimArray's take, but for each DimArray of the Dataset

        parameters:
        -----------
        indices: scalar, or array-like, or slice
        axis: axis name (str) or axis position (int)
        raise_error: raise an error if a variable does not have the desired dimension
        **kwargs: arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Examples:
        ---------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        dimensions: 
        <BLANKLINE>
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        dimensions: 
        <BLANKLINE>
        a: 1.0
        b: nan
        """
        # work with the axis name, whatever was provided: the default axis=0
        # used to be rejected by a string-only assertion
        axis = self.axes[axis].name
        ii = self.axes[axis].loc(indices, **kwargs)
        newdata = self.copy() # copy the dict
        for k in self.keys():
            if axis not in self[k].dims:
                if raise_error:
                    raise ValueError("{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged".format(k, axis))
                continue
            a = self[k].take(ii, axis=axis, indexing='position')
            if not isinstance(a, DimArray):
                a = DimArray(a)
            # plain dictionary assignment: the axes are updated once, below
            odict.__setitem__(newdata, k, a)

        # update the axis
        newaxis = self.axes[axis][ii]
        pos = self.dims.index(axis)

        # remove if axis collapsed
        if not isinstance(newaxis, Axis):
            del newdata.axes[pos]

        # otherwise update
        else:
            newdata.axes[pos] = newaxis

        return newdata

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Apply a method on every Dataset variable. 
        
        If the 'axis=' parameter is passed, only the variables with the required
        axis are transformed, the other ones are kept unchanged.

        parameters:
        -----------
        funcname : name of the DimArray method to call
        *args, **kwargs : passed to the method
        """
        axis = kwargs.pop('axis',None)
        if axis is not None:
            axis = self.axes[axis].name  # position or name ==> name
        kwargs['axis'] = axis

        d = odict(self)
        for k in self.keys():
            if axis is not None and axis not in self[k].dims:
                continue
            d[k] = getattr(self[k], funcname)(*args, **kwargs)

        return Dataset(d)

    def mean(self, axis=0, **kwargs):
        """ Apply transformation on every variable of the Dataset

        Examples:
        ---------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        dimensions: 'items'
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        dimensions: 'time'
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        return self._apply_dimarray_axis('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Apply `std` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('std', axis=axis, **kwargs)

    def var(self, axis=0, **kwargs):
        """ Apply `var` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('var', axis=axis, **kwargs)

    def median(self, axis=0, **kwargs):
        """ Apply `median` on every variable of the Dataset, see `mean`
        """
        return self._apply_dimarray_axis('median', axis=axis, **kwargs)

    def __getattr__(self, att):
        """ allow access of dimensions: return the values of the axis named `att`
        """
        # The axes are read from the instance dictionary: going through
        # self.axes or self.dims would call __getattr__ again, without end,
        # as long as the `axes` attribute is not set.
        axes = self.__dict__.get('axes')
        if axes is not None and att in [ax.name for ax in axes]:
            return axes[att].values # return numpy array

        raise AttributeError("{} object has no attribute {}".format(self.__class__.__name__, att))

    def to_dict(self):
        """ export to dict
        """
        return dict(self)

    def to_odict(self):
        """ export to ordered dict
        """
        return odict(self)
예제 #5
0
class Dataset(odict, MetadataBase):
    """ Container for a set of aligned objects
    """
    @property
    def axes(self):
        """ axes of the Dataset, as stored in the private `_axes` attribute
        """
        return self._axes

    @axes.setter
    def axes(self, axes):
        # plain assignment, no validation or copy of the provided object
        self._axes = axes

    _constructor = DimArray

    def __init__(self, *args, **kwargs):
        """ initialize a dataset from a set of objects of varying dimensions

        Parameters
        ----------
        *args, **kwargs : same arguments as an ordered dictionary; they are
            used to build the name -> value mapping of the variables to store.

        Scalars (as per `np.isscalar`) are wrapped with `self._constructor`,
        any other value which is not a DimArray raises a TypeError. All values
        are passed through `align_axes` before being stored via item assignment.
        """
        assert not {'axes','keys'}.issubset(kwargs.keys()) # just to check bugs due to back-compat ==> TO BE REMOVED AFTER DEBUGGING

        # check input arguments: same init as odict
        data = odict(*args, **kwargs)

        # Basic initialization (kept before the odict initializer, as originally)
        self.axes = Axes()

        # initialize an ordered dictionary
        super(Dataset, self).__init__()

        # Check everything is a DimArray. The values are collected in a real
        # list: dictionary views cannot be indexed or assigned to on Python 3.
        keys = list(data.keys())
        values = []
        for key in keys:
            value = data[key]
            if not isinstance(value, DimArray):
                if not np.isscalar(value):
                    raise TypeError("A Dataset can only store DimArray instances, got {}: {}".format(key, value))
                value = self._constructor(value)
            values.append(value)

        # Align objects
        values = align_axes(*values)

        # Append object (will automatically update self.axes)
        for key, value in zip(keys, values):
            self[key] = value

    @property
    def dims(self):
        """ tuple of dimensions contained in the Dataset, consistently with DimArray's `dims`
        """
        return tuple([ax.name for ax in self.axes])

    @dims.setter
    def dims(self, newdims):
        """ rename all axis names at once

        Parameters
        ----------
        newdims : iterable of new names, one per axis of the Dataset

        Raises TypeError if `newdims` is not iterable, ValueError if its
        length differs from the number of axes.
        """
        if not np.iterable(newdims): 
            raise TypeError("new dims must be iterable")
        if not len(newdims) == len(self.axes):
            raise ValueError("dimension mismatch")

        # update every element's dimension
        # axes in individual items will be updated automatically 
        # since they are all references of the central axes
        for i, newname in enumerate(newdims):
            self.axes[i].name = newname

    def _repr(self, metadata=True):
        """ build the string representation of the Dataset

        Parameters
        ----------
        metadata : if True, also include the metadata summary of each variable
            and of the Dataset itself (when there is any)
        """
        n_vars = len(self)
        title = "Dataset of %s variables" % (n_vars)
        if n_vars == 1:
            title = title.replace('variables','variable')
        out = [title, self.axes._repr(metadata=metadata)]

        # one entry per variable: its dimensions, or its value if it has none
        for name in self.keys():
            var = self[name]
            shown = repr(var.dims)
            if shown == "()":
                shown = var.values
            entry = ["{}".format(shown)]
            if metadata and len(var._metadata()) > 0:
                entry.append(var._metadata_summary())
            out.append(name+': '+"\n".join(entry))

        # global attributes come last
        if metadata and len(self._metadata()) > 0:
            out.extend(['//global attributes:', self._metadata_summary()])

        return "\n".join(out)

    def summary(self):
        """ string representation of the Dataset, metadata included
        """
        text = self._repr(metadata=True)
        return text

    def __repr__(self):
        """ string representation of the Dataset, without metadata
        """
        text = self._repr(metadata=False)
        return text

    #
    # overload dictionary methods
    #
    def __delitem__(self, item):
        """ remove a variable, and drop the axes no remaining variable uses
        """
        axes = self[item].axes
        super(Dataset, self).__delitem__(item)

        # update axes
        for ax in axes:
            # any() stops at the first variable which still has this dimension
            if not any(ax.name in self[k].dims for k in self):
                self.axes.remove(ax)

    def __setitem__(self, key, val):
        """ Make sure the object is a DimArray with appropriate axes

        A scalar, or an object with an `__array__` attribute whose shape
        matches the sizes of the Dataset axes, is converted with
        `self._constructor`. The array is stored as a shallow copy whose axes
        are the Dataset's own axis objects.

        Raises TypeError for other objects which are not a DimArray, and
        ValueError if an array-like has the wrong shape or if an axis does not
        compare equal to the Dataset axis of the same name.

        Examples
        --------
        >>> ds = Dataset()
        >>> ds
        Dataset of 0 variables
        <BLANKLINE>
        >>> a = DimArray([0, 1, 2], dims=('time',))
        >>> ds['yo'] = a 
        >>> ds['yo']
        dimarray: 3 non-null elements (0 null)
        0 / time (3): 0 to 2
        array([0, 1, 2])
        >>> ds['ya'] = a.values  # also accepts numpy array if shape matches
        >>> ds['ya']
        dimarray: 3 non-null elements (0 null)
        0 / time (3): 0 to 2
        array([0, 1, 2])
        """
        if not isinstance(val, DimArray):
            if np.isscalar(val):
                val = self._constructor(val)
            elif hasattr(val, '__array__'):
                if np.shape(val) == tuple([ax.size for ax in self.axes]):
                    val = self._constructor(val, axes=self.axes) # make a dimarray with same axes
                else:
                    raise ValueError("array_like shape does not match, use DimArray if dimensions vary within the dataset")
            else:
                raise TypeError("can only append DimArray instances")

        # shallow copy of the DimArray so that its axes attribute can be 
        # modified without affecting the original array
        val = copy.copy(val)  
        val.axes = copy.deepcopy(val.axes)

        # Check dimensions of already existing axes first, so that a mismatch
        # does not leave newly appended axes behind in self.axes
        known = self.dims
        for newaxis in val.axes:
            if newaxis.name in known:
                existing_axis = self.axes[newaxis.name]
                if not newaxis == existing_axis:
                    raise ValueError("axes values do not match, align data first."
                            "\nDataset: {}, \nGot: {}".format(existing_axis, newaxis))

        # make sure axes match those of the dataset
        for i, newaxis in enumerate(val.axes):

            # assign the Dataset axis : they all must share the same axis
            if newaxis.name in self.dims:
                val.axes[i] = self.axes[newaxis.name]

            # Append new axis
            else:
                self.axes.append(newaxis)  

            assert val.axes[i] is self.axes[newaxis.name]

        super(Dataset, self).__setitem__(key, val)

        # now just checking 
        test_internal = super(Dataset, self).__getitem__(key)
        for ax in test_internal.axes:
            assert self.axes[ax.name] is ax

    def copy(self):
        """ copy of the Dataset (odict copy), with the metadata passed on
        """
        # odict method, copy axes but not metadata
        duplicate = super(Dataset, self).copy()
        meta = self._metadata()
        duplicate._metadata(meta)
        return duplicate

    def __eq__(self, other):
        """ test equality but bypass annoying numpy's __eq__ method

        Two Datasets are equal if they have the same keys in the same order,
        equal axes, and variables whose elements all compare equal.
        Returns a plain bool; False if `other` is not a Dataset.
        """
        if not isinstance(other, Dataset):
            return False

        # compare the keys as lists: on Python 3, key views compare like sets
        # and would ignore the order
        if list(self.keys()) != list(other.keys()):
            return False

        if not self.axes == other.axes:
            return False

        # generator: stop at the first variable which differs
        return all(bool(np.all(self[k] == other[k])) for k in self.keys())

    #
    #
    #
    def write_nc(self, f, *args, **kwargs):
        """ Save dataset in netCDF file.

        If you see this documentation, it means netCDF4 is not installed on your system 
        and you will not be able to use this functionality.

        Parameters
        ----------
        f : passed to `_write_dataset` of the `io.nc` module, along with the
            Dataset itself and any additional argument
        """
        # NOTE(review): presumably resolved as a package-local `io` module
        # (Python 2 implicit relative import) -- confirm before porting
        import io.nc as ncio
        ncio._write_dataset(f, self, *args, **kwargs)

    write = write_nc

    @classmethod
    def read_nc(cls, f, *args, **kwargs):
        """ Read dataset from netCDF file.

        If you see this documentation, it means netCDF4 is not installed on your system 
        and you will not be able to use this functionality.

        Parameters
        ----------
        f : passed to `_read_dataset` of the `io.nc` module, along with any
            additional argument; `cls` itself is not used
        """
        # NOTE(review): presumably resolved as a package-local `io` module
        # (Python 2 implicit relative import) -- confirm before porting
        import io.nc as ncio
        return ncio._read_dataset(f, *args, **kwargs)

    #read = read_nc

    def to_array(self, axis=None, keys=None):
        """ Convert to DimArray

        Each selected variable is reshaped and broadcast onto the axes of the
        Dataset, then all variables are stacked along a new leading axis whose
        values are the variable names.

        Parameters
        ----------
        axis  : name of the new leading axis. By default "unnamed", or the
                first free name among "unnamed_1", "unnamed_2", ... when
                "unnamed" is already a dimension of the Dataset.
        keys  : iterable of variable names to stack, in that order.
                By default all variables of the Dataset.

        Returns
        -------
        the object built by `self._constructor` from the stacked values and
        the list of axes (new axis first, then the Dataset axes)

        Raises
        ------
        ValueError : if `axis` is provided and already names a dimension
        """
        dims = self.dims

        if axis is None:
            # pick a default name that does not clash with existing dimensions
            axis = "unnamed"
            suffix = 0
            while axis in dims:
                suffix += 1
                axis = "unnamed_{}".format(suffix)

        elif axis in dims:
            # adjacent literals instead of a backslash continuation inside the
            # string, which used to embed the source indentation in the message
            raise ValueError("please provide an axis name which does not "
                             "already exist in Dataset")

        if keys is None:
            keys = self.keys()

        # `keys` is walked once for the data and reused for the new axis:
        # materialize it so that generators and dict views behave like lists
        keys = list(keys)

        # align all variables to the same dimensions and stack their values
        stacked = np.array([
            self[k].reshape(dims).broadcast(self.axes).values for k in keys
        ])

        # new axis first, followed by the axes shared by the Dataset
        axes = [Axis(keys, axis)] + self.axes

        return self._constructor(stacked, axes)

    # 
    # REMOVE THESE FUNCTIONS AS NON-ESSENTIAL ???
    #
    def take(self, indices, axis=0, raise_error=False, **kwargs):
        """ Index every variable of the Dataset, in the manner of DimArray's take

        The indices are first located on the axes of the Dataset, then each
        variable is indexed by position along the axes it actually has.

        Parameters
        ----------
        indices : scalar, or array-like, or slice
        axis : axis name (str)
        raise_error : if True, raise a ValueError when a variable lacks one of
            the indexed dimensions; if False (default), such a dimension is
            skipped for that variable
        **kwargs : arguments passed to the axis locator, similar to `take`, such as `indexing` or `keepdims`

        Returns
        -------
        new instance of the same class as `self`, holding the indexed variables

        Examples
        --------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([11,22,33], axes=('time', [1951, 1952, 1953]))
        >>> ds = Dataset(a=a, b=b)
        >>> ds
        Dataset of 2 variables
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        >>> ds.take(1951, axis='time')
        Dataset of 2 variables
        <BLANKLINE>
        a: 2.0
        b: 11.0
        >>> ds.take(0, axis='time', indexing='position')
        Dataset of 2 variables
        <BLANKLINE>
        a: 1.0
        b: nan
        >>> ds['c'] = DimArray([[1,2],[11,22],[111,222],[3,4]], axes=[('time', [1950,1951,1952,1953]),('item',['a','b'])])
        >>> ds.take({'time':1950})
        Dataset of 3 variables
        0 / item (2): a to b
        a: 1.0
        b: nan
        c: ('item',)
        >>> ds.take({'time':1950})['c']
        dimarray: 2 non-null elements (0 null)
        0 / item (2): a to b
        array([1, 2])
        >>> ds.take({'item':'b'})
        Dataset of 3 variables
        0 / time (4): 1950 to 1953
        a: ('time',)
        b: ('time',)
        c: ('time',)
        """
        # locate the indices once, on the axes shared by the whole Dataset,
        # and key them by axis name
        located = self.axes.loc(indices, axis=axis, **kwargs)
        positions = {}
        for rank, index in enumerate(located):
            positions[self.axes[rank].name] = index

        # every variable is then indexed in 'position' mode, one axis at a time
        result = self.__class__()
        for name in self.keys():
            variable = self[name]
            for dim, index in positions.items():
                lacks_dim = np.ndim(variable) == 0 or dim not in variable.dims
                if lacks_dim:
                    if not raise_error:
                        continue
                    raise ValueError("{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged".format(name, dim))
                variable = variable.take({dim: index}, indexing='position')
            result[name] = variable

        return result

    def _apply_dimarray_axis(self, funcname, *args, **kwargs):
        """ Call the method named `funcname` on the variables of the Dataset

        The optional 'axis=' keyword is resolved to an axis name through the
        Dataset axes and passed on to the method. When it is provided, only the
        variables which have that dimension are transformed; the others are
        returned as they are. With axis=None every variable is transformed.

        Returns a new Dataset.
        """
        axis_name = kwargs.pop('axis', None)
        if axis_name is not None:
            axis_name = self.axes[axis_name].name
        kwargs['axis'] = axis_name

        # start from all variables, then overwrite those that get transformed
        results = self.to_odict()
        for name in self.keys():
            variable = self[name]
            if axis_name is None or axis_name in variable.dims:
                method = getattr(variable, funcname)
                results[name] = method(*args, **kwargs)

        return Dataset(results)

    def mean(self, axis=0, **kwargs):
        """ Apply the `mean` transformation along an axis, on every variable of the Dataset which has that axis

        Variables without the requested axis are left unchanged.

        Examples
        --------
        >>> a = DimArray([1,2,3], axes=('time', [1950, 1951, 1952]))
        >>> b = DimArray([[11,22,33],[44,55,66]], axes=[('items',['a','b']), ('time', [1950, 1951, 1952])])
        >>> ds = Dataset(a=a, b=b)
        >>> ds.mean(axis='time')
        Dataset of 2 variables
        0 / items (2): a to b
        a: 2.0
        b: ('items',)
        >>> ds.mean(axis='items')
        Dataset of 2 variables
        0 / time (3): 1950 to 1952
        a: ('time',)
        b: ('time',)
        """
        apply_on_variables = self._apply_dimarray_axis
        return apply_on_variables('mean', axis=axis, **kwargs)

    def std(self, axis=0, **kwargs):
        """ Apply `std` along an axis, on every variable of the Dataset which has that axis
        """
        apply_on_variables = self._apply_dimarray_axis
        return apply_on_variables('std', axis=axis, **kwargs)
    def var(self, axis=0, **kwargs):
        """ Apply `var` along an axis, on every variable of the Dataset which has that axis
        """
        apply_on_variables = self._apply_dimarray_axis
        return apply_on_variables('var', axis=axis, **kwargs)
    def median(self, axis=0, **kwargs):
        """ Apply `median` along an axis, on every variable of the Dataset which has that axis
        """
        apply_on_variables = self._apply_dimarray_axis
        return apply_on_variables('median', axis=axis, **kwargs)
    def sum(self, axis=0, **kwargs):
        """ Apply `sum` along an axis, on every variable of the Dataset which has that axis
        """
        apply_on_variables = self._apply_dimarray_axis
        return apply_on_variables('sum', axis=axis, **kwargs)

    def __getattr__(self, att):
        """ Fallback attribute access: give the values of the axis named `att`

        Raises AttributeError when `att` is not one of the Dataset dimensions.
        """
        # anything but a dimension name is a genuine missing attribute
        if att not in self.dims:
            raise AttributeError("{} object has no attribute {}".format(self.__class__.__name__, att))

        # values of the matching axis (a numpy array, per the original comment)
        return self.axes[att].values

    def to_dict(self):
        """ Export the variables to a plain dict, keyed by variable name
        """
        exported = {}
        for name in self.keys():
            exported[name] = self[name]
        return exported

    def to_odict(self):
        """ Export the variables to an ordered dict, in the order of the keys
        """
        exported = odict()
        for name in self.keys():
            exported[name] = self[name]
        return exported

    @format_doc(**_doc_reset_axis)
    def set_axis(self, values=None, axis=0, inplace=False, **kwargs):
        """ (re)set axis values and attributes in all dimarrays present in the dataset

        Parameters
        ----------
        {values}
        {axis}
        {inplace}
        {kwargs}

        Returns
        -------
        Dataset instance, or None if inplace is True

        Examples
        --------
        >>> ds = Dataset()
        >>> ds['a'] = da.zeros(shape=(3,))  # some dimarray with dimension 'x0'
        >>> ds['b'] = da.zeros(shape=(3,4)) # dimensions 'x0', 'x1'
        >>> ds.set_axis(['a','b','c'], axis='x0')
        Dataset of 2 variables
        0 / x0 (3): a to c
        1 / x1 (4): 0 to 3
        a: ('x0',)
        b: ('x0', 'x1')
        """
        # Truthiness test instead of `inplace is False`: with the identity test
        # a falsy flag such as 0 or numpy.False_ silently modified the Dataset
        # in place and returned None.
        target = self if inplace else self.copy()

        # NOTE(review): the variables are not updated one by one here (an
        # earlier per-variable loop was disabled); this presumably relies on
        # the assignment of `axes` to reach every variable -- confirm.

        # update the main axis instance
        target.axes = target.axes.set_axis(values, axis, inplace=False, **kwargs)

        # in-place operations return None, as documented
        return None if inplace else target

    @format_doc(**_doc_reset_axis)
    def reset_axis(self, axis=0, inplace=False, **kwargs):
        """ (re)set axis values and attributes in all dimarrays present in the dataset

        Parameters
        ----------
        {axis}
        {inplace}
        {kwargs}

        Returns
        -------
        Dataset instance, or None if inplace is True

        Examples
        --------
        >>> ds = Dataset()
        >>> ds['a'] = da.zeros(axes=[['a','b','c']])  # some dimarray with dimension 'x0'
        >>> ds['b'] = da.zeros(axes=[['a','b','c'], [11,22,33,44]]) # dimensions 'x0', 'x1'
        >>> ds.reset_axis(axis='x0')
        Dataset of 2 variables
        0 / x0 (3): 0 to 2
        1 / x1 (4): 11 to 44
        a: ('x0',)
        b: ('x0', 'x1')
        """
        # Truthiness test instead of `inplace is False`: with the identity test
        # a falsy flag such as 0 or numpy.False_ silently modified the Dataset
        # in place and returned None.
        target = self if inplace else self.copy()

        # NOTE(review): the variables are not updated one by one here (an
        # earlier per-variable loop was disabled); this presumably relies on
        # the assignment of `axes` to reach every variable -- confirm.

        # update the main axis instance
        target.axes = target.axes.reset_axis(axis, inplace=False, **kwargs)

        # in-place operations return None, as documented
        return None if inplace else target