Exemple #1
0
    def __getitem__(self, item, *args):
        '''Return a grid variable, a re-masked copy or a slice of this array.

        Parameters
        ----------
        item : str, dm.GridArray, tuple or any other single index
            - str : name of a grid variable; the array returned by
              `self.get_array(item)` is wrapped in a `dm.GridArray` on the
              same grid
            - boolean `dm.GridArray` : used to build the mask of a copy of
              this array
            - tuple : passed through `self.grid.convert_slice` and then
              applied to both the grid and the data
            - anything else (int, slice, list, ...) : wrapped into a 1-tuple
              and handled like a tuple
        *args
            Accepted but not used.

        Returns
        -------
        dm.GridArray, or the bare indexing result when the sliced grid has
        length zero.

        Raises
        ------
        IndexError
            If `item` is a string that is not in `self.grid.vars`.
        NotImplementedError
            If `item` is a `dm.GridArray` with a non-boolean dtype.
        '''
        if isinstance(item, str):
            if item not in self.grid.vars:
                raise IndexError('No variable %s in DataSet' % item)
            return dm.GridArray(self.get_array(item), grid=self.grid)

        if isinstance(item, dm.GridArray):
            # `np.bool` was removed in NumPy 1.24; `np.bool_` is the scalar
            # type that works on every NumPy version
            if item.dtype == np.bool_:
                # NOTE(review): this combination clears the mask of entries
                # that were already masked in `self`; if numpy.ma semantics
                # (True == masked) are intended, `self.mask | ~item` may be
                # what was meant -- confirm before changing
                mask = np.logical_and(~self.mask, ~np.asarray(item))
                new_item = dm.GridArray(np.asarray(self), grid=self.grid)
                new_item.mask = mask
                return new_item
            raise NotImplementedError('get item %s' % item)

        if not isinstance(item, tuple):
            # every non-tuple index, lists included, is handled as a 1-tuple
            return self[(item, )]

        item = self.grid.convert_slice(item)
        new_grid = self.grid[item]
        selection = np.ma.asarray(self)[item]
        if len(new_grid) == 0:
            # no grid axis is left, so there is nothing to wrap
            return selection
        return dm.GridArray(selection, grid=new_grid)
Exemple #2
0
    def generate_destination(self, *args, **kwargs):
        '''Build the destination container matching the supplied arguments

        A single positional argument (and no keyword arguments) that is a
        `dm.GridData`, `dm.Grid`, `dm.GridArray` or `dm.PointData` is used
        as a template. Everything else is forwarded to
        `dm.Grid(*args, **kwargs)`; if the source is a `dm.GridData` or
        `dm.GridArray`, string arguments naming one of its grid variables
        are first replaced by `self.source.grid[name]`.

        Parameters
        ----------
        args, kwargs
            template object or grid specification, see above

        Returns
        -------
        dm.GridData, dm.GridArray, or (for a PointData template) the
        template itself / its selection `dest[self.source.grid.vars]`
        '''
        def prepared(grid):
            # grids are initialized against the source before being used
            grid.initialize(self.source)
            return grid

        if len(args) == 1 and not kwargs:
            template = args[0]
            if isinstance(template, dm.GridData):
                return dm.GridData(prepared(template.grid))
            if isinstance(template, dm.Grid):
                return dm.GridData(prepared(template))
            if isinstance(template, dm.GridArray):
                template_grid = prepared(template.grid)
                return dm.GridArray(np.empty(template_grid.shape),
                                    grid=template_grid)
            if isinstance(template, dm.PointData):
                if not self.source_has_grid:
                    return template
                # keep only the variables that the source grid knows
                return template[self.source.grid.vars]

        if isinstance(self.source, (dm.GridData, dm.GridArray)):
            # names of source grid variables stand for the source's own axes
            args = [
                self.source.grid[arg]
                if isinstance(arg, str) and arg in self.source.grid.vars
                else arg
                for arg in args
            ]

        new_grid = prepared(dm.Grid(*args, **kwargs))

        if isinstance(self.source, dm.GridArray):
            return dm.GridArray(np.empty(new_grid.shape), grid=new_grid)
        return dm.GridData(new_grid)
Exemple #3
0
    def std_devs(self, sigmas=(1., ), fill_value=np.nan, **kwargs):
        """ Mean and symmetric standard-deviation bands

        Parameters
        ----------
        sigmas : iterable
            values to produce bands = mean +/- sigmas * std
        fill_value
            forwarded to `self.mean` and `self.std`
        **kwargs
            forwarded to `self.mean` and `self.std`

        Returns
        -------
        dm.GridArray if `self.single`, else dm.GridData. Each variable gets
        an extra last axis of length ``2 * len(sigmas) + 1`` ordered as
        ``mean - sigmas[-1] * std, ..., mean, ..., mean + sigmas[-1] * std``.
        """
        # materialize once: a one-shot iterable would otherwise be exhausted
        # after the first variable
        sigma_values = list(sigmas)

        # previously np.nan was hard-coded here and `fill_value` was ignored
        mean = self.mean(fill_value=fill_value, **kwargs)
        std = self.std(fill_value=fill_value, **kwargs)

        output_maps = {}

        for var in self.data.vars:
            m = np.ma.asarray(mean[var])
            s = np.ma.asarray(std[var])

            arrays = [m - sigma * s for sigma in reversed(sigma_values)]
            arrays.append(m)
            arrays.extend(m + sigma * s for sigma in sigma_values)

            # move the band axis to the end; swapping axes 0 and -1 would
            # also reverse the grid axes for grids with more than one
            # dimension
            output_maps[var] = np.moveaxis(np.stack(arrays), 0, -1)

        # Pack into GridArray
        if self.single:
            out_data = dm.GridArray(output_maps['test'], grid=self.grid)

        # Pack into GridData
        else:
            out_data = dm.GridData(self.grid)
            for var, output_map in output_maps.items():
                out_data[var] = output_map

        return out_data
Exemple #4
0
 def T(self):
     '''Transpose of the grid axes

     The data is transposed together with the grid (`self.grid.T`). One
     extra non-grid dimension is supported and kept as the last axis.

     Returns
     -------
     `self` when there is at most one grid axis (nothing to transpose),
     otherwise a new dm.GridArray

     Raises
     ------
     NotImplementedError
         if the array has more than one dimension beyond the grid axes
     '''
     if self.ndim > self.nax + 1:
         raise NotImplementedError()
     if self.nax <= 1:
         # also covers nax == 0, which used to fall through to an
         # unassigned `new_data`
         return self
     new_data = self.data.T
     if self.ndim == self.nax + 1:
         # `.T` moved the extra dimension to the front, put it back last
         new_data = np.rollaxis(new_data, 0, self.ndim)
     return dm.GridArray(new_data, grid=self.grid.T)
Exemple #5
0
    def run_np_indexed(self, method, fill_value=np.nan, **kwargs):
        '''run the numpy indexed methods

        Parameters
        ----------
        method : str
            choice of ['sum', 'mean', 'min', 'max', 'std', 'var', 'argmin', 'argmax', 'median', 'mode', 'prod']
            looked up as an attribute of `self.group`
        fill_value
            value of all bins that do not receive a result
        **kwargs
            `axis` must be absent or None; other keyword arguments are
            accepted but not forwarded to the method

        Returns
        -------
        dm.GridArray if `self.single`, else dm.GridData
        '''
        axis = kwargs.pop('axis', None)
        assert axis is None, 'Axis kw not supported for BinnedData'

        if self.indices is None:
            self.compute_indices()

        reduce = getattr(self.group, method)

        # stays empty when the data has no variables, so the fill loop
        # below is skipped instead of hitting an unbound name
        indices = ()
        outputs = {}
        output_maps = {}
        for var in self.data.vars:
            source_data = self.data[var]
            # shape[1:] is empty for 1-d data, so one expression covers both
            # the 1-d and the multi-dimensional case
            output_maps[var] = np.full(self.grid.shape +
                                       source_data.shape[1:],
                                       fill_value=fill_value,
                                       dtype=source_data.dtype)
            indices, outputs[var] = reduce(source_data)

        for i, idx in enumerate(indices):
            if idx < 0:
                # negative group keys have no bin in the grid
                continue
            out_idx = np.unravel_index(idx, self.grid.shape)
            for var, output_map in output_maps.items():
                output_map[out_idx] = outputs[var][i]

        # Pack into GridArray
        if self.single:
            out_data = dm.GridArray(output_maps['test'], grid=self.grid)

        else:
            # Pack into GridData
            out_data = dm.GridData(self.grid)
            for var, output_map in output_maps.items():
                out_data[var] = output_map

        return out_data
Exemple #6
0
    def __getitem__(self, item):
        '''Return a variable, a selection of variables or a slice.

        Parameters
        ----------
        item : str, dm.GridArray, iterable of str, or index
            - str : a single variable, returned as `dm.GridArray`. Entries
              of `self._data` that are callables are called on first access
              and the result is stored in their place
            - boolean `dm.GridArray` : every data variable is indexed with
              `item` and collected in a new `dm.GridData`
            - iterable of str : new `dm.GridData` holding only those of the
              named variables that are data variables
            - anything else : applied as an index to the grid and to every
              item of `self.items()`

        Returns
        -------
        dm.GridArray, dm.GridData, or a plain dict ``{name: value}`` when
        the sliced grid has length zero

        Raises
        ------
        IndexError
            If `item` is a string that is not in `self.vars`.
        NotImplementedError
            If `item` is a `dm.GridArray` with a non-boolean dtype.
        '''
        if isinstance(item, str):
            if item not in self.vars:
                raise IndexError('No variable %s in DataSet' % item)
            if item in self.data_vars:
                data = self._data[item]
                if callable(data):
                    # deferred entry: evaluate, store, then read the stored
                    # result back
                    self[item] = data()
                    data = self._data[item]
            else:
                data = self.get_array(item)
            return dm.GridArray(data, grid=self._grid)

        # mask
        if isinstance(item, dm.GridArray):
            # `np.bool` was removed in NumPy 1.24; `np.bool_` is the scalar
            # type that works on every NumPy version
            if item.dtype == np.bool_:
                # ToDo: masked operations behave strangely, operations are
                # applied to all elements, even if masked
                new_data = dm.GridData(self._grid)
                for v in self.data_vars:
                    new_data[v] = self[v][item]
                return new_data
            raise NotImplementedError('get item %s' % item)

        # create new instance with only those vars
        if isinstance(item, Iterable) and all(
                isinstance(v, str) for v in item):
            new_data = dm.GridData(self._grid)
            for v in item:
                if v in self.data_vars:
                    new_data[v] = self[v]
            return new_data

        # slice
        new_grid = self._grid[item]
        if len(new_grid) == 0:
            # no grid axis is left, hand back the bare values
            return {n: d[item] for n, d in self.items()}
        new_data = dm.GridData(new_grid)
        for n, d in self.items():
            new_data[n] = d[item]
        return new_data
Exemple #7
0
    def run(self):
        '''Run the evaluation and return the destination object

        `self.setup()` is called first, then one of three paths is taken:

        - `self.dest` is a `dm.GridArray`: the single source array (for
          `dm.GridData` / `dm.PointData` sources the one variable not in
          `self.wrt`) is evaluated right away and `self.dest` is replaced
          by a new `dm.GridArray` on the destination grid
        - `self.source` is a `dm.GridArray`: it is evaluated right away and
          stored in `self.dest` under the key 'result'
        - otherwise: every source variable not in `self.wrt`, and every
          entry of `self.additional_runs`, is stored in `self.dest` as a
          zero-argument callable that runs `self.eval` when called
        '''
        self.setup()

        if isinstance(self.dest, dm.GridArray):
            # in this case it is a single array, no vars
            if not isinstance(self.source, (dm.GridData, dm.PointData)):
                source_data = self.source
            else:
                assert len(self.source.vars) - len(self.wrt) == 1
                for name in self.source.vars:
                    if name not in self.wrt:
                        source_data = self.source[name]

            self.dest = dm.GridArray(self.eval(source_data),
                                     grid=self.dest.grid)
            return self.dest

        if isinstance(self.source, dm.GridArray):
            self.dest['result'] = self.eval(self.source)
            return self.dest

        def deferred(payload):
            # bind `payload` now so each callable keeps its own data
            def evaluate():
                return self.eval(payload)

            return evaluate

        for name in self.source.vars:
            if name not in self.wrt:
                self.dest[name] = deferred(self.source[name])

        for name, payload in self.additional_runs.items():
            self.dest[name] = deferred(payload)

        return self.dest
Exemple #8
0
    def apply_function(self,
                       function,
                       *args,
                       fill_value=np.nan,
                       return_len=None,
                       **kwargs):
        '''apply function per bin

        Parameters
        ----------
        function : callable
            called as `function(samples, *args, **kwargs)` with the samples
            of one bin; for data with more than one dimension it is called
            once per combination of trailing indices
        *args
            forwarded to `function`
        fill_value
            value of all bins that do not contain any sample
        return_len : int (optional)
            the shape (array length) of the `function` output. If None is passed,
            `return_len` is (tried to be) determined on the fly by calling
            `function` on the first three samples of the first variable
        **kwargs
            forwarded to `function`. If weights are passed as kwarg, they
            are interpreted as being a variable in the dataset and the
            matching samples of that variable are passed on instead

        Returns
        -------
        dm.GridArray if `self.single`, else dm.GridData
        '''

        if self.indices is None:
            self.compute_indices()

        output_maps = {}
        sources = {}

        weights = kwargs.pop('weights', None)
        # dataset lookups do not depend on the bin, do them once
        weight_data = None if weights is None else self.data[weights]

        for var in self.data.vars:
            source_data = sources[var] = self.data[var]

            if return_len is None:
                # try to figure out return length of function on the same
                # kind of 1-d sample array the per-bin calls will receive
                probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
                if weight_data is not None:
                    kwargs['weights'] = weight_data[probe]
                test_value = function(source_data[probe], *args, **kwargs)
                if (np.isscalar(test_value)
                        or getattr(test_value, 'ndim', None) == 0):
                    return_len = 1
                else:
                    return_len = len(test_value)

            # shape[1:] is empty for 1-d data
            shape = self.grid.shape + source_data.shape[1:]
            if return_len > 1:
                shape = shape + (return_len, )
            output_maps[var] = np.full(shape,
                                       fill_value=fill_value,
                                       dtype=source_data.dtype)

        for i in range(self.grid.size):
            mask = self.indices == i
            if not np.any(mask):
                continue
            out_idx = np.unravel_index(i, self.grid.shape)
            for var, source_data in sources.items():
                if source_data.ndim > 1:
                    for idx in np.ndindex(*source_data.shape[1:]):
                        # `idx` has to be spliced into the index tuple;
                        # passing it as one element (`[:, idx]`) makes it a
                        # fancy index along a single axis, which picks the
                        # wrong data as soon as there are two or more
                        # trailing dimensions
                        column = (slice(None), ) + idx
                        if weight_data is not None:
                            kwargs['weights'] = weight_data[column][mask]

                        output_maps[var][out_idx + idx] = function(
                            source_data[column][mask], *args, **kwargs)
                else:
                    if weight_data is not None:
                        kwargs['weights'] = weight_data[mask]

                    output_maps[var][out_idx] = function(
                        source_data[mask], *args, **kwargs)

        # Pack into GridArray
        if self.single:
            out_data = dm.GridArray(output_maps['test'], grid=self.grid)

        # Pack into GridData
        else:
            out_data = dm.GridData(self.grid)
            for var, output_map in output_maps.items():
                out_data[var] = output_map

        return out_data
Exemple #9
0
    def apply_function(self,
                       function,
                       *args,
                       fill_value=np.nan,
                       return_len=None,
                       **kwargs):
        '''apply function per bin

        Parameters
        ----------
        function : callable
            called as `function(samples, *args, **kwargs)` with the samples
            of one bin; for data with more than one dimension it is called
            once per combination of trailing indices
        *args
            forwarded to `function`
        fill_value
            value of all bins that do not contain any sample
        return_len : int (optional)
            length of the `function` output. If None, it is determined by
            calling `function` on the first three samples of the first
            variable
        **kwargs
            forwarded to `function`. A `weights` entry is interpreted as the
            name of a variable in the dataset; the matching samples of that
            variable are passed on instead

        Returns
        -------
        dm.GridArray if `self.single`, else dm.GridData
        '''

        if self.indices is None:
            self.compute_indices()

        output_maps = {}
        sources = {}

        weights = kwargs.pop('weights', None)
        # dataset lookups do not depend on the bin, do them once
        weight_data = None if weights is None else self.data[weights]

        for var in self.data.vars:
            source_data = sources[var] = self.data[var]

            if return_len is None:
                # try to figure out return length of function; the probe
                # call gets matching weights, exactly like the per-bin
                # calls further down
                probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
                if weight_data is not None:
                    kwargs['weights'] = weight_data[probe]
                test_value = function(source_data[probe], *args, **kwargs)
                if (np.isscalar(test_value)
                        or getattr(test_value, 'ndim', None) == 0):
                    return_len = 1
                else:
                    return_len = len(test_value)

            # shape[1:] is empty for 1-d data
            shape = self.grid.shape + source_data.shape[1:]
            if return_len > 1:
                shape = shape + (return_len, )
            output_maps[var] = np.full(shape,
                                       fill_value=fill_value,
                                       dtype=source_data.dtype)

        for i in range(self.grid.size):
            mask = self.indices == i
            if not np.any(mask):
                continue
            out_idx = np.unravel_index(i, self.grid.shape)
            for var, source_data in sources.items():
                if source_data.ndim > 1:
                    for idx in np.ndindex(*source_data.shape[1:]):
                        # splice `idx` into the index tuple; as a single
                        # element (`[:, idx]`) it acts as a fancy index
                        # along one axis and selects the wrong data for two
                        # or more trailing dimensions
                        column = (slice(None), ) + idx
                        if weight_data is not None:
                            kwargs['weights'] = weight_data[column][mask]

                        output_maps[var][out_idx + idx] = function(
                            source_data[column][mask], *args, **kwargs)
                else:
                    if weight_data is not None:
                        kwargs['weights'] = weight_data[mask]

                    output_maps[var][out_idx] = function(
                        source_data[mask], *args, **kwargs)

        # Pack into GridArray
        if self.single:
            out_data = dm.GridArray(output_maps['test'], grid=self.grid)

        # Pack into GridData
        else:
            out_data = dm.GridData(self.grid)
            for var, output_map in output_maps.items():
                out_data[var] = output_map

        return out_data
Exemple #10
0
    def wrapped_func(*args, **kwargs):
        '''Call `func` on the bare data of GridArray arguments and re-wrap.

        Every positional `dm.GridArray` is replaced by its masked-array
        view; all of them must share the grid of the first one. An `axis`
        keyword may name axes by grid variable or by (possibly negative)
        integer and is translated to non-negative integers before the call.

        Returns
        -------
        - a Python scalar for 0-d array results
        - a `dm.GridArray` for array results; grid axes listed in `axis`
          are removed from a copy of the grid, and if no grid axis is left
          the bare data is returned
        - the untouched result for anything that is not an array

        Raises
        ------
        ValueError
            if none of the positional arguments is a `dm.GridArray`
        '''
        # find first instance of GridArray:
        first = None
        grid = None
        inputs = []
        for arg in args:
            if not isinstance(arg, dm.GridArray):
                inputs.append(arg)
                continue
            inputs.append(np.ma.asarray(arg))
            if first is None:
                first = arg
                grid = arg.grid
            else:
                # make sure all grids are compatible
                assert arg.grid == grid, 'Incompatible grids'

        if first is None:
            raise ValueError(
                'at least one positional argument must be a GridArray')

        axis = kwargs.get('axis')
        if axis is not None:
            if not isinstance(axis, tuple):
                axis = (axis, )
            new_axis = []
            for a in axis:
                # translate variable names and negative indices
                if isinstance(a, str):
                    a = first.grid.vars.index(a)
                if a < 0:
                    a += first.ndim
                new_axis.append(a)
            if len(new_axis) == 1:
                kwargs['axis'] = new_axis[0]
            else:
                kwargs['axis'] = tuple(new_axis)
            axis = new_axis

        result = func(*inputs, **kwargs)
        if not isinstance(result, (np.ma.masked_array, np.ndarray)):
            return result

        if result.ndim == 0:
            # np.asscalar was removed in NumPy 1.23; `.item()` is what it
            # called internally
            return result.item()

        # new grid
        if axis is not None and any(a < first.grid.nax for a in axis):
            new_grid = copy.deepcopy(first.grid)
            # need to be careful, and start deleting from last element
            for a in sorted(axis, reverse=True):
                if a < first.grid.nax:
                    del new_grid.axes[a]
        else:
            new_grid = first.grid

        new_obj = dm.GridArray(result, grid=new_grid)
        if new_obj.nax == 0:
            return new_obj.data
        return new_obj