def __getitem__(self, item, *args):
    '''Retrieve a grid variable, a masked copy, or a slice of this array.

    Parameters
    ----------
    item : str, dm.GridArray, tuple or any other index
        * str : name of a variable in ``self.grid.vars``; the array returned
          by ``self.get_array`` is wrapped in a ``dm.GridArray`` on the same
          grid
        * dm.GridArray of boolean dtype : used to build the mask of a copy
          of this array
        * tuple : converted by ``self.grid.convert_slice`` and applied to
          both the grid and the data
        * anything else (int, slice, list, ...) : wrapped into a 1-tuple and
          processed like a tuple
    *args
        accepted but not used

    Returns
    -------
    dm.GridArray, or the bare indexed (masked) data if the sliced grid has
    length 0

    Raises
    ------
    IndexError
        if `item` is a string that is not in ``self.grid.vars``
    NotImplementedError
        if `item` is a ``dm.GridArray`` with a non-boolean dtype
    '''
    if isinstance(item, str):
        if item not in self.grid.vars:
            raise IndexError('No variable %s in DataSet' % item)
        return dm.GridArray(self.get_array(item), grid=self.grid)

    if isinstance(item, dm.GridArray):
        # ``np.bool`` is not available in NumPy 1.24-1.26 (AttributeError);
        # ``np.bool_`` exists in every NumPy release.
        if item.dtype == np.bool_:
            # The new mask is True where an entry is unmasked in ``self``
            # and False in ``item``.
            # NOTE(review): logic kept unchanged; entries already masked in
            # ``self`` end up unmasked - confirm this is the intended
            # convention.
            mask = np.logical_and(~self.mask, ~np.asarray(item))
            new_item = dm.GridArray(np.asarray(self), grid=self.grid)
            new_item.mask = mask
            return new_item
        raise NotImplementedError('get item %s' % item)

    if not isinstance(item, tuple):
        # Every non-tuple index, lists included, is handled as a 1-tuple.
        # NOTE(review): a dedicated list branch used to follow this check but
        # could never be reached (and passed the builtin ``list`` instead of
        # `item`); it was dropped without changing runtime behaviour.
        return self[(item, )]

    item = self.grid.convert_slice(item)
    new_grid = self.grid[item]
    if len(new_grid) == 0:
        # then we have a single element
        return np.ma.asarray(self)[item]
    return dm.GridArray(np.ma.asarray(self)[item], grid=new_grid)
def generate_destination(self, *args, **kwargs):
    '''Create the destination container that matches the supplied input

    Parameters
    ----------
    args, kwargs
        Either a single positional object (``dm.GridData``, ``dm.Grid``,
        ``dm.GridArray`` or ``dm.PointData``) that defines the destination,
        or the arguments from which a new ``dm.Grid`` is built. When the
        source is a ``dm.GridData`` / ``dm.GridArray``, string arguments that
        name a variable of the source grid are replaced by
        ``self.source.grid[name]`` before the grid is built.

    Returns
    -------
    dm.GridData, dm.GridArray or dm.PointData
    '''

    def as_grid_data(grid):
        # initialise the grid with the source and wrap it in a GridData
        grid.initialize(self.source)
        return dm.GridData(grid)

    def as_grid_array(grid):
        # initialise the grid with the source and allocate an empty array on it
        grid.initialize(self.source)
        return dm.GridArray(np.empty(grid.shape), grid=grid)

    if len(args) == 1 and not kwargs:
        template = args[0]
        if isinstance(template, dm.GridData):
            return as_grid_data(template.grid)
        if isinstance(template, dm.Grid):
            return as_grid_data(template)
        if isinstance(template, dm.GridArray):
            return as_grid_array(template.grid)
        if isinstance(template, dm.PointData):
            # only the variables of the source grid are needed, if there is one
            if self.source_has_grid:
                return template[self.source.grid.vars]
            return template

    if isinstance(self.source, (dm.GridData, dm.GridArray)):
        # the source has a grid: names of its variables stand for those axes
        args = [
            self.source.grid[arg]
            if isinstance(arg, str) and arg in self.source.grid.vars
            else arg
            for arg in args
        ]

    fresh_grid = dm.Grid(*args, **kwargs)
    if isinstance(self.source, dm.GridArray):
        return as_grid_array(fresh_grid)
    return as_grid_data(fresh_grid)
def std_devs(self, sigmas=(1., ), fill_value=np.nan, **kwargs):
    """Mean and bands of mean +/- sigma * std for every variable

    Parameters
    ----------
    sigmas : iterable
        multiples of the standard deviation for which bands are produced
    fill_value : scalar
        forwarded to ``self.mean`` and ``self.std``
    **kwargs
        forwarded unchanged to ``self.mean`` and ``self.std``

    Returns
    -------
    dm.GridArray if ``self.single`` is true, otherwise dm.GridData.
    For each variable the bands are stacked along a new last axis in the
    order ``mean - s_k*std, ..., mean - s_1*std, mean, mean + s_1*std, ...,
    mean + s_k*std``.
    """
    # Materialise once so that one-shot iterables work for every variable.
    sigmas = list(sigmas)

    # The caller's fill value is honoured (it used to be replaced by NaN).
    mean = self.mean(fill_value=fill_value, **kwargs)
    std = self.std(fill_value=fill_value, **kwargs)

    output_maps = {}
    for var in self.data.vars:
        m = np.ma.asarray(mean[var])
        s = np.ma.asarray(std[var])
        below = [m - sigma * s for sigma in reversed(sigmas)]
        above = [m + sigma * s for sigma in sigmas]
        # Stack along a new last axis. Swapping the first and last axes of a
        # stack along axis 0 also reverses the order of the grid axes as
        # soon as there are two or more of them.
        output_maps[var] = np.stack(below + [m] + above, axis=-1)

    # Pack into GridArray
    if self.single:
        return dm.GridArray(output_maps['test'], grid=self.grid)

    # Pack into GridData
    out_data = dm.GridData(self.grid)
    for var, output_map in output_maps.items():
        out_data[var] = output_map
    return out_data
def T(self):
    '''Transpose of the array together with its grid

    The data are transposed with ``self.data.T`` and the grid with
    ``self.grid.T``. If the data carry one axis more than the grid
    (``self.ndim == self.nax + 1``), that axis is rolled back to the last
    position after the transpose.

    Returns
    -------
    ``self`` if there is at most one grid axis, otherwise a new
    ``dm.GridArray``

    Raises
    ------
    NotImplementedError
        if the data have more than one axis beyond the grid axes
    '''
    if self.ndim > self.nax + 1:
        raise NotImplementedError()
    if self.nax <= 1:
        # Nothing to reorder. This also covers ``nax == 0``, which used to
        # fail because no transposed data were ever assigned.
        return self
    new_data = self.data.T
    if self.ndim == self.nax + 1:
        # ``.T`` moved the extra (non-grid) axis to the front; move it back
        new_data = np.rollaxis(new_data, 0, self.ndim)
    return dm.GridArray(new_data, grid=self.grid.T)
def run_np_indexed(self, method, fill_value=np.nan, **kwargs):
    '''Reduce every variable per bin with a grouped reduction

    Parameters
    ----------
    method : str
        name of the reduction looked up on ``self.group``; choice of
        ['sum', 'mean', 'min', 'max', 'std', 'var', 'argmin', 'argmax',
        'median', 'mode', 'prod']
    fill_value : scalar
        value the output maps are initialised with; entries that receive no
        result keep it
    **kwargs
        only ``axis`` is inspected, and it must be None

    Returns
    -------
    dm.GridArray if ``self.single`` is true, otherwise dm.GridData
    '''
    requested_axis = kwargs.pop('axis', None)
    assert requested_axis is None, 'Axis kw not supported for BinnedData'

    if self.indices is None:
        self.compute_indices()

    reduced = {}
    maps = {}
    for name in self.data.vars:
        values = self.data[name]
        # variables with extra dimensions keep them behind the grid axes
        if values.ndim > 1:
            map_shape = self.grid.shape + values.shape[1:]
        else:
            map_shape = self.grid.shape
        maps[name] = np.full(map_shape,
                             fill_value=fill_value,
                             dtype=self.data[name].dtype)
        reducer = self.group.__getattribute__(method)
        bin_ids, reduced[name] = reducer(self.data[name])

    # scatter the grouped results into the maps; the bin ids returned for the
    # last variable are used for all variables
    for position, flat_index in enumerate(bin_ids):
        if flat_index < 0:
            continue
        target = np.unravel_index(flat_index, self.grid.shape)
        for name in self.data.vars:
            maps[name][target] = reduced[name][position]

    if self.single:
        # Pack into GridArray
        return dm.GridArray(maps['test'], grid=self.grid)

    # Pack into GridData
    packed = dm.GridData(self.grid)
    for name, filled in maps.items():
        packed[name] = filled
    return packed
def __getitem__(self, item):
    '''Retrieve a variable, a masked copy, a subset of variables or a slice

    Parameters
    ----------
    item : str, dm.GridArray, iterable of str, or index / slice
        * str : name of a variable in ``self.vars``. Entries of
          ``self.data_vars`` are read from ``self._data``; a callable entry
          is called first and its result stored via ``self[item] = ...``.
          Other variables are obtained from ``self.get_array``.
        * dm.GridArray of boolean dtype : every data variable is indexed
          with it and collected in a new ``dm.GridData`` on the same grid
        * iterable of str : new ``dm.GridData`` holding only those of the
          named variables that are in ``self.data_vars``
        * anything else : applied as an index to the grid and to every item
          of ``self.items()``

    Returns
    -------
    dm.GridArray, dm.GridData, or a dict ``{name: indexed data}`` if the
    sliced grid has length 0

    Raises
    ------
    IndexError
        if `item` is a string that is not in ``self.vars``
    NotImplementedError
        if `item` is a ``dm.GridArray`` with a non-boolean dtype
    '''
    if isinstance(item, str):
        if item not in self.vars:
            raise IndexError('No variable %s in DataSet' % item)
        if item in self.data_vars:
            data = self._data[item]
            if callable(data):
                # deferred entry: evaluate it, store the result and re-read
                self[item] = data()
                data = self._data[item]
        else:
            data = self.get_array(item)
        return dm.GridArray(data, grid=self._grid)

    # mask
    if isinstance(item, dm.GridArray):
        # ``np.bool`` is not available in NumPy 1.24-1.26 (AttributeError);
        # ``np.bool_`` exists in every NumPy release.
        if item.dtype == np.bool_:
            # in this case it is a mask
            # ToDo: masked operations behave strangely, operations are
            # applied to all elements, even if masked
            new_data = dm.GridData(self._grid)
            for v in self.data_vars:
                new_data[v] = self[v][item]
            return new_data
        raise NotImplementedError('get item %s' % item)

    # create new instance with only those vars
    # NOTE(review): an empty iterable also satisfies this test and therefore
    # yields an empty GridData instead of being treated as an index.
    if isinstance(item, Iterable) and all(isinstance(v, str) for v in item):
        new_data = dm.GridData(self._grid)
        for v in item:
            if v in self.data_vars:
                new_data[v] = self[v]
        return new_data

    # slice
    new_grid = self._grid[item]
    if len(new_grid) == 0:
        return {n: d[item] for n, d in self.items()}
    new_data = dm.GridData(new_grid)
    for n, d in self.items():
        new_data[n] = d[item]
    return new_data
def run(self):
    '''Run ``self.setup()``, evaluate the source and return the destination

    * If ``self.dest`` is a ``dm.GridArray``, a single array is evaluated
      (the source itself, or its one variable that is not in ``self.wrt``)
      and ``self.dest`` is replaced by a new ``dm.GridArray`` on the same
      grid.
    * If the source is a ``dm.GridArray``, the evaluation is stored under
      the key ``'result'``.
    * Otherwise every source variable not in ``self.wrt`` and every entry of
      ``self.additional_runs`` is stored as a zero-argument callable that
      performs the evaluation when called.
    '''
    self.setup()

    if isinstance(self.dest, dm.GridArray):
        # in this case it is a single array, no vars
        if isinstance(self.source, (dm.GridData, dm.PointData)):
            assert len(self.source.vars) - len(self.wrt) == 1
            for name in self.source.vars:
                if name not in self.wrt:
                    source_data = self.source[name]
        else:
            source_data = self.source
        self.dest = dm.GridArray(self.eval(source_data), grid=self.dest.grid)
        return self.dest

    if isinstance(self.source, dm.GridArray):
        self.dest['result'] = self.eval(self.source)
        return self.dest

    def deferred(values):
        # bind `values` now, evaluate only when the callable is invoked
        def evaluate():
            return self.eval(values)

        return evaluate

    for name in self.source.vars:
        if name not in self.wrt:
            self.dest[name] = deferred(self.source[name])

    for name, values in self.additional_runs.items():
        self.dest[name] = deferred(values)

    return self.dest
def apply_function(self,
                   function,
                   *args,
                   fill_value=np.nan,
                   return_len=None,
                   **kwargs):
    '''apply function per bin

    Parameters
    ----------
    function : callable
        called as ``function(values, *args, **kwargs)`` with the 1-d
        selection of the samples of one bin; for variables with more than
        one dimension it is called once per element of the trailing
        dimensions
    *args
        extra positional arguments handed to `function`
    fill_value : scalar
        value the output is initialised with, i.e. the content of bins
        without samples
    return_len : int (optional)
        the shape (array length) of the `function` output. If None is
        passed, it is determined by calling `function` once on the first
        three samples of the first variable
    **kwargs
        extra keyword arguments handed to `function`. If ``weights`` is
        passed, it is interpreted as the name of a variable in the dataset
        and the matching selection of that variable is handed to `function`
        as ``weights``

    Returns
    -------
    dm.GridArray if ``self.single`` is true, otherwise dm.GridData
    '''
    if self.indices is None:
        self.compute_indices()

    output_maps = {}
    weights = kwargs.pop('weights', None)

    for var in self.data.vars:
        source_data = self.data[var]

        if return_len is None:
            # try to figure out return length of function: probe it with the
            # first three samples (of the first trailing element)
            if source_data.ndim > 1:
                probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
            else:
                probe = slice(None, 3)
            if weights is not None:
                kwargs['weights'] = self.data[weights][probe]
            test_value = function(source_data[probe], *args, **kwargs)
            if np.isscalar(test_value):
                return_len = 1
            else:
                return_len = len(test_value)

        # grid axes first, then the trailing axes of the variable, then one
        # axis for multi-valued function output
        out_shape = self.grid.shape
        if source_data.ndim > 1:
            out_shape = out_shape + source_data.shape[1:]
        if return_len > 1:
            out_shape = out_shape + (return_len, )
        # NOTE(review): the output inherits the dtype of the input variable;
        # verify that this is wanted for integer inputs combined with a NaN
        # fill value or a float-valued `function`.
        output_maps[var] = np.full(out_shape,
                                   fill_value=fill_value,
                                   dtype=source_data.dtype)

    for i in range(self.grid.size):
        mask = self.indices == i
        if not np.any(mask):
            continue
        out_idx = np.unravel_index(i, self.grid.shape)
        for var in self.data.vars:
            source_data = self.data[var]
            if source_data.ndim > 1:
                for idx in np.ndindex(*source_data.shape[1:]):
                    # Address exactly one trailing element. Using the tuple
                    # `idx` as a single nested index would be interpreted as
                    # fancy indexing along one axis.
                    element = (slice(None), ) + idx
                    if weights is not None:
                        kwargs['weights'] = self.data[weights][element][mask]
                    output_maps[var][out_idx + idx] = function(
                        source_data[element][mask], *args, **kwargs)
            else:
                if weights is not None:
                    kwargs['weights'] = self.data[weights][mask]
                output_maps[var][out_idx] = function(source_data[mask], *args,
                                                     **kwargs)

    # Pack into GridArray
    if self.single:
        return dm.GridArray(output_maps['test'], grid=self.grid)

    # Pack into GridData
    out_data = dm.GridData(self.grid)
    for var, output_map in output_maps.items():
        out_data[var] = output_map
    return out_data
def apply_function(self,
                   function,
                   *args,
                   fill_value=np.nan,
                   return_len=None,
                   **kwargs):
    '''apply function per bin

    Parameters
    ----------
    function : callable
        called as ``function(values, *args, **kwargs)`` with the 1-d
        selection of the samples of one bin; for variables with more than
        one dimension it is called once per element of the trailing
        dimensions
    *args
        extra positional arguments handed to `function`
    fill_value : scalar
        value the output is initialised with, i.e. the content of bins
        without samples
    return_len : int (optional)
        length of the `function` output. If None is passed, it is
        determined by calling `function` once on the first three samples of
        the first variable
    **kwargs
        extra keyword arguments handed to `function`. If ``weights`` is
        passed, it is interpreted as the name of a variable in the dataset
        and the matching selection of that variable is handed to `function`
        as ``weights``

    Returns
    -------
    dm.GridArray if ``self.single`` is true, otherwise dm.GridData
    '''
    if self.indices is None:
        self.compute_indices()

    output_maps = {}
    weights = kwargs.pop('weights', None)

    for var in self.data.vars:
        source_data = self.data[var]

        if return_len is None:
            # try to figure out return length of function: probe it with the
            # first three samples (of the first trailing element)
            if source_data.ndim > 1:
                probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
            else:
                probe = slice(None, 3)
            # The probe must see the same keyword arguments as the real
            # calls below, including the selected weights.
            if weights is not None:
                kwargs['weights'] = self.data[weights][probe]
            test_value = function(source_data[probe], *args, **kwargs)
            if np.isscalar(test_value):
                return_len = 1
            else:
                return_len = len(test_value)

        # grid axes first, then the trailing axes of the variable, then one
        # axis for multi-valued function output
        out_shape = self.grid.shape
        if source_data.ndim > 1:
            out_shape = out_shape + source_data.shape[1:]
        if return_len > 1:
            out_shape = out_shape + (return_len, )
        output_maps[var] = np.full(out_shape,
                                   fill_value=fill_value,
                                   dtype=source_data.dtype)

    for i in range(self.grid.size):
        mask = self.indices == i
        if not np.any(mask):
            continue
        out_idx = np.unravel_index(i, self.grid.shape)
        for var in self.data.vars:
            source_data = self.data[var]
            if source_data.ndim > 1:
                for idx in np.ndindex(*source_data.shape[1:]):
                    # Address exactly one trailing element. Using the tuple
                    # `idx` as a single nested index would be interpreted as
                    # fancy indexing along one axis.
                    element = (slice(None), ) + idx
                    if weights is not None:
                        kwargs['weights'] = self.data[weights][element][mask]
                    output_maps[var][out_idx + idx] = function(
                        source_data[element][mask], *args, **kwargs)
            else:
                if weights is not None:
                    kwargs['weights'] = self.data[weights][mask]
                output_maps[var][out_idx] = function(source_data[mask], *args,
                                                     **kwargs)

    # Pack into GridArray
    if self.single:
        return dm.GridArray(output_maps['test'], grid=self.grid)

    # Pack into GridData
    out_data = dm.GridData(self.grid)
    for var, output_map in output_maps.items():
        out_data[var] = output_map
    return out_data
def wrapped_func(*args, **kwargs):
    '''Call ``func`` on plain masked arrays and re-attach a grid to the result

    ``func`` is taken from the enclosing scope. Every ``dm.GridArray`` among
    the positional arguments is replaced by ``np.ma.asarray`` of it; all
    other arguments are passed through. The first ``dm.GridArray`` provides
    the grid of the result.

    Parameters
    ----------
    *args
        positional arguments for ``func``; at least one must be a
        ``dm.GridArray`` and all of them must share the same grid
    **kwargs
        keyword arguments for ``func``. An ``axis`` entry may be an int, a
        variable name, or a tuple of those: names are translated to their
        position in ``first.grid.vars`` and negative numbers are counted
        from ``first.ndim``.

    Returns
    -------
    * the unchanged result if it is not a (masked) array
    * a Python scalar for 0-d array results
    * otherwise a ``dm.GridArray`` on the grid of the first array with the
      reduced grid axes removed, or its bare ``data`` if no grid axis is
      left

    Raises
    ------
    ValueError
        if no positional argument is a ``dm.GridArray``
    AssertionError
        if the grids of the ``dm.GridArray`` arguments differ
    '''
    # find first instance of GridArray:
    first = None
    grid = None
    inputs = []
    for arg in args:
        if not isinstance(arg, dm.GridArray):
            inputs.append(arg)
            continue
        inputs.append(np.ma.asarray(arg))
        if first is None:
            first = arg
            grid = arg.grid
        else:
            # make sure all grids are compatible
            assert arg.grid == grid, 'Incompatible grids'

    if first is None:
        raise ValueError('No GridArray found among the positional arguments')

    axis = kwargs.get('axis')
    if axis is not None:
        if not isinstance(axis, tuple):
            axis = (axis, )
        new_axis = []
        for a in axis:
            # translate them
            if isinstance(a, str):
                a = first.grid.vars.index(a)
            if a < 0:
                a += first.ndim
            new_axis.append(a)
        kwargs['axis'] = new_axis[0] if len(new_axis) == 1 else tuple(new_axis)
        axis = new_axis

    result = func(*inputs, **kwargs)

    if not isinstance(result, (np.ma.masked_array, np.ndarray)):
        return result

    if result.ndim == 0:
        # ``np.asscalar`` was removed in NumPy 1.23; ``item()`` is the
        # documented replacement
        return result.item()

    # new grid
    nax = first.grid.nax
    if axis is not None and any(a < nax for a in axis):
        new_grid = copy.deepcopy(first.grid)
        # need to be careful, and start deleting from last element
        for a in sorted(axis, reverse=True):
            if a < nax:
                del new_grid.axes[a]
    else:
        new_grid = first.grid

    new_obj = dm.GridArray(result, grid=new_grid)
    if new_obj.nax == 0:
        return new_obj.data
    return new_obj