def generate_destination(self, *args, **kwargs):
    '''Build the destination container that matches the supplied arguments.

    Parameters
    ----------
    args, kwargs
        Either one positional destination object (``dm.GridData``,
        ``dm.Grid``, ``dm.GridArray`` or ``dm.PointData``) and no keywords,
        or the arguments from which a ``dm.Grid`` is constructed. When the
        source is a ``dm.GridData`` / ``dm.GridArray``, positional string
        arguments naming a variable of the source grid are replaced by
        ``self.source.grid[name]`` before the grid is built.

    Returns
    -------
    dm.GridData or dm.GridArray
        A container on the destination grid. For a ``dm.PointData``
        destination the destination itself is returned, or the result of
        indexing it with the source grid variables if the source has a grid.
    '''
    if len(args) == 1 and not kwargs:
        template = args[0]

        if isinstance(template, dm.GridData):
            target_grid = template.grid
            target_grid.initialize(self.source)
            return dm.GridData(target_grid)

        if isinstance(template, dm.Grid):
            template.initialize(self.source)
            return dm.GridData(template)

        if isinstance(template, dm.GridArray):
            target_grid = template.grid
            target_grid.initialize(self.source)
            return dm.GridArray(np.empty(target_grid.shape), grid=target_grid)

        if isinstance(template, dm.PointData):
            # only the source grid variables are requested when there is one
            return (template[self.source.grid.vars]
                    if self.source_has_grid else template)

    # A gridded source can supply axes by name: swap such names for the
    # corresponding entry of the source grid, leave everything else as is.
    if isinstance(self.source, (dm.GridData, dm.GridArray)):
        args = [
            self.source.grid[arg]
            if isinstance(arg, str) and arg in self.source.grid.vars
            else arg
            for arg in args
        ]

    # Fall back to constructing a new grid from the (resolved) arguments.
    new_grid = dm.Grid(*args, **kwargs)
    new_grid.initialize(self.source)

    if isinstance(self.source, dm.GridArray):
        return dm.GridArray(np.empty(new_grid.shape), grid=new_grid)
    return dm.GridData(new_grid)
def std_devs(self, sigmas=(1., ), fill_value=np.nan, **kwargs):
    """Mean and mean +/- sigma bands.

    Parameters
    ----------
    sigmas : iterable
        values to produce bands = mean +/- sigmas * std
    fill_value : scalar
        NOTE(review): accepted but currently not used; mean and std are
        always requested with a NaN fill value.
    **kwargs
        forwarded to ``self.mean`` and ``self.std``

    Returns
    -------
    dm.GridArray if ``self.single`` else dm.GridData
        For every variable the bands are stacked along a new last axis in
        the order ``mean - s_k*std, ..., mean - s_1*std, mean,
        mean + s_1*std, ..., mean + s_k*std`` for ``sigmas = s_1 .. s_k``.
    """
    # Materialise once: `sigmas` is iterated once per variable, which would
    # exhaust a one-shot iterator after the first variable.
    sigmas = tuple(sigmas)

    mean = self.mean(fill_value=np.nan, **kwargs)
    std = self.std(fill_value=np.nan, **kwargs)

    output_maps = {}
    for var in self.data.vars:
        m = np.ma.asarray(mean[var])
        s = np.ma.asarray(std[var])

        arrays = [m]
        for sigma in sigmas:
            delta = sigma * s
            # grow symmetrically around the mean
            arrays.append(m + delta)
            arrays.insert(0, m - delta)

        # Put the band axis last while keeping the order of all other axes.
        # (Swapping axis 0 with axis -1 would reverse the axis order for
        # inputs with more than one dimension.)
        output_maps[var] = np.stack(arrays, axis=-1)

    # Pack into GridArray
    if self.single:
        out_data = dm.GridArray(output_maps['test'], grid=self.grid)
    # Pack into GridData
    else:
        out_data = dm.GridData(self.grid)
        for var, output_map in output_maps.items():
            out_data[var] = output_map

    return out_data
def T(self):
    '''Return a transposed copy.

    A new ``dm.GridData`` is created whose grid is ``self._grid.T`` and
    whose entries are the ``.T`` of each entry yielded by ``self.items()``.
    '''
    transposed = dm.GridData()
    transposed._grid = self._grid.T
    for name, array in self.items():
        transposed[name] = array.T
    return transposed
def __getitem__(self, item):
    '''Index the data set.

    Parameters
    ----------
    item : str, dm.GridArray, iterable of str, or grid index
        * ``str``: return that variable as a ``dm.GridArray`` on this grid
        * boolean ``dm.GridArray``: used as a mask on every data variable
        * iterable of ``str``: new ``dm.GridData`` with only those of the
          named variables that are data variables
        * anything else: passed on as an index to the grid and to every
          entry

    Returns
    -------
    dm.GridArray, dm.GridData or dict
        A plain dict of indexed entries is returned when the indexed grid
        has length 0.

    Raises
    ------
    IndexError
        if `item` is a string that is not a known variable
    NotImplementedError
        if `item` is a non-boolean ``dm.GridArray``
    '''
    # single variable by name
    if isinstance(item, str):
        if item not in self.vars:
            raise IndexError('No variable %s in DataSet' % item)
        if item in self.data_vars:
            data = self._data[item]
            if callable(data):
                # stored as a callable: evaluate it, store the result via
                # item assignment, then read the stored value back
                self[item] = data()
                data = self._data[item]
        else:
            data = self.get_array(item)
        return dm.GridArray(data, grid=self._grid)

    # mask
    if isinstance(item, dm.GridArray):
        # Compare against the builtin `bool`: the `np.bool` alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        if item.dtype == bool:
            # in this case it is a mask
            # TODO: masked operations behave strangely, operations are
            # applied to all elements, even if masked
            new_data = dm.GridData(self._grid)
            for v in self.data_vars:
                new_data[v] = self[v][item]
            return new_data
        raise NotImplementedError('get item %s' % item)

    # create new instance with only those vars
    if isinstance(item, Iterable) and all(isinstance(v, str) for v in item):
        new_data = dm.GridData(self._grid)
        for v in item:
            if v in self.data_vars:
                new_data[v] = self[v]
        return new_data

    # slice
    new_grid = self._grid[item]
    if len(new_grid) == 0:
        # nothing left of the grid: hand back the bare indexed entries
        return {n: d[item] for n, d in self.items()}
    new_data = dm.GridData(new_grid)
    for n, d in self.items():
        new_data[n] = d[item]
    return new_data
def run_np_indexed(self, method, fill_value=np.nan, **kwargs):
    '''Run one of the numpy indexed group methods and map it onto the grid.

    Parameters
    ----------
    method : str
        choice of ['sum', 'mean', 'min', 'max', 'std', 'var', 'argmin',
        'argmax', 'median', 'mode', 'prod']; looked up as an attribute of
        ``self.group`` and called with each variable's data. The call is
        expected to return a pair ``(indices, values)``.
    fill_value : scalar
        value of all output entries that receive no result
    **kwargs
        only ``axis`` is inspected (and must be None); nothing is forwarded
        to the group method

    Returns
    -------
    dm.GridArray if ``self.single`` else dm.GridData
    '''
    axis = kwargs.pop('axis', None)
    assert axis is None, 'Axis kw not supported for BinnedData'

    if self.indices is None:
        self.compute_indices()

    reducer = getattr(self.group, method)

    output_maps = {}
    for var in self.data.vars:
        source_data = self.data[var]

        # trailing dimensions of the data are kept behind the grid dimensions
        if source_data.ndim > 1:
            out_shape = self.grid.shape + source_data.shape[1:]
        else:
            out_shape = self.grid.shape
        output_map = np.full(out_shape,
                             fill_value=fill_value,
                             dtype=source_data.dtype)

        # Scatter with the indices returned for *this* variable, instead of
        # relying on whatever the last variable happened to return.
        indices, reduced = reducer(source_data)
        for i, idx in enumerate(indices):
            # negative flat indices are skipped
            # (presumably entries outside the grid -- TODO confirm)
            if idx < 0:
                continue
            out_idx = np.unravel_index(idx, self.grid.shape)
            output_map[out_idx] = reduced[i]

        output_maps[var] = output_map

    # Pack into GridArray
    if self.single:
        out_data = dm.GridArray(output_maps['test'], grid=self.grid)
    else:
        # Pack into GridData
        out_data = dm.GridData(self.grid)
        for var, output_map in output_maps.items():
            out_data[var] = output_map

    return out_data
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    '''Interface to numpy universal functions.

    The ufunc is dispatched once per data variable of the first input, by
    calling ``__array_ufunc__`` of that input's entry with the matching
    entries of all inputs. Every input is indexed by variable name, so each
    one is assumed to support that lookup.

    Returns
    -------
    dm.GridData, OrderedDict or tuple
        ``dm.GridData`` with the results that are ``dm.GridArray``; an
        ``OrderedDict`` with all other results; or the tuple
        ``(OrderedDict, dm.GridData)`` if both kinds occurred.
    '''
    non_array_results = OrderedDict()
    gridded_results = dm.GridData()

    for name in inputs[0].data_vars:
        operands = [operand[name] for operand in inputs]
        outcome = operands[0].__array_ufunc__(ufunc, method, *operands,
                                              **kwargs)
        # sort each per-variable result into the matching container
        if isinstance(outcome, dm.GridArray):
            gridded_results[name] = outcome
        else:
            non_array_results[name] = outcome

    have_gridded = len(gridded_results.data_vars) != 0
    have_non_array = len(non_array_results) != 0

    if not have_gridded:
        return non_array_results
    if not have_non_array:
        return gridded_results
    return non_array_results, gridded_results
def apply_function(self,
                   function,
                   *args,
                   fill_value=np.nan,
                   return_len=None,
                   **kwargs):
    '''Apply `function` to the samples of every bin.

    Parameters
    ----------
    function : callable
        called as ``function(values, *args, **kwargs)`` with the 1-d array
        of samples that fall into one bin (one call per bin, per variable
        and per element of the data's trailing dimensions)
    *args
        forwarded to `function`
    fill_value : scalar
        value of all output entries whose bin contains no samples
    return_len : int (optional)
        the shape (array length) of the `function` output. If None is
        passed, `return_len` is (tried to be) determined on the fly by
        calling `function` on the first (up to) three samples of the first
        variable
    **kwargs
        forwarded to `function`. If ``weights`` is passed, it is interpreted
        as the name of a variable in the dataset; the matching samples of
        that variable are passed to `function` as ``weights``.

    Returns
    -------
    dm.GridArray if ``self.single`` else dm.GridData
        Output arrays have shape ``grid shape + trailing data dimensions``,
        plus a last axis of length `return_len` if that is larger than 1.
    '''
    if self.indices is None:
        self.compute_indices()

    output_maps = {}
    weights = kwargs.pop('weights', None)

    for var in self.data.vars:
        source_data = self.data[var]

        if return_len is None:
            # try to figure out return length of function: first samples,
            # element 0 of every trailing dimension -> a 1-d probe, the same
            # kind of input the per-bin calls below receive
            probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
            if weights is not None:
                kwargs['weights'] = self.data[weights][probe]
            test_value = function(source_data[probe], *args, **kwargs)
            if np.isscalar(test_value):
                return_len = 1
            else:
                return_len = len(test_value)

        out_shape = self.grid.shape
        if source_data.ndim > 1:
            out_shape = out_shape + source_data.shape[1:]
        if return_len > 1:
            out_shape = out_shape + (return_len, )
        # NOTE: np.full raises if fill_value cannot be represented in the
        # data's dtype (e.g. NaN with an integer dtype)
        output_maps[var] = np.full(out_shape,
                                   fill_value=fill_value,
                                   dtype=source_data.dtype)

    for i in range(self.grid.size):
        mask = self.indices == i
        if not np.any(mask):
            continue
        out_idx = np.unravel_index(i, self.grid.shape)
        for var in self.data.vars:
            source_data = self.data[var]
            if source_data.ndim > 1:
                for idx in np.ndindex(*source_data.shape[1:]):
                    # Splice the trailing index into the index tuple. Passing
                    # the tuple `idx` as one index would be fancy indexing on
                    # axis 1 only, which is wrong for ndim > 2.
                    column = (slice(None), ) + idx
                    if weights is not None:
                        kwargs['weights'] = self.data[weights][column][mask]
                    output_maps[var][out_idx + idx] = function(
                        source_data[column][mask], *args, **kwargs)
            else:
                if weights is not None:
                    kwargs['weights'] = self.data[weights][mask]
                output_maps[var][out_idx] = function(
                    source_data[mask], *args, **kwargs)

    # Pack into GridArray
    if self.single:
        out_data = dm.GridArray(output_maps['test'], grid=self.grid)
    # Pack into GridData
    else:
        out_data = dm.GridData(self.grid)
        for var, output_map in output_maps.items():
            out_data[var] = output_map

    return out_data
def apply_function(self,
                   function,
                   *args,
                   fill_value=np.nan,
                   return_len=None,
                   **kwargs):
    '''Apply `function` per bin.

    Parameters
    ----------
    function : callable
        called as ``function(values, *args, **kwargs)`` with the 1-d array
        of samples that fall into one bin (one call per bin, per variable
        and per element of the data's trailing dimensions)
    *args
        forwarded to `function`
    fill_value : scalar
        value of all output entries whose bin contains no samples
    return_len : int (optional)
        length of the `function` output. If None, it is determined by
        calling `function` on the first (up to) three samples of the first
        variable
    **kwargs
        forwarded to `function`. If ``weights`` is passed, it is interpreted
        as the name of a variable in the dataset; the matching samples of
        that variable are passed to `function` as ``weights``.

    Returns
    -------
    dm.GridArray if ``self.single`` else dm.GridData
        Output arrays have shape ``grid shape + trailing data dimensions``,
        plus a last axis of length `return_len` if that is larger than 1.
    '''
    if self.indices is None:
        self.compute_indices()

    output_maps = {}
    weights = kwargs.pop('weights', None)

    for var in self.data.vars:
        source_data = self.data[var]

        if return_len is None:
            # try to figure out return length of function: first samples,
            # element 0 of every trailing dimension -> a 1-d probe
            probe = (slice(None, 3), ) + (0, ) * (source_data.ndim - 1)
            # The probe gets weights exactly like the per-bin calls below, so
            # the inferred length comes from an equivalent call.
            if weights is not None:
                kwargs['weights'] = self.data[weights][probe]
            test_value = function(source_data[probe], *args, **kwargs)
            if np.isscalar(test_value):
                return_len = 1
            else:
                return_len = len(test_value)

        out_shape = self.grid.shape
        if source_data.ndim > 1:
            out_shape = out_shape + source_data.shape[1:]
        if return_len > 1:
            out_shape = out_shape + (return_len, )
        # NOTE: np.full raises if fill_value cannot be represented in the
        # data's dtype (e.g. NaN with an integer dtype)
        output_maps[var] = np.full(out_shape,
                                   fill_value=fill_value,
                                   dtype=source_data.dtype)

    for i in range(self.grid.size):
        mask = self.indices == i
        if not np.any(mask):
            continue
        out_idx = np.unravel_index(i, self.grid.shape)
        for var in self.data.vars:
            source_data = self.data[var]
            if source_data.ndim > 1:
                for idx in np.ndindex(*source_data.shape[1:]):
                    # Splice the trailing index into the index tuple. Passing
                    # the tuple `idx` as one index would be fancy indexing on
                    # axis 1 only, which is wrong for ndim > 2.
                    column = (slice(None), ) + idx
                    if weights is not None:
                        kwargs['weights'] = self.data[weights][column][mask]
                    output_maps[var][out_idx + idx] = function(
                        source_data[column][mask], *args, **kwargs)
            else:
                if weights is not None:
                    kwargs['weights'] = self.data[weights][mask]
                output_maps[var][out_idx] = function(
                    source_data[mask], *args, **kwargs)

    # Pack into GridArray
    if self.single:
        out_data = dm.GridArray(output_maps['test'], grid=self.grid)
    # Pack into GridData
    else:
        out_data = dm.GridData(self.grid)
        for var, output_map in output_maps.items():
            out_data[var] = output_map

    return out_data