def _getitems(self, indices=None, axis=0, indexing=None, tol=None, broadcast=None, keepdims=False, raise_error=False):
    """ Index every variable of the dataset along the shared axes

    Parameters
    ----------
    indices, axis, indexing, tol, keepdims :
        forwarded unchanged to `self._get_indices`, whose result is paired
        one-to-one (in order) with `self.axes`
    broadcast :
        accepted for signature compatibility, not used by this method
    raise_error : bool, optional
        if True, raise a ValueError as soon as a variable lacks one of the
        indexed dimensions. If False (default), that dimension is simply
        skipped for that variable.

    Returns
    -------
    newdata : new instance of `self.__class__` holding, under the same keys,
        each variable indexed in 'position' mode along the dimensions it has

    Raises
    ------
    ValueError
        when `raise_error` is True and a variable is missing a dimension
    """
    # first find the (position) index for each of the shared axes
    tuple_indices = self._get_indices(indices, axis=axis, tol=tol, keepdims=keepdims, indexing=indexing)

    # map dimension name -> index to apply along that dimension
    indices_dict = {ax.name: ix for ix, ax in zip(tuple_indices, self.axes)}

    # then index all arrays, one after the other
    newdata = self.__class__()

    # loop over variables
    for k in self.keys():
        v = self[k]

        # loop over axes to index on
        for dim, ix in indices_dict.items():

            # scalars and variables without this dimension cannot be indexed on it
            # (the ndim test comes first: a scalar may have no `dims` attribute)
            if np.ndim(v) == 0 or dim not in v.dims:
                if raise_error:
                    raise ValueError("{} does not have dimension {} ==> set raise_error=False to keep this variable unchanged".format(k, dim))
                continue

            # slice along one axis; indices are already positions
            v = v.take({dim: ix}, indexing='position')

        newdata[k] = v

    return newdata
def _check_axis_values(values, dtype=None):
    """ convert axis values to a 1-D numpy array, with "object" dtype instead of string

    Parameters
    ----------
    values : array-like
        candidate axis values. A 2-D input (sequence of sequences) is turned
        into a 1-D object array whose elements are tuples (one per row).
    dtype : optional
        passed to `np.asarray`

    Returns
    -------
    values : 1-D numpy array; string arrays (kind "S" or "U") are converted
        to object dtype

    Raises
    ------
    TypeError
        if the values cannot be converted to a numpy array
    ValueError
        if the resulting array is not 1-D
    """
    try:
        values = np.asarray(values, dtype=dtype)
    except Exception as error:
        # exceptions have no `.message` attribute on Python 3: use str(error)
        raise TypeError(str(error) + "\n==> axis values could not be converted to numpy array")

    # Treat the particular case of a sequence of sequences, leads to a 2-D array
    # ==> convert to a list of tuples
    if values.ndim == 2:
        try:
            val = np.empty(values.shape[0], dtype=object)
            val[:] = list(zip(*values.T.tolist()))  # pass a list of tuples
            values = val
        except Exception:
            # best effort only: if the conversion fails, `values` is left 2-D
            # and rejected by the dimension check below
            pass

    if values.ndim != 1:
        raise ValueError("an Axis object can only be 1-D, got ndim={}".format(values.ndim))

    # store strings as generic python objects
    if values.dtype.kind in ("S", "U"):
        values = np.asarray(values, dtype=object)

    return values
def _set_dims(self, newdims):
    """ rename the dimensions (axis names)

    Parameters
    ----------
    newdims : dict or sequence
        either a mapping {old name: new name}, or a sequence of new names
        with one entry per element of `self.dims`, in the same order

    Raises
    ------
    TypeError
        if `newdims` is not iterable
    ValueError
        if a sequence is given whose length differs from `len(self.dims)`
    """
    if not np.iterable(newdims):
        raise TypeError("new dims must be iterable")

    if not isinstance(newdims, dict):
        if len(newdims) != len(self.dims):
            raise ValueError("dimensions number mismatch")
        newdims = dict(zip(self.dims, newdims))

    # Resolve every axis *before* renaming any of them: renaming while still
    # looking axes up by name breaks permutations such as {'x': 'y', 'y': 'x'},
    # where the second lookup could hit the axis that was just renamed.
    renames = [(self.axes[old], new) for old, new in newdims.items()]
    for ax, new in renames:
        ax.name = new
def _get_values(self):
    """ values of the (possibly grouped) axis

    With a single axis, its values are returned as they are. With several
    axes, the flattened combination of their values (as computed by
    `_flatten`) is returned as a 1-D object array, each element being a tuple
    with one value per axis.
    """
    n_axes = len(self.axes)
    if n_axes == 1:
        return self.axes[0].values

    # 2-D array: one row per combination, one column per axis
    combined = _flatten(*[ax.values for ax in self.axes])

    # store each row as a tuple in a 1-D object array
    tuples = np.empty(combined.shape[0], dtype=object)
    tuples[:] = list(zip(*combined.T.tolist()))
    return tuples
def from_arrays(cls, arrays, dims=None):
    """ build an instance from a list of arrays and matching names

    Parameters
    ----------
    arrays : iterable of arrays
    dims : sequence of names, optional
        one name per array; defaults to "x0", "x1", ...

    Returns
    -------
    `cls` called with the list of (name, array) pairs
    """
    inputs_ok = np.iterable(arrays) and (dims is None or len(dims) == len(arrays))
    assert inputs_ok, "invalid input arrays={}, dims={}".format(arrays, dims)

    # fall back on default names when none are provided
    names = dims
    if names is None:
        names = ["x{}".format(i) for i in range(len(arrays))]

    pairs = list(zip(names, arrays))
    return cls(pairs)
def _flatten(*list_of_arrays):
    """ combine arrays ax1, ax2, ... into all their combinations

    The result is a 2-D array with one row per combination, in "ij" order
    (the last array varies fastest):
    [(ax1[0], ax2[0], ax3[0]..), (ax1[0], ax2[0], ax3[1]..), ...]

    A single input array is returned as it is.
    """
    n_arrays = len(list_of_arrays)
    assert n_arrays > 0, "empty axis"

    if n_arrays == 1:
        return list_of_arrays[0]

    # one grid per input array, each flattened into a column
    grids = np.meshgrid(*list_of_arrays, indexing="ij")
    columns = [g.ravel() for g in grids]
    array_of_tuples = np.array(list(zip(*columns)))

    # sanity checks on the resulting shape
    assert array_of_tuples.shape[1] == len(list_of_arrays), "pb when reshaping: {} and {}".format(array_of_tuples.shape, len(list_of_arrays))
    assert array_of_tuples.shape[0] == np.prod([x.size for x in list_of_arrays]), "pb when reshaping: {} and {}".format(array_of_tuples.shape, np.prod([x.size for x in list_of_arrays]))

    return array_of_tuples
def _get_axes_info(self, axes):
    """ return axis (dimension) positions AND names from a sequence of axis (dimension) positions OR names

    Parameters
    ----------
    axes : sequence of str or int, representing axis (dimension) names or positions, possibly mixed up.
        Each element is resolved with `self._get_axis_info`.

    Returns
    -------
    pos : tuple of `int` indicating dimension's rank in the array
    names : tuple of dimension names

    Both tuples are empty when `axes` is empty.
    """
    infos = [self._get_axis_info(x) for x in axes]

    # zip(*[]) yields nothing to unpack: handle the empty case explicitly
    if not infos:
        return (), ()

    pos, names = zip(*infos)
    return pos, names
def getaxes_broadcast(obj, indices):
    """ broadcast array-indices & integers, numpy's classical

    Compute the axes that result from indexing `obj` with `indices`, when
    array indices and integers are broadcast together the way numpy does.

    Parameters
    ----------
    obj : object with an `axes` attribute; each axis is indexed with `ax[ix]`
        and exposes `values` and `name`
    indices : sequence of indices, used position-wise against `obj.axes`

    Returns
    -------
    newaxes : a plain `list` of axes when at most one of the broadcast
        indices is iterable, an `Axes` instance otherwise
        NOTE(review): the two branches return different container types --
        confirm that callers accept both.

    Examples
    --------
    >>> import dimarray as da
    >>> a = da.zeros(shape=(3,4,5,6))
    >>> a.take((slice(None),[0, 1],slice(None),2), broadcast=True).shape
    (2, 3, 5)
    >>> a.take((slice(None),[0, 1],2,slice(None)), broadcast=True).shape
    (3, 2, 6)
    """
    from dimarray import Axis, Axes

    # new axes: broadcast indices (integers are presumably broadcast to arrays
    # by `broadcast_indices`, which is defined elsewhere -- TODO confirm)
    indices2 = broadcast_indices(indices)
    # assert np.all(newval == obj.values[indices2])

    # make a multi-axis with tuples
    # is_array2[i] is True when the i-th broadcast index is iterable
    is_array2 = np.array([np.iterable(ix) for ix in indices2])
    nb_array2 = is_array2.sum()

    # If none or one array is present, easy: index each axis on its own and
    # drop the axes indexed with a scalar
    if nb_array2 <= 1:
        newaxes = [obj.axes[i][ix] for i, ix in enumerate(indices) if not np.isscalar(ix)] # indices or indices2, does not matter

    # else, finer check needed
    else:
        # same stats but on original indices
        is_array = np.array([np.iterable(ix) for ix in indices])
        array_ix_pos = np.where(is_array)[0]

        # Determine where the axis will be inserted
        # - need to consider the integers as well (broadcast as arrays)
        # - if two indexed dimensions are not contiguous, new axis placed at first position...
        # obj = zeros((3,4,5,6))
        # obj[:,[1,2],:,0].shape ==> (2, 3, 5)
        # obj[:,[1,2],0,:].shape ==> (3, 2, 6)
        array_ix_pos2 = np.where(is_array2)[0]
        if np.any(np.diff(array_ix_pos2) > 1): # that means, if two indexed dimensions are not contiguous
            insert = 0
        else:
            insert = array_ix_pos2[0]

        # Now determine axis value
        # ...if originally only one array was provided, use these values correspondingly
        if len(array_ix_pos) == 1:
            i = array_ix_pos[0]
            values = obj.axes[i].values[indices[i]]
            name = obj.axes[i].name

        # ...else use a list of tuples (one element per originally-array
        # axis), and join the corresponding axis names with commas
        else:
            values = list(zip(*[obj.axes[i].values[indices2[i]] for i in array_ix_pos]))
            name = ",".join([obj.axes[i].name for i in array_ix_pos])

        broadcastaxis = Axis(values, name)

        # collect the axes that are not absorbed by the broadcast axis
        newaxes = Axes()
        for i, ax in enumerate(obj.axes):

            # axis is already part of the broadcast axis: skip
            if is_array2[i]:
                continue

            else:
                newaxis = ax[indices2[i]]

                ## do not append axis if scalar
                #if np.isscalar(newaxis):
                #    continue

            newaxes.append(newaxis)

        # insert the right new axis at the appropriate position
        newaxes.insert(insert, broadcastaxis)

    return newaxes
def todict(self):
    """ return the content as an `odict` mapping each key to its value

    Keys and values are paired in the order given by `self.keys()` and
    `self.values()`.
    """
    pairs = zip(self.keys(), self.values())
    return odict(pairs)