def __getitem__(self, item):
    """
    Index the values and slice the labels to match.

    Integer indices are converted to slices (via ``slicify``) before the
    values are indexed, so that no dimension is dropped from the result.

    Parameters
    ----------
    item : int, slice, list, ndarray, or tuple of these
        Index specification. A bare int, list, or ndarray applies to the
        first axis; a tuple applies element-wise to the leading axes.

    Returns
    -------
    A new object built by ``self._constructor`` from the indexed values
    and finalized from ``self`` without propagating ``index`` or
    ``labels``. When ``self.labels`` is not None, the result's ``labels``
    are the labels indexed along the first ``len(self.baseaxes)`` axes.

    Raises
    ------
    TypeError
        If labels are present and ``item`` is of a type that cannot be
        applied to them (previously surfaced as an UnboundLocalError).
    """
    # handle values -- convert ints to slices so no dimensions are dropped
    if isinstance(item, int):
        item = (slicify(item, self.shape[0]),)
    if isinstance(item, tuple):
        item = tuple(
            slicify(i, n) if isinstance(i, int) else i
            for i, n in zip(item, self.shape[:len(item)])
        )
    if isinstance(item, (list, ndarray)):
        item = (item,)

    new = self._values.__getitem__(item)
    result = self._constructor(new).__finalize__(self, noprop=('index', 'labels'))

    # handle labels
    if self.labels is not None:
        # By this point ints, lists and ndarrays have all been wrapped in a
        # tuple above, so only a bare slice or a tuple can reach here.
        if isinstance(item, slice):
            label_item = (item,)
        elif isinstance(item, tuple):
            label_item = item[:len(self.baseaxes)]
        else:
            raise TypeError(
                "Cannot index labels with an item of type {}".format(
                    type(item).__name__))

        newlabels = self.labels
        n_label_axes = len(label_item)
        for i, s in enumerate(label_item):
            if isinstance(s, slice):
                # Apply the slice on axis i only. The selector must be a
                # tuple: NumPy no longer accepts a list of slices as a
                # multidimensional index.
                selector = tuple(
                    s if j == i else slice(None) for j in range(n_label_axes)
                )
                newlabels = newlabels[selector]
            else:
                newlabels = newlabels.take(tupleize(s), i)
        result.labels = newlabels

    return result
def _getbasic(self, index):
    """
    Basic indexing (for slices or ints).

    Each entry of ``index`` is normalized with ``slicify`` against the
    matching dimension of ``self.shape``. The first ``self.split`` slices
    select and renumber record keys; the remaining slices are applied to
    each record's value.

    Parameters
    ----------
    index : sequence of slices or ints
        One index specification per axis.

    Returns
    -------
    tuple
        ``(rdd, shape, split)``: the filtered and re-keyed RDD, the shape
        implied by the slices, and the unchanged ``self.split``.
    """
    index = tuple(slicify(s, d) for s, d in zip(index, self.shape))
    key_slices = index[:self.split]
    value_slices = index[self.split:]

    def in_range(k, s):
        # A negative-step slice runs from start down to (but excluding)
        # stop, so the bounds test has to be mirrored; otherwise every key
        # is rejected while the reported shape stays non-empty.
        if s.step > 0:
            return s.start <= k < s.stop
        return s.stop < k <= s.start

    def key_check(key):
        return all(
            in_range(k, s) and mod(k - s.start, s.step) == 0
            for k, s in zip(key, key_slices)
        )

    def key_func(key):
        # key_check has already guaranteed divisibility, so floor division
        # is exact and keeps the new keys as integers on Python 3.
        return tuple((k - s.start) // s.step for k, s in zip(key, key_slices))

    filtered = self._rdd.filter(lambda kv: key_check(kv[0]))
    rdd = filtered.map(lambda kv: (key_func(kv[0]), kv[1][value_slices]))
    shape = tuple(int(ceil((s.stop - s.start) / float(s.step))) for s in index)
    split = self.split
    return rdd, shape, split
def __getitem__(self, index):
    """
    Get an item from the array through indexing.

    Supports basic indexing with slices and ints, or advanced
    indexing with lists or ndarrays of integers.
    Mixing basic and advanced indexing across axes is currently supported
    only for a single advanced index amidst multiple basic indices.

    Parameters
    ----------
    index : tuple of slices, ints, list, tuple, or ndarrays
        One or more index specifications

    Returns
    -------
    BoltSparkArray
        With the int-indexed axes squeezed out. When every axis is indexed
        by an int, the single selected item is returned instead.

    Raises
    ------
    ValueError
        If there are more indices than dimensions, an index has an
        unsupported type, a slice/int would produce an empty dimension,
        or an advanced index is out of bounds.
    NotImplementedError
        If more than one (but not all) of the indices are advanced.
    """
    if isinstance(index, tuple):
        index = list(index)
    else:
        index = [index]

    # remember which axes were given as ints so they can be squeezed later
    int_locs = where([isinstance(i, int) for i in index])[0]

    if len(index) > self.ndim:
        raise ValueError("Too many indices for array")

    if not all(isinstance(i, (slice, int, list, tuple, ndarray)) for i in index):
        # the message now names the types that are actually accepted
        raise ValueError(
            "Each index must either be a slice, int, list, tuple, or ndarray")

    # fill unspecified axes with full slices
    if len(index) < self.ndim:
        index += [slice(0, None, None) for _ in range(self.ndim - len(index))]

    # standardize slices and bounds checking
    for n, idx in enumerate(index):
        size = self.shape[n]
        if isinstance(idx, (slice, int)):
            slc = slicify(idx, size)
            # throw an error if this would lead to an empty dimension in numpy
            if slc.step > 0:
                minval, maxval = slc.start, slc.stop
            else:
                minval, maxval = slc.stop, slc.start
            if minval > size - 1 or maxval < 1 or minval >= maxval:
                raise ValueError(
                    "Index {} in dimension {} with shape {} would "
                    "produce an empty dimension".format(idx, n, size))
            index[n] = slc
        else:
            adjusted = array(idx)
            # wrap negative positions around the end of the axis
            inds = where(adjusted < 0)
            adjusted[inds] += size
            if adjusted.min() < 0 or adjusted.max() > size - 1:
                raise ValueError(
                    "Index {} out of bounds in dimension {} with "
                    "shape {}".format(idx, n, size))
            index[n] = adjusted

    # select basic or advanced indexing
    if all(isinstance(i, slice) for i in index):
        rdd, shape, split = self._getbasic(index)
    elif all(isinstance(i, (tuple, list, ndarray)) for i in index):
        rdd, shape, split = self._getadvanced(index)
    elif sum(isinstance(i, (tuple, list, ndarray)) for i in index) == 1:
        rdd, shape, split = self._getmixed(index)
    else:
        raise NotImplementedError(
            "When mixing basic indexing (slices and int) "
            "with advanced indexing (lists, tuples, and ndarrays), "
            "can only have a single advanced index")

    # if any key indices used negative steps, records are no longer ordered
    reversed_keys = any(
        isinstance(s, slice) and s.step < 0 for s in index[:self.split])
    ordered = not (self._ordered is False or reversed_keys)

    result = self._constructor(
        rdd, shape=shape, split=split, ordered=ordered).__finalize__(self)

    # squeeze out int dimensions (and squeeze to singletons if all ints)
    if len(int_locs) == self.ndim:
        return result.squeeze().toarray()[()]
    return result.squeeze(tuple(int_locs))
def __getitem__(self, index):
    """
    Read a region from the backend through basic indexing.

    Only basic indexing (slices and ints) over two or three axes is
    implemented. Ints are normalized to slices via ``slicify`` and their
    axes are kept in the result. Lists, tuples and ndarrays pass
    validation and bounds checking but are not supported for reading.

    Parameters
    ----------
    index : tuple of slices
        One or more index specifications. Unspecified trailing axes are
        read in full.

    Returns
    -------
    The array returned by ``self._bckend.read_region`` for the requested
    x/y range, strided by the steps of the first two slices and, for a
    three-axis index, sliced along the third axis.

    Raises
    ------
    ValueError
        If there are more indices than dimensions, an index has an
        unsupported type, a slice/int would produce an empty dimension,
        or a list/ndarray index is out of bounds.
    AssertionError
        If fewer than two axes are indexed or a slice runs backwards.
    NotImplementedError
        If any index is not a slice/int, or more than three axes are
        indexed.
    """
    if isinstance(index, tuple):
        index = list(index)
    else:
        index = [index]

    if len(index) > self.ndim:
        raise ValueError("Too many indices for array")

    if not all(
            isinstance(i, (slice, int, list, tuple, np.ndarray)) for i in index):
        # the message now names the types that are actually accepted
        raise ValueError(
            "Each index must either be a slice, int, list, tuple, or ndarray")

    # fill unspecified axes with full slices
    if len(index) < self.ndim:
        index += [slice(0, None, None) for _ in range(self.ndim - len(index))]

    # standardize slices and bounds checking
    for n, idx in enumerate(index):
        size = self.shape[n]
        if isinstance(idx, (slice, int)):
            slc = slicify(idx, size)
            # throw an error if this would lead to an empty dimension in numpy
            if slc.step > 0:
                minval, maxval = slc.start, slc.stop
            else:
                minval, maxval = slc.stop, slc.start
            if minval > size - 1 or maxval < 1 or minval >= maxval:
                raise ValueError(
                    "Index {} in dimension {} with shape {} would "
                    "produce an empty dimension".format(idx, n, size))
            index[n] = slc
        else:
            adjusted = np.array(idx)
            # wrap negative positions around the end of the axis
            inds = np.where(adjusted < 0)
            adjusted[inds] += size
            if adjusted.min() < 0 or adjusted.max() > size - 1:
                raise ValueError(
                    "Index {} out of bounds in dimension {} with "
                    "shape {}".format(idx, n, size))
            index[n] = adjusted

    # only basic indexing over at most three axes can be read
    if not (all(isinstance(i, slice) for i in index) and len(index) <= 3):
        raise NotImplementedError(
            "Only basic indexing (slices and ints) over at most three "
            "dimensions is supported")

    # Raised explicitly (rather than with ``assert``) so the checks still
    # run under ``python -O``; the exception type is unchanged.
    if len(index) <= 1:
        raise AssertionError("Too short of an index")
    x_slc, y_slc = index[0], index[1]
    if x_slc.start > x_slc.stop:
        raise AssertionError("Indexes cannot be backwards")
    if y_slc.start > y_slc.stop:
        raise AssertionError("Indexes cannot be backwards")

    out_arr = self._bckend.read_region(
        xstart=int(x_slc.start), xend=int(x_slc.stop),
        ystart=int(y_slc.start), yend=int(y_slc.stop))
    # the backend reads a contiguous region; apply the strides afterwards
    out_arr = out_arr[::x_slc.step, ::y_slc.step]
    if len(index) == 3:
        return out_arr[:, :, index[2]]
    return out_arr
def __getitem__(self, index):
    """
    Get an item from the array through indexing.

    Supports basic indexing with slices and ints, or advanced
    indexing with lists or ndarrays of integers.
    Mixing basic and advanced indexing across axes is currently supported
    only for a single advanced index amidst multiple basic indices.

    Parameters
    ----------
    index : tuple of slices, ints, list, tuple, or ndarrays
        One or more index specifications

    Returns
    -------
    BoltSparkArray
        With the int-indexed axes squeezed out. When every axis is indexed
        by an int, the single selected item is returned instead.

    Raises
    ------
    ValueError
        If there are more indices than dimensions, an index has an
        unsupported type, a slice/int would produce an empty dimension,
        or an advanced index is out of bounds.
    NotImplementedError
        If more than one (but not all) of the indices are advanced.
    """
    if isinstance(index, tuple):
        index = list(index)
    else:
        index = [index]

    # remember which axes were given as ints so they can be squeezed later
    int_locs = where([isinstance(i, int) for i in index])[0]

    if len(index) > self.ndim:
        raise ValueError("Too many indices for array")

    if not all(isinstance(i, (slice, int, list, tuple, ndarray)) for i in index):
        # the message now names the types that are actually accepted
        raise ValueError(
            "Each index must either be a slice, int, list, tuple, or ndarray")

    # fill unspecified axes with full slices
    if len(index) < self.ndim:
        index += [slice(0, None, None) for _ in range(self.ndim - len(index))]

    # standardize slices and bounds checking
    for n, idx in enumerate(index):
        size = self.shape[n]
        if isinstance(idx, (slice, int)):
            slc = slicify(idx, size)
            # throw an error if this would lead to an empty dimension in numpy
            if slc.step > 0:
                minval, maxval = slc.start, slc.stop
            else:
                minval, maxval = slc.stop, slc.start
            if minval > size - 1 or maxval < 1 or minval >= maxval:
                raise ValueError(
                    "Index {} in dimension {} with shape {} would "
                    "produce an empty dimension".format(idx, n, size))
            index[n] = slc
        else:
            adjusted = array(idx)
            # wrap negative positions around the end of the axis
            inds = where(adjusted < 0)
            adjusted[inds] += size
            if adjusted.min() < 0 or adjusted.max() > size - 1:
                raise ValueError(
                    "Index {} out of bounds in dimension {} with "
                    "shape {}".format(idx, n, size))
            index[n] = adjusted

    # select basic or advanced indexing
    if all(isinstance(i, slice) for i in index):
        rdd, shape, split = self._getbasic(index)
    elif all(isinstance(i, (tuple, list, ndarray)) for i in index):
        rdd, shape, split = self._getadvanced(index)
    elif sum(isinstance(i, (tuple, list, ndarray)) for i in index) == 1:
        rdd, shape, split = self._getmixed(index)
    else:
        raise NotImplementedError(
            "When mixing basic indexing (slices and int) "
            "with advanced indexing (lists, tuples, and ndarrays), "
            "can only have a single advanced index")

    # if any key indices used negative steps, records are no longer ordered
    reversed_keys = any(
        isinstance(s, slice) and s.step < 0 for s in index[:self.split])
    ordered = not (self._ordered is False or reversed_keys)

    result = self._constructor(
        rdd, shape=shape, split=split, ordered=ordered).__finalize__(self)

    # squeeze out int dimensions (and squeeze to singletons if all ints)
    if len(int_locs) == self.ndim:
        return result.squeeze().toarray()[()]
    return result.squeeze(tuple(int_locs))