def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    A boolean indexer is validated with ``_check_bool_indexer`` and used to
    mask the axis labels.  Any other key is turned into an array of labels;
    integer keys on an axis that ``_is_integer_index`` does not accept are
    treated as positions and mapped to labels with ``labels.take``.  The
    selection itself is always a reindex of ``self.obj``.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series (per the original comment): only axis 0 makes sense.
            # Raised explicitly so the check survives ``python -O``.
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        return _reindex(labels[np.asarray(key)])

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
        keyarr = labels.take(keyarr)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    return _reindex(keyarr, level=level)
def _getitem_axis(self, key, axis=0):
    """Dispatch a positional ``key`` for one axis to the matching getter.

    Slices and boolean indexers are validated with ``_has_valid_type`` and
    handed to ``_get_slice_axis`` / ``_getbool_axis``.  A list-like key is
    copied into a real ``list``; a scalar is passed through
    ``_convert_scalar_indexer`` and must then be an integer.

    Raises
    ------
    TypeError
        If a scalar key is not an integer after conversion.
    """
    if isinstance(key, slice):
        self._has_valid_type(key, axis)
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        self._has_valid_type(key, axis)
        return self._getbool_axis(key, axis=axis)

    # what remains is a single integer or a list of integers
    if _is_list_like(key):
        # force an actual list
        key = list(key)
    else:
        key = self._convert_scalar_indexer(key, axis)
        if not com.is_integer(key):
            raise TypeError("Cannot index by location index with a non-integer key")

    return self._get_loc(key, axis=axis)
def _has_valid_type(self, key, axis):
    """Validate ``key`` as a label-based indexer for ``axis``.

    Accepted keys:

    - a slice whose non-None bounds are both in the axis (bounds are not
      checked when ``ax.is_floating()`` is true),
    - a boolean indexer,
    - a list-like whose elements are all in the axis (a tuple on a
      MultiIndex axis is passed through unchecked),
    - a scalar label that is in the axis after ``_convert_scalar_indexer``.

    Returns
    -------
    bool
        Always True; invalid keys raise instead.

    Raises
    ------
    KeyError
        If a slice bound, list element or scalar label is missing.
    ValueError
        If a scalar key is null according to ``isnull``.
    TypeError
        Re-raised from the scalar lookup unless its message contains
        'unorderable'.
    """
    ax = self.obj._get_axis(axis)

    if isinstance(key, slice):
        if ax.is_floating():
            # allowing keys to be slicers with no fallback
            pass
        else:
            if key.start is not None and key.start not in ax:
                raise KeyError("start bound [%s] is not in the [%s]" % (key.start, self.obj._get_axis_name(axis)))
            if key.stop is not None and key.stop not in ax:
                raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop, self.obj._get_axis_name(axis)))

    elif com._is_bool_indexer(key):
        return True

    elif _is_list_like(key):
        # mi is just a passthru
        if isinstance(key, tuple) and isinstance(ax, MultiIndex):
            return True

        # require all elements in the index
        idx = _ensure_index(key)
        if not idx.isin(ax).all():
            raise KeyError("[%s] are not in ALL in the [%s]" % (key, self.obj._get_axis_name(axis)))

        return True

    else:

        def error():
            if isnull(key):
                raise ValueError("cannot use label indexing with a null key")
            raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis)))

        try:
            key = self._convert_scalar_indexer(key, axis)
            found = key in ax
        except TypeError as e:
            # python 3 type errors should be raised
            if 'unorderable' in str(e):  # pragma: no cover
                error()
            raise
        except Exception:
            # Any other lookup failure is reported as a missing label.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not turned into KeyError.
            error()
        else:
            if not found:
                error()

    return True
def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    A boolean indexer is validated with ``_check_bool_indexer`` and used to
    mask the axis labels before reindexing.  Integer keys on an axis that
    ``_is_integer_index`` does not accept are treated as positions and
    served by ``self.obj.take``.  Everything else is a label-based reindex
    of ``self.obj``.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series (per the original comment): only axis 0 makes sense.
            # Raised explicitly so the check survives ``python -O``.
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        return _reindex(labels[np.asarray(key)])

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
        return self.obj.take(keyarr, axis=axis)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    return _reindex(keyarr, level=level)
def _has_valid_type(self, key, axis):
    """Report whether ``key`` is usable as a positional indexer.

    Non-boolean keys are valid when they are a slice, an integer or
    list-like.  A boolean indexer is valid unless it has an ``index``
    attribute that is an ``Index``.

    Raises
    ------
    NotImplementedError
        If the boolean key carries an ``Index`` whose inferred type is
        'integer'.
    ValueError
        If the boolean key carries any other ``Index``.
    """
    if not com._is_bool_indexer(key):
        return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key)

    carries_index = hasattr(key, 'index') and isinstance(key.index, Index)
    if carries_index:
        if key.index.inferred_type == 'integer':
            raise NotImplementedError("iLocation based boolean indexing on an integer type is not available")
        raise ValueError("iLocation based boolean indexing cannot use an indexable as a mask")

    return True
def __getitem__(self, key):
    """Index into the underlying ndarray view.

    A scalar key returns the raw element.  Any other key returns a new
    ``Index`` built from the selection and carrying ``self.name``; boolean
    indexers are first converted with ``np.asarray``.
    """
    raw = self.view(np.ndarray)

    if np.isscalar(key):
        return raw[key]

    indexer = np.asarray(key) if _is_bool_indexer(key) else key
    return Index(raw[indexer], name=self.name)
def __getitem__(self, key):
    """Index into the underlying ndarray view.

    A scalar key returns the raw element.  Any other key returns a new
    ``Index`` built from the selection; boolean indexers are first
    converted with ``np.asarray``.

    The new ``Index`` is given ``self.name`` so that slicing or masking
    does not silently drop the name.
    NOTE(review): assumes this class exposes ``name`` and that ``Index``
    accepts a ``name`` keyword, as the sibling implementation does - confirm.
    """
    arr_idx = self.view(np.ndarray)

    if np.isscalar(key):
        return arr_idx[key]

    if _is_bool_indexer(key):
        key = np.asarray(key)

    return Index(arr_idx[key], name=self.name)
def delete(self, item):
    """Remove ``item`` from its block and from ``self.items``.

    The block holding ``item`` is found with ``_find_block`` and the item's
    location is looked up in ``self.items`` before the block is modified.
    If that location is a boolean mask, it is converted to a list of the
    positions that are true before being passed to ``self.items.delete``.
    """
    block_pos, _ = self._find_block(item)
    loc = self.items.get_loc(item)

    self._delete_from_block(block_pos, item)

    if com._is_bool_indexer(loc):
        # dupe keys may return mask
        loc = [pos for pos, flag in enumerate(loc) if flag]

    self.set_items_norename(self.items.delete(loc))
def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    - A boolean indexer is validated with ``_check_bool_indexer`` and the
      true positions are taken.
    - Integer keys on an axis whose inferred type is not 'integer' are
      positional (negative values are wrapped by ``len(labels)``).  On a
      'mixed-integer' axis the keys are tried as labels first, and used as
      positions only if some key is not a label.
    - Otherwise the keys are labels: a reindex when the axis is unique,
      else a ``take`` of every position whose label is in the keys.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = key.nonzero()
        return self.obj.take(inds, axis=axis, convert=False)

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr):
        if labels.inferred_type != 'integer':
            keyarr = np.where(keyarr < 0, len(labels) + keyarr, keyarr)

        if labels.inferred_type == 'mixed-integer':
            indexer = labels.get_indexer(keyarr)
            if (indexer >= 0).all():
                # Every key is an existing label.  The original computed
                # this result and discarded it; return it.
                return self.obj.take(indexer, axis=axis, convert=True)
            return self.obj.take(keyarr, axis=axis)
        elif labels.inferred_type != 'integer':
            return self.obj.take(keyarr, axis=axis)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    if labels.is_unique:
        return _reindex(keyarr, level=level)

    mask = labels.isin(keyarr)
    return self.obj.take(mask.nonzero()[0], axis=axis, convert=False)
def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    - A boolean indexer is validated with ``_check_bool_indexer``, coerced
      to a bool array, and the true positions are taken.
    - Integer keys on an axis whose inferred type is not 'integer' are
      positional (negative values are wrapped by ``len(labels)``).  On a
      'mixed-integer' axis the keys are tried as labels first, and used as
      positions only if some key is not a label.
    - Otherwise the keys are labels: a reindex when the axis is unique,
      else a ``take`` of every position whose label is in the keys.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = np.asarray(key, dtype=bool).nonzero()
        return self.obj.take(inds, axis=axis)

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr):
        if labels.inferred_type != 'integer':
            keyarr = np.where(keyarr < 0, len(labels) + keyarr, keyarr)

        if labels.inferred_type == 'mixed-integer':
            indexer = labels.get_indexer(keyarr)
            if (indexer >= 0).all():
                # Every key is an existing label.  The original computed
                # this result and discarded it; return it.
                return self.obj.take(indexer, axis=axis)
            return self.obj.take(keyarr, axis=axis)
        elif labels.inferred_type != 'integer':
            return self.obj.take(keyarr, axis=axis)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    if labels.is_unique:
        return _reindex(keyarr, level=level)

    mask = labels.isin(keyarr)
    return self.obj.take(mask.nonzero()[0], axis=axis)
def _has_valid_type(self, key, axis):
    """Validate ``key`` as a label-based indexer for ``axis``.

    Accepted keys:

    - a slice whose non-None bounds are both in the axis,
    - a boolean indexer,
    - a list-like whose elements are all in the axis,
    - a scalar label that is in a non-empty axis.

    Returns
    -------
    bool
        Always True; invalid keys raise instead.

    Raises
    ------
    KeyError
        If a slice bound, list element or scalar label is missing, if the
        axis is empty for a scalar key, or if the membership test for a
        scalar key raises TypeError.
    """
    ax = self.obj._get_axis(axis)

    if isinstance(key, slice):
        if key.start is not None and key.start not in ax:
            raise KeyError("start bound [%s] is not in the [%s]" % (key.start, self.obj._get_axis_name(axis)))
        if key.stop is not None and key.stop not in ax:
            raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop, self.obj._get_axis_name(axis)))

    elif com._is_bool_indexer(key):
        return True

    elif _is_list_like(key):
        # require all elements in the index
        idx = _ensure_index(key)
        if not idx.isin(ax).all():
            raise KeyError("[%s] are not in ALL in the [%s]" % (key, self.obj._get_axis_name(axis)))

        return True

    else:
        # if its empty we want a KeyError here
        if not len(ax):
            raise KeyError("The [%s] axis is empty" % self.obj._get_axis_name(axis))

        try:
            if key not in ax:
                raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis)))
        except TypeError:
            # if we have a weird type of key/ax
            raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis)))

    return True
def __getitem__(self, key):
    """Index into the ordinal array, re-boxing the result.

    A scalar key yields a ``Period`` built from the stored ordinal and
    ``self.freq``.  Any other key yields a ``PeriodIndex`` over the
    selection with the same name and frequency; boolean indexers are first
    converted with ``np.asarray``.
    """
    ordinals = self.view(np.ndarray)

    if np.isscalar(key):
        return Period(ordinal=ordinals[key], freq=self.freq)

    if com._is_bool_indexer(key):
        key = np.asarray(key)

    # The original branched on ``result.ndim > 1`` but built the same
    # PeriodIndex on both sides, so one return covers every case.
    selection = ordinals[key]
    return PeriodIndex(selection, name=self.name, freq=self.freq)
def __getitem__(self, key):
    """Index into the ordinal array, re-boxing the result.

    A scalar key yields a ``Period`` and any other key yields a
    ``PeriodIndex`` over the selection with the same name and frequency.
    Boolean indexers are first converted with ``np.asarray``.

    The stored value is passed to ``Period`` as ``ordinal=``, matching the
    sibling implementations; passing it positionally hands the raw ordinal
    to ``Period`` as its generic value argument.
    NOTE(review): presumably a positional integer is parsed as a date
    rather than used as an ordinal - confirm against ``Period``.
    """
    arr_idx = self.view(np.ndarray)

    if np.isscalar(key):
        val = arr_idx[key]
        return Period(ordinal=val, freq=self.freq)

    if com._is_bool_indexer(key):
        key = np.asarray(key)

    # The original branched on ``result.ndim > 1`` but built the same
    # PeriodIndex on both sides, so one return covers every case.
    result = arr_idx[key]
    return PeriodIndex(result, name=self.name, freq=self.freq)
def _getitem_axis(self, key, axis=0):
    """Dispatch a positional ``key`` for one axis to the matching getter.

    Slices go to ``_get_slice_axis``, boolean indexers to
    ``_getbool_axis``, and integers or list-likes to ``_get_loc``.

    Raises
    ------
    ValueError
        If the key is none of the above.
    """
    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        return self._getbool_axis(key, axis=axis)

    # a single integer or a list of integers
    is_positional = com.is_integer(key) or _is_list_like(key)
    if not is_positional:
        raise ValueError("Cannot index by location index with a non-integer key")

    return self._get_loc(key, axis=axis)
def _getitem_axis(self, key, axis=0):
    """Dispatch ``key`` for one axis to the matching getter.

    Slices go to ``_get_slice_axis`` and boolean indexers to
    ``_getbool_axis``.  List-like keys go to ``_getitem_iterable``, except
    a tuple on a MultiIndex axis, which is looked up as a single label
    like any scalar.

    Raises
    ------
    ValueError
        If a list-like key has an ``ndim`` greater than 1.
    """
    labels = self.obj._get_axis(axis)

    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        return self._getbool_axis(key, axis=axis)

    if not _is_list_like(key):
        return self._get_label(key, axis=axis)

    if isinstance(key, tuple) and isinstance(labels, MultiIndex):
        return self._get_label(key, axis=axis)

    if hasattr(key, "ndim") and key.ndim > 1:
        raise ValueError("Cannot index with multidimensional key")

    return self._getitem_iterable(key, axis=axis)
def _getitem_axis(self, key, axis=0):
    """Dispatch ``key`` for one axis to the matching getter.

    Slices go to ``_get_slice_axis`` and boolean indexers to
    ``_getbool_axis``.  List-like keys go to ``_getitem_iterable``, except
    a tuple on a MultiIndex axis, which is looked up as a single label
    like any scalar.

    Raises
    ------
    ValueError
        If a list-like key has an ``ndim`` greater than 1.
    """
    labels = self.obj._get_axis(axis)

    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        return self._getbool_axis(key, axis=axis)

    if not _is_list_like(key):
        return self._get_label(key, axis=axis)

    if isinstance(key, tuple) and isinstance(labels, MultiIndex):
        return self._get_label(key, axis=axis)

    if hasattr(key, 'ndim') and key.ndim > 1:
        raise ValueError('Cannot index with multidimensional key')

    return self._getitem_iterable(key, axis=axis)
def __getitem__(self, key):
    """
    Return the item(s) selected by ``key``; implements ``series[key]``.

    Lookup order:
    - With a MultiIndex, the key is delegated to ``_multilevel_index``.
    - Otherwise the key is looked up in the index and the matching value
      is returned.  If the key is missing but is an integer, it is used
      as a position into the values.
    - If either lookup raises TypeError, the key is treated as an indexer
      (boolean mask, slice, sequence) and a new Series is built from the
      selected values and the matching part of the index.
    """
    try:
        if isinstance(self.index, MultiIndex):
            return self._multilevel_index(key)

        data = self.values
        try:
            return data[self.index.get_loc(key)]
        except KeyError:
            if not isinstance(key, (int, np.integer)):
                raise
            return data[key]
    except TypeError:
        # not usable as a label; fall through to indexer handling
        pass

    def _subset(indexer):
        return Series(self.values[indexer], index=self.index[indexer])

    # special handling of boolean data with NAs stored in object
    # arrays. Sort of an elaborate hack since we can't represent boolean
    # NA. Hmm
    if _is_bool_indexer(key):
        self._check_bool_indexer(key)
        return _subset(np.asarray(key, dtype=bool))

    # TODO: [slice(0, 5, None)] will break if you convert to ndarray,
    # e.g. as requested by np.median
    try:
        return _subset(key)
    except Exception:
        # retry once with the key coerced to an ndarray
        return _subset(np.asarray(key))
def _has_valid_type(self, key, axis):
    """Accept any slice, boolean indexer or list-like key.

    A scalar key is passed through ``_convert_scalar_indexer``; its return
    value is ignored, so the call matters only for whatever that method
    raises.  Returns True whenever nothing raises.
    """
    # Result unused; the lookup is kept because the original performs it
    # (presumably it raises for an invalid axis - TODO confirm).
    self.obj._get_axis(axis)

    if isinstance(key, slice) or com._is_bool_indexer(key) or _is_list_like(key):
        return True

    self._convert_scalar_indexer(key, axis)
    return True
def _has_valid_type(self, key, axis):
    """Validate ``key`` as a label-based indexer for ``axis``.

    Accepted keys:

    - a slice whose non-None bounds are both in the axis,
    - a boolean indexer,
    - a list-like whose elements are all in the axis,
    - a scalar label that is in a non-empty axis.

    Returns
    -------
    bool
        Always True; invalid keys raise instead.

    Raises
    ------
    KeyError
        If a slice bound, list element or scalar label is missing, if the
        axis is empty for a scalar key, or if the membership test for a
        scalar key raises TypeError.
    """
    ax = self.obj._get_axis(axis)

    if isinstance(key, slice):
        if key.start is not None and key.start not in ax:
            raise KeyError("start bound [%s] is not in the [%s]" % (key.start, self.obj._get_axis_name(axis)))
        if key.stop is not None and key.stop not in ax:
            raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop, self.obj._get_axis_name(axis)))

    elif com._is_bool_indexer(key):
        return True

    elif _is_list_like(key):
        # require all elements in the index
        idx = _ensure_index(key)
        if not idx.isin(ax).all():
            raise KeyError("[%s] are not in ALL in the [%s]" % (key, self.obj._get_axis_name(axis)))

        return True

    else:
        # if its empty we want a KeyError here
        if not len(ax):
            raise KeyError("The [%s] axis is empty" % self.obj._get_axis_name(axis))

        try:
            if key not in ax:
                raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis)))
        except TypeError:
            # if we have a weird type of key/ax
            raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis)))

    return True
def __getitem__(self, key):
    """Index into ``self._data``, re-boxing the result.

    A scalar key yields a ``Timedelta``.  A boolean indexer becomes a full
    slice when every entry is true, and is otherwise passed through
    ``lib.maybe_booleans_to_slice``.  A selection with more than one
    dimension is returned as-is; otherwise it is wrapped with
    ``_simple_new`` under ``self.name``.
    """
    fetch = self._data.__getitem__

    if np.isscalar(key):
        return Timedelta(fetch(key))

    if com._is_bool_indexer(key):
        mask = np.asarray(key)
        if mask.all():
            key = slice(0, None, None)
        else:
            key = lib.maybe_booleans_to_slice(mask.view(np.uint8))

    selection = fetch(key)
    if selection.ndim > 1:
        return selection

    return self._simple_new(selection, self.name)
def __getitem__(self, key):
    """Index into the ordinal array, re-boxing the result.

    A scalar key yields a ``Period``.  A selection of at most one
    dimension yields a ``PeriodIndex`` with the same name and frequency.
    A selection with more dimensions is squeezed into a ``PeriodIndex``,
    expanded into an object array of its elements, and reshaped back to
    the selection's shape.
    """
    ordinals = self.view(np.ndarray)

    if np.isscalar(key):
        return Period(ordinal=ordinals[key], freq=self.freq)

    if com._is_bool_indexer(key):
        key = np.asarray(key)

    selection = ordinals[key]
    if selection.ndim <= 1:
        return PeriodIndex(selection, name=self.name, freq=self.freq)

    flat = PeriodIndex(selection.squeeze(), name=self.name, freq=self.freq)
    boxed = np.asarray(list(flat), dtype=object)
    return boxed.reshape(selection.shape)
def _multi_take_opportunity(self, tup):
    """Decide whether the indexers in ``tup`` qualify for the multi-take path.

    Returns False unless ``self.obj`` is an ``NDFrame``, every element of
    ``tup`` is list-like, no paired axis is a ``MultiIndex`` and no
    indexer is a boolean indexer.
    """
    from pandas.core.generic import NDFrame

    # ugly hack for GH #836
    if not isinstance(self.obj, NDFrame):
        return False

    if not all(_is_list_like(x) for x in tup):
        return False

    # just too complicated
    too_complex = any(
        isinstance(ax, MultiIndex) or com._is_bool_indexer(indexer)
        for indexer, ax in zip(tup, self.obj._data.axes)
    )
    return not too_complex
def _convert_for_reindex(self, key, axis=0):
    """Turn ``key`` into the labels to reindex ``axis`` with.

    A boolean indexer is validated with ``_check_bool_indexer`` and used
    to mask the axis labels.  Integer keys on an axis that
    ``_is_integer_index`` does not accept are treated as positions and
    mapped to labels with ``labels.take``.  Any other key is returned as
    an array; an ``Index`` is returned unchanged.
    """
    labels = self.obj._get_axis(axis)

    if com._is_bool_indexer(key):
        mask = _check_bool_indexer(labels, key)
        return labels[np.asarray(mask)]

    # Index objects pass through untouched; asarray can be unsafe for
    # anything else (NumPy strings are weird), hence the tuple-safe helper
    keyarr = key if isinstance(key, Index) else _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
        return labels.take(keyarr)

    return keyarr
def _getitem_iterable(self, key, axis=0):
    """Reindex ``self.obj`` along ``axis`` to the labels named by ``key``.

    A boolean indexer is validated with ``_check_bool_indexer`` and used
    to mask the axis labels.  Integer keys on an axis that
    ``_is_integer_index`` does not accept are treated as positions and
    mapped to labels with ``labels.take``.  The target labels are passed
    to ``self.obj.reindex`` under the axis' name.
    """
    labels = self.obj._get_axis(axis)
    axis_name = self.obj._get_axis_name(axis)

    if com._is_bool_indexer(key):
        mask = _check_bool_indexer(labels, key)
        target = labels[np.asarray(mask)]
    else:
        # Index objects pass through untouched; asarray can be unsafe for
        # anything else (NumPy strings are weird)
        target = key if isinstance(key, Index) else _asarray_tuplesafe(key)

        if _is_integer_dtype(target) and not _is_integer_index(labels):
            target = labels.take(target)

    return self.obj.reindex(**{axis_name: target})
def __getitem__(self, key):
    """Index into ``self._data``, re-boxing the result.

    A scalar key yields a ``Period`` built from the stored ordinal and
    ``self.freq``.  Any other key yields a ``PeriodIndex`` over the
    selection with the same name and frequency; boolean indexers are first
    converted with ``np.asarray``.
    """
    fetch = self._data.__getitem__

    if np.isscalar(key):
        return Period(ordinal=fetch(key), freq=self.freq)

    if com._is_bool_indexer(key):
        key = np.asarray(key)

    # The original singled out ``result.ndim > 1`` (commented "MPL
    # kludge") but built the same PeriodIndex on both sides, so one
    # return covers every case.
    selection = fetch(key)
    return PeriodIndex(selection, name=self.name, freq=self.freq)
def __getitem__(self, key):
    """Index into ``self._data``, re-boxing the result.

    A scalar key yields a ``Timedelta``.  A boolean indexer becomes a full
    slice when every entry is true, and is otherwise passed through
    ``lib.maybe_booleans_to_slice``.  A selection with more than one
    dimension is returned as-is; otherwise it is wrapped with
    ``_simple_new`` under ``self.name``.
    """
    fetch = self._data.__getitem__

    if np.isscalar(key):
        return Timedelta(fetch(key))

    if com._is_bool_indexer(key):
        mask = np.asarray(key)
        if mask.all():
            key = slice(0, None, None)
        else:
            key = lib.maybe_booleans_to_slice(mask.view(np.uint8))

    selection = fetch(key)
    if selection.ndim > 1:
        return selection

    return self._simple_new(selection, self.name)
def _getitem_axis(self, key, axis=0):
    """Dispatch ``key`` for one axis to the matching getter.

    Slices go to ``_get_slice_axis`` and boolean indexers to
    ``_getbool_axis``.  List-like keys go to ``_getitem_iterable``, except
    a tuple on a MultiIndex axis, which is looked up as a single label
    like any scalar.

    Raises
    ------
    ValueError
        If a slice is used on an axis whose inferred type is
        'mixed-integer-float' or 'mixed-integer', or if a list-like key
        has an ``ndim`` greater than 1.
    """
    labels = self.obj._get_axis(axis)

    if isinstance(key, slice):
        if labels.inferred_type in ('mixed-integer-float', 'mixed-integer'):
            raise ValueError('cannot slice with a non-single type label array')
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        return self._getbool_axis(key, axis=axis)

    if not _is_list_like(key):
        return self._get_label(key, axis=axis)

    if isinstance(key, tuple) and isinstance(labels, MultiIndex):
        return self._get_label(key, axis=axis)

    if hasattr(key, 'ndim') and key.ndim > 1:
        raise ValueError('Cannot index with multidimensional key')

    return self._getitem_iterable(key, axis=axis)
def __getitem__(self, key):
    """Index into the ordinal array, re-boxing the result.

    A scalar key yields a ``Period`` built from the stored ordinal and
    ``self.freq``.  Any other key yields a ``PeriodIndex`` over the
    selection with the same name and frequency; boolean indexers are first
    converted with ``np.asarray``.
    """
    ordinals = self.view(np.ndarray)

    if np.isscalar(key):
        return Period(ordinal=ordinals[key], freq=self.freq)

    if com._is_bool_indexer(key):
        key = np.asarray(key)

    # The original singled out ``result.ndim > 1`` (commented "MPL
    # kludge") but built the same PeriodIndex on both sides, so one
    # return covers every case.
    selection = ordinals[key]
    return PeriodIndex(selection, name=self.name, freq=self.freq)
def _getitem_axis(self, key, axis=0):
    """Dispatch a positional ``key`` for one axis to the matching getter.

    Slices and boolean indexers are validated with ``_has_valid_type`` and
    handed to ``_get_slice_axis`` / ``_getbool_axis``.  A list-like key is
    passed to ``_get_loc`` unchanged; a scalar is passed through
    ``_convert_scalar_indexer`` and must then be an integer.

    Raises
    ------
    TypeError
        If a scalar key is not an integer after conversion.
    """
    if isinstance(key, slice):
        self._has_valid_type(key, axis)
        return self._get_slice_axis(key, axis=axis)

    if com._is_bool_indexer(key):
        self._has_valid_type(key, axis)
        return self._getbool_axis(key, axis=axis)

    # a single integer or a list of integers
    if not _is_list_like(key):
        key = self._convert_scalar_indexer(key, axis)
        if not com.is_integer(key):
            raise TypeError("Cannot index by location index with a non-integer key")

    return self._get_loc(key, axis=axis)
def __getitem__(self, key):
    """Index into the MultiIndex.

    A scalar key returns a tuple with one entry per level, found by
    looking up each level's label at ``key``.  Any other key returns a new
    ``MultiIndex`` view over the selected tuples that shares
    ``self.levels`` and holds the correspondingly indexed labels.

    ``sortorder`` is kept only for boolean indexers; for other keys it is
    reset to None because the result may not be sorted.

    ``names`` is copied onto the result so that slicing does not drop the
    level names.
    NOTE(review): assumes this class exposes ``names``, as the sibling
    implementation does - confirm.
    """
    arr_idx = self.view(np.ndarray)

    if np.isscalar(key):
        return tuple(lev[lab[key]]
                     for lev, lab in zip(self.levels, self.labels))

    if _is_bool_indexer(key):
        key = np.asarray(key)
        sortorder = self.sortorder
    else:
        # cannot be sure whether the result will be sorted
        sortorder = None

    new_tuples = arr_idx[key]
    new_labels = [lab[key] for lab in self.labels]

    # an optimization: view the selection instead of rebuilding the index
    result = new_tuples.view(MultiIndex)
    result.levels = self.levels
    result.labels = new_labels
    result.sortorder = sortorder
    result.names = self.names

    return result
def __getitem__(self, key):
    """Index into the underlying ndarray view, re-boxing the result.

    A scalar key yields a ``Timestamp`` carrying ``self.offset`` and
    ``self.tz``.  A boolean indexer is passed through
    ``lib.maybe_booleans_to_slice``.  For a slice key the offset is kept,
    multiplied by the slice step when both are set; for any other key the
    offset is dropped.  A selection with more than one dimension is
    returned as-is; otherwise it is wrapped with ``_simple_new``.
    """
    raw = self.view(np.ndarray)

    if np.isscalar(key):
        return Timestamp(raw[key], offset=self.offset, tz=self.tz)

    if com._is_bool_indexer(key):
        key = lib.maybe_booleans_to_slice(np.asarray(key).view(np.uint8))

    if not isinstance(key, slice):
        new_offset = None
    elif self.offset is not None and key.step is not None:
        new_offset = key.step * self.offset
    else:
        new_offset = self.offset

    selection = raw[key]
    if selection.ndim > 1:
        return selection

    return self._simple_new(selection, self.name, new_offset, self.tz)
def __getitem__(self, key):
    """Index into the MultiIndex.

    A scalar key returns a tuple with one entry per level, found by
    looking up each level's label at ``key``.  Any other key returns a new
    ``MultiIndex`` view over the selected tuples that shares
    ``self.levels`` and ``self.names`` and holds the correspondingly
    indexed labels.

    ``sortorder`` is kept only for boolean indexers; for other keys it is
    reset to None because the result may not be sorted.
    """
    tuples = self.view(np.ndarray)

    if np.isscalar(key):
        return tuple(level[codes[key]]
                     for level, codes in zip(self.levels, self.labels))

    if _is_bool_indexer(key):
        key = np.asarray(key)
        kept_sortorder = self.sortorder
    else:
        # cannot be sure whether the result will be sorted
        kept_sortorder = None

    selected = tuples[key]
    selected_labels = [codes[key] for codes in self.labels]

    # an optimization: view the selection instead of rebuilding the index
    out = selected.view(MultiIndex)
    out.levels = self.levels
    out.labels = selected_labels
    out.sortorder = kept_sortorder
    out.names = self.names

    return out
def __getitem__(self, key):
    """
    Retrieve a column, a row slice, or an array-selected subset.

    The key is first looked up in ``self._series``; the column found is
    given ``key`` as its name and returned.  If that lookup raises
    TypeError or KeyError:
    - a slice selects from ``self.index`` and the frame is reindexed to it;
    - a list or ndarray is delegated to ``_getitem_array`` (a list is first
      converted with ``lib.list_to_object_array``, and a boolean indexer
      is coerced to a bool array);
    - anything else re-raises the original exception.
    """
    try:
        # unsure about how kludgy this is
        column = self._series[key]
        column.name = key
        return column
    except (TypeError, KeyError):
        if isinstance(key, slice):
            return self.reindex(self.index[key])

        if not isinstance(key, (np.ndarray, list)):  # pragma: no cover
            raise

        if isinstance(key, list):
            key = lib.list_to_object_array(key)

        # also raises Exception if object array with NA values
        if com._is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)

        return self._getitem_array(key)
def _has_valid_type(self, key, axis):
    """Validate ``key`` as a label-based indexer for ``axis``.

    Accepted keys:

    - a slice whose non-None bounds are both in the axis (bounds are not
      checked when ``ax.is_floating()`` is true),
    - a boolean indexer,
    - a list-like whose elements are all in the axis (a tuple on a
      MultiIndex axis is passed through unchecked),
    - a scalar label that is in the axis after ``_convert_scalar_indexer``.

    Returns
    -------
    bool
        Always True; invalid keys raise instead.

    Raises
    ------
    KeyError
        If a slice bound, list element or scalar label is missing.
    ValueError
        If a scalar key is null according to ``isnull``.
    TypeError
        Re-raised from the scalar lookup unless its message contains
        'unorderable'.
    """
    ax = self.obj._get_axis(axis)

    if isinstance(key, slice):
        if ax.is_floating():
            # allowing keys to be slicers with no fallback
            pass
        else:
            if key.start is not None and key.start not in ax:
                raise KeyError(
                    "start bound [%s] is not in the [%s]" %
                    (key.start, self.obj._get_axis_name(axis)))
            if key.stop is not None and key.stop not in ax:
                raise KeyError(
                    "stop bound [%s] is not in the [%s]" %
                    (key.stop, self.obj._get_axis_name(axis)))

    elif com._is_bool_indexer(key):
        return True

    elif _is_list_like(key):
        # mi is just a passthru
        if isinstance(key, tuple) and isinstance(ax, MultiIndex):
            return True

        # require all elements in the index
        idx = _ensure_index(key)
        if not idx.isin(ax).all():
            raise KeyError("[%s] are not in ALL in the [%s]" %
                           (key, self.obj._get_axis_name(axis)))

        return True

    else:

        def error():
            if isnull(key):
                raise ValueError(
                    "cannot use label indexing with a null key")
            raise KeyError("the label [%s] is not in the [%s]" %
                           (key, self.obj._get_axis_name(axis)))

        try:
            key = self._convert_scalar_indexer(key, axis)
            found = key in ax
        except TypeError as e:
            # python 3 type errors should be raised
            if 'unorderable' in str(e):  # pragma: no cover
                error()
            raise
        except Exception:
            # Any other lookup failure is reported as a missing label.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not turned into KeyError.
            error()
        else:
            if not found:
                error()

    return True
def _convert_to_indexer(self, obj, axis=0):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    Resolution order, as implemented below: an integer on a non-integer
    axis is returned as a position; otherwise ``labels.get_loc`` is tried;
    then slices, list-likes (boolean or label/position arrays) and finally
    ``labels.get_loc`` again for anything else.

    Raises
    ------
    KeyError
        If a list-like key contains labels that are not in the axis, or a
        slice bound lookup fails on a 'mixed-integer-float' axis.
    """
    labels = self.obj._get_axis(axis)
    is_int_index = _is_integer_index(labels)

    if com.is_integer(obj) and not is_int_index:
        return obj

    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        ltype = labels.inferred_type

        # in case of providing all floats, use label-based indexing
        float_slice = (labels.inferred_type == 'floating'
                       and _is_float_slice(obj))

        # floats that are within tolerance of int used as positions
        int_slice = _is_index_slice(obj)

        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex,
        # in which case we wouldn't want to do position-based slicing
        position_slice = (int_slice
                          and not ltype == 'integer'
                          and not isinstance(labels, MultiIndex)
                          and not float_slice)

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers
        try:
            if position_slice and 'mixed' in ltype:
                # The lookups are done only for their KeyError: if both
                # bounds are labels, slice by label instead of position.
                if start is not None:
                    labels.get_loc(start)
                if stop is not None:
                    labels.get_loc(stop)
                position_slice = False
        except KeyError:
            if ltype == 'mixed-integer-float':
                raise

        if null_slice or position_slice:
            indexer = obj
        else:
            try:
                indexer = labels.slice_indexer(start, stop, obj.step)
            except Exception:
                # fall back to positional slicing, except on integer axes
                if _is_index_slice(obj):
                    if ltype == 'integer':
                        raise
                    indexer = obj
                else:
                    raise

        return indexer

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            obj = _check_bool_indexer(labels, obj)
            inds, = obj.nonzero()
            return inds
        else:
            if isinstance(obj, Index):
                objarr = obj.values
            else:
                objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if _is_integer_dtype(objarr) and not is_int_index:
                if labels.inferred_type != 'integer':
                    objarr = np.where(objarr < 0,
                                      len(labels) + objarr, objarr)
                return objarr

            # this is not the most robust, but...
            # ``len(objarr)`` guards the ``objarr[0]`` peek so an empty key
            # does not raise IndexError on MultiIndex labels.
            if (isinstance(labels, MultiIndex) and len(objarr) and
                    not isinstance(objarr[0], tuple)):
                _, indexer = labels.reindex(objarr, level=0)

                check = labels.levels[0].get_indexer(objarr)
            else:
                # unique index
                if labels.is_unique:
                    indexer = check = labels.get_indexer(objarr)

                # non-unique (dups)
                else:
                    indexer, _ = labels.get_indexer_non_unique(objarr)
                    check = indexer

            mask = check == -1
            if mask.any():
                raise KeyError('%s not in index' % objarr[mask])

            return indexer

    else:
        return labels.get_loc(obj)
def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    - A boolean indexer is validated with ``_check_bool_indexer`` and the
      true positions are taken.
    - Integer keys on an axis whose inferred type is not 'integer' are
      positional (negative values are wrapped by ``len(labels)``).  On a
      'mixed-integer' axis the keys are tried as labels first, and used as
      positions only if some key is not a label.
    - Otherwise the keys are labels.  When both the axis and the keys are
      unique this is a reindex.  If not, the found labels are taken and,
      when some keys are missing, the result is reindexed so that missing
      keys appear as NaN-filled entries.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0.
    NotImplementedError
        If keys are missing on the non-unique path and ``self.obj`` is not
        exactly a DataFrame.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = key.nonzero()
        return self.obj.take(inds, axis=axis, convert=False)

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr):
        if labels.inferred_type != 'integer':
            keyarr = np.where(keyarr < 0, len(labels) + keyarr, keyarr)

        if labels.inferred_type == 'mixed-integer':
            indexer = labels.get_indexer(keyarr)
            if (indexer >= 0).all():
                # Every key is an existing label.  The original computed
                # this result and discarded it; return it.
                return self.obj.take(indexer, axis=axis, convert=True)
            return self.obj.take(keyarr, axis=axis)
        elif labels.inferred_type != 'integer':
            return self.obj.take(keyarr, axis=axis)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    if labels.is_unique and Index(keyarr).is_unique:
        return _reindex(keyarr, level=level)

    indexer, missing = labels.get_indexer_non_unique(keyarr)
    check = indexer != -1
    result = self.obj.take(indexer[check], axis=axis, convert=False)

    # need to merge the result labels and the missing labels
    if len(missing):
        positions = np.arange(len(indexer))

        missing = com._ensure_platform_int(missing)
        missing_labels = keyarr.take(missing)
        missing_indexer = com._ensure_int64(positions[~check])
        cur_labels = result._get_axis(axis).values
        cur_indexer = com._ensure_int64(positions[check])

        new_labels = np.empty((len(indexer),), dtype=object)
        new_labels[cur_indexer] = cur_labels
        new_labels[missing_indexer] = missing_labels
        new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
        new_indexer[missing_indexer] = -1

        # need to reindex with an indexer on a specific axis
        from pandas.core.frame import DataFrame
        if not (type(self.obj) == DataFrame):
            raise NotImplementedError("cannot handle non-unique indexing for non-DataFrame (yet)")

        # (labels, indexer) pairs, one pair per axis
        args = [None] * 4
        args[2 * axis] = new_labels
        args[2 * axis + 1] = new_indexer

        result = result._reindex_with_indexers(*args, copy=False, fill_value=np.nan)

    return result
def _convert_to_indexer(self, obj, axis=0):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on an ndarray

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    Resolution order, as implemented below: an integer on a non-integer
    axis is returned as a position; otherwise ``labels.get_loc`` is tried;
    then slices, list-likes (boolean or label/position arrays) and finally
    ``labels.get_loc`` again for anything else.

    Raises
    ------
    KeyError
        If a list-like key contains labels that are not in the axis, or a
        slice bound lookup fails on a 'mixed-integer-float' axis.
    """
    labels = self.obj._get_axis(axis)
    is_int_index = _is_integer_index(labels)

    if com.is_integer(obj) and not is_int_index:
        return obj

    try:
        return labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        ltype = labels.inferred_type

        if ltype == 'floating':
            int_slice = _is_int_slice(obj)
        else:
            # floats that are within tolerance of int used
            int_slice = _is_index_slice(obj)

        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex
        position_slice = (int_slice
                          and not ltype == 'integer'
                          and not isinstance(labels, MultiIndex))

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers
        try:
            if 'mixed' in ltype and int_slice:
                # The lookups are done only for their KeyError: if both
                # bounds are labels, slice by label instead of position.
                if start is not None:
                    labels.get_loc(start)
                if stop is not None:
                    labels.get_loc(stop)
                position_slice = False
        except KeyError:
            if ltype == 'mixed-integer-float':
                raise

        if null_slice or position_slice:
            slicer = obj
        else:
            try:
                i, j = labels.slice_locs(start, stop)
                slicer = slice(i, j, obj.step)
            except Exception:
                # fall back to positional slicing, except on integer axes
                if _is_index_slice(obj):
                    if labels.inferred_type == 'integer':
                        raise
                    slicer = obj
                else:
                    raise

        return slicer

    elif _is_list_like(obj):
        if com._is_bool_indexer(obj):
            objarr = _check_bool_indexer(labels, obj)
            return objarr
        else:
            if isinstance(obj, Index):
                objarr = obj.values
            else:
                objarr = _asarray_tuplesafe(obj)

            # If have integer labels, defer to label-based indexing
            if _is_integer_dtype(objarr) and not is_int_index:
                if labels.inferred_type != 'integer':
                    objarr = np.where(objarr < 0,
                                      len(labels) + objarr, objarr)
                return objarr

            # this is not the most robust, but...
            # ``len(objarr)`` guards the ``objarr[0]`` peek so an empty key
            # does not raise IndexError on MultiIndex labels.
            if (isinstance(labels, MultiIndex) and len(objarr) and
                    not isinstance(objarr[0], tuple)):
                _, indexer = labels.reindex(objarr, level=0)

                check = labels.levels[0].get_indexer(objarr)
            else:
                # XXX
                if labels.is_unique:
                    indexer = check = labels.get_indexer(objarr)
                else:
                    # non-unique labels: OR together one equality mask
                    # per requested key and keep every matching position
                    mask = np.zeros(len(labels), dtype=bool)
                    lvalues = labels.values

                    for x in objarr:
                        # ugh
                        to_or = lib.map_infer(lvalues, x.__eq__)
                        if not to_or.any():
                            raise KeyError('%s not in index' % str(x))
                        mask |= to_or

                    indexer = check = mask.nonzero()[0]

            mask = check == -1
            if mask.any():
                raise KeyError('%s not in index' % objarr[mask])

            return indexer

    else:
        return labels.get_loc(obj)
def _getitem_iterable(self, key, axis=0):
    """Select the entries named by a list-like ``key`` along ``axis``.

    - A boolean indexer is validated with ``_check_bool_indexer`` and the
      true positions are taken.
    - Integer keys on an axis whose inferred type is not 'integer' are
      positional (negative values are wrapped by ``len(labels)``).  On a
      'mixed-integer' axis the keys are tried as labels first, and used as
      positions only if some key is not a label.
    - Otherwise the keys are labels.  When both the axis and the keys are
      unique this is a reindex.  If not, the found labels are taken and,
      when some keys are missing, the result is reindexed so that missing
      keys appear as NaN-filled entries.

    Raises
    ------
    AssertionError
        If ``self.obj`` has no ``reindex_axis`` and ``axis`` is not 0, or
        if ``axis`` is out of range for ``self.obj.ndim`` on the
        non-unique path.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = key.nonzero()
        return self.obj.take(inds, axis=axis, convert=False)

    if isinstance(key, Index):
        # want Index objects to pass through untouched
        keyarr = key
    else:
        # asarray can be unsafe, NumPy strings are weird
        keyarr = _asarray_tuplesafe(key)

    if _is_integer_dtype(keyarr):
        if labels.inferred_type != 'integer':
            keyarr = np.where(keyarr < 0, len(labels) + keyarr, keyarr)

        if labels.inferred_type == 'mixed-integer':
            indexer = labels.get_indexer(keyarr)
            if (indexer >= 0).all():
                # Every key is an existing label.  The original computed
                # this result and discarded it; return it.
                return self.obj.take(indexer, axis=axis, convert=True)
            return self.obj.take(keyarr, axis=axis)
        elif labels.inferred_type != 'integer':
            return self.obj.take(keyarr, axis=axis)

    # this is not the most robust, but...
    # ``len(keyarr)`` guards the ``keyarr[0]`` peek so an empty key does not
    # raise IndexError on MultiIndex labels.
    if (isinstance(labels, MultiIndex) and len(keyarr) and
            not isinstance(keyarr[0], tuple)):
        level = 0
    else:
        level = None

    if labels.is_unique and Index(keyarr).is_unique:
        return _reindex(keyarr, level=level)

    indexer, missing = labels.get_indexer_non_unique(keyarr)
    check = indexer != -1
    result = self.obj.take(indexer[check], axis=axis, convert=False)

    # need to merge the result labels and the missing labels
    if len(missing):
        positions = np.arange(len(indexer))

        missing = com._ensure_platform_int(missing)
        missing_labels = keyarr.take(missing)
        missing_indexer = com._ensure_int64(positions[~check])
        cur_labels = result._get_axis(axis).values
        cur_indexer = com._ensure_int64(positions[check])

        new_labels = np.empty((len(indexer),), dtype=object)
        new_labels[cur_indexer] = cur_labels
        new_labels[missing_indexer] = missing_labels
        new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
        new_indexer[missing_indexer] = -1

        # reindex with the specified axis
        ndim = self.obj.ndim
        if axis + 1 > ndim:
            raise AssertionError("invalid indexing error with non-unique index")

        # (labels, indexer) pairs, one pair per axis
        args = [None] * (2 * ndim)
        args[2 * axis] = new_labels
        args[2 * axis + 1] = new_indexer

        result = result._reindex_with_indexers(*args, copy=False, fill_value=np.nan)

    return result
def _getitem_iterable(self, key, axis=0):
    """
    Select from ``self.obj`` along ``axis`` with a list-like ``key``.

    Dispatch, in order:

    * boolean indexer -> ``take`` of the positions where the validated
      mask is true;
    * integer-dtype key on non-floating labels that are not inferred as
      'integer' -> positional ``take`` (negative entries are first
      offset by ``len(labels)``); for 'mixed-integer' labels the key is
      tried as labels first and only used positionally if some entry is
      missing;
    * otherwise a label-based reindex, with a dedicated path when the
      labels or the key contain duplicates.

    Parameters
    ----------
    key : list-like, Index or boolean indexer
    axis : int, default 0

    Returns
    -------
    The object produced by ``self.obj.take`` / reindexing.

    Raises
    ------
    AssertionError
        If the Series fallback of ``_reindex`` is reached with
        ``axis != 0``, or if ``axis`` is out of range for ``self.obj``
        in the non-unique path.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = key.nonzero()
        return self.obj.take(inds, axis=axis, convert=False)
    else:
        if isinstance(key, Index):
            # want Index objects to pass through untouched
            keyarr = key
        else:
            # asarray can be unsafe, NumPy strings are weird
            keyarr = _asarray_tuplesafe(key)

        if is_integer_dtype(keyarr) and not labels.is_floating():
            if labels.inferred_type != 'integer':
                keyarr = np.where(keyarr < 0,
                                  len(labels) + keyarr, keyarr)

            if labels.inferred_type == 'mixed-integer':
                indexer = labels.get_indexer(keyarr)
                if (indexer >= 0).all():
                    # every key is an existing label: return the take
                    # (previously this result was computed and dropped)
                    return self.obj.take(indexer, axis=axis, convert=True)
                else:
                    return self.obj.take(keyarr, axis=axis)
            elif not labels.inferred_type == 'integer':
                return self.obj.take(keyarr, axis=axis)

        # this is not the most robust, but...
        if (isinstance(labels, MultiIndex) and
                not isinstance(keyarr[0], tuple)):
            level = 0
        else:
            level = None

        keyarr_is_unique = Index(keyarr).is_unique

        # existing labels are unique and indexer is unique
        if labels.is_unique and keyarr_is_unique:
            return _reindex(keyarr, level=level)
        else:
            indexer, missing = labels.get_indexer_non_unique(keyarr)
            check = indexer != -1
            result = self.obj.take(indexer[check], axis=axis, convert=False)

            # need to merge the result labels and the missing labels
            if len(missing):
                positions = np.arange(len(indexer))

                missing = com._ensure_platform_int(missing)
                missing_labels = keyarr.take(missing)
                missing_indexer = com._ensure_int64(positions[~check])
                cur_labels = result._get_axis(axis).values
                cur_indexer = com._ensure_int64(positions[check])

                new_labels = np.empty(tuple([len(indexer)]), dtype=object)
                new_labels[cur_indexer] = cur_labels
                new_labels[missing_indexer] = missing_labels

                # reindex with the specified axis
                ndim = self.obj.ndim
                if axis + 1 > ndim:
                    raise AssertionError("invalid indexing error with non-unique index")

                # a unique indexer
                if keyarr_is_unique:
                    # NOTE(review): relies on ``Index + Index`` combining
                    # the two position sets - confirm against the Index
                    # in use
                    new_indexer = (Index(cur_indexer) +
                                   Index(missing_indexer)).values
                    new_indexer[missing_indexer] = -1

                # we have a non_unique selector, need to use the original
                # indexer here
                else:
                    # need to retake to have the same size as the indexer;
                    # missing slots point at row 0 as a placeholder and
                    # are flagged with -1 in new_indexer below
                    rindexer = indexer.values
                    rindexer[~check] = 0
                    result = self.obj.take(rindexer, axis=axis,
                                           convert=False)

                    # reset the new indexer to account for the new size
                    new_indexer = np.arange(len(result))
                    new_indexer[~check] = -1

                result = result._reindex_with_indexers(
                    {axis: [new_labels, new_indexer]},
                    copy=True, allow_dups=True)

            return result
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
    """
    Translate an indexing key into something usable for actual fancy
    indexing on an ndarray.

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    When ``is_setter`` is true, a key that cannot be located may be
    handed back as ``{'key': obj}`` instead of raising ``KeyError``.
    """
    axis_labels = self.obj._get_axis(axis)

    # scalar keys go through the scalar converter first (it is expected
    # to raise for a key of the wrong type)
    obj = self._convert_scalar_indexer(obj, axis)

    # an integer key is positional only when the labels are not integers
    int_labels = axis_labels.is_integer()
    positional = com.is_integer(obj) and not int_labels

    # first attempt: the key is itself a label
    try:
        return axis_labels.get_loc(obj)
    except (KeyError, TypeError):
        pass
    except ValueError:
        if not positional:
            raise

    if positional:
        # setting past the end would be an insert, which fails by
        # definition for positional indexing
        if (is_setter and obj >= len(self.obj)
                and not isinstance(axis_labels, MultiIndex)):
            raise ValueError("cannot set by positional indexing with enlargement")
        return obj

    if isinstance(obj, slice):
        return self._convert_slice_indexer(obj, axis)

    if not _is_list_like(obj):
        try:
            return axis_labels.get_loc(obj)
        except KeyError:
            # allow a not found key only if we are a setter
            if not is_list_like(obj) and is_setter:
                return {'key': obj}
            raise

    if com._is_bool_indexer(obj):
        bool_key = _check_bool_indexer(axis_labels, obj)
        positions, = bool_key.nonzero()
        return positions

    key_values = (obj.values if isinstance(obj, Index)
                  else _asarray_tuplesafe(obj))

    # If have integer labels, defer to label-based indexing
    if is_integer_dtype(key_values) and not int_labels:
        if axis_labels.inferred_type != 'integer':
            key_values = np.where(key_values < 0,
                                  len(axis_labels) + key_values,
                                  key_values)
        return key_values

    # this is not the most robust, but...
    if (isinstance(axis_labels, MultiIndex)
            and not isinstance(key_values[0], tuple)):
        _, indexer = axis_labels.reindex(key_values, level=0)
        lookup = axis_labels.levels[0].get_indexer(key_values)
    elif axis_labels.is_unique:
        # unique index
        indexer = lookup = axis_labels.get_indexer(key_values)
    else:
        # non-unique (dups)
        indexer, _ = axis_labels.get_indexer_non_unique(key_values)
        lookup = indexer

    not_found = lookup == -1
    if not_found.any():
        # mi here
        if isinstance(obj, tuple) and is_setter:
            return {'key': obj}
        raise KeyError('%s not in index' % key_values[not_found])

    return indexer
def _has_valid_type(self, key, axis):
    """
    Report whether ``key`` is a slice, an integer, a boolean indexer or
    list-like. ``axis`` is accepted but not consulted.
    """
    if isinstance(key, slice):
        return True

    # hand back each helper's own result so the returned value matches
    # what a chained ``or`` would produce
    as_integer = com.is_integer(key)
    if as_integer:
        return as_integer

    as_bool_indexer = com._is_bool_indexer(key)
    if as_bool_indexer:
        return as_bool_indexer

    return _is_list_like(key)
def _getitem_iterable(self, key, axis=0):
    """
    Select from ``self.obj`` along ``axis`` with a list-like ``key``.

    Dispatch, in order:

    * boolean indexer -> ``take`` of the positions where the validated
      mask is true;
    * integer-dtype key on labels that are not inferred as 'integer' ->
      positional ``take`` (negative entries are first offset by
      ``len(labels)``); for 'mixed-integer' labels the key is tried as
      labels first and only used positionally if some entry is missing;
    * otherwise a label-based reindex, with a dedicated path when the
      labels contain duplicates.

    Parameters
    ----------
    key : list-like, Index or boolean indexer
    axis : int, default 0

    Returns
    -------
    The object produced by ``self.obj.take`` / reindexing.

    Raises
    ------
    AssertionError
        If the Series fallback of ``_reindex`` is reached with
        ``axis != 0``.
    """
    labels = self.obj._get_axis(axis)

    def _reindex(keys, level=None):
        try:
            return self.obj.reindex_axis(keys, axis=axis, level=level)
        except AttributeError:
            # Series
            if axis != 0:
                raise AssertionError('axis must be 0')
            return self.obj.reindex(keys, level=level)

    if com._is_bool_indexer(key):
        key = _check_bool_indexer(labels, key)
        inds, = key.nonzero()
        return self.obj.take(inds, axis=axis, convert=False)
    else:
        if isinstance(key, Index):
            # want Index objects to pass through untouched
            keyarr = key
        else:
            # asarray can be unsafe, NumPy strings are weird
            keyarr = _asarray_tuplesafe(key)

        if _is_integer_dtype(keyarr):
            if labels.inferred_type != 'integer':
                keyarr = np.where(keyarr < 0,
                                  len(labels) + keyarr, keyarr)

            if labels.inferred_type == 'mixed-integer':
                indexer = labels.get_indexer(keyarr)
                if (indexer >= 0).all():
                    # every key is an existing label: return the take
                    # (previously this result was computed and dropped)
                    return self.obj.take(indexer, axis=axis, convert=True)
                else:
                    return self.obj.take(keyarr, axis=axis)
            elif not labels.inferred_type == 'integer':
                return self.obj.take(keyarr, axis=axis)

        # this is not the most robust, but...
        if (isinstance(labels, MultiIndex) and
                not isinstance(keyarr[0], tuple)):
            level = 0
        else:
            level = None

        if labels.is_unique:
            return _reindex(keyarr, level=level)
        else:
            indexer, missing = labels.get_indexer_non_unique(keyarr)
            check = indexer != -1
            result = self.obj.take(indexer[check], axis=axis, convert=False)

            # need to merge the result labels and the missing labels
            if len(missing):
                positions = np.arange(len(indexer))

                missing_labels = keyarr.take(missing)
                missing_labels_indexer = positions[~check]
                cur_labels = result._get_axis(axis).values
                cur_labels_indexer = positions[check]
                new_labels = lib.combine_from_indexers(
                    cur_labels, cur_labels_indexer,
                    missing_labels, missing_labels_indexer)
                result = result.reindex_axis(new_labels, axis=axis)

            return result
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
    """
    Turn an indexing key into something that can drive actual fancy
    indexing on an ndarray.

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    With ``is_setter`` true, a key that is not found may be returned
    wrapped as ``{'key': obj}`` rather than raising ``KeyError``.
    """
    index = self.obj._get_axis(axis)

    # if we are a scalar indexer and not type correct raise
    obj = self._convert_scalar_indexer(obj, axis)

    # see if we are positional in nature
    index_is_int = index.is_integer()
    by_position = com.is_integer(obj) and not index_is_int

    # if we are a label return me
    try:
        return index.get_loc(obj)
    except (KeyError, TypeError):
        pass
    except ValueError:
        if not by_position:
            raise

    # a positional
    if by_position:
        # if we are setting and its not a valid location
        # its an insert which fails by definition
        if is_setter:
            beyond_end = obj >= len(self.obj)
            if beyond_end and not isinstance(index, MultiIndex):
                raise ValueError(
                    "cannot set by positional indexing with enlargement")
        return obj

    if isinstance(obj, slice):
        return self._convert_slice_indexer(obj, axis)

    if _is_list_like(obj):
        if com._is_bool_indexer(obj):
            checked = _check_bool_indexer(index, obj)
            true_positions, = checked.nonzero()
            return true_positions

        if isinstance(obj, Index):
            wanted = obj.values
        else:
            wanted = _asarray_tuplesafe(obj)

        # If have integer labels, defer to label-based indexing
        if is_integer_dtype(wanted) and not index_is_int:
            if index.inferred_type != 'integer':
                wanted = np.where(wanted < 0, len(index) + wanted, wanted)
            return wanted

        # this is not the most robust, but...
        first_level_only = (isinstance(index, MultiIndex)
                            and not isinstance(wanted[0], tuple))
        if first_level_only:
            _, indexer = index.reindex(wanted, level=0)
            found = index.levels[0].get_indexer(wanted)
        elif index.is_unique:
            # unique index
            indexer = index.get_indexer(wanted)
            found = indexer
        else:
            # non-unique (dups)
            indexer, _ = index.get_indexer_non_unique(wanted)
            found = indexer

        absent = found == -1
        if absent.any():
            # mi here
            if isinstance(obj, tuple) and is_setter:
                return {'key': obj}
            raise KeyError('%s not in index' % wanted[absent])

        return indexer

    try:
        return index.get_loc(obj)
    except KeyError:
        # allow a not found key only if we are a setter
        if not is_list_like(obj) and is_setter:
            return {'key': obj}
        raise
def _convert_to_indexer(self, obj, axis=0):
    """
    Translate an indexing key into something usable for actual fancy
    indexing on an ndarray.

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing
    """
    axis_labels = self.obj._get_axis(axis)
    int_labels = _is_integer_index(axis_labels)

    # an integer key against non-integer labels is already a position
    if com.is_integer(obj) and not int_labels:
        return obj

    try:
        return axis_labels.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        int_slice = _is_index_slice(obj)
        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex
        position_slice = (int_slice
                          and axis_labels.inferred_type != 'integer'
                          and not isinstance(axis_labels, MultiIndex))

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers, probe
        # both bounds as labels; only if both resolve is the slice
        # treated as label-based
        try:
            if 'mixed' in axis_labels.inferred_type and int_slice:
                if start is not None:
                    axis_labels.get_loc(start)
                if stop is not None:
                    axis_labels.get_loc(stop)
                position_slice = False
        except KeyError:
            if axis_labels.inferred_type == 'mixed-integer':
                raise

        if null_slice or position_slice:
            return obj

        try:
            lo, hi = axis_labels.slice_locs(start, stop)
            return slice(lo, hi, obj.step)
        except Exception:
            # fall back to the raw slice only for index slices on
            # labels that are not inferred as integer
            if (not _is_index_slice(obj)
                    or axis_labels.inferred_type == 'integer'):
                raise
            return obj

    if not _is_list_like(obj):
        return axis_labels.get_loc(obj)

    if com._is_bool_indexer(obj):
        return _check_bool_indexer(axis_labels, obj)

    key_values = _asarray_tuplesafe(obj)

    # If have integer labels, defer to label-based indexing
    if _is_integer_dtype(key_values) and not int_labels:
        return key_values

    indexer = axis_labels.get_indexer(key_values)

    not_found = indexer == -1
    if not_found.any():
        raise KeyError('%s not in index' % key_values[not_found])

    return indexer
def _convert_to_indexer(self, obj, axis=0):
    """
    Turn an indexing key into something that can drive actual fancy
    indexing on an ndarray.

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    "In the face of ambiguity, refuse the temptation to guess."
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing
    """
    index = self.obj._get_axis(axis)
    index_is_int = _is_integer_index(index)

    # an integer key against non-integer labels is already a position
    if com.is_integer(obj) and not index_is_int:
        return obj

    try:
        return index.get_loc(obj)
    except (KeyError, TypeError):
        pass

    if isinstance(obj, slice):
        int_slice = _is_index_slice(obj)
        null_slice = obj.start is None and obj.stop is None

        # could have integers in the first level of the MultiIndex
        position_slice = (int_slice
                          and index.inferred_type != 'integer'
                          and not isinstance(index, MultiIndex))

        start, stop = obj.start, obj.stop

        # last ditch effort: if we are mixed and have integers, check
        # that both bounds exist as labels before treating the slice
        # as label-based
        try:
            if 'mixed' in index.inferred_type and int_slice:
                if start is not None:
                    index.get_loc(start)
                if stop is not None:
                    index.get_loc(stop)
                position_slice = False
        except KeyError:
            if index.inferred_type == 'mixed-integer':
                raise

        if null_slice or position_slice:
            return obj

        try:
            begin, end = index.slice_locs(start, stop)
            return slice(begin, end, obj.step)
        except Exception:
            # fall back to the raw slice only for index slices on
            # labels that are not inferred as integer
            if not _is_index_slice(obj):
                raise
            if index.inferred_type == 'integer':
                raise
            return obj

    if _is_list_like(obj):
        if com._is_bool_indexer(obj):
            return _check_bool_indexer(index, obj)

        wanted = _asarray_tuplesafe(obj)

        # If have integer labels, defer to label-based indexing
        if _is_integer_dtype(wanted) and not index_is_int:
            return wanted

        # this is not the most robust, but...
        if (isinstance(index, MultiIndex)
                and not isinstance(wanted[0], tuple)):
            _, indexer = index.reindex(wanted, level=0)
            found = index.levels[0].get_indexer(wanted)
        else:
            indexer = index.get_indexer(wanted)
            found = indexer

        absent = found == -1
        if absent.any():
            raise KeyError('%s not in index' % wanted[absent])

        return indexer

    return index.get_loc(obj)