def _simple_new(cls, left, right, closed=None, name=None, copy=False,
                verify_integrity=True):
    """
    Internal fast-path constructor: build an interval index directly from
    pre-computed left/right bound arrays.

    Parameters
    ----------
    left, right : array-like
        Left/right bounds for each interval.
    closed : {'right', 'left', 'both', 'neither'}, optional
        Side(s) on which the intervals are closed; defaults to 'right'.
    name : object, optional
        Name to attach to the resulting index.
    copy : bool, default False
        Whether to copy the input data.
    verify_integrity : bool, default True
        Whether to run ``_validate`` on the result.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If left/right resolve to differing index types, or are backed by
        Period dtypes.
    """
    result = IntervalMixin.__new__(cls)

    if closed is None:
        closed = 'right'
    left = _ensure_index(left, copy=copy)
    right = _ensure_index(right, copy=copy)

    # coerce dtypes to match if needed; the two conditions are mutually
    # exclusive, so chain with elif (consistent with the other
    # _simple_new implementation in this file)
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        # report just the class names, not the full type reprs, matching
        # the message used by the other _simple_new implementation
        msg = ('must not have differing left [{ltype}] and right '
               '[{rtype}] types')
        raise ValueError(msg.format(ltype=type(left).__name__,
                                    rtype=type(right).__name__))
    elif isinstance(left, ABCPeriodIndex):
        raise ValueError("Period dtypes are not supported, "
                         "use a PeriodIndex instead")

    result._left = left
    result._right = right
    result._closed = closed
    result.name = name
    if verify_integrity:
        result._validate()
    result._reset_identity()
    return result
def _simple_new(cls, left, right, closed=None, copy=False, dtype=None,
                verify_integrity=True):
    """
    Internal fast-path constructor: assemble an interval container from
    left/right bound arrays, optionally coercing to an explicit
    IntervalDtype.

    Raises
    ------
    TypeError
        If ``dtype`` is not an IntervalDtype, or the subtype is
        category/object/string.
    ValueError
        On mismatched left/right types, Period-backed bounds, or
        differing time zones.
    """
    result = IntervalMixin.__new__(cls)

    closed = closed or 'right'
    left = _ensure_index(left, copy=copy)
    right = _ensure_index(right, copy=copy)

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(
                'dtype must be an IntervalDtype, got {dtype}'.format(
                    dtype=dtype))
        if dtype.subtype is not None:
            left = left.astype(dtype.subtype)
            right = right.astype(dtype.subtype)

    # unify a float side with an integer side
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        raise ValueError(
            'must not have differing left [{ltype}] and right '
            '[{rtype}] types'.format(ltype=type(left).__name__,
                                     rtype=type(right).__name__))
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError('category, object, and string subtypes are not '
                        'supported for IntervalArray')
    if isinstance(left, ABCPeriodIndex):
        raise ValueError(
            'Period dtypes are not supported, use a PeriodIndex instead')
    if (isinstance(left, ABCDatetimeIndex) and
            str(left.tz) != str(right.tz)):
        raise ValueError(
            "left and right must have the same time zone, got "
            "'{left_tz}' and '{right_tz}'".format(left_tz=left.tz,
                                                  right_tz=right.tz))

    result._left = left
    result._right = right
    result._closed = closed
    if verify_integrity:
        result._validate()
    return result
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    """
    Compute positions in self for each value of ``target``.

    Only exact matching is supported; fill methods raise
    NotImplementedError.
    """
    from pandas.core.arrays.categorical import _recode_for_categories

    method = missing.clean_reindex_fill_method(method)
    target = ibase._ensure_index(target)

    # fast path: identical unique indexes map element-for-element
    if self.is_unique and self.equals(target):
        return np.arange(len(self), dtype='intp')

    if method in ('pad', 'backfill'):
        raise NotImplementedError("method='pad' and method='backfill' not "
                                  "implemented yet for CategoricalIndex")
    if method == 'nearest':
        raise NotImplementedError("method='nearest' not implemented yet "
                                  'for CategoricalIndex')

    if (isinstance(target, CategoricalIndex) and
            self.values.is_dtype_equal(target)):
        # same dtype: codes are directly comparable (possibly after
        # recoding to our category ordering)
        if self.values.equals(target.values):
            codes = target.codes
        else:
            codes = _recode_for_categories(target.codes,
                                           target.categories,
                                           self.values.categories)
    elif isinstance(target, CategoricalIndex):
        # different dtype: translate target codes through our categories
        mapping = self.categories.get_indexer(target.categories)
        codes = take_1d(mapping, target.codes, fill_value=-1)
    else:
        codes = self.categories.get_indexer(target)

    indexer, _ = self._engine.get_indexer_non_unique(codes)
    return _ensure_platform_int(indexer)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    """
    Compute positions in self for each value of ``target``, with a fast
    vectorized path when the intervals are non-overlapping and monotonic.
    """
    self._check_method(method)

    target = self._maybe_cast_indexed(_ensure_index(target))

    if self.equals(target):
        return np.arange(len(self), dtype='intp')

    if self.is_non_overlapping_monotonic:
        start, stop = self._find_non_overlapping_monotonic_bounds(target)

        bumped = start + 1
        if not (bumped < stop).any():
            # each target value hits at most one interval
            return np.where(bumped == stop, start, -1)

    if not self.is_unique:
        raise ValueError("cannot handle non-unique indices")

    if isinstance(target, IntervalIndex):
        indexer = self._get_reindexer(target)
    else:
        # fall back to per-element lookup for non-interval targets
        indexer = np.concatenate([self.get_loc(key) for key in target])

    return _ensure_platform_int(indexer)
def _as_like_interval_index(self, other, error_msg):
    """
    Coerce ``other`` to an index and verify it is an IntervalIndex closed
    on the same side as self, raising ``ValueError(error_msg)`` otherwise.
    """
    self._assert_can_do_setop(other)
    other = _ensure_index(other)

    compatible = (isinstance(other, IntervalIndex) and
                  self.closed == other.closed)
    if not compatible:
        raise ValueError(error_msg)
    return other
def reindex(self, target, method=None, level=None, limit=None,
            tolerance=None):
    """
    Create index with target's values (move/add/delete values as
    necessary).

    Parameters
    ----------
    target : array-like
        Values for the new index.
    method, level, limit : optional
        Not implemented for CategoricalIndex; a non-None value raises
        NotImplementedError.
    tolerance : optional
        Ignored.

    Returns
    -------
    new_index : pd.Index
        Resulting index
    indexer : np.ndarray or None
        Indices of output values in original index
    """
    if method is not None:
        raise NotImplementedError("argument method is not implemented for "
                                  "CategoricalIndex.reindex")
    if level is not None:
        raise NotImplementedError("argument level is not implemented for "
                                  "CategoricalIndex.reindex")
    if limit is not None:
        raise NotImplementedError("argument limit is not implemented for "
                                  "CategoricalIndex.reindex")

    target = ibase._ensure_index(target)

    if not is_categorical_dtype(target) and not target.is_unique:
        raise ValueError("cannot reindex with a non-unique indexer")

    indexer, missing = self.get_indexer_non_unique(np.array(target))

    if len(self.codes):
        new_target = self.take(indexer)
    else:
        # an empty self has nothing to take from; the reindexed result is
        # the target itself (consistent with the newer implementation of
        # this method elsewhere in the codebase)
        new_target = target

    # filling in missing if needed
    if len(missing):
        cats = self.categories.get_indexer(target)

        if (cats == -1).any():
            # some target values fall outside our categories:
            # coerce to a regular index here!
            result = Index(np.array(self), name=self.name)
            new_target, indexer, _ = result._reindex_non_unique(
                np.array(target))
        else:
            codes = new_target.codes.copy()
            codes[indexer == -1] = cats[missing]
            new_target = self._create_from_codes(codes)

    # we always want to return an Index type here
    # to be consistent with .reindex for other index types (e.g. they
    # don't coerce based on the actual values, only on the dtype)
    # unless we had an initial Categorical to begin with
    # in which case we are going to conform to the passed Categorical
    new_target = np.asarray(new_target)
    if is_categorical_dtype(target):
        new_target = target._shallow_copy(new_target, name=self.name)
    else:
        new_target = Index(new_target, name=self.name)

    return new_target, indexer
def get_indexer_non_unique(self, target):
    """
    Compute positions in self for each value of ``target``, allowing
    non-unique matches.

    Returns
    -------
    indexer : np.ndarray
        Platform-int positions of target values in self (-1 for misses).
    missing : np.ndarray
        Positions in target of values not found in self.
    """
    target = ibase._ensure_index(target)

    if isinstance(target, CategoricalIndex):
        target = target.categories

    codes = self.categories.get_indexer(target)
    indexer, missing = self._engine.get_indexer_non_unique(codes)
    # coerce to a platform int indexer, consistent with the other
    # get_indexer/get_indexer_non_unique implementations in this file
    return _ensure_platform_int(indexer), missing
def get_indexer_non_unique(self, target):
    """
    Positions in self for each element of ``target``, allowing
    non-unique matches; returns (platform-int indexer, missing positions).
    """
    target = ibase._ensure_index(target)

    # compare via categories when the target is itself categorical
    if isinstance(target, CategoricalIndex):
        target = target.categories

    target_codes = self.categories.get_indexer(target)
    locs, missing = self._engine.get_indexer_non_unique(target_codes)
    return _ensure_platform_int(locs), missing
def _as_like_interval_index(self, other):
    """
    Coerce ``other`` to an index and verify it is an IntervalIndex closed
    on the same side as self; used by set operations.

    Raises
    ------
    TypeError
        If ``other`` is not an IntervalIndex.
    ValueError
        If the closed sides differ.
    """
    self._assert_can_do_setop(other)
    other = _ensure_index(other)

    if not isinstance(other, IntervalIndex):
        raise TypeError(('the other index needs to be an IntervalIndex '
                         'too, but was type {}').format(
                             other.__class__.__name__))

    if self.closed != other.closed:
        raise ValueError('can only do set operations between two '
                         'IntervalIndex objects that are closed on the '
                         'same side')

    return other
def _get_combined_index(indexes, intersect=False):
    """
    Combine a collection of indexes into one: by intersection when
    ``intersect`` is True, otherwise by union.
    """
    # TODO: handle index names!
    indexes = com._get_distinct_objs(indexes)

    if not indexes:
        return Index([])
    if len(indexes) == 1:
        return indexes[0]

    if intersect:
        combined = indexes[0]
        for nxt in indexes[1:]:
            combined = combined.intersection(nxt)
        return combined

    return _ensure_index(_union_indexes(indexes))
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    """
    Positions in self for each value of ``target``; after frequency
    validation, delegates to the underlying Int64Index.
    """
    target = _ensure_index(target)

    # mixing different period frequencies is not allowed
    if hasattr(target, 'freq') and target.freq != self.freq:
        raise IncompatibleFrequency(
            _DIFFERENT_FREQ_INDEX.format(self.freqstr, target.freqstr))

    if isinstance(target, PeriodIndex):
        # compare on the underlying ordinals
        target = target.asi8

    if tolerance is not None:
        tolerance = self._convert_tolerance(tolerance, target)

    return Index.get_indexer(self._int64index, target, method, limit,
                             tolerance)
def get_indexer_non_unique(self, target):
    """
    Positions in self for each element of ``target``, allowing
    non-unique matches; returns (platform-int indexer, missing positions).
    """
    target = ibase._ensure_index(target)

    if isinstance(target, CategoricalIndex):
        # Indexing on codes is more efficient if categories are the same:
        if target.categories is self.categories:
            locs, missing = self._engine.get_indexer_non_unique(
                target.codes)
            return _ensure_platform_int(locs), missing
        target = target.values

    codes = self.categories.get_indexer(target)
    locs, missing = self._engine.get_indexer_non_unique(codes)
    return _ensure_platform_int(locs), missing
def _simple_new(cls, left, right, closed=None, name=None, copy=False,
                verify_integrity=True):
    """
    Internal fast-path constructor: assemble an interval index directly
    from left/right bound arrays.

    Raises
    ------
    ValueError
        On mismatched left/right types, Period-backed bounds, or
        differing time zones.
    """
    result = IntervalMixin.__new__(cls)

    closed = 'right' if closed is None else closed
    left = _ensure_index(left, copy=copy)
    right = _ensure_index(right, copy=copy)

    # bring a float side and an integer side to a common dtype
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    if type(left) != type(right):
        raise ValueError(
            'must not have differing left [{ltype}] and right '
            '[{rtype}] types'.format(ltype=type(left).__name__,
                                     rtype=type(right).__name__))
    if isinstance(left, ABCPeriodIndex):
        raise ValueError(
            'Period dtypes are not supported, use a PeriodIndex instead')
    if (isinstance(left, ABCDatetimeIndex) and
            str(left.tz) != str(right.tz)):
        raise ValueError(
            "left and right must have the same time zone, got "
            "'{left_tz}' and '{right_tz}'".format(left_tz=left.tz,
                                                  right_tz=right.tz))

    result._left = left
    result._right = right
    result._closed = closed
    result.name = name
    if verify_integrity:
        result._validate()
    result._reset_identity()
    return result
def _get_combined_index(indexes, intersect=False, sort=False):
    """
    Combine indexes by union (default) or intersection, optionally
    sorting the result.
    """
    # TODO: handle index names!
    indexes = com._get_distinct_objs(indexes)

    if len(indexes) == 0:
        combined = Index([])
    elif len(indexes) == 1:
        combined = indexes[0]
    elif intersect:
        combined = indexes[0]
        for nxt in indexes[1:]:
            combined = combined.intersection(nxt)
    else:
        combined = _union_indexes(indexes, sort=sort)
        combined = _ensure_index(combined)

    if sort:
        # mixed-type indexes may not be sortable; leave unsorted then
        try:
            combined = combined.sort_values()
        except TypeError:
            pass

    return combined
def reindex(self, target, method=None, level=None, limit=None,
            tolerance=None):
    """
    Build a new index holding ``target``'s values, adding, moving or
    dropping values from this index as needed.

    Returns
    -------
    new_index : pd.Index
        The resulting index.
    indexer : np.ndarray or None
        Positions of the output values in the original index.
    """
    if method is not None:
        raise NotImplementedError("argument method is not implemented for "
                                  "CategoricalIndex.reindex")
    if level is not None:
        raise NotImplementedError("argument level is not implemented for "
                                  "CategoricalIndex.reindex")
    if limit is not None:
        raise NotImplementedError("argument limit is not implemented for "
                                  "CategoricalIndex.reindex")

    target = ibase._ensure_index(target)

    if not target.is_unique and not is_categorical_dtype(target):
        raise ValueError("cannot reindex with a non-unique indexer")

    indexer, missing_locs = self.get_indexer_non_unique(np.array(target))

    # an empty self has nothing to take from; the result is the target
    new_target = self.take(indexer) if len(self.codes) else target

    # fill in any target values absent from self
    if len(missing_locs):
        target_cats = self.categories.get_indexer(target)

        if (target_cats == -1).any():
            # some target values fall outside our categories -> fall
            # back to a plain Index and reindex that instead
            plain = Index(np.array(self), name=self.name)
            new_target, indexer, _ = plain._reindex_non_unique(
                np.array(target))
        else:
            filled = new_target.codes.copy()
            filled[indexer == -1] = target_cats[missing_locs]
            new_target = self._create_from_codes(filled)

    # Always hand back an Index, mirroring .reindex on other index types
    # (which coerce on dtype, not on the actual values) -- unless the
    # caller passed a Categorical, in which case conform to it.
    new_target = np.asarray(new_target)
    if is_categorical_dtype(target):
        new_target = target._shallow_copy(new_target, name=self.name)
    else:
        new_target = Index(new_target, name=self.name)

    return new_target, indexer
def get_indexer_non_unique(self, target):
    """
    Non-unique indexer: cast the target like an interval key, then defer
    to the base-class implementation.
    """
    coerced = _ensure_index(target)
    coerced = self._maybe_cast_indexed(coerced)
    return super(IntervalIndex, self).get_indexer_non_unique(coerced)