def _format_labels(
    bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None
):
    """
    Build the interval labels for a sequence of bin edges, based on ``dtype``.

    Parameters
    ----------
    bins : iterable
        Bin edges. Every edge is passed through a dtype-specific formatter
        before the intervals are built.
    precision : int
        Requested precision. Only used when ``dtype`` is not datetime-like /
        timedelta-like, in which case it is first refined by
        ``_infer_precision(precision, bins)``.
    right : bool, default True
        If True the intervals are closed on the right, otherwise on the left.
    include_lowest : bool, default False
        Together with ``right=True``, replaces the first interval with one
        whose left edge has been moved down by one step (1ns for
        datetime-like / timedelta-like dtypes, ``10 ** -precision`` otherwise).
    dtype : optional
        dtype used to select the formatter and the step size.

    Returns
    -------
    IntervalIndex
    """
    side = "right" if right else "left"

    def _minus_one_ns(value):
        return value - Timedelta("1ns")

    if is_datetime64tz_dtype(dtype):

        def to_label(value):
            return Timestamp(value, tz=dtype.tz)

        step_down = _minus_one_ns
    elif is_datetime64_dtype(dtype):
        to_label, step_down = Timestamp, _minus_one_ns
    elif is_timedelta64_dtype(dtype):
        to_label, step_down = Timedelta, _minus_one_ns
    else:
        precision = _infer_precision(precision, bins)

        def to_label(value):
            return _round_frac(value, precision)

        def step_down(value):
            return value - 10 ** (-precision)

    edges = [to_label(edge) for edge in bins]
    labels = IntervalIndex.from_breaks(edges, closed=side)

    if not (right and include_lowest):
        return labels

    # All intervals are right-closed here, so the lowest edge would be
    # excluded; widen the first interval by one step on its left side.
    first = labels[0]
    lowered = step_down(first.left)
    head = IntervalIndex([Interval(lowered, first.right, closed="right")])
    return head.append(labels[1:])
def get_loc(self, key, method=None):
    """
    Get the location(s) for the requested label.

    Parameters
    ----------
    key : label or Interval
    method : optional
        Passed to ``self._check_method`` for validation.

    Returns
    -------
    int, slice, or whatever the interval-tree engine returns

    Raises
    ------
    KeyError
        If the index is non-overlapping and monotonic and the computed
        bounds are empty (``start >= stop``).
    """
    self._check_method(method)

    requested = key
    key = self._maybe_cast_indexed(key)

    if not self.is_non_overlapping_monotonic:
        # use the interval tree
        if not isinstance(key, Interval):
            return self._engine.get_loc(key)
        lower, upper = _get_interval_closed_bounds(key)
        return self._engine.get_loc_interval(lower, upper)

    if isinstance(key, Interval):
        lower = self._maybe_cast_slice_bound(key.left, 'left', None)
        upper = self._maybe_cast_slice_bound(key.right, 'right', None)
        key = Interval(lower, upper, key.closed)
    else:
        key = self._maybe_cast_slice_bound(key, 'left', None)

    start, stop = self._find_non_overlapping_monotonic_bounds(key)

    if start is None or stop is None:
        # open-ended on at least one side
        return slice(start, stop)
    if start + 1 == stop:
        # exactly one matching position
        return start
    if start < stop:
        return slice(start, stop)
    raise KeyError(requested)
def __getitem__(self, value):
    """
    Return the element(s) selected by ``value``.

    Parameters
    ----------
    value : indexer
        Applied to both ``self.left`` and ``self.right``.

    Returns
    -------
    Interval, ``self._fill_value``, or the result of ``self._shallow_copy``
        * If indexing ``self.left`` yields an index object, the selected
          left/right sides are passed to ``self._shallow_copy``.
        * Otherwise the result is treated as a scalar: a missing left bound
          gives ``self._fill_value``, anything else gives an ``Interval``.

    Raises
    ------
    ValueError
        If the indexer produces a result with more than one dimension.
    """
    # Imported locally so this method does not rely on a module-level alias.
    import numpy as np

    left = self.left[value]
    right = self.right[value]

    if isinstance(left, ABCIndexClass):
        return self._shallow_copy(left, right)

    # GH#30588 multi-dimensional indexer disallowed.  Without this guard,
    # ``isna(left)`` returns an array and the ``if`` below either raises an
    # unrelated "truth value is ambiguous" error or silently falls through.
    if np.ndim(left) > 1:
        raise ValueError("multi-dimensional indexing not allowed")

    # scalar
    if isna(left):
        return self._fill_value
    return Interval(left, right, self.closed)
def __getitem__(self, value):
    """
    Return the element(s) selected by ``value``.

    Parameters
    ----------
    value : indexer
        Applied to ``self._isnan``, ``self.left`` and ``self.right``.

    Returns
    -------
    ``self._na_value``, Interval, or the result of ``self._shallow_copy``
        * ``self._na_value`` when the selected mask entry is a true scalar.
        * An ``Interval`` when indexing ``self.left`` does not yield an
          ``Index``.
        * Otherwise ``self._shallow_copy(left, right)``.
    """
    missing = self._isnan[value]
    if is_scalar(missing) and missing:
        return self._na_value

    lower = self.left[value]
    upper = self.right[value]

    if isinstance(lower, Index):
        return self._shallow_copy(lower, upper)

    # scalar
    return Interval(lower, upper, self.closed)
def _ndarray_values(self):
    """
    Materialise the data as an object-dtype numpy array.

    Returns
    -------
    numpy.ndarray
        One entry per element of ``self.left``: ``np.nan`` where
        ``self._isnan`` is truthy, otherwise
        ``Interval(left[i], right[i], self.closed)``.
    """
    lefts, rights = self.left, self.right
    missing = self._isnan
    side = self.closed

    size = len(lefts)
    out = np.empty(size, dtype=object)
    for pos in range(size):
        out[pos] = (
            np.nan if missing[pos] else Interval(lefts[pos], rights[pos], side)
        )
    return out
def __getitem__(self, key):
    """
    Return the element(s) selected by ``key``.

    Parameters
    ----------
    key : indexer
        Passed through ``check_array_indexer`` and then applied to
        ``self._left`` and ``self._right``.

    Returns
    -------
    Interval, ``self._fill_value``, or the result of ``self._shallow_copy``

    Raises
    ------
    ValueError
        If the selected left side is an array with more than one dimension.
    """
    key = check_array_indexer(self, key)
    lower = self._left[key]
    upper = self._right[key]

    if isinstance(lower, (np.ndarray, ExtensionArray)):
        if np.ndim(lower) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        return self._shallow_copy(lower, upper)

    # scalar
    if is_scalar(lower) and isna(lower):
        return self._fill_value
    return Interval(lower, upper, self.closed)
def __getitem__(self, value):
    """
    Return the element(s) selected by ``value``.

    Parameters
    ----------
    value : indexer
        Applied to both ``self.left`` and ``self.right``.

    Returns
    -------
    Interval, ``self._fill_value``, or the result of ``self._shallow_copy``

    Raises
    ------
    ValueError
        If the selected left side is not an index object and has more than
        one dimension.
    """
    lower = self.left[value]
    upper = self.right[value]

    if isinstance(lower, ABCIndexClass):
        return self._shallow_copy(lower, upper)

    # scalar
    if is_scalar(lower) and isna(lower):
        return self._fill_value
    if np.ndim(lower) > 1:
        # GH#30588 multi-dimensional indexer disallowed
        raise ValueError("multi-dimensional indexing not allowed")
    return Interval(lower, upper, self.closed)
def __getitem__(self, key):
    """
    Return the element(s) selected by ``key``.

    Parameters
    ----------
    key : indexer
        Passed through ``check_array_indexer`` and then applied to
        ``self._combined``.

    Returns
    -------
    Interval, ``self._fill_value``, or a new instance of ``type(self)``
        For an integer ``key`` the first two entries of the selected row are
        used as the left and right bounds; a missing left bound yields
        ``self._fill_value``.  For any other ``key`` the selection is wrapped
        with ``type(self)._simple_new``.

    Raises
    ------
    ValueError
        If a non-integer ``key`` selects data with more than two dimensions.
    """
    key = check_array_indexer(self, key)
    picked = self._combined[key]

    if not is_integer(key):
        # TODO: need to watch out for incorrectly-reducing getitem
        if np.ndim(picked) > 2:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        return type(self)._simple_new(picked, closed=self.closed)

    lower, upper = picked[0], picked[1]
    if isna(lower):
        return self._fill_value
    return Interval(lower, upper, self.closed)
def values(self):
    """
    Return the data as a numpy array of Interval objects (dtype='object').

    Returns
    -------
    numpy.ndarray
        Positions flagged in ``self._isnan`` hold ``np.nan``; every other
        position holds ``Interval(left[i], right[i], self._closed)``.
    """
    lefts = self.left
    rights = self.right
    missing = self._isnan
    side = self._closed

    out = np.empty(len(lefts), dtype=object)
    for pos in range(len(lefts)):
        if not missing[pos]:
            out[pos] = Interval(lefts[pos], rights[pos], side)
            continue
        out[pos] = np.nan
    return out
def __array__(self, dtype=None) -> np.ndarray:
    """
    Return the data as a numpy array of Interval objects (dtype='object').

    Parameters
    ----------
    dtype : optional
        Accepted but not used here; the result always has object dtype.

    Returns
    -------
    numpy.ndarray
        Positions flagged by ``self.isna()`` hold ``np.nan``; every other
        position holds ``Interval(left[i], right[i], self._closed)``.
    """
    lefts, rights = self.left, self.right
    missing = self.isna()
    side = self._closed

    size = len(lefts)
    out = np.empty(size, dtype=object)
    for pos in range(size):
        if missing[pos]:
            element = np.nan
        else:
            element = Interval(lefts[pos], rights[pos], side)
        out[pos] = element
    return out
def test__one_hot_encoded_targets(self):
    """Round-trip column "a" through OneHotEncodedTargets encode/decode."""
    # given: three rows and four evenly spaced bin edges over [-0.1, 0.1]
    frame = pd.DataFrame({"a": [-0.1, 0, 0.1], "b": [0, 1, 2]})
    bin_edges = np.linspace(-0.1, 0.1, 4, endpoint=True)
    encoder = OneHotEncodedTargets("a", bin_edges)

    # when
    encoded = encoder.encode(frame)
    decoded = encoder.decode(encoded)

    # then: each row lands in its own bin ...
    expected_one_hot = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
    np.testing.assert_array_almost_equal(encoded.values, expected_one_hot)

    # ... and the second row decodes back to the middle interval
    expected_middle = Interval(
        -0.03333333333333334, 0.033333333333333326, closed='right'
    )
    self.assertEqual(decoded[1], expected_middle)
def get_loc(self, key, method=None):
    """
    Get integer location, slice or boolean mask for requested label.

    Parameters
    ----------
    key : label
    method : {None}, optional
        * default: matches where the label is within an interval only.

    Returns
    -------
    loc : int if unique index, slice if monotonic index, else mask

    Raises
    ------
    KeyError
        If the index is non-overlapping and monotonic and no position
        matches ``key``.

    Examples
    --------
    >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
    >>> index = pd.IntervalIndex.from_intervals([i1, i2])
    >>> index.get_loc(1)
    0

    You can also supply an interval or a location for a point inside an
    interval.

    >>> index.get_loc(pd.Interval(0, 2))
    array([0, 1], dtype=int64)
    >>> index.get_loc(1.5)
    1

    If a label is in several intervals, you get the locations of all the
    relevant intervals.

    >>> i3 = pd.Interval(0, 2)
    >>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3])
    >>> overlapping_index.get_loc(1.5)
    array([0, 1], dtype=int64)
    """
    self._check_method(method)

    original_key = key
    key = self._maybe_cast_indexed(key)
    key_is_interval = isinstance(key, Interval)

    if self.is_non_overlapping_monotonic:
        if key_is_interval:
            key = Interval(
                self._maybe_cast_slice_bound(key.left, 'left', None),
                self._maybe_cast_slice_bound(key.right, 'right', None),
                key.closed,
            )
        else:
            key = self._maybe_cast_slice_bound(key, 'left', None)

        lo, hi = self._find_non_overlapping_monotonic_bounds(key)

        if lo is not None and hi is not None:
            if lo + 1 == hi:
                # exactly one matching position
                return lo
            if not lo < hi:
                raise KeyError(original_key)
        return slice(lo, hi)

    # use the interval tree
    if key_is_interval:
        closed_left, closed_right = _get_interval_closed_bounds(key)
        return self._engine.get_loc_interval(closed_left, closed_right)
    return self._engine.get_loc(key)