Ejemplo n.º 1
0
def _format_labels(bins,
                   precision: int,
                   right: bool = True,
                   include_lowest: bool = False,
                   dtype=None):
    """
    Build the interval labels for ``bins`` according to ``dtype``.

    Parameters
    ----------
    bins : iterable
        Bin edges; each edge is passed through a dtype-specific formatter
        before the intervals are built.
    precision : int
        Requested precision. Only used when ``dtype`` is neither
        datetime-like nor timedelta; it is then refined through
        ``_infer_precision`` before rounding the edges.
    right : bool, default True
        Close the intervals on the right when true, on the left otherwise.
    include_lowest : bool, default False
        Together with ``right``, moves the left edge of the first interval
        down by one step (1ns, or ``10**(-precision)`` for other dtypes).
    dtype : optional
        dtype used to pick the formatter and the adjustment step.

    Returns
    -------
    IntervalIndex
    """

    def _minus_one_ns(value):
        # smallest step for datetime-like and timedelta edges
        return value - Timedelta("1ns")

    if is_datetime64tz_dtype(dtype):
        def formatter(value):
            return Timestamp(value, tz=dtype.tz)

        adjust = _minus_one_ns
    elif is_datetime64_dtype(dtype):
        formatter, adjust = Timestamp, _minus_one_ns
    elif is_timedelta64_dtype(dtype):
        formatter, adjust = Timedelta, _minus_one_ns
    else:
        precision = _infer_precision(precision, bins)

        # both helpers read the refined ``precision`` bound just above
        def formatter(value):
            return _round_frac(value, precision)

        def adjust(value):
            return value - 10**(-precision)

    edges = list(map(formatter, bins))
    labels = IntervalIndex.from_breaks(
        edges, closed="right" if right else "left")

    if not (right and include_lowest):
        return labels

    # every interval is right closed here, so widen the first one on its
    # left side by one step to take in the lowest edge
    first = labels[0]
    widened = Interval(adjust(first.left), first.right, closed="right")
    return IntervalIndex([widened]).append(labels[1:])
Ejemplo n.º 2
0
    def get_loc(self, key, method=None):
        """
        Return the location(s) matching ``key``.

        Parameters
        ----------
        key : label or Interval
        method : optional
            Validated through ``self._check_method``.

        Returns
        -------
        An integer position, a slice, or whatever the underlying engine
        returns when the index is not monotonic and non-overlapping.

        Raises
        ------
        KeyError
            With the key as originally passed, when the computed bounds
            of a non-overlapping monotonic index are empty.
        """
        self._check_method(method)

        requested = key
        key = self._maybe_cast_indexed(key)

        if not self.is_non_overlapping_monotonic:
            # use the interval tree
            if not isinstance(key, Interval):
                return self._engine.get_loc(key)
            left, right = _get_interval_closed_bounds(key)
            return self._engine.get_loc_interval(left, right)

        if isinstance(key, Interval):
            lower = self._maybe_cast_slice_bound(key.left, 'left', None)
            upper = self._maybe_cast_slice_bound(key.right, 'right', None)
            key = Interval(lower, upper, key.closed)
        else:
            key = self._maybe_cast_slice_bound(key, 'left', None)

        start, stop = self._find_non_overlapping_monotonic_bounds(key)

        if start is not None and stop is not None:
            if start + 1 == stop:
                # exactly one match: return its position
                return start
            if not start < stop:
                raise KeyError(requested)
        # open-ended bounds or several matches
        return slice(start, stop)
Ejemplo n.º 3
0
    def __getitem__(self, value):
        """
        Index both sides with ``value``.

        Returns ``self._fill_value`` when the selected left bound is
        missing, an ``Interval`` for any other scalar selection, and a
        shallow copy built from the selected sides otherwise.
        """
        lower = self.left[value]
        upper = self.right[value]

        if isinstance(lower, ABCIndexClass):
            # still an index: several elements were selected
            return self._shallow_copy(lower, upper)

        # scalar
        if isna(lower):
            return self._fill_value
        return Interval(lower, upper, self.closed)
Ejemplo n.º 4
0
    def __getitem__(self, value):
        """
        Index the missing-value mask and both sides with ``value``.

        Returns ``self._na_value`` when the selection is a single missing
        element, an ``Interval`` for any other scalar selection, and a
        shallow copy built from the selected sides otherwise.
        """
        is_missing = self._isnan[value]
        if is_scalar(is_missing) and is_missing:
            return self._na_value

        lower, upper = self.left[value], self.right[value]

        if isinstance(lower, Index):
            return self._shallow_copy(lower, upper)

        # scalar
        return Interval(lower, upper, self.closed)
Ejemplo n.º 5
0
    def _ndarray_values(self):
        """
        Return the data as an object-dtype numpy array.

        Each position holds ``np.nan`` where ``self._isnan`` is set and an
        ``Interval`` built from the matching left/right bounds otherwise.
        """
        lower = self.left
        upper = self.right
        missing = self._isnan
        side = self.closed

        def element(pos):
            # one output cell: NaN for missing entries, an Interval otherwise
            if missing[pos]:
                return np.nan
            return Interval(lower[pos], upper[pos], side)

        count = len(lower)
        out = np.empty(count, dtype=object)
        for pos in range(count):
            out[pos] = element(pos)
        return out
Ejemplo n.º 6
0
    def __getitem__(self, key):
        """
        Index both sides with ``key`` after validating it through
        ``check_array_indexer``.

        Returns ``self._fill_value`` for a missing scalar, an ``Interval``
        for any other scalar selection, and a shallow copy built from the
        selected sides otherwise.

        Raises
        ------
        ValueError
            If the selection has more than one dimension.
        """
        key = check_array_indexer(self, key)
        lower = self._left[key]
        upper = self._right[key]

        if isinstance(lower, (np.ndarray, ExtensionArray)):
            if np.ndim(lower) > 1:
                # GH#30588 multi-dimensional indexer disallowed
                raise ValueError("multi-dimensional indexing not allowed")
            return self._shallow_copy(lower, upper)

        # scalar
        if is_scalar(lower) and isna(lower):
            return self._fill_value
        return Interval(lower, upper, self.closed)
Ejemplo n.º 7
0
    def __getitem__(self, value):
        """
        Index both sides with ``value``.

        Returns ``self._fill_value`` for a missing scalar, an ``Interval``
        for any other non-index selection, and a shallow copy built from
        the selected sides when the selection is still an index.

        Raises
        ------
        ValueError
            If the selection is not an index and has more than one
            dimension.
        """
        lower = self.left[value]
        upper = self.right[value]

        if isinstance(lower, ABCIndexClass):
            return self._shallow_copy(lower, upper)

        # scalar
        if is_scalar(lower) and isna(lower):
            return self._fill_value
        if np.ndim(lower) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        return Interval(lower, upper, self.closed)
Ejemplo n.º 8
0
    def __getitem__(self, key):
        """
        Index ``self._combined`` with ``key`` after validating it through
        ``check_array_indexer``.

        For an integer key the selection's first two entries are taken as
        the left and right bounds: ``self._fill_value`` is returned when
        the left bound is missing, an ``Interval`` otherwise. Any other
        key yields a new instance of the same type built from the
        selection.

        Raises
        ------
        ValueError
            If a non-integer key produces more than two dimensions.
        """
        key = check_array_indexer(self, key)

        picked = self._combined[key]

        if not is_integer(key):
            # TODO: need to watch out for incorrectly-reducing getitem
            if np.ndim(picked) > 2:
                # GH#30588 multi-dimensional indexer disallowed
                raise ValueError("multi-dimensional indexing not allowed")
            return type(self)._simple_new(picked, closed=self.closed)

        lower, upper = picked[0], picked[1]
        if isna(lower):
            return self._fill_value
        return Interval(lower, upper, self.closed)
Ejemplo n.º 9
0
    def values(self):
        """
        Return the IntervalIndex's data as a numpy array of Interval
        objects (with dtype='object'); positions flagged in
        ``self._isnan`` hold ``np.nan`` instead.
        """
        lower = self.left
        upper = self.right
        missing = self._isnan
        side = self._closed

        out = np.empty(len(lower), dtype=object)
        for pos in range(len(lower)):
            out[pos] = (
                np.nan if missing[pos]
                else Interval(lower[pos], upper[pos], side)
            )
        return out
Ejemplo n.º 10
0
    def __array__(self, dtype=None) -> np.ndarray:
        """
        Return the IntervalArray's data as a numpy array of Interval
        objects (with dtype='object'); positions reported by
        ``self.isna()`` hold ``np.nan`` instead.

        ``dtype`` is accepted but not used by this implementation.
        """
        lower = self.left
        upper = self.right
        missing = self.isna()
        side = self._closed

        count = len(lower)
        out = np.empty(count, dtype=object)
        for pos in range(count):
            if not missing[pos]:
                out[pos] = Interval(lower[pos], upper[pos], side)
                continue
            out[pos] = np.nan
        return out
Ejemplo n.º 11
0
 def test__one_hot_encoded_targets(self):
     """given"""
     # given: three values of column "a" and four evenly spaced bucket
     # edges spanning the same range
     bucket_edges = np.linspace(-0.1, 0.1, 4, endpoint=True)
     df = pd.DataFrame({"a": [-0.1, 0, 0.1], "b": [0, 1, 2]})
     encoder = OneHotEncodedTargets("a", bucket_edges)

     # when: encoding the frame and decoding the result again
     encoded = encoder.encode(df)
     decoded = encoder.decode(encoded)

     # then: each row lands in its own bucket ...
     expected_encoding = np.array([
         [1., 0., 0.],
         [0., 1., 0.],
         [0., 0., 1.],
     ])
     np.testing.assert_array_almost_equal(encoded.values, expected_encoding)

     # ... and the second decoded entry is the middle, right-closed bucket
     expected_middle = Interval(-0.03333333333333334,
                                0.033333333333333326,
                                closed='right')
     self.assertEqual(decoded[1], expected_middle)
Ejemplo n.º 12
0
    def get_loc(self, key, method=None):
        """Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Raises
        ------
        KeyError
            With the key as originally passed, when a non-overlapping
            monotonic index yields empty bounds for it.

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex.from_intervals([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply an interval or a location for a point inside an
        interval.

        >>> index.get_loc(pd.Interval(0, 2))
        array([0, 1], dtype=int64)
        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3])
        >>> overlapping_index.get_loc(1.5)
        array([0, 1], dtype=int64)
        """
        self._check_method(method)

        requested = key
        key = self._maybe_cast_indexed(key)

        if not self.is_non_overlapping_monotonic:
            # use the interval tree
            if not isinstance(key, Interval):
                return self._engine.get_loc(key)
            left, right = _get_interval_closed_bounds(key)
            return self._engine.get_loc_interval(left, right)

        if isinstance(key, Interval):
            lower = self._maybe_cast_slice_bound(key.left, 'left', None)
            upper = self._maybe_cast_slice_bound(key.right, 'right', None)
            key = Interval(lower, upper, key.closed)
        else:
            key = self._maybe_cast_slice_bound(key, 'left', None)

        start, stop = self._find_non_overlapping_monotonic_bounds(key)

        bounded = start is not None and stop is not None
        if bounded and start + 1 == stop:
            # exactly one match: return its position
            return start
        if bounded and not start < stop:
            raise KeyError(requested)
        # open-ended bounds or several matches
        return slice(start, stop)