예제 #1
0
    def __setitem__(self, key, value):
        """Write ``value`` into the wrapped series' column at ``key``.

        A tuple ``key`` is converted to a list first. Scalars are passed
        through ``to_cudf_compatible_scalar``; any other value is converted
        with ``column.as_column`` unless it is a ``list``/``dict`` being
        assigned into a column whose dtype is ``ListDtype`` or
        ``StructDtype``, in which case it is forwarded untouched.

        If the column dtype is neither ``Decimal64Dtype`` nor
        ``CategoricalDtype``, the value has a non-decimal numeric dtype and
        ``is_integer(key)`` is false, both the value and the column are
        cast to ``np.result_type`` of their two dtypes before assigning.
        """
        from cudf.core.column import column

        if isinstance(key, tuple):
            key = list(key)

        # Coerce value into a scalar or column; list/dict values headed for
        # a list/struct column are deliberately left as they are.
        if is_scalar(value):
            value = to_cudf_compatible_scalar(value)
        elif not isinstance(value, (list, dict)) or not isinstance(
                self._sr._column.dtype,
                (cudf.ListDtype, cudf.StructDtype)):
            value = column.as_column(value)

        skip_promotion = isinstance(
            self._sr._column.dtype,
            (cudf.Decimal64Dtype, cudf.CategoricalDtype),
        )
        if (not skip_promotion
                and hasattr(value, "dtype")
                and _is_non_decimal_numeric_dtype(value.dtype)
                and not is_integer(key)):
            # Normalize types: bring value and column to a common dtype.
            common_dtype = np.result_type(value.dtype,
                                          self._sr._column.dtype)
            value = value.astype(common_dtype)
            promoted = self._sr._column.astype(common_dtype)
            self._sr._column._mimic_inplace(promoted, inplace=True)

        self._sr._column[key] = value
예제 #2
0
 def _window_to_window_sizes(self, window):
     """
     Translate *window* into the window sizes used for rolling.

     An integer *window* is returned unchanged. Any other *window* is
     passed, together with the ``data_array_view`` of the index values,
     to ``cudautils.window_sizes_from_offset``.
     """
     if not is_integer(window):
         index_view = self.obj.index._values.data_array_view
         return cudautils.window_sizes_from_offset(index_view, window)
     return window
예제 #3
0
    def _normalize(self):
        """
        Validate and canonicalize ``self.window`` and ``self.min_periods``.

        Three kinds of *window* are handled:

        * A number: it must be a strictly positive integer, otherwise
          ``ValueError`` is raised. An unset *min_periods* defaults to
          the window size.

        * A ``DeviceNDArray`` or ``BaseIndexer``: both attributes are
          stored back as they are and no further conversion happens.

        * Anything else: only accepted when ``self.obj.index`` is a
          ``DatetimeIndex``. The value must convert to a single
          ``pd.Timedelta`` (it is then turned into a ``timedelta64``),
          ``self._time_window`` is set to ``True`` and an unset
          *min_periods* defaults to 1.

        For the first and third kind the resulting window is finally run
        through ``self._window_to_window_sizes``.
        """
        window = self.window
        min_periods = self.min_periods

        if is_number(window):
            # only allow integers
            if not is_integer(window):
                raise ValueError("window must be an integer")
            if window <= 0:
                raise ValueError("window cannot be zero or negative")
            if self.min_periods is None:
                min_periods = window
        elif isinstance(
                window,
                (numba.cuda.devicearray.DeviceNDArray, BaseIndexer)):
            # window is a device_array of window sizes or BaseIndexer;
            # nothing to convert.
            self.window = window
            self.min_periods = min_periods
            return
        else:
            index_is_datetime = isinstance(self.obj.index,
                                           cudf.core.index.DatetimeIndex)
            if not index_is_datetime:
                raise ValueError(
                    "window must be an integer for non datetime index")

            self._time_window = True

            try:
                delta = pd.to_timedelta(window)
                # to_timedelta will also convert np.arrays etc.,
                # so anything that is not a single Timedelta is rejected.
                if not isinstance(delta, pd.Timedelta):
                    raise ValueError
                window = delta.to_timedelta64()
            except ValueError as e:
                raise ValueError(
                    "window must be integer or convertible to a timedelta"
                ) from e
            if self.min_periods is None:
                min_periods = 1

        self.window = self._window_to_window_sizes(window)
        self.min_periods = min_periods
예제 #4
0
 def _window_to_window_sizes(self, window):
     """
     Compute window sizes for *window* with group boundaries applied.

     The work is always delegated to
     ``cudautils.grouped_window_sizes_from_offset`` together with
     ``self._group_starts``. For an integer *window* the first argument
     is the ``data_array_view`` of ``column.arange(len(self.obj))``;
     otherwise it is the ``data_array_view`` of the index values.
     """
     if is_integer(window):
         reference = column.arange(len(self.obj)).data_array_view
     else:
         reference = self.obj.index._values.data_array_view
     return cudautils.grouped_window_sizes_from_offset(
         reference, self._group_starts, window)
예제 #5
0
    def get_level_values(self, level):
        """
        Return the values of the requested level as an Index.

        A flat Index has exactly one level, so this method exists mainly
        for API compatibility with MultiIndex and simply hands back the
        calling object when *level* is valid.

        Parameters
        ----------
        level : int or str
            Either the name of this index or the integer position of the
            level, which must be 0.

        Returns
        -------
        Index
            The calling object itself.

        Raises
        ------
        IndexError
            If *level* is an integer other than 0 (and does not equal the
            index name).
        KeyError
            If *level* is not an integer and does not equal the index name.

        See Also
        --------
        cudf.core.multiindex.MultiIndex.get_level_values : Get values for
            a level of a MultiIndex.

        Examples
        --------
        >>> import cudf
        >>> idx = cudf.Index(["a", "b", "c"])
        >>> idx.get_level_values(0)
        StringIndex(['a' 'b' 'c'], dtype='object')
        """
        # The name match is checked first so that an index whose name
        # happens to be an integer can still be addressed by that name.
        if level == self.name:
            return self

        if not is_integer(level):
            raise KeyError(f"Requested level with name {level} not found")

        if level != 0:
            raise IndexError(
                f"Cannot get level: {level} for index with 1 level")
        return self
예제 #6
0
    def _loc_to_iloc(self, arg):
        """Translate the label-based indexer ``arg`` for the wrapped series.

        * Scalar (or zero-d array): when the index dtype is not a
          non-decimal numeric dtype, an integer ``cudf.Scalar`` yields its
          ``.value`` and a plain integer is returned unchanged. Every
          other scalar is looked up with ``find_first_value`` on the
          index values; a failed lookup raises ``KeyError``.
        * ``slice``: delegated to ``get_label_range_or_mask``.
        * ``MultiIndex`` (cudf or pandas; pandas is converted first):
          resolved through ``indices_from_labels``.
        * Anything else is wrapped in a Series. A boolean Series is
          returned as is; otherwise it is resolved through
          ``indices_from_labels`` and ``KeyError`` is raised if the
          result contains nulls.
        """
        if _is_scalar_or_zero_d_array(arg):
            if not _is_non_decimal_numeric_dtype(self._sr.index.dtype):
                # TODO: switch to cudf.utils.dtypes.is_integer(arg)
                if (isinstance(arg, cudf.Scalar)
                        and is_integer_dtype(arg.dtype)):
                    return arg.value
                if is_integer(arg):
                    return arg
            try:
                return self._sr.index._values.find_first_value(
                    arg, closest=False)
            except (TypeError, KeyError, IndexError, ValueError):
                raise KeyError("label scalar is out of bound")

        if isinstance(arg, slice):
            return get_label_range_or_mask(
                self._sr.index, arg.start, arg.stop, arg.step)

        if isinstance(arg, (cudf.MultiIndex, pd.MultiIndex)):
            if isinstance(arg, pd.MultiIndex):
                arg = cudf.MultiIndex.from_pandas(arg)
            return indices_from_labels(self._sr, arg)

        labels = cudf.core.series.Series(cudf.core.column.as_column(arg))
        if labels.dtype in (bool, np.bool_):
            return labels

        positions = indices_from_labels(self._sr, labels)
        if positions.null_count > 0:
            raise KeyError("label scalar is out of bound")
        return positions
예제 #7
0
def test_is_integer(obj, expect):
    """``types.is_integer(obj)`` must equal ``expect``."""
    actual = types.is_integer(obj)
    assert actual == expect
예제 #8
0
def test_pandas_agreement(obj):
    """Each listed ``types`` predicate must match its ``ptypes`` twin."""
    predicate_names = (
        "is_categorical_dtype",
        "is_numeric_dtype",
        "is_integer_dtype",
        "is_integer",
        "is_string_dtype",
    )
    for predicate in predicate_names:
        ours = getattr(types, predicate)(obj)
        theirs = getattr(ptypes, predicate)(obj)
        assert ours == theirs