def __setitem__(self, key, value):
    """
    Assign *value* to the entries of the wrapped series' column
    selected by *key*.

    Parameters
    ----------
    key : indexer
        Passed through to the column's item assignment. A tuple is
        converted to a list first.
    value : scalar, list, dict or column-like
        Scalars are converted with ``to_cudf_compatible_scalar``.
        A ``list``/``dict`` assigned into a list- or struct-typed column
        is forwarded untouched; anything else goes through
        ``column.as_column``.

    Notes
    -----
    When the column is neither decimal64 nor categorical, *value* has a
    non-decimal numeric dtype, and *key* is not a single integer, both
    *value* and the column are cast to ``np.result_type`` of their
    dtypes before the assignment.
    """
    from cudf.core.column import column

    if isinstance(key, tuple):
        key = list(key)

    # Coerce value into a scalar or a column.
    if is_scalar(value):
        value = to_cudf_compatible_scalar(value)
    else:
        # Python lists/dicts are valid element values for nested
        # columns, so they are left for the column to interpret.
        is_nested_element = isinstance(value, (list, dict)) and isinstance(
            self._sr._column.dtype, (cudf.ListDtype, cudf.StructDtype)
        )
        if not is_nested_element:
            value = column.as_column(value)

    # Short-circuit order matches the original nested checks exactly.
    needs_common_dtype = (
        not isinstance(
            self._sr._column.dtype,
            (cudf.Decimal64Dtype, cudf.CategoricalDtype),
        )
        and hasattr(value, "dtype")
        and _is_non_decimal_numeric_dtype(value.dtype)
        and not is_integer(key)
    )
    if needs_common_dtype:
        # Normalize both sides to a dtype able to hold either of them.
        common_dtype = np.result_type(value.dtype, self._sr._column.dtype)
        value = value.astype(common_dtype)
        self._sr._column._mimic_inplace(
            self._sr._column.astype(common_dtype), inplace=True
        )

    self._sr._column[key] = value
def _window_to_window_sizes(self, window):
    """
    Turn the *window* argument into window sizes.

    An integer window is already a size and is returned unchanged.
    Any other window is treated as an offset and handed, together with
    the data view of the object's index values, to
    ``cudautils.window_sizes_from_offset``.
    """
    if not is_integer(window):
        index_view = self.obj.index._values.data_array_view
        return cudautils.window_sizes_from_offset(index_view, window)
    return window
def _normalize(self):
    """
    Validate and normalize ``self.window`` and ``self.min_periods``.

    *window* may be:

    * An integer: the window size. Must be strictly positive. When
      *min_periods* was not given it defaults to the window size.
    * A device array of window sizes or a ``BaseIndexer``: stored
      as-is, together with *min_periods*, with no further processing.
    * Anything convertible to a single ``pd.Timedelta``: only accepted
      when the object's index is a ``DatetimeIndex``. The offset is
      converted into window sizes, and *min_periods* defaults to 1
      when it was not given.

    Raises
    ------
    ValueError
        If a numeric window is not an integer or is not positive, if a
        non-integer window is used with a non-datetime index, or if the
        window cannot be converted to a single timedelta.
    """
    window = self.window
    min_periods = self.min_periods

    if is_number(window):
        # Only integers are accepted among numeric windows.
        if not is_integer(window):
            raise ValueError("window must be an integer")
        if window <= 0:
            raise ValueError("window cannot be zero or negative")
        if self.min_periods is None:
            min_periods = window
    elif isinstance(
        window, (numba.cuda.devicearray.DeviceNDArray, BaseIndexer)
    ):
        # Pre-computed window sizes or an indexer: keep them untouched.
        self.window = window
        self.min_periods = min_periods
        return
    else:
        if not isinstance(self.obj.index, cudf.core.index.DatetimeIndex):
            raise ValueError(
                "window must be an integer for non datetime index"
            )

        self._time_window = True

        try:
            offset = pd.to_timedelta(window)
            # to_timedelta also accepts array-likes (np.arrays etc.);
            # only a single Timedelta is a valid window.
            if not isinstance(offset, pd.Timedelta):
                raise ValueError
            window = offset.to_timedelta64()
        except ValueError as e:
            raise ValueError(
                "window must be integer or convertible to a timedelta"
            ) from e

        if self.min_periods is None:
            min_periods = 1

    self.window = self._window_to_window_sizes(window)
    self.min_periods = min_periods
def _window_to_window_sizes(self, window):
    """
    Compute per-group window sizes for *window*.

    The sizes always come from
    ``cudautils.grouped_window_sizes_from_offset``; only its first
    argument depends on the kind of window:

    * integer window: the data view of ``column.arange(len(self.obj))``
    * any other window: the data view of the object's index values

    ``self._group_starts`` and *window* are forwarded unchanged.
    """
    if is_integer(window):
        reference_view = column.arange(len(self.obj)).data_array_view
    else:
        reference_view = self.obj.index._values.data_array_view

    return cudautils.grouped_window_sizes_from_offset(
        reference_view,
        self._group_starts,
        window,
    )
def get_level_values(self, level):
    """
    Return the values of the requested level as an Index.

    This exists mainly for API compatibility with MultiIndex, where it
    extracts a single level. An Index has exactly one level, so the
    calling object itself is returned.

    Parameters
    ----------
    level : int or str
        Either the integer position of the level or its name.

    Returns
    -------
    Index
        The calling object.

    Raises
    ------
    IndexError
        If *level* is an integer other than 0 (and is not equal to the
        index name).
    KeyError
        If *level* is neither an integer nor equal to the index name.

    See Also
    --------
    cudf.core.multiindex.MultiIndex.get_level_values : Get values for
        a level of a MultiIndex.

    Notes
    -----
    For Index, an integer level must be 0 because there is only one
    level.

    Examples
    --------
    >>> import cudf
    >>> idx = cudf.Index(["a", "b", "c"])
    >>> idx.get_level_values(0)
    StringIndex(['a' 'b' 'c'], dtype='object')
    """
    # A match on the name wins, whatever the type of ``level``.
    if level == self.name:
        return self

    if not is_integer(level):
        raise KeyError(f"Requested level with name {level} not found")

    if level != 0:
        raise IndexError(
            f"Cannot get level: {level} for index with 1 level"
        )
    return self
def _loc_to_iloc(self, arg):
    """
    Translate the label-based indexer *arg* into something usable for
    positional indexing on the wrapped series.

    Handling depends on the kind of *arg*:

    * scalar / zero-d array: when the series index is not of a
      non-decimal numeric dtype, an integer (or integer-typed
      ``cudf.Scalar``) is returned as-is. Otherwise the first
      occurrence of the label in the index values is looked up.
    * ``slice``: delegated to ``get_label_range_or_mask``.
    * ``cudf.MultiIndex`` / ``pd.MultiIndex``: a pandas MultiIndex is
      converted to cudf, then resolved with ``indices_from_labels``.
    * anything else: wrapped in a Series. A boolean Series is returned
      unchanged (a mask); otherwise labels are resolved with
      ``indices_from_labels``.

    Raises
    ------
    KeyError
        If a scalar label lookup fails (the underlying error is attached
        as ``__cause__``), or if any label in a list-like *arg* could
        not be resolved (the resolved indices contain nulls).
    """
    if _is_scalar_or_zero_d_array(arg):
        if not _is_non_decimal_numeric_dtype(self._sr.index.dtype):
            # TODO: switch to cudf.utils.dtypes.is_integer(arg)
            if isinstance(arg, cudf.Scalar) and is_integer_dtype(arg.dtype):
                return arg.value
            if is_integer(arg):
                return arg
        try:
            return self._sr.index._values.find_first_value(
                arg, closest=False
            )
        except (TypeError, KeyError, IndexError, ValueError) as err:
            # Chain explicitly so the original lookup failure is kept
            # as the direct cause of the KeyError seen by the caller.
            raise KeyError("label scalar is out of bound") from err

    if isinstance(arg, slice):
        return get_label_range_or_mask(
            self._sr.index, arg.start, arg.stop, arg.step
        )

    if isinstance(arg, (cudf.MultiIndex, pd.MultiIndex)):
        if isinstance(arg, pd.MultiIndex):
            arg = cudf.MultiIndex.from_pandas(arg)
        return indices_from_labels(self._sr, arg)

    arg = cudf.core.series.Series(cudf.core.column.as_column(arg))
    if arg.dtype in (bool, np.bool_):
        # Boolean input is a mask, not a collection of labels.
        return arg

    indices = indices_from_labels(self._sr, arg)
    if indices.null_count > 0:
        raise KeyError("label scalar is out of bound")
    return indices
def test_is_integer(obj, expect):
    """``types.is_integer(obj)`` must equal the expected flag."""
    outcome = types.is_integer(obj)
    assert outcome == expect
def test_pandas_agreement(obj):
    """
    Each predicate in ``types`` must give the same answer for *obj* as
    the predicate of the same name in ``ptypes``.
    """
    # Checked in this order; the first disagreement fails the test.
    predicate_names = (
        "is_categorical_dtype",
        "is_numeric_dtype",
        "is_integer_dtype",
        "is_integer",
        "is_string_dtype",
    )
    for predicate_name in predicate_names:
        ours = getattr(types, predicate_name)
        theirs = getattr(ptypes, predicate_name)
        assert ours(obj) == theirs(obj)