def test_rangeindex_get_slice_bound(bounds, indices, side, kind):
    """Compare ``get_slice_bound`` between pandas and cudf RangeIndex.

    Both indexes are built from the ``(start, stop)`` pair in ``bounds``;
    each label in ``indices`` is then looked up with the given ``side`` and
    ``kind`` and the two results must be equal.
    """
    lower, upper = bounds
    reference = pd.RangeIndex(lower, upper)
    candidate = RangeIndex(lower, upper)

    for label in indices:
        # pandas result is the expected value, cudf result is under test
        expected = reference.get_slice_bound(label, side, kind)
        actual = candidate.get_slice_bound(label, side, kind)
        assert expected == actual
def test_index_comparision():
    """A RangeIndex and a GenericIndex over the same values are ``equals``."""
    first, last = 10, 34
    range_idx = RangeIndex(first, last)
    generic_idx = GenericIndex(np.arange(first, last))

    # Equality must hold in both directions
    assert range_idx.equals(generic_idx)
    assert generic_idx.equals(range_idx)

    # Dropping the last element breaks equality unless both sides drop it
    assert not range_idx[:-1].equals(generic_idx)
    assert range_idx[:-1].equals(generic_idx[:-1])
def test_rangeindex_get_slice_bound_step(bounds, label, side, kind):
    """Compare ``get_slice_bound`` for a stepped RangeIndex.

    ``bounds`` unpacks to ``(start, stop, step)``; the same triple is used to
    build the pandas and the cudf index before looking up ``label``.
    """
    lower, upper, stride = bounds
    range_args = (lower, upper, stride)

    reference = pd.RangeIndex(*range_args)
    candidate = RangeIndex(*range_args)

    expected = reference.get_slice_bound(label, side, kind)
    actual = candidate.get_slice_bound(label, side, kind)
    assert expected == actual
def test_index_immutable():
    """Item assignment on RangeIndex and GenericIndex raises ``TypeError``."""
    first, last = 10, 34

    range_idx = RangeIndex(first, last)
    with pytest.raises(TypeError):
        range_idx[1] = 5

    generic_idx = GenericIndex(np.arange(first, last))
    with pytest.raises(TypeError):
        generic_idx[1] = 5
def create_df(f, m, n):
    """Build a cudf DataFrame of random float32 columns.

    An ``(m, n)`` array is drawn from ``np.random.uniform(-1, 1)``; each of
    its ``n`` columns is cast to float32 and handed to ``cudf.DataFrame`` as
    an ``(i, values)`` pair. The frame's index is
    ``RangeIndex(f * m, f * m + m, 1)``.
    """
    samples = np.random.uniform(-1, 1, (m, n))
    column_pairs = [(j, samples[:, j].astype(np.float32)) for j in range(n)]
    row_index = RangeIndex(f * m, f * m + m, 1)
    return cudf.DataFrame(column_pairs, index=row_index)
def test_index_find_label_range_rangeindex():
    """Cudf specific: check ``RangeIndex.find_label_range`` results.

    Each case is ``((first, last), expected)`` where ``expected`` is the
    pair that ``find_label_range(first, last)`` must return.
    """
    # step > 0
    # labels: 3, 8, 13, 18
    ascending = RangeIndex(3, 20, 5)
    ascending_cases = [
        ((3, 8), (0, 2)),
        ((0, 7), (0, 1)),
        ((3, 19), (0, 4)),
        ((2, 21), (0, 4)),
    ]
    for (first, last), expected in ascending_cases:
        assert ascending.find_label_range(first, last) == expected

    # step < 0
    # labels: 20, 15, 10, 5
    descending = RangeIndex(20, 3, -5)
    descending_cases = [
        ((15, 10), (1, 3)),
        ((10, 0), (2, 4)),
        ((30, 13), (0, 2)),
        ((30, 0), (0, 4)),
    ]
    for (first, last), expected in descending_cases:
        assert descending.find_label_range(first, last) == expected
def _getitem_tuple_arg(self, arg):
    """Resolve a two-element positional key against ``self._df``.

    ``arg[1]`` selects columns (via ``self._get_column_selection``) and
    ``arg[0]`` selects rows. The row selector may be a slice, a scalar, or
    anything ``column.as_column`` accepts (boolean masks are applied as a
    mask, everything else is gathered). The result is downcast to a Series
    when ``self._can_downcast_to_series`` says so; otherwise a DataFrame is
    returned.
    """
    from cudf import MultiIndex
    from cudf.core.column import column
    from cudf.core.index import as_index

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns_df = self._get_column_selection(arg[1])
    columns_df._index = self._df._index

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        # MultiIndex rows are fully handled here; every path returns.
        if isinstance(arg[0], slice):
            df = columns_df[arg[0]]
        else:
            df = columns_df.index._get_row_major(columns_df, arg[0])

        key_has_slice = isinstance(arg[0], slice) or isinstance(
            arg[1], slice
        )
        if len(df) == 1 and len(columns_df) >= 1 and not key_has_slice:
            # Pandas returns a numpy scalar in this case
            return df.iloc[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df

    if isinstance(arg[0], slice):
        df = columns_df._slice(arg[0])
    elif is_scalar(arg[0]):
        # Normalise a negative position, then take a one-row slice
        position = arg[0]
        if position < 0:
            position += len(columns_df)
        df = columns_df._slice(slice(position, position + 1, 1))
    else:
        # Rebind ``arg`` so later steps see the row key as a column
        arg = (column.as_column(arg[0]), arg[1])
        if pd.api.types.is_bool_dtype(arg[0]):
            df = columns_df._apply_boolean_mask(arg[0])
        else:
            df = columns_df._gather(arg[0])

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:
        # we have a single row without an index
        df.index = as_index(self._df.index[arg[0]])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)

    result_is_empty = df.shape[0] == 0 and df.shape[1] == 0
    if result_is_empty and isinstance(arg[0], slice):
        from cudf.core.index import RangeIndex

        # Give the empty frame an index derived from the row slice
        frame_len = len(self._df)
        start, stop, _ = arg[0].indices(frame_len)
        df._index = RangeIndex(start, stop)
    return df
def _linear_interpolation(column, index=None):
    """
    Interpolate over a float column, treating the values as evenly spaced
    along the x-axis. For example, the data [1.0, NaN, 3.0] is interpolated
    as if the NaN sat half way between the two valid values, which yields
    [1.0, 2.0, 3.0].

    The ``index`` argument is not consulted: a ``RangeIndex`` running from 0
    to ``len(column)`` with step 1 is always used in its place.
    """
    evenly_spaced = RangeIndex(start=0, stop=len(column), step=1)
    return _index_or_values_interpolation(column, index=evenly_spaced)
def test_range_index(testrange):
    """Uniqueness/monotonicity flags of cudf RangeIndex match pandas."""
    # NOTE(review): the cudf index is built without ``testrange[2]`` while
    # the pandas one receives it as ``step`` - confirm this is intended.
    index = RangeIndex(start=testrange[0], stop=testrange[1])
    index_pd = pd.RangeIndex(
        start=testrange[0], stop=testrange[1], step=testrange[2]
    )

    compared_properties = (
        "is_unique",
        "is_monotonic",
        "is_monotonic_increasing",
        "is_monotonic_decreasing",
    )
    for prop in compared_properties:
        assert getattr(index, prop) == getattr(index_pd, prop)
def test_index_rangeindex_search_range():
    """``search_range`` finds every RangeIndex label at its own position.

    For the label at position ``pos`` the "left" search must return ``pos``
    and the "right" search must return ``pos + 1``.
    """
    # step > 0
    ridx = RangeIndex(-13, 17, 4)
    first, stride = ridx._start, ridx._step
    end = first + stride * len(ridx)

    for pos in range(len(ridx)):
        label = ridx[pos]
        left = search_range(first, end, label, stride, side="left")
        assert pos == left
        right = search_range(first, end, label, stride, side="right")
        assert pos + 1 == right
def _getitem_tuple_arg(self, arg):
    """Resolve a two-element positional key against ``self._df``.

    ``arg[1]`` selects columns (via ``self._get_column_selection``) and
    ``arg[0]`` selects rows. For a non-MultiIndex frame the rows are taken
    column by column and reassembled into a new DataFrame. The result is
    downcast to a Series when ``self._can_downcast_to_series`` says so.
    """
    from cudf import MultiIndex
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.index import as_index

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns_df = self._get_column_selection(arg[1])
    columns_df._index = self._df._index

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        # MultiIndex rows are fully handled here; every path returns.
        if isinstance(arg[0], slice):
            df = columns_df[arg[0]]
        else:
            df = columns_df.index._get_row_major(columns_df, arg[0])

        key_has_slice = isinstance(arg[0], slice) or isinstance(
            arg[1], slice
        )
        if len(df) == 1 and len(columns_df) >= 1 and not key_has_slice:
            # Pandas returns a numpy scalar in this case
            return df[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df

    df = DataFrame()
    for position, source_col in enumerate(columns_df._columns):
        # need Series() in case a scalar is returned
        df[position] = Series(source_col[arg[0]])

    df.index = as_index(columns_df.index[arg[0]])
    df.columns = columns_df.columns

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:
        # we have a single row without an index
        df.index = as_index(self._df.index[arg[0]])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)

    result_is_empty = df.shape[0] == 0 and df.shape[1] == 0
    if result_is_empty and isinstance(arg[0], slice):
        from cudf.core.index import RangeIndex

        # Give the empty frame an index derived from the row slice
        frame_len = len(self._df)
        start, stop, _ = arg[0].indices(frame_len)
        df._index = RangeIndex(start, stop)
    return df
def test_rangeindex_slice_attr_name():
    """Slicing a RangeIndex keeps its ``name`` attribute."""
    first, last = 0, 10
    named_idx = RangeIndex(first, last, "myindex")
    sliced_idx = named_idx[0:9]
    assert_eq(named_idx.name, sliced_idx.name)
def _getitem_tuple_arg(self, arg):
    """Resolve a two-element positional key against ``self._df``.

    ``arg[1]`` selects columns and ``arg[0]`` selects rows. The work is
    split into four steps: gather columns, gather rows, restore the index
    of a single-row result, and downcast to a Series (or scalar) when
    ``self._can_downcast_to_series`` allows it.

    Returns a DataFrame, a Series, or a single element, depending on the
    shape of the selection and on whether the columns/rows are a
    MultiIndex.
    """
    from cudf import MultiIndex
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.column import column_empty
    from cudf.core.index import as_index

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns = self._get_column_selection(arg[1])
    if isinstance(self._df.columns, MultiIndex):
        columns_df = self._df.columns._get_column_major(self._df, arg[1])
        if (
            len(columns_df) == 0
            and len(columns_df.columns) == 0
            and not isinstance(arg[0], slice)
        ):
            # Nothing selected and a non-slice row key: return an empty
            # float64 Series indexed by the (empty) selected columns.
            result = Series(column_empty(0, dtype="float64"), name=arg[0])
            result._index = columns_df.columns.copy(deep=False)
            return result
    else:
        if isinstance(arg[0], slice):
            columns_df = DataFrame()
            for i, col in enumerate(columns):
                columns_df.insert(i, col, self._df[col])
            columns_df._index = self._df._index
        else:
            columns_df = self._df._columns_view(columns)

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        df = columns_df.index._get_row_major(columns_df, arg[0])
        if (len(df) == 1 and len(columns_df) >= 1) and not (
            isinstance(arg[0], slice) or isinstance(arg[1], slice)
        ):
            # Pandas returns a numpy scalar in this case
            return df[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df
    else:
        df = DataFrame()
        for i, col in enumerate(columns_df._columns):
            # need Series() in case a scalar is returned
            df[i] = Series(col[arg[0]])

        df.index = as_index(columns_df.index[arg[0]])
        df.columns = columns_df.columns

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:
        # we have a single row without an index
        if isinstance(arg[0], slice):
            start = arg[0].start
            if start is None:
                start = 0
            df.index = as_index(self._df.index[start])
        else:
            df.index = as_index(self._df.index[arg[0]])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        if isinstance(df.columns, MultiIndex):
            if len(df) > 0 and not (
                isinstance(arg[0], slice) or isinstance(arg[1], slice)
            ):
                # Scalar row key and scalar column key: first element of
                # the first stored column.
                return list(df._data.values())[0][0]
            elif df.shape[1] > 1:
                result = self._downcast_to_series(df, arg)
                result.index = df.columns
                return result
            elif not isinstance(arg[0], slice):
                if len(df._data) == 0:
                    return Series(
                        column_empty(0, dtype="float64"),
                        index=df.columns,
                        name=arg[0],
                    )
                else:
                    result_series = df[df.columns[0]]
                    result_series.index = df.columns
                    result_series.name = arg[0]
                    return result_series
            else:
                return df[df.columns[0]]
        return self._downcast_to_series(df, arg)

    # Only a slice row key can describe a range for the empty result; a
    # scalar or list key has no ``indices`` method, so leave ``df`` as is.
    if df.shape[0] == 0 and df.shape[1] == 0 and isinstance(arg[0], slice):
        from cudf.core.index import RangeIndex

        # ``slice.indices`` expects the length of the sequence being
        # sliced, so pass the frame length (not the slice's own ``stop``,
        # which may exceed the frame or be negative).
        start, stop, _ = arg[0].indices(len(self._df))
        df._index = RangeIndex(start, stop)
    return df
def test_rangeindex_contains():
    """``in`` on a RangeIndex includes ``stop - 1`` and excludes ``stop``."""
    membership_cases = ((True, 9), (False, 10))
    for expected, value in membership_cases:
        # A fresh index is built for every membership check
        assert_eq(expected, value in RangeIndex(start=0, stop=10, name="Index"))