def test_dataframe_setitem_scaler_bool_inconsistency():
    df = pd.DataFrame({"a": [1, 2, 3]})
    df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]})

    gdf = DataFrame({"a": [1, 2, 3]})
    gdf[[True, False, True]] = DataFrame({"a": [-1, -2]})
    assert_eq(df, gdf)

def test_dataframe_setitem_new_columns(df, arg, value):
    gdf = DataFrame.from_pandas(df)
    cudf_replace_value = value

    if isinstance(cudf_replace_value, pd.DataFrame):
        cudf_replace_value = DataFrame.from_pandas(value)

    df[arg] = value
    gdf[arg] = cudf_replace_value
    assert_eq(df, gdf, check_dtype=True)

def from_dlpack(pycapsule_obj):
    """Converts from a DLPack tensor to a cuDF object.

    DLPack is an open-source memory tensor structure:
    `dmlc/dlpack <https://github.com/dmlc/dlpack>`_.

    This function takes a PyCapsule object which contains a pointer to
    a DLPack tensor as input, and returns a cuDF object. This function
    deep copies the data in the DLPack tensor into a cuDF object.

    Parameters
    ----------
    pycapsule_obj : PyCapsule
        Input DLPack tensor pointer which is encapsulated in a PyCapsule
        object.

    Returns
    -------
    A cuDF DataFrame or Series depending on if the input DLPack tensor
    is 1D or 2D.

    Notes
    -----
    cuDF from_dlpack() assumes column-major (Fortran order) input. If the
    input tensor is row-major, transpose it before passing it to this
    function.
    """
    data, _ = libdlpack.from_dlpack(pycapsule_obj)

    if len(data) == 1:
        return Series._from_data(data)
    else:
        return DataFrame._from_data(data)

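# A minimal round-trip sketch for the function above. It assumes the public
# cudf.from_dlpack entry point and the to_dlpack() method on cuDF objects;
# these names are assumptions, not part of the snippet itself.
import cudf

ser = cudf.Series([1.0, 2.0, 3.0])
capsule = ser.to_dlpack()              # PyCapsule holding a DLPack tensor
roundtrip = cudf.from_dlpack(capsule)  # deep-copies back into a cuDF Series
assert roundtrip.equals(ser)
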
def from_dlpack(pycapsule_obj):
    """Converts from a DLPack tensor to a cuDF object.

    DLPack is an open-source memory tensor structure:
    `dmlc/dlpack <https://github.com/dmlc/dlpack>`_.

    This function takes a PyCapsule object which contains a pointer to
    a DLPack tensor as input, and returns a cuDF object. This function
    deep copies the data in the DLPack tensor into a cuDF object.

    Parameters
    ----------
    pycapsule_obj : PyCapsule
        Input DLPack tensor pointer which is encapsulated in a PyCapsule
        object.

    Returns
    -------
    A cuDF DataFrame or Series depending on if the input DLPack tensor
    is 1D or 2D.
    """
    res = libdlpack.from_dlpack(pycapsule_obj)

    if res._num_columns == 1:
        return Series(res._data[0])
    else:
        return DataFrame(data=res._data)

def read_feather(path, *args, **kwargs):
    """{docstring}"""
    warnings.warn(
        "Using CPU via PyArrow to read feather dataset, this may "
        "be GPU accelerated in the future"
    )
    pa_table = feather.read_table(path, *args, **kwargs)
    return DataFrame.from_arrow(pa_table)

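# Hypothetical usage of read_feather; the file path and column names below
# are illustrative only. Extra arguments are forwarded to
# pyarrow.feather.read_table, so pyarrow's `columns=` selection is assumed
# to apply.
import cudf

gdf = cudf.read_feather("data.feather", columns=["a", "b"])
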
def test_kernel_shallow_copy():
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
    )
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=False)
    sr = gdf["a"]
    add_one[1, len(sr)](sr.to_gpu_array())
    assert_eq(gdf, cdf)

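# The kernel-copy tests in this section launch `add_one`, a Numba CUDA
# kernel defined elsewhere in the test module. A minimal sketch of what
# such a kernel could look like (an assumption, not the repository's
# actual definition):
from numba import cuda


@cuda.jit
def add_one(arr):
    i = cuda.grid(1)  # absolute index of this thread
    if i < arr.shape[0]:
        arr[i] = arr[i] + 1  # mutate the device array in place
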
def test_cudf_dataframe_copy(copy_fn, ncols, data_type):
    pdf = pd.DataFrame()
    for i in range(ncols):
        pdf[chr(i + ord("a"))] = pd.Series(
            np.random.randint(0, 1000, 20)
        ).astype(data_type)
    df = DataFrame.from_pandas(pdf)
    copy_df = copy_fn(df)
    assert_eq(df, copy_df)

def test_kernel_deep_copy():
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
    )
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=True)
    sr = gdf["b"]
    add_one[1, len(sr)](sr._column.data_array_view)
    assert not gdf.to_string().split() == cdf.to_string().split()

def test_series_setitem_index():
    df = pd.DataFrame(
        data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3]
    )
    df["b"] = pd.Series(data=[12, 11, 10], index=[3, 2, 1])
    gdf = DataFrame(data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3])
    gdf["b"] = Series(data=[12, 11, 10], index=[3, 2, 1])
    assert_eq(df, gdf, check_dtype=False)

def test_dataframe_copy_shallow():
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
    )
    gdf = DataFrame.from_pandas(pdf)
    copy_pdf = pdf.copy(deep=False)
    copy_gdf = gdf.copy(deep=False)
    copy_pdf["b"] = [0, 0, 0]
    copy_gdf["b"] = [0, 0, 0]
    assert_eq(pdf["b"], copy_pdf["b"])
    assert_eq(gdf["b"], copy_gdf["b"])

def test_kernel_deep_copy():
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
    )
    gdf = DataFrame.from_pandas(pdf)
    cdf = gdf.copy(deep=True)
    sr = gdf["b"]
    # column.to_gpu_array calls to_dense_buffer which returns a copy
    # need to access buffer directly and then call gpu_array
    add_one[1, len(sr)](sr.data.to_gpu_array())
    assert not gdf.to_string().split() == cdf.to_string().split()

def test_dataframe_deep_copy_and_insert(copy_parameters):
    pdf = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
    )
    gdf = DataFrame.from_pandas(pdf)
    copy_pdf = copy_parameters["fn"](pdf)
    copy_gdf = copy_parameters["fn"](gdf)
    copy_pdf["b"] = [0, 0, 0]
    copy_gdf["b"] = [0, 0, 0]
    pdf_is_equal = np.array_equal(pdf["b"].values, copy_pdf["b"].values)
    gdf_is_equal = np.array_equal(
        gdf["b"].to_array(), copy_gdf["b"].to_array()
    )
    assert pdf_is_equal == copy_parameters["expected_equality"]
    assert gdf_is_equal == copy_parameters["expected_equality"]

def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type):
    pdf = pd.DataFrame()
    for i in range(ncols):
        pdf[chr(i + ord("a"))] = pd.Series(
            np.random.randint(0, 1000, 20)
        ).astype(data_type)
    df = DataFrame.from_pandas(pdf)
    copy_df = copy_fn(df)
    copy_pdf = copy_fn(pdf)
    copy_df["aa"] = pd.Series(np.random.randint(0, 1000, 20)).astype(
        data_type
    )
    copy_pdf["aa"] = pd.Series(np.random.randint(0, 1000, 20)).astype(
        data_type
    )
    assert not copy_pdf.to_string().split() == pdf.to_string().split()
    assert not copy_df.to_string().split() == df.to_string().split()

def test_setitem_dataframe_series_inplace(df):
    pdf = df
    gdf = DataFrame.from_pandas(pdf)
    pdf["a"].replace(1, 500, inplace=True)
    gdf["a"].replace(1, 500, inplace=True)

    assert_eq(pdf, gdf)

    psr_a = pdf["a"]
    gsr_a = gdf["a"]
    psr_a.replace(500, 501, inplace=True)
    gsr_a.replace(500, 501, inplace=True)

    assert_eq(pdf, gdf)

def from_dlpack(pycapsule_obj):
    """Converts from a DLPack tensor to a cuDF object.

    DLPack is an open-source memory tensor structure:
    `dmlc/dlpack <https://github.com/dmlc/dlpack>`_.

    This function takes a PyCapsule object which contains a pointer to
    a DLPack tensor as input, and returns a cuDF object. This function
    deep copies the data in the DLPack tensor into a cuDF object.

    Parameters
    ----------
    pycapsule_obj : PyCapsule
        Input DLPack tensor pointer which is encapsulated in a PyCapsule
        object.

    Returns
    -------
    A cuDF DataFrame or Series depending on if the input DLPack tensor
    is 1D or 2D.
    """
    try:
        res, valids = cpp_dlpack.from_dlpack(pycapsule_obj)
    except GDFError as err:
        if str(err) == "b'GDF_DATASET_EMPTY'":
            raise ValueError(
                "Cannot create a cuDF Object from a DLPack tensor of 0 size"
            )
        else:
            raise err

    cols = []
    for idx in range(len(valids)):
        mask = None
        if valids[idx]:
            mask = Buffer(valids[idx])
        cols.append(
            column.build_column(
                Buffer(res[idx]), dtype=res[idx].dtype, mask=mask
            )
        )
    if len(cols) == 1:
        return Series(cols[0])
    else:
        df = DataFrame()
        for idx, col in enumerate(cols):
            df[idx] = col
        return df

def _getitem_tuple_arg(self, arg):
    from cudf import MultiIndex
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.index import as_index

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns_df = self._get_column_selection(arg[1])
    columns_df._index = self._df._index

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        if isinstance(arg[0], slice):
            df = columns_df[arg[0]]
        else:
            df = columns_df.index._get_row_major(columns_df, arg[0])
        if (len(df) == 1 and len(columns_df) >= 1) and not (
            isinstance(arg[0], slice) or isinstance(arg[1], slice)
        ):
            # Pandas returns a numpy scalar in this case
            return df[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df
    else:
        df = DataFrame()
        for i, col in enumerate(columns_df._columns):
            # need Series() in case a scalar is returned
            df[i] = Series(col[arg[0]])

        df.index = as_index(columns_df.index[arg[0]])
        df.columns = columns_df.columns

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:  # we have a single row without an index
        df.index = as_index(self._df.index[arg[0]])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)

    if df.shape[0] == 0 and df.shape[1] == 0 and isinstance(arg[0], slice):
        from cudf.core.index import RangeIndex

        slice_len = len(self._df)
        start, stop, step = arg[0].indices(slice_len)
        df._index = RangeIndex(start, stop)
    return df

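# Behavior sketch for the iloc path above (assumed, mirroring the pandas
# semantics the code reproduces): two scalar positions downcast all the way
# to a scalar, a scalar in one position downcasts to a Series, and slices
# in both positions keep a DataFrame.
import cudf

gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
gdf.iloc[0, 1]       # numpy scalar: 4
gdf.iloc[0:2, 1]     # Series ("b" for rows 0..1)
gdf.iloc[0:2, 0:2]   # DataFrame
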
def test_cummin(dtype, nelem):
    if dtype == np.int8:
        # to keep data in range
        data = gen_rand(dtype, nelem, low=-2, high=2)
    else:
        data = gen_rand(dtype, nelem)

    decimal = 4 if dtype == np.float32 else 6

    # series
    gs = Series(data)
    ps = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gs.cummin().to_array(), ps.cummin(), decimal=decimal
    )

    # dataframe series (named series)
    gdf = DataFrame()
    gdf["a"] = Series(data)
    pdf = pd.DataFrame()
    pdf["a"] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gdf.a.cummin().to_array(), pdf.a.cummin(), decimal=decimal
    )

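# `gen_rand` is a helper from the test utilities, not defined in this
# section. A plausible sketch under assumed behavior (random data of the
# requested dtype, with the optional integer bounds used above):
import numpy as np


def gen_rand(dtype, size, low=0, high=100):
    dtype = np.dtype(dtype)
    if dtype.kind == "f":
        # uniform floats in [low, high)
        return np.random.uniform(low=low, high=high, size=size).astype(dtype)
    # integers in [low, high)
    return np.random.randint(low=low, high=high, size=size).astype(dtype)
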
def _getitem_tuple_arg(self, arg):
    from cudf.core.dataframe import Series, DataFrame
    from cudf.core.column import column
    from cudf.core.index import as_index
    from cudf import MultiIndex

    # Step 1: Gather columns
    columns_df = self._get_column_selection(arg[1])
    columns_df._index = self._df._index

    # Step 2: Gather rows
    if isinstance(columns_df.index, MultiIndex):
        return columns_df.index._get_row_major(columns_df, arg[0])
    else:
        df = DataFrame()
        for col in columns_df.columns:
            # need Series() in case a scalar is returned
            df[col] = Series(columns_df[col].loc[arg[0]])
        df.columns = columns_df.columns

    # Step 3: Gather index
    if df.shape[0] == 1:  # we have a single row
        if isinstance(arg[0], slice):
            start = arg[0].start
            if start is None:
                start = self._df.index[0]
            df.index = as_index(start)
        else:
            row_selection = column.as_column(arg[0])
            if pd.api.types.is_bool_dtype(row_selection.dtype):
                df.index = self._df.index.take(row_selection)
            else:
                df.index = as_index(row_selection)

    # Step 4: Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)
    return df

def _getitem_tuple_arg(self, arg):
    from cudf import MultiIndex
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.column import column_empty
    from cudf.core.index import as_index

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns = self._get_column_selection(arg[1])
    if isinstance(self._df.columns, MultiIndex):
        columns_df = self._df.columns._get_column_major(self._df, arg[1])
        if (
            len(columns_df) == 0
            and len(columns_df.columns) == 0
            and not isinstance(arg[0], slice)
        ):
            result = Series(column_empty(0, dtype="float64"), name=arg[0])
            result._index = columns_df.columns.copy(deep=False)
            return result
    else:
        if isinstance(arg[0], slice):
            columns_df = DataFrame()
            for i, col in enumerate(columns):
                columns_df.insert(i, col, self._df[col])
            columns_df._index = self._df._index
        else:
            columns_df = self._df._columns_view(columns)

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        df = columns_df.index._get_row_major(columns_df, arg[0])
        if (len(df) == 1 and len(columns_df) >= 1) and not (
            isinstance(arg[0], slice) or isinstance(arg[1], slice)
        ):
            # Pandas returns a numpy scalar in this case
            return df[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df
    else:
        df = DataFrame()
        for i, col in enumerate(columns_df._columns):
            # need Series() in case a scalar is returned
            df[i] = Series(col[arg[0]])

        df.index = as_index(columns_df.index[arg[0]])
        df.columns = columns_df.columns

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:  # we have a single row without an index
        if isinstance(arg[0], slice):
            start = arg[0].start
            if start is None:
                start = 0
            df.index = as_index(self._df.index[start])
        else:
            df.index = as_index(self._df.index[arg[0]])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        if isinstance(df.columns, MultiIndex):
            if len(df) > 0 and not (
                isinstance(arg[0], slice) or isinstance(arg[1], slice)
            ):
                return list(df._data.values())[0][0]
            elif df.shape[1] > 1:
                result = self._downcast_to_series(df, arg)
                result.index = df.columns
                return result
            elif not isinstance(arg[0], slice):
                if len(df._data) == 0:
                    return Series(
                        column_empty(0, dtype="float64"),
                        index=df.columns,
                        name=arg[0],
                    )
                else:
                    result_series = df[df.columns[0]]
                    result_series.index = df.columns
                    result_series.name = arg[0]
                    return result_series
            else:
                return df[df.columns[0]]
        return self._downcast_to_series(df, arg)

    if df.shape[0] == 0 and df.shape[1] == 0:
        from cudf.core.index import RangeIndex

        slice_len = arg[0].stop or len(self._df)
        start, stop, step = arg[0].indices(slice_len)
        df._index = RangeIndex(start, stop)
    return df

def _getitem_tuple_arg(self, arg):
    from cudf.core.dataframe import DataFrame
    from cudf.core.column import column
    from cudf.core.index import as_index
    from cudf.utils.cudautils import arange
    from cudf import MultiIndex

    # Step 1: Gather columns
    if isinstance(self._df.columns, MultiIndex):
        columns_df = self._df.columns._get_column_major(self._df, arg[1])
    else:
        columns = self._get_column_selection(arg[1])
        columns_df = DataFrame()
        for col in columns:
            columns_df.add_column(name=col, data=self._df[col])

    # Step 2: Gather rows
    if isinstance(columns_df.index, MultiIndex):
        return columns_df.index._get_row_major(columns_df, arg[0])
    else:
        if isinstance(self._df.columns, MultiIndex):
            if isinstance(arg[0], slice):
                start, stop, step = arg[0].indices(len(columns_df))
                indices = arange(start, stop, step)
                df = columns_df.take(indices)
            else:
                df = columns_df.take(arg[0])
        else:
            df = DataFrame()
            for col in columns_df.columns:
                df[col] = columns_df[col].loc[arg[0]]

    # Step 3: Gather index
    if df.shape[0] == 1:  # we have a single row
        if isinstance(arg[0], slice):
            start = arg[0].start
            if start is None:
                start = self._df.index[0]
            df.index = as_index(start)
        else:
            row_selection = column.as_column(arg[0])
            if pd.api.types.is_bool_dtype(row_selection.dtype):
                df.index = self._df.index.take(row_selection)
            else:
                df.index = as_index(row_selection)

    # Step 4: Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)
    return df

def test_dataframe_setitem_scaler_keyerror():
    df = DataFrame({"a": [1, 2, 3]})
    with pytest.raises(KeyError):
        df[["x"]] = 0

def where(
    frame: Union[Series, Index, DataFrame],
    cond: Any,
    other: Any = None,
    inplace: bool = False,
) -> Optional[Frame]:
    """
    Replace values where the condition is False.

    Parameters
    ----------
    cond : bool Series/DataFrame, array-like
        Where cond is True, keep the original value.
        Where False, replace with corresponding value from other.
        Callables are not supported.
    other : scalar, list of scalars, Series/DataFrame
        Entries where cond is False are replaced with the
        corresponding value from other. Callables are not
        supported. Default is None.

        DataFrame expects only a scalar, an array-like of scalars, or a
        dataframe with the same dimensions as frame.

        Series expects only a scalar or a series-like of the same length.
    inplace : bool, default False
        Whether to perform the operation in place on the data.

    Returns
    -------
    Same type as caller

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({"A": [1, 4, 5], "B": [3, 5, 8]})
    >>> df.where(df % 2 == 0, [-1, -1])
       A  B
    0 -1 -1
    1  4 -1
    2 -1  8

    >>> ser = cudf.Series([4, 3, 2, 1, 0])
    >>> ser.where(ser > 2, 10)
    0     4
    1     3
    2    10
    3    10
    4    10
    dtype: int64
    >>> ser.where(ser > 2)
    0       4
    1       3
    2    <NA>
    3    <NA>
    4    <NA>
    dtype: int64
    """

    if isinstance(frame, DataFrame):
        if hasattr(cond, "__cuda_array_interface__"):
            cond = DataFrame(
                cond, columns=frame._column_names, index=frame.index
            )
        elif (
            hasattr(cond, "__array_interface__")
            and cond.__array_interface__["shape"] != frame.shape
        ):
            raise ValueError("conditional must be same shape as self")
        elif not isinstance(cond, DataFrame):
            cond = frame.from_pandas(pd.DataFrame(cond))

        common_cols = set(frame._column_names).intersection(
            set(cond._column_names)
        )
        if len(common_cols) > 0:
            # If `frame` and `cond` have unequal indexes,
            # then re-index `cond`.
            if not frame.index.equals(cond.index):
                cond = cond.reindex(frame.index)
        else:
            if cond.shape != frame.shape:
                raise ValueError(
                    "Array conditional must be same shape as self"
                )
            # Setting `frame` column names to `cond`
            # as `cond` has no column names.
            cond.columns = frame.columns

        (source_df, others) = _normalize_columns_and_scalars_type(
            frame, other
        )
        if isinstance(other, Frame):
            others = others._data.columns

        out_df = DataFrame(index=frame.index)
        if len(frame._columns) != len(others):
            raise ValueError(
                "Replacement list length or number of dataframe columns "
                "should be equal to number of columns of dataframe"
            )
        for i, column_name in enumerate(frame._column_names):
            input_col = source_df._data[column_name]
            other_column = others[i]
            if column_name in cond._data:
                if isinstance(
                    input_col, cudf.core.column.CategoricalColumn
                ):
                    if cudf.utils.dtypes.is_scalar(other_column):
                        try:
                            other_column = input_col._encode(other_column)
                        except ValueError:
                            # When other is not present in categories,
                            # fill with Null.
                            other_column = None
                        other_column = cudf.Scalar(
                            other_column, dtype=input_col.codes.dtype
                        )
                    elif isinstance(
                        other_column, cudf.core.column.CategoricalColumn
                    ):
                        other_column = other_column.codes
                    input_col = input_col.codes

                result = cudf._lib.copying.copy_if_else(
                    input_col, other_column, cond._data[column_name]
                )

                if isinstance(
                    frame._data[column_name],
                    cudf.core.column.CategoricalColumn,
                ):
                    result = cudf.core.column.build_categorical_column(
                        categories=frame._data[column_name].categories,
                        codes=cudf.core.column.as_column(
                            result.base_data, dtype=result.dtype
                        ),
                        mask=result.base_mask,
                        size=result.size,
                        offset=result.offset,
                        ordered=frame._data[column_name].ordered,
                    )
            else:
                out_mask = cudf._lib.null_mask.create_null_mask(
                    len(input_col),
                    state=cudf._lib.null_mask.MaskState.ALL_NULL,
                )
                result = input_col.set_mask(out_mask)
            out_df[column_name] = frame[column_name].__class__(result)

        return frame._mimic_inplace(out_df, inplace=inplace)

    else:
        if isinstance(other, DataFrame):
            raise NotImplementedError(
                "cannot align with a higher dimensional Frame"
            )
        input_col = frame._data[frame.name]
        cond = cudf.core.column.as_column(cond)
        if len(cond) != len(frame):
            raise ValueError(
                "Array conditional must be same shape as self"
            )

        (input_col, other) = _normalize_columns_and_scalars_type(
            frame, other, inplace
        )

        if isinstance(input_col, cudf.core.column.CategoricalColumn):
            if cudf.utils.dtypes.is_scalar(other):
                try:
                    other = input_col._encode(other)
                except ValueError:
                    # When other is not present in categories,
                    # fill with Null.
                    other = None
                other = cudf.Scalar(other, dtype=input_col.codes.dtype)
            elif isinstance(other, cudf.core.column.CategoricalColumn):
                other = other.codes

            input_col = input_col.codes

        result = cudf._lib.copying.copy_if_else(input_col, other, cond)

        if isinstance(
            frame._data[frame.name], cudf.core.column.CategoricalColumn
        ):
            result = cudf.core.column.build_categorical_column(
                categories=cast(
                    cudf.core.column.CategoricalColumn,
                    frame._data[frame.name],
                ).categories,
                codes=cudf.core.column.as_column(
                    result.base_data, dtype=result.dtype
                ),
                mask=result.base_mask,
                size=result.size,
                offset=result.offset,
                ordered=cast(
                    cudf.core.column.CategoricalColumn,
                    frame._data[frame.name],
                ).ordered,
            )

        if isinstance(frame, Index):
            result = Index(result, name=frame.name)
        else:
            result = frame._copy_construct(data=result)

        return frame._mimic_inplace(result, inplace=inplace)

def _parse_tdf_gpu(tdf):
    """
    Parse the results of a select ipc_gpu into a GpuDataFrame

    Parameters
    ----------
    tdf : TDataFrame

    Returns
    -------
    gdf : GpuDataFrame
    """
    import pyarrow as pa
    from cudf.comm.gpuarrow import GpuArrowReader
    from cudf.core.dataframe import DataFrame
    from cudf._lib.arrow._cuda import Context, IpcMemHandle
    from numba import cuda

    ipc_handle = IpcMemHandle.from_buffer(pa.py_buffer(tdf.df_handle))
    ctx = Context()
    ipc_buf = ctx.open_ipc_buffer(ipc_handle)
    ipc_buf.context.synchronize()

    schema_buffer, shm_ptr = load_buffer(tdf.sm_handle, tdf.sm_size)

    buffer = pa.BufferReader(schema_buffer)
    schema = pa.read_schema(buffer)

    # Dictionary Memo functionality used to
    # deserialize on the C++ side is not
    # exposed on the pyarrow side, so we need to
    # handle this on our own.
    dict_memo = {}
    try:
        dict_batch_reader = pa.RecordBatchStreamReader(buffer)
        updated_fields = []

        for f in schema:
            if pa.types.is_dictionary(f.type):
                msg = dict_batch_reader.read_next_batch()
                dict_memo[f.name] = msg.column(0)
                updated_fields.append(pa.field(f.name, f.type.index_type))
            else:
                updated_fields.append(pa.field(f.name, f.type))

        schema = pa.schema(updated_fields)
    except pa.ArrowInvalid:
        # This message does not have any dictionary encoded
        # columns
        pass

    dtype = np.dtype(np.byte)
    darr = cuda.devicearray.DeviceNDArray(
        shape=ipc_buf.size,
        strides=dtype.itemsize,
        dtype=dtype,
        gpu_data=ipc_buf.to_numba(),
    )

    reader = GpuArrowReader(schema, darr)
    df = DataFrame()
    df.set_tdf = MethodType(set_tdf, df)
    df.get_tdf = MethodType(get_tdf, df)

    for k, v in reader.to_dict().items():
        if k in dict_memo:
            df[k] = pa.DictionaryArray.from_arrays(v, dict_memo[k])
        else:
            df[k] = v

    df.set_tdf(tdf)

    # free shared memory from Python
    # https://github.com/omnisci/pymapd/issues/46
    # https://github.com/omnisci/pymapd/issues/31
    free_sm = shmdt(ctypes.cast(shm_ptr, ctypes.c_void_p))  # noqa
    return df

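# How this parser would typically be reached, sketched under assumptions: a
# running OmniSci/MapD server, the pymapd client, and illustrative
# connection parameters. select_ipc_gpu ships the result set over CUDA IPC
# as a TDataFrame handle, which a parser like _parse_tdf_gpu decodes into a
# cuDF DataFrame.
#
#   import pymapd
#
#   con = pymapd.connect(
#       user="admin", password="HyperInteractive",
#       host="localhost", dbname="omnisci",
#   )
#   gdf = con.select_ipc_gpu("SELECT a, b FROM t")
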
def test_dataframe_setitem_bool_mask_scaler(df, arg, value):
    gdf = DataFrame.from_pandas(df)

    df[arg] = value
    gdf[arg] = value
    assert_eq(df, gdf)

def _getitem_tuple_arg(self, arg):
    from uuid import uuid4

    from cudf import MultiIndex
    from cudf.core.column import column
    from cudf.core.dataframe import DataFrame
    from cudf.core.index import as_index

    # Step 1: Gather columns
    if isinstance(arg, tuple):
        columns_df = self._get_column_selection(arg[1])
        columns_df._index = self._df._index
    else:
        columns_df = self._df

    # Step 2: Gather rows
    if isinstance(columns_df.index, MultiIndex):
        if isinstance(arg, (MultiIndex, pd.MultiIndex)):
            if isinstance(arg, pd.MultiIndex):
                arg = MultiIndex.from_pandas(arg)

            indices = indices_from_labels(columns_df, arg)
            return columns_df.take(indices)
        else:
            if isinstance(arg, tuple):
                return columns_df.index._get_row_major(columns_df, arg[0])
            else:
                return columns_df.index._get_row_major(columns_df, arg)
    else:
        if isinstance(arg[0], slice):
            out = get_label_range_or_mask(
                columns_df.index, arg[0].start, arg[0].stop, arg[0].step
            )
            if isinstance(out, slice):
                df = columns_df._slice(out)
            else:
                df = columns_df._apply_boolean_mask(out)
        else:
            tmp_arg = arg
            if is_scalar(arg[0]):
                # If a scalar, there is possibility of having duplicates.
                # Join would get all the duplicates. So, converting it to
                # an array kind.
                tmp_arg = ([tmp_arg[0]], tmp_arg[1])
            if len(tmp_arg[0]) == 0:
                return columns_df._empty_like(keep_index=True)
            tmp_arg = (column.as_column(tmp_arg[0]), tmp_arg[1])

            if pd.api.types.is_bool_dtype(tmp_arg[0]):
                df = columns_df._apply_boolean_mask(tmp_arg[0])
            else:
                tmp_col_name = str(uuid4())
                other_df = DataFrame(
                    {tmp_col_name: column.arange(len(tmp_arg[0]))},
                    index=as_index(tmp_arg[0]),
                )
                df = other_df.join(columns_df, how="inner")
                # as join is not assigning any names to index,
                # update it over here
                df.index.name = columns_df.index.name
                df = df.sort_values(tmp_col_name)
                df.drop(columns=[tmp_col_name], inplace=True)
                # There were no indices found
                if len(df) == 0:
                    raise KeyError(arg)

    # Step 3: Gather index
    if df.shape[0] == 1:  # we have a single row
        if isinstance(arg[0], slice):
            start = arg[0].start
            if start is None:
                start = self._df.index[0]
            df.index = as_index(start)
        else:
            row_selection = column.as_column(arg[0])
            if pd.api.types.is_bool_dtype(row_selection.dtype):
                df.index = self._df.index.take(row_selection)
            else:
                df.index = as_index(row_selection)

    # Step 4: Downcast
    if self._can_downcast_to_series(df, arg):
        return self._downcast_to_series(df, arg)
    return df

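# Behavior sketch for the loc path above (assumed, mirroring pandas): a
# scalar row label can match duplicate index entries, which is why the
# code widens a scalar label to a list and joins against the index.
import cudf

gdf = cudf.DataFrame({"a": [1, 2, 3]}, index=["x", "y", "x"])
gdf.loc["x", "a"]         # both rows labelled "x" are returned
gdf.loc[["x", "y"], "a"]  # explicit list of labels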