def _getitem_tuple_arg(self, arg):
    """Resolve a ``(rows, columns)`` tuple key against ``self._df``.

    The lookup runs in four stages: select the columns named by
    ``arg[1]``, select the rows named by ``arg[0]``, rebuild the index
    when exactly one row came back, and finally downcast to a Series
    when ``self._can_downcast_to_series`` allows it.

    NOTE(review): rows are fetched through ``.loc`` on each column, so
    this is presumably the label-based indexer -- the enclosing class is
    not visible here; confirm.

    Parameters
    ----------
    arg : tuple
        ``arg[0]`` is the row key and ``arg[1]`` is the column key.

    Returns
    -------
    DataFrame or Series
        Whatever the MultiIndex helpers return on the early-exit paths,
        otherwise the gathered frame or its downcast form.
    """
    from cudf import MultiIndex
    from cudf.core.column import column
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.index import as_index
    from cudf.utils.cudautils import arange

    row_key, col_key = arg[0], arg[1]
    has_multi_columns = isinstance(self._df.columns, MultiIndex)

    # Step 1: Gather columns
    if has_multi_columns:
        columns_df = self._df.columns._get_column_major(self._df, col_key)
        if isinstance(columns_df, Series):
            # The column selection already collapsed to a Series.
            return columns_df
    else:
        columns_df = DataFrame(index=self._df.index)
        selected = self._get_column_selection(col_key)
        for position, name in enumerate(selected):
            columns_df.insert(position, name, self._df[name])

    # Step 2: Gather rows
    if isinstance(columns_df.index, MultiIndex):
        return columns_df.index._get_row_major(columns_df, row_key)

    if has_multi_columns:
        if isinstance(row_key, slice):
            # Expand the slice into explicit row positions for take().
            positions = arange(*row_key.indices(len(columns_df)))
        else:
            positions = row_key
        df = columns_df.take(positions)
    else:
        df = DataFrame()
        for name in columns_df.columns:
            # Wrap in Series() because .loc may hand back a scalar.
            df[name] = Series(columns_df[name].loc[row_key])
        df.columns = columns_df.columns

    # Step 3: Gather index (only when a single row was selected)
    if df.shape[0] == 1:
        if isinstance(row_key, slice):
            # An open-ended slice falls back to the first index entry.
            label = (
                self._df.index[0] if row_key.start is None else row_key.start
            )
            df.index = as_index(label)
        else:
            row_selection = column.as_column(row_key)
            if pd.api.types.is_bool_dtype(row_selection.dtype):
                # Boolean key: pass it to the original index's take().
                df.index = self._df.index.take(row_selection)
            else:
                df.index = as_index(row_selection)

    # Step 4: Downcast
    if not self._can_downcast_to_series(df, arg):
        return df
    return self._downcast_to_series(df, arg)
def _getitem_tuple_arg(self, arg):
    """Positional (iloc) lookup of a ``(rows, columns)`` tuple key.

    The lookup runs in four stages: gather the columns selected by
    ``arg[1]``, gather the rows selected by ``arg[0]``, rebuild the index
    when exactly one row came back, and downcast when
    ``self._can_downcast_to_series`` allows it.  MultiIndex rows and
    MultiIndex columns each have dedicated early-return handling.

    Parameters
    ----------
    arg : tuple
        ``arg[0]`` is the row key and ``arg[1]`` is the column key.

    Returns
    -------
    DataFrame, Series or scalar
        A single element when neither key is a slice on the MultiIndex
        paths, a Series when the result can be downcast, otherwise a
        DataFrame.
    """
    from cudf import MultiIndex
    from cudf.core.column import column_empty
    from cudf.core.dataframe import DataFrame, Series
    from cudf.core.index import RangeIndex, as_index

    row_key, col_key = arg[0], arg[1]
    rows_are_slice = isinstance(row_key, slice)
    neither_is_slice = not rows_are_slice and not isinstance(col_key, slice)

    # Iloc Step 1:
    # Gather the columns specified by the second tuple arg
    columns = self._get_column_selection(col_key)
    if isinstance(self._df.columns, MultiIndex):
        columns_df = self._df.columns._get_column_major(self._df, col_key)
        if (
            len(columns_df) == 0
            and len(columns_df.columns) == 0
            and not rows_are_slice
        ):
            # Nothing selected at all: return an empty float64 Series
            # whose index is the (empty) column index.
            empty = Series(column_empty(0, dtype="float64"), name=row_key)
            empty._index = columns_df.columns.copy(deep=False)
            return empty
    elif rows_are_slice:
        columns_df = DataFrame()
        for position, name in enumerate(columns):
            columns_df.insert(position, name, self._df[name])
        columns_df._index = self._df._index
    else:
        columns_df = self._df._columns_view(columns)

    # Iloc Step 2:
    # Gather the rows specified by the first tuple arg
    if isinstance(columns_df.index, MultiIndex):
        df = columns_df.index._get_row_major(columns_df, row_key)
        if len(df) == 1 and len(columns_df) >= 1 and neither_is_slice:
            # Pandas returns a numpy scalar in this case
            return df[0]
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)
        return df

    df = DataFrame()
    for position, col in enumerate(columns_df._columns):
        # Wrap in Series() because indexing may hand back a scalar.
        df[position] = Series(col[row_key])
    df.index = as_index(columns_df.index[row_key])
    df.columns = columns_df.columns

    # Iloc Step 3:
    # Reindex
    if df.shape[0] == 1:  # we have a single row without an index
        if rows_are_slice:
            # An open-ended slice starts at position 0.
            position = 0 if row_key.start is None else row_key.start
        else:
            position = row_key
        df.index = as_index(self._df.index[position])

    # Iloc Step 4:
    # Downcast
    if self._can_downcast_to_series(df, arg):
        if not isinstance(df.columns, MultiIndex):
            return self._downcast_to_series(df, arg)

        if len(df) > 0 and neither_is_slice:
            # Single element: first value of the first stored column.
            first_column = list(df._data.values())[0]
            return first_column[0]

        if df.shape[1] > 1:
            result = self._downcast_to_series(df, arg)
            result.index = df.columns
            return result

        if rows_are_slice:
            return df[df.columns[0]]

        if len(df._data) == 0:
            return Series(
                column_empty(0, dtype="float64"),
                index=df.columns,
                name=row_key,
            )

        result_series = df[df.columns[0]]
        result_series.index = df.columns
        result_series.name = row_key
        return result_series

    if df.shape[0] == 0 and df.shape[1] == 0:
        # Completely empty result: give it a RangeIndex derived from the
        # row slice's bounds (its step is not used).
        slice_len = row_key.stop or len(self._df)
        start, stop, _ = row_key.indices(slice_len)
        df._index = RangeIndex(start, stop)
    return df