def _get_row_major(self, df, row_tuple):
    """Take the rows of ``df`` selected by ``row_tuple`` and trim the index.

    Row positions come from ``self._compute_validity_mask(df, row_tuple)``
    and are gathered with ``df.take``.  The index of the gathered frame is
    then adjusted depending on how many levels of ``df.index`` lie beyond
    the first ``len(row_tuple)`` levels:

    * exactly one: the index is replaced by a ``StringIndex`` holding the
      last level's values (looked up through the last codes column) and
      named after the last index name;
    * two or more: the leftmost ``len(row_tuple)`` levels are removed with
      ``_popn``;
    * none: the index of the gathered frame is left as it is.

    Parameters
    ----------
    df
        Frame whose ``index`` exposes ``levels``, ``codes`` and ``names``
        (presumably a MultiIndex-indexed cudf DataFrame -- confirm with
        callers).
    row_tuple
        Sequence of keys; forwarded to ``_compute_validity_mask`` and its
        length decides how many leading index levels are consumed.

    Returns
    -------
    The object returned by ``df.take``, with its index possibly replaced.
    """
    # Local import, as in the original (presumably to avoid an import
    # cycle with the cudf package -- confirm).
    from cudf import DataFrame, Series

    row_positions = self._compute_validity_mask(df, row_tuple)
    result = df.take(Series(row_positions))

    # Collect the code columns of the n-k trailing levels, where n is the
    # number of source levels and k the length of the indexing tuple.
    # Only the number of collected columns is used below.
    depth = len(row_tuple)
    source_index = df.index
    remaining = DataFrame()
    for level in range(depth, len(source_index.levels)):
        code_frame = source_index.codes
        remaining.add_column(
            source_index.names[level],
            code_frame[code_frame.columns[level]],
        )
    n_remaining = len(remaining.columns)

    if n_remaining == 1:
        # A single level is left: flatten it into a plain index whose
        # entries are the last level's values, addressed by code.
        taken_index = result.index
        last = len(taken_index.codes.columns) - 1
        last_codes = taken_index.codes[taken_index.codes.columns[last]]
        labels = [taken_index.levels[last][code] for code in last_codes]
        # TODO: Warning! The final index column could be arbitrarily
        # ordered integers, not strings, so the dtype should be checked
        # and a GenericIndex produced instead of a StringIndex.
        flat_index = StringIndex(labels)
        flat_index.name = taken_index.names[len(taken_index.names) - 1]
        result.index = flat_index
    elif n_remaining > 0:
        # Several levels are left: drop the leftmost levels, names and
        # codes until the index has the expected n-k columns.
        # NOTE(review): the return value of reset_index is discarded; if
        # reset_index is not in-place this call has no effect -- confirm.
        result.reset_index(drop=True)
        result.index = result.index._popn(depth)
    return result
def _get_row_major(self, df, row_tuple):
    """Take the rows of ``df`` selected by ``row_tuple`` and trim the index.

    The first element of ``row_tuple`` decides how rows are located:

    * a ``numbers.Number`` is used directly as the position to take;
    * a ``slice`` with ``stop == 0`` selects nothing; any other slice is
      expanded with ``cudautils.arange`` using ``start or 0``,
      ``stop or len(df)`` and ``step or 1``;
    * anything else is resolved through
      ``self._compute_validity_mask(df, row_tuple)``.

    After ``df.take`` the result is post-processed.  ``size`` is
    ``len(row_tuple)`` for the mask-based lookup and ``0`` for number or
    slice access, and the number of index levels of ``df`` from position
    ``size`` onwards selects the branch:

    * exactly one level: the index becomes ``as_index`` of the last
      level's values, named after the last index name;
    * otherwise, if the result has one row, ``size`` is 0 and the access
      was not a non-empty slice: the row is transposed and returned as a
      ``Series`` whose name is a tuple of labels read from the transposed
      frame's column levels and codes;
    * otherwise, if at least one level remains: the leftmost ``size``
      levels are removed with ``_popn``.

    Parameters
    ----------
    df
        Frame whose ``index`` exposes ``levels``, ``codes`` and ``names``
        (presumably a MultiIndex-indexed cudf DataFrame -- confirm with
        callers).
    row_tuple
        Non-empty sequence of keys; ``row_tuple[0]`` is inspected to pick
        the access mode.

    Returns
    -------
    The gathered frame with an adjusted index, or a ``Series`` in the
    single-row case described above.
    """
    # Local import, as in the original (presumably to avoid an import
    # cycle with the cudf package -- confirm).
    from cudf import DataFrame, Series

    first_key = row_tuple[0]
    slice_access = False
    if isinstance(first_key, numbers.Number):
        row_positions = first_key
    elif isinstance(first_key, slice):
        if first_key.stop == 0:
            # Empty slice: nothing to take.
            row_positions = []
        else:
            slice_access = True
            row_positions = cudautils.arange(
                first_key.start or 0,
                first_key.stop or len(df),
                first_key.step or 1,
            )
    else:
        row_positions = self._compute_validity_mask(df, row_tuple)
    result = df.take(Series(row_positions))

    # Number/slice access consumes no index levels; key-based access
    # consumes one level per element of the indexing tuple.
    positional = isinstance(first_key, (numbers.Number, slice))
    size = 0 if positional else len(row_tuple)

    # Collect the code columns of the trailing levels (from ``size`` on).
    # Only the number of collected columns is used below.
    source_index = df.index
    remaining = DataFrame()
    for level in range(size, len(source_index.levels)):
        code_frame = source_index.codes
        remaining.add_column(
            source_index.names[level],
            code_frame[code_frame.columns[level]],
        )
    n_remaining = len(remaining.columns)

    if n_remaining == 1:
        # A single level is left: flatten it into a plain index whose
        # entries are the last level's values, addressed by code.
        taken_index = result.index
        last = len(taken_index.codes.columns) - 1
        last_codes = taken_index.codes[taken_index.codes.columns[last]]
        labels = [taken_index.levels[last][code] for code in last_codes]
        flat_index = as_index(labels)
        flat_index.name = taken_index.names[len(taken_index.names) - 1]
        result.index = flat_index
    elif len(result) == 1 and size == 0 and not slice_access:
        # The final result is one row and it was not mapped into
        # directly: hand it back as a Series.
        result = result.T
        result = result[result.columns[0]]
        series_name = [
            result.columns.levels[idx][result.columns.codes[code][0]]
            for idx, code in enumerate(result.columns.codes)
        ]
        result = Series(list(result._cols.values())[0], name=series_name)
        result.name = tuple(series_name)
    elif n_remaining > 0:
        # Several levels are left: drop the leftmost levels, names and
        # codes until the index has the expected number of columns.
        # NOTE(review): the return value of reset_index is discarded; if
        # reset_index is not in-place this call has no effect -- confirm.
        result.reset_index(drop=True)
        result.index = result.index._popn(size)
    return result