def _get_row_major(self, df, row_tuple):
    """Take the rows of ``df`` matched by ``row_tuple`` and re-index them.

    The rows are chosen with ``self._compute_validity_mask`` and gathered
    with ``df.take``.  The index of the gathered frame is then rebuilt from
    the source levels located after the first ``len(row_tuple)`` levels:

    * exactly one level left: the index becomes a ``StringIndex`` holding
      the labels of the last level, named after the last index name;
    * more than one level left: ``_popn(len(row_tuple))`` is applied to the
      gathered frame's index;
    * no level left: the index of the gathered frame is kept as is.

    Parameters
    ----------
    df
        Frame whose ``index`` exposes ``levels``, ``codes`` and ``names``.
    row_tuple
        Sized key; its length is the number of leading levels it consumes.

    Returns
    -------
    The gathered frame with its index rebuilt as described above.
    """
    from cudf import DataFrame, Series

    row_mask = self._compute_validity_mask(df, row_tuple)
    result = df.take(Series(row_mask))

    # Collect the codes of the trailing n-k levels, where n is the number
    # of source levels and k is the length of the indexing tuple.  The
    # frame built here is only used to count how many levels remain.
    consumed = len(row_tuple)
    source_index = df.index
    out_index = DataFrame()
    for level in range(consumed, len(source_index.levels)):
        level_name = source_index.names[level]
        level_codes = source_index.codes[source_index.codes.columns[level]]
        out_index.add_column(level_name, level_codes)

    remaining = len(out_index.columns)
    if remaining == 1:
        # A single level is left: translate the codes of the last level
        # into their labels and use them as a flat index.
        taken_index = result.index
        codes = taken_index.codes
        last = len(codes.columns) - 1
        labels = [
            taken_index.levels[last][code]
            for code in codes[codes.columns[last]]
        ]
        # TODO: Warning! The final index column could hold arbitrarily
        # ordered integers rather than strings; the dtype should be
        # checked and a GenericIndex produced instead of a StringIndex.
        flat_index = StringIndex(labels)
        names = taken_index.names
        flat_index.name = names[len(names) - 1]
        result.index = flat_index
    elif remaining > 0:
        # Several levels are left: drop the leading ones from the index so
        # that it keeps the expected number of columns (n-k).
        # NOTE(review): the value returned by reset_index is not used --
        # confirm whether this call is still needed.
        result.reset_index(drop=True)
        result.index = result.index._popn(consumed)
    return result
def _index_and_downcast(self, result, index, index_key):
    """Shape ``result`` and attach the index implied by ``index_key``.

    ``index_key`` is first normalised: a bare number or slice is wrapped in
    a list, and a non-tuple key (or a key whose first element is a slice)
    is then replaced by its first element.  ``size`` is the length of the
    normalised key, or 0 when that key is a number or a slice.

    One of the following is then applied, in this order:

    * one row, ``size == 0``, not a slice: ``result`` is transposed and its
      first column is returned in place of the frame;
    * zero rows, not a slice: ``result`` becomes an empty ``Series`` named
      with a tuple made of the first value of every ``_source_data`` column;
    * one source column left after the first ``size``: ``index`` becomes
      ``as_index`` of the last ``_source_data`` column, named after the
      last index name;
    * more than one source column left: ``index`` becomes
      ``index._popn(size)``.

    Finally, when the normalised key is a tuple, ``result.set_index(index)``
    is applied.

    Parameters
    ----------
    result
        Object already selected by the caller; must support ``len``.
    index
        Index exposing ``_source_data``, ``names`` and ``_popn``.
    index_key
        Number, slice, or sized/indexable key.

    Returns
    -------
    The reshaped and, for tuple keys, re-indexed ``result``.
    """
    from cudf import DataFrame, Series

    scalar_kinds = (numbers.Number, slice)

    # Normalise the key: wrap scalars/slices, then unwrap anything that is
    # not a real tuple key (or whose leading element is a slice).
    if isinstance(index_key, scalar_kinds):
        index_key = [index_key]
    unwrap = (
        len(index_key) > 0 and not isinstance(index_key, tuple)
    ) or isinstance(index_key[0], slice)
    if unwrap:
        index_key = index_key[0]

    slice_access = isinstance(index_key, slice)
    size = 0 if isinstance(index_key, scalar_kinds) else len(index_key)

    # Gather the trailing n-k columns, where n is the number of
    # _source_data columns and k is the length of the indexing tuple.
    # The frame built here is only used to count the remaining columns.
    source = index._source_data
    out_index = DataFrame()
    for position in range(size, len(source.columns)):
        name = position if index.names is None else index.names[position]
        out_index.add_column(name, source[source.columns[position]])

    row_count = len(result)
    remaining = len(out_index.columns)
    if row_count == 1 and size == 0 and not slice_access:
        # A single row that was not addressed through a tuple key: hand
        # back the first column of the transposed result.
        transposed = result.T
        result = transposed[transposed.columns[0]]
    elif row_count == 0 and not slice_access:
        # Pandas returns an empty Series with a tuple as name; mirror that
        # using the first value of each source column.
        name_parts = [source[column][0] for column in source.columns]
        result = Series([])
        result.name = tuple(name_parts)
    elif remaining == 1:
        # Only one column is left in the output index: turn the last
        # source column into a flat Index carrying the last index name.
        flat_index = as_index(source[source.columns[-1]])
        names = index.names
        flat_index.name = names[len(names) - 1]
        index = flat_index
    elif remaining > 1:
        # Several columns are left: drop the leading ones from the source
        # index so that it keeps the expected number of columns (n-k).
        # NOTE(review): the value returned by reset_index is not used --
        # confirm whether this call is still needed.
        result.reset_index(drop=True)
        index = index._popn(size)

    if isinstance(index_key, tuple):
        result = result.set_index(index)
    return result
def _get_row_major(self, df, row_tuple):
    """Take the rows of ``df`` addressed by ``row_tuple`` and re-index them.

    The rows to gather depend on ``row_tuple[0]``:

    * a number is passed on unchanged as the selection;
    * a slice whose ``stop`` equals 0 selects nothing; any other slice is
      expanded with ``cudautils.arange(start, stop, step)``, where missing
      (or falsy) bounds default to 0, ``len(df)`` and 1 respectively;
    * anything else is resolved by ``self._compute_validity_mask``.

    ``size`` is ``len(row_tuple)`` for that last case and 0 otherwise.  The
    gathered frame is then post-processed:

    * exactly one source level left after the first ``size``: the index
      becomes ``as_index`` of the labels of the last level, named after the
      last index name;
    * otherwise, for a single row with ``size == 0`` that did not come from
      an expanded slice: the result is converted to a ``Series`` whose name
      is a tuple built from the column levels and codes;
    * otherwise, when at least one level is left: ``_popn(size)`` is
      applied to the gathered frame's index.

    Parameters
    ----------
    df
        Frame whose ``index`` exposes ``levels``, ``codes`` and ``names``.
    row_tuple
        Indexable key; its first element selects the access mode.

    Returns
    -------
    The gathered frame, or the ``Series`` built in the single-row case.
    """
    from cudf import DataFrame, Series

    head = row_tuple[0]
    slice_access = False
    if isinstance(head, numbers.Number):
        valid_indices = head
    elif isinstance(head, slice):
        if head.stop == 0:
            # An empty slice selects no rows at all.
            valid_indices = []
        else:
            slice_access = True
            valid_indices = cudautils.arange(
                head.start or 0, head.stop or len(df), head.step or 1
            )
    else:
        valid_indices = self._compute_validity_mask(df, row_tuple)

    result = df.take(Series(valid_indices))

    # Collect the codes of the trailing n-k levels, where n is the number
    # of source levels and k is the length of the indexing tuple.  The
    # frame built here is only used to count how many levels remain.
    size = 0 if isinstance(head, (numbers.Number, slice)) else len(row_tuple)
    source_index = df.index
    out_index = DataFrame()
    for level in range(size, len(source_index.levels)):
        level_name = source_index.names[level]
        level_codes = source_index.codes[source_index.codes.columns[level]]
        out_index.add_column(level_name, level_codes)

    remaining = len(out_index.columns)
    if remaining == 1:
        # A single level is left: translate the codes of the last level
        # into their labels and use them as a flat index.
        taken_index = result.index
        codes = taken_index.codes
        last = len(codes.columns) - 1
        labels = [
            taken_index.levels[last][code]
            for code in codes[codes.columns[last]]
        ]
        flat_index = as_index(labels)
        names = taken_index.names
        flat_index.name = names[len(names) - 1]
        result.index = flat_index
    elif len(result) == 1 and size == 0 and not slice_access:
        # The final result is one row that was not mapped into directly:
        # transpose it, keep the first column and convert it to a Series.
        # The statements below are order-dependent and kept in sequence.
        result = result.T
        result = result[result.columns[0]]
        series_name = [
            result.columns.levels[position][result.columns.codes[code][0]]
            for position, code in enumerate(result.columns.codes)
        ]
        first_column = list(result._cols.values())[0]
        result = Series(first_column, name=series_name)
        result.name = tuple(series_name)
    elif remaining > 0:
        # Several levels are left: drop the leading ones from the index so
        # that it keeps the expected number of columns (n-k).
        # NOTE(review): the value returned by reset_index is not used --
        # confirm whether this call is still needed.
        result.reset_index(drop=True)
        result.index = result.index._popn(size)
    return result