コード例 #1
0
ファイル: multiindex.py プロジェクト: jimmytuc/cudf
 def _get_row_major(self, df, row_tuple):
     """Select rows of ``df`` for ``row_tuple`` and trim the result's index.

     The value returned by ``self._compute_validity_mask(df, row_tuple)``
     is wrapped in a ``Series`` and handed to ``df.take`` to pick the
     rows.  The index of the result then depends on how many source index
     levels lie beyond the length of ``row_tuple``:

     * exactly one: the index is replaced by a flat ``StringIndex`` built
       from the labels of the last level and named after it;
     * more than one: the leading ``len(row_tuple)`` levels are popped
       from the existing index via ``_popn``;
     * none: the index is left as ``df.take`` produced it.

     Returns the selected rows with the adjusted index.
     """
     from cudf import DataFrame, Series

     selector = self._compute_validity_mask(df, row_tuple)
     result = df.take(Series(selector))

     # Collect the code columns of the last n-k levels, where n is the
     # number of source levels and k is the length of the indexing tuple.
     # Only the number of collected columns is used afterwards.
     source_index = df.index
     leftover = DataFrame()
     for level in range(len(row_tuple), len(source_index.levels)):
         leftover.add_column(
             source_index.names[level],
             source_index.codes[source_index.codes.columns[level]])
     leftover_count = len(leftover.columns)

     if leftover_count == 1:
         # A single level remains: translate the codes of the last level
         # into their labels and use them as a flat index.
         picked_index = result.index
         codes = picked_index.codes
         last = len(codes.columns) - 1
         labels = [picked_index.levels[last][code]
                   for code in codes[codes.columns[last]]]
         # TODO: Warning! The final index column could be arbitrarily
         # ordered integers, not Strings, so we need to check for that
         # dtype and produce a GenericIndex instead of a StringIndex
         flat_index = StringIndex(labels)
         flat_index.name = picked_index.names[len(picked_index.names) - 1]
         result.index = flat_index
         return result

     if leftover_count > 0:
         # Several levels remain: pop the leftmost levels, names and codes
         # from the index until it has the correct number of columns (n-k).
         # NOTE(review): the return value of reset_index is discarded here;
         # confirm whether this call is expected to have any effect.
         result.reset_index(drop=True)
         result.index = result.index._popn(len(row_tuple))
     return result
コード例 #2
0
 def _get_row_major(self, df, row_tuple):
     """Select rows of ``df`` for ``row_tuple`` and trim the result's index.

     The first element of ``row_tuple`` decides how rows are picked:

     * a number is passed to ``df.take`` as is (wrapped in a ``Series``);
     * a slice whose ``stop`` is 0 selects nothing; any other slice is
       expanded with ``cudautils.arange``, defaulting ``start`` to 0,
       ``stop`` to ``len(df)`` and ``step`` to 1;
     * anything else is resolved through
       ``self._compute_validity_mask(df, row_tuple)``, and only in this
       case are ``len(row_tuple)`` leading index levels treated as
       consumed.

     Afterwards the index is adjusted: if exactly one level is left it is
     replaced by a flat index of that level's labels (via ``as_index``);
     if the result has a single row, no levels were consumed and the
     access was not an expanded slice, the row is turned into a ``Series``
     named by a tuple of column labels; otherwise, when levels remain, the
     consumed leading levels are popped with ``_popn``.

     Returns the selected data with the adjusted index.
     """
     from cudf import DataFrame, Series

     head = row_tuple[0]
     slice_access = False
     consumed = 0  # number of leading index levels used up by row_tuple
     if isinstance(head, numbers.Number):
         valid_indices = head
     elif isinstance(head, slice):
         if head.stop == 0:
             # Empty slice: nothing to take.
             valid_indices = []
         else:
             slice_access = True
             valid_indices = cudautils.arange(head.start or 0,
                                              head.stop or len(df),
                                              head.step or 1)
     else:
         valid_indices = self._compute_validity_mask(df, row_tuple)
         consumed = len(row_tuple)

     result = df.take(Series(valid_indices))

     # Collect the code columns of the last n-k levels, where n is the
     # number of source levels and k is the number of consumed levels.
     # Only the number of collected columns is used afterwards.
     source_index = df.index
     leftover = DataFrame()
     for level in range(consumed, len(source_index.levels)):
         leftover.add_column(
             source_index.names[level],
             source_index.codes[source_index.codes.columns[level]])
     leftover_count = len(leftover.columns)

     if leftover_count == 1:
         # A single level remains: translate the codes of the last level
         # into their labels and use them as a flat index.
         picked_index = result.index
         codes = picked_index.codes
         last = len(codes.columns) - 1
         labels = [picked_index.levels[last][code]
                   for code in codes[codes.columns[last]]]
         flat_index = as_index(labels)
         flat_index.name = picked_index.names[len(picked_index.names) - 1]
         result.index = flat_index
     elif len(result) == 1 and consumed == 0 and not slice_access:
         # One row that was reached without consuming index levels and
         # without an expanded slice: transpose it, keep the first column
         # and return it as a Series named after the column labels.
         transposed = result.T
         first = transposed[transposed.columns[0]]
         cols = first.columns
         series_name = [cols.levels[pos][cols.codes[code][0]]
                        for pos, code in enumerate(cols.codes)]
         result = Series(list(first._cols.values())[0],
                         name=series_name)
         result.name = tuple(series_name)
     elif leftover_count > 0:
         # Otherwise pop the leftmost levels, names and codes from the
         # index until it has the correct number of columns (n-k).
         # NOTE(review): the return value of reset_index is discarded here;
         # confirm whether this call is expected to have any effect.
         result.reset_index(drop=True)
         result.index = result.index._popn(consumed)
     return result