コード例 #1
0
ファイル: multiindex.py プロジェクト: jimmytuc/cudf
 def _get_row_major(self, df, row_tuple):
     valid_indices = self._compute_validity_mask(df, row_tuple)
     from cudf import Series
     result = df.take(Series(valid_indices))
     # Build new index - INDEX based MultiIndex
     # ---------------
     from cudf import DataFrame
     out_index = DataFrame()
     # Select the last n-k columns where n is the number of source
     # levels and k is the length of the indexing tuple
     for k in range(len(row_tuple), len(df.index.levels)):
         out_index.add_column(df.index.names[k],
                              df.index.codes[df.index.codes.columns[k]])
     # If there's only one column remaining in the output index, convert
     # it into a StringIndex and name the final index values according
     # to the proper codes.
     if len(out_index.columns) == 1:
         out_index = []
         for val in result.index.codes[result.index.codes.columns[len(result.index.codes.columns)-1]]:  # noqa: E501
             out_index.append(result.index.levels[
                     len(result.index.codes.columns)-1][val])
         # TODO: Warning! The final index column could be arbitrarily
         # ordered integers, not Strings, so we need to check for that
         # dtype and produce a GenericIndex instead of a StringIndex
         out_index = StringIndex(out_index)
         out_index.name = result.index.names[len(result.index.names)-1]
         result.index = out_index
     else:
         # Otherwise pop the leftmost levels, names, and codes from the
         # source index until it has the correct number of columns (n-k)
         if(len(out_index.columns)) > 0:
             result.reset_index(drop=True)
             result.index = result.index._popn(len(row_tuple))
     return result
コード例 #2
0
ファイル: multiindex.py プロジェクト: sriramch/cudf
    def _index_and_downcast(self, result, index, index_key):
        from cudf import DataFrame
        from cudf import Series

        if isinstance(index_key, (numbers.Number, slice)):
            index_key = [index_key]
        if (
            len(index_key) > 0 and not isinstance(index_key, tuple)
        ) or isinstance(index_key[0], slice):
            index_key = index_key[0]

        slice_access = False
        if isinstance(index_key, slice):
            slice_access = True
        out_index = DataFrame()
        # Select the last n-k columns where n is the number of _source_data
        # columns and k is the length of the indexing tuple
        size = 0
        if not isinstance(index_key, (numbers.Number, slice)):
            size = len(index_key)
        for k in range(size, len(index._source_data.columns)):
            if index.names is None:
                name = k
            else:
                name = index.names[k]
            out_index.add_column(
                name, index._source_data[index._source_data.columns[k]]
            )

        if len(result) == 1 and size == 0 and slice_access is False:
            # If the final result is one row and it was not mapped into
            # directly, return a Series with a tuple as name.
            result = result.T
            result = result[result.columns[0]]
        elif len(result) == 0 and slice_access is False:
            # Pandas returns an empty Series with a tuple as name
            # the one expected result column
            series_name = []
            for idx, code in enumerate(index._source_data.columns):
                series_name.append(index._source_data[code][0])
            result = Series([])
            result.name = tuple(series_name)
        elif len(out_index.columns) == 1:
            # If there's only one column remaining in the output index, convert
            # it into an Index and name the final index values according
            # to the _source_data column names
            last_column = index._source_data.columns[-1]
            out_index = index._source_data[last_column]
            out_index = as_index(out_index)
            out_index.name = index.names[len(index.names) - 1]
            index = out_index
        elif len(out_index.columns) > 1:
            # Otherwise pop the leftmost levels, names, and codes from the
            # source index until it has the correct number of columns (n-k)
            result.reset_index(drop=True)
            index = index._popn(size)
        if isinstance(index_key, tuple):
            result = result.set_index(index)
        return result
コード例 #3
0
ファイル: multiindex.py プロジェクト: jimmytuc/cudf
    def _popn(self, n):
        """ Returns a copy of this index without the left-most n values.

        Removes n names, labels, and codes in order to build a new index
        for results.
        """
        from cudf import DataFrame
        codes = DataFrame()
        for idx in self.codes.columns[n:]:
            codes.add_column(idx, self.codes[idx])
        result = MultiIndex(self.levels[n:], codes)
        result.names = self.names[n:]
        return result
コード例 #4
0
 def _get_row_major(self, df, row_tuple):
     slice_access = False
     if isinstance(row_tuple[0], numbers.Number):
         valid_indices = row_tuple[0]
     elif isinstance(row_tuple[0], slice):
         # 1. empty slice compute
         if row_tuple[0].stop == 0:
             valid_indices = []
         else:
             slice_access = True
             start = row_tuple[0].start or 0
             stop = row_tuple[0].stop or len(df)
             step = row_tuple[0].step or 1
             valid_indices = cudautils.arange(start, stop, step)
     else:
         valid_indices = self._compute_validity_mask(df, row_tuple)
     from cudf import Series
     result = df.take(Series(valid_indices))
     # Build new index - INDEX based MultiIndex
     # ---------------
     from cudf import DataFrame
     out_index = DataFrame()
     # Select the last n-k columns where n is the number of source
     # levels and k is the length of the indexing tuple
     size = 0
     if not isinstance(row_tuple[0], (numbers.Number, slice)):
         size = len(row_tuple)
     for k in range(size, len(df.index.levels)):
         out_index.add_column(df.index.names[k],
                              df.index.codes[df.index.codes.columns[k]])
     # If there's only one column remaining in the output index, convert
     # it into an Index and name the final index values according
     # to the proper codes.
     if len(out_index.columns) == 1:
         out_index = []
         for val in result.index.codes[result.index.codes.columns[len(result.index.codes.columns)-1]]:  # noqa: E501
             out_index.append(result.index.levels[
                     len(result.index.codes.columns)-1][val])
         out_index = as_index(out_index)
         out_index.name = result.index.names[len(result.index.names)-1]
         result.index = out_index
     else:
         if len(result) == 1 and size == 0 and slice_access is False:
             # If the final result is one row and it was not mapped into
             # directly
             result = result.T
             result = result[result.columns[0]]
             # convert to Series
             series_name = []
             for idx, code in enumerate(result.columns.codes):
                 series_name.append(result.columns.levels[idx][
                         result.columns.codes[code][0]])
             result = Series(list(result._cols.values())[0],
                             name=series_name)
             result.name = tuple(series_name)
         elif(len(out_index.columns)) > 0:
             # Otherwise pop the leftmost levels, names, and codes from the
             # source index until it has the correct number of columns (n-k)
             result.reset_index(drop=True)
             result.index = result.index._popn(size)
     return result