Example #1
0
    def take(self, indices, ignore_index=False):
        """Gather the elements of this Series located at *indices*.

        Parameters
        ----------
        indices : array-like
            Positions to gather. Wrapped in a ``Buffer`` and converted
            with ``to_gpu_array()`` before use.
        ignore_index : bool, default False
            When true the result is labelled with a fresh ``RangeIndex``;
            otherwise its index is ``self.index.take(indices)``. Not
            consulted when *indices* is empty or the dtype is object.

        Returns
        -------
        The Series produced by ``self._copy_construct``.
        """
        gather_map = Buffer(indices).to_gpu_array()

        # Nothing requested: hand back zero-length slices of data and index.
        if gather_map.size == 0:
            return self._copy_construct(data=self.data[:0],
                                        index=self.index[:0])

        # Object-dtype series go through __getitem__ rather than the gather.
        if self.dtype == np.dtype("object"):
            return self[gather_map]

        gathered = cudautils.gather(data=self.data.to_gpu_array(),
                                    index=gather_map)

        # The mask, when there is one, is gathered with the same positions.
        new_mask = None
        if self._column.mask:
            new_mask = Buffer(
                self._get_mask_as_series().take(gather_map).as_mask())

        new_index = (RangeIndex(gather_map.size) if ignore_index
                     else self.index.take(gather_map))

        new_column = self._column.replace(data=Buffer(gathered),
                                          mask=new_mask)
        return self._copy_construct(data=new_column, index=new_index)
Example #2
0
 def unique(self, method='sort'):
     """Return the unique values of this column.

     Parameters
     ----------
     method : str, default 'sort'
         Selects the algorithm used to compute the unique values. Only
         ``'sort'`` is implemented.

     Returns
     -------
     The result of ``self.replace`` carrying the gathered values and no
     mask.

     Raises
     ------
     NotImplementedError
         If *method* is anything other than ``'sort'``.
     """
     # Compare by value, not identity: ``is not`` against a string literal
     # depends on interning, so an equal but separately built 'sort' string
     # used to be rejected.
     if method != 'sort':
         msg = 'non sort based unique() not implemented yet'
         raise NotImplementedError(msg)
     segs, sortedvals = self._unique_segments()
     # Gather the sorted values at the segment positions
     out = cudautils.gather(data=sortedvals, index=segs)
     return self.replace(data=Buffer(out), mask=None)
Example #3
0
 def take(self, indices):
     """Gather the entries of this index located at *indices*.

     *indices* must have an integer dtype (kind ``'i'`` or ``'u'``); this
     is enforced with an assertion. Empty *indices* yield an empty
     ``RangeIndex``; otherwise the gathered values are wrapped in a
     ``GenericIndex``.
     """
     assert indices.dtype.kind in 'iu'
     if indices.size == 0:
         # Nothing to gather
         return RangeIndex(indices.size)
     gathered = cudautils.gather(data=self.gpu_values, index=indices)
     gathered_column = self.as_column().replace(data=Buffer(gathered))
     return GenericIndex(gathered_column)
Example #4
0
 def value_counts(self, method='sort'):
     """Return the unique values of this column and their counts.

     Parameters
     ----------
     method : str, default 'sort'
         Algorithm selector. Only ``'sort'`` is implemented.

     Returns
     -------
     (out_vals, out_counts)
         ``out_vals`` is the result of ``self.replace`` carrying the
         gathered values and no mask; ``out_counts`` is a
         ``NumericalColumn`` of dtype ``np.intp`` built from
         ``cudautils.value_count``.

     Raises
     ------
     NotImplementedError
         If *method* is anything other than ``'sort'``.
     """
     # Compare by value, not identity: ``is not`` against a string literal
     # depends on interning, so an equal but separately built 'sort' string
     # used to be rejected.
     if method != 'sort':
         msg = 'non sort based value_count() not implemented yet'
         raise NotImplementedError(msg)
     segs, sortedvals = self._unique_segments()
     # Return both values and their counts
     out1 = cudautils.gather(data=sortedvals, index=segs)
     out2 = cudautils.value_count(segs, len(sortedvals))
     out_vals = self.replace(data=Buffer(out1), mask=None)
     out_counts = NumericalColumn(data=Buffer(out2), dtype=np.intp)
     return out_vals, out_counts
Example #5
0
def column_select_by_position(column, positions):
    """Gather the entries of *column* found at *positions*.

    *column* must not contain nulls (asserted). *positions* is documented
    as a series of dtype int64; its ``data`` buffer supplies the gather
    indices.

    Returns (selected_column, selected_positions)
    """
    from cudf.dataframe.numerical import NumericalColumn
    assert column.null_count == 0

    source_values = column.data.to_gpu_array()
    wanted = positions.data.to_gpu_array()
    gathered = cudautils.gather(source_values, wanted)
    selected_column = column.replace(data=Buffer(gathered))

    # The positions are re-wrapped in their own Buffer and returned as a
    # numerical column with that buffer's dtype.
    position_buffer = Buffer(positions.data.to_gpu_array())
    selected_positions = NumericalColumn(data=position_buffer,
                                         dtype=position_buffer.dtype)
    return selected_column, selected_positions
Example #6
0
    def take(self, indices):
        """Select the entries of this Index located at *indices*.

        Parameters
        ---
        indices: An array-like of integer dtype (kind ``'i'`` or ``'u'``,
            enforced with an assertion) that maps to values contained in
            this Index.

        Returns
        ---
        An empty ``RangeIndex`` when *indices* is empty, otherwise the
        result of ``as_index`` applied to the gathered column.
        """
        assert indices.dtype.kind in 'iu'
        if indices.size == 0:
            # Nothing to gather
            return RangeIndex(indices.size)
        gathered = cudautils.gather(data=self.gpu_values, index=indices)
        gathered_column = self.as_column().replace(data=Buffer(gathered))
        return as_index(gathered_column)
Example #7
0
    def take(self, indices, ignore_index=False):
        """Gather the elements of this Column located at *indices*.

        Parameters
        ----------
        indices : array-like
            Positions to gather. Wrapped in a ``Buffer`` and converted
            with ``to_gpu_array()`` before use.
        ignore_index : bool, default False
            Accepted but not read anywhere in this method.

        Returns
        -------
        The result of ``self.replace`` with the gathered data and mask,
        or ``self.copy()`` when *indices* is empty.
        """
        gather_map = Buffer(indices).to_gpu_array()

        # NOTE(review): empty indices return a copy of the whole column,
        # not a zero-length one -- confirm this is intended.
        if gather_map.size == 0:
            return self.copy()

        gathered = cudautils.gather(data=self._data.to_gpu_array(),
                                    index=gather_map)

        # The mask, when there is one, is gathered with the same positions.
        new_mask = None
        if self._mask:
            new_mask = Buffer(
                self._get_mask_as_column().take(gather_map).as_mask())

        return self.replace(data=Buffer(gathered), mask=new_mask)
Example #8
0
 def sort_by_values(self, ascending=True, na_position="last"):
     """Sort this column and report the ordering that was applied.

     *ascending* and *na_position* are forwarded unchanged to
     ``get_sorted_inds``.

     Returns
     -------
     (col_keys, col_inds)
         ``col_keys`` carries this column's data (and mask, if any)
         gathered at the sorted indices; ``col_inds`` carries the sorted
         indices themselves.
     """
     order = get_sorted_inds(self, ascending, na_position)
     sorted_data = cudautils.gather(data=self.data.mem,
                                    index=order.data.mem)

     # Reorder the mask with the same indices when one is present.
     if self.mask:
         mask_column = self._get_mask_as_column()
         reordered = mask_column.take(order.data.to_gpu_array())
         sorted_mask = Buffer(reordered.as_mask())
     else:
         sorted_mask = None

     keys = self.replace(data=Buffer(sorted_data),
                         mask=sorted_mask,
                         null_count=self.null_count,
                         dtype=self.dtype)
     inds = self.replace(data=order.data,
                         mask=order.mask,
                         dtype=order.data.dtype)
     return keys, inds