Example #1
0
    def __getitem__(self, arg):
        """Return one element, or a new column for a slice.

        A ``Number`` is converted with ``int`` and handed to
        ``element_indexing``.  A ``slice`` produces a column through
        ``self.replace``; when ``null_count`` is positive the validity mask
        is sliced alongside the data and only unit-step slices are accepted.

        Raises
        ------
        NotImplementedError
            If the column has nulls and the slice step is neither ``None``
            nor 1, or if ``arg`` is neither a number nor a slice.
        """
        if isinstance(arg, Number):
            return self.element_indexing(int(arg))

        if not isinstance(arg, slice):
            raise NotImplementedError(type(arg))

        # The normalized bounds are not consumed below; the call is retained
        # so slicing goes through exactly the same steps as before.
        _start, _stop = utils.normalize_slice(arg, len(self))

        if not self.null_count > 0:
            # No nulls: only the data needs slicing.
            return self.replace(data=self.data[arg])

        step = arg.step
        if step is not None and step != 1:
            raise NotImplementedError(arg)

        sliced_data = self.data[arg]
        # Expand the mask, slice it with the same ``arg`` as the data, then
        # compact the result again.
        expanded = cudautils.expand_mask_bits(
            self.data.size,
            self.mask.to_gpu_array(),
        )
        sliced_mask = Buffer(cudautils.compact_mask_bytes(expanded[arg]))
        return self.replace(data=sliced_data, mask=sliced_mask)
Example #2
0
    def to_pandas(self):
        """Copy the column data to the host and return it as a ``pd.Series``.

        When ``self.nullable`` is true, the mask is expanded via
        ``cudautils.expand_mask_bits``, copied to the host, cast to ``bool``,
        and every row whose entry is ``False`` is set to ``None``.
        """
        host_values = self.data_array_view.copy_to_host()
        series = pd.Series(host_values)

        if not self.nullable:
            return series

        expanded = cudautils.expand_mask_bits(len(self), self.mask_array_view)
        is_valid = expanded.copy_to_host().astype(bool)
        # Rows not flagged in the mask become missing values.
        series[~is_valid] = None
        return series
Example #3
0
 def to_arrow(self):
     """Build a ``pa.DictionaryArray`` from this categorical column.

     The codes are copied to the host and used as indices, the categories
     become the dictionary, and ``ordered`` is taken from the accessor
     returned by ``self.cat()``.  When ``self.has_null_mask`` is true an
     expanded copy of the null mask is passed as ``mask``.
     """
     null_mask = None
     if self.has_null_mask:
         # PyArrow does not yet support from_buffers for DictionaryArray, so
         # the mask is expanded, copied to the host and passed as an array.
         expanded = cudautils.expand_mask_bits(len(self), self.nullmask.mem)
         # The original author notes expand_mask_bits comes back as int32;
         # it is narrowed to int8 here.
         # NOTE(review): pyarrow documents ``mask`` as True == null; confirm
         # that the expanded bits use that polarity rather than 1 == valid.
         host_bits = expanded.copy_to_host().astype('int8')
         null_mask = pa.array(host_bits)

     host_codes = self.cat().codes.data.mem.copy_to_host()
     code_indices = pa.array(host_codes)
     is_ordered = self.cat()._ordered
     category_values = pa.array(self.cat().categories)
     return pa.DictionaryArray.from_arrays(
         indices=code_indices,
         dictionary=category_values,
         mask=null_mask,
         from_pandas=True,
         ordered=is_ordered,
     )
Example #4
0
    def __getitem__(self, arg):
        """Index the column by number, slice, or array-like.

        * ``Number``: converted with ``int`` and passed to
          ``element_indexing``.
        * ``slice``: returns ``self.replace(...)`` with sliced data; when
          ``null_count`` is positive the mask is sliced too and only
          unit-step slices are accepted.
        * anything else: converted with ``columnops.as_column``; integer
          columns are dispatched to ``take`` and boolean columns to
          ``apply_boolean_mask``.

        Raises
        ------
        NotImplementedError
            For a non-unit step on a column with nulls, or when the
            converted indexer is neither integer nor boolean typed.
        """
        from cudf.dataframe import columnops

        if isinstance(arg, Number):
            return self.element_indexing(int(arg))

        if isinstance(arg, slice):
            if not self.null_count > 0:
                # No nulls: only the data needs slicing.
                return self.replace(data=self.data[arg])

            step = arg.step
            if step is not None and step != 1:
                raise NotImplementedError(arg)

            sliced_data = self.data[arg]
            # For "object" dtype ``size`` is called as a method; otherwise it
            # is read as an attribute.
            n_rows = (
                self.data.size() if self.dtype == "object" else self.data.size
            )
            expanded = cudautils.expand_mask_bits(
                n_rows, self.mask.to_gpu_array()
            )
            sliced_mask = Buffer(cudautils.compact_mask_bytes(expanded[arg]))
            return self.replace(data=sliced_data, mask=sliced_mask)

        indexer = columnops.as_column(arg)
        if len(indexer) == 0:
            # An empty indexer is rebuilt as an empty int32 column, which
            # sends it down the integer branch below.
            indexer = columnops.as_column([], dtype="int32")

        indexer_dtype = indexer.dtype
        if pd.api.types.is_integer_dtype(indexer_dtype):
            return self.take(indexer.data.mem)
        if pd.api.types.is_bool_dtype(indexer_dtype):
            return self.apply_boolean_mask(indexer)
        raise NotImplementedError(type(indexer))
Example #5
0
    def __getitem__(self, arg):
        """Index the column by number, slice, or array-like.

        * ``Number``: converted with ``int`` and passed to
          ``element_indexing``.
        * ``slice``: categorical columns slice their ``codes`` and are
          rebuilt around them; a slice whose resolved start equals its stop
          yields an empty masked column; otherwise the data view (and the
          mask, when ``has_nulls`` is true) is sliced and wrapped in a new
          column.
        * anything else: converted with ``column.as_column``; integer
          columns are dispatched to ``take`` and boolean columns to
          ``apply_boolean_mask``.

        Raises
        ------
        NotImplementedError
            For a non-unit step on a column with nulls, or when the
            converted indexer is neither integer nor boolean typed.
        """
        from cudf.core.column import column

        if isinstance(arg, Number):
            return self.element_indexing(int(arg))

        if not isinstance(arg, slice):
            indexer = column.as_column(arg)
            if len(indexer) == 0:
                # An empty indexer is rebuilt as an empty int32 column, which
                # sends it down the integer branch below.
                indexer = column.as_column([], dtype="int32")
            if pd.api.types.is_integer_dtype(indexer.dtype):
                return self.take(indexer)
            if pd.api.types.is_bool_dtype(indexer.dtype):
                return self.apply_boolean_mask(indexer)
            raise NotImplementedError(type(indexer))

        if is_categorical_dtype(self):
            sliced_codes = self.codes[arg]
            return build_column(
                data=None,
                dtype=self.dtype,
                mask=sliced_codes.mask,
                children=(sliced_codes,),
            )

        # Only start and stop are inspected; the resolved stride is unused.
        lo, hi, _stride = arg.indices(len(self))
        if lo == hi:
            return column_empty(0, self.dtype, masked=True)

        non_unit_step = arg.step is not None and arg.step != 1

        mask_view = None
        if self.has_nulls:
            if non_unit_step:
                raise NotImplementedError(arg)

            data_view = self.data_array_view[arg]
            # Expand the mask, slice it with the same ``arg`` as the data,
            # then compact the result again.
            expanded = cudautils.expand_mask_bits(
                self.size, self.mask_array_view
            )
            mask_view = cudautils.compact_mask_bytes(expanded[arg])
        else:
            data_view = self.data_array_view[arg]

        if self.dtype == "object":
            return as_column(data_view)

        if non_unit_step:
            # Strided view: made contiguous before being wrapped.
            data_buffer = Buffer(cudautils.as_contiguous(data_view))
        else:
            # Unit-step view: the Buffer points at the sliced view's memory
            # and names ``self`` as its owner.
            data_buffer = Buffer(
                data=data_view.device_ctypes_pointer.value,
                size=data_view.nbytes,
                owner=self,
            )

        # The mask Buffer is built from the freshly compacted mask and is
        # given no owner.
        mask_buffer = None if mask_view is None else Buffer(mask_view)

        return build_column(data_buffer, self.dtype, mask=mask_buffer)