Esempio n. 1
0
    def deserialize(cls, header, frames):
        """Rebuild a string column from a header and its serialized frames.

        Parameters
        ----------
        header : mapping
            Read for ``"nvstrings"`` (forwarded as the string count) and
            ``"null_count"``.
        frames : iterable
            Serialized buffers. Positions 0, 1 and 2 are consumed; the
            original note labels them ``[nbuf, sbuf, obuf]``.

        Returns
        -------
        Whatever ``column.as_column`` produces for the rebuilt nvstrings
        object.
        """

        def _normalise(buf):
            # Objects exposing the CUDA array interface are converted with
            # numba; memoryviews go through NumPy and then
            # ``cudautils.to_device``.  Anything else is passed on as-is.
            if hasattr(buf, "__cuda_array_interface__"):
                return cuda.as_cuda_array(buf)
            if isinstance(buf, memoryview):
                return cudautils.to_device(np.asarray(buf))
            return buf

        pointers = []
        for buf in frames:
            pointers.append(libcudf.cudf.get_ctype_ptr(_normalise(buf)))

        # Frame order is [nbuf, sbuf, obuf]; from_offsets takes sbuf and obuf
        # positionally and nbuf by keyword.
        string_count = header["nvstrings"]
        strings = nvstrings.from_offsets(
            pointers[1],
            pointers[2],
            string_count,
            nbuf=pointers[0],
            ncount=header["null_count"],
            bdevmem=True,
        )
        return column.as_column(strings)
Esempio n. 2
0
    def deserialize(cls, header, frames):
        """Rebuild an object of the pickled type from serialized frames.

        Parameters
        ----------
        header : mapping
            Read for ``"nvstrings"`` (forwarded as the string count),
            ``"null_count"``, ``"type"`` (a pickled callable) and, for
            memoryview frames only, ``"subheaders"[i]["dtype"]``.
        frames : iterable
            Serialized buffers. Positions 0, 1 and 2 are consumed; the
            original note labels them ``[nbuf, sbuf, obuf]``.

        Returns
        -------
        The result of calling the unpickled ``header["type"]`` on the rebuilt
        nvstrings object.
        """
        pointers = []
        for position, buf in enumerate(frames):
            if isinstance(buf, memoryview):
                # The subheader is looked up lazily: it is only required for
                # frames that arrive as memoryviews.
                frame_dtype = header["subheaders"][position]["dtype"]
                host_array = np.frombuffer(buf, dtype=frame_dtype)
                buf = cudautils.to_device(host_array)
            pointers.append(libcudf.cudf.get_ctype_ptr(buf))

        # Frame order is [nbuf, sbuf, obuf]; from_offsets takes sbuf and obuf
        # positionally and nbuf by keyword.
        string_count = header["nvstrings"]
        strings = nvstrings.from_offsets(
            pointers[1],
            pointers[2],
            string_count,
            nbuf=pointers[0],
            ncount=header["null_count"],
            bdevmem=True,
        )

        # NOTE(review): pickle.loads executes arbitrary code from its input;
        # this is only safe if headers always come from a trusted peer.
        result_type = pickle.loads(header["type"])
        return result_type(strings)
Esempio n. 3
0
 def __init__(self, mem, size=None, capacity=None, categorical=False):
     """Wrap ``mem`` after handing it to ``cudautils.to_device``.

     Parameters
     ----------
     mem : array-like
         Source memory; must support ``len()`` when ``categorical`` is
         true, otherwise must expose ``.size`` (only consulted when
         ``size`` is not given).
     size : int, optional
         Number of elements in use. Derived from ``mem`` when omitted.
     capacity : int, optional
         Defaults to ``size``.
     categorical : bool
         Selects ``len(mem)`` instead of ``mem.size`` for the default size.
     """
     if size is None:
         size = len(mem) if categorical else mem.size

     self.mem = cudautils.to_device(mem)
     # presumably enforces a one-dimensional buffer -- confirm against
     # _BufferSentry
     _BufferSentry(self.mem).ndim(1)

     self.size = size
     self.capacity = size if capacity is None else capacity
     self.dtype = self.mem.dtype
Esempio n. 4
0
    def deserialize(cls, header, frames):
        """Rebuild an nvstrings object from a header and serialized frames.

        Parameters
        ----------
        header : mapping
            Read for ``"nvstrings"`` (forwarded as the string count) and
            ``"null_count"``.  ``"subheaders"[i]`` is consulted for every
            frame that is not already a NumPy array or device memory.
        frames : iterable
            Serialized buffers. Positions 0, 1 and 2 are consumed; the
            inline note labels them ``[nbuf, sbuf, obuf]``.

        Returns
        -------
        The object returned by ``nvstrings.from_offsets``.
        """
        # Deserialize the mask, value, and offset frames
        arrays = []

        for i, frame in enumerate(frames):
            if isinstance(frame, memoryview):
                sheader = header["subheaders"][i]
                dtype = sheader["dtype"]
                frame = np.frombuffer(frame, dtype=dtype)
                frame = cudautils.to_device(frame)
            elif not (isinstance(frame, np.ndarray)
                      or numba.cuda.driver.is_device_memory(frame)):
                # this is probably a ucp_py.BufferRegion memory object
                # check the header for info -- this should be encoded from
                # serialization process.  Lastly, `typestr` and `shape` *must*
                # manually set *before* consuming the buffer as a DeviceNDArray
                sheader = header["subheaders"][i]
                # Take the dtype from this frame's own subheader.  Previously
                # `dtype` was never assigned in this branch, so it either
                # raised NameError or reused an earlier frame's dtype.
                dtype = sheader.get("dtype", "B")
                frame.typestr = dtype
                frame.shape = sheader.get("shape", len(frame))
                frame = np.frombuffer(frame, dtype=dtype)
                frame = cudautils.to_device(frame)

            arrays.append(get_ctype_ptr(frame))

        # Use from_offsets to get nvstring data.
        # Note: array items = [nbuf, sbuf, obuf]
        scount = header["nvstrings"]
        data = nvstrings.from_offsets(
            arrays[1],
            arrays[2],
            scount,
            nbuf=arrays[0],
            ncount=header["null_count"],
            bdevmem=True,
        )
        return data
Esempio n. 5
0
 def __init__(self, mem, size=None, capacity=None, categorical=False):
     """Wrap ``mem`` after handing it to ``cudautils.to_device``.

     Parameters
     ----------
     mem : array-like
         Source memory.
     size : int, optional
         Number of elements in use.  When omitted it is inferred from
         ``mem``; if nothing can be inferred it stays ``None``.
     capacity : int, optional
         Defaults to the resolved ``size``.
     categorical : bool
         When true, ``len(mem)`` seeds the inferred size.
     """
     if size is None:
         if categorical:
             size = len(mem)
         elif hasattr(mem, '__len__'):
             # len() is skipped for objects reporting ndim == 0.
             zero_dim = hasattr(mem, 'ndim') and mem.ndim == 0
             if not zero_dim and len(mem) == 0:
                 size = 0
         # A `.size` attribute, when present, overrides whatever was
         # inferred above (including the categorical length).
         if hasattr(mem, 'size'):
             size = mem.size

     self.mem = cudautils.to_device(mem)
     # presumably enforces a one-dimensional buffer -- confirm against
     # _BufferSentry
     _BufferSentry(self.mem).ndim(1)

     self.size = size
     self.capacity = size if capacity is None else capacity
     self.dtype = self.mem.dtype
Esempio n. 6
0
 def __init__(self,
              mem,
              size=None,
              capacity=None,
              categorical=False,
              header=None):
     """Wrap ``mem`` after handing it to ``cudautils.to_device``.

     Parameters
     ----------
     mem : array-like, memoryview, or foreign buffer object
         Source memory.  A memoryview is first reinterpreted with
         ``np.frombuffer``; any other object that is neither a NumPy array
         nor device memory gets ``typestr`` and ``shape`` assigned from
         ``header`` before conversion.
     size : int, optional
         Number of elements in use.  Inferred from ``mem`` when omitted,
         and always overwritten in the memoryview / foreign-buffer paths.
     capacity : int, optional
         Defaults to the final ``size``.
     categorical : bool
         When true, ``len(mem)`` seeds the inferred size.
     header : mapping, optional
         Required when ``mem`` is a memoryview (``header["dtype"]``) or a
         foreign buffer (``header.get("dtype")`` / ``header.get("shape")``).
     """
     if size is None:
         if categorical:
             size = len(mem)
         elif hasattr(mem, "__len__"):
             if hasattr(mem, "ndim") and mem.ndim == 0:
                 pass
             elif len(mem) == 0:
                 size = 0
         if hasattr(mem, "size"):
             size = mem.size
     # memoryviews can come from UCX when the length of the DataFrame
     # is 0 -- for example: joins resulting in empty frames or metadata
     if isinstance(mem, memoryview):
         mem = np.frombuffer(mem, dtype=header["dtype"])
         size = mem.size
     if not (isinstance(mem, np.ndarray)
             or numba.cuda.driver.is_device_memory(mem)):
         # this is probably a ucp_py.BufferRegion memory object
         # check the header for info -- this should be encoded from
         # serialization process.  Lastly, `typestr` and `shape` *must*
         # manually set *before* consuming the buffer as a DeviceNDArray
         mem.typestr = header.get("dtype", "B")
         mem.shape = header.get("shape", len(mem))
         # NOTE(review): assumes the object exposes `shape` as a sequence
         # even when an int was assigned -- confirm against the buffer type.
         size = mem.shape[0]
     # Default the capacity only after `size` is final.  Doing it earlier
     # left capacity stale (possibly None) whenever one of the branches
     # above recomputed the size.
     if capacity is None:
         capacity = size
     self.mem = cudautils.to_device(mem)
     # presumably enforces a one-dimensional buffer -- confirm against
     # _BufferSentry
     _BufferSentry(self.mem).ndim(1)
     self.size = size
     self.capacity = capacity
     self.dtype = self.mem.dtype