Exemple #1
0
def _mask_from_cuda_array_interface_desc(desc):
    """Build a mask ``Buffer`` from a CUDA array interface description.

    Parameters
    ----------
    desc : dict
        A ``__cuda_array_interface__``-style mapping. Its optional ``"mask"``
        entry must itself expose ``__cuda_array_interface__``.

    Returns
    -------
    Buffer or None
        ``None`` when ``desc`` carries no mask; otherwise a ``Buffer``
        wrapping the mask memory.

    Raises
    ------
    NotImplementedError
        If the mask's typestr has a type code other than ``"t"`` or ``"b"``.
    """
    from cudf.utils.utils import calc_chunk_size, mask_bitsize, mask_dtype
    from cudf.utils.cudautils import compact_mask_bytes

    mask_obj = desc.get("mask", None)
    if mask_obj is None:
        return mask_obj

    iface = mask_obj.__cuda_array_interface__
    ptr = iface["data"][0]
    nelem = iface["shape"][0]
    typestr = iface["typestr"]
    # The second character of the typestr is the type code.
    typecode = typestr[1]

    if typecode == "t":
        # Viewed directly as ``mask_dtype`` words; the element count is
        # derived from ``nelem`` and ``mask_bitsize``.
        # finalizer=None: presumably the memory stays owned by the
        # exporting object -- TODO confirm.
        bit_words = rmm.device_array_from_ptr(
            ptr,
            nelem=calc_chunk_size(nelem, mask_bitsize),
            dtype=mask_dtype,
            finalizer=None,
        )
        return Buffer(bit_words)

    if typecode == "b":
        # One element per entry; run through ``compact_mask_bytes`` before
        # being wrapped.
        dtype = np.dtype(typestr)
        compacted = compact_mask_bytes(
            rmm.device_array_from_ptr(
                ptr, nelem=nelem, dtype=dtype, finalizer=None
            )
        )
        return Buffer(compacted)

    raise NotImplementedError(
        f"Cannot infer mask from typestr {typestr}")
Exemple #2
0
def apply_join(col_lhs, col_rhs, how, method='hash'):
    """Returns a tuple of the left and right joined indices as gpu arrays.

    This is a generator that yields exactly once: a ``(left, right)`` pair of
    int32 device arrays viewing the join result columns. When the generator
    is resumed, closed, or has an exception thrown into it at the yield
    point, the underlying result columns are released with
    ``gdf_column_free``; the yielded arrays must not be used after that.
    NOTE(review): presumably this is wrapped by ``contextlib.contextmanager``
    at the definition site -- confirm.

    Parameters
    ----------
    col_lhs, col_rhs : sequence of columns
        Join key columns; both sequences must have the same length. Each
        element must expose a ``cffi_view`` attribute.
    how : str
        Key into ``_join_how_api`` selecting the join function.
    method : str
        ``'hash'`` or ``'sort'``; also a key into ``_join_method_api``.

    Raises
    ------
    ValueError
        If the column lists differ in length or ``method`` is neither
        ``'hash'`` nor ``'sort'``.
    KeyError
        If ``how`` or ``method`` is missing from its lookup table.
    """
    if len(col_lhs) != len(col_rhs):
        msg = "Unequal #columns in list 'col_lhs' and list 'col_rhs'"
        raise ValueError(msg)

    joiner = _join_how_api[how]
    method_api = _join_method_api[method]
    gdf_context = ffi.new('gdf_context*')

    if method == 'hash':
        libgdf.gdf_context_view(gdf_context, 0, method_api, 0, 0, 0)
    elif method == 'sort':
        libgdf.gdf_context_view(gdf_context, 1, method_api, 0, 0, 0)
    else:
        msg = "method not supported"
        raise ValueError(msg)

    col_result_l = columnview(0, None, dtype=np.int32)
    col_result_r = columnview(0, None, dtype=np.int32)

    if how in ['left', 'inner']:
        list_lhs = []
        list_rhs = []
        # Lengths were checked above, so zip pairs every column.
        for lhs, rhs in zip(col_lhs, col_rhs):
            list_lhs.append(lhs.cffi_view)
            list_rhs.append(rhs.cffi_view)

        # Call libgdf
        joiner(len(col_lhs), list_lhs, list_rhs, col_result_l,
               col_result_r, gdf_context)
    else:
        # Other join kinds use a single-column signature.
        joiner(col_lhs[0].cffi_view, col_rhs[0].cffi_view, col_result_l,
               col_result_r)

    # Extract result. The try/finally guarantees the result columns are
    # freed even if the consumer raises while holding the yielded arrays or
    # the generator is closed early; previously that path leaked them.
    try:
        left = rmm.device_array_from_ptr(ptr=col_result_l.data,
                                         nelem=col_result_l.size,
                                         dtype=np.int32)

        right = rmm.device_array_from_ptr(ptr=col_result_r.data,
                                          nelem=col_result_r.size,
                                          dtype=np.int32)

        yield (left, right)
    finally:
        libgdf.gdf_column_free(col_result_l)
        libgdf.gdf_column_free(col_result_r)
Exemple #3
0
def cffi_view_to_column_mem(cffi_view):
    """Wrap the data and validity memory of a cffi column view.

    Parameters
    ----------
    cffi_view : cffi column struct
        Must expose ``data``, ``valid``, ``size`` and ``dtype``.

    Returns
    -------
    tuple
        ``(data, mask)`` device arrays; ``mask`` is ``None`` when
        ``cffi_view.valid`` is falsy. Both arrays are created with a
        finalizer built by ``rmm._make_finalizer`` for their address.
    """
    data_addr = int(ffi.cast("uintptr_t", cffi_view.data))
    data = rmm.device_array_from_ptr(
        data_addr,
        nelem=cffi_view.size,
        dtype=gdf_to_np_dtype(cffi_view.dtype),
        finalizer=rmm._make_finalizer(data_addr, 0),
    )

    # No validity pointer: the column has no mask.
    if not cffi_view.valid:
        return data, None

    mask_addr = int(ffi.cast("uintptr_t", cffi_view.valid))
    mask = rmm.device_array_from_ptr(
        mask_addr,
        nelem=calc_chunk_size(cffi_view.size, mask_bitsize),
        dtype=mask_dtype,
        finalizer=rmm._make_finalizer(mask_addr, 0),
    )
    return data, mask
Exemple #4
0
def _data_from_cuda_array_interface_desc(desc):
    """Wrap the memory described by a CUDA array interface dict in a Buffer.

    Parameters
    ----------
    desc : dict
        Mapping with ``"data"`` (pointer read from index 0), ``"shape"``
        (element count read from index 0) and ``"typestr"`` entries.

    Returns
    -------
    Buffer
        Buffer over a device array created at the described pointer with
        ``finalizer=None``.
    """
    pointer = desc["data"][0]
    length = desc["shape"][0]
    np_dtype = np.dtype(desc["typestr"])

    device_array = rmm.device_array_from_ptr(
        pointer, nelem=length, dtype=np_dtype, finalizer=None
    )
    return Buffer(device_array)
Exemple #5
0
def cffi_view_to_column_mem(cffi_view):
    """Convert a cffi column view into ``(data, mask)`` objects.

    For ``GDF_STRING_CATEGORY`` columns the first element is the strings
    object obtained from the bound category (``to_strings()``); for every
    other dtype it is a device array over ``cffi_view.data``. The second
    element is a device array over ``cffi_view.valid``, or ``None`` when
    that pointer is falsy.

    Parameters
    ----------
    cffi_view : cffi column struct
        Must expose ``data``, ``valid``, ``size``, ``dtype`` and, for string
        categories, ``dtype_info.category``.

    Returns
    -------
    tuple
        ``(payload, mask)`` as described above.
    """
    gdf_dtype = cffi_view.dtype
    data_addr = int(ffi.cast("uintptr_t", cffi_view.data))

    if gdf_dtype == libgdf.GDF_STRING_CATEGORY:
        # We need to create this just to make sure the memory is properly
        # freed: the array is never returned, only kept as a local so its
        # finalizer is attached to the data pointer.
        owned_data = rmm.device_array_from_ptr(  # noqa: F841
            data_addr,
            nelem=cffi_view.size,
            dtype='int32',
            finalizer=rmm._make_finalizer(data_addr, 0),
        )
        category_addr = int(
            ffi.cast("uintptr_t", cffi_view.dtype_info.category))
        category = nvcategory.bind_cpointer(category_addr)
        payload = category.to_strings()
    else:
        payload = rmm.device_array_from_ptr(
            data_addr,
            nelem=cffi_view.size,
            dtype=gdf_to_np_dtype(gdf_dtype),
            finalizer=rmm._make_finalizer(data_addr, 0),
        )

    # The validity mask is built the same way for both kinds of column.
    mask = None
    if cffi_view.valid:
        mask_addr = int(ffi.cast("uintptr_t", cffi_view.valid))
        mask = rmm.device_array_from_ptr(
            mask_addr,
            nelem=calc_chunk_size(cffi_view.size, mask_bitsize),
            dtype=mask_dtype,
            finalizer=rmm._make_finalizer(mask_addr, 0),
        )

    return payload, mask
Exemple #6
0
def libgdf_join(col_lhs, col_rhs, on, how, method='sort'):
    """Join two column mappings through libgdf and wrap the result memory.

    Result columns are laid out as: left columns not in ``on``, then the
    ``on`` columns (typed from the left side), then right columns not in
    ``on``.

    Parameters
    ----------
    col_lhs, col_rhs : mapping of name -> column wrapper
        Each value must expose ``_column.cffi_view`` and ``_column.dtype``.
    on : sequence of str
        Names of the join key columns; each must be present on both sides.
    how : str
        ``'left'``, ``'inner'`` or ``'outer'``; also a key into
        ``_join_how_api``.
    method : str
        Key into ``_join_method_api``.

    Returns
    -------
    tuple of (list, list)
        ``(res, valids)``: per result column, a device array over its data
        and a device array over its validity mask, each created with a
        finalizer from ``rmm._make_finalizer``.

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported join kinds, or a name in
        ``on`` is missing from ``col_rhs``.
    KeyError
        If ``how``/``method`` is missing from its lookup table, or a name in
        ``on`` is missing from ``col_lhs``.
    """
    joiner = _join_how_api[how]
    method_api = _join_method_api[method]
    gdf_context = ffi.new('gdf_context*')

    libgdf.gdf_context_view(gdf_context, 0, method_api, 0, 0, 0)

    if how not in ['left', 'inner', 'outer']:
        # Message now lists every accepted join kind.
        msg = "new join api only supports left, inner or outer"
        raise ValueError(msg)

    list_lhs = []
    list_rhs = []
    result_cols = []

    result_col_names = []

    left_idx = []
    right_idx = []

    # Materialize key order once instead of rebuilding it for every name.
    lhs_names = list(col_lhs.keys())
    rhs_names = list(col_rhs.keys())

    for name, col in col_lhs.items():
        list_lhs.append(col._column.cffi_view)
        if name not in on:
            result_cols.append(columnview(0, None, dtype=col._column.dtype))
            result_col_names.append(name)

    for name in on:
        result_cols.append(columnview(0, None,
                                      dtype=col_lhs[name]._column.dtype))
        result_col_names.append(name)
        # Positional index of each key column within its input.
        left_idx.append(lhs_names.index(name))
        right_idx.append(rhs_names.index(name))

    for name, col in col_rhs.items():
        list_rhs.append(col._column.cffi_view)
        if name not in on:
            result_cols.append(columnview(0, None, dtype=col._column.dtype))
            result_col_names.append(name)

    num_cols_to_join = len(on)
    # Key columns appear once in the output, hence the subtraction.
    result_num_cols = len(list_lhs) + len(list_rhs) - num_cols_to_join

    joiner(list_lhs,
           len(list_lhs),
           left_idx,
           list_rhs,
           len(list_rhs),
           right_idx,
           num_cols_to_join,
           result_num_cols,
           result_cols,
           ffi.NULL,
           ffi.NULL,
           gdf_context)

    res = []
    valids = []

    for col in result_cols:
        data_addr = int(ffi.cast("uintptr_t", col.data))
        res.append(rmm.device_array_from_ptr(
            ptr=data_addr,
            nelem=col.size,
            dtype=gdf_to_np_dtype(col.dtype),
            finalizer=rmm._make_finalizer(data_addr, 0)))
        valid_addr = int(ffi.cast("uintptr_t", col.valid))
        valids.append(rmm.device_array_from_ptr(
            ptr=valid_addr,
            nelem=calc_chunk_size(col.size, mask_bitsize),
            dtype=mask_dtype,
            finalizer=rmm._make_finalizer(valid_addr, 0)))

    return res, valids
Exemple #7
0
def gpu_view_as(buf, dtype, shape=None, strides=None):
    """Return a device array viewing ``buf``'s memory as ``dtype``.

    Parameters
    ----------
    buf : buffer-like
        Must provide ``to_numba()`` and ``size``.
    dtype : dtype object
        Must expose ``itemsize``; the element count is
        ``buf.size // dtype.itemsize``.
    shape, strides : optional
        Accepted but not used by this implementation.
    """
    device_ptr = numba.cuda.cudadrv.driver.device_pointer(buf.to_numba())
    element_count = buf.size // dtype.itemsize
    return rmm.device_array_from_ptr(device_ptr, element_count, dtype=dtype)