def apply_join(col_lhs, col_rhs, how, method='hash'):
    """Yield the left and right joined indices as a pair of gpu arrays.

    This is a generator that yields exactly once and then frees the libgdf
    result columns backing the yielded arrays, so the arrays must not be
    used after the generator has been resumed or closed.
    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` via a
    decorator outside this view -- confirm.

    Parameters
    ----------
    col_lhs, col_rhs : sequences of columns exposing ``.cffi_view``
        Join key columns of the left and right side; both sequences must
        have the same length.
    how : key of ``_join_how_api``
        Selects the libgdf join function. For 'left' and 'inner' every key
        column is passed; for any other kind only the first column of each
        side is passed.
    method : 'hash' or 'sort'
        Join algorithm, looked up in ``_join_method_api``.

    Yields
    ------
    (left, right)
        Two int32 arrays built by ``_as_numba_devarray`` over the data
        pointers of the result columns.

    Raises
    ------
    ValueError
        If the column lists differ in length, or if ``method`` is neither
        'hash' nor 'sort'.
    """
    if len(col_lhs) != len(col_rhs):
        msg = "Unequal #columns in list 'col_lhs' and list 'col_rhs'"
        raise ValueError(msg)

    joiner = _join_how_api[how]
    # NOTE(review): this lookup runs before the explicit method check below,
    # so a `method` missing from `_join_method_api` fails here first.
    method_api = _join_method_api[method]
    gdf_context = ffi.new('gdf_context*')

    # The second argument differs per method (0 for hash, 1 for sort);
    # presumably a "sorted input" flag -- confirm against libgdf.
    if method == 'hash':
        libgdf.gdf_context_view(gdf_context, 0, method_api, 0)
    elif method == 'sort':
        libgdf.gdf_context_view(gdf_context, 1, method_api, 0)
    else:
        msg = "method not supported"
        raise ValueError(msg)

    # Empty int32 output columns that the join call fills in.
    col_result_l = columnview(0, None, dtype=np.int32)
    col_result_r = columnview(0, None, dtype=np.int32)

    if how in ('left', 'inner'):
        list_lhs = [col.cffi_view for col in col_lhs]
        list_rhs = [col.cffi_view for col in col_rhs]

        # Call libgdf (multi-column API)
        joiner(len(col_lhs), list_lhs, list_rhs, col_result_l,
               col_result_r, gdf_context)
    else:
        # Single-column API: only the first key column of each side is used.
        joiner(col_lhs[0].cffi_view, col_rhs[0].cffi_view, col_result_l,
               col_result_r)

    # From here on the result columns hold join output; the `finally`
    # guarantees they are released even when the consumer raises or the
    # generator is closed before being resumed.
    try:
        # Extract result: wrap the raw result pointers without copying.
        left = _as_numba_devarray(
            intaddr=int(ffi.cast("uintptr_t", col_result_l.data)),
            nelem=col_result_l.size, dtype=np.int32)

        right = _as_numba_devarray(
            intaddr=int(ffi.cast("uintptr_t", col_result_r.data)),
            nelem=col_result_r.size, dtype=np.int32)

        yield (left, right)
    finally:
        libgdf.gdf_column_free(col_result_l)
        libgdf.gdf_column_free(col_result_r)
def _call_join_multi(api, ncols, col_left, col_right, ctxt):
    """Run a multi-column join API and return the joined indices.

    Parameters
    ----------
    api : callable
        Join function invoked as
        ``api(ncols, col_left, col_right, l_res, r_res, ctxt)``; it is
        expected to fill the two result columns.
    ncols : int
        Number of key columns, forwarded to ``api`` unchanged.
    col_left, col_right
        Key columns of the left and right side, forwarded to ``api``.
    ctxt
        Join context, forwarded to ``api``.

    Returns
    -------
    numpy.ndarray
        ``np.array([l_idx, r_idx])`` where each entry is the result column
        copied out by ``_copy_int_col_to_arr``.
    """
    l_res = new_column()
    r_res = new_column()
    api(ncols, col_left, col_right, l_res, r_res, ctxt)

    # Once the join has run, the result columns must always be released,
    # including when copying the indices out fails.
    try:
        l_idx = _copy_int_col_to_arr(l_res)
        r_idx = _copy_int_col_to_arr(r_res)
        joined_idx = np.array([l_idx, r_idx])
    finally:
        libgdf.gdf_column_free(l_res)
        libgdf.gdf_column_free(r_res)

    return joined_idx