def find_last(arr, val, compare="eq"):
    """
    Returns the index of the last occurrence of *val* in *arr*
    (or, if *compare* is not "eq", of the last element of *arr* for
    which ``element <compare> val`` holds). If there is no such
    occurrence, returns -1.

    Parameters
    ----------
    arr : device array
    val : scalar
    compare : str ('gt', 'lt', or 'eq' (default))
    """
    found = rmm.device_array_like(arr)
    if found.size > 0:
        if compare == "gt":
            gpu_mark_gt.forall(found.size)(arr, val, found, -1)
        elif compare == "lt":
            gpu_mark_lt.forall(found.size)(arr, val, found, -1)
        else:
            if arr.dtype in ("float32", "float64"):
                gpu_mark_found_float.forall(found.size)(arr, val, found, -1)
            else:
                gpu_mark_found_int.forall(found.size)(arr, val, found, -1)
    from cudf.core.column import as_column

    found_col = as_column(found)
    max_index = found_col.max()
    return max_index
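# Usage sketch (an illustration, not part of the original source):
# assumes a CUDA-capable environment with rmm and the gpu_mark_*
# kernels above importable.
import numpy as np
import rmm

d_arr = rmm.to_device(np.array([1, 5, 2, 7, 3], dtype=np.int32))
assert find_last(d_arr, 2) == 2                 # last exact match of 2
assert find_last(d_arr, 3, compare="gt") == 3   # last element > 3 is arr[3] == 7
assert find_last(d_arr, 0, compare="lt") == -1  # nothing is < 0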
def _request_transfer(key, remoteinfo):
    logger.info("rebuild from: %s for %r", remoteinfo, key)
    context = zmq.Context()
    socket = context.socket(zmq.REQ)
    socket.connect("tcp://{0}:{1}".format(*remoteinfo))

    myaddr = _global_addr[0]
    theiraddr = remoteinfo[0]
    if myaddr == theiraddr:
        # Same machine; transfer via IPC
        logger.info("request by IPC")
        socket.send(pickle.dumps(("IPC", key)))
        rcv = socket.recv()
        ipch = pickle.loads(rcv)
        # Open IPC and copy to local context
        with ipch as data:
            copied = rmm.device_array_like(data)
            copied.copy_to_device(data)
        # Release
        _request_drop(socket, key)
        return copied
    else:
        # Different machine; transfer over the network
        logger.info("request by NET: %s->%s", theiraddr, myaddr)
        socket.send(pickle.dumps(("NET", key)))
        rcv = socket.recv()
        output = rmm.to_device(pickle.loads(rcv))
        # Release
        _request_drop(socket, key)
        return output
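# Illustrative sketch (not from the original source) of the reply side
# this client expects: a REP socket answering ("IPC", key) with a
# pickled IPC handle and ("NET", key) with a pickled host copy of the
# device array. The _dev_arrays store, _serve_addr, and the omitted
# handling of the follow-up drop request are assumptions.
def _serve_transfers(_dev_arrays, _serve_addr):
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://{0}:{1}".format(*_serve_addr))
    while True:
        cmd, key = pickle.loads(socket.recv())
        if cmd == "IPC":
            # Same machine: share the device memory through an IPC handle
            socket.send(pickle.dumps(_dev_arrays[key].get_ipc_handle()))
        elif cmd == "NET":
            # Different machine: ship the bytes through host memory
            socket.send(pickle.dumps(_dev_arrays[key].copy_to_host()))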
def copy_array(arr, out=None):
    if out is None:
        out = rmm.device_array_like(arr)

    if (arr.is_c_contiguous() and out.is_c_contiguous()
            and out.size == arr.size):
        out.copy_to_device(arr)
    else:
        if arr.size > 0:
            gpu_copy.forall(arr.size)(arr, out)
    return out
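# Usage sketch (an illustration, not part of the original source):
# copy a device array into a freshly allocated output buffer.
import numpy as np
import rmm

src = rmm.to_device(np.arange(8, dtype=np.float32))
dst = copy_array(src)  # out is allocated with device_array_like(src)
np.testing.assert_array_equal(dst.copy_to_host(), src.copy_to_host())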
def array_tester(dtype, nelem):
    # data
    h_in = np.full(nelem, 3.2, dtype)
    h_result = np.empty(nelem, dtype)

    d_in = rmm.to_device(h_in)
    d_result = rmm.device_array_like(d_in)

    d_result.copy_to_device(d_in)
    h_result = d_result.copy_to_host()

    np.testing.assert_array_equal(h_result, h_in)
def test_rmm_csv_log(dtype, nelem):
    # data
    h_in = np.full(nelem, 3.2, dtype)

    d_in = rmm.to_device(h_in)
    d_result = rmm.device_array_like(d_in)

    d_result.copy_to_device(d_in)
    csv = rmm.csv_log()

    assert (
        csv.find(
            "Event Type,Device ID,Address,Stream,Size (bytes),"
            "Free Memory,Total Memory,Current Allocs,Start,End,"
            "Elapsed,Location"
        )
        >= 0
    )
def run(self, arr, k):
    if k >= MAX_FAST_UNIQUE_K:
        raise NotImplementedError("k >= {}".format(MAX_FAST_UNIQUE_K))
    # set up memory
    outsz_ptr = rmm.device_array(shape=1, dtype=np.intp)
    out = rmm.device_array_like(arr)
    # kernel
    self._kernel[1, 64](arr, k, out, outsz_ptr)
    # copy to host
    unique_ct = outsz_ptr.copy_to_host()[0]
    if unique_ct < 0:
        raise ValueError("too many unique values (hint: increase k)")
    else:
        hout = out.copy_to_host()
        return hout[:unique_ct]
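# Hypothetical usage sketch (the wrapper name and dtype are
# assumptions): run() is a method of a unique-value helper whose
# compiled _kernel compacts the distinct values of arr into out and
# writes their count (negative on overflow) to outsz_ptr.
#
#   finder = UniqueK(np.int32)            # hypothetical wrapper class
#   d_arr = rmm.to_device(np.array([3, 1, 3, 2, 1], dtype=np.int32))
#   uniques = finder.run(d_arr, k=64)     # host array of distinct values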
def fill_mask(data, mask, value):
    """Fill the positions of a column selected by a custom mask with a
    single scalar value.

    Parameters
    ----------
    data : device array
        data
    mask : device array
        validity mask
    value : scalar
        fill value

    Returns
    -------
    device array
        column filled with the scalar value at the masked positions
    """
    out = rmm.device_array_like(data)
    out.copy_to_device(data)
    if data.size > 0:
        configured = gpu_fill_masked.forall(data.size)
        configured(value, mask, out)
    return out
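# Usage sketch (an illustration, not original source; assumes a set bit
# in the cuDF-style bitmask selects a position to fill, following
# gpu_fill_masked's convention, and that a mask helper exists in the
# surrounding module):
#
#   d_col = rmm.to_device(np.array([1.0, 2.0, 3.0], dtype=np.float64))
#   d_mask = mask_from_bools([True, False, True])   # hypothetical helper
#   filled = fill_mask(d_col, d_mask, 0.0)          # -> [0.0, 2.0, 0.0]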
def input_to_dev_array(X, order='F', deepcopy=False,
                       check_dtype=False, convert_to_dtype=False,
                       check_cols=False, check_rows=False,
                       fail_on_order=False):
    """
    Convert input X to a device array suitable for C++ methods.

    Acceptable input formats:

    * cuDF DataFrame - always returns a deep copy.
    * cuDF Series - returns by reference or as a deep copy depending
      on `deepcopy`.
    * NumPy array - always returns a copy on the device.
    * cuda array interface compliant array (like CuPy) - returns a
      reference unless `deepcopy`=True.
    * numba device array - returns a reference unless `deepcopy`=True.

    Parameters
    ----------
    X : cuDF.DataFrame, cuDF.Series, numba array, NumPy array or any
        cuda_array_interface compliant array like CuPy or PyTorch.
    order : string (default: 'F')
        Whether to return an F-major or C-major array. Used to check the
        order of the input. If fail_on_order=True, the method will raise
        a ValueError; otherwise it will convert X to be of order `order`.
    deepcopy : boolean (default: False)
        Set to True to always return a deep copy of X.
    check_dtype : np.dtype (default: False)
        Set to a np.dtype to throw an error if X is not of dtype
        `check_dtype`.
    convert_to_dtype : np.dtype (default: False)
        Set to a dtype if you want X to be converted to that dtype if it
        is not that dtype already.
    check_cols : int (default: False)
        Set to an int `i` to check that input X has `i` columns. Set to
        False (default) to not check at all.
    check_rows : int (default: False)
        Set to an int `i` to check that input X has `i` rows. Set to
        False (default) to not check at all.
    fail_on_order : boolean (default: False)
        Set to True if you want the method to raise a ValueError if X is
        not of order `order`.

    Returns
    -------
    `inp_array`: namedtuple('inp_array', 'array pointer n_rows n_cols dtype')
        A new device array if the input was not a numba device array.
        It is a reference to the input X if it was a numba device array
        or cuda array interface compliant (like CuPy).
    """
    if convert_to_dtype:
        X = convert_dtype(X, to_dtype=convert_to_dtype)
        check_dtype = False

    if isinstance(X, cudf.DataFrame):
        dtype = np.dtype(X[X.columns[0]]._column.dtype)
        if order == 'F':
            X_m = X.as_gpu_matrix(order='F')
        elif order == 'C':
            X_m = cuml.utils.numba_utils.row_matrix(X)

    elif isinstance(X, cudf.Series):
        if deepcopy:
            X_m = X.to_gpu_array()
        else:
            if X.null_count == 0:
                # using __cuda_array_interface__ support of cudf.Series
                # for this temporarily while switching from rmm
                # device_array to rmm DeviceBuffer
                # https://github.com/rapidsai/cuml/issues/1379
                X_m = cuda.as_cuda_array(X._column)
            else:
                raise ValueError("Error: cuDF Series has missing/null "
                                 "values")

    elif isinstance(X, np.ndarray):
        dtype = X.dtype
        X_m = rmm.to_device(np.array(X, order=order, copy=False))

    elif cuda.is_cuda_array(X):
        # Use cuda array interface to create a device array by reference
        X_m = cuda.as_cuda_array(X)
        if deepcopy:
            out_dev_array = rmm.device_array_like(X_m)
            out_dev_array.copy_to_device(X_m)
            X_m = out_dev_array

    elif cuda.devicearray.is_cuda_ndarray(X):
        if deepcopy:
            out_dev_array = rmm.device_array_like(X)
            out_dev_array.copy_to_device(X)
            X_m = out_dev_array
        else:
            X_m = X

    else:
        msg = "X matrix format " + str(X.__class__) + " not supported"
        raise TypeError(msg)

    dtype = X_m.dtype

    if check_dtype:
        if isinstance(check_dtype, type) or isinstance(check_dtype,
                                                       np.dtype):
            if dtype != check_dtype:
                del X_m
                raise TypeError("Expected " + str(check_dtype) +
                                " input but got " + str(dtype) +
                                " instead.")
        elif isinstance(check_dtype, Collection) and \
                not isinstance(check_dtype, str):
            # The 'not isinstance(check_dtype, str)' condition is needed
            # because the 'float32' string is a Collection, but in this
            # branch we only want to process collections like
            # [np.float32, np.float64].
            if dtype not in check_dtype:
                del X_m
                raise TypeError("Expected input to be of type in " +
                                str(check_dtype) + " but got " +
                                str(dtype))
        else:
            raise ValueError("Expected a type as the check_dtype arg, "
                             "but got " + str(check_dtype))

    n_rows = X_m.shape[0]
    if len(X_m.shape) > 1:
        n_cols = X_m.shape[1]
    else:
        n_cols = 1

    if check_cols:
        if n_cols != check_cols:
            raise ValueError("Expected " + str(check_cols) +
                             " columns but got " + str(n_cols) +
                             " columns.")

    if check_rows:
        if n_rows != check_rows:
            raise ValueError("Expected " + str(check_rows) +
                             " rows but got " + str(n_rows) + " rows.")

    if not check_numba_order(X_m, order):
        if fail_on_order:
            raise ValueError("Expected " + order_to_str(order) +
                             " major order, but got the opposite.")
        else:
            warnings.warn("Expected " + order_to_str(order) + " major "
                          "order, but got the opposite. Converting "
                          "data; this will result in additional memory "
                          "utilization.")
            X_m = rmm_cupy_ary(cp.array, X_m, copy=False, order=order)
            X_m = cuda.as_cuda_array(X_m)

    X_ptr = get_dev_array_ptr(X_m)

    return inp_array(array=X_m, pointer=X_ptr, n_rows=n_rows,
                     n_cols=n_cols, dtype=dtype)
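# Usage sketch (an illustration, not part of the original source):
# validate a NumPy input and obtain a device pointer for C++/Cython.
import numpy as np

X = np.ones((100, 4), dtype=np.float32)
X_m, X_ptr, n_rows, n_cols, dtype = input_to_dev_array(
    X, order='F', check_dtype=np.float32, check_cols=4)
# X_m is an F-major device copy of X; X_ptr is its raw device pointer.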
def apply_round(data, decimal):
    output_dary = rmm.device_array_like(data)
    if output_dary.size > 0:
        gpu_round.forall(output_dary.size)(data, output_dary, decimal)
    return output_dary
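# Usage sketch (an illustration, not part of the original source):
# round a float column to one decimal place on the GPU.
import numpy as np
import rmm

d_col = rmm.to_device(np.array([1.24, 2.36], dtype=np.float64))
rounded = apply_round(d_col, 1)
print(rounded.copy_to_host())  # -> [1.2, 2.4] (up to float representation)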