def get_handle(use_handle, n_streams=0):
    """Build a cuml Handle attached to a fresh CUDA stream.

    Returns (handle, stream) when `use_handle` is truthy, otherwise
    (None, None) so callers can fall back to cuml's default handle.
    """
    if use_handle:
        handle = cuml.Handle(n_streams)
        stream = cuml.cuda.Stream()
        handle.setStream(stream)
        return handle, stream
    return None, None
def test_base_class_usage_with_handle():
    """Smoke-test that cuml.Base accepts an externally created handle
    whose stream was set explicitly, and that syncing it works."""
    cuml_handle = cuml.Handle()
    cuda_stream = cuml.cuda.Stream()
    cuml_handle.setStream(cuda_stream)

    base_obj = cuml.Base(handle=cuml_handle)
    base_obj.handle.sync()
    del base_obj
def test_svm_memleak_on_exception(params, n_rows=1000, n_iter=10,
                                  n_cols=1000, dataset='blobs'):
    """
    Test whether there is any mem leak when we exit training with an
    exception. The poly kernel with degree=30 will overflow, and triggers
    the 'SMO error: NaN found...' exception.
    """
    X_train, y_train = make_blobs(n_samples=n_rows, n_features=n_cols,
                                  random_state=137, centers=2)
    X_train = X_train.astype(np.float32)

    cuda_stream = cuml.cuda.Stream()
    handle = cuml.Handle(stream=cuda_stream)

    # Warmup. Some modules that are used in SVC allocate space on the device
    # and consume memory. Here we make sure that this allocation is done
    # before the first call to get_memory_info.
    warmup_model = cu_svm.SVC(handle=handle, **params)
    with pytest.raises(RuntimeError):
        # SMO error: NaN found during fitting.
        warmup_model.fit(X_train, y_train)
    baseline_free = cuda.current_context().get_memory_info()[0]

    # Main test loop: every iteration must fail the same way and release
    # everything it allocated.
    for _ in range(n_iter):
        model = cu_svm.SVC(handle=handle, **params)
        with pytest.raises(RuntimeError):
            # SMO error: NaN found during fitting.
            model.fit(X_train, y_train)
        del model

    handle.sync()
    delta_mem = baseline_free - cuda.current_context().get_memory_info()[0]
    print("Delta GPU mem: {} bytes".format(delta_mem))
    assert delta_mem == 0
def run_classification(datatype, penalty, loss, dims, nclasses):
    """Fit cuml's LinearSVC and sklearn's LinearSVC on the same synthetic
    classification data and compare their scores via ``good_enough``.

    The sklearn run executes under a timeout proportional to the measured
    cuml runtime; if sklearn is too slow the test is skipped, not failed.

    Parameters
    ----------
    datatype : dtype passed to make_classification_dataset (e.g. np.float32)
    penalty : str, 'l1' or 'l2'
    loss : str, e.g. 'hinge' or 'squared_hinge'
    dims : tuple (nrows, ncols) of the generated dataset
    nclasses : int, number of target classes
    """
    t = time.perf_counter()
    nrows, ncols = dims
    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype, nrows, ncols, nclasses)
    logger.debug(f"Data generation time: {time.perf_counter() - t} s.")

    # solving in primal is not supported by sklearn for this loss type.
    skdual = loss == 'hinge' and penalty == 'l2'
    if loss == 'hinge' and penalty == 'l1':
        pytest.skip(
            "sklearn does not support this combination of loss and penalty")

    # limit the max iterations for sklearn to reduce the max test time
    # NOTE(review): cuit * 1000 / nrows is a float, so skit may be a float;
    # sklearn appears to tolerate that for max_iter — confirm.
    cuit = 10000
    skit = max(10, min(cuit, cuit * 1000 / nrows))

    # Time the cuml fit + score; handle.sync() ensures the GPU work has
    # actually finished before we stop the clock.
    t = time.perf_counter()
    handle = cuml.Handle(n_streams=0)
    cum = cu.LinearSVC(handle=handle, loss=loss, penalty=penalty,
                       max_iter=cuit)
    cum.fit(X_train, y_train)
    cus = cum.score(X_test, y_test)
    handle.sync()
    t = time.perf_counter() - t
    logger.debug(f"Cuml time: {t} s.")
    # Budget for the sklearn run: at least 5 s, scaled off the cuml time.
    t = max(5, t * SKLEARN_TIMEOUT_FACTOR)

    # cleanup cuml objects so that we can more easily fork the process
    # and test sklearn
    del cum
    # .get() copies the device arrays back to host (NumPy) for sklearn.
    X_train = X_train.get()
    X_test = X_test.get()
    y_train = y_train.get()
    y_test = y_test.get()
    gc.collect()
    try:
        def run_sklearn():
            # Train/score the CPU reference model; runs under with_timeout.
            skm = sk.LinearSVC(loss=loss, penalty=penalty, max_iter=skit,
                               dual=skdual)
            skm.fit(X_train, y_train)
            return skm.score(X_test, y_test)

        sks = with_timeout(timeout=t, target=run_sklearn)
        good_enough(cus, sks, nrows)
    except TimeoutError:
        pytest.skip(f"sklearn did not finish within {t} seconds.")
def test_svm_memleak(params, n_rows, n_iter, n_cols,
                     use_handle, dataset='blobs'):
    """
    Test whether there is any memory leak.

    Repeatedly fits an SVC on the same data and asserts that the free GPU
    memory after the loop equals the baseline taken after warmup.

    .. note:: small `n_rows`, and `n_cols` values will result in small
        model size, that will not be measured by get_memory_info.
    """
    X_train, X_test, y_train, y_test = make_dataset(dataset, n_rows, n_cols)
    stream = cuml.cuda.Stream()
    handle = cuml.Handle()
    handle.setStream(stream)

    # Warmup. Some modules that are used in SVC allocate space on the device
    # and consume memory. Here we make sure that this allocation is done
    # before the first call to get_memory_info.
    tmp = cu_svm.SVC(handle=handle, **params)
    tmp.fit(X_train, y_train)
    ms = get_memsize(tmp)
    # Fixed typo: "consumtion" -> "consumption".
    print("Memory consumption of SVC object is {} MiB".format(
        ms / (1024 * 1024.0)))

    free_mem = cuda.current_context().get_memory_info()[0]

    # Check first whether the get_memory_info gives us the correct memory
    # footprint: a fresh fitted model must consume at least its own size.
    cuSVC = cu_svm.SVC(handle=handle, **params)
    cuSVC.fit(X_train, y_train)
    delta_mem = free_mem - cuda.current_context().get_memory_info()[0]
    assert delta_mem >= ms

    # Main test loop. Accumulate intercepts so the fits cannot be
    # optimized away; each model must be fully freed by `del`.
    b_sum = 0
    for i in range(n_iter):
        cuSVC = cu_svm.SVC(handle=handle, **params)
        cuSVC.fit(X_train, y_train)
        b_sum += cuSVC.intercept_
        cuSVC.predict(X_train)
        del cuSVC

    handle.sync()
    delta_mem = free_mem - cuda.current_context().get_memory_info()[0]
    print("Delta GPU mem: {} bytes".format(delta_mem))
    assert delta_mem == 0
def test_base_class_usage_with_handle():
    """Smoke-test cuml.Base with a handle constructed on an explicit
    stream (stream passed at Handle construction time)."""
    cuda_stream = Stream()
    cuml_handle = cuml.Handle(stream=cuda_stream)

    base_obj = cuml.Base(handle=cuml_handle)
    base_obj.handle.sync()
    del base_obj
def get_handle(use_handle, n_streams=0):
    """Build a cuml Handle on a fresh CUDA stream.

    Returns (handle, stream) when `use_handle` is truthy, otherwise
    (None, None) so callers can fall back to cuml's default handle.
    """
    if use_handle:
        stream = Stream()
        handle = cuml.Handle(stream=stream, n_streams=n_streams)
        return handle, stream
    return None, None