def test_redirected_logger():
    """Check that log output follows a redirected ``sys.stdout``.

    Each level from debug to critical is emitted while stdout is redirected
    into an in-memory buffer, and the message must then be found in that
    buffer. Finally a message is logged with stdout redirected to ``None``
    to make sure that this does not raise.
    """
    captured = StringIO()

    with logger.set_level(logger.level_trace):
        # We do not test trace because CUML_LOG_TRACE is not compiled by
        # default
        emitters_and_messages = (
            (logger.debug, "This is a debug message"),
            (logger.info, "This is an info message"),
            (logger.warn, "This is a warn message"),
            (logger.error, "This is an error message"),
            (logger.critical, "This is a critical message"),
        )

        for emit, test_msg in emitters_and_messages:
            with redirect_stdout(captured):
                emit(test_msg)
            # The buffer accumulates; every message is distinct, so a plain
            # substring check is enough.
            assert test_msg in captured.getvalue()

    # Check that logging does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.debug("This is a debug message")
def test_concat_memory_leak(large_clf, estimator_type):
    """Check that repeated treelite-handle concatenation does not grow RSS.

    Ten random-forest models are fitted once, the handles of nine of them
    are concatenated into the first one to establish a baseline, and the
    same concatenation is then repeated ten times. The resident set size of
    the current process must stay within 1e6 bytes of the baseline.

    Parameters
    ----------
    large_clf : tuple
        ``(X, y)`` pair; both are cast with ``astype`` before fitting.
    estimator_type : str
        ``'classification'`` or ``'regression'``; anything else fails the
        test.
    """
    import gc
    import os

    try:
        import psutil
    except ImportError:
        pytest.skip("psutil not installed")

    proc = psutil.Process(os.getpid())

    def rss_after_gc():
        # Collect first so that garbage awaiting collection is not counted.
        gc.collect()
        return proc.memory_info().rss

    X, y = large_clf
    X = X.astype(np.float32)

    # Build a series of RF models
    n_models = 10
    if estimator_type == 'classification':
        model_cls, label_dtype = curfc, np.int32
    elif estimator_type == 'regression':
        model_cls, label_dtype = curfr, np.float32
    else:
        assert False

    base_models = [
        model_cls(max_depth=10, n_estimators=100, random_state=123)
        for _ in range(n_models)
    ]
    y = y.astype(label_dtype)

    # Pre-fit once - this is our baseline and memory usage
    # should not significantly exceed it after later fits
    for fitted in base_models:
        fitted.fit(X, y)

    # Just concatenate over and over in a loop
    init_model, *concat_models = base_models
    other_handles = [
        donor._obtain_treelite_handle() for donor in concat_models
    ]
    init_model._concatenate_treelite_handle(other_handles)

    initial_baseline_mem = rss_after_gc()
    for rep in range(10):
        init_model._concatenate_treelite_handle(other_handles)
        growth = rss_after_gc() - initial_baseline_mem
        logger.debug("memory at rep %2d: %d m" % (rep, growth / 1e6))

    final_growth = rss_after_gc() - initial_baseline_mem
    logger.info("Final memory delta: %d" % (final_growth / 1e6))
    assert final_growth < 1e6
def test_logger():
    """Smoke-test the logger: emit at every level, then scope level/pattern.

    Verifies that, inside ``set_level(level_warn)``, warn is loggable while
    info is not, and that logging works inside a ``set_pattern`` scope.
    """
    for emit, text in (
        (logger.trace, "This is a trace message"),
        (logger.debug, "This is a debug message"),
        (logger.info, "This is an info message"),
        (logger.warn, "This is a warn message"),
        (logger.error, "This is a error message"),
        (logger.critical, "This is a critical message"),
    ):
        emit(text)

    with logger.set_level(logger.level_warn):
        warn_enabled = logger.should_log_for(logger.level_warn)
        assert warn_enabled
        info_enabled = logger.should_log_for(logger.level_info)
        assert not info_enabled

    with logger.set_pattern("%v"):
        logger.info("This is an info message")
def tree_reduce(objs, func=sum):
    """
    Performs a binary tree reduce on an associative
    and commutative function in parallel across
    Dask workers. Since this supports dask.delayed
    objects, which have not yet been scheduled on workers,
    it does not take locality into account. As a result,
    any local reductions should be performed before
    this function is called.

    Parameters
    ----------
    objs : array-like of dask.delayed or future
           objects to reduce.
    func : Python function or dask.delayed function
           Function to use for reduction. The reduction function
           accepts a list of objects to reduce as an argument and
           produces a single reduced object. A plain function is
           wrapped with ``dask.delayed`` before use.

    Returns
    -------
    reduced_result : dask.delayed or future
          the single element left once every level of the tree
          has been reduced
    """
    if not isinstance(func, Delayed):
        func = dask.delayed(func)

    while len(objs) > 1:
        # One tree level: reduce neighbours pairwise (the last slice holds
        # a single element when the count is odd).
        reduced_level = [
            func(dask.delayed(objs[start:start + 2], pure=False))
            for start in range(0, len(objs), 2)
        ]
        wait(reduced_level)
        objs = reduced_level

    logger.info(str(objs))
    return first(objs)
def __init__(self, ipcs, device):
    """
    Set up the thread state for a group of IPC handles on one device.

    The thread is created in the not-running state; ``running`` is
    initialised to False here.

    :param ipcs: list[ipc] list of ipc handles with memory on the
                 given device
    :param device: device id to use; it is stored after being passed
                   through ``get_device_id``.
    """
    Thread.__init__(self)

    self.lock = Lock()
    self.ipcs = ipcs

    # Use canonical device id
    canonical_id = get_device_id(device)
    self.device = canonical_id

    handle_listing = str(list(ipcs))
    startup_message = "Starting new IPC thread on device %i for ipcs %s" % (
        canonical_id, handle_listing)
    logger.info(startup_message)

    self.running = False
def run(self):
    """
    Thread body: open the IPC handles on the selected device, idle while
    ``self.running`` is true, then close the handles again.

    Opening stores the opened arrays in ``self.arrs`` and their
    ``__cuda_array_interface__`` dicts in ``self.ptr_info``, and sets
    ``self.running`` to True. Failures while opening or closing are
    logged and not re-raised; if opening fails ``self.running`` is not
    set here, so the wait loop is skipped when it is still False.
    """
    select_device(self.device)

    logger.info("Opening: " + str(self.device) + " "
                + str(numba.cuda.get_current_device()))

    # ``with`` guarantees the lock is released even if something other
    # than ``Exception`` propagates out of the block.
    with self.lock:
        try:
            self.arrs = [ipc.open() for ipc in self.ipcs]
            self.ptr_info = [x.__cuda_array_interface__
                             for x in self.arrs]
            self.running = True
        except Exception as e:
            logger.error("Error opening ipc_handle on device "
                         + str(self.device) + ": " + str(e))

    # Poll until another party clears ``self.running``.
    while self.running:
        time.sleep(0.0001)

    try:
        logger.warn("Closing: " + str(self.device) + " "
                    + str(numba.cuda.get_current_device()))
        # Previously a failing ``close()`` skipped ``release()`` and left
        # the lock held; the context manager always releases it.
        with self.lock:
            for ipc in self.ipcs:
                ipc.close()
    except Exception as e:
        logger.error("Error closing ipc_handle on device "
                     + str(self.device) + ": " + str(e))
def batched_fmin_lbfgs_b(func, x0, num_batches, fprime=None, args=(),
                         bounds=None, m=10, factr=1e7, pgtol=1e-5,
                         epsilon=1e-8,
                         iprint=-1, maxiter=15000, maxls=20):
    """A batch-aware L-BFGS-B implementation to minimize a loss function `f`
    given an initial set of parameters `x0`.

    Every batch member is driven by its own L-BFGS-B state, but `func` and
    `fprime` are evaluated once per step on the concatenated parameters of
    all members.

    Parameters
    ----------
    func : function (x: array) -> array[M] (M = n_batches)
           The function to minimize. The function should return an array of
           size = `num_batches`
    x0 : array
         Starting parameters: the per-batch parameter vectors laid out one
         after another (`len(x0) // num_batches` values per batch).
    num_batches : int
         Number of batch members.
    fprime : function (x: array) -> array[M*n_params] (optional)
             The gradient. Should return an array of derivatives for each
             parameter over batches.
             When omitted, uses finite differencing to estimate the
             gradient.
    args   : Tuple
             Additional arguments to func and fprime
    bounds : List[Tuple[float, float]]
             Box constraints on the parameters of one batch member; the
             same bounds are used for every member. `None` on either side
             means unbounded on that side.
    m      : int
             L-BFGS parameter: number of previous arrays to store when
             estimating inverse Hessian.
    factr  : float
             Stopping criterion when function evaluation not progressing.
             Stop when `|f(xk+1) - f(xk)| < factr*eps_mach`
             where `eps_mach` is the machine precision
    pgtol  : float
             Stopping criterion when gradient is sufficiently "flat".
             Stop when |grad| < pgtol.
    epsilon : float
              Finite differencing step size when approximating `fprime`
    iprint : int
             -1 for no diagnostic info
             n=1-100 for diagnostic info every n steps.
             >100 for detailed diagnostic info
    maxiter : int
              Maximum number of L-BFGS iterations
    maxls   : int
              Maximum number of line-search iterations.

    Returns
    -------
    xk : array
         Final parameters of all batch members, concatenated like `x0`.
    n_iterations : array[M] of int32
         Number of iterations performed for each batch member.
    warn_flag : array[M]
         0 where the member stopped with a `CONV...` task message, 2 where
         it stopped for any other reason.

    Raises
    ------
    RuntimeError
        If SciPy is not available.
    """

    if has_scipy():
        # NOTE(review): `_lbfgsb.setulb` is a private SciPy interface; the
        # argument list used below should be confirmed against the SciPy
        # versions this project supports.
        from scipy.optimize import _lbfgsb
    else:
        raise RuntimeError("Scipy is needed to run batched_fmin_lbfgs_b")

    nvtx_range_push("LBFGS")
    n = len(x0) // num_batches

    # Bind `args` once. With the default empty tuple the user callables are
    # used directly, exactly as before.
    if args:
        def objective(x):
            return func(x, *args)
    else:
        objective = func

    if fprime is None:
        def gradient(x):
            return _fd_fprime(x, objective, epsilon)
    elif args:
        def gradient(x):
            return fprime(x, *args)
    else:
        gradient = fprime

    if bounds is None:
        bounds = [(None, None)] * n

    # Translate the bounds into the (nbd, lower, upper) encoding expected by
    # the L-BFGS-B routine: 0 = unbounded, 1 = lower only, 2 = both,
    # 3 = upper only.
    nbd = np.zeros(n, np.int32)
    low_bnd = np.zeros(n, np.float64)
    upper_bnd = np.zeros(n, np.float64)
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        lb, ub = bounds[i]
        if lb is not None:
            low_bnd[i] = lb
            lb = 1
        if ub is not None:
            upper_bnd[i] = ub
            ub = 1
        nbd[i] = bounds_map[lb, ub]

    # working arrays needed by L-BFGS-B implementation in SciPy.
    # One for each series
    batch_ids = range(num_batches)
    x = [np.array(x0[ib*n:(ib+1)*n], np.float64) for ib in batch_ids]
    f = [np.array(0.0, np.float64) for ib in batch_ids]
    g = [np.zeros((n,), np.float64) for ib in batch_ids]
    wa = [np.zeros(2*m*n + 5*n + 11*m*m + 8*m, np.float64)
          for ib in batch_ids]
    iwa = [np.zeros(3*n, np.int32) for ib in batch_ids]
    task = [np.zeros(1, 'S60') for ib in batch_ids]
    csave = [np.zeros(1, 'S60') for ib in batch_ids]
    lsave = [np.zeros(4, np.int32) for ib in batch_ids]
    isave = [np.zeros(44, np.int32) for ib in batch_ids]
    dsave = [np.zeros(29, np.float64) for ib in batch_ids]
    for ib in batch_ids:
        task[ib][:] = 'START'

    n_iterations = np.zeros(num_batches, dtype=np.int32)

    converged = num_batches * [False]

    warn_flag = np.zeros(num_batches)

    while not all(converged):
        nvtx_range_push("LBFGS-ITERATION")

        # Advance the state machine of every member that is still running.
        for ib in batch_ids:
            if converged[ib]:
                continue

            _lbfgsb.setulb(m, x[ib],
                           low_bnd, upper_bnd,
                           nbd,
                           f[ib], g[ib],
                           factr, pgtol,
                           wa[ib], iwa[ib],
                           task[ib],
                           iprint,
                           csave[ib],
                           lsave[ib],
                           isave[ib],
                           dsave[ib],
                           maxls)

        # One batched evaluation serves all members.
        xk = np.concatenate(x)
        fk = objective(xk)
        gk = gradient(xk)

        for ib in batch_ids:
            if converged[ib]:
                continue
            # `tobytes` replaces the deprecated `tostring` alias.
            task_str = task[ib].tobytes()
            task_str_strip = task_str.strip(b'\x00').strip()
            if task_str.startswith(b'FG'):
                # needs function evaluation
                f[ib] = fk[ib]
                g[ib] = gk[ib*n:(ib+1)*n]
            elif task_str.startswith(b'NEW_X'):
                n_iterations[ib] += 1
                if n_iterations[ib] >= maxiter:
                    # Ask the routine to stop; the member is then marked
                    # as finished on a later pass through the else branch.
                    task[ib][:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif task_str_strip.startswith(b'CONV'):
                converged[ib] = True
                warn_flag[ib] = 0
            else:
                converged[ib] = True
                warn_flag[ib] = 2
        nvtx_range_pop()

    xk = np.concatenate(x)

    if iprint > 0:
        logger.info("CONVERGED in ({}-{}) iterations (|\\/f|={})".format(
            np.min(n_iterations),
            np.max(n_iterations),
            np.linalg.norm(gradient(xk), np.inf)))

        if (warn_flag > 0).any():
            for ib in batch_ids:
                if warn_flag[ib] > 0:
                    logger.info(
                        "WARNING: id={} convergence issue: {}".format(
                            ib, task[ib].tobytes()))

    nvtx_range_pop()
    return xk, n_iterations, warn_flag