def CalculateNumberOfActiveThreads(numberOfTasks):
    if cpu_count() == 2:
        return cpu_count()
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
def calculate_number_of_active_threads(numberOfTasks):
    """
    Calculates the number of threads possible, given the number of processor
    cores and the number of tasks needed to be parallelized.
    """
    if cpu_count() == 2:
        return cpu_count()
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
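# A minimal usage sketch of the helper above, assuming cpu_count is imported at
# module level (e.g. from multiprocessing), as the snippet implies. On a 2-core
# machine the result is always 2; otherwise it is min(numberOfTasks, cpu_count()).
from multiprocessing import cpu_count

print(calculate_number_of_active_threads(2))    # e.g. 2 on a 4-core machine (fewer tasks than cores)
print(calculate_number_of_active_threads(100))  # many tasks -> capped at cpu_count()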
def _hdbscan_boruvka_balltree(X, min_samples=5, alpha=1.0,
                              metric='minkowski', p=2, leaf_size=40,
                              approx_min_span_tree=True,
                              gen_min_span_tree=False,
                              core_dist_n_jobs=4, **kwargs):
    if leaf_size < 3:
        leaf_size = 3

    if core_dist_n_jobs < 1:
        core_dist_n_jobs = max(cpu_count() + 1 + core_dist_n_jobs, 1)

    if X.dtype != np.float64:
        X = X.astype(np.float64)

    tree = BallTree(X, metric=metric, leaf_size=leaf_size, **kwargs)
    alg = BallTreeBoruvkaAlgorithm(tree, min_samples, metric=metric,
                                   leaf_size=leaf_size // 3,
                                   approx_min_span_tree=approx_min_span_tree,
                                   n_jobs=core_dist_n_jobs, **kwargs)
    min_spanning_tree = alg.spanning_tree()
    # Sort edges of the min_spanning_tree by weight
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]
    # Convert edge list into standard hierarchical clustering format
    single_linkage_tree = label(min_spanning_tree)

    if gen_min_span_tree:
        return single_linkage_tree, min_spanning_tree
    else:
        return single_linkage_tree, None
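# For context only: the helper above mirrors hdbscan's internal Boruvka/ball-tree
# code path. A hedged sketch of reaching that path through the package's public
# estimator (assumes the hdbscan package is installed; parameter names as in its
# documented interface):
import numpy as np
import hdbscan

X_demo = np.random.RandomState(0).rand(200, 3)
clusterer = hdbscan.HDBSCAN(min_samples=5, algorithm='boruvka_balltree',
                            core_dist_n_jobs=4)
labels = clusterer.fit_predict(X_demo)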
def _parallel_inner_prod(X, Y, func, n_jobs, **kwds):
    """Break the pairwise matrix in n_jobs even slices and compute them in
    parallel."""
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)

    if Y is None:
        Y = X

    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(func)(X[s], Y, **kwds)
        for s in gen_even_slices(len(X), n_jobs))

    return np.hstack(ret)
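# The expression max(cpu_count() + 1 + n_jobs, 1), used here and in several of the
# snippets below, is the usual joblib / scikit-learn convention for negative n_jobs:
#   n_jobs = -1 -> cpu_count()       (all cores)
#   n_jobs = -2 -> cpu_count() - 1   (all but one core)
#   very negative values are clamped to 1
# A tiny worked illustration (assumes cpu_count from multiprocessing or joblib):
from multiprocessing import cpu_count

for requested in (-1, -2, -100):
    effective = max(cpu_count() + 1 + requested, 1)
    print(requested, '->', effective)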
def _parallel_pairwise(X, Y, func, n_jobs, **kwds):
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)

    if Y is None:
        Y = X

    if n_jobs == 1:
        # Special case to avoid picklability checks in delayed
        return func(X, Y, **kwds)

    # TODO: in some cases, backend='threading' may be appropriate
    fd = delayed(func)
    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        fd(X, Y[s], **kwds)
        for s in gen_even_slices(Y.shape[0], n_jobs))

    return np.hstack(ret)
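# A minimal sketch of calling the slicer above with a concrete pairwise function.
# It assumes Parallel, delayed and gen_even_slices are in scope (joblib /
# sklearn.utils), as in the surrounding code; euclidean_distances is scikit-learn's
# public pairwise metric.
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.RandomState(0)
X_a = rng.rand(100, 8)
Y_a = rng.rand(50, 8)

D = _parallel_pairwise(X_a, Y_a, euclidean_distances, n_jobs=2)
print(D.shape)  # (100, 50): column blocks of Y are computed in parallel, then hstacked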
def parallel_predict(estimator, X, n_jobs=1, method='predict', batches_per_job=3):
    """Run sklearn classifier prediction in parallel."""
    if n_jobs < 0:
        # joblib convention for negative n_jobs
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)  # XXX: this should really be done by joblib

    n_batches = batches_per_job * n_jobs
    n_samples = len(X)
    batch_size = int(np.ceil(n_samples / n_batches))
    parallel = Parallel(n_jobs=n_jobs, backend="threading")
    results = parallel(
        delayed(_predict, check_pickle=False)(estimator, X, method, i, i + batch_size)
        for i in range(0, n_samples, batch_size))
    return np.concatenate(results)
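# _predict is referenced above but not defined in this snippet; a plausible minimal
# helper, purely illustrative (name and signature inferred from the call site):
def _predict(estimator, X, method, start, stop):
    # Call e.g. estimator.predict or estimator.predict_proba on one batch of rows.
    return getattr(estimator, method)(X[start:stop])

# Hypothetical usage with an already fitted scikit-learn classifier `clf`:
#     proba = parallel_predict(clf, X_test, n_jobs=-1, method='predict_proba')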
def __init__(self, goldwords: dict = None, offline=True, θ=0.40, n_jobs=cpu_count() - 1):
    """Set up the JPL Page Classifier.

    :param goldwords: dict {label -> "golden words" related to that category}.
        Default: use get_goldwords() to load them from category files.
    :param offline: bool - True if HTML can be found in the standard file location.
    :param θ: float - If no score > θ, returns UNDEF.
    :param n_jobs: int - Set ≤ 1 to disable parallel.
    """
    self.offline = offline
    self.θ = θ
    self.n_jobs = n_jobs
    if not goldwords:
        self.goldwords = get_goldwords(self.classes_, KEYWORD_DIR)
    else:
        self.goldwords = goldwords
    self.bleached = []
    self.errors = []
    self._estimator_type = "classifier"
""" Parallelized Mutual Information based Feature Selection module. Author: Daniel Homola <*****@*****.**> License: BSD 3 clause """ import numpy as np from scipy import signal from sklearn.utils import check_X_y from sklearn.preprocessing import StandardScaler from sklearn.externals.joblib.parallel import cpu_count import bottleneck as bn from . import mi NUM_CORES = cpu_count() class MutualInformationFeatureSelector(object): """ MI_FS stands for Mutual Information based Feature Selection. This class contains routines for selecting features using both continuous and discrete y variables. Three selection algorithms are implemented: JMI, JMIM and MRMR. This implementation tries to mimic the scikit-learn interface, so use fit, transform or fit_transform, to run the feature selection. Parameters ----------
from __future__ import division

import numpy as np
from scipy import signal
from scipy.special import gamma, digamma
from sklearn.neighbors import NearestNeighbors
from sklearn.externals.joblib import Parallel, delayed
from sklearn.utils import check_X_y
from sklearn.preprocessing import StandardScaler
from sklearn.externals.joblib.parallel import cpu_count
import bottleneck as bn

NUM_CPU = cpu_count()


def _get_first_mutual_info_unwrap(*arg, **kwarg):
    """Parallelize the get_first_mutual_info function."""
    return FetureSelection_mRmR._get_first_mutual_info(*arg, **kwarg)


def _get_mutual_info_unwrap(*arg, **kwarg):
    """Parallelize the get_mutual_info function."""
    return FetureSelection_mRmR._get_mutual_info(*arg, **kwarg)


class FetureSelection_mRmR(object):
    """
def __init__(self, n_features, n_jobs=1):
    self.n_features = n_features
    if n_jobs == -1:
        n_jobs = cpu_count()
    self.n_jobs = n_jobs
def dict_learning1(X, n_components, alpha, max_iter=1000, tol=1e-8,
                   method='cd', n_jobs=1, dict_init=None, code_init=None,
                   callback=None, verbose=False, random_state=None,
                   n_atoms=None):
    """Solves a dictionary learning matrix factorization problem.

    (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                 (U,V)
                 with || V_k ||_2 = 1 for all 0 <= k < n_components

    where V is the dictionary and U is the sparse code.

    method: {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso
        problem (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the Lasso solution
        (linear_model.Lasso). Lars will be faster if the estimated components
        are sparse.

    Returns
    -------
    errors: array
        Vector of errors at each iteration.
    """
    if n_atoms is not None:
        n_components = n_atoms
        warnings.warn("Parameter n_atoms has been renamed to "
                      "'n_components' and will be removed in release 0.14.",
                      DeprecationWarning, stacklevel=2)

    if method not in ('lars', 'cd'):
        raise ValueError('Coding method not supported as a fit algorithm.')
    method = 'lasso_' + method

    t0 = time.time()
    # Avoid integer division problems
    alpha = float(alpha)
    random_state = check_random_state(random_state)

    if n_jobs == -1:
        n_jobs = cpu_count()

    # Init the code and the dictionary with SVD of Y
    if code_init is not None and dict_init is not None:
        code = np.array(code_init, order='F')
        # Don't copy V, it will happen below
        dictionary = dict_init
    else:
        code, S, dictionary = linalg.svd(X, full_matrices=False)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)
    if n_components <= r:  # True even if n_components=None
        code = code[:, :n_components]
        dictionary = dictionary[:n_components, :]
    else:
        code = np.c_[code, np.zeros((len(code), n_components - r))]
        dictionary = np.r_[dictionary,
                           np.zeros((n_components - r, dictionary.shape[1]))]

    # Fortran-order dict, as we are going to access its row vectors
    dictionary = np.array(dictionary, order='F')

    residuals = 0

    errors = []
    current_cost = np.nan

    if verbose == 1:
        print '[dict_learning]',

    for ii in xrange(max_iter):
        dt = (time.time() - t0)
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            print ("Iteration % 3i "
                   "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
                   % (ii, dt, dt / 60, current_cost))

        # Update code: every even iteration (including the first) resets the
        # code to code_init, otherwise re-encode with coordinate descent
        if ii % 2 == 0:
            code = code_init
        else:
            code = sparse_encode(X, dictionary, algorithm="lasso_cd",
                                 alpha=alpha, init=None, n_jobs=n_jobs)

        # Update dictionary
        dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,
                                             verbose=verbose, return_r2=True,
                                             random_state=random_state)
        dictionary = dictionary.T

        # Cost function
        current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))
        errors.append(current_cost)

        if ii > 0:
            dE = errors[-2] - errors[-1]
            # assert(dE >= -tol * errors[-1])
            if dE * dE < tol * errors[-1]:
                if verbose == 1:
                    # A line return
                    print ""
                elif verbose:
                    print "--- Convergence reached after %d iterations" % ii
                break
        if ii % 5 == 0 and callback is not None:
            callback(locals())

    return code, dictionary, errors
def dict_learning2(X, n_components, alpha, max_iter=1000, tol=1e-8,
                   method='cd', n_jobs=1, dict_init=None, code_init=None,
                   callback=None, verbose=False, random_state=None,
                   n_atoms=None):
    """Solves a dictionary learning matrix factorization problem.

    Finds the best dictionary and the corresponding sparse code for
    approximating the data matrix X by solving::

        (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                     (U,V)
                     with || V_k ||_2 = 1 for all 0 <= k < n_components

    where V is the dictionary and U is the sparse code.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        Data matrix.

    n_components: int,
        Number of dictionary atoms to extract.

    alpha: int,
        Sparsity controlling parameter.

    max_iter: int,
        Maximum number of iterations to perform.

    tol: float,
        Tolerance for the stopping condition.

    method: {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso
        problem (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the Lasso solution
        (linear_model.Lasso). Lars will be faster if the estimated components
        are sparse.

    n_jobs: int,
        Number of parallel jobs to run, or -1 to autodetect.

    dict_init: array of shape (n_components, n_features),
        Initial value for the dictionary for warm restart scenarios.

    code_init: array of shape (n_samples, n_components),
        Initial value for the sparse code for warm restart scenarios.

    callback:
        Callable that gets invoked every five iterations.

    verbose:
        Degree of output the procedure will print.

    random_state: int or RandomState
        Pseudo number generator state used for random sampling.

    Returns
    -------
    code: array of shape (n_samples, n_components)
        The sparse code factor in the matrix factorization.

    dictionary: array of shape (n_components, n_features),
        The dictionary factor in the matrix factorization.

    errors: array
        Vector of errors at each iteration.

    See also
    --------
    dict_learning_online
    DictionaryLearning
    MiniBatchDictionaryLearning
    SparsePCA
    MiniBatchSparsePCA
    """
    if n_atoms is not None:
        n_components = n_atoms
        warnings.warn("Parameter n_atoms has been renamed to "
                      "'n_components' and will be removed in release 0.14.",
                      DeprecationWarning, stacklevel=2)

    if method not in ('lars', 'cd'):
        raise ValueError('Coding method not supported as a fit algorithm.')
    method = 'lasso_' + method

    t0 = time.time()
    # Avoid integer division problems
    alpha = float(alpha)
    random_state = check_random_state(random_state)

    if n_jobs == -1:
        n_jobs = cpu_count()

    # Init the code and the dictionary with SVD of Y
    if code_init is not None and dict_init is not None:
        code = np.array(code_init, order='F')
        # Don't copy V, it will happen below
        dictionary = dict_init
    else:
        code, S, dictionary = linalg.svd(X, full_matrices=False)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)
    if n_components <= r:  # True even if n_components=None
        code = code[:, :n_components]
        dictionary = dictionary[:n_components, :]
    else:
        code = np.c_[code, np.zeros((len(code), n_components - r))]
        dictionary = np.r_[dictionary,
                           np.zeros((n_components - r, dictionary.shape[1]))]

    # Fortran-order dict, as we are going to access its row vectors
    dictionary = np.array(dictionary, order='F')

    residuals = 0

    errors = []
    current_cost = np.nan

    if verbose == 1:
        print '[dict_learning]',

    for ii in xrange(max_iter):
        dt = (time.time() - t0)
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            print ("Iteration % 3i "
                   "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
                   % (ii, dt, dt / 60, current_cost))

        # Update code: the first iteration uses code_init, later iterations
        # re-encode with LARS
        if ii == 0:
            code = code_init
        else:
            code = sparse_encode(X, dictionary, algorithm="lasso_lars",
                                 alpha=alpha, init=None, n_jobs=n_jobs)

        # Update dictionary
        dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,
                                             verbose=verbose, return_r2=True,
                                             random_state=random_state)
        dictionary = dictionary.T

        # Cost function
        current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))
        errors.append(current_cost)

        if ii > 0:
            dE = errors[-2] - errors[-1]
            # assert(dE >= -tol * errors[-1])
            if dE * dE < tol * errors[-1]:
                if verbose == 1:
                    # A line return
                    print ""
                elif verbose:
                    print "--- Convergence reached after %d iterations" % ii
                break
        if ii % 5 == 0 and callback is not None:
            callback(locals())

    return code, dictionary, errors
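# A minimal, hypothetical call to the variant above. It assumes the missing sklearn
# internals (sparse_encode, _update_dict, check_random_state, cpu_count) and the
# module-level imports (numpy, scipy.linalg, warnings, time, sys) are available.
# Note that this variant reuses code_init at iteration 0, so a warm start is passed
# explicitly here.
import numpy as np

rng = np.random.RandomState(0)
n_samples, n_features, n_components = 100, 20, 5
X_demo = rng.randn(n_samples, n_features)
code_init = rng.randn(n_samples, n_components)
dict_init = rng.randn(n_components, n_features)

code, dictionary, errors = dict_learning2(X_demo, n_components, alpha=1.0,
                                          max_iter=50, code_init=code_init,
                                          dict_init=dict_init, random_state=0)
print(code.shape, dictionary.shape, len(errors))  # (100, 5) (5, 20) <= 50 iterations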