Code Example #1
from multiprocessing import cpu_count


def CalculateNumberOfActiveThreads(numberOfTasks):
    # On a dual-core machine, always use both cores.
    if cpu_count() == 2:
        return cpu_count()
    # Never spawn more threads than there are tasks.
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
Code Example #2
File: evaluation.py  Project: laurensvdwiel/KeCo
from multiprocessing import cpu_count


def calculate_number_of_active_threads(numberOfTasks):
    """
    Calculates the number of threads possible, given the number
    of processor cores and the number of tasks to be parallelized.
    """
    if cpu_count() == 2:
        return cpu_count()
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
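
For context, a minimal usage sketch sizing a thread pool with the helper above; the task list and work function here are made up for illustration:

from multiprocessing.pool import ThreadPool

tasks = list(range(10))  # hypothetical work items
n_threads = calculate_number_of_active_threads(len(tasks))

with ThreadPool(n_threads) as pool:
    results = pool.map(lambda t: t * t, tasks)  # placeholder workload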
Code Example #3
import numpy as np
from joblib import cpu_count
from sklearn.neighbors import BallTree
# Internal helpers from the hdbscan package:
from hdbscan._hdbscan_boruvka import BallTreeBoruvkaAlgorithm
from hdbscan._hdbscan_linkage import label


def _hdbscan_boruvka_balltree(X, min_samples=5, alpha=1.0,
                              metric='minkowski', p=2, leaf_size=40,
                              approx_min_span_tree=True,
                              gen_min_span_tree=False,
                              core_dist_n_jobs=4, **kwargs):
    if leaf_size < 3:
        leaf_size = 3

    if core_dist_n_jobs < 1:
        core_dist_n_jobs = max(cpu_count() + 1 + core_dist_n_jobs, 1)

    if X.dtype != np.float64:
        X = X.astype(np.float64)

    tree = BallTree(X, metric=metric, leaf_size=leaf_size, **kwargs)
    alg = BallTreeBoruvkaAlgorithm(tree, min_samples, metric=metric,
                                   leaf_size=leaf_size // 3,
                                   approx_min_span_tree=approx_min_span_tree,
                                   n_jobs=core_dist_n_jobs, **kwargs)
    min_spanning_tree = alg.spanning_tree()
    # Sort edges of the min_spanning_tree by weight
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]
    # Convert edge list into standard hierarchical clustering format
    single_linkage_tree = label(min_spanning_tree)

    if gen_min_span_tree:
        return single_linkage_tree, min_spanning_tree
    else:
        return single_linkage_tree, None
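
The `core_dist_n_jobs` handling above follows joblib's convention for negative job counts: -1 means all cores, -2 all cores but one, and so on, with a floor of one worker. A standalone sketch of the mapping (guarding on `n_jobs < 0`, as the later examples do):

from joblib import cpu_count


def resolve_n_jobs(n_jobs):
    """Map joblib-style negative n_jobs to a concrete worker count."""
    if n_jobs < 0:
        # -1 -> cpu_count(), -2 -> cpu_count() - 1, ..., floor of 1.
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)
    return n_jobs


# On an 8-core machine: resolve_n_jobs(-1) == 8, resolve_n_jobs(-2) == 7,
# and resolve_n_jobs(-100) == 1.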
Code Example #4
import numpy as np
from joblib import Parallel, cpu_count, delayed
from sklearn.utils import gen_even_slices


def _parallel_inner_prod(X, Y, func, n_jobs, **kwds):
    """Break the pairwise matrix in n_jobs even slices
    and compute them in parallel."""
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)

    if Y is None:
        Y = X

    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(func)(X[s], Y, **kwds)
        for s in gen_even_slices(len(X), n_jobs))

    return np.hstack(ret)
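
The Parallel/delayed/gen_even_slices pattern used here, shown in isolation on a toy problem (summing row-slices of a matrix in two jobs):

import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

X = np.arange(12.0).reshape(6, 2)
# gen_even_slices(6, 2) yields slice(0, 3) and slice(3, 6).
partial_sums = Parallel(n_jobs=2)(
    delayed(np.sum)(X[s]) for s in gen_even_slices(len(X), 2))
assert sum(partial_sums) == X.sum()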
Code Example #5
File: pairwise.py  Project: demonSong/DML
import numpy as np
from joblib import Parallel, cpu_count, delayed
from sklearn.utils import gen_even_slices


def _parallel_pairwise(X, Y, func, n_jobs, **kwds):
    if n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)

    if Y is None:
        Y = X

    if n_jobs == 1:
        # Special case to avoid picklability checks in delayed
        return func(X, Y, **kwds)

    # TODO: in some cases, backend='threading' may be appropriate
    fd = delayed(func)
    ret = Parallel(n_jobs=n_jobs,
                   verbose=0)(fd(X, Y[s], **kwds)
                              for s in gen_even_slices(Y.shape[0], n_jobs))

    return np.hstack(ret)
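
A hypothetical call, using scikit-learn's euclidean_distances as the pairwise kernel; with n_jobs=2 the columns of the result are computed in two slices and re-joined by np.hstack:

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

X = np.random.rand(50, 8)
Y = np.random.rand(30, 8)
D = _parallel_pairwise(X, Y, euclidean_distances, n_jobs=2)
assert D.shape == (50, 30)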
Code Example #6
import numpy as np
from joblib import Parallel, cpu_count, delayed


def parallel_predict(estimator,
                     X,
                     n_jobs=1,
                     method='predict',
                     batches_per_job=3):
    """
    Run sklearn classifier prediction in parallel.
    """
    if n_jobs < 0:
        # XXX: this should really be done by joblib
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)
    n_batches = batches_per_job * n_jobs
    n_samples = len(X)
    batch_size = int(np.ceil(n_samples / n_batches))
    parallel = Parallel(n_jobs=n_jobs, backend="threading")
    # _predict (defined elsewhere in the source module) applies the
    # requested method to the batch X[i:i + batch_size]; the threading
    # backend never pickles, so no picklability check is needed.
    results = parallel(
        delayed(_predict)(estimator, X, method, i, i + batch_size)
        for i in range(0, n_samples, batch_size))
    return np.concatenate(results)
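
The _predict helper is not shown in this snippet; judging from the call site, a plausible (hypothetical) reconstruction would be:

def _predict(estimator, X, method, start, stop):
    # Apply the requested prediction method to one batch of rows.
    return getattr(estimator, method)(X[start:stop])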
Code Example #7
def __init__(self,
             goldwords: dict = None,
             offline=True,
             θ=0.40,
             n_jobs=cpu_count() - 1):
    """Set up the JPL Page Classifier.

    :param goldwords: dict {label -> "golden words" related to that category}.
        Default: use get_goldwords() to load them from category files.
    :param offline: bool - True if HTML can be found in standard file loc'n.
    :param θ: float - If no score > θ, returns UNDEF.
    :param n_jobs: int - Set ≤1 to disable parallel.
    """
    self.offline = offline
    self.θ = θ
    self.n_jobs = n_jobs
    if not goldwords:
        self.goldwords = get_goldwords(self.classes_, KEYWORD_DIR)
    else:
        self.goldwords = goldwords
    self.bleached = []
    self.errors = []
    self._estimator_type = "classifier"
Code Example #8
File: mifs.py  Project: bacalfa/mifs
"""
Parallelized Mutual Information based Feature Selection module.

Author: Daniel Homola <*****@*****.**>
License: BSD 3 clause
"""

import numpy as np
from scipy import signal
from sklearn.utils import check_X_y
from sklearn.preprocessing import StandardScaler
from joblib import cpu_count
import bottleneck as bn
from . import mi

NUM_CORES = cpu_count()


class MutualInformationFeatureSelector(object):
    """
    MI_FS stands for Mutual Information based Feature Selection.
    This class contains routines for selecting features using both
    continuous and discrete y variables. Three selection algorithms are
    implemented: JMI, JMIM and MRMR.

    This implementation tries to mimic the scikit-learn interface, so use fit,
    transform or fit_transform, to run the feature selection.

    Parameters
    ----------
Code Example #9
from __future__ import division
import numpy as np
from scipy import signal
from scipy.special import gamma, digamma
from sklearn.neighbors import NearestNeighbors
from joblib import Parallel, cpu_count, delayed
from sklearn.utils import check_X_y
from sklearn.preprocessing import StandardScaler
import bottleneck as bn

NUM_CPU = cpu_count()


def _get_first_mutual_info_unwrap(*arg, **kwarg):
    """Module-level wrapper so _get_first_mutual_info can be pickled
    and run in parallel (bound methods are not picklable under
    Python 2's multiprocessing)."""
    return FetureSelection_mRmR._get_first_mutual_info(*arg, **kwarg)


def _get_mutual_info_unwrap(*arg, **kwarg):
    """Module-level wrapper so _get_mutual_info can be pickled and
    run in parallel."""
    return FetureSelection_mRmR._get_mutual_info(*arg, **kwarg)


class FetureSelection_mRmR(object):
    """
Code Example #10
File: searchlight.py  Project: rameshvs/pypreprocess
def __init__(self, n_features, n_jobs=1):
    self.n_features = n_features
    if n_jobs == -1:
        # -1 means "use all available cores".
        n_jobs = cpu_count()
    self.n_jobs = n_jobs
Code Example #11
import sys
import time
import warnings

import numpy as np
from scipy import linalg
from sklearn.utils import check_random_state
# sparse_encode and _update_dict as in sklearn.decomposition's
# dict_learning module, from which this variant is adapted:
from sklearn.decomposition import sparse_encode
from sklearn.decomposition.dict_learning import _update_dict


def dict_learning1(X, n_components, alpha, max_iter=1000, tol=1e-8,
                  method='cd', n_jobs=1, dict_init=None, code_init=None,
                  callback=None, verbose=False, random_state=None,
                  n_atoms=None):
    """Solves a dictionary learning matrix factorization problem.

        (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                     (U,V)
                    with || V_k ||_2 = 1 for all  0 <= k < n_components

    where V is the dictionary and U is the sparse code.
    method: {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso problem
        (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.
    Returns
    -------
    errors : array
        Vector of errors at each iteration.

    """

    if n_atoms is not None:
        n_components = n_atoms
        warnings.warn("Parameter n_atoms has been renamed to "
                      "'n_components' and will be removed in release 0.14.",
                      DeprecationWarning, stacklevel=2)

    if method not in ('lars', 'cd'):
        raise ValueError('Coding method not supported as a fit algorithm.')
    method = 'lasso_' + method

    t0 = time.time()
    # Avoid integer division problems
    alpha = float(alpha)
    random_state = check_random_state(random_state)

    if n_jobs == -1:
        n_jobs = cpu_count()

    # Init the code and the dictionary with SVD of Y
    if code_init is not None and dict_init is not None:
        code = np.array(code_init, order='F')
        # Don't copy V, it will happen below
        dictionary = dict_init
    else:
        code, S, dictionary = linalg.svd(X, full_matrices=False)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)

    if n_components is None or n_components <= r:
        code = code[:, :n_components]
        dictionary = dictionary[:n_components, :]
    else:
        code = np.c_[code, np.zeros((len(code), n_components - r))]
        dictionary = np.r_[dictionary,
                           np.zeros((n_components - r, dictionary.shape[1]))]

    # Fortran-order dict, as we are going to access its row vectors
    dictionary = np.array(dictionary, order='F')
    residuals = 0
    errors = []
    current_cost = np.nan

    if verbose == 1:
        print('[dict_learning]', end='')
    for ii in range(max_iter):
        dt = (time.time() - t0)
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            print ("Iteration % 3i "
                   "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
                   % (ii, dt, dt / 60, current_cost))

        # Update code: on even iterations reset to the initial code
        # (this variant assumes code_init is provided); on odd ones run
        # a coordinate-descent sparse coding step.
        if ii % 2 == 0:
            code = code_init
        else:
            code = sparse_encode(X, dictionary, algorithm="lasso_cd",
                                 alpha=alpha, init=None, n_jobs=n_jobs)

        # Update dictionary
        dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,
                                             verbose=verbose, return_r2=True,
                                             random_state=random_state)
        dictionary = dictionary.T
        # Cost function
        current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))
        errors.append(current_cost)

        if ii > 0:
            dE = errors[-2] - errors[-1]
            # assert(dE >= -tol * errors[-1])
            if dE * dE < tol * errors[-1]:
                if verbose == 1:
                    # A line return
                    print()
                elif verbose:
                    print("--- Convergence reached after %d iterations" % ii)
                break
        if ii % 5 == 0 and callback is not None:
            callback(locals())

    return code, dictionary, errors
Code Example #12
# Imports: the same as for dict_learning1 above.
def dict_learning2(X, n_components, alpha, max_iter=1000, tol=1e-8,
                  method='cd', n_jobs=1, dict_init=None, code_init=None,
                  callback=None, verbose=False, random_state=None,
                  n_atoms=None):
    """Solves a dictionary learning matrix factorization problem.

    Finds the best dictionary and the corresponding sparse code for
    approximating the data matrix X by solving::

        (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                     (U,V)
                    with || V_k ||_2 = 1 for all  0 <= k < n_components

    where V is the dictionary and U is the sparse code.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        Data matrix.

    n_components: int,
        Number of dictionary atoms to extract.

    alpha: int,
        Sparsity controlling parameter.

    max_iter: int,
        Maximum number of iterations to perform.

    tol: float,
        Tolerance for the stopping condition.

    method: {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso problem
        (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.

    n_jobs: int,
        Number of parallel jobs to run, or -1 to autodetect.

    dict_init: array of shape (n_components, n_features),
        Initial value for the dictionary for warm restart scenarios.

    code_init: array of shape (n_samples, n_components),
        Initial value for the sparse code for warm restart scenarios.

    callback:
        Callable that gets invoked every five iterations.

    verbose:
        Degree of output the procedure will print.

    random_state: int or RandomState
        Pseudo number generator state used for random sampling.

    Returns
    -------
    code: array of shape (n_samples, n_components)
        The sparse code factor in the matrix factorization.

    dictionary: array of shape (n_components, n_features),
        The dictionary factor in the matrix factorization.

    errors: array
        Vector of errors at each iteration.

    See also
    --------
    dict_learning_online
    DictionaryLearning
    MiniBatchDictionaryLearning
    SparsePCA
    MiniBatchSparsePCA
    """

    if n_atoms is not None:
        n_components = n_atoms
        warnings.warn("Parameter n_atoms has been renamed to "
                      "'n_components' and will be removed in release 0.14.",
                      DeprecationWarning, stacklevel=2)

    if method not in ('lars', 'cd'):
        raise ValueError('Coding method not supported as a fit algorithm.')
    method = 'lasso_' + method

    t0 = time.time()
    # Avoid integer division problems
    alpha = float(alpha)
    random_state = check_random_state(random_state)

    if n_jobs == -1:
        n_jobs = cpu_count()

    # Init the code and the dictionary with SVD of Y
    if code_init is not None and dict_init is not None:
        code = np.array(code_init, order='F')
        # Don't copy V, it will happen below
        dictionary = dict_init
    else:
        code, S, dictionary = linalg.svd(X, full_matrices=False)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)

    if n_components is None or n_components <= r:
        code = code[:, :n_components]
        dictionary = dictionary[:n_components, :]
    else:
        code = np.c_[code, np.zeros((len(code), n_components - r))]
        dictionary = np.r_[dictionary,
                           np.zeros((n_components - r, dictionary.shape[1]))]

    # Fortran-order dict, as we are going to access its row vectors
    dictionary = np.array(dictionary, order='F')

    residuals = 0

    errors = []
    current_cost = np.nan

    if verbose == 1:
        print('[dict_learning]', end='')
    for ii in range(max_iter):
        dt = (time.time() - t0)
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            print ("Iteration % 3i "
                   "(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
                   % (ii, dt, dt / 60, current_cost))

        # Update code: use the initial code on the first iteration
        # (this variant assumes code_init is provided), then LARS-based
        # sparse coding afterwards.
        if ii == 0:
            code = code_init
        else:
            code = sparse_encode(X, dictionary, algorithm="lasso_lars",
                                 alpha=alpha, init=None, n_jobs=n_jobs)
        # Update dictionary
        dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,
                                             verbose=verbose, return_r2=True,
                                             random_state=random_state)
        dictionary = dictionary.T
        # Cost function
        current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))
        errors.append(current_cost)

        if ii > 0:
            dE = errors[-2] - errors[-1]
            # assert(dE >= -tol * errors[-1])

            if dE * dE < tol * errors[-1]:
                if verbose == 1:
                    # A line return
                    print()
                elif verbose:
                    print("--- Convergence reached after %d iterations" % ii)
                break
        if ii % 5 == 0 and callback is not None:
            callback(locals())

    return code, dictionary, errors
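
A hypothetical invocation of either variant; note that both reset code to code_init during iteration, so code_init (and a matching dict_init) should be supplied. Whether this runs as-is depends on the era-appropriate sklearn internals (sparse_encode, _update_dict) being importable:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(30, 10)

# Warm-start factors with shapes (n_samples, n_components) and
# (n_components, n_features).
code0, S, dict0 = np.linalg.svd(X, full_matrices=False)
code, dictionary, errors = dict_learning2(
    X, n_components=5, alpha=1.0,
    code_init=code0[:, :5], dict_init=dict0[:5, :],
    random_state=0)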