Code Example #1
def _estimate_param_scan_worker(estimator, params, X, evaluate, evaluate_args,
                                failfast):
    """ Method that runs estimation for several parameter settings.

    Defined as a worker for Parallelization

    """
    # run estimation
    model = None
    try:  # catch any exception
        estimator.estimate(X, **params)
        model = estimator.model
    except:
        e = sys.exc_info()[1]
        if isinstance(estimator, Loggable):
            estimator.logger.warning("Ignored error during estimation: %s" % e)
        if failfast:
            raise  # re-raise
        else:
            pass  # just return model=None

    # collect results
    res = []

    if evaluate is None:  # we want full models
        res.append(model)
    # we want to evaluate function(s) of the model
    elif _types.is_iterable(evaluate):
        values = []  # the function values of the model
        for ieval, name in enumerate(evaluate):
            # arguments for the method/attribute to be evaluated
            args = ()
            if evaluate_args is not None:
                args = evaluate_args[ieval]
                # wrap single arguments in an iterable again to pass them.
                if _types.is_string(args):
                    args = (args, )
            # evaluate
            try:
                # try calling method/property/attribute
                value = _call_member(estimator.model, name, failfast, *args)
            # couldn't find method/property/attribute
            except AttributeError as e:
                if failfast:
                    raise e  # raise an AttributeError
                else:
                    value = None  # we just ignore it and return None
            values.append(value)
        # if we only have one value, unpack it
        if len(values) == 1:
            values = values[0]
        res.append(values)
    else:
        raise ValueError('Invalid setting for evaluate: ' + str(evaluate))

    if len(res) == 1:
        res = res[0]
    return res
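
The worker above delegates attribute/method lookup to _call_member, which is imported elsewhere in the module. As a minimal, hypothetical sketch of what such a helper could do (the real PyEMMA implementation may differ, e.g. in how it distinguishes properties from methods):

def _call_member_sketch(obj, name, failfast=True, *args):
    """Hypothetical helper: fetch attribute `name` from `obj`; call it with
    `args` if it is callable, otherwise return the attribute value itself."""
    try:
        attr = getattr(obj, name)
    except AttributeError:
        if failfast:
            raise
        return None
    return attr(*args) if callable(attr) else attr

With this sketch, _call_member_sketch(model, 'timescales') would return model.timescales() if timescales is a method, or the attribute value otherwise.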
Code Example #2
File: featurizer.py  Project: vincentn1/PyEMMA
    def add_residue_mindist(self,
                            residue_pairs='all',
                            scheme='closest-heavy',
                            ignore_nonprotein=True,
                            threshold=None,
                            periodic=True):
        r"""
        Adds the minimum distance between residues to the feature list. See below how
        the minimum distance can be defined. If the topology generated out of :py:obj:`topfile`
        contains information on periodic boundary conditions, the minimum image convention
        will be used when computing distances.

        Parameters
        ----------
        residue_pairs : can be of two types:

            'all'
                Computes distances between all pairs of residues excluding first and second neighbors

            ndarray((n, 2), dtype=int):
                n x 2 array with the pairs of residues for which distances will be computed

        scheme : 'ca', 'closest', 'closest-heavy', default is closest-heavy
                Within a residue, determines the sub-group atoms that will be considered when computing distances

        ignore_nonprotein : boolean, default True
                Ignore residues that are not of protein type (e.g. water molecules, post-translational modifications, etc.)

        threshold : float, optional, default is None
            Distances below this threshold (in nm) will result in a feature value of 1.0, distances
            above will result in 0.0. If left as None, the numerical distance value will be returned.

        periodic : bool, optional, default = True
            If `periodic` is True and the trajectory contains unitcell
            information, we will treat distances that cross periodic images
            using the minimum image convention.


        .. note::
            Using :py:obj:`scheme` = 'closest' or 'closest-heavy' with :py:obj:`residue_pairs` = 'all'
            will compute nearly all interatomic distances, for every frame, before extracting the closest pairs.
            This can be very time consuming. Those schemes are intended to be used with a subset of residues chosen
            via :py:obj:`residue_pairs`.


        """
        from .distances import ResidueMinDistanceFeature
        if scheme != 'ca' and is_string(residue_pairs):
            if residue_pairs == 'all':
                self.logger.warning(
                    "Using all residue pairs with schemes like closest or closest-heavy is "
                    "very time consuming. Consider reducing the residue pairs")

        f = ResidueMinDistanceFeature(self.topology, residue_pairs, scheme,
                                      ignore_nonprotein, threshold, periodic)
        self.__add_feature(f)
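
As a hedged usage sketch of this API (the topology file name and the residue indices below are made up for illustration), a featurizer obtained from pyemma.coordinates.featurizer could register the feature like this:

import numpy as np
import pyemma.coordinates as coor

feat = coor.featurizer('protein.pdb')  # placeholder topology file

# minimum distance between two explicit residue pairs, using only C-alpha atoms
feat.add_residue_mindist(residue_pairs=np.array([[0, 5], [0, 10]]), scheme='ca')

# describe() returns one human-readable label per registered distance; those
# labels are exactly what the is_string / is_list_of_string checks in the
# test snippets below look at
print(feat.describe())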
Code Example #3
File: test_source.py  Project: rafwiewiora/PyEMMA
 def test_topfile(self):
     assert types.is_string(self.inp.topfile)
Code Example #4
File: test_source.py  Project: rafwiewiora/PyEMMA
 def test_describe(self):
     desc = self.inp.describe()
     assert types.is_string(desc) or types.is_list_of_string(desc)
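
The predicates used in these tests live in pyemma.util.types. A rough, hypothetical sketch of what such helpers typically look like (not the library's exact code):

def is_string(obj):
    # plain string check
    return isinstance(obj, str)

def is_list_of_string(obj):
    # a list/tuple whose elements are all strings
    return isinstance(obj, (list, tuple)) and all(isinstance(x, str) for x in obj)

assert is_string("ca distance 0-5")
assert is_list_of_string(["ca distance 0-5", "ca distance 0-10"])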
Code Example #5
File: test_tica.py  Project: yuxuanzhuang/PyEMMA
 def test_describe(self):
     desc = self.tica_obj.describe()
     assert types.is_string(desc) or types.is_list_of_string(desc)
     # describe on empty estimator
     tica(lag=1).describe()
Code Example #6
File: estimator.py  Project: rafwiewiora/PyEMMA
def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=None, failfast=True,
                        return_estimators=False, n_jobs=1, progress_reporter=None):
    """ Runs multiple estimations using a list of parameter settings

    Parameters
    ----------
    estimator : Estimator object or class
        An estimator object that provides an estimate(X, **params) function.
        If only a class is provided here, the Estimator objects will be
        constructed with default parameter settings, and the parameter settings
        from param_sets for each estimation. If you want to specify other
        parameter settings for those parameters not specified in param_sets,
        construct an Estimator before and pass the object.

    param_sets : iterable over dictionaries
        An iterable that provides parameter settings. Each element defines a
        parameter set, for which an estimation will be run using these
        parameters in estimate(X, **params). All other parameter settings will
        be taken from the default settings in the estimator object.

    evaluate : str or list of str
        The given methods or properties will be called on the estimated
        models, and their results will be returned instead of the full models.
        This may be useful for reducing memory overhead.

    failfast : bool
        If True, will raise an exception when estimation fails with an exception
        or when trying to call a method that doesn't exist. If False, will simply
        return None in these cases.

    Returns
    -------
    models : list of model objects or evaluated function values
        A list of estimated models in the same order as param_sets. If evaluate
        is given, each element will contain the results from these method
        evaluations.

    estimators (optional) : list of estimator objects. These are returned only
        if return_estimators=True

    Examples
    --------

    Estimate a maximum likelihood Markov model at lag times 1, 2, 3.

    >>> from pyemma.msm.estimators import MaximumLikelihoodMSM
    >>>
    >>> dtraj = [0,0,1,2,1,0,1,0,1,2,2,0,0,0,1,1,2,1,0,0,1,2,1,0,0,0,1,1,0,1,2]  # mini-trajectory
    >>> param_sets=param_grid({'lag': [1,2,3]})
    >>>
    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, evaluate='timescales')
    [array([ 1.24113167,  0.77454377]), array([ 2.65266703,  1.42909841]), array([ 5.34810395,  1.14784446])]

    Try also getting samples of the timescales

    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, evaluate=['timescales', 'timescales_samples'])
    [[array([ 1.24113167,  0.77454377]), None], [array([ 2.65266703,  1.42909841]), None], [array([ 5.34810395,  1.14784446]), None]]

    We get Nones because the MaximumLikelihoodMSM estimator doesn't provide timescales_samples. Use for example
    a Bayesian estimator for that.

    """
    # make sure we have an estimator object
    estimator = get_estimator(estimator)
    # if we want to return estimators, make clones. Otherwise just copy references.
    # For parallel processing we always need clones
    if return_estimators or n_jobs > 1 or n_jobs is None:
        estimators = [clone_estimator(estimator) for _ in param_sets]
    else:
        estimators = [estimator for _ in param_sets]

    # if we evaluate, make sure we have a list of functions to evaluate
    if _types.is_string(evaluate):
        evaluate = [evaluate]

    # set call back for joblib
    if progress_reporter is not None:
        progress_reporter._progress_register(len(estimators), stage=0,
                                             description="estimating %s" % str(estimator.__class__.__name__))

        if n_jobs > 1:
            class CallBack(object):
                def __init__(self, index, parallel):
                    self.index = index
                    self.parallel = parallel
                    self.reporter = progress_reporter

                def __call__(self, index):
                    if self.reporter is not None:
                        self.reporter._progress_update(1, stage=0)
                    if self.parallel._original_iterable:
                        self.parallel.dispatch_next()
            import joblib.parallel
            joblib.parallel.CallBack = CallBack
        else:
            def _print(msg, msg_args):
                # NOTE: this is an ugly hack, because if we only use one job,
                # we do not get the joblib callback interface, as a workaround
                # we use the Parallel._print function, which is called with
                # msg_args = (done_jobs, total_jobs)
                if len(msg_args) == 2:
                    progress_reporter._progress_update(1, stage=0)

    # iterate over parameter settings
    from joblib import Parallel
    import joblib
    pool = Parallel(n_jobs=n_jobs)

    if progress_reporter is not None and n_jobs == 1:
        pool._print = _print
        # NOTE: verbose has to be set, otherwise our print hack does not work.
        pool.verbose = 50

    task_iter = (joblib.delayed(_estimate_param_scan_worker)(estimators[i],
                                                             param_sets[i], X,
                                                             evaluate,
                                                             evaluate_args,
                                                             failfast,
                                                             )
                 for i in range(len(param_sets)))

    # container for model or function evaluations
    res = pool(task_iter)

    if progress_reporter is not None:
        progress_reporter._progress_force_finish(0)

    # done
    if return_estimators:
        return res, estimators
    else:
        return res
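
The doctests above rely on a param_grid helper to expand {'lag': [1, 2, 3]} into one parameter dictionary per grid point. A minimal, hypothetical re-implementation of that idea (PyEMMA's own param_grid may differ in detail):

from itertools import product

def param_grid_sketch(grid):
    # expand a dict of value lists into one dict per combination
    keys = sorted(grid)
    for values in product(*(grid[k] for k in keys)):
        yield dict(zip(keys, values))

param_sets = list(param_grid_sketch({'lag': [1, 2, 3]}))
# param_sets == [{'lag': 1}, {'lag': 2}, {'lag': 3}]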
Code Example #7
def estimate_param_scan(estimator,
                        X,
                        param_sets,
                        evaluate=None,
                        evaluate_args=None,
                        failfast=True,
                        return_estimators=False,
                        n_jobs=1,
                        progress_reporter=None,
                        show_progress=True,
                        return_exceptions=False):
    """ Runs multiple estimations using a list of parameter settings

    Parameters
    ----------
    estimator : Estimator object or class
        An estimator object that provides an estimate(X, **params) function.
        If only a class is provided here, the Estimator objects will be
        constructed with default parameter settings, and the parameter settings
        from param_sets for each estimation. If you want to specify other
        parameter settings for those parameters not specified in param_sets,
        construct an Estimator before and pass the object.

    param_sets : iterable over dictionaries
        An iterable that provides parameter settings. Each element defines a
        parameter set, for which an estimation will be run using these
        parameters in estimate(X, **params). All other parameter settings will
        be taken from the default settings in the estimator object.

    evaluate : str or list of str, optional
        The given methods or properties will be called on the estimated
        models, and their results will be returned instead of the full models.
        This may be useful for reducing memory overhead.

    evaluate_args: iterable of iterable, optional
        Arguments to be passed to the evaluated methods. Note that its length has to match the length of evaluate.

    failfast : bool
        If True, will raise an exception when estimation fails with an exception
        or when trying to call a method that doesn't exist. If False, will simply
        return None in these cases.

    return_estimators: bool
        If True, return a list of estimators in addition to the models.

    show_progress: bool
        If the given estimator supports the show_progress interface, the flag is set
        prior to running the estimations.

    return_exceptions: bool, default=False
        If failfast is False and this setting is True, the exception raised at a given grid element
        is returned instead of None.

    Returns
    -------
    models : list of model objects or evaluated function values
        A list of estimated models in the same order as param_sets. If evaluate
        is given, each element will contain the results from these method
        evaluations.

    estimators (optional) : list of estimator objects. These are returned only
        if return_estimators=True

    Examples
    --------

    Estimate a maximum likelihood Markov model at lag times 1, 2, 3.

    >>> from pyemma.msm.estimators import MaximumLikelihoodMSM, BayesianMSM
    >>>
    >>> dtraj = [0,0,1,2,1,0,1,0,1,2,2,0,0,0,1,1,2,1,0,0,1,2,1,0,0,0,1,1,0,1,2]  # mini-trajectory
    >>> param_sets=param_grid({'lag': [1,2,3]})
    >>>
    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, evaluate='timescales')
    [array([ 1.24113168,  0.77454377]), array([ 2.65266698,  1.42909842]), array([ 5.34810405,  1.14784446])]

    Try also getting samples of the timescales:

    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, failfast=False,
    ...     evaluate=['timescales', 'timescales_samples']) # doctest: +SKIP
    [[array([ 1.24113168,  0.77454377]), None], [array([ 2.48226337,  1.54908754]), None], [array([ 3.72339505,  2.32363131]), None]]

    We get Nones because the MaximumLikelihoodMSM estimator doesn't provide timescales_samples. Use for example
    a Bayesian estimator for that.

    Now we also want to get samples of the timescales using the BayesianMSM:

    >>> estimate_param_scan(BayesianMSM, dtraj, param_sets, show_progress=False,
    ...     evaluate=['timescales', 'sample_f'], evaluate_args=((), ('timescales', ))) # doctest: +SKIP
    [[array([ 1.24357685,  0.77609028]), [array([ 1.5963252 ,  0.73877883]), array([ 1.29915847,  0.49004912]), array([ 0.90058583,  0.73841786]), ... ]]

    """
    # make sure we have an estimator object
    estimator = get_estimator(estimator)
    if hasattr(estimator, 'show_progress'):
        estimator.show_progress = show_progress

    # if we want to return estimators, make clones. Otherwise just copy references.
    # For parallel processing we always need clones.
    # Also if the Estimator is its own Model, we have to clone.
    from pyemma._base.model import Model
    if (return_estimators or n_jobs > 1 or n_jobs is None
            or isinstance(estimator, Model)):
        estimators = [clone_estimator(estimator) for _ in param_sets]
    else:
        estimators = [estimator for _ in param_sets]

    # if we evaluate, make sure we have a list of functions to evaluate
    if _types.is_string(evaluate):
        evaluate = [evaluate]
    if _types.is_string(evaluate_args):
        evaluate_args = [evaluate_args]

    if evaluate is not None and evaluate_args is not None and len(
            evaluate) != len(evaluate_args):
        raise ValueError(
            "length mismatch: evaluate ({}) and evaluate_args ({})".format(
                len(evaluate), len(evaluate_args)))

    show_progress = progress_reporter is not None and show_progress
    if show_progress:
        progress_reporter._progress_register(len(estimators),
                                             stage=0,
                                             description="estimating %s" %
                                             str(estimator.__class__.__name__))

    if n_jobs > 1 and os.name == 'posix':
        if hasattr(estimators[0], 'logger'):
            estimators[0].logger.debug('estimating %s with n_jobs=%s',
                                       estimator, n_jobs)
        # iterate over parameter settings
        task_iter = ((estimator, param_set, X, evaluate, evaluate_args,
                      failfast, return_exceptions)
                     for estimator, param_set in zip(estimators, param_sets))

        from pathos.multiprocessing import Pool as Parallel
        pool = Parallel(processes=n_jobs)
        args = list(task_iter)
        if show_progress:
            from pyemma._base.model import SampledModel
            for a in args:
                if isinstance(a[0], SampledModel):
                    a[0].show_progress = False

            def callback(_):
                progress_reporter._progress_update(1, stage=0)
        else:
            callback = None

        import six
        if six.PY3:

            def error_callback(*args, **kw):
                if failfast:
                    raise Exception('something failed')

            with pool:
                res_async = [
                    pool.apply_async(_estimate_param_scan_worker,
                                     a,
                                     callback=callback,
                                     error_callback=error_callback)
                    for a in args
                ]
                res = [x.get() for x in res_async]
        else:
            try:
                res_async = [
                    pool.apply_async(_estimate_param_scan_worker,
                                     a,
                                     callback=callback) for a in args
                ]
                res = [x.get() for x in res_async]
            finally:
                pool.close()

    # if n_jobs=1 don't invoke the pool, but directly dispatch the iterator
    else:
        if hasattr(estimators[0], 'logger'):
            estimators[0].logger.debug(
                'estimating %s with n_jobs=1 because of the setting or '
                'because you do not have a POSIX system', estimator)
        res = []
        if show_progress:
            from pyemma._base.model import SampledModel
            if isinstance(estimator, SampledModel):
                for e in estimators:
                    e.show_progress = False

        for estimator, param_set in zip(estimators, param_sets):
            res.append(
                _estimate_param_scan_worker(estimator, param_set, X, evaluate,
                                            evaluate_args, failfast,
                                            return_exceptions))
            if show_progress:
                progress_reporter._progress_update(1, stage=0)

    if show_progress:
        progress_reporter._progress_force_finish(0)

    # done
    if return_estimators:
        return res, estimators
    else:
        return res
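
The return_exceptions flag is forwarded to the worker function. The worker shown in Code Example #1 predates this flag; as a hedged, simplified sketch (without the evaluate machinery), a worker honoring it might look roughly like this:

def _worker_sketch(estimator, params, X, failfast, return_exceptions):
    # hypothetical simplified worker: estimate and either re-raise the error,
    # return the exception object, or return None on failure
    try:
        estimator.estimate(X, **params)
        return estimator.model
    except Exception as e:
        if failfast:
            raise
        return e if return_exceptions else None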
Code Example #8
def _parse_groupwise_input(group_definitions, group_pairs, MDlogger, mname=''):
    r"""For input of group type (add_group_mindist), prepare the array of pairs of indices
        and groups so that :py:func:`MinDistanceFeature` can work

        This function will:
            - check the input types
            - sort the 1D arrays of each entry of group_definitions
            - check for duplicates within each group_definition
            - produce the list of pairs for all needed distances
            - produce a list that maps each entry in the pairlist to a given group of distances

    Returns
    -------
        parsed_group_definitions: list
            List of 1D arrays containing sorted, unique atom indices

        parsed_group_pairs: numpy.ndarray
            (N,2)-numpy array containing pairs of indices that represent pairs
             of groups for which the inter-group distance-pairs will be generated

        distance_pairs: numpy.ndarray
            (M,2)-numpy array with all the distance-pairs needed (regardless of their group)

        group_membership: numpy.ndarray
            (N,2)-numpy array giving, for each group pair, the [start, end) range of rows in distance_pairs that belongs to it

        """

    assert isinstance(group_definitions, list), "group_definitions has to be of type list, not %s"%type(group_definitions)
    # Handle the special case of just one group
    if len(group_definitions) == 1:
        group_pairs = np.array([0,0], ndmin=2)

    # Sort the elements within each group
    parsed_group_definitions = []
    for igroup in group_definitions:
        assert np.ndim(igroup) == 1, "The elements of the groups definition have to be of dim 1, not %u"%np.ndim(igroup)
        parsed_group_definitions.append(np.unique(igroup))

    # Check for group duplicates
    for ii, igroup in enumerate(parsed_group_definitions[:-1]):
        for jj, jgroup in enumerate(parsed_group_definitions[ii+1:]):
            if len(igroup) == len(jgroup):
                assert not np.allclose(igroup, jgroup), "Some group definitions appear to be duplicated, e.g %u and %u"%(ii,ii+jj+1)

    # Create and/or check the pair-list
    if is_string(group_pairs):
        if group_pairs == 'all':
            # materialize the combinations iterator as an (n_pairs, 2) integer array
            parsed_group_pairs = np.array(list(combinations(np.arange(len(group_definitions)), 2)))
    else:
        assert isinstance(group_pairs, np.ndarray)
        assert group_pairs.shape[1] == 2
        assert group_pairs.max() <= len(parsed_group_definitions), "Cannot ask for group nr. %u if group_definitions only " \
                                                    "contains %u groups"%(group_pairs.max(), len(parsed_group_definitions))
        assert group_pairs.min() >= 0, "Group pairs contains negative group indices"

        parsed_group_pairs = np.zeros_like(group_pairs, dtype='int')
        for ii, ipair in enumerate(group_pairs):
            if ipair[0] == ipair[1]:
                MDlogger.warning("%s will compute the mindist of group %u with itself. Is this wanted? "%(mname, ipair[0]))
            parsed_group_pairs[ii, :] = np.sort(ipair)

    # Create the large list of distances that will be computed, and an array containing group identifiers
    # of the distances that actually characterize a pair of groups
    distance_pairs = []
    group_membership = np.zeros_like(parsed_group_pairs)
    b = 0
    for ii, pair in enumerate(parsed_group_pairs):
        if pair[0] != pair[1]:
            # materialize as lists so that len() below and np.vstack() at the end work
            distance_pairs.append(list(product(parsed_group_definitions[pair[0]],
                                               parsed_group_definitions[pair[1]])))
        else:
            parsed = parsed_group_definitions[pair[0]]
            distance_pairs.append(list(combinations(parsed, 2)))

        group_membership[ii, :] = [b, b + len(distance_pairs[ii])]
        b += len(distance_pairs[ii])

    return parsed_group_definitions, parsed_group_pairs, np.vstack(distance_pairs), group_membership
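
A hedged usage sketch (atom indices and logger are made up for illustration; the stated return values assume the list/array conversions noted in the comments above):

import logging
import numpy as np

groups = [np.array([0, 1, 2]), np.array([3, 4])]
defs, pairs, dpairs, membership = _parse_groupwise_input(
    groups, 'all', logging.getLogger('featurizer'), mname='add_group_mindist')

# pairs      -> array([[0, 1]])            : the single inter-group pair (group 0, group 1)
# dpairs     -> 6 atom-index pairs (3 x 2) : every atom of group 0 vs. every atom of group 1
# membership -> array([[0, 6]])            : rows 0..5 of dpairs belong to group pair (0, 1)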
Code Example #9
File: test_cluster.py  Project: zzmjohn/PyEMMA
 def test_describe(self):
     for c in self.cl:
         desc = c.describe()
         assert types.is_string(desc) or types.is_list_of_string(desc)
Code Example #10
def estimate_param_scan(estimator,
                        X,
                        param_sets,
                        evaluate=None,
                        evaluate_args=None,
                        failfast=True,
                        return_estimators=False,
                        n_jobs=1,
                        progress_reporter=None,
                        show_progress=True):
    """ Runs multiple estimations using a list of parameter settings

    Parameters
    ----------
    estimator : Estimator object or class
        An estimator object that provides an estimate(X, **params) function.
        If only a class is provided here, the Estimator objects will be
        constructed with default parameter settings, and the parameter settings
        from param_sets for each estimation. If you want to specify other
        parameter settings for those parameters not specified in param_sets,
        construct an Estimator before and pass the object.

    param_sets : iterable over dictionaries
        An iterable that provides parameter settings. Each element defines a
        parameter set, for which an estimation will be run using these
        parameters in estimate(X, **params). All other parameter settings will
        be taken from the default settings in the estimator object.

    evaluate : str or list of str, optional
        The given methods or properties will be called on the estimated
        models, and their results will be returned instead of the full models.
        This may be useful for reducing memory overhead.

    evaluate_args: iterable of iterable, optional
        Arguments to be passed to the evaluated methods. Note that its length has to match the length of evaluate.

    failfast : bool
        If True, will raise an exception when estimation fails with an exception
        or when trying to call a method that doesn't exist. If False, will simply
        return None in these cases.

    return_estimators: bool
        If True, return a list of estimators in addition to the models.

    show_progress: bool
        If the given estimator supports the show_progress interface, the flag is set
        prior to running the estimations.

    Returns
    -------
    models : list of model objects or evaluated function values
        A list of estimated models in the same order as param_sets. If evaluate
        is given, each element will contain the results from these method
        evaluations.

    estimators (optional) : list of estimator objects. These are returned only
        if return_estimators=True

    Examples
    --------

    Estimate a maximum likelihood Markov model at lag times 1, 2, 3.

    >>> from pyemma.msm.estimators import MaximumLikelihoodMSM, BayesianMSM
    >>>
    >>> dtraj = [0,0,1,2,1,0,1,0,1,2,2,0,0,0,1,1,2,1,0,0,1,2,1,0,0,0,1,1,0,1,2]  # mini-trajectory
    >>> param_sets=param_grid({'lag': [1,2,3]})
    >>>
    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, evaluate='timescales')
    [array([ 1.24113168,  0.77454377]), array([ 2.48226337,  1.54908754]), array([ 3.72339505,  2.32363131])]

    Try also getting samples of the timescales:

    >>> estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets, failfast=False,
    ...     evaluate=['timescales', 'timescales_samples']) # doctest: +SKIP
    [[array([ 1.24113168,  0.77454377]), None], [array([ 2.48226337,  1.54908754]), None], [array([ 3.72339505,  2.32363131]), None]]

    We get Nones because the MaximumLikelihoodMSM estimator doesn't provide timescales_samples. Use for example
    a Bayesian estimator for that.

    Now we also want to get samples of the timescales using the BayesianMSM:

    >>> estimate_param_scan(BayesianMSM, dtraj, param_sets, show_progress=False,
    ...     evaluate=['timescales', 'sample_f'], evaluate_args=((), ('timescales', ))) # doctest: +SKIP
    [[array([ 1.24357685,  0.77609028]), [array([ 1.5963252 ,  0.73877883]), array([ 1.29915847,  0.49004912]), array([ 0.90058583,  0.73841786]), ... ]]

    """
    # make sure we have an estimator object
    estimator = get_estimator(estimator)
    if hasattr(estimator, 'show_progress'):
        estimator.show_progress = show_progress
    # if we want to return estimators, make clones. Otherwise just copy references.
    # For parallel processing we always need clones
    if return_estimators or n_jobs > 1 or n_jobs is None:
        estimators = [clone_estimator(estimator) for _ in param_sets]
    else:
        estimators = [estimator for _ in param_sets]

    # if we evaluate, make sure we have a list of functions to evaluate
    if _types.is_string(evaluate):
        evaluate = [evaluate]
    if _types.is_string(evaluate_args):
        evaluate_args = [evaluate_args]

    if evaluate is not None and evaluate_args is not None and len(
            evaluate) != len(evaluate_args):
        raise ValueError(
            "length mismatch: evaluate ({}) and evaluate_args ({})".format(
                len(evaluate), len(evaluate_args)))

    # set call back for joblib
    if progress_reporter is not None and show_progress:
        progress_reporter._progress_register(len(estimators),
                                             stage=0,
                                             description="estimating %s" %
                                             str(estimator.__class__.__name__))

        if n_jobs > 1:
            try:
                from joblib.parallel import BatchCompletionCallBack
                batch_comp_call_back = True
            except ImportError:
                from joblib.parallel import CallBack as BatchCompletionCallBack
                batch_comp_call_back = False

            class CallBack(BatchCompletionCallBack):
                def __init__(self, *args, **kw):
                    self.reporter = progress_reporter
                    super(CallBack, self).__init__(*args, **kw)

                def __call__(self, *args, **kw):
                    self.reporter._progress_update(1, stage=0)
                    super(CallBack, self).__call__(*args, **kw)

            import joblib.parallel
            if batch_comp_call_back:
                joblib.parallel.BatchCompletionCallBack = CallBack
            else:
                joblib.parallel.CallBack = CallBack
        else:

            def _print(msg, msg_args):
                # NOTE: this is an ugly hack, because if we only use one job,
                # we do not get the joblib callback interface, as a workaround
                # we use the Parallel._print function, which is called with
                # msg_args = (done_jobs, total_jobs)
                if len(msg_args) == 2:
                    progress_reporter._progress_update(1, stage=0)

    # iterate over parameter settings
    from joblib import Parallel
    import joblib, mock, six

    if six.PY34:
        from multiprocessing import get_context
        try:
            ctx = get_context(method='forkserver')
        except ValueError:  # forkserver NA
            try:
                # this is slower in creation, but will not use as much memory!
                ctx = get_context(method='spawn')
            except ValueError:
                ctx = get_context(None)
                print(
                    "WARNING: using default multiprocessing start method {}. "
                    "This could potentially lead to memory issues.".format(
                        ctx))

        with mock.patch('joblib.parallel.DEFAULT_MP_CONTEXT', ctx):
            pool = Parallel(n_jobs=n_jobs)
    else:
        pool = Parallel(n_jobs=n_jobs)

    if progress_reporter is not None and n_jobs == 1:
        pool._print = _print
        # NOTE: verbose has to be set, otherwise our print hack does not work.
        pool.verbose = 50

    if n_jobs > 1:
        # if n_jobs=1 don't invoke the pool, but directly dispatch the iterator
        task_iter = (joblib.delayed(_estimate_param_scan_worker)(
            estimators[i],
            param_sets[i],
            X,
            evaluate,
            evaluate_args,
            failfast,
        ) for i in range(len(param_sets)))

        # container for model or function evaluations
        res = pool(task_iter)
    else:
        res = []
        for i, param in enumerate(param_sets):
            res.append(
                _estimate_param_scan_worker(estimators[i], param, X, evaluate,
                                            evaluate_args, failfast))
            if progress_reporter is not None and show_progress:
                progress_reporter._progress_update(1, stage=0)

    if progress_reporter is not None and show_progress:
        progress_reporter._progress_force_finish(0)

    # done
    if return_estimators:
        return res, estimators
    else:
        return res
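
The context-selection logic above tries 'forkserver' first, then 'spawn', then falls back to the platform default. The same idea as a standalone, hedged sketch:

from multiprocessing import get_context

def pick_mp_context():
    # prefer forkserver, then spawn (slower to start but avoids fork-related
    # memory surprises), then fall back to the platform default
    for method in ('forkserver', 'spawn', None):
        try:
            return get_context(method)
        except ValueError:  # start method not available on this platform
            continue

ctx = pick_mp_context()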
Code Example #11
File: test_cluster.py  Project: ismaelresp/PyEMMA
 def test_describe(self):
     for c in self.cl:
         desc = c.describe()
         assert types.is_string(desc) or types.is_list_of_string(desc)
Code Example #12
File: test_pca.py  Project: ismaelresp/PyEMMA
 def test_describe(self):
     desc = self.pca_obj.describe()
     assert types.is_string(desc) or types.is_list_of_string(desc)
Code Example #13
File: test_assign.py  Project: ismaelresp/PyEMMA
 def test_describe(self):
     c = self.ass
     desc = c.describe()
     assert types.is_string(desc) or types.is_list_of_string(desc)