Esempio n. 1
0
    def register_progress_callback(self, call_back, stage=0):
        """ Registers the progress reporter.

        Parameters
        ----------
        call_back : function
            This function will be called with the following arguments:

            1. stage (int)
            2. instance of pyemma.utils.progressbar.ProgressBar
            3. optional \*args and named keywords (\*\*kw), for future changes

        stage: int, optional, default=0
            The stage you want the given call back function to be fired.
        """
        if not self.show_progress:
            return

        assert callable(call_back)
        # check we have the desired function signature
        from pyemma.util.reflection import getargspec_no_self
        argspec = getargspec_no_self(call_back)
        assert len(argspec.args) == 2
        assert argspec.varargs is not None
        assert argspec.keywords is not None

        if stage not in self._prog_rep_callbacks:
            self._prog_rep_callbacks[stage] = []

        self._prog_rep_callbacks[stage].append(call_back)
Esempio n. 2
0
 def _get_model_param_names(cls):
     r"""Get parameter names for the model"""
     # fetch model parameters
     if hasattr(cls, 'set_model_params'):
         # introspect the constructor arguments to find the model parameters
         # to represent
         args, varargs, kw, default = getargspec_no_self(
             cls.set_model_params)
         if varargs is not None:
             raise RuntimeError(
                 "PyEMMA models should always specify their parameters in the signature"
                 " of their set_model_params (no varargs). %s doesn't follow this convention."
                 % (cls, ))
         return args
     else:
         # No parameters known
         return []
Esempio n. 3
0
    def _get_param_names(cls):
        """Get parameter names for the estimator"""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        args, varargs, kw, default = getargspec_no_self(init)
        if varargs is not None:
            raise RuntimeError("scikit-learn estimators should always "
                               "specify their parameters in the signature"
                               " of their __init__ (no varargs)."
                               " %s doesn't follow this convention." % (cls, ))
        args.sort()
        return args
Esempio n. 4
0
 def _get_model_param_names(self):
     r"""Get parameter names for the model"""
     # fetch model parameters
     if hasattr(self, 'set_model_params'):
         set_model_param_method = getattr(self, 'set_model_params')
         # introspect the constructor arguments to find the model parameters
         # to represent
         args, varargs, kw, default = getargspec_no_self(
             set_model_param_method)
         if varargs is not None:
             raise RuntimeError(
                 "pyEMMA models should always specify their parameters in the signature"
                 " of their set_model_params (no varargs). %s doesn't follow this convention."
                 % (self, ))
         # Remove 'self'
         # XXX: This is going to fail if the init is a staticmethod, but
         # who would do this?
         args.pop(0)
         args.sort()
         return args
     else:
         # No parameters known
         return []
Esempio n. 5
0
    def count_lagged(self,
                     lag,
                     count_mode='sliding',
                     mincount_connectivity='1/n',
                     show_progress=True):
        r""" Counts transitions at given lag time

        Parameters
        ----------
        lag : int
            lagtime in trajectory steps

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes
              .. math:: (0 \rightarray \tau), (1 \rightarray \tau+1), ..., (T-\tau-1 \rightarray T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.

            * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
              at time indexes
              .. math:: (0 \rightarray \tau), (\tau \rightarray 2 \tau), ..., (((T/tau)-1) \tau \rightarray T)

        show_progress: bool, default=True
            show the progress for the expensive effective count mode computation.

        """
        # store lag time
        self._lag = lag

        # Compute count matrix
        count_mode = count_mode.lower()
        if count_mode == 'sliding':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
        elif count_mode == 'sample':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
        elif count_mode == 'effective':
            from pyemma.util.reflection import getargspec_no_self
            argspec = getargspec_no_self(msmest.effective_count_matrix)
            kw = {}
            if show_progress and 'callback' in argspec.args:
                from pyemma._base.progress import ProgressReporter
                from pyemma._base.parallel import get_n_jobs

                pg = ProgressReporter()
                # this is a fast operation
                C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
                pg.register(C_temp.nnz, 'compute statistical inefficiencies')
                del C_temp
                callback = lambda: pg.update(1)
                kw['callback'] = callback
                kw['n_jobs'] = get_n_jobs()

            self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
        else:
            raise ValueError('Count mode ' + count_mode + ' is unknown.')

        # store mincount_connectivity
        if mincount_connectivity == '1/n':
            mincount_connectivity = 1.0 / np.shape(self._C)[0]
        self._mincount_connectivity = mincount_connectivity

        # Compute reversibly connected sets
        if self._mincount_connectivity > 0:
            self._connected_sets = \
                self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
        else:
            self._connected_sets = msmest.connected_sets(self._C)

        # set sizes and count matrices on reversibly connected sets
        self._connected_set_sizes = np.zeros((len(self._connected_sets)))
        self._C_sub = np.empty((len(self._connected_sets)), dtype=np.object)
        for i in range(len(self._connected_sets)):
            # set size
            self._connected_set_sizes[i] = len(self._connected_sets[i])
            # submatrix
            # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

        # largest connected set
        self._lcs = self._connected_sets[0]

        # if lcs has no counts, make lcs empty
        if submatrix(self._C, self._lcs).sum() == 0:
            self._lcs = np.array([], dtype=int)

        # mapping from full to lcs
        self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
        self._full2lcs[self._lcs] = np.arange(len(self._lcs))

        # remember that this function was called
        self._counted_at_lag = True
Esempio n. 6
0
    def count_lagged(self, lag, count_mode='sliding', mincount_connectivity='1/n',
                     show_progress=True, n_jobs=None, name='', core_set=None, milestoning_method='last_core'):
        r""" Counts transitions at given lag time

        Parameters
        ----------
        lag : int
            lagtime in trajectory steps

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes
              .. math:: (0 \rightarray \tau), (1 \rightarray \tau+1), ..., (T-\tau-1 \rightarray T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.

            * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
              at time indexes
              .. math:: (0 \rightarray \tau), (\tau \rightarray 2 \tau), ..., (((T/tau)-1) \tau \rightarray T)

        show_progress: bool, default=True
            show the progress for the expensive effective count mode computation.

        n_jobs: int or None

        """
        # store lag time
        self._lag = lag

        # Compute count matrix
        count_mode = count_mode.lower()
        if core_set is not None and count_mode in ('sliding', 'sample'):
            if milestoning_method == 'last_core':

                # assign -1 frames to last visited core
                for d in self._dtrajs:
                    assert d[0] != -1
                    while -1 in d:
                        mask = (d == -1)
                        d[mask] = d[np.roll(mask, -1)]
                self._C = msmest.count_matrix(self._dtrajs, lag, sliding=count_mode == 'sliding')

            else:
                raise NotImplementedError('Milestoning method {} not implemented.'.format(milestoning_method))


        elif count_mode == 'sliding':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
        elif count_mode == 'sample':
            self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
        elif count_mode == 'effective':
            if core_set is not None:
                raise RuntimeError('Cannot estimate core set MSM with effective counting.')
            from pyemma.util.reflection import getargspec_no_self
            argspec = getargspec_no_self(msmest.effective_count_matrix)
            kw = {}
            from pyemma.util.contexts import nullcontext
            ctx = nullcontext()
            if 'callback' in argspec.args:  # msmtools effective cmatrix ready for multiprocessing?
                from pyemma._base.progress import ProgressReporter
                from pyemma._base.parallel import get_n_jobs

                kw['n_jobs'] = get_n_jobs() if n_jobs is None else n_jobs

                if show_progress:
                    pg = ProgressReporter()
                    # this is a fast operation
                    C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
                    pg.register(C_temp.nnz, '{}: compute stat. inefficiencies'.format(name), stage=0)
                    del C_temp
                    kw['callback'] = pg.update
                    ctx = pg.context(stage=0)
            with ctx:
                self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
        else:
            raise ValueError('Count mode ' + count_mode + ' is unknown.')

        # store mincount_connectivity
        if mincount_connectivity == '1/n':
            mincount_connectivity = 1.0 / np.shape(self._C)[0]
        self._mincount_connectivity = mincount_connectivity

        # Compute reversibly connected sets
        if self._mincount_connectivity > 0:
            self._connected_sets = \
                self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
        else:
            self._connected_sets = msmest.connected_sets(self._C)

        # set sizes and count matrices on reversibly connected sets
        self._connected_set_sizes = np.zeros((len(self._connected_sets)))
        self._C_sub = np.empty((len(self._connected_sets)), dtype=np.object)
        for i in range(len(self._connected_sets)):
            # set size
            self._connected_set_sizes[i] = len(self._connected_sets[i])
            # submatrix
            # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

        # largest connected set
        self._lcs = self._connected_sets[0]

        # if lcs has no counts, make lcs empty
        if submatrix(self._C, self._lcs).sum() == 0:
            self._lcs = np.array([], dtype=int)

        # mapping from full to lcs
        self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
        self._full2lcs[self._lcs] = np.arange(len(self._lcs))

        # remember that this function was called
        self._counted_at_lag = True