def register_progress_callback(self, call_back, stage=0):
    """Register a callback that receives progress updates.

    Parameters
    ----------
    call_back : callable
        Invoked with two positional arguments — the stage (int) and the
        pyemma.utils.progressbar.ProgressBar instance — and must also
        accept *args and **kw for forward compatibility.
    stage : int, optional, default=0
        The stage at which the given callback should be fired.
    """
    # Progress reporting is switched off -> nothing to register.
    if not self.show_progress:
        return

    assert callable(call_back)

    # Validate the callback signature: exactly two positional
    # parameters plus *args and **kw for future extensions.
    from pyemma.util.reflection import getargspec_no_self
    spec = getargspec_no_self(call_back)
    assert len(spec.args) == 2
    assert spec.varargs is not None
    assert spec.keywords is not None

    # Callbacks are grouped per stage; create the bucket on first use.
    self._prog_rep_callbacks.setdefault(stage, []).append(call_back)
def _get_model_param_names(cls):
    r"""Return the parameter names of the model.

    Introspects the signature of ``set_model_params`` on *cls*; a model
    without that method reports no parameters.

    Returns
    -------
    list of str
        Argument names of ``set_model_params`` (without ``self``).
    """
    # Without a set_model_params method there is nothing to introspect.
    if not hasattr(cls, 'set_model_params'):
        return []

    args, varargs, kw, default = getargspec_no_self(cls.set_model_params)
    # A *args signature would hide the parameter list from us.
    if varargs is not None:
        raise RuntimeError(
            "PyEMMA models should always specify their parameters in the signature"
            " of their set_model_params (no varargs). %s doesn't follow this convention." % (cls, ))
    return args
def _get_param_names(cls):
    """Return the sorted constructor-parameter names of the estimator.

    Returns
    -------
    list of str
        Alphabetically sorted argument names of ``__init__`` (without
        ``self``); empty if the class has no explicit constructor.
    """
    # A deprecation wrapper may have stashed the original constructor;
    # prefer that one for introspection.
    init = getattr(cls.__init__, 'deprecated_original', cls.__init__)

    # object.__init__ means the class declares no constructor of its own.
    if init is object.__init__:
        return []

    args, varargs, kw, default = getargspec_no_self(init)
    # A *args signature would hide the parameter list from us.
    if varargs is not None:
        raise RuntimeError("scikit-learn estimators should always "
                           "specify their parameters in the signature"
                           " of their __init__ (no varargs)."
                           " %s doesn't follow this convention." % (cls, ))
    return sorted(args)
def _get_model_param_names(self):
    r"""Get parameter names for the model.

    Introspects the signature of this instance's ``set_model_params``
    method; a model without that method reports no parameters.

    Returns
    -------
    list of str
        Alphabetically sorted argument names of ``set_model_params``
        (without ``self``); empty if the method does not exist.
    """
    # fetch model parameters
    if hasattr(self, 'set_model_params'):
        set_model_param_method = getattr(self, 'set_model_params')
        # introspect the method's arguments to find the model parameters
        # to represent
        args, varargs, kw, default = getargspec_no_self(
            set_model_param_method)
        if varargs is not None:
            raise RuntimeError(
                "pyEMMA models should always specify their parameters in the signature"
                " of their set_model_params (no varargs). %s doesn't follow this convention." % (self, ))
        # FIX: getargspec_no_self already strips 'self' from the argument
        # list (see the classmethod variant of this helper, which performs
        # no pop); the former args.pop(0) here additionally discarded the
        # first *real* model parameter.
        args.sort()
        return args
    else:
        # No parameters known
        return []
def count_lagged(self, lag, count_mode='sliding', mincount_connectivity='1/n',
                 show_progress=True):
    r""" Counts transitions at given lag time

    Parameters
    ----------
    lag : int
        lagtime in trajectory steps

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a
          Bayesian MSM.

        * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    mincount_connectivity : float or '1/n', optional, default='1/n'
        minimum number of counts required to consider two states connected;
        the default '1/n' evaluates to one count per state (1/nstates).

    show_progress: bool, default=True
        show the progress for the expensive effective count mode computation.
    """
    # store lag time
    self._lag = lag

    # Compute count matrix
    count_mode = count_mode.lower()
    if count_mode == 'sliding':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
    elif count_mode == 'sample':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
    elif count_mode == 'effective':
        from pyemma.util.reflection import getargspec_no_self
        argspec = getargspec_no_self(msmest.effective_count_matrix)
        kw = {}
        # only wire up progress reporting when the installed msmtools
        # version actually supports a callback argument
        if show_progress and 'callback' in argspec.args:
            from pyemma._base.progress import ProgressReporter
            from pyemma._base.parallel import get_n_jobs

            pg = ProgressReporter()
            # this is a fast operation
            C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
            pg.register(C_temp.nnz, 'compute statistical inefficiencies')
            del C_temp
            callback = lambda: pg.update(1)
            kw['callback'] = callback
            kw['n_jobs'] = get_n_jobs()

        self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
    else:
        raise ValueError('Count mode ' + count_mode + ' is unknown.')

    # store mincount_connectivity; '1/n' resolves to one count per state
    if mincount_connectivity == '1/n':
        mincount_connectivity = 1.0 / np.shape(self._C)[0]
    self._mincount_connectivity = mincount_connectivity

    # Compute reversibly connected sets
    if self._mincount_connectivity > 0:
        self._connected_sets = \
            self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
    else:
        self._connected_sets = msmest.connected_sets(self._C)

    # set sizes and count matrices on reversibly connected sets
    self._connected_set_sizes = np.zeros((len(self._connected_sets)))
    # FIX: np.object was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` is the supported spelling of the same dtype.
    self._C_sub = np.empty((len(self._connected_sets)), dtype=object)
    for i in range(len(self._connected_sets)):
        # set size
        self._connected_set_sizes[i] = len(self._connected_sets[i])
        # submatrix
        # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

    # largest connected set (connected sets are assumed ordered by size)
    self._lcs = self._connected_sets[0]

    # if lcs has no counts, make lcs empty
    if submatrix(self._C, self._lcs).sum() == 0:
        self._lcs = np.array([], dtype=int)

    # mapping from full state space to lcs indices (-1 = not in lcs)
    self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
    self._full2lcs[self._lcs] = np.arange(len(self._lcs))

    # remember that this function was called
    self._counted_at_lag = True
def count_lagged(self, lag, count_mode='sliding', mincount_connectivity='1/n',
                 show_progress=True, n_jobs=None, name='', core_set=None,
                 milestoning_method='last_core'):
    r""" Counts transitions at given lag time

    Parameters
    ----------
    lag : int
        lagtime in trajectory steps

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a
          Bayesian MSM.

        * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    mincount_connectivity : float or '1/n', optional, default='1/n'
        minimum number of counts required to consider two states connected;
        the default '1/n' evaluates to one count per state (1/nstates).

    show_progress: bool, default=True
        show the progress for the expensive effective count mode computation.

    n_jobs: int or None
        number of parallel jobs for the effective count computation;
        None means use the pyemma default.

    name: str, optional, default=''
        label prepended to the progress-bar description.

    core_set: array-like of int or None
        if given, restrict counting to transitions between these core states
        (milestoning); only valid with 'sliding' or 'sample' count modes.

    milestoning_method: str, optional, default='last_core'
        how to assign non-core (-1) frames; only 'last_core' (assign to the
        most recently visited core) is implemented.
    """
    # store lag time
    self._lag = lag

    # Compute count matrix
    count_mode = count_mode.lower()
    if core_set is not None and count_mode in ('sliding', 'sample'):
        if milestoning_method == 'last_core':

            # assign -1 frames to last visited core
            for d in self._dtrajs:
                assert d[0] != -1
                # repeatedly copy the predecessor state onto -1 frames
                # until every frame carries its last visited core
                while -1 in d:
                    mask = (d == -1)
                    d[mask] = d[np.roll(mask, -1)]
            self._C = msmest.count_matrix(self._dtrajs, lag,
                                          sliding=count_mode == 'sliding')

        else:
            raise NotImplementedError('Milestoning method {} not implemented.'.format(milestoning_method))

    elif count_mode == 'sliding':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
    elif count_mode == 'sample':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
    elif count_mode == 'effective':
        if core_set is not None:
            raise RuntimeError('Cannot estimate core set MSM with effective counting.')
        from pyemma.util.reflection import getargspec_no_self
        argspec = getargspec_no_self(msmest.effective_count_matrix)
        kw = {}
        from pyemma.util.contexts import nullcontext
        ctx = nullcontext()
        # only wire up parallelism/progress when the installed msmtools
        # version actually supports a callback argument
        if 'callback' in argspec.args:  # msmtools effective cmatrix ready for multiprocessing?
            from pyemma._base.progress import ProgressReporter
            from pyemma._base.parallel import get_n_jobs

            kw['n_jobs'] = get_n_jobs() if n_jobs is None else n_jobs

            if show_progress:
                pg = ProgressReporter()
                # this is a fast operation
                C_temp = msmest.count_matrix(self._dtrajs, lag, sliding=True)
                pg.register(C_temp.nnz, '{}: compute stat. inefficiencies'.format(name), stage=0)
                del C_temp
                kw['callback'] = pg.update
                ctx = pg.context(stage=0)
        with ctx:
            self._C = msmest.effective_count_matrix(self._dtrajs, lag, **kw)
    else:
        raise ValueError('Count mode ' + count_mode + ' is unknown.')

    # store mincount_connectivity; '1/n' resolves to one count per state
    if mincount_connectivity == '1/n':
        mincount_connectivity = 1.0 / np.shape(self._C)[0]
    self._mincount_connectivity = mincount_connectivity

    # Compute reversibly connected sets
    if self._mincount_connectivity > 0:
        self._connected_sets = \
            self._compute_connected_sets(self._C, mincount_connectivity=self._mincount_connectivity)
    else:
        self._connected_sets = msmest.connected_sets(self._C)

    # set sizes and count matrices on reversibly connected sets
    self._connected_set_sizes = np.zeros((len(self._connected_sets)))
    # FIX: np.object was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` is the supported spelling of the same dtype.
    self._C_sub = np.empty((len(self._connected_sets)), dtype=object)
    for i in range(len(self._connected_sets)):
        # set size
        self._connected_set_sizes[i] = len(self._connected_sets[i])
        # submatrix
        # self._C_sub[i] = submatrix(self._C, self._connected_sets[i])

    # largest connected set (connected sets are assumed ordered by size)
    self._lcs = self._connected_sets[0]

    # if lcs has no counts, make lcs empty
    if submatrix(self._C, self._lcs).sum() == 0:
        self._lcs = np.array([], dtype=int)

    # mapping from full state space to lcs indices (-1 = not in lcs)
    self._full2lcs = -1 * np.ones((self._nstates), dtype=int)
    self._full2lcs[self._lcs] = np.arange(len(self._lcs))

    # remember that this function was called
    self._counted_at_lag = True