def n_jobs(self, val):
    """Set the number of jobs/threads to use.

    Parameters
    ----------
    val : int or None
        A positive int for the number of jobs, or None to use all available
        resources.

    Notes
    -----
    If ``val`` is None or equals the default of the constructor's ``n_jobs``
    argument, the number of jobs is chosen automatically: the environment
    variable ``OMP_NUM_THREADS`` is used when it holds a positive integer,
    otherwise the CPU count reported by ``psutil.cpu_count()`` is used
    (falling back to 1 if psutil cannot determine it). Any other value is
    converted with ``int`` and stored.
    """
    from pyemma.util.reflection import get_default_args

    # None always requests automatic selection; testing it first also keeps
    # int(None) from raising when the constructor default is not None.
    use_default = val is None or val == get_default_args(self.__init__)['n_jobs']
    if not use_default:
        self._n_jobs = int(val)
        return

    import psutil
    # psutil.cpu_count() returns None when the count is undetermined;
    # never store None as the number of jobs.
    n_cpus = psutil.cpu_count() or 1

    omp_threads_from_env = os.getenv('OMP_NUM_THREADS', None)
    if not omp_threads_from_env:
        self._n_jobs = n_cpus
        return

    try:
        n_threads = int(omp_threads_from_env)
        if n_threads < 1:
            # zero or negative thread counts are as unusable as unparsable ones
            raise ValueError("expected a positive integer")
    except ValueError as ve:
        self.logger.warning(
            "could not parse env variable 'OMP_NUM_THREADS'."
            " Value='{}'. Error={}. Will use {} jobs.".format(
                omp_threads_from_env, ve, n_cpus))
        self._n_jobs = n_cpus
    else:
        self._n_jobs = n_threads
        self.logger.info(
            "number of threads obtained from env variable"
            " 'OMP_NUM_THREADS'=%s" % omp_threads_from_env)
def __init__(self, dim=-1, var_cutoff=0.95, mean=None):
    r"""Principal component analysis (PCA).

    Given a sequence of multivariate data :math:`X_t`, computes the
    mean-free covariance matrix

    .. math:: C = (X - \mu)^T (X - \mu)

    and solves the eigenvalue problem

    .. math:: C r_i = \sigma_i r_i,

    where :math:`r_i` are the principal components and :math:`\sigma_i` are
    their respective variances.

    When used as a dimension reduction method, the input data is projected
    onto the dominant principal components.

    Parameters
    ----------
    dim : int, optional, default -1
        The number of dimensions (principal components) to project onto.
        -1 means all numerically available dimensions will be used unless
        reduced by var_cutoff. Setting dim to a positive value is exclusive
        with var_cutoff.
    var_cutoff : float in the range [0,1], optional, default 0.95
        Determines the number of output dimensions by including dimensions
        until their cumulative variance exceeds this fraction.
        var_cutoff=1.0 means all numerically available dimensions will be
        used, unless set by dim. Setting var_cutoff to a non-default value
        is exclusive with dim.
    mean : ndarray, optional, default None
        Optionally pass pre-calculated means to avoid their re-computation.
        The shape has to match the input dimension.

    Raises
    ------
    ValueError
        If dim differs from -1 and var_cutoff differs from its default.
    """
    super(PCA, self).__init__()

    self._dim = dim
    self._var_cutoff = var_cutoff

    # var_cutoff only counts as "set by the caller" when it differs from the
    # default declared in this constructor's signature.
    signature_cutoff = get_default_args(self.__init__)['var_cutoff']
    both_requested = dim != -1 and var_cutoff != signature_cutoff
    if both_requested:
        raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

    # working state, nothing computed yet
    self._dot_prod_tmp = self.Y = None
    self._N_mean = self._N_cov = 0
    self.mu = mean

    # set up result variables
    self.eigenvalues = self.eigenvectors = self.cumvar = None

    # output options
    self._custom_param_progress_handling = True
def __init__(self, dim=-1, var_cutoff=0.95, mean=None, stride=1, skip=0):
    r"""Principal component analysis (PCA).

    .. deprecated:: 2.5.11
        Use the scikit-learn
        `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`__
        implementation instead. Will be removed in PyEMMA 3.

    Given a sequence of multivariate data :math:`X_t`, computes the
    mean-free covariance matrix

    .. math:: C = (X - \mu)^T (X - \mu)

    and solves the eigenvalue problem

    .. math:: C r_i = \sigma_i r_i,

    where :math:`r_i` are the principal components and :math:`\sigma_i` are
    their respective variances.

    When used as a dimension reduction method, the input data is projected
    onto the dominant principal components.

    Parameters
    ----------
    dim : int, optional, default -1
        The number of dimensions (principal components) to project onto.
        -1 means all numerically available dimensions will be used unless
        reduced by var_cutoff. Setting dim to a positive value is exclusive
        with var_cutoff.
    var_cutoff : float in the range [0,1], optional, default 0.95
        Determines the number of output dimensions by including dimensions
        until their cumulative variance exceeds this fraction.
        var_cutoff=1.0 means all numerically available dimensions will be
        used, unless set by dim. Setting var_cutoff to a non-default value
        is exclusive with dim.
    mean : ndarray, optional, default None
        Optionally pass pre-calculated means to avoid their re-computation.
        The shape has to match the input dimension.
    stride : int, optional, default 1
        Forwarded to ``set_params``. Presumably only every stride-th frame
        is used -- TODO confirm against the estimator.
    skip : int, default 0
        Skip the first n frames of each trajectory.

    Raises
    ------
    ValueError
        If dim differs from -1 and var_cutoff differs from its default.
    """
    super(PCA, self).__init__()

    # var_cutoff only counts as "set by the caller" when it differs from the
    # default declared in this constructor's signature.
    signature_cutoff = get_default_args(self.__init__)['var_cutoff']
    both_requested = dim != -1 and var_cutoff != signature_cutoff
    if both_requested:
        raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

    # start from an empty model and store the constructor arguments
    self._model = PCAModel()
    params = dict(dim=dim, var_cutoff=var_cutoff, mean=mean, stride=stride, skip=skip)
    self.set_params(**params)
def __init__(self, lag, dim=-1, var_cutoff=0.95, kinetic_map=True, commute_map=False, epsilon=1e-6,
             stride=1, skip=0, reversible=True, weights=None, ncov_max=float('inf')):
    r"""Time-lagged independent component analysis (TICA) [1]_, [2]_, [3]_.

    Parameters
    ----------
    lag : int
        Lag time.
    dim : int, optional, default -1
        Maximum number of significant independent components to use to
        reduce the dimension of the input data. -1 means all numerically
        available dimensions (see epsilon) will be used unless reduced by
        var_cutoff. Setting dim to a positive value is exclusive with
        var_cutoff.
    var_cutoff : float in the range [0,1], optional, default 0.95
        Determines the number of output dimensions by including dimensions
        until their cumulative kinetic variance exceeds the fraction
        subspace_variance. var_cutoff=1.0 means all numerically available
        dimensions (see epsilon) will be used, unless set by dim. Setting
        var_cutoff smaller than 1.0 is exclusive with dim.
    kinetic_map : bool, optional, default True
        Eigenvectors will be scaled by eigenvalues. As a result, Euclidean
        distances in the transformed data approximate kinetic distances
        [4]_. This is a good choice when the data is further processed by
        clustering.
    commute_map : bool, optional, default False
        Eigenvector_i will be scaled by sqrt(timescale_i / 2). As a result,
        Euclidean distances in the transformed data will approximate
        commute distances [5]_.
    epsilon : float, optional, default 1e-6
        Eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon will
        be cut off. The remaining number of eigenvalues defines the size of
        the output.
    stride : int, optional, default 1
        Use only every stride-th time step. By default, every time step is
        used.
    skip : int, default 0
        Skip the first initial n frames per trajectory.
    reversible : bool, default True
        Symmetrize correlation matrices C_0, C_{\tau}. At the moment,
        setting reversible=False is not implemented.
    weights : object, optional, default None
        An object that allows to compute re-weighting factors to estimate
        equilibrium means and correlations from off-equilibrium data. The
        only requirement is that weights possesses a method weights(X),
        that accepts a trajectory X (np.ndarray(T, n)) and returns a vector
        of re-weighting factors (np.ndarray(T,)).
    ncov_max : optional, default float('inf')
        Forwarded unchanged to the ``LaggedCovariance`` estimator and to
        ``set_params``; its exact meaning is defined there.

    Raises
    ------
    ValueError
        If dim differs from -1 and var_cutoff differs from its default, or
        if both kinetic_map and commute_map are requested.
    NotImplementedError
        If reversible is false.

    Notes
    -----
    Given a sequence of multivariate data :math:`X_t`, computes the
    mean-free covariance and time-lagged covariance matrix:

    .. math::

        C_0 &= (X_t - \mu)^T (X_t - \mu) \\
        C_{\tau} &= (X_t - \mu)^T (X_{t + \tau} - \mu)

    and solves the eigenvalue problem

    .. math:: C_{\tau} r_i = C_0 \lambda_i(tau) r_i,

    where :math:`r_i` are the independent components and
    :math:`\lambda_i(tau)` are their respective normalized
    time-autocorrelations. The eigenvalues are related to the relaxation
    timescale by

    .. math:: t_i(tau) = -\tau / \ln |\lambda_i|.

    When used as a dimension reduction method, the input data is projected
    onto the dominant independent components.

    References
    ----------
    .. [1] Perez-Hernandez G, F Paul, T Giorgino, G De Fabritiis and F Noe. 2013.
       Identification of slow molecular order parameters for Markov model construction
       J. Chem. Phys. 139, 015102. doi:10.1063/1.4811489
    .. [2] Schwantes C, V S Pande. 2013.
       Improvements in Markov State Model Construction Reveal Many Non-Native Interactions
       in the Folding of NTL9
       J. Chem. Theory. Comput. 9, 2000-2009. doi:10.1021/ct300878a
    .. [3] L. Molgedey and H. G. Schuster. 1994.
       Separation of a mixture of independent signals using time delayed correlations
       Phys. Rev. Lett. 72, 3634.
    .. [4] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from
       molecular dynamics simulation. J. Chem. Theory. Comput.
       doi:10.1021/acs.jctc.5b00553
    .. [5] Noe, F., Banisch, R., Clementi, C. 2016. Commute maps: separating
       slowly-mixing molecular configurations for kinetic modeling.
       J. Chem. Theory. Comput. doi:10.1021/acs.jctc.6b00762
    """
    # --- argument validation (happens before the base class is initialised) ---
    signature_cutoff = get_default_args(self.__init__)['var_cutoff']
    dim_and_cutoff_given = dim != -1 and var_cutoff != signature_cutoff
    if dim_and_cutoff_given:
        raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

    both_scalings = kinetic_map and commute_map
    if both_scalings:
        raise ValueError('Trying to use both kinetic_map and commute_map. Use either or.')

    if not reversible:
        raise NotImplementedError("Reversible=False is currently not implemented.")

    super(TICA, self).__init__()

    # a non-negative dim takes over; the variance cutoff is then set to 1.0
    var_cutoff = 1.0 if dim > -1 else var_cutoff

    # estimator for the instantaneous and time-lagged covariance matrices
    self._covar = LaggedCovariance(
        c00=True,
        c0t=True,
        ctt=False,
        remove_data_mean=True,
        reversible=reversible,
        lag=lag,
        bessel=False,
        stride=stride,
        skip=skip,
        weights=weights,
        ncov_max=ncov_max,
    )

    # empty dummy model instance
    self._model = TICAModel()

    # keyword order matches the original set_params call
    params = dict(
        lag=lag,
        dim=dim,
        var_cutoff=var_cutoff,
        kinetic_map=kinetic_map,
        commute_map=commute_map,
        epsilon=epsilon,
        reversible=reversible,
        stride=stride,
        skip=skip,
        weights=weights,
        ncov_max=ncov_max,
    )
    self.set_params(**params)
def __init__(self, lag, dim=-1, var_cutoff=0.95, kinetic_map=True, epsilon=1e-6,
             force_eigenvalues_le_one=False, mean=None):
    r"""Time-lagged independent component analysis (TICA) [1]_, [2]_, [3]_.

    Parameters
    ----------
    lag : int
        Lag time.
    dim : int, optional, default -1
        Maximum number of significant independent components to use to
        reduce the dimension of the input data. -1 means all numerically
        available dimensions (see epsilon) will be used unless reduced by
        var_cutoff. Setting dim to a positive value is exclusive with
        var_cutoff.
    var_cutoff : float in the range [0,1], optional, default 0.95
        Determines the number of output dimensions by including dimensions
        until their cumulative kinetic variance exceeds the fraction
        subspace_variance. var_cutoff=1.0 means all numerically available
        dimensions (see epsilon) will be used, unless set by dim. Setting
        var_cutoff smaller than 1.0 is exclusive with dim.
    kinetic_map : bool, optional, default True
        Eigenvectors will be scaled by eigenvalues. As a result, Euclidean
        distances in the transformed data approximate kinetic distances
        [4]_. This is a good choice when the data is further processed by
        clustering.
    epsilon : float, optional, default 1e-6
        Eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon will
        be cut off. The remaining number of eigenvalues defines the size of
        the output.
    force_eigenvalues_le_one : boolean, optional, default False
        Compute covariance matrix and time-lagged covariance matrix such
        that the generalized eigenvalues are always guaranteed to be <= 1.
    mean : ndarray, optional, default None
        Optionally pass pre-calculated means to avoid their re-computation.
        The shape has to match the input dimension.

    Raises
    ------
    ValueError
        If dim differs from -1 and var_cutoff differs from its default.

    Notes
    -----
    Given a sequence of multivariate data :math:`X_t`, computes the
    mean-free covariance and time-lagged covariance matrix:

    .. math::

        C_0 &= (X_t - \mu)^T (X_t - \mu) \\
        C_{\tau} &= (X_t - \mu)^T (X_{t + \tau} - \mu)

    and solves the eigenvalue problem

    .. math:: C_{\tau} r_i = C_0 \lambda_i(tau) r_i,

    where :math:`r_i` are the independent components and
    :math:`\lambda_i(tau)` are their respective normalized
    time-autocorrelations. The eigenvalues are related to the relaxation
    timescale by

    .. math:: t_i(tau) = -\tau / \ln |\lambda_i|.

    When used as a dimension reduction method, the input data is projected
    onto the dominant independent components.

    References
    ----------
    .. [1] Perez-Hernandez G, F Paul, T Giorgino, G De Fabritiis and F Noe. 2013.
       Identification of slow molecular order parameters for Markov model construction
       J. Chem. Phys. 139, 015102. doi:10.1063/1.4811489
    .. [2] Schwantes C, V S Pande. 2013.
       Improvements in Markov State Model Construction Reveal Many Non-Native Interactions
       in the Folding of NTL9
       J. Chem. Theory. Comput. 9, 2000-2009. doi:10.1021/ct300878a
    .. [3] L. Molgedey and H. G. Schuster. 1994.
       Separation of a mixture of independent signals using time delayed correlations
       Phys. Rev. Lett. 72, 3634.
    .. [4] Noe, F. and C. Clementi. 2015.
       Kinetic distance and kinetic maps from molecular dynamics simulation
       http://arxiv.org/abs/1506.06259
    """
    super(TICA, self).__init__()

    # store lag time to set it appropriately in second pass of parametrize
    self._lag = lag
    self._dim = dim
    self._var_cutoff = var_cutoff

    # var_cutoff only counts as "set by the caller" when it differs from the
    # default declared in this constructor's signature.
    signature_cutoff = get_default_args(self.__init__)['var_cutoff']
    both_requested = dim != -1 and var_cutoff != signature_cutoff
    if both_requested:
        raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

    self._kinetic_map = kinetic_map
    self._epsilon = epsilon
    self._force_eigenvalues_le_one = force_eigenvalues_le_one

    # covariances (instantaneous and time-lagged), not yet estimated
    self.cov = self.cov_tau = None

    # mean, optionally supplied by the caller
    self.mu = mean

    # counters start at zero
    self._N_mean = self._N_cov = self._N_cov_tau = 0

    # result variables, not yet computed
    self._eigenvalues = self._eigenvectors = self._cumvar = None

    self._custom_param_progress_handling = True

    # skipped trajectories
    self._skipped_trajs = []