Esempio n. 1
0
    def n_jobs(self, val):
        """ Set the number of jobs/threads to use.

        Parameters
        ----------
        val: int or None
            a positive int for the number of jobs, or the constructor default
            of ``n_jobs`` (documented as None) to use all available resources.

        Notes
        -----
        If ``val`` equals the default value of the constructor argument
        ``n_jobs``, the number of jobs is resolved in this order:

        1. the environment variable ``OMP_NUM_THREADS``, if it is set to a
           non-empty value that can be parsed as an integer,
        2. the CPU count reported by ``psutil.cpu_count()``,
        3. 1, if the CPU count can not be determined.

        Any other value is converted with ``int(val)`` and stored as is.

        """
        from pyemma.util.reflection import get_default_args
        def_args = get_default_args(self.__init__)

        # an explicitly requested (non-default) value is taken as is
        if val != def_args['n_jobs']:
            self._n_jobs = int(val)
            return

        import psutil
        # psutil.cpu_count() returns None if the count is undetermined;
        # never store None as the number of jobs.
        n_cpus = psutil.cpu_count() or 1

        omp_threads_from_env = os.getenv('OMP_NUM_THREADS', None)
        if not omp_threads_from_env:
            # variable unset or empty
            self._n_jobs = n_cpus
            return

        try:
            n_threads = int(omp_threads_from_env)
        except ValueError as ve:
            self.logger.warning(
                "could not parse env variable 'OMP_NUM_THREADS'."
                " Value='{}'. Error={}. Will use {} jobs.".format(
                    omp_threads_from_env, ve, n_cpus))
            self._n_jobs = n_cpus
        else:
            self._n_jobs = n_threads
            self.logger.info(
                "number of threads obtained from env variable"
                " 'OMP_NUM_THREADS'=%s", omp_threads_from_env)
Esempio n. 2
0
    def __init__(self, dim=-1, var_cutoff=0.95, mean=None):
        r""" Principal component analysis (PCA).

        For a sequence of multivariate data :math:`X_t`, the mean-free
        covariance matrix

        .. math:: C = (X - \mu)^T (X - \mu)

        is computed and the eigenvalue problem

        .. math:: C r_i = \sigma_i r_i,

        is solved, where :math:`r_i` are the principal components and
        :math:`\sigma_i` are their respective variances.

        When used as a dimension reduction method, the input data is projected
        onto the dominant principal components.

        Parameters
        ----------
        dim : int, optional, default -1
            the number of dimensions (principal components) to project onto;
            the d-dimensional input is reduced to only dim dimensions.
            -1 means all numerically available dimensions will be used unless
            reduced by var_cutoff.
            Setting dim to a positive value is exclusive with var_cutoff.

        var_cutoff : float in the range [0,1], optional, default 0.95
            Determines the number of output dimensions by including dimensions
            until their cumulative variance exceeds the fraction var_cutoff.
            var_cutoff=1.0 means all numerically available dimensions will be
            used, unless set by dim. Setting var_cutoff to a non-default value
            is exclusive with dim.

        mean : ndarray, optional, default None
            Optionally pass pre-calculated means to avoid their re-computation.
            The shape has to match the input dimension.

        Raises
        ------
        ValueError
            if dim is not -1 and var_cutoff differs from its default value.

        """
        super(PCA, self).__init__()
        self._dim, self._var_cutoff = dim, var_cutoff

        # dim and var_cutoff are two alternative ways to choose the output
        # dimension; reject requests which customize both of them.
        cutoff_default = get_default_args(self.__init__)['var_cutoff']
        dim_requested = dim != -1
        cutoff_requested = var_cutoff != cutoff_default
        if dim_requested and cutoff_requested:
            raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

        # internal working state, nothing has been estimated yet
        self._dot_prod_tmp = None
        self.Y = None
        self._N_mean = self._N_cov = 0

        # possibly user supplied, pre-calculated mean
        self.mu = mean

        # result variables
        self.eigenvalues = self.eigenvectors = self.cumvar = None

        # output options
        self._custom_param_progress_handling = True
Esempio n. 3
0
    def __init__(self, dim=-1, var_cutoff=0.95, mean=None, stride=1, skip=0):
        r""" Principal component analysis (PCA).

        .. deprecated:: 2.5.11
            Use the scikit-learn
            `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`__
            implementation instead. Will be removed in PyEMMA 3.

        For a sequence of multivariate data :math:`X_t`, the mean-free
        covariance matrix

        .. math:: C = (X - \mu)^T (X - \mu)

        is computed and the eigenvalue problem

        .. math:: C r_i = \sigma_i r_i,

        is solved, where :math:`r_i` are the principal components and
        :math:`\sigma_i` are their respective variances.

        When used as a dimension reduction method, the input data is projected
        onto the dominant principal components.

        Parameters
        ----------
        dim : int, optional, default -1
            the number of dimensions (principal components) to project onto;
            the d-dimensional input is reduced to only dim dimensions.
            -1 means all numerically available dimensions will be used unless
            reduced by var_cutoff.
            Setting dim to a positive value is exclusive with var_cutoff.

        var_cutoff : float in the range [0,1], optional, default 0.95
            Determines the number of output dimensions by including dimensions
            until their cumulative variance exceeds the fraction var_cutoff.
            var_cutoff=1.0 means all numerically available dimensions will be
            used, unless set by dim. Setting var_cutoff to a non-default value
            is exclusive with dim.

        mean : ndarray, optional, default None
            Optionally pass pre-calculated means to avoid their re-computation.
            The shape has to match the input dimension.

        stride : int, optional, default 1
            stride parameter, stored on the estimator via set_params
            (presumably only every stride-th frame is used).

        skip: int, default 0
            skip the first n frames of each trajectory.

        Raises
        ------
        ValueError
            if dim is not -1 and var_cutoff differs from its default value.

        """
        super(PCA, self).__init__()

        # dim and var_cutoff are two alternative ways to choose the output
        # dimension; reject requests which customize both of them.
        cutoff_default = get_default_args(self.__init__)['var_cutoff']
        dim_requested = dim != -1
        cutoff_requested = var_cutoff != cutoff_default
        if dim_requested and cutoff_requested:
            raise ValueError('Trying to set both the number of dimension and the subspace variance. Use either or.')

        # fresh model instance, followed by the estimator parameters
        self._model = PCAModel()
        estimator_params = dict(dim=dim,
                                var_cutoff=var_cutoff,
                                mean=mean,
                                stride=stride,
                                skip=skip)
        self.set_params(**estimator_params)
Esempio n. 4
0
    def __init__(self,
                 lag,
                 dim=-1,
                 var_cutoff=0.95,
                 kinetic_map=True,
                 commute_map=False,
                 epsilon=1e-6,
                 stride=1,
                 skip=0,
                 reversible=True,
                 weights=None,
                 ncov_max=float('inf')):
        r""" Time-lagged independent component analysis (TICA) [1]_, [2]_, [3]_.

        Parameters
        ----------
        lag : int
            lag time
        dim : int, optional, default -1
            Maximum number of significant independent components to use to
            reduce dimension of input data. -1 means all numerically available
            dimensions (see epsilon) will be used unless reduced by var_cutoff.
            Setting dim to a positive value is exclusive with var_cutoff.
        var_cutoff : float in the range [0,1], optional, default 0.95
            Determines the number of output dimensions by including dimensions
            until their cumulative kinetic variance exceeds the fraction
            var_cutoff. var_cutoff=1.0 means all numerically available
            dimensions (see epsilon) will be used, unless set by dim.
            Setting var_cutoff to a non-default value is exclusive with dim.
        kinetic_map : bool, optional, default True
            Eigenvectors will be scaled by eigenvalues. As a result, Euclidean
            distances in the transformed data approximate kinetic distances
            [4]_. This is a good choice when the data is further processed by
            clustering. Exclusive with commute_map.
        commute_map : bool, optional, default False
            Eigenvector_i will be scaled by sqrt(timescale_i / 2). As a result,
            Euclidean distances in the transformed data will approximate
            commute distances [5]_. Exclusive with kinetic_map.
        epsilon : float, optional, default 1e-6
            eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon
            will be cut off. The remaining number of eigenvalues define the
            size of the output.
        stride: int, optional, default = 1
            Use only every stride-th time step. By default, every time step is
            used.
        skip : int, default=0
            skip the first initial n frames per trajectory.
        reversible: bool, default=True
            symmetrize correlation matrices C_0, C_{\tau}. At the moment,
            setting reversible=False is not implemented.
        weights: object, optional, default = None
            An object that allows to compute re-weighting factors to estimate
            equilibrium means and correlations from off-equilibrium data. The
            only requirement is that weights possesses a method weights(X),
            that accepts a trajectory X (np.ndarray(T, n)) and returns a vector
            of re-weighting factors (np.ndarray(T,)).
        ncov_max : int or float, optional, default infinity
            forwarded unchanged to the internal LaggedCovariance estimator and
            stored as an estimator parameter.

        Raises
        ------
        ValueError
            if dim is not -1 and var_cutoff differs from its default value, or
            if both kinetic_map and commute_map are requested.
        NotImplementedError
            if reversible is False.

        Notes
        -----
        Given a sequence of multivariate data :math:`X_t`, computes the
        mean-free covariance and time-lagged covariance matrix:

        .. math::

            C_0 &=      (X_t - \mu)^T (X_t - \mu) \\
            C_{\tau} &= (X_t - \mu)^T (X_{t + \tau} - \mu)

        and solves the eigenvalue problem

        .. math:: C_{\tau} r_i = C_0 \lambda_i(\tau) r_i,

        where :math:`r_i` are the independent components and
        :math:`\lambda_i(\tau)` are their respective normalized
        time-autocorrelations. The eigenvalues are related to the relaxation
        timescale by

        .. math:: t_i(\tau) = -\tau / \ln |\lambda_i|.

        When used as a dimension reduction method, the input data is projected
        onto the dominant independent components.

        References
        ----------
        .. [1] Perez-Hernandez G, F Paul, T Giorgino, G De Fabritiis and F Noe. 2013.
           Identification of slow molecular order parameters for Markov model construction
           J. Chem. Phys. 139, 015102. doi:10.1063/1.4811489
        .. [2] Schwantes C, V S Pande. 2013.
           Improvements in Markov State Model Construction Reveal Many Non-Native Interactions in the Folding of NTL9
           J. Chem. Theory. Comput. 9, 2000-2009. doi:10.1021/ct300878a
        .. [3] L. Molgedey and H. G. Schuster. 1994.
           Separation of a mixture of independent signals using time delayed correlations
           Phys. Rev. Lett. 72, 3634.
        .. [4] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation.
           J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553
        .. [5] Noe, F., Banisch, R., Clementi, C. 2016. Commute maps: separating slowly-mixing molecular configurations
           for kinetic modeling. J. Chem. Theory. Comput. doi:10.1021/acs.jctc.6b00762

        """
        # --- argument validation, performed before any state is created ---
        cutoff_default = get_default_args(self.__init__)['var_cutoff']
        dim_requested = dim != -1
        cutoff_requested = var_cutoff != cutoff_default
        if dim_requested and cutoff_requested:
            raise ValueError(
                'Trying to set both the number of dimension and the subspace variance. Use either or.'
            )
        if kinetic_map and commute_map:
            raise ValueError(
                'Trying to use both kinetic_map and commute_map. Use either or.'
            )
        if not reversible:
            raise NotImplementedError(
                "Reversible=False is currently not implemented.")

        super(TICA, self).__init__()

        # a fixed number of dimensions switches the variance cutoff off
        var_cutoff = 1.0 if dim > -1 else var_cutoff

        # internal estimator of the instantaneous and time-lagged covariances
        covar_options = dict(c00=True,
                             c0t=True,
                             ctt=False,
                             remove_data_mean=True,
                             reversible=reversible,
                             lag=lag,
                             bessel=False,
                             stride=stride,
                             skip=skip,
                             weights=weights,
                             ncov_max=ncov_max)
        self._covar = LaggedCovariance(**covar_options)

        # empty dummy model instance
        self._model = TICAModel()

        estimator_params = dict(lag=lag,
                                dim=dim,
                                var_cutoff=var_cutoff,
                                kinetic_map=kinetic_map,
                                commute_map=commute_map,
                                epsilon=epsilon,
                                reversible=reversible,
                                stride=stride,
                                skip=skip,
                                weights=weights,
                                ncov_max=ncov_max)
        self.set_params(**estimator_params)
Esempio n. 5
0
    def __init__(self,
                 lag,
                 dim=-1,
                 var_cutoff=0.95,
                 kinetic_map=True,
                 epsilon=1e-6,
                 force_eigenvalues_le_one=False,
                 mean=None):
        r""" Time-lagged independent component analysis (TICA) [1]_, [2]_, [3]_.

        Parameters
        ----------
        lag : int
            lag time
        dim : int, optional, default -1
            Maximum number of significant independent components to use to
            reduce dimension of input data. -1 means all numerically available
            dimensions (see epsilon) will be used unless reduced by var_cutoff.
            Setting dim to a positive value is exclusive with var_cutoff.
        var_cutoff : float in the range [0,1], optional, default 0.95
            Determines the number of output dimensions by including dimensions
            until their cumulative kinetic variance exceeds the fraction
            var_cutoff. var_cutoff=1.0 means all numerically available
            dimensions (see epsilon) will be used, unless set by dim.
            Setting var_cutoff to a non-default value is exclusive with dim.
        kinetic_map : bool, optional, default True
            Eigenvectors will be scaled by eigenvalues. As a result, Euclidean
            distances in the transformed data approximate kinetic distances
            [4]_. This is a good choice when the data is further processed by
            clustering.
        epsilon : float, optional, default 1e-6
            eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon
            will be cut off. The remaining number of eigenvalues define the
            size of the output.
        force_eigenvalues_le_one : boolean, optional, default False
            Compute covariance matrix and time-lagged covariance matrix such
            that the generalized eigenvalues are always guaranteed to be <= 1.
        mean : ndarray, optional, default None
            Optionally pass pre-calculated means to avoid their re-computation.
            The shape has to match the input dimension.

        Raises
        ------
        ValueError
            if dim is not -1 and var_cutoff differs from its default value.

        Notes
        -----
        Given a sequence of multivariate data :math:`X_t`, computes the
        mean-free covariance and time-lagged covariance matrix:

        .. math::

            C_0 &=      (X_t - \mu)^T (X_t - \mu) \\
            C_{\tau} &= (X_t - \mu)^T (X_{t + \tau} - \mu)

        and solves the eigenvalue problem

        .. math:: C_{\tau} r_i = C_0 \lambda_i(\tau) r_i,

        where :math:`r_i` are the independent components and
        :math:`\lambda_i(\tau)` are their respective normalized
        time-autocorrelations. The eigenvalues are related to the relaxation
        timescale by

        .. math:: t_i(\tau) = -\tau / \ln |\lambda_i|.

        When used as a dimension reduction method, the input data is projected
        onto the dominant independent components.

        References
        ----------
        .. [1] Perez-Hernandez G, F Paul, T Giorgino, G De Fabritiis and F Noe. 2013.
           Identification of slow molecular order parameters for Markov model construction
           J. Chem. Phys. 139, 015102. doi:10.1063/1.4811489
        .. [2] Schwantes C, V S Pande. 2013.
           Improvements in Markov State Model Construction Reveal Many Non-Native Interactions in the Folding of NTL9
           J. Chem. Theory. Comput. 9, 2000-2009. doi:10.1021/ct300878a
        .. [3] L. Molgedey and H. G. Schuster. 1994.
           Separation of a mixture of independent signals using time delayed correlations
           Phys. Rev. Lett. 72, 3634.
        .. [4] Noe, F. and C. Clementi. 2015.
           Kinetic distance and kinetic maps from molecular dynamics simulation
           http://arxiv.org/abs/1506.06259

        """
        super(TICA, self).__init__()

        # the lag time is kept so it can be set appropriately in the second
        # pass of parametrize
        self._lag = lag
        self._dim, self._var_cutoff = dim, var_cutoff

        # dim and var_cutoff are two alternative ways to choose the output
        # dimension; reject requests which customize both of them.
        cutoff_default = get_default_args(self.__init__)['var_cutoff']
        dim_requested = dim != -1
        cutoff_requested = var_cutoff != cutoff_default
        if dim_requested and cutoff_requested:
            raise ValueError(
                'Trying to set both the number of dimension and the subspace variance. Use either or.'
            )

        self._kinetic_map = kinetic_map
        self._epsilon = epsilon
        self._force_eigenvalues_le_one = force_eigenvalues_le_one

        # covariances (instantaneous and time-lagged), not yet estimated
        self.cov = self.cov_tau = None
        # mean, possibly pre-calculated by the caller
        self.mu = mean

        # counters start at zero, result variables are still unset
        self._N_mean = self._N_cov = self._N_cov_tau = 0
        self._eigenvalues = self._eigenvectors = self._cumvar = None

        self._custom_param_progress_handling = True

        # skipped trajectories
        self._skipped_trajs = list()