Ejemplo n.º 1
0
    def _estimate(self, trajs):
        """Run WHAM on the given trajectories and store the results on self.

        Parameters
        ----------
        trajs : tuple or list of length 2
            ``trajs[0]`` (ttrajs) and ``trajs[1]`` (dtrajs) are walked in
            lockstep; every pair must consist of one-dimensional numeric
            arrays of equal length. Presumably these hold the
            thermodynamic-state and discrete-state index of each frame --
            confirm against the caller.

        Returns
        -------
        self
            The estimator itself, after ``state_counts_full``, ``active_set``,
            ``state_counts``, ``bias_energies``, ``therm_energies``,
            ``conf_energies``, ``increments`` and ``loglikelihoods`` have been
            assigned and ``set_model_params`` has been called.
        """
        # the input has to be a two-element sequence
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs, dtrajs = trajs[0], trajs[1]

        # each trajectory pair: 1-D numeric arrays with matching length
        for therm_traj, disc_traj in zip(ttrajs, dtrajs):
            for arr in (therm_traj, disc_traj):
                _types.assert_array(arr, ndim=1, kind='numeric')
            assert _np.shape(therm_traj)[0] == _np.shape(disc_traj)[0]

        # count state visits over the full state space
        self.state_counts_full = _util.state_counts(
            ttrajs, dtrajs, nthermo=self.nthermo, nstates=self.nstates_full)

        # the active set consists of the states visited at least once (summed
        # over axis 0 of the count array); counts and bias energies are
        # restricted to those columns
        visited = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.nonzero(visited)[0]
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(
            active_counts.astype(_np.intc))
        active_bias = self.bias_energies_full[:, self.active_set]
        self.bias_energies = _np.ascontiguousarray(
            active_bias, dtype=_np.float64)

        # run the WHAM estimator under a progress-reporting context
        reporter = _ProgressReporter()
        stage = 'WHAM'
        with reporter.context(stage=stage):
            progress_cb = _ConvergenceProgressIndicatorCallBack(
                reporter, stage, self.maxiter, self.maxerr)
            wham_result = _wham.estimate(
                self.state_counts,
                self.bias_energies,
                maxiter=self.maxiter,
                maxerr=self.maxerr,
                therm_energies=self.therm_energies,
                conf_energies=self.conf_energies,
                save_convergence_info=self.save_convergence_info,
                callback=progress_cb)
            (self.therm_energies, self.conf_energies,
             self.increments, self.loglikelihoods) = wham_result

        # one stationary model per thermodynamic state
        models = []
        for k in range(self.nthermo):
            stationary = _np.exp(self.therm_energies[k, _np.newaxis] -
                                 self.bias_energies[k, :] - self.conf_energies)
            free_energy = self.bias_energies[k, :] + self.conf_energies
            models.append(
                _StationaryModel(pi=stationary,
                                 f=free_energy,
                                 normalize_energy=False,
                                 label="K=%d" % k))

        # publish the models and energies as model parameters of self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=self.conf_energies)

        return self
Ejemplo n.º 2
0
 def set_model_params(self,
                      models=None,
                      f_therm=None,
                      pi=None,
                      f=None,
                      label='ground state'):
     r"""Set the parameters of a multi-ensemble model.

     Parameters
     ----------
     models : iterable of model objects
         Every element must be an instance of ``_Model``. Although the
         default is None, a value has to be supplied: the argument is
         iterated unconditionally.
     f_therm : array-like, one-dimensional, numeric
         Checked with ``_types.assert_array`` and stored as a float array.
     pi : ndarray(n), optional
         Forwarded to ``_StationaryModel.set_model_params``.
     f : ndarray(n), optional
         Forwarded to ``_StationaryModel.set_model_params`` with
         ``normalize_f=False``.
     label : str, default='ground state'
         Forwarded to ``_StationaryModel.set_model_params`` so that the
         label given by the caller is actually stored.

     Raises
     ------
     AssertionError
         If an element of ``models`` is not a ``_Model`` instance.
     """
     # don't normalize f, because in a multiensemble the relative energy levels matter
     _StationaryModel.set_model_params(self,
                                       pi=pi,
                                       f=f,
                                       normalize_f=False,
                                       label=label)
     # check and set other parameters
     _types.assert_array(f_therm, ndim=1, kind='numeric')
     f_therm = _np.array(f_therm, dtype=float)
     for m in models:
         assert isinstance(m, _Model)
     self.update_model_params(models=models, f_therm=f_therm)
Ejemplo n.º 3
0
    def set_model_params(self, pi=None, f=None, normalize_f=None, label=None):
        r"""Call to set all basic model parameters.

        Parameters
        ----------
        pi : ndarray(n)
            Stationary distribution. Only used when f is None. The free
            energies are computed via :math:`f_i = - \log(\pi_i)` and shifted
            such that :math:`\log \sum_i \exp(-f_i) = 0`; pi is then rescaled
            accordingly so that :math:`\sum_i \pi_i = 1`.
        f : ndarray(n)
            Discrete-state free energies. If normalize_f is true, a constant
            is added to f such that :math:`\log \sum_i \exp(-f_i) = 0`.
            Otherwise f is left as given. pi is then computed from f via
            :math:`\pi_i = \exp(-f_i)`; it sums to one only if f is
            normalized. If both (pi and f) are given, f takes precedence
            over pi.
        normalize_f : bool, default=None
            If parametrized by free energy f, normalize them such that
            :math:`\sum_i \pi_i = 1`, which is achieved by
            :math:`\log \sum_i \exp(-f_i) = 0`. None behaves like False.
            The value is stored as the model parameter ``normalize_energy``.
        label : str, default=None
            Human-readable description for the thermodynamic state of this
            model. May contain a temperature description, such as '300 K' or
            a description of bias energy such as 'unbiased' or 'Umbrella 1'.

        Raises
        ------
        ValueError
            If both pi and f are None.
        """
        if f is not None:
            _types.assert_array(f, ndim=1, kind='numeric')
            f = _np.array(f, dtype=float)
            if normalize_f:
                # normalize on the level of energies to achieve sum_i pi_i = 1
                f += _logsumexp(-f)
            pi = _np.exp(-f)
        elif pi is not None:  # f is not given, fall back to pi
            _types.assert_array(pi, ndim=1, kind='numeric')
            pi = _np.array(pi, dtype=float)
            f = -_np.log(pi)
            f += _logsumexp(-f)  # always shift f when set by pi
            # keep pi consistent with the shifted f: this rescales an
            # unnormalized input so that sum_i pi_i = 1, as documented
            pi = _np.exp(-f)
        else:
            raise ValueError(
                "Trying to initialize model without parameters: both pi (stationary distribution)"
                " and f (free energy) are None. At least one of them needs to be set.")
        # set parameters (None does not overwrite)
        self.update_model_params(pi=pi,
                                 f=f,
                                 normalize_energy=normalize_f,
                                 label=label)
Ejemplo n.º 4
0
    def _estimate(self, X):
        """Run the TRAM estimation pipeline on X and store the results on self.

        Parameters
        ----------
        X : sequence of three lists
            Unpacked as ``(ttrajs, dtrajs_full, btrajs)``. The three lists
            must have the same length. Every element of ttrajs and
            dtrajs_full is checked with
            ``_types.assert_array(..., ndim=1, kind='i')``, every element of
            btrajs with ``_types.assert_array(..., ndim=2, kind='f')``. For
            each trajectory the three arrays must agree in their length along
            axis 0, and the btrajs arrays must have ``self.nthermo`` columns.
            NOTE(review): presumably these are per-frame thermodynamic-state
            indices, discrete-state indices and bias energies -- confirm
            against the public estimator documentation.

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If one of the length/shape checks or one of the self-consistency
            checks on the restricted counts fails.
        RuntimeError
            If the data contain more discrete states than ``self.nstates_full``.

        Notes
        -----
        Steps, in order: validate and cast the input; split off the
        trajectories flagged in ``self.equilibrium``; compute state counts and
        count matrices; compute connected sets and restrict the counts to
        them; optionally initialise with MBAR (``self.init == 'mbar'``); run
        ``tram.estimate`` or, if ``self.equilibrium`` is set,
        ``trammbar.estimate``; build one ``_ThermoMSM`` per thermodynamic
        state; finally call ``set_model_params``. ``EmptyState`` warnings are
        issued for thermodynamic states left without data.
        """
        ttrajs, dtrajs_full, btrajs = X
        # shape and type checks
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        for t in ttrajs:
            _types.assert_array(t, ndim=1, kind='i')
        for d in dtrajs_full:
            _types.assert_array(d, ndim=1, kind='i')
        for b in btrajs:
            _types.assert_array(b, ndim=2, kind='f')
        # find dimensions: the number of states is the largest index in the
        # data plus one; a preset self.nstates_full may be larger, never smaller
        nstates_full = max(_np.max(d) for d in dtrajs_full) + 1
        if self.nstates_full is None:
            self.nstates_full = nstates_full
        elif self.nstates_full < nstates_full:
            raise RuntimeError(
                "Found more states (%d) than specified by nstates_full (%d)" %
                (nstates_full, self.nstates_full))
        self.nthermo = max(_np.max(t) for t in ttrajs) + 1
        # dimensionality checks: equal number of frames per trajectory and one
        # bias column per thermodynamic state
        for t, d, b, in zip(ttrajs, dtrajs_full, btrajs):
            assert t.shape[0] == d.shape[0] == b.shape[0]
            assert b.shape[1] == self.nthermo

        # cast types and change axis order if needed
        ttrajs = [
            _np.require(t, dtype=_np.intc, requirements='C') for t in ttrajs
        ]
        dtrajs_full = [
            _np.require(d, dtype=_np.intc, requirements='C')
            for d in dtrajs_full
        ]
        btrajs = [
            _np.require(b, dtype=_np.float64, requirements='C') for b in btrajs
        ]

        # if equilibrium information is given, separate the trajectories:
        # entries with a true flag in self.equilibrium go to the equilibrium_*
        # lists, all others stay in ttrajs / dtrajs_full / self.btrajs
        if self.equilibrium is not None:
            assert len(self.equilibrium) == len(ttrajs)
            _ttrajs, _dtrajs_full, _btrajs = ttrajs, dtrajs_full, btrajs
            ttrajs = [
                ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if not eq
            ]
            dtrajs_full = [
                dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full)
                if not eq
            ]
            self.btrajs = [
                btraj for eq, btraj in zip(self.equilibrium, _btrajs) if not eq
            ]
            equilibrium_ttrajs = [
                ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if eq
            ]
            equilibrium_dtrajs_full = [
                dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full)
                if eq
            ]
            self.equilibrium_btrajs = [
                btraj for eq, btraj in zip(self.equilibrium, _btrajs) if eq
            ]
        else:  # set dummy values
            equilibrium_ttrajs = []
            equilibrium_dtrajs_full = []
            self.equilibrium_btrajs = []
            self.btrajs = btrajs

        # find state visits and transition counts (non-equilibrium data only)
        state_counts_full = _util.state_counts(ttrajs,
                                               dtrajs_full,
                                               nstates=self.nstates_full,
                                               nthermo=self.nthermo)
        count_matrices_full = _util.count_matrices(ttrajs,
                                                   dtrajs_full,
                                                   self.lag,
                                                   sliding=self.count_mode,
                                                   sparse_return=False,
                                                   nstates=self.nstates_full,
                                                   nthermo=self.nthermo)
        self.therm_state_counts_full = state_counts_full.sum(axis=1)

        # state counts of the equilibrium data (all zero if there is none)
        # NOTE(review): the zero array below is float64 while the restricted
        # dummy further down is intc -- confirm the mismatch is intended
        if self.equilibrium is not None:
            self.equilibrium_state_counts_full = _util.state_counts(
                equilibrium_ttrajs,
                equilibrium_dtrajs_full,
                nstates=self.nstates_full,
                nthermo=self.nthermo)
        else:
            self.equilibrium_state_counts_full = _np.zeros(
                (self.nthermo, self.nstates_full), dtype=_np.float64)

        # compute the connected sets from both data sets combined; the second
        # return value becomes the active set
        pg = _ProgressReporter()
        stage = 'cset'
        with pg.context(stage=stage):
            self.csets, pcset = _cset.compute_csets_TRAM(
                self.connectivity,
                state_counts_full,
                count_matrices_full,
                equilibrium_state_counts=self.equilibrium_state_counts_full,
                ttrajs=ttrajs + equilibrium_ttrajs,
                dtrajs=dtrajs_full + equilibrium_dtrajs_full,
                bias_trajs=self.btrajs + self.equilibrium_btrajs,
                nn=self.nn,
                factor=self.connectivity_factor,
                callback=_IterationProgressIndicatorCallBack(
                    pg, 'finding connected set', stage=stage))
            self.active_set = pcset

        # check for empty states
        for k in range(self.nthermo):
            if len(self.csets[k]) == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no samples after reducing to the connected set.', EmptyState)

        # deactivate samples not in the csets, states are *not* relabeled
        self.state_counts, self.count_matrices, self.dtrajs, _ = _cset.restrict_to_csets(
            self.csets,
            state_counts=state_counts_full,
            count_matrices=count_matrices_full,
            ttrajs=ttrajs,
            dtrajs=dtrajs_full)

        # same restriction for the equilibrium data
        if self.equilibrium is not None:
            self.equilibrium_state_counts, _, self.equilibrium_dtrajs, _ = _cset.restrict_to_csets(
                self.csets,
                state_counts=self.equilibrium_state_counts_full,
                ttrajs=equilibrium_ttrajs,
                dtrajs=equilibrium_dtrajs_full)
        else:
            self.equilibrium_state_counts = _np.zeros(
                (self.nthermo, self.nstates_full),
                dtype=_np.intc)  # (remember: no relabeling)
            self.equilibrium_dtrajs = []

        # self-consistency tests: state counts must dominate the row and
        # column sums of the count matrices, and must agree with histograms of
        # the restricted trajectories (only entries >= 0 are counted;
        # presumably deactivated samples are marked with negative values)
        assert _np.all(self.state_counts >= _np.maximum(self.count_matrices.sum(axis=1), \
            self.count_matrices.sum(axis=2)))
        assert _np.all(
            _np.sum([
                _np.bincount(d[d >= 0], minlength=self.nstates_full)
                for d in self.dtrajs
            ],
                    axis=0) == self.state_counts.sum(axis=0))
        assert _np.all(
            _np.sum([
                _np.bincount(t[d >= 0], minlength=self.nthermo)
                for t, d in zip(ttrajs, self.dtrajs)
            ],
                    axis=0) == self.state_counts.sum(axis=1))
        if self.equilibrium is not None:
            assert _np.all(
                _np.sum([
                    _np.bincount(d[d >= 0], minlength=self.nstates_full)
                    for d in self.equilibrium_dtrajs
                ],
                        axis=0) == self.equilibrium_state_counts.sum(axis=0))
            assert _np.all(
                _np.sum([
                    _np.bincount(t[d >= 0], minlength=self.nthermo) for t, d in
                    zip(equilibrium_ttrajs, self.equilibrium_dtrajs)
                ],
                        axis=0) == self.equilibrium_state_counts.sum(axis=1))

        # check for empty states: warn when a thermodynamic state has neither
        # transition counts nor equilibrium counts left
        for k in range(self.state_counts.shape[0]):
            if self.count_matrices[k, :, :].sum(
            ) == 0 and self.equilibrium_state_counts[k, :].sum() == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no transitions and no equilibrium data after reducing to the connected set.', EmptyState)

        # optional MBAR initialisation; skipped when biased_conf_energies is
        # already set. It runs on the full (unrestricted) data of both sets.
        if self.init == 'mbar' and self.biased_conf_energies is None:
            if self.direct_space:
                mbar = _mbar_direct
            else:
                mbar = _mbar
            stage = 'MBAR init.'
            with pg.context(stage=stage):
                self.mbar_therm_energies, self.mbar_unbiased_conf_energies, \
                    self.mbar_biased_conf_energies, _ = mbar.estimate(
                        (state_counts_full.sum(axis=1)+self.equilibrium_state_counts_full.sum(axis=1)).astype(_np.intc),
                        self.btrajs+self.equilibrium_btrajs, dtrajs_full+equilibrium_dtrajs_full,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr),
                        n_conf_states=self.nstates_full)
            self.biased_conf_energies = self.mbar_biased_conf_energies.copy()

        # run estimator: pick the implementation modules according to
        # self.direct_space
        if self.direct_space:
            tram = _tram_direct
            trammbar = _trammbar_direct
        else:
            tram = _tram
            trammbar = _trammbar
        # (disabled) suppression of RuntimeWarning during the estimation:
        #import warnings
        #with warnings.catch_warnings() as cm:
        # warnings.filterwarnings('ignore', RuntimeWarning)
        stage = 'TRAM'
        with pg.context(stage=stage):
            if self.equilibrium is None:
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = tram.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        save_convergence_info=self.save_convergence_info,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations)
            else:  # use trammbar
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = trammbar.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        equilibrium_therm_state_counts=self.equilibrium_state_counts.sum(axis=1).astype(_np.intc),
                        equilibrium_bias_energy_sequences=self.equilibrium_btrajs, equilibrium_state_sequences=self.equilibrium_dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        save_convergence_info=self.save_convergence_info,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations,
                        overcounting_factor=self.overcounting_factor)

        # compute models: one transition matrix per thermodynamic state,
        # restricted to the active set in rows and columns
        # NOTE(review): this always calls _tram, also when self.direct_space
        # selected _tram_direct above -- confirm that this is intended
        fmsms = [
            _np.ascontiguousarray((_tram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.biased_conf_energies,
                self.count_matrices, None,
                K)[self.active_set, :])[:, self.active_set])
            for K in range(self.nthermo)
        ]

        # restrict every matrix to its largest (undirected) connected set
        active_sets = [
            _largest_connected_set(msm, directed=False) for msm in fmsms
        ]
        fmsms = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(fmsms, active_sets)
        ]

        # wrap each matrix in a _ThermoMSM; self.active_set[acs] maps the
        # per-matrix indices back to indices of the full state space
        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            models.append(
                _ThermoMSM(msm,
                           self.active_set[acs],
                           self.nstates_full,
                           pi=_np.exp(self.therm_energies[i] -
                                      self.biased_conf_energies[i, :]),
                           dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=conf_energies[self.active_set].copy())

        return self
Ejemplo n.º 5
0
    def _estimate(self, trajs):
        """Run the dTRAM estimation on the given trajectories.

        Parameters
        ----------
        trajs : tuple or list of length 2
            ``trajs[0]`` (ttrajs) and ``trajs[1]`` (dtrajs) are iterated in
            lockstep; every pair must consist of one-dimensional numeric
            arrays of equal length. Presumably these hold the
            thermodynamic-state and discrete-state index of each frame --
            confirm against the caller.

        Returns
        -------
        self
            The estimator itself, after counts, connected set, energies,
            convergence information and the per-state models have been stored
            and ``set_model_params`` has been called.

        Raises
        ------
        AssertionError
            If ``trajs`` is not a two-element tuple/list or a trajectory pair
            differs in length.
        """
        # check input
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs = trajs[0]
        dtrajs = trajs[1]
        # validate input
        for ttraj, dtraj in zip(ttrajs, dtrajs):
            _types.assert_array(ttraj, ndim=1, kind='numeric')
            _types.assert_array(dtraj, ndim=1, kind='numeric')
            assert _np.shape(ttraj)[0] == _np.shape(dtraj)[0]

        # harvest transition counts; nthermo is passed just like in the
        # state_counts call below so that both count arrays are built for the
        # same number of thermodynamic states
        self.count_matrices_full = _util.count_matrices(
            ttrajs,
            dtrajs,
            self.lag,
            sliding=self.count_mode,
            sparse_return=False,
            nthermo=self.nthermo,
            nstates=self.nstates_full)
        # harvest state counts (for WHAM)
        self.state_counts_full = _util.state_counts(ttrajs,
                                                    dtrajs,
                                                    nthermo=self.nthermo,
                                                    nstates=self.nstates_full)

        # restrict to connected set
        # TODO: use improved cset
        _, cset = _cset.compute_csets_dTRAM(self.connectivity,
                                            self.count_matrices_full)
        self.active_set = cset
        # correct counts: keep rows and columns of the connected set only
        self.count_matrices = _np.require(
            self.count_matrices_full[:, cset[:, _np.newaxis], cset],
            dtype=_np.intc,
            requirements=['C', 'A'])
        # correct bias matrix
        self.bias_energies = _np.require(self.bias_energies_full[:, cset],
                                         dtype=_np.float64,
                                         requirements=['C', 'A'])
        # correct state counts
        self.state_counts = _np.require(self.state_counts_full[:, cset],
                                        dtype=_np.intc,
                                        requirements=['C', 'A'])

        # run initialisation
        pg = _ProgressReporter()

        if self.init == 'wham':
            # WHAM provides starting values for the energies; its convergence
            # information is discarded
            stage = 'WHAM init.'
            with pg.context(stage=stage):
                self.therm_energies, self.conf_energies, _increments, _loglikelihoods = \
                    _wham.estimate(
                        self.state_counts, self.bias_energies,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr, save_convergence_info=0,
                        therm_energies=self.therm_energies, conf_energies=self.conf_energies,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr))

        # run estimator
        stage = 'DTRAM'
        with pg.context(stage=stage):
            self.therm_energies, self.conf_energies, self.log_lagrangian_mult, \
                self.increments, self.loglikelihoods = _dtram.estimate(
                    self.count_matrices, self.bias_energies,
                    maxiter=self.maxiter, maxerr=self.maxerr,
                    log_lagrangian_mult=self.log_lagrangian_mult,
                    conf_energies=self.conf_energies,
                    save_convergence_info=self.save_convergence_info,
                    callback=_ConvergenceProgressIndicatorCallBack(
                        pg, stage, self.maxiter, self.maxerr))

        # compute models: one transition matrix per thermodynamic state
        fmsms = [
            _dtram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.bias_energies,
                self.conf_energies, self.count_matrices,
                _np.zeros(shape=self.conf_energies.shape,
                          dtype=_np.float64), K) for K in range(self.nthermo)
        ]

        # restrict every matrix to its largest (undirected) connected set
        active_sets = [
            _largest_connected_set(msm, directed=False) for msm in fmsms
        ]
        fmsms = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(fmsms, active_sets)
        ]

        # wrap each matrix in a _ThermoMSM; self.active_set[acs] maps the
        # per-matrix indices back to indices of the full state space
        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            models.append(
                _ThermoMSM(
                    msm,
                    self.active_set[acs],
                    self.nstates_full,
                    pi=_np.exp(self.therm_energies[i] -
                               self.bias_energies[i, :] - self.conf_energies),
                    dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=self.conf_energies)

        # done
        return self
Ejemplo n.º 6
0
    def _estimate(self, X):
        """Run the MBAR estimation on X and store the results on self.

        Parameters
        ----------
        X : sequence of three lists
            Unpacked as ``(ttrajs, dtrajs_full, btrajs)``. The lists must have
            equal length. Elements of ttrajs and dtrajs_full are checked with
            ``_types.assert_array(..., ndim=1, kind='i')``, elements of btrajs
            with ``_types.assert_array(..., ndim=2, kind='f')``. Per
            trajectory, the three arrays must agree in their length along
            axis 0 and the btrajs array must have ``self.nthermo`` columns.

        Returns
        -------
        self
            The estimator itself, after dimensions, counts, active set,
            energies, ``increments``, ``loglikelihoods`` and ``btrajs`` have
            been stored and ``set_model_params`` has been called.
        """
        ttrajs, dtrajs_full, btrajs = X

        # all three lists must describe the same number of trajectories
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        # type checks, list by list: ttrajs first, then dtrajs, then btrajs
        checks = ((ttrajs, 1, 'i'), (dtrajs_full, 1, 'i'), (btrajs, 2, 'f'))
        for arrays, ndim, kind in checks:
            for arr in arrays:
                _types.assert_array(arr, ndim=ndim, kind=kind)

        # dimensions follow from the largest index found in the data
        self.nstates_full = max(map(_np.max, dtrajs_full)) + 1
        self.nthermo = max(map(_np.max, ttrajs)) + 1

        # per trajectory: equal number of frames, one bias column per
        # thermodynamic state
        for therm, disc, bias in zip(ttrajs, dtrajs_full, btrajs):
            assert therm.shape[0] == disc.shape[0] == bias.shape[0]
            assert bias.shape[1] == self.nthermo

        def as_c_arrays(arrays, dtype):
            # cast to the given dtype and C memory order where necessary
            return [
                _np.require(a, dtype=dtype, requirements='C') for a in arrays
            ]

        ttrajs = as_c_arrays(ttrajs, _np.intc)
        dtrajs_full = as_c_arrays(dtrajs_full, _np.intc)
        btrajs = as_c_arrays(btrajs, _np.float64)

        # state visits, and their sum along axis 1
        self.state_counts_full = _util.state_counts(ttrajs, dtrajs_full)
        self.therm_state_counts_full = self.state_counts_full.sum(axis=1)

        # active set: states with at least one visit (summed over axis 0)
        visited = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.sort(_np.nonzero(visited)[0])
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(
            active_counts.astype(_np.intc))

        # choose the implementation and run it with progress reporting
        mbar = _mbar_direct if self.direct_space else _mbar
        reporter = _ProgressReporter()
        with reporter.context():
            progress_cb = _ConvergenceProgressIndicatorCallBack(
                reporter, 'MBAR', self.maxiter, self.maxerr)
            mbar_result = mbar.estimate(
                self.state_counts_full.sum(axis=1),
                btrajs,
                dtrajs_full,
                maxiter=self.maxiter,
                maxerr=self.maxerr,
                save_convergence_info=self.save_convergence_info,
                callback=progress_cb,
                n_conf_states=self.nstates_full)
            (self.therm_energies, self.unbiased_conf_energies_full,
             self.biased_conf_energies_full, self.increments) = mbar_result

        # loglikelihoods become NaN with the layout of increments; if that
        # product raises TypeError (presumably increments is None), store None
        try:
            self.loglikelihoods = _np.nan * self.increments
        except TypeError:
            self.loglikelihoods = None

        # one stationary model per thermodynamic state, on the active set
        models = []
        for k in range(self.nthermo):
            models.append(
                _StationaryModel(
                    f=self.biased_conf_energies_full[k, self.active_set],
                    normalize_energy=False,
                    label="K=%d" % k))

        # publish the models and energies as model parameters of self
        self.set_model_params(
            models=models,
            f_therm=self.therm_energies,
            f=self.unbiased_conf_energies_full[self.active_set])

        self.btrajs = btrajs

        return self