예제 #1
0
    def _estimate(self, trajs):
        """Estimate the dTRAM model from paired trajectories.

        The method counts transitions and state visits, restricts all
        quantities to the connected set, optionally initialises the free
        energies with WHAM, runs the dTRAM iteration and finally builds one
        ``_ThermoMSM`` per thermodynamic state.

        Parameters
        ----------
        trajs : tuple or list of length 2
            ``trajs[0]`` is ``ttrajs`` and ``trajs[1]`` is ``dtrajs``. Both are
            sequences of 1-D numeric arrays, and the i-th arrays of the two
            sequences must have the same length. (Presumably ``ttrajs`` holds
            thermodynamic-state indices and ``dtrajs`` configuration-state
            indices per frame -- confirm against the caller.)

        Returns
        -------
        self
            The estimator itself, after ``set_model_params`` has been called
            with the per-state models, ``f_therm`` and ``f``.

        Raises
        ------
        AssertionError
            If ``trajs`` is not a 2-element tuple/list, if the two trajectory
            collections differ in length, or if a trajectory pair differs in
            length.
        """
        # check input
        assert isinstance(trajs, (tuple, list)), \
            'trajs must be a tuple or list'
        assert len(trajs) == 2, \
            'trajs must contain exactly two entries: (ttrajs, dtrajs)'
        ttrajs = trajs[0]
        dtrajs = trajs[1]
        # zip() below would silently drop unpaired trajectories
        assert len(ttrajs) == len(dtrajs), \
            'ttrajs and dtrajs must contain the same number of trajectories'
        # validate input
        for ttraj, dtraj in zip(ttrajs, dtrajs):
            _types.assert_array(ttraj, ndim=1, kind='numeric')
            _types.assert_array(dtraj, ndim=1, kind='numeric')
            assert _np.shape(ttraj)[0] == _np.shape(dtraj)[0], \
                'paired ttraj and dtraj must have the same number of frames'

        # harvest transition counts
        self.count_matrices_full = _util.count_matrices(
            ttrajs,
            dtrajs,
            self.lag,
            sliding=self.count_mode,
            sparse_return=False,
            nstates=self.nstates_full)
        # harvest state counts (for WHAM)
        self.state_counts_full = _util.state_counts(
            ttrajs,
            dtrajs,
            nthermo=self.nthermo,
            nstates=self.nstates_full)

        # restrict to connected set
        # TODO: use improved cset
        _, cset = _cset.compute_csets_dTRAM(self.connectivity,
                                            self.count_matrices_full)
        self.active_set = cset
        # correct counts: keep only rows/columns of states in the connected set
        self.count_matrices = self.count_matrices_full[:, cset[:, _np.newaxis],
                                                       cset]
        self.count_matrices = _np.require(self.count_matrices,
                                          dtype=_np.intc,
                                          requirements=['C', 'A'])
        # correct bias matrix
        self.bias_energies = self.bias_energies_full[:, cset]
        self.bias_energies = _np.require(self.bias_energies,
                                         dtype=_np.float64,
                                         requirements=['C', 'A'])
        # correct state counts
        self.state_counts = self.state_counts_full[:, cset]
        self.state_counts = _np.require(self.state_counts,
                                        dtype=_np.intc,
                                        requirements=['C', 'A'])

        # run initialisation
        pg = _ProgressReporter()

        if self.init == 'wham':
            stage = 'WHAM init.'
            with pg.context(stage=stage):
                # increments / log-likelihoods of the initialisation are discarded
                self.therm_energies, self.conf_energies, _increments, _loglikelihoods = \
                    _wham.estimate(
                        self.state_counts, self.bias_energies,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr, save_convergence_info=0,
                        therm_energies=self.therm_energies, conf_energies=self.conf_energies,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr))

        # run estimator
        stage = 'DTRAM'
        with pg.context(stage=stage):
            self.therm_energies, self.conf_energies, self.log_lagrangian_mult, \
                self.increments, self.loglikelihoods = _dtram.estimate(
                    self.count_matrices, self.bias_energies,
                    maxiter=self.maxiter, maxerr=self.maxerr,
                    log_lagrangian_mult=self.log_lagrangian_mult,
                    conf_energies=self.conf_energies,
                    save_convergence_info=self.save_convergence_info,
                    callback=_ConvergenceProgressIndicatorCallBack(
                        pg, stage, self.maxiter, self.maxerr))

        # compute models: one transition matrix per thermodynamic state,
        # defined on the connected set
        fmsms = [
            _dtram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.bias_energies,
                self.conf_energies, self.count_matrices,
                _np.zeros(shape=self.conf_energies.shape,
                          dtype=_np.float64), K) for K in range(self.nthermo)
        ]

        # each matrix is further reduced to its own largest connected subset;
        # the indices in active_sets refer to positions within self.active_set
        active_sets = [
            _largest_connected_set(msm, directed=False) for msm in fmsms
        ]
        fmsms = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(fmsms, active_sets)
        ]

        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            # The stationary vector must live on the same states as msm, so it
            # is restricted to acs and renormalised over that subset.
            pi_acs = _np.exp(self.therm_energies[i] -
                             self.bias_energies[i, :] -
                             self.conf_energies)[acs]
            pi_acs = pi_acs / pi_acs.sum()
            models.append(
                _ThermoMSM(
                    msm,
                    self.active_set[acs],
                    self.nstates_full,
                    pi=pi_acs,
                    dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=self.conf_energies)

        # done
        return self
예제 #2
0
    def _estimate(self, X):
        """Estimate the TRAM (or TRAMMBAR) model from trajectory data.

        Parameters
        ----------
        X : tuple ``(ttrajs, dtrajs_full, btrajs)``
            Three equally long sequences. ``ttrajs`` and ``dtrajs_full`` hold
            1-D integer arrays, ``btrajs`` holds 2-D float arrays. For every
            trajectory the three arrays share their first dimension, and the
            second dimension of each ``btrajs`` entry must equal
            ``self.nthermo`` (which is derived here as ``max(ttrajs) + 1``).

        Returns
        -------
        self
            The estimator, after ``set_model_params`` has been called with one
            ``_ThermoMSM`` per thermodynamic state, ``f_therm`` and ``f``.

        Raises
        ------
        RuntimeError
            If the data contain more discrete states than ``self.nstates_full``.
        AssertionError
            If shapes/lengths of the inputs are inconsistent or one of the
            internal self-consistency checks fails.

        Notes
        -----
        If ``self.equilibrium`` is given it is used as a per-trajectory flag:
        flagged trajectories are split off and only contribute state counts
        and bias energies (TRAMMBAR branch), not transition counts.
        An ``EmptyState`` warning is issued for thermodynamic states that end
        up without samples / without transitions and equilibrium data.
        """
        ttrajs, dtrajs_full, btrajs = X
        # shape and type checks
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        for t in ttrajs:
            _types.assert_array(t, ndim=1, kind='i')
        for d in dtrajs_full:
            _types.assert_array(d, ndim=1, kind='i')
        for b in btrajs:
            _types.assert_array(b, ndim=2, kind='f')
        # find dimensions: number of discrete states is the largest index + 1
        nstates_full = max(_np.max(d) for d in dtrajs_full) + 1
        if self.nstates_full is None:
            self.nstates_full = nstates_full
        elif self.nstates_full < nstates_full:
            raise RuntimeError("Found more states (%d) than specified by nstates_full (%d)" % (
                nstates_full, self.nstates_full))
        self.nthermo = max(_np.max(t) for t in ttrajs) + 1
        # dimensionality checks
        for t, d, b, in zip(ttrajs, dtrajs_full, btrajs):
            assert t.shape[0] == d.shape[0] == b.shape[0]
            assert b.shape[1] == self.nthermo

        # cast types and change axis order if needed
        ttrajs = [_np.require(t, dtype=_np.intc, requirements='C') for t in ttrajs]
        dtrajs_full = [_np.require(d, dtype=_np.intc, requirements='C') for d in dtrajs_full]
        btrajs = [_np.require(b, dtype=_np.float64, requirements='C') for b in btrajs]

        # if equilibrium information is given, separate the trajectories
        # (entries of self.equilibrium are used as truthy per-trajectory flags)
        if self.equilibrium is not None:
            assert len(self.equilibrium) == len(ttrajs)
            _ttrajs, _dtrajs_full, _btrajs = ttrajs, dtrajs_full, btrajs
            ttrajs = [ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if not eq]
            dtrajs_full = [dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full) if not eq]
            self.btrajs = [btraj for eq, btraj in zip(self.equilibrium, _btrajs) if not eq]
            equilibrium_ttrajs = [ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if eq]
            equilibrium_dtrajs_full = [dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full) if eq]
            self.equilibrium_btrajs = [btraj for eq, btraj in zip(self.equilibrium, _btrajs) if eq]
        else: # set dummy values
            equilibrium_ttrajs = []
            equilibrium_dtrajs_full = []
            self.equilibrium_btrajs = []
            self.btrajs = btrajs

        # find state visits and transition counts
        # (from here on ttrajs/dtrajs_full exclude the equilibrium trajectories)
        state_counts_full = _util.state_counts(ttrajs, dtrajs_full, nstates=self.nstates_full, nthermo=self.nthermo)
        count_matrices_full = _util.count_matrices(ttrajs, dtrajs_full,
            self.lag, sliding=self.count_mode, sparse_return=False, nstates=self.nstates_full, nthermo=self.nthermo)
        self.therm_state_counts_full = state_counts_full.sum(axis=1)

        # state visits of the equilibrium trajectories (all zero if there are none)
        if self.equilibrium is not None:
            self.equilibrium_state_counts_full = _util.state_counts(equilibrium_ttrajs, equilibrium_dtrajs_full,
                nstates=self.nstates_full, nthermo=self.nthermo)
        else:
            self.equilibrium_state_counts_full = _np.zeros((self.nthermo, self.nstates_full), dtype=_np.float64)

        # determine the connected sets; the search sees both the regular and
        # the equilibrium trajectories (concatenated lists)
        pg = _ProgressReporter()
        stage = 'cset'
        with pg.context(stage=stage):
            self.csets, pcset = _cset.compute_csets_TRAM(
                self.connectivity, state_counts_full, count_matrices_full,
                equilibrium_state_counts=self.equilibrium_state_counts_full,
                ttrajs=ttrajs+equilibrium_ttrajs, dtrajs=dtrajs_full+equilibrium_dtrajs_full, bias_trajs=self.btrajs+self.equilibrium_btrajs,
                nn=self.nn, factor=self.connectivity_factor,
                callback=_IterationProgressIndicatorCallBack(pg, 'finding connected set', stage=stage))
            self.active_set = pcset

        # check for empty states (one cset per thermodynamic state is inspected)
        for k in range(self.nthermo):
            if len(self.csets[k]) == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no samples after reducing to the connected set.', EmptyState)

        # deactivate samples not in the csets, states are *not* relabeled
        self.state_counts, self.count_matrices, self.dtrajs, _  = _cset.restrict_to_csets(
            self.csets,
            state_counts=state_counts_full, count_matrices=count_matrices_full,
            ttrajs=ttrajs, dtrajs=dtrajs_full)

        # same restriction for the equilibrium data (no count matrices there)
        if self.equilibrium is not None:
            self.equilibrium_state_counts, _, self.equilibrium_dtrajs, _ =  _cset.restrict_to_csets(
                self.csets,
                state_counts=self.equilibrium_state_counts_full, ttrajs=equilibrium_ttrajs, dtrajs=equilibrium_dtrajs_full)
        else:
            self.equilibrium_state_counts = _np.zeros((self.nthermo, self.nstates_full), dtype=_np.intc) # (remember: no relabeling)
            self.equilibrium_dtrajs = []

        # self-consistency tests
        # 1) a state must be visited at least as often as it appears as
        #    start or end point of a counted transition
        assert _np.all(self.state_counts >= _np.maximum(self.count_matrices.sum(axis=1), \
            self.count_matrices.sum(axis=2)))
        # 2) histograms of the restricted trajectories must reproduce the state
        #    counts; only frames with d >= 0 are counted (deactivated frames
        #    are presumably marked with negative indices -- confirm in _cset)
        assert _np.all(_np.sum(
            [_np.bincount(d[d>=0], minlength=self.nstates_full) for d in self.dtrajs],
            axis=0) == self.state_counts.sum(axis=0))
        assert _np.all(_np.sum(
            [_np.bincount(t[d>=0], minlength=self.nthermo) for t, d in zip(ttrajs, self.dtrajs)],
            axis=0) == self.state_counts.sum(axis=1))
        # 3) the same histogram checks for the equilibrium trajectories
        if self.equilibrium is not None:
            assert _np.all(_np.sum(
                [_np.bincount(d[d >= 0], minlength=self.nstates_full) for d in self.equilibrium_dtrajs],
                axis=0) == self.equilibrium_state_counts.sum(axis=0))
            assert _np.all(_np.sum(
                [_np.bincount(t[d >= 0], minlength=self.nthermo) for t, d in zip(equilibrium_ttrajs, self.equilibrium_dtrajs)],
                axis=0) ==  self.equilibrium_state_counts.sum(axis=1))

        # check for empty states: neither transitions nor equilibrium samples left
        for k in range(self.state_counts.shape[0]):
            if self.count_matrices[k, :, :].sum() == 0 and self.equilibrium_state_counts[k, :].sum()==0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no transitions and no equilibrium data after reducing to the connected set.', EmptyState)

        # optional MBAR initialisation; skipped when the caller already
        # supplied biased_conf_energies
        if self.init == 'mbar' and self.biased_conf_energies is None:
            if self.direct_space:
                mbar = _mbar_direct
            else:
                mbar = _mbar
            stage = 'MBAR init.'
            with pg.context(stage=stage):
                # MBAR is fed the *unrestricted* counts and trajectories of both
                # the regular and the equilibrium data; its 4th return value is
                # discarded
                self.mbar_therm_energies, self.mbar_unbiased_conf_energies, \
                    self.mbar_biased_conf_energies, _ = mbar.estimate(
                        (state_counts_full.sum(axis=1)+self.equilibrium_state_counts_full.sum(axis=1)).astype(_np.intc),
                        self.btrajs+self.equilibrium_btrajs, dtrajs_full+equilibrium_dtrajs_full,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr),
                        n_conf_states=self.nstates_full)
            # copy so that the TRAM result below does not alias the MBAR result
            self.biased_conf_energies = self.mbar_biased_conf_energies.copy()

        # run estimator: pick the implementation matching self.direct_space
        if self.direct_space:
            tram = _tram_direct
            trammbar = _trammbar_direct
        else:
            tram = _tram
            trammbar = _trammbar
        # NOTE(review): a commented-out block at this point used to wrap the
        # estimation in warnings.catch_warnings() to ignore RuntimeWarning;
        # warnings are currently not suppressed.
        stage = 'TRAM'
        with pg.context(stage=stage):
            if self.equilibrium is None:
                # conf_energies stays a local; it is handed to set_model_params below
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = tram.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        save_convergence_info=self.save_convergence_info,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations)
            else: # use trammbar
                # same call as above plus the (restricted) equilibrium data
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = trammbar.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        equilibrium_therm_state_counts=self.equilibrium_state_counts.sum(axis=1).astype(_np.intc),
                        equilibrium_bias_energy_sequences=self.equilibrium_btrajs, equilibrium_state_sequences=self.equilibrium_dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        save_convergence_info=self.save_convergence_info,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations,
                        overcounting_factor=self.overcounting_factor)

        # compute models: one transition matrix per thermodynamic state,
        # cut down to the rows/columns of self.active_set
        # NOTE(review): _tram (not the `tram` selected above) is used here even
        # when self.direct_space is set -- confirm this is intended
        fmsms = [_np.ascontiguousarray((
            _tram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.biased_conf_energies, self.count_matrices, None,
                K)[self.active_set, :])[:, self.active_set]) for K in range(self.nthermo)]

        # reduce every matrix to its own largest connected subset; the indices
        # in active_sets are positions within self.active_set
        active_sets = [_largest_connected_set(msm, directed=False) for msm in fmsms]
        fmsms = [_np.ascontiguousarray(
            (msm[lcc, :])[:, lcc]) for msm, lcc in zip(fmsms, active_sets)]

        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            # stationary weights on exactly the states of msm, renormalised to sum to 1
            pi_acs = _np.exp(self.therm_energies[i] - self.biased_conf_energies[i, :])[self.active_set[acs]]
            pi_acs = pi_acs / pi_acs.sum()
            models.append(_ThermoMSM(
                msm, self.active_set[acs], self.nstates_full, pi=pi_acs,
                dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        # (f is the estimator's conf_energies restricted to the active set)
        self.set_model_params(
            models=models, f_therm=self.therm_energies, f=conf_energies[self.active_set].copy())

        return self