Example #1
0
    def setUpClass(cls):
        """Estimate the models shared by the tests of this class.

        Loads the ``dtraj_T100K_dt10`` discrete trajectory of the 2-well
        dataset from ``pyemma.datasets`` and stores four class attributes:
        a Markov model and a two-state hidden Markov model, each estimated
        at lag 1 and at lag 10.
        """
        import pyemma.datasets

        # Observations, shifted in place so that the smallest label becomes 0
        # (the original note describes this as removing empty states).
        dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10
        dtraj -= np.min(dtraj)

        # number of hidden states used for both HMSMs
        n_hidden = 2

        # keyword arguments common to both lag times
        msm_options = dict(reversible=True, connectivity='largest')
        hmsm_options = dict(reversible=True, observe_nonempty=True)

        # lag 1
        cls.msm_lag1 = msm.estimate_markov_model([dtraj], 1, **msm_options)
        cls.hmsm_lag1 = msm.estimate_hidden_markov_model(
            [dtraj], n_hidden, 1, **hmsm_options)

        # lag 10
        cls.msm_lag10 = msm.estimate_markov_model([dtraj], 10, **msm_options)
        cls.hmsm_lag10 = msm.estimate_hidden_markov_model(
            [dtraj], n_hidden, 10, **hmsm_options)
Example #2
0
 def test_separate_states(self):
     """Check the zero pattern of the observation matrix with ``separate=[0]``.

     A hidden Markov model is estimated from two short discrete
     trajectories (3 states requested, lag 1, ``separate=[0]``) and the
     listed (row, column) entries of ``observation_probabilities`` are
     required to be numerically zero.
     """
     dtrajs = [np.array([0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1]),
               np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2]),]
     hmm = msm.estimate_hidden_markov_model(dtrajs, 3, lag=1, separate=[0])
     # we expect zeros in all samples at the following indexes:
     # element k is observation_probabilities[rows[k], cols[k]]
     rows = [0, 1, 2, 2, 2]
     cols = [0, 0, 1, 2, 3]
     # Index with an explicit (rows, cols) pair: a bare list of lists is no
     # longer interpreted as a tuple of index arrays by recent NumPy
     # releases, which would select whole rows instead of single elements.
     assert np.allclose(hmm.observation_probabilities[rows, cols], 0)
Example #3
0
    def test_submodel_simple(self):
        """Sanity check for ``submodel_largest``.

        The call must return a reduced model with two states and must not
        alter the model it was invoked on.
        """
        from copy import deepcopy

        # Fixed trajectory. It used to be drawn at random
        # (np.random.randint(0, 2, size=100) with three random entries set
        # to 2) but was hard-coded because of stochastic failures.
        observations = [
            1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
            1, 0, 0, 0, 0, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
            1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
            1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
            0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0
        ]

        model = msm.estimate_hidden_markov_model(observations, 3, 2)
        # snapshot taken before the call under test
        snapshot = deepcopy(model)

        reduced = model.submodel_largest(mincount_connectivity=5)

        # the source model compares equal to its pre-call snapshot
        self.assertTrue(model == snapshot)

        # the reduced model has two states, hence a single timescale
        self.assertEqual(reduced.timescales().shape[0], 1)
        self.assertEqual(reduced.pi.shape[0], 2)
        self.assertEqual(reduced.transition_matrix.shape, (2, 2))
Example #4
0
 def markovModel(self, lag, macronum):
     """Estimate a hidden Markov model and keep it in ``self.hmm``.

     The trajectories are taken from ``self.data.St`` (converted with
     ``tolist()``); ``macronum`` is forwarded as the number of hidden
     states and ``lag`` as the lag time. The estimator is restricted to
     the largest connected set, and the resulting active set is printed.
     """
     from pyemma.msm import estimate_hidden_markov_model

     trajectories = self.data.St.tolist()
     self.hmm = estimate_hidden_markov_model(
         trajectories, macronum, lag, connectivity='largest')

     report = 'Active set includes macrostates: {}'.format(self.hmm.active_set)
     print(report)
Example #5
0
    def test_failfast_false(self):
        """Estimation and cktest must not raise when failfast is forced to False."""
        from pyemma._base.estimator import _estimate_param_scan_worker as real_worker

        failfast = False
        # NOTE(review): index 5 is presumably the position of the failfast
        # argument in the worker's positional arguments -- confirm against
        # pyemma._base.estimator.
        failfast_slot = 5

        @wraps(real_worker)
        def patched_worker(*args):
            # forward everything unchanged except the overridden slot
            forwarded = list(args)
            forwarded[failfast_slot] = failfast
            return real_worker(*forwarded)

        target = 'pyemma._base.estimator._estimate_param_scan_worker'
        with mock.patch(target, patched_worker):
            model = msm.estimate_hidden_markov_model([0, 0, 0, 1, 1, 1, 0, 0], 2, 1)
            model.cktest(n_jobs=1)
Example #6
0
    def test_failfast_true(self):
        """Estimation plus cktest must raise when failfast is forced to True."""
        from pyemma._base.estimator import _estimate_param_scan_worker as real_worker

        failfast = True
        # NOTE(review): index 5 is presumably the position of the failfast
        # argument in the worker's positional arguments -- confirm against
        # pyemma._base.estimator.
        failfast_slot = 5

        @wraps(real_worker)
        def patched_worker(*args):
            # forward everything unchanged except the overridden slot
            forwarded = list(args)
            forwarded[failfast_slot] = failfast
            return real_worker(*forwarded)

        target = 'pyemma._base.estimator._estimate_param_scan_worker'
        # the patch is entered inside the assertRaises context, as before
        with self.assertRaises(BaseException), mock.patch(target, patched_worker):
            model = msm.estimate_hidden_markov_model([0, 0, 0, 1, 1, 1, 0, 0], 2, 1)
            model.cktest(n_jobs=1)
Example #7
0
 def test_its_hmsm(self):
     """Compare the cktest of a 2-state HMSM (lag 10) with stored references.

     The model is estimated from ``dtraj_T100K_dt10_n6good`` of the
     double-well data and validated at multiples 0, 1 and 10 of the lag.
     Estimates and predictions must roughly agree with the reference
     matrices, and the first two confidence entries must be ``None``.
     """
     dtraj = self.double_well_data.dtraj_T100K_dt10_n6good
     hmsm = msm.estimate_hidden_markov_model([dtraj], 2, 10)
     ck = hmsm.cktest(mlags=[0, 1, 10])

     # the first two reference matrices are the same for estimates and predictions
     identity = [[1., 0.], [0., 1.]]
     single_lag = [[0.98515058, 0.01484942], [0.01442843, 0.98557157]]
     expected_estimates = np.array([
         identity,
         single_lag,
         [[0.88172685, 0.11827315], [0.11878823, 0.88121177]],
     ])
     expected_predictions = np.array([
         identity,
         single_lag,
         [[0.86961812, 0.13038188], [0.12668553, 0.87331447]],
     ])

     def roughly_equal(values, reference):
         # rough agreement with MLE
         # NOTE(review): every reference entry lies in [0, 1], so atol=10.0
         # looks far more permissive than rtol=0.1 -- confirm it is intended.
         return np.allclose(values, reference, rtol=0.1, atol=10.0)

     assert roughly_equal(ck.estimates, expected_estimates)
     assert ck.estimates_conf[0] is None
     assert ck.estimates_conf[1] is None
     assert roughly_equal(ck.predictions, expected_predictions)
     assert ck.predictions_conf[0] is None
     assert ck.predictions_conf[1] is None
        #with open('clustering'+sys_nm+'_trajfrac-'+str(traj_frac)+'.pkl', 'rb') as f:
        #    clustering = pickle.load(f)
        dtrajs = clustering.dtrajs
        cc = clustering.clustercenters[:, 0]
        print 'n_clusters = ' + str(len(cc))
    else:
        dtrajs = None

    # send the dtraj info
    dtrajs = comm.bcast(dtrajs, root=0)

    # HMSM
    if (rank < size - Nign):  # leave out some processors
        nstates = n_Hstates
        for lag in range(lag0[traj_frac], len(lags)):

            print 'Starting lag ' + str(lag) + ' of ' + str(len(lags))
            hmsm = msm.estimate_hidden_markov_model(dtrajs,
                                                    nstates,
                                                    lags[lag],
                                                    reversible=True,
                                                    stationary=False,
                                                    stride=1)
            #hmsm = msm.bayesian_hidden_markov_model(dtrajs, nstates, lags[lag], nsamples=Nsamples, reversible=True, stationary=False, stride=1, conf=Iconf)
            save_object(
                'HMSM_' + sys_nm + '_trajfrac-' + str(traj_frac) + '_lag-' +
                str(lags[lag]) + '.pkl', hmsm)
            # already did this, read it in
            #with open('HMSM_'+sys_nm+'_trajfrac-'+str(traj_frac)+'_lag-'+str(lags[lag])+'.pkl', 'rb') as f:
            #    hmsm = pickle.load(f)
Example #9
0
 def markovModel(self, lag, macronum):
     """Estimate a hidden Markov model and keep it in ``self.hmm``.

     The trajectories are taken from ``self.data.St`` (converted with
     ``tolist()``); ``macronum`` is forwarded as the number of hidden
     states and ``lag`` as the lag time. The estimator is restricted to
     the largest connected set, and the resulting active set is printed.
     """
     from pyemma.msm import estimate_hidden_markov_model

     trajectories = self.data.St.tolist()
     self.hmm = estimate_hidden_markov_model(
         trajectories, macronum, lag, connectivity='largest')

     report = 'Active set includes macrostates: {}'.format(self.hmm.active_set)
     print(report)
Example #10
0
connectivity_type = 'largest'

# Lag-time grid: n_lags integer values starting at `initial` and spaced by
# ending / n_lags (truncated to int).
n_lags = 20
initial = 10
ending = 400
interval = ending * 1.0 / n_lags
lag_times = [int(initial + step * interval) for step in range(n_lags)]

# Single-argument print(...) calls produce the same output whether they are
# parsed as Python 2 print statements or as Python 3 function calls.
print(lag_times)
print("unit is 20 ps")

# Estimate one hidden Markov model per lag time and dump its matrices to
# text files whose names encode the number of macrostates and the lag time.
for lagtime in lag_times:
    # The double space is deliberate: it reproduces the output of the former
    # `print "now we are dealing with lagtime ", value` statement.
    print("now we are dealing with lagtime  {}".format(lagtime))
    mm = msm.estimate_hidden_markov_model(kcenters_sequences,
                                          nMacro,
                                          lag=lagtime,
                                          reversible=reversible_type,
                                          connectivity=connectivity_type)
    np.savetxt(
        "hmm_%d_state_%d_lagtime_transition_matrix.txt" % (nMacro, lagtime),
        mm.transition_matrix)
    np.savetxt(
        "hmm_%d_state_%d_lagtime_stationary_pop.txt" % (nMacro, lagtime),
        mm.stationary_distribution)
    # NOTE: "emisson" is kept as spelled, since the file name may be relied
    # on by downstream consumers.
    np.savetxt(
        "hmm_%d_state_%d_lagtime_emisson.txt" % (nMacro, lagtime),
        mm.metastable_distributions)
    np.savetxt(
        "hmm_%d_state_%d_lagtime_membership.txt" % (nMacro, lagtime),
        mm.metastable_memberships)
    print(mm.timescales())
    print('tau = ', tau)
    for k in [ks[-1]]:
        print('\tk = ', k)

        m = MaximumLikelihoodMSM(lag=tau,
                                 connectivity='largest',
                                 reversible=True)
        m.fit(erg_dtrajs)

        assert m.active_count_fraction == 1.0, 'Active count fraction not 1.0'

        print('\tFitting HMM')

        hmm = estimate_hidden_markov_model(dtrajs=erg_dtrajs,
                                           nstates=int(k),
                                           lag=tau,
                                           stationary=False,
                                           reversible=True,
                                           connectivity='largest')
        #hmm = MaximumLikelihoodHMSM(nstates=int(k), lag=tau, stationary=False, reversible=True, connectivity='largest', msm_init=m)
        #hmm.fit(erg_dtrajs[:2])

        results['k'].append(k)
        results['tau'].append(tau)
        results['bic'].append(bic(hmm))
        results['aic'].append(aic(hmm))
        results['icl'].append(icl(hmm))
        results['entropy'].append(class_entropy(hmm))
        results['dofs'].append(dof(hmm))
        results['n_obs'].append(n_obs(hmm))

    pd.DataFrame(results).to_pickle('h_state_selection_tau-{}.p'.format(tau))