Ejemplo n.º 1
0
 def test_2(self):
     """Compare the estimate for ``self.dtraj2`` with the reference from ``self.P2``.

     The first entry of the estimated timescales has to lie strictly inside
     the interval (reference - 2.0, reference + 2.0).
     """
     reference = timescales(self.P2)[1]
     estimator = msm.timescales_msm([self.dtraj2], lags=[1, 2, 3, 4, 5])
     estimated = estimator.timescales[0]
     # Upper bound is checked first, then the lower bound.
     upper, lower = reference + 2.0, reference - 2.0
     np.testing.assert_array_less(estimated, upper)
     np.testing.assert_array_less(lower, estimated)
Ejemplo n.º 2
0
 def test_2(self):
     """Compare the estimate for ``self.dtraj2`` with the reference from ``self.P2``.

     The first entry of the estimated timescales has to lie strictly inside
     the interval (t2 - 2.0, t2 + 2.0).
     """
     t2 = timescales(self.P2)[1]
     lags = [1, 2, 3, 4, 5]
     its = msm.timescales_msm([self.dtraj2], lags=lags)
     est = its.timescales[0]
     # np.all replaces np.alltrue, which was deprecated and later removed
     # from NumPy; the truth value computed is the same.
     assert np.all(est < t2 + 2.0)
     assert np.all(est > t2 - 2.0)
Ejemplo n.º 3
0
 def test_4_2(self):
     """Compare the estimate for ``self.dtraj4_2`` with the reference from ``self.P4``.

     A single lag time, the integer part of the reference timescale, is used.
     The first entry of the estimated timescales has to lie strictly inside
     the interval (reference - 20.0, reference + 20.0).
     """
     reference = timescales(self.P4)[1]
     estimator = msm.timescales_msm([self.dtraj4_2], lags=[int(reference)])
     estimated = estimator.timescales[0]
     # Upper bound is checked first, then the lower bound.
     upper, lower = reference + 20.0, reference - 20.0
     np.testing.assert_array_less(estimated, upper)
     np.testing.assert_array_less(lower, estimated)
Ejemplo n.º 4
0
 def test_4_2(self):
     """Compare the estimate for ``self.dtraj4_2`` with the reference from ``self.P4``.

     A single lag time, the integer part of the reference timescale, is used.
     The first entry of the estimated timescales has to lie strictly inside
     the interval (t4 - 20.0, t4 + 20.0).
     """
     t4 = timescales(self.P4)[1]
     lags = [int(t4)]
     its = msm.timescales_msm([self.dtraj4_2], lags=lags)
     est = its.timescales[0]
     # np.all replaces np.alltrue, which was deprecated and later removed
     # from NumPy; the truth value computed is the same.
     assert np.all(est < t4 + 20.0)
     assert np.all(est > t4 - 20.0)
Ejemplo n.º 5
0
 def test_timescales(self):
     """``mincount_connectivity=0`` must be visible on the estimator of the result."""
     from pyemma.msm import timescales_msm
     # Keyword arguments collected up front; they are forwarded unchanged.
     options = dict(lags=[1, 2], mincount_connectivity=0, errors=None)
     result = timescales_msm(self.dtraj, **options)
     assert result.estimator.mincount_connectivity == 0
Ejemplo n.º 6
0
 def test_its_msm(self):
     """Estimated timescales for four lag times must roughly match stored reference values."""
     dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
     its = msm.timescales_msm(dtrajs, lags=[1, 10, 100, 1000])
     # One row per lag time, in the order the lags are passed above.
     expected = np.array([
         [174.22244263, 3.98335928, 1.61419816, 1.1214093, 0.87692952],
         [285.56862305, 6.66532284, 3.05283223, 2.6525504, 1.9138432],
         [325.35442195, 24.17388446, 20.52185604, 20.10058217, 17.35451648],
         [343.53679359, 255.92796581, 196.26969348, 195.56163418, 170.58422303],
     ])
     # rough agreement with MLE
     assert np.allclose(its.timescales, expected, rtol=0.1, atol=10.0)
Ejemplo n.º 7
0
    def compute_nice(self, reversible):
        """
        Run the default implied-timescales estimation on every entry of ``self.dtrajs``.

        The check passes when no call raises; the estimates themselves are
        not inspected.

        :param reversible: forwarded to ``msm.timescales_msm`` as ``reversible``.
        :return: None
        """
        # Iterate over the entries directly instead of indexing by position;
        # the result of each estimation is intentionally discarded.
        for dtrajs in self.dtrajs:
            msm.timescales_msm(dtrajs, reversible=reversible)
Ejemplo n.º 8
0
 def test_too_large_lagtime(self):
     """Lag times that are too large for the short trajectory must be dropped."""
     import warnings
     short_traj = [[0, 1, 1, 1, 0]]
     requested = list(range(1, 9))
     kept = [1, 2]  # 3, 4 is impossible because no finite timescales.
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         estimator = msm.timescales_msm(short_traj, lags=requested, reversible=False)
         # FIXME: we do not trigger a UserWarning, but msmtools.exceptions.SpectralWarning, intended?
         #assert issubclass(caught[-1].category, UserWarning)
     np.testing.assert_equal(estimator.lags, kept)
Ejemplo n.º 9
0
    def _algorithm(self):
        """Post-process the available data and write inputs for new simulations.

        Steps, in order:

        1. Build the simulation list from ``self.datapath`` / ``self.inputpath``
           and filter it with ``self.filtersel``.
        2. Project it with ``MetricDistance`` when ``self.metricsel2`` is set,
           otherwise with ``MetricSelfDistance``.
        3. Reduce with TICA when ``self.ticadim > 0``.
        4. Cluster, pick the number of macrostates and build the Markov model.
        5. Compute spawn counts, sample frames for the selected states and hand
           them to ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        datalist = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', 'structure.pdb')),
                           glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Number of clusters: logarithmic heuristic in the frame count, with a
        # floor of 100 and a cap of a third of the available frames.
        K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))  # heuristic
        if K > datadr.numFrames / 3: # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            # Too few clusters left: cluster again without ``mergesmall`` and
            # sample with replacement later on.
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # np.ceil returns a float; cast so that ``nits`` below is an integer.
            macronum = int(np.ceil(datadr.K / 2))
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        # Limit the number of macrostates to the number of timescales above the
        # lag time (at least 2, at most the requested ``self.macronum``).
        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax-self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Ejemplo n.º 10
0
    def _numMacrostates(self, data):
        """ Heuristic for calculating the number of macrostates for the Markov model

        Starts from ``self.macronum``; when ``data.K`` is smaller, half of
        ``data.K`` (rounded up) is used for the timescale estimation instead.
        The returned value is the number of timescales above ``self.lag``,
        clamped to at least 2 and at most ``self.macronum``.
        """
        macronum = self.macronum
        if data.K < macronum:
            # np.ceil returns a float; cast so that ``nits`` below is an integer.
            macronum = int(np.ceil(data.K / 2))
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        # Calculating how many timescales are above the lag time to limit number of macrostates
        from pyemma.msm import timescales_msm
        timesc = timescales_msm(data.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))
        return macronum
Ejemplo n.º 11
0
    def _numMacrostates(self, data):
        """ Heuristic for calculating the number of macrostates for the Markov model

        Starts from ``self.macronum``; when ``data.K`` is smaller, half of
        ``data.K`` (rounded up) is used for the timescale estimation instead.
        The returned value is the number of timescales above ``self.lag``,
        clamped to at least 2 and at most ``self.macronum``.
        """
        macronum = self.macronum
        if data.K < macronum:
            # np.ceil returns a float; cast so that ``nits`` below is an integer.
            macronum = int(np.ceil(data.K / 2))
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        # Calculating how many timescales are above the lag time to limit number of macrostates
        from pyemma.msm import timescales_msm
        timesc = timescales_msm(data.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))
        return macronum
Ejemplo n.º 12
0
 def test_too_large_lagtime(self):
     """Lag times that are too large for the short trajectory must be dropped."""
     short_traj = [[0, 1, 1, 1, 0]]
     requested = list(range(1, 9))
     wanted = [1, 2]  # 3, 4 is impossible because no finite timescales.
     estimator = msm.timescales_msm(short_traj, lags=requested, reversible=False)
     # TODO: should catch warnings!
     # with warnings.catch_warnings(record=True) as w:
     # warnings.simplefilter("always")
     # assert issubclass(w[-1].category, UserWarning)
     actual = estimator.lagtimes
     # Shape is compared first so that a length mismatch is reported as such.
     assert np.shape(actual) == np.shape(wanted)
     assert np.allclose(actual, wanted)
Ejemplo n.º 13
0
 def test_too_large_lagtime(self):
     """Lag times that are too large for the short trajectory must be dropped."""
     import warnings
     short_traj = [[0, 1, 1, 1, 0]]
     requested = list(range(1, 9))
     # 3, 4 is impossible because no finite timescales.
     kept = [1, 2]
     # Warnings raised during estimation are recorded, not inspected.
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         estimator = msm.timescales_msm(
             short_traj, lags=requested, reversible=False, n_jobs=1)
     np.testing.assert_equal(estimator.lags, kept)
Ejemplo n.º 14
0
 def test_fraction_of_frames(self):
     """``fraction_of_frames`` must be 1 for lags 1 and 2 and reduced for lag 3.

     For lag 3 the expected value is the number of frames in the trajectories
     from index 2 onward divided by the number of frames in all trajectories.
     """
     trajectories = [
         [0, 1, 0],  # These two will fail for lag >2
         [1, 0, 1],  # These two will fail for lag >2
         [0, 1, 1, 1],
         [1, 0, 0, 1],
         [0, 1, 0, 1, 0],
         [1, 0, 1, 0, 1],
     ]
     frame_counts = list(map(len, trajectories))
     estimator = msm.timescales_msm(trajectories, lags=[1, 2, 3])
     total = np.sum(frame_counts)
     usable = np.sum(frame_counts[2:])
     expected = np.array([1, 1, usable / total])
     assert np.allclose(estimator.fraction_of_frames, expected)
Ejemplo n.º 15
0
 def test_its_bmsm(self):
     """Bayesian timescale estimates must roughly match stored reference values.

     In addition, the first entry of the timescales must lie strictly between
     the first entries of the 0.999 sample confidence bounds.
     """
     dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
     its = msm.timescales_msm(dtrajs,
                              lags=[10, 50, 200],
                              errors='bayes',
                              nsamples=1000,
                              n_jobs=1)
     # One row per lag time, in the order the lags are passed above.
     expected = np.array([
         [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
         [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
         [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
     ])
     # rough agreement with MLE
     assert np.allclose(its.timescales, expected, rtol=0.1, atol=10.0)
     # within left / right intervals. This test should fail only 1 out of 1000 times.
     lower, upper = its.get_sample_conf(conf=0.999)
     # we only test the first timescale, because the second is already ambiguous (deviations after the first place),
     # which makes this tests fail stochastically.
     first = its.timescales[0]
     np.testing.assert_array_less(lower[0], first)
     np.testing.assert_array_less(first, upper[0])
Ejemplo n.º 16
0
    fig, (ax1, ax2) = pp.subplots(1,2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black') 
    ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()
###
# Estimate the MSM from the mapped discrete trajectories at the chosen lag time.
msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime)

# Plot and/or save implied timescales, if requested on the command line.
if args.timescales:
    its = msm.timescales_msm(dtrajs=mapped_data, lags=500)
    mplt.plot_implied_timescales(its, show_mean=False, ylog=True, dt=25, units='ps', linewidth=2)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_its.png'))
    if args.display:
        pp.show()
# Reset pyplot state whether or not a plot was produced.
pp.clf()
pp.close()

####
# PCCA clustering using the specified n_sets. The call is made once; the
# original invoked it twice in a row and discarded the first result.
pcca_return = msm_from_data.pcca(n_sets)
# NOTE(review): the three attribute accesses below discard their values --
# presumably leftovers from an interactive session; confirm before removing.
pcca_return.metastable_sets
pcca_return.metastable_assignment
pcca_return.transition_matrix
Ejemplo n.º 17
0
# Materialise the converted values as a list: on Python 3 ``map`` returns an
# iterator, which cannot be indexed in the loop below.
lcc_sorted_456 = list(map(int, lcc_sorted_456))

# In[24]:

# Translate every frame of the first trajectory of each set through the
# corresponding ``lcc_sorted_*`` lookup.
dtrajs_1D_234_sorted = []
dtrajs_1D_345_sorted = []
dtrajs_1D_456_sorted = []
for i in range(dtrajs_1D_234[0].shape[0]):
    dtrajs_1D_234_sorted.append(lcc_sorted_234[dtrajs_1D_234[0][i]])
    dtrajs_1D_345_sorted.append(lcc_sorted_345[dtrajs_1D_345[0][i]])
    dtrajs_1D_456_sorted.append(lcc_sorted_456[dtrajs_1D_456[0][i]])

# In[25]:

# Implied timescales for 200 integer lag times spaced between 1 and 1000.
lags = np.linspace(1, 1000, 200, dtype='int')
its_234 = msm.timescales_msm(dtrajs_1D_234_sorted, lags=lags, nits=n_clusters)
its_345 = msm.timescales_msm(dtrajs_1D_345_sorted, lags=lags, nits=n_clusters)
its_456 = msm.timescales_msm(dtrajs_1D_456_sorted, lags=lags, nits=n_clusters)

# In[27]:

# Dense sliding-window count matrices at lag ``tau`` for each set.
tau = 400
Cmat_234 = pyemma.msm.estimation.count_matrix(dtrajs_1D_234_sorted, tau, sliding=True, sparse_return=False)
Cmat_345 = pyemma.msm.estimation.count_matrix(dtrajs_1D_345_sorted, tau, sliding=True, sparse_return=False)
Cmat_456 = pyemma.msm.estimation.count_matrix(dtrajs_1D_456_sorted, tau, sliding=True, sparse_return=False)


# In[28]:
# Element-wise sum of the three count matrices.
Cmat_totind = Cmat_234+Cmat_345+Cmat_456
Cmat_totind
Ejemplo n.º 18
0
 def test_lag_generation(self):
     """An integer ``lags`` argument must expand to the expected series of lag times."""
     estimator = msm.timescales_msm(self.dtraj4_2, lags=1000)
     expected = [1, 2, 3, 5, 8, 12, 18, 27, 41, 62, 93, 140, 210, 315, 473, 710]
     np.testing.assert_array_equal(estimator.lags, expected)
Ejemplo n.º 19
0
    ax2 = mplt.plot_free_energy(np.vstack(Y)[:, 0],
                                np.vstack(Y)[:, 1],
                                cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))
    if args.display:
        pp.show()
    pp.clf()
    pp.close()
###
# Estimate the MSM from the mapped discrete trajectories at the chosen lag time.
msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime)

# Plot and/or save implied timescales, if requested on the command line.
if args.timescales:
    # NOTE(review): an integer ``lags`` is presumably an upper bound from which
    # the library generates the lag series -- confirm against the PyEMMA docs.
    its = msm.timescales_msm(dtrajs=mapped_data, lags=500)
    mplt.plot_implied_timescales(its,
                                 show_mean=False,
                                 ylog=True,
                                 dt=25,
                                 units='ps',
                                 linewidth=2)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_its.png'))
    if args.display:
        pp.show()
# Reset pyplot state whether or not a plot was produced.
pp.clf()
pp.close()

####
# PCCA clustering using the specified n_sets.
Ejemplo n.º 20
0
# Lag times for the implied-timescales scan. The last entry is 1000; the
# original list ended with a second, out-of-order 100, which broke the
# otherwise increasing 100-step series.
lags = [
    1, 5, 10, 20, 35, 50, 75, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900,
    1000
]

implied_ts = pyemma.msm.its(dtrajs=dtrajs, lags=lags, nits=5)
pyemma.plots.plot_implied_timescales(implied_ts,
                                     units='time-steps',
                                     ylog=False)
#plt.vlines(2,ymin=0,ymax=350,linestyles='dashed')
#plt.annotate("selected model", xy=(lags[-3], implied_ts.timescales[-3][0]), xytext=(15,250),
#                 arrowprops=dict(facecolor='black', shrink=0.001, width=0.1,headwidth=8))
# NOTE(review): this figure is created after the plot above, so the y-limit is
# applied to the new figure rather than to that plot -- confirm this is intended.
plt.figure(figsize=(10, 10), dpi=600)
plt.ylim([0, 150])

print(implied_ts)

# Second scan: integer ``lags`` and ten timescales, maximum-likelihood only.
its = msm.timescales_msm(dtrajs, lags=50, nits=10)
print(its)
mplt.plot_implied_timescales(its, ylog=False, units='steps', linewidth=2)
#plt.xlim(0, 40); plt.ylim(0, 50)

# Third scan: same settings with Bayesian error estimation, plotted on its
# own figure.
its = msm.timescales_msm(dtrajs, lags=50, nits=10, errors='bayes', n_jobs=-1)
plt.figure(figsize=(8, 5))
mplt.plot_implied_timescales(its,
                             show_mean=False,
                             ylog=False,
                             dt=0.1,
                             units='ns',
                             linewidth=2)
#plt.xlim(0, 5); plt.ylim(0.1,60);
Ejemplo n.º 21
0
    def _algorithm(self):
        logger.info('Postprocessing new data')
        datalist = simlist(
            glob(path.join(self.datapath, '*', '')),
            glob(path.join(self.inputpath, '*', 'structure.pdb')),
            glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist,
                             self.filteredpath,
                             filtersel=self.filtersel)

        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1,
                                  self.metricsel2,
                                  metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        K = int(
            max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
                100))  # heuristic
        if K > datadr.numFrames / 3:  # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            macronum = np.ceil(datadr.K / 2)
            logger.warning(
                'Using less macrostates than requested due to lack of microstates. macronum = '
                + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(),
                                lags=self.lag,
                                nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx,
                                          spawncounts[stateIdx],
                                          statetype='micro',
                                          replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Ejemplo n.º 22
0
# Label and save the IC1/IC2 free-energy plot.
# NOTE(review): the figure itself is presumably created above this chunk.
plt.xlabel("IC 1")
plt.ylabel("IC 2")
plt.title("FES IC1-2")
plt.savefig("fes_IC1-2.png")

# Free-energy plot of ``xall`` against column 2 of the stacked ``Y``, with the
# points (cc_x, cc_z) drawn on top as black markers.
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(xall, np.vstack(Y)[:, 2], cmap="Spectral")
plt.plot(cc_x, cc_z, linewidth=0, marker='o', markersize=5, color='black')
plt.xlabel("IC 1")
plt.ylabel("IC 3")
plt.title("FES IC1-3")
plt.savefig("fes_IC1-3.png")

# Implied timescales; ``lags=None`` is forwarded unchanged.
# NOTE(review): presumably this lets the library pick the lag times -- confirm.
lags = None
plt.figure(figsize=(8, 5))
its = msm.timescales_msm(dtrajs, lags=lags, nits=10)
mplt.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
plt.savefig("its.png")

# Disabled variant with Bayesian error estimation:
# its = msm.timescales_msm(dtrajs, lags=lags, nits=10, errors='bayes', n_jobs=-1)
# plt.figure(figsize=(8, 5))
# mplt.plot_implied_timescales(its, show_mean=False, ylog=False, units='steps', linewidth=2)
# plt.savefig("its_errors.png")

# Estimate the MSM at ``msm_lag`` and report how much of the data it uses.
M = msm.estimate_markov_model(dtrajs, msm_lag)
print('fraction of states used = ', M.active_state_fraction)
print('fraction of counts used = ', M.active_count_fraction)

# Contour plot of the stationary distribution over the points of the active set.
f = plt.figure(figsize=(8, 5))
pi = M.stationary_distribution
ax = mplt.scatter_contour(cc_x[M.active_set], cc_y[M.active_set], pi, fig=f)