def _algorithm(self):
    """Process the finished simulations, build a Markov model and respawn.

    Steps performed, in order:

    1. Build a simulation list from ``self.datapath`` / ``self.inputpath``
       and filter it with ``self.filtersel`` into ``self.filteredpath``.
    2. Project the filtered simulations with ``MetricDistance`` (when
       ``self.metricsel2`` is set) or ``MetricSelfDistance`` (otherwise),
       then optionally reduce with TICA to ``self.ticadim`` dimensions.
    3. Cluster the projected data and build a ``Model`` on it.
    4. Choose the number of macrostates from the implied timescales and
       build the Markov model with lag ``self.lag``.
    5. Score the states with ``self._criteria``, obtain spawn counts for
       ``self.nmax - self.running`` new simulations from ``self._spawn``,
       sample frames from the selected microstates and pass them to
       ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    # metricsel2 may be missing or None; both cases mean "self distance".
    if getattr(self, 'metricsel2', None) is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)

    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()
    # Contact symmetrization (contactSymmetry(data, self.contactsym)) is
    # currently disabled.
    data.dropTraj()

    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic for the number of clusters: at least 100, but never more
    # than a third of the available frames.
    K = int(max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50), 100))
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters left after clustering with mergesmall=5:
        # recluster without it and allow sampling with replacement.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; cast so that an integer count is passed
        # as `nits` below and shown in the log message.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. macronum = %s',
            macronum)

    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    # Keep one macrostate per timescale longer than the lag time, with a
    # minimum of 2 and a maximum of the requested number.
    macronum = int(min(self.macronum, max(np.sum(timesc > self.lag), 2)))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    spawncounts, _ = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts %s', spawncounts)

    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx],
                                      statetype='micro', replacement=replacement)
    logger.debug('relFrames %s', relFrames)

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def _algorithm(self):
    """Respawn from frames drawn at random out of all existing simulations."""
    from htmd.projections.metric import Metric
    from htmd.molecule.molecule import Molecule
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    datadirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    inputdirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(datadirs, structures, inputdirs)

    # The projected data is only used for its frame count and for mapping
    # absolute frame indexes back to simulation frames.
    casel = 'protein and name CA'
    metr = Metric(sims)
    refmol = Molecule(sims[0].molfile)
    metr.projection(MetricCoordinate(refmol, casel, casel))
    data = metr.project()

    totalframes = data.numFrames
    nspawn = self.nmax - self.running
    absframes = np.random.randint(0, totalframes, nspawn)
    self._writeInputs(data.abs2sim(absframes))
def _algorithm(self):
    """Choose random frames of the existing simulations as new starting points."""
    from htmd.projections.metric import Metric
    from htmd.molecule.molecule import Molecule
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    sims = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )

    # Project CA coordinates; the result supplies numFrames and abs2sim.
    selection = 'protein and name CA'
    metr = Metric(sims)
    coordproj = MetricCoordinate(Molecule(sims[0].molfile), selection, selection)
    metr.projection(coordproj)
    data = metr.project()

    # One random absolute frame index per free simulation slot.
    picked = np.random.randint(0, data.numFrames, self.nmax - self.running)
    simframes = data.abs2sim(picked)
    self._writeInputs(simframes)
from htmd.molecule.molecule import Molecule
from htmd.home import home
import numpy as np
from os import path

# --- Single molecule: project a trajectory against a one-frame reference ---
pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
mol = Molecule(pdbfile)
xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
mol.read(xtcfile)

ref = mol.copy()
# Keep index 0 along the last coords axis (presumably the first frame).
ref.coords = np.atleast_3d(ref.coords[:, :, 0])
metr = MetricTMscore(ref, 'protein and name CA')
data = metr.project(mol)

# Reference values for the last 20 entries of the projection.
lasttm = np.array(
    [0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511,
     0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117,
     0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048,
     0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066],
    dtype=np.float32)
deviation = np.abs(data[-20:].flatten() - lasttm)
assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

# --- Simulation list: same projection through Metric ---
from htmd.simlist import simlist
from htmd.projections.metric import Metric

dd = home(dataDir="adaptive")
simdirs = [path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')]
structure = path.join(dd, 'generators', '1', 'structure.pdb')
fsims = simlist(simdirs, structure)
ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))
metr2 = Metric(fsims)
metr2.projection(MetricTMscore(ref, 'protein and name CA'))
data2 = metr2.project()

assert data2.trajectories[0].projection.shape == (6, 1)
return datatica if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from htmd.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.projection(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[ 3.69098878, -0.33862674, 0.85779184], [ 3.77816105, -0.31887317, 0.87724227], [ 3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.all(datatica.description.iloc[[587, 588]].type == 'tica') assert np.all(datatica.description.iloc[range(587)].type == 'distance') print('In-memory TICA with subset of dimensions passed test.')
def _algorithm(self):
    """Process the finished simulations, build a Markov model and respawn.

    Steps performed, in order:

    1. Build a simulation list from ``self.datapath`` / ``self.inputpath``
       and filter it with ``self.filtersel`` into ``self.filteredpath``.
    2. Project the filtered simulations with ``MetricDistance`` (when
       ``self.metricsel2`` is set) or ``MetricSelfDistance`` (otherwise),
       then optionally reduce with TICA to ``self.ticadim`` dimensions.
    3. Cluster the projected data and build a ``Model`` on it.
    4. Choose the number of macrostates from the implied timescales and
       build the Markov model with lag ``self.lag``.
    5. Score the states with ``self._criteria``, obtain spawn counts for
       ``self.nmax - self.running`` new simulations from ``self._spawn``,
       sample frames from the selected microstates and pass them to
       ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    # metricsel2 may be missing or None; both cases mean "self distance".
    if getattr(self, 'metricsel2', None) is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)

    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()
    # Contact symmetrization (contactSymmetry(data, self.contactsym)) is
    # currently disabled.
    data.dropTraj()

    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic for the number of clusters: at least 100, but never more
    # than a third of the available frames.
    K = int(max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50), 100))
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters left after clustering with mergesmall=5:
        # recluster without it and allow sampling with replacement.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; cast so that an integer count is passed
        # as `nits` below and shown in the log message.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. macronum = %s',
            macronum)

    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    # Keep one macrostate per timescale longer than the lag time, with a
    # minimum of 2 and a maximum of the requested number.
    macronum = int(min(self.macronum, max(np.sum(timesc > self.lag), 2)))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    spawncounts, _ = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts %s', spawncounts)

    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx],
                                      statetype='micro', replacement=replacement)
    logger.debug('relFrames %s', relFrames)

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
import numpy as np
from os import path

# --- Single molecule: project a trajectory against a one-frame reference ---
pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
mol = Molecule(pdbfile)
xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
mol.read(xtcfile)

ref = mol.copy()
# Keep index 0 along the last coords axis (presumably the first frame).
ref.coords = np.atleast_3d(ref.coords[:, :, 0])
metr = MetricRmsd(ref, 'protein and name CA')
data = metr.project(mol)

# Reference values for the last 20 entries of the projection.
lastrmsd = np.array(
    [1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261,
     1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752,
     1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436,
     1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617],
    dtype=np.float32)
deviation = np.abs(data[-20:] - lastrmsd)
assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

# --- Simulation list: same projection through Metric ---
from htmd.simlist import simlist
from htmd.projections.metric import Metric

dd = home(dataDir="adaptive")
simdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
fsims = simlist(simdirs, dd + '/generators/1/structure.pdb')
ref = Molecule(dd + "/generators/1/structure.pdb")
metr2 = Metric(fsims)
metr2.projection(MetricRmsd(ref, 'protein and name CA'))
data2 = metr2.project()

assert data2.trajectories[0].projection.shape == (6, 1)
# --- Single molecule: project a trajectory against a one-frame reference ---
xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
mol.read(xtcfile)

ref = mol.copy()
# Keep index 0 along the last coords axis (presumably the first frame).
ref.coords = np.atleast_3d(ref.coords[:, :, 0])
metr = MetricRmsd(ref, 'protein and name CA')
data = metr.project(mol)

# Reference values for the last 20 entries of the projection.
lastrmsd = np.array(
    [1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261,
     1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752,
     1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436,
     1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617],
    dtype=np.float32)
deviation = np.abs(data[-20:] - lastrmsd)
assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

# --- Simulation list: same projection through Metric ---
from htmd.simlist import simlist
from htmd.projections.metric import Metric

dd = home(dataDir="adaptive")
simdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
fsims = simlist(simdirs, dd + '/generators/1/structure.pdb')
ref = Molecule(dd + "/generators/1/structure.pdb")
metr2 = Metric(fsims)
metr2.projection(MetricRmsd(ref, 'protein and name CA'))
data2 = metr2.project()

assert data2.dat[0].shape == (6, 1)
An array containing the null data. """ trajlen = mol.numFrames data = np.zeros((trajlen, self._ndim), dtype=np.float32) return data if __name__ == "__main__": import htmd.home from htmd.simlist import simlist from htmd.projections.metric import Metric import htmd.projections.metricnull dd = htmd.home.home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') metr2 = Metric(fsims) metr2.projection(htmd.projections.metricnull.MetricNull(2)) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 2) metr1 = Metric(fsims) metr1.projection(htmd.projections.metricnull.MetricNull(1)) data1 = metr1.project() assert data1.trajectories[0].projection.shape == (6, 1) pass
import doctest
doctest.testmod()

# --- One simulation ---
pdbfile = os.path.join(htmd.home(), 'data', '1kdx', '1kdx_0.pdb')
mol = Molecule(pdbfile)
dcdfile = os.path.join(htmd.home(), 'data', '1kdx', '1kdx.dcd')
mol.read(dcdfile)

# Use MetricPlumed2(['']) instead to exercise the exception path.
plumed_lines = ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6']
metric = MetricPlumed2(plumed_lines)
data = metric.project(mol)

# Reference values for the first column of the projection.
ref = np.array([0.536674, 21.722393, 22.689391, 18.402114, 23.431387,
                23.13392, 19.16376, 20.393544, 23.665517, 22.298349,
                22.659769, 22.667669, 22.484084, 20.893447, 18.791701,
                21.833056, 19.901318])
deviation = np.abs(ref - data[:, 0])
assert np.all(deviation < 0.01), 'Plumed demo calculation is broken'

# --- Simulation list ---
dd = htmd.home(dataDir="adaptive")
simdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
fsims = htmd.simlist(simdirs, dd + '/generators/1/structure.pdb')
metr = Metric(fsims)
metr.projection(MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6']))
data2 = metr.project()
from htmd.molecule.molecule import Molecule
from htmd.home import home
import numpy as np
from os import path

# --- Single molecule: project a trajectory against a one-frame reference ---
pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
mol = Molecule(pdbfile)
xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
mol.read(xtcfile)

ref = mol.copy()
# Keep index 0 along the last coords axis (presumably the first frame).
ref.coords = np.atleast_3d(ref.coords[:, :, 0])
metr = MetricTMscore(ref, 'protein and name CA')
data = metr.project(mol)

# Reference values for the last 20 entries of the projection.
lasttm = np.array(
    [0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511,
     0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117,
     0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048,
     0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066],
    dtype=np.float32)
deviation = np.abs(data[-20:].flatten() - lasttm)
assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

# --- Simulation list: same projection through Metric ---
from htmd.simlist import simlist
from htmd.projections.metric import Metric

dd = home(dataDir="adaptive")
simdirs = [path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')]
structure = path.join(dd, 'generators', '1', 'structure.pdb')
fsims = simlist(simdirs, structure)
ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))
metr2 = Metric(fsims)
metr2.projection(MetricTMscore(ref, 'protein and name CA'))
data2 = metr2.project()

assert data2.dat[0].shape == (6, 1)
# Skip the self-tests when the plumed lookup fails.
try:
    _getPlumedRoot()
except Exception:
    # A bare `except:` would also swallow KeyboardInterrupt and SystemExit;
    # only ordinary errors from the lookup should lead to a skip.
    print("Tests in %s skipped because plumed executable not found." % __file__)
    sys.exit()

import doctest
doctest.testmod()

# --- Simulation list ---
dd = htmd.home(dataDir="adaptive")
fsims = htmd.simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                     dd + '/generators/1/structure.pdb')
metr = Metric(fsims)
metr.projection(
    MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6']))
data2 = metr.project()

# --- One simulation ---
testpath = os.path.join(htmd.home(), 'data', '1kdx')
mol = Molecule(os.path.join(testpath, '1kdx_0.pdb'))
# Reuse testpath so both input files come from the same directory.
mol.read(os.path.join(testpath, '1kdx.dcd'))

metric = MetricPlumed2(
    ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6'])
data = metric.project(mol)

# Reference values for this projection; no check against them is visible
# in this part of the script.
ref = np.array([
    0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392,
    19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669,
    22.484084, 20.893447, 18.791701, 21.833056, 19.901318
])
if self.dimensions is not None: # If TICA is done on a subset of dims datatica.map = keepdimdesc.append(datatica.map, ignore_index=True) return datatica if __name__ == "__main__": from htmd import * from htmd.home import home from os.path import join testfolder = home(dataDir="villin") sims = simlist(glob(join(testfolder, "*", "")), join(testfolder, "filtered.pdb")) met = Metric(sims[0:2]) met.projection(MetricSelfDistance("protein and name CA")) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units="ns", dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [ [3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956], ] assert np.allclose( np.abs(datatica.dat[0][-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01 )