def setUpClass(self):
    """Prepare the two projected datasets used by the tests.

    A simlist is built from the ``adaptive`` data directory, filtered with
    ``"not water"`` into a temporary folder, and projected twice with the
    same ``Metric`` object: first with a contact ``MetricDistance`` (result
    stored in ``self.data1``), then with ``MetricDihedral`` (result stored
    in ``self.data2``).
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    trajectory_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
    input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))
    all_sims = simlist(trajectory_dirs, input_dirs)
    dry_sims = simfilter(all_sims, tempname(), "not water")

    projector = Metric(dry_sims)

    # First projection: residue-grouped contacts within a threshold of 4.
    contact_metric = MetricDistance(
        "protein and resid 10 and name CA",
        "resname BEN and noh",
        periodic="selections",
        metric="contacts",
        groupsel1="residue",
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    # Second projection: replace the metric and project again.
    projector.set(MetricDihedral())
    self.data2 = projector.project()
def _getSimlist(self):
    """Return the simulation list for the data collected so far.

    Every sub-directory of ``self.datapath`` is handed to ``simlist``
    together with the sub-directories of ``self.inputpath``, which are
    passed both as second and as third argument. When ``self.filter`` is
    truthy the list is additionally run through ``simfilter`` with
    ``self.filteredpath`` and ``self.filtersel``.
    """
    logger.info('Postprocessing new data')

    data_dirs = glob(path.join(self.datapath, '*', ''))
    # The same pattern is globbed twice so simlist gets two separate lists.
    structure_dirs = glob(path.join(self.inputpath, '*', ''))
    input_dirs = glob(path.join(self.inputpath, '*', ''))

    collected = simlist(data_dirs, structure_dirs, input_dirs)
    if not self.filter:
        return collected
    return simfilter(collected, self.filteredpath, filtersel=self.filtersel)
def setUpClass(self):
    """Prepare the two projected datasets used by the tests.

    Builds a simlist from the ``adaptive`` data directory, filters it with
    ``'not water'`` into a temporary folder and projects it with a contact
    ``MetricDistance`` (``self.data1``) and then with ``MetricDihedral``
    (``self.data2``), reusing one ``Metric`` object.
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    trajectory_dirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    input_dirs = glob(join(home(dataDir='adaptive'), 'input', '*'))
    all_sims = simlist(trajectory_dirs, input_dirs)
    dry_sims = simfilter(all_sims, tempname(), 'not water')

    projector = Metric(dry_sims)

    # First projection: residue-grouped contacts within a threshold of 4.
    contact_metric = MetricDistance(
        'protein and resid 10 and name CA',
        'resname BEN and noh',
        metric='contacts',
        groupsel1='residue',
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    # Second projection: replace the metric and project again.
    projector.set(MetricDihedral())
    self.data2 = projector.project()
def _algorithm(self):
    """Run one adaptive step: project, cluster, build the model, respawn.

    The simulations found under ``self.datapath`` / ``self.inputpath`` are
    (optionally) filtered, projected with ``self.projection`` and, when
    ``self.ticadim > 0``, reduced with TICA. The result is clustered, a
    Markov model is built and stored in ``self._model`` (and saved to disk
    when ``self.save`` is set), and the frames chosen by
    ``self._getSpawnFrames`` are written out with ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')

    data_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    simulations = simlist(data_dirs, structures, input_dirs)
    if self.filter:
        simulations = simfilter(simulations, self.filteredpath, filtersel=self.filtersel)

    projector = Metric(simulations, skip=self.skip)
    projector.set(self.projection)

    use_tica = self.ticadim > 0
    projected = projector.project()
    if use_tica:
        # TICA is fed the already projected data: handing it the Metric
        # object directly was reported to be far too slow.
        lag_frames = int(max(2, np.ceil(self.ticalag)))
        datadr = TICA(projected, lag_frames).project(self.ticadim)
    else:
        datadr = projected

    # Preferably this should happen before any projection, since corrupted
    # simulations can affect TICA.
    datadr.dropTraj()

    n_clusters = self._numClusters(datadr.numFrames)
    datadr.cluster(self.clustmethod(n_clusters=n_clusters))

    self._model = Model(datadr)
    self._model.markovModel(self.lag, self._numMacrostates(datadr))
    if self.save:
        self._model.save(f'adapt_model_e{self._getEpoch()!s}.dat')

    spawn_frames = self._getSpawnFrames(self._model, datadr)
    self._writeInputs(datadr.rel2sim(np.concatenate(spawn_frames)))
def _getSimlist(self):
    """Return the simulation list for the data collected so far.

    Every sub-directory of ``self.datapath`` is handed to ``simlist``
    together with the sub-directories of ``self.inputpath``, which are
    passed both as second and as third argument. When ``self.filter`` is
    truthy the list is additionally run through ``simfilter`` with
    ``self.filteredpath`` and ``self.filtersel``.
    """
    logger.info("Postprocessing new data")

    data_dirs = glob(path.join(self.datapath, "*", ""))
    # The same pattern is globbed twice so simlist gets two separate lists.
    structure_dirs = glob(path.join(self.inputpath, "*", ""))
    input_dirs = glob(path.join(self.inputpath, "*", ""))

    collected = simlist(data_dirs, structure_dirs, input_dirs)
    if not self.filter:
        return collected
    return simfilter(collected, self.filteredpath, filtersel=self.filtersel)
def _algorithm(self):
    """Run one adaptive step and write the inputs of the next epoch.

    Steps, in order:

    1. Build and filter the simulation list from ``self.datapath`` and
       ``self.inputpath``.
    2. Project it with ``MetricDistance`` (when ``self.metricsel2`` is set)
       or ``MetricSelfDistance``, drop bad trajectories and optionally
       reduce the data with TICA (``self.ticadim > 0``).
    3. Cluster with ``self.clustmethod`` using a heuristic number of
       clusters, build a ``Model`` and a Markov model.
    4. Turn the criteria returned by ``self._criteria`` into spawn counts,
       sample frames from the selected microstates and hand them to
       ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic number of clusters: at least 100, growing with log10 of the
    # number of frames ...
    K = int(max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50), 100))
    # ... but never more than a third of the available frames.
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters survived the merge of small ones: recluster
        # without merging and allow sampling with replacement.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; the value is used as a count (``nits``),
        # so convert it to a plain int.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. '
            'macronum = %s', macronum)

    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    # Keep as many macrostates as there are timescales above the lag time,
    # bounded by 2 and by the requested number; int() strips the NumPy
    # integer type returned by np.sum.
    macronum = int(min(self.macronum, max(np.sum(timesc > self.lag), 2)))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(
        stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def _algorithm(self):
    """Run one adaptive step using a directed plus undirected reward.

    The simulations are (optionally) filtered, projected with
    ``self.projection``, optionally reduced with TICA, clustered and turned
    into a Markov model stored in ``self._model``. The reward is the scaled
    directed component plus ``self.ucscale`` times the scaled undirected
    component (negative state counts); it is passed to
    ``self._getSpawnFrames`` and the selected frames are written with
    ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')

    data_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(data_dirs, structures, input_dirs)
    if self.filter:
        sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

    projector = Metric(sims, skip=self.skip)
    projector.set(self.projection)

    use_tica = self.ticadim > 0
    projected = projector.project()
    if use_tica:
        # TICA is fed the already projected data: handing it the Metric
        # object directly was reported to be far too slow.
        lag_frames = int(max(2, np.ceil(self.ticalag)))
        datadr = TICA(projected, lag_frames).project(self.ticadim)
    else:
        datadr = projected

    # Preferably this should happen before any projection, since corrupted
    # simulations can affect TICA.
    datadr.dropTraj()

    n_clusters = self._numClusters(datadr.numFrames)
    datadr.cluster(self.clustmethod(n_clusters=n_clusters))

    model = Model(datadr)
    self._model = model
    model.markovModel(self.lag, self._numMacrostates(datadr))
    if self.save:
        model.save(f'adapt_model_e{self._getEpoch()!s}.dat')

    def per_state(values):
        """Re-index *values* according to ``self.statetype``."""
        if self.statetype == 'micro':
            return values[model.cluster_ofmicro]
        if self.statetype == 'macro':
            return macroAccumulate(model, values[model.cluster_ofmicro])
        return values

    # Undirected component: lower counts should score higher, hence the sign.
    undirected = per_state(-model.data.N)
    # Directed component.
    directed = per_state(
        self._calculateDirectedComponent(sims, model.data.St, model.data.N))

    undirected = self._featScale(undirected)
    directed = self._featScale(directed)
    reward = directed + self.ucscale * undirected

    spawn_frames = self._getSpawnFrames(reward, self._model, datadr)
    self._writeInputs(datadr.rel2sim(np.concatenate(spawn_frames)))
def _algorithm(self):
    """Pick random frames of the existing simulations for respawning.

    All simulations are projected with ``MetricCoordinate`` on
    ``'protein and name CA'``; ``self.nmax - self.running`` absolute frame
    indices are then drawn with ``np.random.randint``, converted to
    simulation frames and passed to ``self._writeInputs``.
    """
    from htmd.projections.metric import Metric
    from htmd.molecule.molecule import Molecule
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    data_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(data_dirs, structures, input_dirs)

    projector = Metric(sims)
    # The molecule file of the first simulation serves as the reference.
    reference = Molecule(sims[0].molfile)
    projector.projection(
        MetricCoordinate(reference, 'protein and name CA', 'protein and name CA'))
    projected = projector.project()

    n_spawn = self.nmax - self.running
    chosen = np.random.randint(0, projected.numFrames, n_spawn)
    self._writeInputs(projected.abs2sim(chosen))
def _algorithm(self):
    """Pick random frames of the existing simulations for respawning.

    All simulations are projected through ``MetricCoordinate.project`` on
    ``'protein and name CA'``; ``self.nmax - self.running`` absolute frame
    indices are then drawn with ``np.random.randint``, converted to
    simulation frames and passed to ``self._writeInputs``.
    """
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    data_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(data_dirs, structures, input_dirs)

    # The molecule file of the first simulation serves as the reference.
    projected = MetricCoordinate.project(
        sims, sims[0].molfile, 'protein and name CA', 'protein and name CA')

    n_spawn = self.nmax - self.running
    chosen = np.random.randint(0, projected.numFrames, n_spawn)
    self._writeInputs(projected.abs2sim(chosen))
def _getsimlist(self, folder):
    """Build a simlist from the filtered simulations below *folder*.

    *folder* may be a glob pattern. Simulation folders matching
    ``{folder}/filtered/*/`` are collected and, when several of them share
    the same directory name, only the first one returned by ``glob`` is
    kept. The structure file is ``filtered/filtered.pdb`` inside the first
    path matched by *folder* itself.
    """
    from htmd.simlist import simlist
    from glob import glob

    # Keep one folder per simulation name to avoid problems when merging
    # multiple data sources.
    seen_names = set()
    unique_folders = []
    for candidate in glob(f'{folder}/filtered/*/'):
        sim_name = candidate.split("/")[-2]
        if sim_name in seen_names:
            continue
        seen_names.add(sim_name)
        unique_folders.append(candidate)

    first_match = glob(folder)[0]
    return simlist(unique_folders, f'{first_match}/filtered/filtered.pdb')
def removeCorrupted():
    """Move simulations flagged as corrupted from ``./filtered`` to ``/tmp``.

    The simulations under ``./filtered/*/`` are projected with
    ``corruptMetric``. Every simulation whose projection sums to a non-zero
    value has the directory of its first trajectory file moved to
    ``/tmp/<that directory>``.

    The function is best effort: when the simlist cannot be built it
    reports the reason and returns without touching anything.
    """
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from os import path
    from glob import glob
    import shutil

    print("Removing Corrupted Simulations")
    try:
        sims = simlist(glob("./filtered/*/"), "./filtered/filtered.pdb")
    except Exception as err:
        # Only ordinary errors are treated as "nothing to do"; a bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        print(f"Could not build the simlist, nothing removed: {err}")
        return

    met = Metric(sims)
    met.set(corruptMetric)
    dat = met.project()

    for projection, sim in zip(dat.dat, dat.simlist):
        if np.sum(projection):
            traj_dir = path.dirname(sim.trajectory[0])
            shutil.move(traj_dir, f"/tmp/{traj_dir}")
mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc')) ref = mol.copy() ref.coords = np.atleast_3d(ref.coords[:, :, 0]) metr = MetricRmsd(ref, 'protein and name CA') data = metr.project(mol) lastrmsd = np.array([ 1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261, 1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752, 1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436, 1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617 ], dtype=np.float32) assert np.all( np.abs(data[-20:] - lastrmsd) < 0.001), 'Coordinates calculation is broken' from htmd.simlist import simlist from htmd.projections.metric import Metric dd = home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') ref = Molecule(dd + "/generators/1/structure.pdb") metr2 = Metric(fsims) metr2.set(MetricRmsd(ref, 'protein and name CA')) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 1) pass
datatica.description = keepdimdesc.append(datatica.description, ignore_index=True) return datatica if __name__ == "__main__": from htmd.simlist import simlist from glob import glob from moleculekit.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir="villin") sims = simlist(glob(join(testfolder, "*", "")), join(testfolder, "filtered.pdb")) met = Metric(sims[0:2]) met.set(MetricSelfDistance("protein and name CA")) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units="ns", dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [ [3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956], ] assert np.allclose(
datatica.description = keepdimdesc.append(datatica.description, ignore_index=True) return datatica if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from htmd.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.set(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0,
if self.dimensions is not None and keepdimdesc is not None: # If TICA is done on a subset of dims datatica.description = keepdimdesc.append(datatica.description, ignore_index=True) return datatica if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from htmd.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.projection(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[ 3.69098878, -0.33862674, 0.85779184], [ 3.77816105, -0.31887317, 0.87724227], [ 3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.all(datatica.description.iloc[[587, 588]].type == 'tica')
]) # None can be replaced by any other "not in b" value if __name__ == '__main__': from htmd.simlist import simlist, simfilter from glob import glob from htmd.projections.metric import Metric from htmd.projections.metricdistance import MetricDistance from htmd.projections.metricdihedral import MetricDihedral from htmd.util import tempname from htmd.home import home from os.path import join testfolder = home(dataDir='adaptive') sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb'))) fsims = simfilter(sims, tempname(), 'not water') metr = Metric(fsims) metr.set( MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts', groupsel1='residue', threshold=4)) data1 = metr.project() metr.set(MetricDihedral()) data2 = metr.project() # Testing combining of metrics data1.combine(data2)
mol = Molecule(path.join(home(), 'data', '1kdx', '1kdx_0.pdb')) mol.read(path.join(home(), 'data', '1kdx', '1kdx.dcd')) metric = MetricPlumed2( ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6']) data = metric.project(mol) ref = np.array([ 0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392, 19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669, 22.484084, 20.893447, 18.791701, 21.833056, 19.901318 ]) assert np.all( np.abs(ref - data[:, 0]) < 0.01), 'Plumed demo calculation is broken' # Simlist # datadirs=glob(path.join(home(), 'data', 'adaptive', 'data', '*' ) # fsims=simlist(glob(path.join(home(), 'data', 'adaptive', 'data', '*', '/')), # path.join(home(), 'data', 'adaptive', 'generators', '1','structure.pdb')) fsims = simlist([ '/home/toni/work/htmd/htmd/htmd/data/adaptive/data/e1s1_1/', '/home/toni/work/htmd/htmd/htmd/data/adaptive/data/e1s2_1/' ], '/home/toni/work/htmd/htmd/htmd/data/adaptive/generators/1/structure.pdb' ) metr = Metric(fsims) metr.projection( MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6'])) data2 = metr.project() print(data2.dat)
sim = s break if sim is None: raise NameError(f"Could not find parent of simulation {simname}.") return sim, prevpiece, prevframe, epo if __name__ == "__main__": import htmd import os from htmd.simlist import Frame, simlist from htmd.util import tempname filedir = htmd.home.home() + "/data/adaptive/" sims = simlist( glob(os.path.join(filedir, "data", "*", "")), glob(os.path.join(filedir, "input", "*", "")), glob(os.path.join(filedir, "input", "*", "")), ) outf = tempname() os.makedirs(outf) f = Frame(sims[0], 0, 5) _writeInputsFunction(1, f, 2, outf, "input.coor") mol = Molecule(sims[0]) mol.read(os.path.join(outf, "e2s2_e1s1p0f5", "input.coor")) shutil.rmtree(outf)
md.run() # Cleaning up inputodel = glob(path.join(home(), 'data', 'adaptive', 'input', 'e2*')) for i in inputodel: shutil.rmtree(i, ignore_errors=True, acemd='/shared/acemd/bin/acemd') os.remove(path.join(home(), 'data', 'adaptive', 'input', 'e2_writeinputs.log'))''' import htmd import os import shutil from htmd.queues.localqueue import LocalGPUQueue from htmd.simlist import Frame, simlist from htmd.util import tempname filedir = htmd.home.home()+'/data/adaptive/' sims = simlist(glob(os.path.join(filedir, 'data', '*', '')), glob(os.path.join(filedir, 'input', '*', '')), glob(os.path.join(filedir, 'input', '*', ''))) outf = tempname() os.makedirs(outf) f = Frame(sims[0], 0, 5) _writeInputsFunction(1, f, 2, outf, 'input.coor') mol = Molecule(sims[0]) mol.read(os.path.join(outf, 'e2s2_e1s1p0f5', 'input.coor')) shutil.rmtree(outf)
def _algorithm(self):
    """Run one adaptive step and write the inputs of the next epoch.

    Steps, in order:

    1. Build and filter the simulation list from ``self.datapath`` and
       ``self.inputpath``.
    2. Project it with ``MetricDistance`` (when ``self.metricsel2`` is set)
       or ``MetricSelfDistance``, drop bad trajectories and optionally
       reduce the data with TICA (``self.ticadim > 0``).
    3. Cluster with ``self.clustmethod`` using a heuristic number of
       clusters, build a ``Model`` and a Markov model.
    4. Turn the criteria returned by ``self._criteria`` into spawn counts,
       sample frames from the selected microstates and hand them to
       ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic number of clusters: at least 100, growing with log10 of the
    # number of frames ...
    K = int(max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50), 100))
    # ... but never more than a third of the available frames.
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters survived the merge of small ones: recluster
        # without merging and allow sampling with replacement.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; the value is used as a count (``nits``),
        # so convert it to a plain int.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. '
            'macronum = %s', macronum)

    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    # Keep as many macrostates as there are timescales above the lag time,
    # bounded by 2 and by the requested number; int() strips the NumPy
    # integer type returned by np.sum.
    macronum = int(min(self.macronum, max(np.sum(timesc > self.lag), 2)))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(
        stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
import htmd.home from htmd.simlist import simlist from htmd.projections.metricplumed2 import MetricPlumed2 from htmd.projections.metric import Metric try: _getPlumedRoot() except: print("Tests in %s skipped because plumed executable not found." % __file__) sys.exit() # Simlist dd = htmd.home.home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') metr = Metric(fsims) metr.set(MetricPlumed2( ['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6'])) data2 = metr.project() # One simulation testpath=os.path.join(htmd.home.home(), 'data', '1kdx') mol = Molecule(os.path.join(testpath, '1kdx_0.pdb')) mol.read(os.path.join(htmd.home.home(), 'data', '1kdx', '1kdx.dcd')) metric = MetricPlumed2(['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6']) data = metric.project(mol)
return np.array([bind.get(itm, -1) for itm in a]) # None can be replaced by any other "not in b" value if __name__ == '__main__': from htmd.simlist import simlist, simfilter from glob import glob from htmd.projections.metric import Metric from htmd.projections.metricdistance import MetricDistance from htmd.projections.metricdihedral import MetricDihedral from htmd.util import tempname from htmd.home import home from os.path import join testfolder = home(dataDir='adaptive') sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb'))) fsims = simfilter(sims, tempname(), 'not water') metr = Metric(fsims) metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts', groupsel1='residue', threshold=4)) data1 = metr.project() metr.set(MetricDihedral()) data2 = metr.project() # Testing combining of metrics data1.combine(data2) # Testing dimensions assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct' assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct' assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'
from htmd.molecule.molecule import Molecule from htmd.home import home import numpy as np from os import path mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb')) mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc')) ref = mol.copy() ref.coords = np.atleast_3d(ref.coords[:, :, 0]) metr = MetricTMscore(ref, 'protein and name CA') data = metr.project(mol) lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32) assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken' from htmd.simlist import simlist from htmd.projections.metric import Metric dd = home(dataDir="adaptive") fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')], path.join(dd, 'generators', '1', 'structure.pdb')) ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb')) metr2 = Metric(fsims) metr2.projection(MetricTMscore(ref, 'protein and name CA')) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 1)
def analyze_folder(folder=None, out_folder="/tmp", skip=1, metrics=None,
                   clu=500, tica=True, ticadim=5, tica_lag=20, model_lag=10,
                   model_units='ns', macro_N=10, bulk_split=False, fes=True,
                   rg_analysis=True, save=True, data_fstep=None):
    """Analysis script to create a Markov State Model.

    Creates and returns a Markov State Model given a data folder. Intended
    to follow up the evolution of an adaptive sampling run. Allows saving
    the model and several informative plots.

    Parameters
    ----------
    folder : str
        Data folder where adaptive is running. ``simlist.npy`` is loaded
        from it when available; otherwise the simlist is built from its
        ``filtered`` sub-folder.
    out_folder : str
        Output folder to store derived data. An existing
        ``model.dat`` in this folder is loaded instead of building a new
        model.
    skip : int
        Number of frames to skip while projecting the MD data
    metrics : [:class: `Metric` object]
        Metric array used to project the data
    clu : int
        Number of clusters to create using the MiniBatchKMeans method.
    tica : bool
        Whether to use TICA or GWPCA for dimensionality reduction
    ticadim : int
        Number of dimensions to project the data onto. If falsy, the model
        is created using the raw projected data
    tica_lag : int, optional
        Second argument passed to ``TICA`` / ``GWPCA``
    model_lag : int
        Lag used to create the model, expressed in `model_units`
    model_units : str, optional
        Units of `model_lag`
    macro_N : int
        Number of macrostates to split the final Markov State Model. If
        falsy, no Markov model is built
    bulk_split : optional
        If truthy, passed to ``create_bulk`` to split the bulk state;
        failures of that step are reported and ignored
    fes : bool, optional
        Currently unused (the free-energy plot is disabled)
    rg_analysis : bool, optional
        If true, a plot with information relative to the radius of
        gyration of the molecule will be created.
    save : bool, optional
        If true, the model will be saved in the output folder
    data_fstep : optional
        If truthy, assigned to the ``fstep`` attribute of the projected
        data before clustering

    Returns
    -------
    :class:`Model`
        Final model
    """
    from htmd.model import Model
    from htmd.molecule.molecule import Molecule
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from sklearn.cluster import MiniBatchKMeans
    from IDP_htmd.IDP_model import plot_RG
    from IDP_htmd.model_utils import create_bulk
    from glob import glob
    import os

    # NOTE(review): block nesting below was reconstructed from the
    # statement order; confirm that the steps after ``markovModel`` are
    # meant to run even when ``macro_N`` is falsy.

    # exist_ok avoids reporting every mkdir failure as "already exists".
    os.makedirs(out_folder, exist_ok=True)

    simlist_file = os.path.join(folder, "simlist.npy")
    try:
        # allow_pickle executes pickled code: only load trusted files.
        fsims = np.load(simlist_file, allow_pickle=True)
        print(f"Loaded {simlist_file}")
    except Exception:
        print("Creating simlist")
        # os.path.join works whether or not `folder` ends with a slash.
        sims = glob(os.path.join(folder, 'filtered', '*', ''))
        fsims = simlist(sims, os.path.join(folder, 'filtered', 'filtered.pdb'))

    metr = Metric(fsims, skip=skip)
    metr.set(metrics)
    # Check if this gives problems to ITS

    model_file = os.path.join(out_folder, "model.dat")
    try:
        model = Model(file=model_file)
        out_data = model.data
        print(f"Loading model: {model_file}")
    except Exception:
        # No usable saved model: project, reduce and cluster from scratch.
        if tica and ticadim:
            from htmd.projections.tica import TICA
            print("Projecting TICA")
            reducer = TICA(metr, tica_lag)
            out_data = reducer.project(ticadim)
        elif not tica and ticadim:
            from htmd.projections.gwpca import GWPCA
            data = metr.project()
            data.dropTraj()
            print("using GWPCA")
            reducer = GWPCA(data, tica_lag)
            out_data = reducer.project(ticadim)
        else:
            print("Not using TICA")
            data = metr.project()
            data.dropTraj()
            out_data = data

        if data_fstep:
            out_data.fstep = data_fstep

        try:
            out_data.cluster(MiniBatchKMeans(n_clusters=clu), mergesmall=5)
        except Exception as e:
            # Same exception type and message as before, with the cause kept.
            raise Exception("Error " + str(e)) from e

        model = Model(out_data)

    model.plotTimescales(plot=False, save=f"{out_folder}/1_its.png")

    if macro_N:
        model.markovModel(model_lag, macro_N, units=model_units)

    if bulk_split:
        try:
            print("Starting bulk splitting")
            create_bulk(model, bulk_split)
        except Exception as e:
            print("Could not perform the bulk splitting")
            print(e)

    model.eqDistribution(plot=False, save=f"{out_folder}/1.2_eqDistribution.png")

    if rg_analysis:
        # Imported under another name so the boolean parameter is not rebound.
        from IDP_htmd.IDP_analysis import rg_analysis as compute_rg
        mol = Molecule(model.data.simlist[0].molfile)
        rg_data = compute_rg(model, skip=skip)
        plot_RG(rg_data, mol, save=f"{out_folder}/1.4_rg.png")

    if save:
        model.save(f"{out_folder}/model.dat")

    return model
md.datapath = path.join(home(), 'data', 'adaptive', 'data') md.run() # Cleaning up inputodel = glob(path.join(home(), 'data', 'adaptive', 'input', 'e2*')) for i in inputodel: shutil.rmtree(i, ignore_errors=True, acemd='/shared/acemd/bin/acemd') os.remove(path.join(home(), 'data', 'adaptive', 'input', 'e2_writeinputs.log'))''' import htmd import os import shutil from htmd.queues.localqueue import LocalGPUQueue from htmd.simlist import Frame, simlist from htmd.util import tempname filedir = htmd.home.home() + '/data/adaptive/' sims = simlist(glob(os.path.join(filedir, 'data', '*', '')), glob(os.path.join(filedir, 'input', '*', '')), glob(os.path.join(filedir, 'input', '*', ''))) outf = tempname() os.makedirs(outf) f = Frame(sims[0], 0, 5) _writeInputsFunction(1, f, 2, outf, 'input.coor') mol = Molecule(sims[0]) mol.read(os.path.join(outf, 'e2s2_e1s1p0f5', 'input.coor')) shutil.rmtree(outf)
from htmd.molecule.molecule import Molecule from htmd.home import home import numpy as np from os import path mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb')) mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc')) ref = mol.copy() ref.coords = np.atleast_3d(ref.coords[:, :, 0]) metr = MetricTMscore(ref, 'protein and name CA') data = metr.project(mol) lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32) assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken' from htmd.simlist import simlist from htmd.projections.metric import Metric dd = home(dataDir="adaptive") fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')], path.join(dd, 'generators', '1', 'structure.pdb')) ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb')) metr2 = Metric(fsims) metr2.set(MetricTMscore(ref, 'protein and name CA')) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 1)