def setUpClass(self):
    """Project the bundled ``adaptive`` test data once and cache it on ``self``.

    Two projections of the same filtered simlist are stored:
    ``self.data1`` (contacts ``MetricDistance``) and ``self.data2``
    (``MetricDihedral`` with default arguments).
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    trajectory_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
    input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))
    all_sims = simlist(trajectory_dirs, input_dirs)

    # Filtered copies are written to a temporary location, keeping only
    # the "not water" selection.
    filtered_sims = simfilter(all_sims, tempname(), "not water")

    contacts = MetricDistance(
        "protein and resid 10 and name CA",
        "resname BEN and noh",
        periodic="selections",
        metric="contacts",
        groupsel1="residue",
        threshold=4,
    )
    projector = Metric(filtered_sims)
    projector.set(contacts)
    self.data1 = projector.project()

    # Re-use the same Metric object with a different projection.
    projector.set(MetricDihedral())
    self.data2 = projector.project()
def _algorithm(self):
    """Build a Markov model from the data gathered so far and write new inputs.

    The simulations are collected (and optionally filtered), projected with
    ``self.projection``, optionally reduced with TICA, clustered and turned
    into a ``Model`` stored in ``self._model``. Spawn frames are then selected
    with ``self._getSpawnFrames`` and handed to ``self._writeInputs``.
    """
    logger.info('Postprocessing new data')

    trajectory_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(trajectory_dirs, structures, input_dirs)
    if self.filter:
        sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

    metr = Metric(sims, skip=self.skip)
    metr.set(self.projection)

    if self.ticadim > 0:
        # TICA is given already-projected data: handing it the Metric object
        # itself was found to be far too slow.
        projected = metr.project()
        lag = int(max(2, np.ceil(self.ticalag)))
        datadr = TICA(projected, lag).project(self.ticadim)
    else:
        datadr = metr.project()

    # Preferably this would happen before any projection, since corrupted
    # simulations can affect TICA.
    datadr.dropTraj()

    n_clusters = self._numClusters(datadr.numFrames)
    datadr.cluster(self.clustmethod(n_clusters=n_clusters))

    self._model = Model(datadr)
    self._model.markovModel(self.lag, self._numMacrostates(datadr))
    if self.save:
        self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

    relFrames = self._getSpawnFrames(self._model, datadr)
    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def setUpClass(self):
    """Prepare the shared projections used by the tests of this class.

    A simlist is built from the ``adaptive`` data directory returned by
    ``home``, filtered with the ``'not water'`` selection, and projected twice:
    once with a contacts ``MetricDistance`` (``self.data1``) and once with a
    default ``MetricDihedral`` (``self.data2``).
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    data_glob = join(home(dataDir='adaptive'), 'data', '*', '')
    input_glob = join(home(dataDir='adaptive'), 'input', '*')
    unfiltered = simlist(glob(data_glob), glob(input_glob))
    water_free = simfilter(unfiltered, tempname(), 'not water')

    metric = Metric(water_free)

    contact_projection = MetricDistance('protein and resid 10 and name CA',
                                        'resname BEN and noh',
                                        metric='contacts',
                                        groupsel1='residue',
                                        threshold=4)
    metric.set(contact_projection)
    self.data1 = metric.project()

    metric.set(MetricDihedral())
    self.data2 = metric.project()
def _algorithm(self):
    """Build a Markov model and spawn new simulations from a combined reward.

    After projecting, (optionally) TICA-reducing and clustering the data, a
    ``Model`` is built and stored in ``self._model``. A reward is then computed
    as ``directed + self.ucscale * undirected``, both components being passed
    through ``self._featScale`` first, and used to pick the spawn frames.
    """
    logger.info('Postprocessing new data')

    sims = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )
    if self.filter:
        sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

    metr = Metric(sims, skip=self.skip)
    metr.set(self.projection)

    if self.ticadim > 0:
        # Project first: giving TICA the Metric object directly was too slow.
        projected = metr.project()
        lag = int(max(2, np.ceil(self.ticalag)))
        datadr = TICA(projected, lag).project(self.ticadim)
    else:
        datadr = metr.project()

    # Preferably done before any projection; corrupted sims can affect TICA.
    datadr.dropTraj()
    datadr.cluster(
        self.clustmethod(n_clusters=self._numClusters(datadr.numFrames)))

    model = Model(datadr)
    self._model = model
    self._model.markovModel(self.lag, self._numMacrostates(datadr))
    if self.save:
        self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

    def per_state(values):
        # Re-index per-cluster values according to the configured state type.
        if self.statetype == 'micro':
            values = values[model.cluster_ofmicro]
        if self.statetype == 'macro':
            values = macroAccumulate(model, values[model.cluster_ofmicro])
        return values

    # Undirected component: negated counts, so lower counts score higher.
    uc = per_state(-model.data.N)

    # Directed component.
    dc = per_state(
        self._calculateDirectedComponent(sims, model.data.St, model.data.N))

    uc = self._featScale(uc)
    dc = self._featScale(dc)
    reward = dc + self.ucscale * uc

    relFrames = self._getSpawnFrames(reward, self._model, datadr)
    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def fitBaselineWithMetrics(projected_simlist, base_simlist, metric, ticalag=25, ticadim=4,
                           ticaunits='frames', tica=False):
    """Project simulations through a TICA transformation fitted on a baseline simlist.

    Implements a "MetricB" that returns the TICA transformation of a "MetricA":

    1) Calculate MetricA (``metric``) for each trajectory.
    2) TICA-transform it using the TICA object fitted on ``base_simlist``
       (``basetica.tic.transform``).

    Parameters
    ----------
    projected_simlist : simlist
        Simulations to project through the baseline TICA.
    base_simlist : simlist
        Simulations on which the baseline TICA is built.
    metric : projection object
        Projection set on the baseline ``Metric``; its ``project(mol)`` method
        is also called on every molecule of ``projected_simlist``.
    ticalag : int
        Lag passed to ``TICA``. Default = 25
    ticadim : int
        Value passed as ``dim`` to ``basetica.tic.set_params``. Default = 4
    ticaunits : str
        Value passed as ``units`` to ``TICA``. Default = 'frames'
    tica : bool
        If true, the baseline ``TICA`` object is returned as well.

    Returns
    -------
    projectdata
        The ``dat`` attribute of the projected data.
    basetica : TICA
        Only returned (as second element of a tuple) when ``tica`` is true.
    """
    from htmd.projections.tica import TICA
    from htmd.projections.metric import Metric

    # Baseline TICA, built on the reference simulations.
    basetica_metric = Metric(base_simlist)
    basetica_metric.set(metric)
    basetica = TICA(basetica_metric, ticalag, units=ticaunits)
    basetica.tic.set_params(dim=ticadim)

    def metricToTica(mol, metric, tica):
        # Per-molecule projection: raw metric followed by the baseline transform.
        metric_data = metric.project(mol)
        return tica.tic.transform(metric_data)

    # The (function, args) tuple makes Metric call metricToTica for each molecule.
    tica_metric = Metric(projected_simlist)
    tica_metric.set((metricToTica, (metric, basetica)))
    projectdata = tica_metric.project().dat

    if tica:
        return projectdata, basetica
    return projectdata
def get_data(model, metr, skip=1):
    """Return the data obtained by projecting a metric over a model's simulations.

    Parameters
    ----------
    model : htmd.model.Model or numpy.ndarray
        Either a model, whose ``data.simlist`` is used, or the simlist itself.
    metr : projection
        Projection handed to ``Metric.set``.
    skip : int
        Passed to ``Metric`` as ``skip``. Default = 1

    Returns
    -------
    data
        The result of ``Metric.project()``.

    Raises
    ------
    TypeError
        If ``model`` is neither a ``Model`` nor a ``numpy.ndarray``.
    """
    from htmd.model import Model
    from htmd.projections.metric import Metric

    if not isinstance(model, (Model, np.ndarray)):
        raise TypeError(
            "Model should be either an htmd.model.Model or a simlist")

    sims = model.data.simlist if isinstance(model, Model) else model

    projector = Metric(sims, skip=skip)
    projector.set(metr)
    return projector.project()
def _getGoalData(self, sims):
    """Project ``sims`` with ``self.goalfunction`` and return the projected data.

    The projection uses ``self.skip`` and is bracketed by debug log messages.
    """
    from htmd.projections.metric import Metric

    logger.debug('Starting projection of directed component')
    goal_metric = Metric(sims, skip=self.skip)
    goal_metric.set(self.goalfunction)
    goal_data = goal_metric.project()
    logger.debug('Finished calculating directed component')
    return goal_data
def _getData(self, sims):
    """Project ``sims`` with ``self.projection`` and optionally reduce with TICA.

    When ``self.ticadim`` is positive the projected data is cleaned with
    ``dropTraj`` and reduced to ``self.ticadim`` TICA dimensions; otherwise the
    raw projection is used. ``dropTraj`` is called on the result before it is
    returned.
    """
    metr = Metric(sims, skip=self.skip)
    metr.set(self.projection)

    if self.ticadim <= 0:
        datadr = metr.project()
    else:
        # Project explicitly: passing the Metric object to TICA was too slow.
        projected = metr.project()
        projected.dropTraj()  # Drop before TICA to avoid broken trajectories

        # Keep the lag within 1 < ticalag < (trajLen / 2).
        half_shortest = np.min(projected.trajLengths) / 2
        ticalag = int(np.ceil(max(2, min(half_shortest, self.ticalag))))

        datadr = TICA(projected, ticalag).project(self.ticadim)

    # Preferably done before any projection; corrupted sims can affect TICA.
    datadr.dropTraj()
    return datadr
def _algorithm(self):
    """Postprocess the collected data, build a Markov model and write new inputs.

    The simulations are filtered, projected with a distance metric
    (``MetricDistance`` when ``self.metricsel2`` is set, ``MetricSelfDistance``
    otherwise), optionally reduced with TICA and clustered. A ``Model`` is
    built, the number of macrostates is limited by the number of implied
    timescales above the lag, and frames are sampled from the microstates
    chosen by ``self._criteria`` / ``self._spawn``.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
    filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

    if getattr(self, 'metricsel2', None) is not None:
        proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic for the number of clusters, never below 100 ...
    K = int(max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50), 100))
    # ... but capped at a third of the available frames.
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Too few clusters left: cluster again without `mergesmall` and
        # sample frames with replacement later on.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # Cast to int: np.ceil returns a float, and this value is used as the
        # number of implied timescales (`nits`) below.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

    # Limit the number of macrostates by the number of timescales above the lag.
    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
    macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
def _algorithm(self):
    """Select random frames for respawning.

    ``self.nmax - self.running`` absolute frame indices are drawn with
    ``np.random.randint`` over all projected frames, converted to simulation
    frames and passed to ``self._writeInputs``.
    """
    from htmd.projections.metric import Metric
    from htmd.molecule.molecule import Molecule
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    trajectory_dirs = glob(path.join(self.datapath, '*', ''))
    structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
    input_dirs = glob(path.join(self.inputpath, '*', ''))
    sims = simlist(trajectory_dirs, structures, input_dirs)

    # The projection is only needed to obtain a data object spanning every frame.
    metr = Metric(sims)
    reference = Molecule(sims[0].molfile)
    metr.projection(MetricCoordinate(reference, 'protein and name CA', 'protein and name CA'))
    data = metr.project()

    n_spawn = self.nmax - self.running
    random_frames = np.random.randint(0, data.numFrames, n_spawn)
    self._writeInputs(data.abs2sim(random_frames))
def _computeChiDihedrals(self, fstep=0.1, skip=1):
    """Project chi1 dihedrals over the model's simulations and store them in ``self.chi``.

    Parameters
    ----------
    fstep : float
        Value assigned to the ``fstep`` attribute of the projected data.
    skip : int
        Passed to ``Metric`` as ``skip``.
    """
    protmol = self.mol.copy()
    protmol.filter('protein')

    # One candidate dihedral per CA atom's resid; entries for which
    # Dihedral.chi1 returns None are left out.
    ca_atoms = self.mol.atomselect('protein and name CA')
    candidates = (Dihedral.chi1(protmol, resid) for resid in self.mol.resid[ca_atoms])
    chis = [chi for chi in candidates if chi is not None]

    metr = Metric(self.model.data.simlist, skip=skip)
    metr.set(MetricDihedral(chis, sincos=False))
    projected = metr.project()
    projected.fstep = fstep
    self.chi = projected
def _algorithm(self):
    """Select random frames for respawning.

    Draws ``self.nmax - self.running`` random absolute frame indices from the
    projected data and writes the corresponding simulation frames as inputs.
    """
    from htmd.projections.metric import Metric
    from htmd.molecule.molecule import Molecule
    from htmd.projections.metriccoordinate import MetricCoordinate
    from htmd.simlist import simlist

    sims = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')),
    )

    ca_selection = 'protein and name CA'
    metr = Metric(sims)
    metr.projection(
        MetricCoordinate(Molecule(sims[0].molfile), ca_selection, ca_selection))
    data = metr.project()

    how_many = self.nmax - self.running
    picked = np.random.randint(0, data.numFrames, how_many)
    simframes = data.abs2sim(picked)
    self._writeInputs(simframes)
def removeCorrupted():
    """Move simulations flagged by ``corruptMetric`` from ``./filtered`` to ``/tmp``.

    A simlist is built from ``./filtered/*/`` with ``./filtered/filtered.pdb``
    and projected with ``corruptMetric``. Every simulation whose projection
    sums to a non-zero value has the directory of its first trajectory moved
    to ``/tmp/<that directory>``.

    This is best effort: if the simlist cannot be built, a message is printed
    and nothing is moved.
    """
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from os import path
    from glob import glob
    import shutil

    print("Removing Corrupted Simulations")
    try:
        sims = simlist(glob("./filtered/*/"), "./filtered/filtered.pdb")
    except Exception as err:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit are
        # no longer swallowed; the reason for skipping is now reported.
        print(f"Could not build the simlist, nothing removed: {err}")
        return

    met = Metric(sims)
    met.set(corruptMetric)
    dat = met.project()

    for projection, sim in zip(dat.dat, dat.simlist):
        if np.sum(projection):
            pt = path.dirname(sim.trajectory[0])
            shutil.move(pt, f"/tmp/{pt}")
def _precalculateData(self, metricData, folder, skip=1, fstep=None):
    """Project ``metricData`` over the simulations of all analysed epochs.

    Parameters
    ----------
    metricData : projection
        Projection handed to ``Metric.set``.
    folder : str
        Not used by this method.
    skip : int
        Passed to ``Metric`` as ``skip``. Default = 1
    fstep : float, optional
        If truthy, assigned to the ``fstep`` attribute of the projected data.

    Returns
    -------
    data
        The result of ``Metric.project()``.
    """
    from htmd.projections.metric import Metric

    max_epoch = max(self.epoch_analysis)
    # Simulation indexes of every epoch up to the last analysed one. The list
    # is given to np.concatenate directly: wrapping it in np.array first fails
    # on recent NumPy versions when epochs hold different numbers of sims.
    max_epoch_sim = np.concatenate([
        self.epoch_sim_indexes[epoch]
        for epoch in range(1, max_epoch + 1)
        if epoch in self.epoch_sim_indexes
    ])

    if self.test:
        # Test mode: only the first 100 simulations are projected.
        sims = self._sims[0:100]
    else:
        sims = np.array([self._sims[i] for i in max_epoch_sim])

    metr = Metric(sims, skip=skip)
    metr.set(metricData)
    data = metr.project()
    if fstep:
        data.fstep = fstep
    return data
def analyze_folder(folder=None, out_folder="/tmp", skip=1, metrics=None, clu=500, tica=True,
                   ticadim=5, tica_lag=20, model_lag=10, model_units='ns', macro_N=10,
                   bulk_split=False, fes=True, rg_analysis=True, save=True, data_fstep=None):
    """Analysis script to create a Markov State Model.

    Creates and returns a Markov State Model given a data folder. Intended to
    follow up the evolution of an adaptive sampling run. Allows saving the
    model and several informative plots.

    If ``{out_folder}/model.dat`` can be loaded it is used as is; otherwise the
    data is projected, reduced, clustered and a new model is built.

    Parameters
    ----------
    folder : str
        Data folder where adaptive is running. ``{folder}/simlist.npy`` is
        loaded when available; otherwise the simlist is built from
        ``{folder}/filtered``.
    out_folder : str
        Output folder to store derived data
    skip : int
        Number of frames to skip while projecting the MD data
    metrics : [:class: `Metric` object]
        Metric array used to project the data
    clu : int
        Number of clusters to create using the MiniBatchKMeans method.
    tica : bool
        Whether to use TICA or GWPCA for dimensionality reduction
    ticadim : int
        Number of dimensions to project the data onto. If falsy, the model
        is created using the raw projected data
    tica_lag : int, optional
        Lag passed to ``TICA`` (or ``GWPCA``)
    model_lag : int
        Lag used to create the model, expressed in ``model_units``
    model_units : str, optional
        Units of ``model_lag``
    macro_N : int
        Number of macrostates to split the final Markov State Model. If falsy,
        no Markov model is built and the steps that need one are skipped.
    bulk_split : optional
        If truthy, passed to ``create_bulk`` to split the bulk state
    fes : bool, optional
        Currently unused: the free energy surface plot is disabled.
    rg_analysis : bool, optional
        If true, a plot with information relative to the radius of gyration
        of the molecule will be created.
    save : bool, optional
        If true, the model will be saved in the output folder
    data_fstep : float, optional
        If truthy, assigned to the ``fstep`` attribute of the projected data

    Returns
    -------
    :class:`Model`
        Final model
    """
    from htmd.model import Model
    from htmd.molecule.molecule import Molecule
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from sklearn.cluster import MiniBatchKMeans
    from IDP_htmd.IDP_model import plot_RG
    from IDP_htmd.model_utils import create_bulk
    from glob import glob
    import os

    try:
        os.mkdir(out_folder)
    except FileExistsError:
        # Only the "already there" case is expected; other errors propagate.
        print("Folder already exists")

    try:
        fsims = np.load(f"{folder}/simlist.npy", allow_pickle=True)
        print(f"Loaded {folder}/simlist.npy")
    except Exception:
        print("Creating simlist")
        # os.path.join works whether or not `folder` ends with a separator.
        sims = glob(os.path.join(folder, 'filtered', '*', ''))
        fsims = simlist(sims, os.path.join(folder, 'filtered', 'filtered.pdb'))

    metr = Metric(fsims, skip=skip)
    metr.set(metrics)

    # TODO: check if this gives problems to ITS
    try:
        model = Model(file=f"{out_folder}/model.dat")
        out_data = model.data
        print(f"Loading model: {out_folder}/model.dat")
    except Exception:
        # No usable saved model: build one from scratch.
        if tica and ticadim:
            from htmd.projections.tica import TICA
            print("Projecting TICA")
            tica_projector = TICA(metr, tica_lag)
            out_data = tica_projector.project(ticadim)
        elif not tica and ticadim:
            from htmd.projections.gwpca import GWPCA
            data = metr.project()
            data.dropTraj()
            print("using GWPCA")
            gwpca = GWPCA(data, tica_lag)
            out_data = gwpca.project(ticadim)
        else:
            print("Not using TICA")
            data = metr.project()
            data.dropTraj()
            out_data = data

        if data_fstep:
            out_data.fstep = data_fstep

        try:
            out_data.cluster(MiniBatchKMeans(n_clusters=clu), mergesmall=5)
        except Exception as e:
            raise Exception("Error " + str(e)) from e

        model = Model(out_data)

    model.plotTimescales(plot=False, save=f"{out_folder}/1_its.png")

    if macro_N:
        model.markovModel(model_lag, macro_N, units=model_units)

        if bulk_split:
            try:
                print("Starting bulk splitting")
                create_bulk(model, bulk_split)
            except Exception as e:
                print("Could not perform the bulk splitting")
                print(e)

        model.eqDistribution(plot=False, save=f"{out_folder}/1.2_eqDistribution.png")

    if rg_analysis:
        # Aliased so the import does not shadow the `rg_analysis` flag.
        from IDP_htmd.IDP_analysis import rg_analysis as compute_rg
        mol = Molecule(model.data.simlist[0].molfile)
        rg_data = compute_rg(model, skip=skip)
        plot_RG(rg_data, mol, save=f"{out_folder}/1.4_rg.png")

    if save:
        model.save(f"{out_folder}/model.dat")

    return model
def _algorithm(self):
    """Run one epoch of the bandit: build the MSM, score actions and write new inputs.

    The data is projected with ``self.projection`` (and optionally with
    ``self.goalfunction``), an MSM is created through ``self._createMSM``,
    the action space ``data_q`` is chosen according to ``self.actionspace``,
    and Q-values / UCB values are computed per action state. The states with
    the highest UCB values are used to select the spawn frames.

    Returns
    -------
    bool
        False if ``self._checkNFrames`` rejects the data, True once the new
        inputs have been written.

    Raises
    ------
    RuntimeError
        If the goal function could not project all trajectories that the
        MSM projection could.
    ValueError
        If simulations have to be spawned but no action state is available.
    """
    from htmd.kinetics import Kinetics

    sims = self._getSimlist()
    metr = Metric(sims, skip=self.skip)
    metr.set(self.projection)

    data = metr.project()
    data.dropTraj()  # Drop before TICA to avoid broken trajectories

    if self.goalfunction is not None:
        goaldata = self._getGoalData(data.simlist)
        if len(data.simlist) != len(goaldata.simlist):
            raise RuntimeError('The goal function was not able to project all trajectories that the MSM projection could. Check for possible errors in the goal function.')
        goaldataconcat = np.concatenate(goaldata.dat)
        if self.save:
            makedirs('saveddata', exist_ok=True)
            goaldata.save(path.join('saveddata', 'e{}_goaldata.dat'.format(self._getEpoch())))

    # TICA is run on the projected data: passing the Metric object was too slow.
    if self.ticadim > 0:
        # Keep the lag within 1 < ticalag < (trajLen / 2)
        ticalag = int(np.ceil(max(2, min(np.min(data.trajLengths) / 2, self.ticalag))))
        tica = TICA(data, ticalag)
        datatica = tica.project(self.ticadim)
        if not self._checkNFrames(datatica):
            return False
        self._createMSM(datatica)
    else:
        if not self._checkNFrames(data):
            return False
        self._createMSM(data)

    confstatdist = self.conformationStationaryDistribution(self._model)

    # Choose the data object whose clusters define the action states.
    if self.actionspace == 'metric':
        if not data.K:
            data.cluster(self.clustmethod(n_clusters=self._numClusters(data.numFrames)))
        data_q = data.copy()
    elif self.actionspace == 'goal':
        data_q = goaldata.copy()
    elif self.actionspace == 'tica':
        data_q = datatica.copy()
    elif self.actionspace == 'ticapcca':
        data_q = datatica.copy()
        # Replace every cluster assignment by its macrostate.
        for traj in data_q.trajectories:
            traj.cluster = self._model.macro_ofcluster[traj.cluster]
        data_q.K = self._model.macronum

    if self.recluster:
        print('Reclustering with {}'.format(self.reclusterMethod))
        data_q.cluster(self.reclusterMethod)

    numstates = data_q.K
    print('Numstates: {}'.format(numstates))
    currepoch = self._getEpoch()
    q_values = np.zeros(numstates, dtype=np.float32)
    n_values = np.zeros(numstates, dtype=np.int32)

    if self.random:  # If random mode respawn from random action states
        action_sel = np.zeros(numstates, dtype=int)
        N = self.nmax - self._running
        randomactions = np.bincount(np.random.randint(numstates, size=N))
        action_sel[:len(randomactions)] = randomactions
        if self.save_qval:
            makedirs('saveddata', exist_ok=True)
            np.save(path.join('saveddata', 'e{}_actions.npy'.format(currepoch)), action_sel)
        relFrames = self._getSpawnFrames_UCB(action_sel, data_q)
        self._writeInputs(data.rel2sim(np.concatenate(relFrames)))
        return True

    if self.goalfunction is not None:
        # For every cluster in data_q, get the max score and initialize
        if self.goal_preprocess is not None:
            goaldataconcat = self.goal_preprocess(goaldataconcat)
        qstconcat = np.concatenate(data_q.St)
        statemaxes = np.zeros(numstates)
        np.maximum.at(statemaxes, qstconcat, np.squeeze(goaldataconcat))
        if not self.pucb:
            goalenergies = -Kinetics._kB * self.temperature * np.log(1 - statemaxes)
            q_values = goalenergies
            # Needs self.nframes to be set properly!
            n_values += int((self.nframes / self._numClusters(self.nframes)) * self.goal_init)

    rewardtraj = np.arange(data_q.numTrajectories)  # Recalculate reward for all states
    rewards = self.getRewards(rewardtraj, data_q, confstatdist, numstates, self.reward_method, self.reward_mode, self.reward_window)
    for i in range(numstates):
        if len(rewards[i]) == 0:
            continue
        q_values[i] = updatingMean(q_values[i], n_values[i], rewards[i])
    n_values += np.array([len(x) for x in rewards])

    if self.save_qval:
        makedirs('saveddata', exist_ok=True)
        np.save(path.join('saveddata', 'e{}_qval.npy'.format(currepoch)), q_values)
        np.save(path.join('saveddata', 'e{}_nval.npy'.format(currepoch)), n_values)

    if self.pucb:
        # NOTE(review): `statemaxes` is only defined when a goal function is
        # set -- confirm pucb is never enabled without one.
        ucb_values = np.array([self.count_pucb(q_values[clust], self.exploration, statemaxes[clust], currepoch + 1, n_values[clust]) for clust in range(numstates)])
    else:
        ucb_values = np.array([self.count_ucb(q_values[clust], self.exploration, currepoch + 1, n_values[clust]) for clust in range(numstates)])

    if self.save_qval:
        makedirs('saveddata', exist_ok=True)
        np.save(path.join('saveddata', 'e{}_ucbvals.npy'.format(currepoch)), ucb_values)

    N = self.nmax - self._running
    if self.actionpool <= 0:
        self.actionpool = N

    topactions = np.argsort(-ucb_values)[:self.actionpool]
    if N > 0 and len(topactions) == 0:
        raise ValueError('No action states available to spawn {} simulations from.'.format(N))

    # Sampling without replacement cannot return more items than available,
    # so draw at most len(topactions) distinct actions; the loop below
    # repeats actions when that is fewer than N.
    action = np.random.choice(topactions, min(N, len(topactions)), replace=False)

    action_sel = np.zeros(numstates, dtype=int)
    action_sel[action] += 1
    while np.sum(action_sel) < N:  # When K is lower than N repeat some actions
        for a in action:
            action_sel[a] += 1
            if np.sum(action_sel) == N:
                break

    if self.save_qval:
        np.save(path.join('saveddata', 'e{}_actions.npy'.format(currepoch)), action_sel)
    relFrames = self._getSpawnFrames_UCB(action_sel, data_q)
    self._writeInputs(data.rel2sim(np.concatenate(relFrames)))
    return True
from htmd.molecule.molecule import Molecule from htmd.home import home import numpy as np from os import path mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb')) mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc')) ref = mol.copy() ref.coords = np.atleast_3d(ref.coords[:, :, 0]) metr = MetricTMscore(ref, 'protein and name CA') data = metr.project(mol) lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32) assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken' from htmd.simlist import simlist from htmd.projections.metric import Metric dd = home(dataDir="adaptive") fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')], path.join(dd, 'generators', '1', 'structure.pdb')) ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb')) metr2 = Metric(fsims) metr2.projection(MetricTMscore(ref, 'protein and name CA')) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 1)
_getPlumedRoot() except: print("Tests in %s skipped because plumed executable not found." % __file__) sys.exit() # Simlist dd = htmd.home.home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') metr = Metric(fsims) metr.set(MetricPlumed2( ['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6'])) data2 = metr.project() # One simulation testpath=os.path.join(htmd.home.home(), 'data', '1kdx') mol = Molecule(os.path.join(testpath, '1kdx_0.pdb')) mol.read(os.path.join(htmd.home.home(), 'data', '1kdx', '1kdx.dcd')) metric = MetricPlumed2(['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6']) data = metric.project(mol) ref = np.array([0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392, 19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669, 22.484084, 20.893447, 18.791701, 21.833056, 19.901318]) assert np.all(np.abs(ref - data[:, 0]) < 0.01), 'Plumed demo calculation is broken'
from glob import glob from htmd.projections.metric import Metric from htmd.projections.metricdistance import MetricDistance from htmd.projections.metricdihedral import MetricDihedral from htmd.util import tempname from htmd.home import home from os.path import join testfolder = home(dataDir='adaptive') sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb'))) fsims = simfilter(sims, tempname(), 'not water') metr = Metric(fsims) metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts', groupsel1='residue', threshold=4)) data1 = metr.project() metr.set(MetricDihedral()) data2 = metr.project() # Testing combining of metrics data1.combine(data2) # Testing dimensions assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct' assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct' assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct' # Testing dimension dropping / keeping datatmp = data1.copy() data1.dropDimensions(range(9)) assert np.array_equal(data1.description.shape, (888, 3)), 'dropDimensions not working correct'
except: print("Tests in %s skipped because plumed executable not found." % __file__) sys.exit() import doctest doctest.testmod() # Simlist dd = htmd.home(dataDir="adaptive") fsims = htmd.simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') metr = Metric(fsims) metr.projection( MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6'])) data2 = metr.project() # One simulation testpath = os.path.join(htmd.home(), 'data', '1kdx') mol = Molecule(os.path.join(testpath, '1kdx_0.pdb')) mol.read(os.path.join(htmd.home(), 'data', '1kdx', '1kdx.dcd')) metric = MetricPlumed2( ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6']) data = metric.project(mol) ref = np.array([ 0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392, 19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669, 22.484084, 20.893447, 18.791701, 21.833056, 19.901318 ]) assert np.all(
if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from htmd.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.set(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)),
return datatica if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from htmd.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.projection(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[ 3.69098878, -0.33862674, 0.85779184], [ 3.77816105, -0.31887317, 0.87724227], [ 3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.all(datatica.description.iloc[[587, 588]].type == 'tica') assert np.all(datatica.description.iloc[range(587)].type == 'distance') print('In-memory TICA with subset of dimensions passed test.')
import numpy as np from os import path mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb')) mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc')) ref = mol.copy() ref.coords = np.atleast_3d(ref.coords[:, :, 0]) metr = MetricRmsd(ref, 'protein and name CA') data = metr.project(mol) lastrmsd = np.array([1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261, 1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752, 1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436 , 1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617], dtype=np.float32) assert np.all(np.abs(data[-20:] - lastrmsd) < 0.001), 'Coordinates calculation is broken' from htmd.simlist import simlist from htmd.projections.metric import Metric dd = home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') ref = Molecule(dd+"/generators/1/structure.pdb") metr2 = Metric(fsims) metr2.set(MetricRmsd(ref, 'protein and name CA')) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6,1) pass
An array containing the null data. """ trajlen = mol.numFrames data = np.zeros((trajlen, self._ndim), dtype=np.float32) return data if __name__ == "__main__": import htmd.home from htmd.simlist import simlist from htmd.projections.metric import Metric import htmd.projections.metricnull dd = htmd.home.home(dataDir="adaptive") fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'], dd + '/generators/1/structure.pdb') metr2 = Metric(fsims) metr2.projection(htmd.projections.metricnull.MetricNull(2)) data2 = metr2.project() assert data2.trajectories[0].projection.shape == (6, 2) metr1 = Metric(fsims) metr1.projection(htmd.projections.metricnull.MetricNull(1)) data1 = metr1.project() assert data1.trajectories[0].projection.shape == (6, 1) pass
from htmd.home import home from os.path import join testfolder = home(dataDir='adaptive') sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb'))) fsims = simfilter(sims, tempname(), 'not water') metr = Metric(fsims) metr.set( MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts', groupsel1='residue', threshold=4)) data1 = metr.project() metr.set(MetricDihedral()) data2 = metr.project() # Testing combining of metrics data1.combine(data2) # Testing dimensions assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct' assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct' assert np.array_equal( np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'
def _algorithm(self):
    """Postprocess the collected data, build a Markov model and write new inputs.

    Filters the simulations, projects them with ``MetricDistance`` (when
    ``self.metricsel2`` is set) or ``MetricSelfDistance``, optionally applies
    TICA, clusters the result and builds a ``Model``. The number of
    macrostates is limited by the implied timescales above the lag. Frames
    are finally sampled from the microstates selected through
    ``self._criteria`` and ``self._spawn`` and written as new inputs.
    """
    logger.info('Postprocessing new data')
    datalist = simlist(
        glob(path.join(self.datapath, '*', '')),
        glob(path.join(self.inputpath, '*', 'structure.pdb')),
        glob(path.join(self.inputpath, '*', '')))
    filtlist = simfilter(datalist,
                         self.filteredpath,
                         filtersel=self.filtersel)

    if getattr(self, 'metricsel2', None) is not None:
        proj = MetricDistance(self.metricsel1,
                              self.metricsel2,
                              metric=self.metrictype)
    else:
        proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
    metr = Metric(filtlist, skip=self.skip)
    metr.projection(proj)
    data = metr.project()

    data.dropTraj()
    if self.ticadim > 0:
        tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
        datadr = tica.project(self.ticadim)
    else:
        datadr = data

    # Heuristic number of clusters: at least 100, at most a third of the frames.
    K = int(
        max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
            100))
    if K > datadr.numFrames / 3:
        K = int(datadr.numFrames / 3)

    datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
    replacement = False
    if datadr.K < 10:
        # Fewer than 10 clusters left: redo the clustering without
        # `mergesmall` and allow sampling with replacement.
        datadr.cluster(self.clustmethod(n_clusters=K))
        replacement = True

    model = Model(datadr)
    macronum = self.macronum
    if datadr.K < macronum:
        # np.ceil returns a float; the value is used as an integer count
        # (`nits`) below, so convert it explicitly.
        macronum = int(np.ceil(datadr.K / 2))
        logger.warning(
            'Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

    # Count the implied timescales above the lag to cap the macrostates.
    from pyemma.msm import timescales_msm
    timesc = timescales_msm(datadr.St.tolist(), lags=self.lag,
                            nits=macronum).get_timescales()
    macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

    model.markovModel(self.lag, macronum)
    p_i = self._criteria(model, self.method)
    (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
    logger.debug('spawncounts {}'.format(spawncounts))
    stateIdx = np.where(spawncounts > 0)[0]
    _, relFrames = model.sampleStates(stateIdx,
                                      spawncounts[stateIdx],
                                      statetype='micro',
                                      replacement=replacement)
    logger.debug('relFrames {}'.format(relFrames))

    self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))