def _maccsClustering(rdkit_mols):
    """
    Builds the tanimoto distance matrix of the molecules from their MACCS keys

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array containing the tanimoto matrix
    """
    from rdkit.Chem import MACCSkeys

    # Compute the MACCS keys of every molecule (iteration wrapped in tqdm)
    maccs_keys = [MACCSkeys.GenMACCSKeys(mol) for mol in tqdm(rdkit_mols)]

    # One delayed call per fingerprint, each one compared against the full set
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(maccs_keys), desc='MACCS Distance')
    rows = submit(delayed(TanimotoDistances)(key, maccs_keys) for key in maccs_keys)

    return np.array(rows)
def _pathFingerprintsClustering(rdkit_mols):
    """
    Builds the tanimoto distance matrix of the molecules from their path fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array containing the tanimoto matrix
    """
    from rdkit.Chem.Fingerprints import FingerprintMols

    # Compute the path fingerprint of every molecule (iteration wrapped in tqdm)
    path_fps = [FingerprintMols.FingerprintMol(mol) for mol in tqdm(rdkit_mols)]

    # One delayed call per fingerprint, each one compared against the full set
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(path_fps), desc='PathFingerprints Distance')
    rows = submit(delayed(TanimotoDistances)(fp, path_fps) for fp in path_fps)

    return np.array(rows)
def _maccsClustering(rdkit_mols):
    """
    Builds the tanimoto distance matrix of the molecules from their MACCS keys

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array containing the tanimoto matrix
    """
    from rdkit.Chem import MACCSkeys

    # Compute the MACCS keys of every molecule (iteration wrapped in tqdm)
    keys_per_mol = [MACCSkeys.GenMACCSKeys(mol) for mol in tqdm(rdkit_mols)]

    # Each delayed call compares one fingerprint against the whole collection
    runner = ParallelExecutor(n_jobs=-1)
    dispatch = runner(total=len(keys_per_mol), desc='MACCS Distance')
    distance_rows = dispatch(
        delayed(TanimotoDistances)(key, keys_per_mol) for key in keys_per_mol
    )

    return np.array(distance_rows)
def _circularFingerprintsClustering(rdkit_mols, radius=2):
    """
    Builds the dice distance matrix of the molecules from their circular (Morgan) fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects
    radius: int
        The radius of the MorganCircularFingerprint
        Default: 2

    Returns
    -------
    dicematrix: np.array
        The numpy array containing the dice matrix
    """
    from rdkit.Chem import AllChem

    # Compute the circular fingerprint of every molecule
    morgan_fps = [AllChem.GetMorganFingerprint(mol, radius) for mol in rdkit_mols]

    # One delayed call per fingerprint, each one compared against the full set
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(morgan_fps), desc='CircularFingerprints Distance')
    rows = submit(delayed(DiceDistances)(fp, morgan_fps) for fp in morgan_fps)

    return np.array(rows)
def _torsionsFingerprintsClustering(rdkit_mols):
    """
    Builds the dice distance matrix of the molecules from their topological torsion fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    dicematrix: np.array
        The numpy array containing the dice matrix
    """
    from rdkit.Chem.AtomPairs import Torsions

    # Hashed topological torsion fingerprint of every molecule (iteration wrapped in tqdm)
    torsion_fps = [
        Torsions.GetHashedTopologicalTorsionFingerprint(mol) for mol in tqdm(rdkit_mols)
    ]

    # One delayed call per fingerprint, each one compared against the full set
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(torsion_fps), desc='TorsionsFingerprints Distance')
    rows = submit(delayed(DiceDistances)(fp, torsion_fps) for fp in torsion_fps)

    return np.array(rows)
def _circularFingerprintsClustering(rdkit_mols, radius=2):
    """
    Builds the dice distance matrix of the molecules from their circular (Morgan) fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects
    radius: int
        The radius of the MorganCircularFingerprint
        Default: 2

    Returns
    -------
    dicematrix: np.array
        The numpy array containing the dice matrix
    """
    from rdkit.Chem import AllChem

    # Compute the circular fingerprint of every molecule
    circular_fps = [AllChem.GetMorganFingerprint(mol, radius) for mol in rdkit_mols]

    # Each delayed call compares one fingerprint against the whole collection
    runner = ParallelExecutor(n_jobs=-1)
    dispatch = runner(total=len(circular_fps), desc='CircularFingerprints Distance')
    distance_rows = dispatch(
        delayed(DiceDistances)(fp, circular_fps) for fp in circular_fps
    )

    return np.array(distance_rows)
def _torsionsFingerprintsClustering(rdkit_mols):
    """
    Builds the dice distance matrix of the molecules from their topological torsion fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    dicematrix: np.array
        The numpy array containing the dice matrix
    """
    from rdkit.Chem.AtomPairs import Torsions

    # Hashed topological torsion fingerprint of every molecule (iteration wrapped in tqdm)
    hashed_torsions = [
        Torsions.GetHashedTopologicalTorsionFingerprint(mol) for mol in tqdm(rdkit_mols)
    ]

    # Each delayed call compares one fingerprint against the whole collection
    runner = ParallelExecutor(n_jobs=-1)
    dispatch = runner(total=len(hashed_torsions), desc='TorsionsFingerprints Distance')
    distance_rows = dispatch(
        delayed(DiceDistances)(fp, hashed_torsions) for fp in hashed_torsions
    )

    return np.array(distance_rows)
def _pathFingerprintsClustering(rdkit_mols):
    """
    Builds the tanimoto distance matrix of the molecules from their path fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array containing the tanimoto matrix
    """
    from rdkit.Chem.Fingerprints import FingerprintMols

    # Compute the path fingerprint of every molecule (iteration wrapped in tqdm)
    fingerprints = [FingerprintMols.FingerprintMol(mol) for mol in tqdm(rdkit_mols)]

    # Each delayed call compares one fingerprint against the whole collection
    runner = ParallelExecutor(n_jobs=-1)
    dispatch = runner(total=len(fingerprints), desc='PathFingerprints Distance')
    distance_rows = dispatch(
        delayed(TanimotoDistances)(fp, fingerprints) for fp in fingerprints
    )

    return np.array(distance_rows)
def simfilter(sims, outfolder, filtersel, njobs=None):
    """Writes filtered copies of the simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories,
    containing only the desired atoms, in a new directory.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories. It is created if it does not exist.
    filtersel : str
        Atom selection string describing the atoms we want to keep.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    njobs : int
        Number of parallel jobs to spawn for filtering of trajectories. If None it will use the default from
        htmd.config.

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    # _filterTopology is invoked a single time, with the first simulation only
    if len(sims) > 0:
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug("Starting filtering of simulations.")

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    # Fall back to the configured number of jobs when none is given
    if njobs is None:
        njobs = _config["njobs"]

    nsims = len(sims)
    executor = ParallelExecutor(n_jobs=njobs)
    tasks = (delayed(_filtSim)(idx, sims, outfolder, filtersel) for idx in range(nsims))
    filtered = executor(total=nsims, desc="Filtering trajectories")(tasks)

    logger.debug("Finished filtering of simulations")
    return np.array(filtered)
def simfilter(sims, outfolder, filtersel):
    """
    Writes filtered copies of the simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories,
    containing only the desired atoms, in a new directory.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories. It is created if it does not exist.
    filtersel : str
        An atomselection string describing the atoms we want to keep

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    # _filterTopology is invoked a single time, with the first simulation only
    if len(sims) > 0:
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug('Starting filtering of simulations.')

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    nsims = len(sims)
    executor = ParallelExecutor(n_jobs=_config['ncpus'])
    tasks = (delayed(_filtSim)(idx, sims, outfolder, filtersel) for idx in range(nsims))
    filtered = executor(total=nsims, description='Filtering trajectories')(tasks)

    logger.debug('Finished filtering of simulations')
    return np.array(filtered)
def simfilter(sims, outfolder, filtersel):
    """
    Writes filtered copies of the simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories,
    containing only the desired atoms, in a new directory.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories. It is created if it does not exist.
    filtersel : str
        Atom selection string describing the atoms we want to keep.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    # _filterTopology is invoked a single time, with the first simulation only
    if len(sims) > 0:
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug('Starting filtering of simulations.')

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    total_sims = len(sims)
    runner = ParallelExecutor(n_jobs=_config['ncpus'])
    with_progress = runner(total=total_sims, desc='Filtering trajectories')
    filtered = with_progress(
        delayed(_filtSim)(idx, sims, outfolder, filtersel) for idx in range(total_sims)
    )

    logger.debug('Finished filtering of simulations')
    return np.array(filtered)
def getStates(self, states=None, statetype='macro', wrapsel='protein', alignsel='name CA', alignmol=None,
              samplemode='weighted', numsamples=50, simlist=None):
    """
    Samples conformations of MSM states and returns them as Molecule objects

    Parameters
    ----------
    states : ndarray, optional
        A list of states to visualize. If None, all states of type `statetype` are used.
    statetype : ['macro','micro','cluster'], optional
        The type of state to visualize
    wrapsel : str, optional, default='protein'
        A selection to use for wrapping
    alignsel : str, optional, default='name CA'
        A selection used for aligning all frames
    alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A reference molecule onto which to align all others. If None, the structure file of the simlist is used.
    samplemode : ['weighted','random'], optional, default='weighted'
        How to obtain the samples from the states
    numsamples : int
        Number of samples (conformations) for each state.
    simlist : simlist
        Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

    Returns
    -------
    mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
        A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

    Raises
    ------
    AttributeError
        If the given simlist has a different length than the simlist of the model
    NameError
        If the simlist has no single structure file, if `statetype` is invalid or if there are no states

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(100, 5)
    >>> mols = model.getStates()
    >>> for m in mols:
    >>>     m.view()
    """
    self._integrityCheck(postmsm=(statetype != 'cluster'))

    if simlist is None:
        simlist = self.data.simlist
    elif len(simlist) != len(self.data.simlist):
        raise AttributeError('Provided simlist has different number of trajectories than the one used by the model.')

    single, molfile = _singleMolfile(simlist)
    if not single:
        raise NameError('Visualizer does not support yet visualization of systems with different number of atoms')

    if alignmol is None:
        alignmol = molfile

    if statetype not in ('macro', 'micro', 'cluster'):
        raise NameError("'statetype' must be either 'macro', 'micro' or ''cluster'")

    if states is None:
        # statetype was validated above, so the final branch can only be 'macro'
        if statetype == 'cluster':
            nstates = self.data.K
        elif statetype == 'micro':
            nstates = self.micronum
        else:
            nstates = self.macronum
        states = range(nstates)

    if len(states) == 0:
        raise NameError('No {} states exist in the model'.format(statetype))

    # A reference molecule is only built when both an alignment selection and a reference are given
    refmol = None
    if len(alignsel) > 0 and len(alignmol) > 0:
        refmol = Molecule(alignmol)

    samples_per_state = [numsamples] * len(states)
    _, relframes = self.sampleStates(states, samples_per_state, statetype=statetype, samplemode=samplemode)

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    # relframes holds one entry per state, so this dispatches one _loadMols call per state.
    # A single job is used because the configured ncpus gave errors on some systems.
    executor = ParallelExecutor(n_jobs=1)
    jobs = (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, refmol, simlist) for rel in relframes)
    mols = executor(total=len(relframes), description='Getting state Molecules')(jobs)
    return np.array(mols, dtype=object)
def cluster(smallmol_list, method, distThresholds=0.2, returnDetails=True, removeHs=True):
    """
    Return the SmallMol objects grouped by cluster. It can also return the details of the clusters computed.

    Parameters
    ----------
    smallmol_list: list
        The list of htmd.smallmol.smallmol.SmallMol objects
    method: str
        The cluster methods. Can be  ['maccs', 'pathFingerprints', 'atomsFingerprints', 'torsionsFingerprints',
        'circularFingerprints', 'shape', 'mcs']
    distThresholds: float
        The distance cutoff for the clusters
        Default: 0.2
    returnDetails: bool
        If True, the cluster details are also returned
        Default: True
    removeHs: bool
        If True, the hydrogens are not considered
        Default: True

    Returns
    -------
    clusters: np.ndarray
        Object array with one entry per cluster, each holding the SmallMol objects belonging to that cluster
    details: dict
        Only returned if returnDetails is True. Contains 'numClusters', 'populations' and 'clusters' (the
        indexes, relative to the input list, of the molecules of each cluster)

    Raises
    ------
    ValueError
        If `method` is not one of the available methods
    """
    import sys

    _methods = ['maccs', 'pathFingerprints', 'atomsFingerprints', 'torsionsFingerprints', 'circularFingerprints',
                'shape', 'mcs']

    # Fail fast: validate the method before importing sklearn or copying any molecule
    if method not in _methods:
        raise ValueError('The method provided {} does not exists. The ones available are the following: {}'
                         .format(method, _methods))

    from sklearn.cluster import DBSCAN

    this_module = sys.modules[__name__]

    # Work on copies so that removing the hydrogens does not touch the caller's molecules
    smallmol_list = np.array([sm.copy() for sm in smallmol_list])

    if removeHs:
        tmp_smallmol_list = []
        for sm in smallmol_list:
            B = Builder(sm)
            B.removeHydrogens()
            tmp_smallmol_list.append(B.getSmallMol())
        smallmol_list = np.array(tmp_smallmol_list)

    # Convert to rdkit molecules, remembering which input indexes converted successfully.
    # The rows of the distance matrix follow `valid`, not the original list.
    rdkitMols_list = []
    valid = []
    wrong = []
    for n, sm in enumerate(smallmol_list):
        try:
            rdkit_mol = sm.toRdkitMol(includeConformer=True)
        except Exception:  # do not swallow KeyboardInterrupt/SystemExit
            wrong.append(n)
        else:
            rdkitMols_list.append(rdkit_mol)
            valid.append(n)
    print('{} problematic molecules. Indexes: {}'.format(len(wrong), wrong))
    valid = np.array(valid, dtype=int)

    clustmethod = getattr(this_module, '_{}Clustering'.format(method))

    if method not in ['shape', 'mcs']:
        # Fingerprint based methods compute the whole matrix by themselves
        matrix = clustmethod(rdkitMols_list)
    else:
        # 'shape' and 'mcs' compute one row of the matrix per call
        aprun = ParallelExecutor(n_jobs=-1)
        matrix = aprun(total=len(rdkitMols_list), desc='{} Distance'.format(method))(
            delayed(clustmethod)(mol1, rdkitMols_list) for mol1 in rdkitMols_list)
        matrix = np.array(matrix)

    # NOTE(review): recent scikit-learn releases appear to require min_samples >= 1 - confirm against the
    # pinned version before changing it.
    db = DBSCAN(eps=distThresholds, min_samples=0, metric='precomputed').fit(matrix)

    labels = db.labels_
    populations = np.bincount(labels)
    # Labels start at 0, so the number of clusters is max(label) + 1, i.e. the length of the bincount.
    # Using max(label) directly silently dropped the last cluster.
    n_clusters = len(populations)

    clusters_idx = np.empty((n_clusters,), dtype=object)
    clusters_smallmols = np.empty((n_clusters,), dtype=object)

    for n_cl in np.arange(n_clusters):
        # Map matrix rows back to indexes of the input list (they differ if some molecule failed to convert)
        idxs = valid[np.where(labels == n_cl)[0]]
        clusters_idx[n_cl] = idxs
        clusters_smallmols[n_cl] = smallmol_list[idxs]

    if returnDetails:
        details = {'numClusters': n_clusters,
                   'populations': populations,
                   'clusters': clusters_idx}
        return clusters_smallmols, details

    return clusters_smallmols
def cluster(smallmol_list, method, distThresholds=0.2, returnDetails=True, removeHs=True):
    """
    Return the SmallMol objects grouped by cluster. It can also return the details of the clusters computed.

    Parameters
    ----------
    smallmol_list: list
        The list of htmd.smallmol.smallmol.SmallMol objects
    method: str
        The cluster methods. Can be  ['maccs', 'pathFingerprints', 'atomsFingerprints', 'torsionsFingerprints',
        'circularFingerprints', 'shape', 'mcs']
    distThresholds: float
        The distance cutoff for the clusters
        Default: 0.2
    returnDetails: bool
        If True, the cluster details are also returned
        Default: True
    removeHs: bool
        If True, the hydrogens are not considered
        Default: True

    Returns
    -------
    clusters: np.ndarray
        Object array with one entry per cluster, each holding the SmallMol objects belonging to that cluster
    details: dict
        Only returned if returnDetails is True. Contains 'numClusters', 'populations' and 'clusters' (the
        indexes, relative to the input list, of the molecules of each cluster)

    Raises
    ------
    ValueError
        If `method` is not one of the available methods
    """
    import sys

    _methods = ['maccs', 'pathFingerprints', 'atomsFingerprints', 'torsionsFingerprints', 'circularFingerprints',
                'shape', 'mcs']

    # Fail fast: validate the method before importing sklearn or copying any molecule
    if method not in _methods:
        raise ValueError('The method provided {} does not exists. The ones available are the following: {}'
                         .format(method, _methods))

    from sklearn.cluster import DBSCAN

    this_module = sys.modules[__name__]

    # Work on copies so that removing the hydrogens does not touch the caller's molecules
    smallmol_list = np.array([sm.copy() for sm in smallmol_list])

    if removeHs:
        stripped = []
        for sm in smallmol_list:
            B = Builder(sm)
            B.removeHydrogens()
            stripped.append(B.getSmallMol())
        smallmol_list = np.array(stripped)

    # Convert to rdkit molecules, remembering which input indexes converted successfully.
    # The rows of the distance matrix follow `converted`, not the original list.
    rdkitMols_list = []
    converted = []
    wrong = []
    for n, sm in enumerate(smallmol_list):
        try:
            rdkit_mol = sm.toRdkitMol(includeConformer=True)
        except Exception:  # do not swallow KeyboardInterrupt/SystemExit
            wrong.append(n)
        else:
            rdkitMols_list.append(rdkit_mol)
            converted.append(n)
    print('{} problematic molecules. Indexes: {}'.format(len(wrong), wrong))
    converted = np.array(converted, dtype=int)

    clustmethod = getattr(this_module, '_{}Clustering'.format(method))

    if method not in ['shape', 'mcs']:
        # Fingerprint based methods compute the whole matrix by themselves
        matrix = clustmethod(rdkitMols_list)
    else:
        # 'shape' and 'mcs' compute one row of the matrix per call
        aprun = ParallelExecutor(n_jobs=-1)
        matrix = aprun(total=len(rdkitMols_list), desc='{} Distance'.format(method))(
            delayed(clustmethod)(mol1, rdkitMols_list) for mol1 in rdkitMols_list)
        matrix = np.array(matrix)

    # NOTE(review): recent scikit-learn releases appear to require min_samples >= 1 - confirm against the
    # pinned version before changing it.
    db = DBSCAN(eps=distThresholds, min_samples=0, metric='precomputed').fit(matrix)

    labels = db.labels_
    populations = np.bincount(labels)
    # Labels start at 0, so the number of clusters is max(label) + 1, i.e. the length of the bincount.
    # Using max(label) directly silently dropped the last cluster.
    n_clusters = len(populations)

    clusters_idx = np.empty((n_clusters,), dtype=object)
    clusters_smallmols = np.empty((n_clusters,), dtype=object)

    for n_cl in np.arange(n_clusters):
        # Map matrix rows back to indexes of the input list (they differ if some molecule failed to convert)
        idxs = converted[np.where(labels == n_cl)[0]]
        clusters_idx[n_cl] = idxs
        clusters_smallmols[n_cl] = smallmol_list[idxs]

    if returnDetails:
        details = {'numClusters': n_clusters,
                   'populations': populations,
                   'clusters': clusters_idx}
        return clusters_smallmols, details

    return clusters_smallmols
def ffevaluate(mol, prm, betweensets=None, dist_thresh=0, threads=1):
    """
    Computes the forcefield energies and forces of a Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object. Can contain multiple frames.
    prm : :class:`ParameterSet <parmed.ParameterSet>` object
        Forcefield parameters.
    betweensets : tuple of strings
        Only calculate energies between two sets of atoms given as atomselect strings.
        Only computes LJ and electrostatics.
    dist_thresh : float
        If set to a value != 0 it will only calculate LJ, electrostatics and bond energies for atoms which are closer
        than the threshold
    threads : int
        If 1, all frames are evaluated in a single call. Otherwise every frame is evaluated separately through a
        ParallelExecutor with `threads` jobs and the per-frame results are joined along the frame axis.

    Returns
    -------
    energies : np.ndarray
        A (6, nframes) shaped matrix containing the individual energy components of each simulation frame.
        Rows correspond to the following energies 0: bond 1: LJ 2: Electrostatic 3: angle 4: dihedral 5: improper
    forces : np.ndarray
        A (natoms, 3, nframes) shaped matrix containing the total force on each atom for each simulation frame.
    atmnrg : np.ndarray
        A (natoms, 6, nframes) shaped matrix containing the approximate potential energy components of each atom at
        each simulation frame. The 6 indexes are the same as in the `energies` return argument.

    Examples
    --------
    >>> from htmd.ffevaluation.ffevaluate import *
    >>> from htmd.ffevaluation.test_ffevaluate import fixParameters, drawForce
    >>> from htmd.ui import *
    >>> import parmed
    >>> mol = Molecule('./htmd/data/test-ffevaluate/waterbox/structure.psf')
    >>> mol.read('./htmd/data/test-ffevaluate/waterbox/output.xtc')
    >>> prm = parmed.charmm.CharmmParameterSet(fixParameters('./htmd/data/test-ffevaluate/waterbox/parameters.prm'))
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    >>> mol.view()
    >>> for cc, ff in zip(mol.coords[:, :, 0], forces[:, :, 0]):
    >>>     drawForce(cc, ff)

    Amber style

    >>> prmtop = parmed.amber.AmberParm('structure.prmtop')
    >>> prm = parmed.amber.AmberParameterSet.from_structure(prmtop)
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    """
    mol = mol.copy()
    coords, box = mol.coords, mol.box
    setA, setB = calculateSets(mol, betweensets)

    # Arguments shared by every _ffevaluate call, after the coordinates and the box
    args = list(init(mol, prm)) + [setA, setB, dist_thresh]

    if threads == 1:
        return _ffevaluate(coords, box, *args)

    from htmd.parallelprogress import ParallelExecutor, delayed

    nframes = mol.numFrames
    executor = ParallelExecutor(n_jobs=threads)
    # Each job receives a single frame, kept 3-dimensional, together with the box column of that frame
    jobs = (
        delayed(_ffevaluate)(np.atleast_3d(coords[:, :, frame]), box[:, frame].reshape(3, 1), *args)
        for frame in range(nframes)
    )
    results = executor(total=nframes, description='Evaluating energies')(jobs)

    # Join the per-frame results back along the frame axis
    energies = np.hstack([frame_res[0] for frame_res in results])
    forces = np.concatenate([frame_res[1] for frame_res in results], axis=2)
    atmnrg = np.concatenate([frame_res[2] for frame_res in results], axis=2)
    return energies, forces, atmnrg
def ffevaluate(mol, prm, betweensets=None, cutoff=0, rfa=False, solventDielectric=78.5, threads=1, fromstruct=False):
    """
    Computes the forcefield energies and forces of a Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object. Can contain multiple frames.
    prm : :class:`ParameterSet <parmed.ParameterSet>` object
        Forcefield parameters.
    betweensets : tuple of strings
        Only calculate energies between two sets of atoms given as atomselect strings.
        Only computes LJ and electrostatics.
    cutoff : float
        If set to a value != 0 it will only calculate LJ, electrostatics and bond energies for atoms which are closer
        than the threshold
    rfa : bool
        Use with `cutoff` to enable the reaction field approximation for scaling of the electrostatics up to the cutoff.
        Uses the value of `solventDielectric` to model everything beyond the cutoff distance as solvent with uniform
        dielectric.
    solventDielectric : float
        Used together with `cutoff` and `rfa`
    threads : int
        If 1, all frames are evaluated in a single call. Otherwise every frame is evaluated separately through a
        ParallelExecutor with `threads` jobs and the per-frame results are joined along the frame axis.
    fromstruct : bool
        Passed unchanged to `init`. NOTE(review): its exact meaning is not visible from this function.

    Returns
    -------
    energies : np.ndarray
        A (6, nframes) shaped matrix containing the individual energy components of each simulation frame.
        Rows correspond to the following energies 0: bond 1: LJ 2: Electrostatic 3: angle 4: dihedral 5: improper
    forces : np.ndarray
        A (natoms, 3, nframes) shaped matrix containing the total force on each atom for each simulation frame.
    atmnrg : np.ndarray
        A (natoms, 6, nframes) shaped matrix containing the approximate potential energy components of each atom at
        each simulation frame. The 6 indexes are the same as in the `energies` return argument.

    Raises
    ------
    ValueError
        If the box of the Molecule is not of shape (3, numFrames)

    Examples
    --------
    >>> from htmd.ffevaluation.ffevaluate import *
    >>> from htmd.ffevaluation.test_ffevaluate import fixParameters, drawForce
    >>> from htmd.ui import *
    >>> import parmed
    >>> mol = Molecule('./htmd/data/test-ffevaluate/waterbox/structure.psf')
    >>> mol.read('./htmd/data/test-ffevaluate/waterbox/output.xtc')
    >>> prm = parmed.charmm.CharmmParameterSet(fixParameters('./htmd/data/test-ffevaluate/waterbox/parameters.prm'))
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    >>> mol.view()
    >>> for cc, ff in zip(mol.coords[:, :, 0], forces[:, :, 0]):
    >>>     drawForce(cc, ff)

    Amber style

    >>> prmtop = parmed.amber.AmberParm('structure.prmtop')
    >>> prm = parmed.amber.AmberParameterSet.from_structure(prmtop)
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    """
    # The box needs one column of 3 values for every frame of the coordinates
    box_is_valid = mol.box.shape[0] == 3 and mol.box.shape[1] == mol.coords.shape[2]
    if not box_is_valid:
        raise ValueError('Box dimensions have to be (3, numFrames), your Molecule has box of shape {}'
                         .format(mol.box.shape))

    mol = mol.copy()
    coords = mol.coords.astype(np.float32)
    box = mol.box.astype(np.float32)
    setA, setB = calculateSets(mol, betweensets)

    # Arguments shared by every _ffevaluate call, after the coordinates and the box
    args = list(init(mol, prm, fromstruct)) + [setA, setB, cutoff, rfa, solventDielectric]

    if threads == 1:
        return _ffevaluate(coords, box, *args)

    from htmd.parallelprogress import ParallelExecutor, delayed

    nframes = mol.numFrames
    executor = ParallelExecutor(n_jobs=threads)
    # Each job receives a single frame, kept 3-dimensional, together with the box column of that frame
    jobs = (
        delayed(_ffevaluate)(np.atleast_3d(coords[:, :, frame]), box[:, frame].reshape(3, 1), *args)
        for frame in range(nframes)
    )
    results = executor(total=nframes, desc='Evaluating energies')(jobs)

    # Join the per-frame results back along the frame axis
    energies = np.hstack([frame_res[0] for frame_res in results])
    forces = np.concatenate([frame_res[1] for frame_res in results], axis=2)
    atmnrg = np.concatenate([frame_res[2] for frame_res in results], axis=2)
    return energies, forces, atmnrg
def getStates(self, states=None, statetype='macro', wrapsel='protein', alignsel='name CA', alignmol=None,
              samplemode='weighted', numsamples=50, simlist=None):
    """
    Samples conformations of MSM states and returns them as Molecule objects

    Parameters
    ----------
    states : ndarray, optional
        A list of states to visualize. If None, all states of type `statetype` are used.
    statetype : ['macro','micro','cluster'], optional
        The type of state to visualize
    wrapsel : str, optional, default='protein'
        A selection to use for wrapping
    alignsel : str, optional, default='name CA'
        A selection used for aligning all frames. Set to None to disable aligning
    alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A reference molecule onto which to align all others. If None, a Molecule is created from the structure
        file of the simlist.
    samplemode : ['weighted','random'], optional, default='weighted'
        How to obtain the samples from the states
    numsamples : int
        Number of samples (conformations) for each state.
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

    Returns
    -------
    mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
        A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

    Raises
    ------
    AttributeError
        If the given simlist has a different length than the simlist of the model
    NameError
        If the simlist has no single structure file, if `statetype` is invalid or if there are no states

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(100, 5)
    >>> mols = model.getStates()
    >>> for m in mols:
    >>>     m.view()
    """
    self._integrityCheck(postmsm=(statetype != 'cluster'))

    if simlist is None:
        simlist = self.data.simlist
    elif len(simlist) != len(self.data.simlist):
        raise AttributeError('Provided simlist has different number of trajectories than the one used by the model.')

    single, molfile = _singleMolfile(simlist)
    if not single:
        raise NameError('Visualizer does not support yet visualization of systems with different structure files. '
                        'The simlist should be created with a single molfile (for example a filtered one)')

    if alignmol is None:
        alignmol = Molecule(molfile)

    if statetype not in ('macro', 'micro', 'cluster'):
        raise NameError("'statetype' must be either 'macro', 'micro' or ''cluster'")

    if states is None:
        # statetype was validated above, so the final branch can only be 'macro'
        if statetype == 'cluster':
            nstates = self.data.K
        elif statetype == 'micro':
            nstates = self.micronum
        else:
            nstates = self.macronum
        states = range(nstates)

    if len(states) == 0:
        raise NameError('No {} states exist in the model'.format(statetype))

    _, relframes = self.sampleStates(states, numsamples, statetype=statetype, samplemode=samplemode)

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    # relframes holds one entry per state, so this dispatches one _loadMols call per state.
    # A single job is used because the configured ncpus gave errors on some systems.
    executor = ParallelExecutor(n_jobs=1)
    jobs = (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, alignmol, simlist) for rel in relframes)
    mols = executor(total=len(relframes), description='Getting state Molecules')(jobs)
    return np.array(mols, dtype=object)