Exemple #1
0
def _maccsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their MACCS keys

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array built from one ``TanimotoDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem import MACCSkeys  # computes the MACCS keys

    # One MACCS fingerprint per molecule; tqdm reports the progress.
    fingerprints = [MACCSkeys.GenMACCSKeys(mol) for mol in tqdm(rdkit_mols)]

    # Each job compares a single fingerprint against the whole set.
    executor = ParallelExecutor(n_jobs=-1)
    run_jobs = executor(total=len(fingerprints), desc='MACCS Distance')
    rows = run_jobs(
        delayed(TanimotoDistances)(reference, fingerprints)
        for reference in fingerprints
    )

    return np.array(rows)
Exemple #2
0
def _pathFingerprintsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their path fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array built from one ``TanimotoDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem.Fingerprints import FingerprintMols  # computes the path fingerprints

    # One path fingerprint per molecule; tqdm reports the progress.
    fingerprints = [FingerprintMols.FingerprintMol(mol) for mol in tqdm(rdkit_mols)]

    # Each job compares a single fingerprint against the whole set.
    executor = ParallelExecutor(n_jobs=-1)
    run_jobs = executor(total=len(fingerprints), desc='PathFingerprints Distance')
    rows = run_jobs(
        delayed(TanimotoDistances)(reference, fingerprints)
        for reference in fingerprints
    )

    return np.array(rows)
Exemple #3
0
def _maccsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their MACCS keys

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array built from one ``TanimotoDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem import MACCSkeys  # computes the MACCS keys

    # Fingerprint every molecule first; tqdm reports the progress.
    maccs_keys = [MACCSkeys.GenMACCSKeys(mol) for mol in tqdm(rdkit_mols)]

    # Then fan out one comparison job per fingerprint.
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(maccs_keys), desc='MACCS Distance')
    distance_rows = submit(
        delayed(TanimotoDistances)(key, maccs_keys) for key in maccs_keys
    )

    return np.array(distance_rows)
Exemple #4
0
def _circularFingerprintsClustering(rdkit_mols, radius=2):
    """
    Build the distance matrix of a set of molecules from their Morgan (circular) fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    radius: int
        The radius passed to the Morgan fingerprint generation
        Default: 2

    Returns
    -------
    dicematrix: np.array
        The numpy array built from one ``DiceDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem import AllChem  # computes the circular fingerprints

    fingerprints = [AllChem.GetMorganFingerprint(mol, radius) for mol in rdkit_mols]

    # Each job compares a single fingerprint against the whole set.
    executor = ParallelExecutor(n_jobs=-1)
    run_jobs = executor(total=len(fingerprints), desc='CircularFingerprints Distance')
    rows = run_jobs(
        delayed(DiceDistances)(reference, fingerprints)
        for reference in fingerprints
    )

    return np.array(rows)
Exemple #5
0
def _torsionsFingerprintsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their topological torsion fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    dicematrix: np.array
        The numpy array built from one ``DiceDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem.AtomPairs import Torsions  # Topological Torsions

    # One hashed torsion fingerprint per molecule; tqdm reports the progress.
    fingerprints = [
        Torsions.GetHashedTopologicalTorsionFingerprint(mol)
        for mol in tqdm(rdkit_mols)
    ]

    # Each job compares a single fingerprint against the whole set.
    executor = ParallelExecutor(n_jobs=-1)
    run_jobs = executor(total=len(fingerprints), desc='TorsionsFingerprints Distance')
    rows = run_jobs(
        delayed(DiceDistances)(reference, fingerprints)
        for reference in fingerprints
    )

    return np.array(rows)
Exemple #6
0
def _circularFingerprintsClustering(rdkit_mols, radius=2):
    """
    Build the distance matrix of a set of molecules from their Morgan (circular) fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    radius: int
        The radius passed to the Morgan fingerprint generation
        Default: 2

    Returns
    -------
    dicematrix: np.array
        The numpy array built from one ``DiceDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem import AllChem  # computes the circular fingerprints

    # Fingerprint every molecule first.
    morgan_fps = [AllChem.GetMorganFingerprint(mol, radius) for mol in rdkit_mols]

    # Then fan out one comparison job per fingerprint.
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(morgan_fps), desc='CircularFingerprints Distance')
    distance_rows = submit(
        delayed(DiceDistances)(fp, morgan_fps) for fp in morgan_fps
    )

    return np.array(distance_rows)
Exemple #7
0
def _torsionsFingerprintsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their topological torsion fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    dicematrix: np.array
        The numpy array built from one ``DiceDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem.AtomPairs import Torsions  # Topological Torsions

    # Fingerprint every molecule first; tqdm reports the progress.
    torsion_fps = [
        Torsions.GetHashedTopologicalTorsionFingerprint(mol)
        for mol in tqdm(rdkit_mols)
    ]

    # Then fan out one comparison job per fingerprint.
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(torsion_fps), desc='TorsionsFingerprints Distance')
    distance_rows = submit(
        delayed(DiceDistances)(fp, torsion_fps) for fp in torsion_fps
    )

    return np.array(distance_rows)
Exemple #8
0
def _pathFingerprintsClustering(rdkit_mols):
    """
    Build the distance matrix of a set of molecules from their path fingerprints

    Parameters
    ----------
    rdkit_mols: list
        The list of rdkit.Chem.rdchem.Mol objects

    Returns
    -------
    tanimotomatrix: np.array
        The numpy array built from one ``TanimotoDistances`` result per molecule,
        each computed between that molecule's fingerprint and all the fingerprints
    """
    from rdkit.Chem.Fingerprints import FingerprintMols  # computes the path fingerprints

    # Fingerprint every molecule first; tqdm reports the progress.
    path_fps = [FingerprintMols.FingerprintMol(mol) for mol in tqdm(rdkit_mols)]

    # Then fan out one comparison job per fingerprint.
    executor = ParallelExecutor(n_jobs=-1)
    submit = executor(total=len(path_fps), desc='PathFingerprints Distance')
    distance_rows = submit(
        delayed(TanimotoDistances)(fp, path_fps) for fp in path_fps
    )

    return np.array(distance_rows)
Exemple #9
0
def simfilter(sims, outfolder, filtersel, njobs=None):
    """Filters a list of simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories containing only
    the desired atoms in `outfolder`. The folder is created if it does not exist yet.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories
    filtersel : str
        Atom selection string describing the atoms we want to keep.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    njobs : int
        Number of parallel jobs to spawn for filtering of trajectories. If None it will use the "njobs" value
        of htmd.config.

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims  = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    numsims = len(sims)
    if numsims > 0:
        # The topology is filtered once, using the first simulation.
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug("Starting filtering of simulations.")

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    if njobs is None:
        njobs = _config["njobs"]

    # One job per simulation; each job receives its index and the full list.
    executor = ParallelExecutor(n_jobs=njobs)
    run_jobs = executor(total=numsims, desc="Filtering trajectories")
    filtsims = run_jobs(
        delayed(_filtSim)(idx, sims, outfolder, filtersel)
        for idx in range(numsims))

    logger.debug("Finished filtering of simulations")
    return np.array(filtsims)
Exemple #10
0
def simfilter(sims, outfolder, filtersel):
    """ Filters a list of simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories containing only
    the desired atoms in `outfolder`. The folder is created if it does not exist yet.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories
    filtersel : str
        An atomselection string describing the atoms we want to keep

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims  = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    numsims = len(sims)
    if numsims > 0:
        # The topology is filtered once, using the first simulation.
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug('Starting filtering of simulations.')

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    # One job per simulation; each job receives its index and the full list.
    executor = ParallelExecutor(n_jobs=_config['ncpus'])
    run_jobs = executor(total=numsims, description='Filtering trajectories')
    filtsims = run_jobs(
        delayed(_filtSim)(idx, sims, outfolder, filtersel)
        for idx in range(numsims))

    logger.debug('Finished filtering of simulations')
    return np.array(filtsims)
Exemple #11
0
def simfilter(sims, outfolder, filtersel):
    """ Filters a list of simulations generated by :func:`simlist`

    Takes a list of simulations produced by `simList` and writes new trajectories containing only
    the desired atoms in `outfolder`. The folder is created if it does not exist yet.

    Parameters
    ----------
    sims : list
        A simulation list produced by the `simList` function
    outfolder : str
        The folder in which to write the modified trajectories
    filtersel : str
        Atom selection string describing the atoms we want to keep.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    fsims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of filtered simulations

    Example
    -------
    >>> sims  = simlist(glob('data/*/'), glob('input/*/structure.pdb'))
    >>> fsims = simfilter(sims, 'filtered', filtersel='not water')
    """
    if not path.exists(outfolder):
        makedirs(outfolder)

    numsims = len(sims)
    if numsims > 0:
        # The topology is filtered once, using the first simulation.
        _filterTopology(sims[0], outfolder, filtersel)

    logger.debug('Starting filtering of simulations.')

    from htmd.config import _config
    from htmd.parallelprogress import ParallelExecutor, delayed

    # One job per simulation; each job receives its index and the full list.
    executor = ParallelExecutor(n_jobs=_config['ncpus'])
    run_jobs = executor(total=numsims, desc='Filtering trajectories')
    filtsims = run_jobs(
        delayed(_filtSim)(idx, sims, outfolder, filtersel)
        for idx in range(numsims))

    logger.debug('Finished filtering of simulations')
    return np.array(filtsims)
Exemple #12
0
    def getStates(self, states=None, statetype='macro', wrapsel='protein', alignsel='name CA', alignmol=None, samplemode='weighted', numsamples=50, simlist=None):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. If None, all states of the given `statetype` are used
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others. If None, the structure file of the simlist is used
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.
        simlist : simlist
            Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        AttributeError
            If `simlist` has a different number of trajectories than the simlist of the model
        NameError
            If the simlist does not use a single structure file, if `statetype` is not one of the accepted values,
            or if there are no states to sample

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Cluster states do not need the Markov model, the other state types do.
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        if simlist is None:
            simlist = self.data.simlist
        elif len(simlist) != len(self.data.simlist):
            raise AttributeError('Provided simlist has different number of trajectories than the one used by the model.')

        (single, molfile) = _singleMolfile(simlist)
        if not single:
            raise NameError('Visualizer does not support yet visualization of systems with different number of atoms')
        if alignmol is None:
            alignmol = molfile
        if statetype not in ('macro', 'micro', 'cluster'):
            # The message previously had a stray quote before 'cluster'.
            raise NameError("'statetype' must be either 'macro', 'micro' or 'cluster'")
        if states is None:
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            elif statetype == 'cluster':
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')

        # The reference is only loaded when both a selection and a reference are given.
        refmol = None
        if len(alignsel) > 0 and len(alignmol) > 0:
            refmol = Molecule(alignmol)

        # Only the relative frames are needed; the first return value is discarded.
        (_, relframes) = self.sampleStates(states, [numsamples]*len(states), statetype=statetype, samplemode=samplemode)

        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        aprun = ParallelExecutor(n_jobs=1)
        mols = aprun(total=len(relframes), description='Getting state Molecules')\
            (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, refmol, simlist) for rel in relframes)
        return np.array(mols, dtype=object)
Exemple #13
0
def cluster(smallmol_list,
            method,
            distThresholds=0.2,
            returnDetails=True,
            removeHs=True):
    """
    Return the SmallMol objects grouped by cluster. It can also return the details of the clusters computed.

    A distance matrix is built with the selected method and clustered with DBSCAN. Molecules that cannot be
    converted to RDKit molecules are reported on stdout and left out of the clustering.

    Parameters
    ----------
    smallmol_list: list
        The list of htmd.smallmol.smallmol.SmallMol objects
    method: str
        The cluster method. Can be one of ['maccs', 'pathFingerprints', 'atomsFingerprints',
        'torsionsFingerprints', 'circularFingerprints', 'shape', 'mcs']
    distThresholds: float
        The distance cutoff for the clusters (passed to DBSCAN as `eps`)
        Default: 0.2
    returnDetails: bool
        If True, the cluster details are also returned
        Default: True
    removeHs: bool
        If True, the hydrogens are removed from the copied molecules before clustering
        Default: True

    Returns
    -------
    clusters: np.ndarray
        Object array with one entry per cluster. Each entry is an array with the SmallMol objects
        (copies of the input ones) that belong to that cluster
    details: dict
        Only returned if `returnDetails` is True. Keys: 'numClusters' (number of clusters),
        'populations' (number of molecules in each cluster) and 'clusters' (for each cluster, the indexes
        of its molecules in `smallmol_list`)

    Raises
    ------
    ValueError
        If `method` is not one of the available methods
    """
    _methods = [
        'maccs', 'pathFingerprints', 'atomsFingerprints',
        'torsionsFingerprints', 'circularFingerprints', 'shape', 'mcs'
    ]

    if method not in _methods:
        raise ValueError(
            'The method provided {} does not exists. The ones available are the following: {}'
            .format(method, _methods))

    # Work on copies so that the caller's molecules are never modified.
    smallmol_list = np.array([sm.copy() for sm in smallmol_list])

    if removeHs:
        stripped = []
        for sm in smallmol_list:
            B = Builder(sm)
            B.removeHydrogens()
            stripped.append(B.getSmallMol())

        smallmol_list = np.array(stripped)

    # Best-effort conversion: a failing molecule is reported, not fatal.
    # `converted` remembers the position in smallmol_list of every molecule
    # that made it into the distance matrix, so that the DBSCAN labels can be
    # mapped back to the right molecules even when some conversions fail.
    rdkitMols_list = []
    converted = []
    wrong = []
    for n, sm in enumerate(smallmol_list):
        try:
            rdkit_mol = sm.toRdkitMol(includeConformer=True)
        except Exception:
            wrong.append(n)
        else:
            rdkitMols_list.append(rdkit_mol)
            converted.append(n)
    print('{} problematic molecules. Indexes: {}'.format(len(wrong), wrong))
    converted = np.array(converted, dtype=int)

    if len(rdkitMols_list) == 0:
        # Nothing to cluster: skip the distance matrix and DBSCAN entirely.
        labels = np.zeros((0, ), dtype=int)
    else:
        from sklearn.cluster import DBSCAN

        import sys
        this_module = sys.modules[__name__]
        clustmethod = getattr(this_module, '_{}Clustering'.format(method))

        if method not in ['shape', 'mcs']:
            # Fingerprint methods build the whole matrix themselves.
            matrix = clustmethod(rdkitMols_list)
        else:
            # 'shape' and 'mcs' work one row at a time: one job per molecule.
            aprun = ParallelExecutor(n_jobs=-1)
            matrix = aprun(total=len(rdkitMols_list), desc='{} Distance'.format(method)) \
                (delayed(clustmethod)(mol1, rdkitMols_list) for mol1 in rdkitMols_list)
            matrix = np.array(matrix)

        # NOTE(review): recent scikit-learn releases may reject min_samples=0;
        # confirm against the version in use before changing it.
        db = DBSCAN(eps=distThresholds, min_samples=0,
                    metric='precomputed').fit(matrix)
        labels = db.labels_

    populations = np.bincount(labels)
    # Labels start at 0, so there are max(label) + 1 clusters, which is the
    # length of the bincount. Using max(label) alone dropped the last cluster.
    n_clusters = int(populations.size)

    clusters_idx = np.empty((n_clusters, ), dtype=object)
    clusters_smallmols = np.empty((n_clusters, ), dtype=object)

    for n_cl in range(n_clusters):
        # Translate matrix rows back to positions in smallmol_list.
        members = converted[labels == n_cl]
        clusters_idx[n_cl] = members
        clusters_smallmols[n_cl] = smallmol_list[members]

    if returnDetails:
        details = {
            'numClusters': n_clusters,
            'populations': populations,
            'clusters': clusters_idx
        }
        return clusters_smallmols, details

    return clusters_smallmols
Exemple #14
0
def cluster(smallmol_list, method, distThresholds=0.2, returnDetails=True, removeHs=True):
    """
    Return the SmallMol objects grouped by cluster. It can also return the details of the clusters computed.

    A distance matrix is built with the selected method and clustered with DBSCAN. Molecules that cannot be
    converted to RDKit molecules are reported on stdout and left out of the clustering.

    Parameters
    ----------
    smallmol_list: list
        The list of htmd.smallmol.smallmol.SmallMol objects
    method: str
        The cluster method. Can be one of ['maccs', 'pathFingerprints', 'atomsFingerprints',
        'torsionsFingerprints', 'circularFingerprints', 'shape', 'mcs']
    distThresholds: float
        The distance cutoff for the clusters (passed to DBSCAN as `eps`)
        Default: 0.2
    returnDetails: bool
        If True, the cluster details are also returned
        Default: True
    removeHs: bool
        If True, the hydrogens are removed from the copied molecules before clustering
        Default: True

    Returns
    -------
    clusters: np.ndarray
        Object array with one entry per cluster. Each entry is an array with the SmallMol objects
        (copies of the input ones) that belong to that cluster
    details: dict
        Only returned if `returnDetails` is True. Keys: 'numClusters' (number of clusters),
        'populations' (number of molecules in each cluster) and 'clusters' (for each cluster, the indexes
        of its molecules in `smallmol_list`)

    Raises
    ------
    ValueError
        If `method` is not one of the available methods
    """
    _methods = ['maccs', 'pathFingerprints', 'atomsFingerprints', 'torsionsFingerprints',
                'circularFingerprints', 'shape', 'mcs']

    if method not in _methods:
        raise ValueError('The method provided {} does not exists. The ones available are the following: {}'.format(method, _methods))

    # Work on copies so that the caller's molecules are never modified.
    smallmol_list = np.array([sm.copy() for sm in smallmol_list])

    if removeHs:
        stripped = []
        for sm in smallmol_list:
            B = Builder(sm)
            B.removeHydrogens()
            stripped.append(B.getSmallMol())

        smallmol_list = np.array(stripped)

    # Best-effort conversion: a failing molecule is reported, not fatal.
    # `converted` remembers the position in smallmol_list of every molecule
    # that made it into the distance matrix, so that the DBSCAN labels can be
    # mapped back to the right molecules even when some conversions fail.
    rdkitMols_list = []
    converted = []
    wrong = []
    for n, sm in enumerate(smallmol_list):
        try:
            rdkit_mol = sm.toRdkitMol(includeConformer=True)
        except Exception:
            wrong.append(n)
        else:
            rdkitMols_list.append(rdkit_mol)
            converted.append(n)
    print('{} problematic molecules. Indexes: {}'.format(len(wrong), wrong))
    converted = np.array(converted, dtype=int)

    if len(rdkitMols_list) == 0:
        # Nothing to cluster: skip the distance matrix and DBSCAN entirely.
        labels = np.zeros((0,), dtype=int)
    else:
        from sklearn.cluster import DBSCAN

        import sys
        this_module = sys.modules[__name__]
        clustmethod = getattr(this_module, '_{}Clustering'.format(method))

        if method not in ['shape', 'mcs']:
            # Fingerprint methods build the whole matrix themselves.
            matrix = clustmethod(rdkitMols_list)
        else:
            # 'shape' and 'mcs' work one row at a time: one job per molecule.
            aprun = ParallelExecutor(n_jobs=-1)
            matrix = aprun(total=len(rdkitMols_list), desc='{} Distance'.format(method)) \
                (delayed(clustmethod)(mol1, rdkitMols_list) for mol1 in rdkitMols_list)
            matrix = np.array(matrix)

        # NOTE(review): recent scikit-learn releases may reject min_samples=0;
        # confirm against the version in use before changing it.
        db = DBSCAN(eps=distThresholds, min_samples=0, metric='precomputed').fit(matrix)
        labels = db.labels_

    populations = np.bincount(labels)
    # Labels start at 0, so there are max(label) + 1 clusters, which is the
    # length of the bincount. Using max(label) alone dropped the last cluster.
    n_clusters = int(populations.size)

    clusters_idx = np.empty((n_clusters,), dtype=object)
    clusters_smallmols = np.empty((n_clusters,), dtype=object)

    for n_cl in range(n_clusters):
        # Translate matrix rows back to positions in smallmol_list.
        members = converted[labels == n_cl]
        clusters_idx[n_cl] = members
        clusters_smallmols[n_cl] = smallmol_list[members]

    if returnDetails:
        details = {'numClusters': n_clusters,
                   'populations': populations,
                   'clusters': clusters_idx}
        return clusters_smallmols, details

    return clusters_smallmols
Exemple #15
0
def ffevaluate(mol, prm, betweensets=None, dist_thresh=0, threads=1):
    """  Evaluates energies and forces of the forcefield for a given Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object. Can contain multiple frames. It is copied, so the passed object is not modified here.
    prm : :class:`ParameterSet <parmed.ParameterSet>` object
        Forcefield parameters.
    betweensets : tuple of strings
        Only calculate energies between two sets of atoms given as atomselect strings.
        Only computes LJ and electrostatics.
    dist_thresh : float
        If set to a value != 0 it will only calculate LJ, electrostatics and bond energies for atoms which are closer
        than the threshold
    threads : int
        With 1 (default) all frames are evaluated in a single call. With any other value each frame is evaluated
        as a separate job of a ParallelExecutor created with `n_jobs=threads`, and the per-frame results are joined.

    Returns
    -------
    energies : np.ndarray
        A (6, nframes) shaped matrix containing the individual energy components of each simulation frame.
        Rows correspond to the following energies 0: bond 1: LJ 2: Electrostatic 3: angle 4: dihedral 5: improper
    forces : np.ndarray
        A (natoms, 3, nframes) shaped matrix containing the total force on each atom for each simulation frame.
    atmnrg : np.ndarray
        A (natoms, 6, nframes) shaped matrix containing the approximate potential energy components of each atom at each
        simulation frame. The 6 indexes are the same as in the `energies` return argument.

    Examples
    --------
    >>> from htmd.ffevaluation.ffevaluate import *
    >>> from htmd.ffevaluation.test_ffevaluate import fixParameters, drawForce
    >>> from htmd.ui import *
    >>> import parmed
    >>> mol = Molecule('./htmd/data/test-ffevaluate/waterbox/structure.psf')
    >>> mol.read('./htmd/data/test-ffevaluate/waterbox/output.xtc')
    >>> prm = parmed.charmm.CharmmParameterSet(fixParameters('./htmd/data/test-ffevaluate/waterbox/parameters.prm'))
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))

    >>> mol.view()
    >>> for cc, ff in zip(mol.coords[:, :, 0], forces[:, :, 0]):
    >>>     drawForce(cc, ff)

    Amber style
    >>> prmtop = parmed.amber.AmberParm('structure.prmtop')
    >>> prm = parmed.amber.AmberParameterSet.from_structure(prmtop)
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    """
    mol = mol.copy()
    coords = mol.coords
    box = mol.box
    setA, setB = calculateSets(mol, betweensets)

    # Arguments shared by every evaluation, after the coordinates and the box.
    args = list(init(mol, prm)) + [setA, setB, dist_thresh]

    if threads != 1:
        from htmd.parallelprogress import ParallelExecutor, delayed

        # One job per frame: each receives a single-frame coordinate array
        # and the matching box column.
        executor = ParallelExecutor(n_jobs=threads)
        run_jobs = executor(total=mol.numFrames, description='Evaluating energies')
        results = run_jobs(
            delayed(_ffevaluate)(np.atleast_3d(coords[:, :, frame]),
                                 box[:, frame].reshape(3, 1),
                                 *args)
            for frame in range(mol.numFrames))

        # Join the per-frame results along the frame axis.
        energies = np.hstack([result[0] for result in results])
        forces = np.concatenate([result[1] for result in results], axis=2)
        atmnrg = np.concatenate([result[2] for result in results], axis=2)
    else:
        energies, forces, atmnrg = _ffevaluate(coords, box, *args)

    return energies, forces, atmnrg
Exemple #16
0
def ffevaluate(mol,
               prm,
               betweensets=None,
               cutoff=0,
               rfa=False,
               solventDielectric=78.5,
               threads=1,
               fromstruct=False):
    """  Evaluates energies and forces of the forcefield for a given Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object. Can contain multiple frames. Its box must have shape (3, numFrames).
    prm : :class:`ParameterSet <parmed.ParameterSet>` object
        Forcefield parameters.
    betweensets : tuple of strings
        Only calculate energies between two sets of atoms given as atomselect strings.
        Only computes LJ and electrostatics.
    cutoff : float
        If set to a value != 0 it will only calculate LJ, electrostatics and bond energies for atoms which are closer
        than the threshold
    rfa : bool
        Use with `cutoff` to enable the reaction field approximation for scaling of the electrostatics up to the cutoff.
        Uses the value of `solventDielectric` to model everything beyond the cutoff distance as solvent with uniform
        dielectric.
    solventDielectric : float
        Used together with `cutoff` and `rfa`
    threads : int
        With 1 (default) all frames are evaluated in a single call. With any other value each frame is evaluated
        as a separate job of a ParallelExecutor created with `n_jobs=threads`, and the per-frame results are joined.
    fromstruct : bool
        Forwarded unchanged to `init` together with `mol` and `prm`; its effect is defined there
        (NOTE(review): presumably selects taking parameters from the structure, to be confirmed).

    Returns
    -------
    energies : np.ndarray
        A (6, nframes) shaped matrix containing the individual energy components of each simulation frame.
        Rows correspond to the following energies 0: bond 1: LJ 2: Electrostatic 3: angle 4: dihedral 5: improper
    forces : np.ndarray
        A (natoms, 3, nframes) shaped matrix containing the total force on each atom for each simulation frame.
    atmnrg : np.ndarray
        A (natoms, 6, nframes) shaped matrix containing the approximate potential energy components of each atom at each
        simulation frame. The 6 indexes are the same as in the `energies` return argument.

    Raises
    ------
    ValueError
        If the box of `mol` does not have shape (3, numFrames)

    Examples
    --------
    >>> from htmd.ffevaluation.ffevaluate import *
    >>> from htmd.ffevaluation.test_ffevaluate import fixParameters, drawForce
    >>> from htmd.ui import *
    >>> import parmed
    >>> mol = Molecule('./htmd/data/test-ffevaluate/waterbox/structure.psf')
    >>> mol.read('./htmd/data/test-ffevaluate/waterbox/output.xtc')
    >>> prm = parmed.charmm.CharmmParameterSet(fixParameters('./htmd/data/test-ffevaluate/waterbox/parameters.prm'))
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))

    >>> mol.view()
    >>> for cc, ff in zip(mol.coords[:, :, 0], forces[:, :, 0]):
    >>>     drawForce(cc, ff)

    Amber style
    >>> prmtop = parmed.amber.AmberParm('structure.prmtop')
    >>> prm = parmed.amber.AmberParameterSet.from_structure(prmtop)
    >>> energies, forces, atmnrg = ffevaluate(mol, prm, betweensets=('resname SOD', 'water'))
    """
    # The box needs three rows and one column per coordinate frame.
    if mol.box.shape[0] != 3 or mol.box.shape[1] != mol.coords.shape[2]:
        raise ValueError(
            'Box dimensions have to be (3, numFrames), your Molecule has box of shape {}'
            .format(mol.box.shape))

    mol = mol.copy()
    coords = mol.coords.astype(np.float32)
    box = mol.box.astype(np.float32)
    setA, setB = calculateSets(mol, betweensets)

    # Arguments shared by every evaluation, after the coordinates and the box.
    args = list(init(mol, prm, fromstruct))
    args += [setA, setB, cutoff, rfa, solventDielectric]

    if threads != 1:
        from htmd.parallelprogress import ParallelExecutor, delayed

        # One job per frame: each receives a single-frame coordinate array
        # and the matching box column.
        executor = ParallelExecutor(n_jobs=threads)
        run_jobs = executor(total=mol.numFrames, desc='Evaluating energies')
        results = run_jobs(
            delayed(_ffevaluate)(np.atleast_3d(coords[:, :, frame]),
                                 box[:, frame].reshape(3, 1),
                                 *args)
            for frame in range(mol.numFrames))

        # Join the per-frame results along the frame axis.
        energies = np.hstack([result[0] for result in results])
        forces = np.concatenate([result[1] for result in results], axis=2)
        atmnrg = np.concatenate([result[2] for result in results], axis=2)
    else:
        energies, forces, atmnrg = _ffevaluate(coords, box, *args)

    return energies, forces, atmnrg
Exemple #17
0
    def getStates(self,
                  states=None,
                  statetype='macro',
                  wrapsel='protein',
                  alignsel='name CA',
                  alignmol=None,
                  samplemode='weighted',
                  numsamples=50,
                  simlist=None):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. If None, all states of the given `statetype` are used
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames. Set to None to disable aligning
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others. If None, a Molecule is created from the
            structure file of the simlist
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.
        simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
            Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        AttributeError
            If `simlist` has a different number of trajectories than the simlist of the model
        NameError
            If the simlist does not use a single structure file, if `statetype` is not one of the accepted values,
            or if there are no states to sample

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Cluster states do not need the Markov model, the other state types do.
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        if simlist is None:
            simlist = self.data.simlist
        elif len(simlist) != len(self.data.simlist):
            raise AttributeError(
                'Provided simlist has different number of trajectories than the one used by the model.'
            )

        (single, molfile) = _singleMolfile(simlist)
        if not single:
            raise NameError(
                'Visualizer does not support yet visualization of systems with different structure files. '
                'The simlist should be created with a single molfile (for example a filtered one)'
            )
        if alignmol is None:
            alignmol = Molecule(molfile)
        if statetype not in ('macro', 'micro', 'cluster'):
            # The message previously had a stray quote before 'cluster'.
            raise NameError(
                "'statetype' must be either 'macro', 'micro' or 'cluster'")
        if states is None:
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            elif statetype == 'cluster':
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')

        # Only the relative frames are needed; the first return value is discarded.
        (_, relframes) = self.sampleStates(states,
                                           numsamples,
                                           statetype=statetype,
                                           samplemode=samplemode)

        from htmd.config import _config
        from htmd.parallelprogress import ParallelExecutor, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        aprun = ParallelExecutor(n_jobs=1)
        mols = aprun(total=len(relframes), description='Getting state Molecules')\
            (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, alignmol, simlist) for rel in relframes)
        return np.array(mols, dtype=object)