def __init__(self, sims, sel, simple):
        """Store the selection settings and pre-compute the atom selection if possible.

        Parameters
        ----------
        sims : object
            Simulation list handed to ``_singleMolfile``.
        sel : str
            Atom selection; stored and, when possible, evaluated once up front.
        simple : object
            Stored unchanged in ``self._simple``.
        """
        self._sel = sel
        self._simple = simple
        # Only filled in when `_singleMolfile` reports a single structure file.
        self._pc_sel = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        self._pc_sel = Molecule(molfile).atomselect(sel)
    def __init__(self, sims, sel, simple):
        """Remember the selection arguments and cache the evaluated selection.

        The cached selection (``self._pc_sel``) stays ``None`` unless
        ``_singleMolfile(sims)`` flags the simulations as sharing one structure
        file, in which case that file is loaded and `sel` is evaluated on it.
        """
        self._pc_sel = None
        self._sel, self._simple = sel, simple

        is_single, structure = _singleMolfile(sims)
        if is_single:
            reference = Molecule(structure)
            self._pc_sel = reference.atomselect(sel)
Exemple #3
0
    def getStates(self, states=None, statetype='macro', wrapsel='protein', alignsel='name CA', alignmol=None, samplemode='weighted', numsamples=50):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. Defaults to all states of the requested `statetype`.
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others.
            NOTE(review): the code calls ``len()`` on this value and passes it to ``Molecule(...)``, so it
            appears to expect a structure file path here - confirm against callers.
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        NameError
            If `statetype` is not a supported value, if the simulations do not share a single structure file,
            or if the list of states is empty.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Validate the cheap argument first so a typo fails before any model or file access.
        if statetype not in ('macro', 'micro', 'cluster'):
            raise NameError("'statetype' must be either 'macro', 'micro' or 'cluster'")
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        (single, molfile) = _singleMolfile(self.data.simlist)
        if not single:
            raise NameError('Visualizer does not support yet visualization of systems with different number of atoms')
        if alignmol is None:
            alignmol = molfile
        if states is None:
            # The default must follow the requested state type; using macronum for
            # micro/cluster states would silently sample only the first few states.
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            else:
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')
        refmol = None
        if len(alignsel) > 0 and len(alignmol) > 0:
            refmol = Molecule(alignmol)

        (_, relframes) = self.sampleStates(states, [numsamples]*len(states), statetype=statetype, samplemode=samplemode)

        from joblib import Parallel, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        mols = Parallel(n_jobs=1, verbose=11)(delayed(_loadMols)(self, i, rel, molfile, wrapsel, alignsel, refmol)
                                                  for i, rel in enumerate(relframes))
        return np.array(mols, dtype=object)
Exemple #4
0
    def test_simlist_auto_structure(self):
        """Build a simlist from globbed data and input directories and check copy/equality semantics.

        Also checks that the first simulation carries two structure files and that
        ``_singleMolfile`` reports the list as single-structure.
        """
        from htmd.home import home
        from htmd.projections.metric import _singleMolfile

        traj_dirs = glob(path.join(home(dataDir='adaptive'), 'data', '*', ''))
        input_dirs = glob(path.join(home(dataDir='adaptive'), 'input', '*'))
        sims = simlist(traj_dirs, input_dirs)

        clone = sims[0].copy()
        assert clone == sims[0]
        assert clone != sims[1]
        assert len(sims[0].molfile) == 2
        single_flag = _singleMolfile(sims)[0]
        assert single_flag
Exemple #5
0
    def __init__(self, sims, protsel, dih=None, sincos=True):
        """Store the dihedral projection settings and pre-compute dihedral data if possible.

        Parameters
        ----------
        sims : object
            Simulation list handed to ``_singleMolfile``.
        protsel : str
            Atom selection evaluated on the loaded structure.
        dih : object, optional
            Stored unchanged in ``self._dih``.
        sincos : bool, optional
            Stored unchanged in ``self._sincos``.
        """
        self._protsel = protsel
        self._sincos = sincos
        self._dih = dih  # TODO: Calculate the dihedral
        # Only filled in when `_singleMolfile` reports a single structure file.
        self._pc_dih = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        structure = Molecule(molfile)
        selection = structure.atomselect(protsel)
        self._pc_dih = self._dihedralPrecalc(structure, selection)
Exemple #6
0
    def test_simlist_single_structure(self):
        """Build a simlist with one explicit structure file and check copy/equality semantics.

        Also checks that ``molfile`` is not stored as a list and that
        ``_singleMolfile`` reports the list as single-structure.
        """
        from htmd.home import home
        from htmd.projections.metric import _singleMolfile

        traj_dirs = glob(path.join(home(dataDir='adaptive'), 'data', '*', ''))
        structure = path.join(home(dataDir='adaptive'), 'input', 'e1s1_1', 'structure.pdb')
        sims = simlist(traj_dirs, structure)

        clone = sims[0].copy()
        assert clone == sims[0]
        assert clone != sims[1]
        assert not isinstance(sims[0].molfile, list)
        single_flag = _singleMolfile(sims)[0]
        assert single_flag
Exemple #7
0
    def __init__(self, sims, protsel, dih=None, sincos=True):
        """Remember the projection arguments and cache the dihedral pre-calculation.

        ``self._pc_dih`` stays ``None`` unless ``_singleMolfile(sims)`` flags the
        simulations as sharing one structure file; in that case the file is loaded
        and ``self._dihedralPrecalc`` is run on it with the `protsel` selection.
        """
        self._protsel, self._sincos = protsel, sincos
        self._dih = dih  # TODO: Calculate the dihedral
        self._pc_dih = None

        is_single, structure_file = _singleMolfile(sims)
        if is_single:
            reference = Molecule(structure_file)
            self._pc_dih = self._dihedralPrecalc(reference, reference.atomselect(protsel))
Exemple #8
0
    def test_simlist_auto_structure(self):
        """Simlist built from globbed input directories: copies compare equal, two molfiles per sim."""
        from htmd.home import home
        from htmd.projections.metric import _singleMolfile

        data_pattern = path.join(home(dataDir="adaptive"), "data", "*", "")
        input_pattern = path.join(home(dataDir="adaptive"), "input", "*")
        sims = simlist(glob(data_pattern), glob(input_pattern))

        copied = sims[0].copy()
        assert copied == sims[0]
        assert copied != sims[1]
        assert len(sims[0].molfile) == 2
        is_single, _ = _singleMolfile(sims)
        assert is_single
Exemple #9
0
    def test_simlist_single_structure(self):
        """Simlist built with one explicit structure file: copies compare equal, molfile is not a list."""
        from htmd.home import home
        from htmd.projections.metric import _singleMolfile

        data_pattern = path.join(home(dataDir="adaptive"), "data", "*", "")
        structure_file = path.join(
            home(dataDir="adaptive"), "input", "e1s1_1", "structure.pdb"
        )
        sims = simlist(glob(data_pattern), structure_file)

        copied = sims[0].copy()
        assert copied == sims[0]
        assert copied != sims[1]
        assert not isinstance(sims[0].molfile, list)
        is_single, _ = _singleMolfile(sims)
        assert is_single
Exemple #10
0
    def __init__(self, sims, sel1, sel2, numshells, shellwidth, pbc, truncate):
        """ Do not call this constructor directly. Use the static `project` method

        Initialises the parent class with the 'distances' metric and a threshold of 8,
        stores the shell settings and, when ``_singleMolfile`` reports a single
        structure file, pre-computes the mapping and the unique shell centres
        (first column of the mapping).
        """
        super().__init__(sims, sel1, sel2, None, None, 'distances', 8, pbc, truncate)

        self.numshells = numshells
        self.shellwidth = shellwidth
        # Both stay None unless a single structure file is available.
        self.map = None
        self.shellcenters = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        structure = Molecule(molfile)
        self.map = super()._getMapping(structure)
        self.shellcenters = np.unique(self.map[:, 0])
Exemple #11
0
    def __init__(self, sims, refmol, trajalnstr, refalnstr, atomsel, centerstr):
        """Store the reference molecule and selections; pre-compute selections if possible.

        Parameters
        ----------
        sims : object
            Simulation list handed to ``_singleMolfile``.
        refmol : object
            Reference molecule; `refalnstr` is evaluated on it immediately.
        trajalnstr, atomsel, centerstr : str
            Selections stored as given and, when a single structure file is
            available, also evaluated once on that structure.
        refalnstr : str
            Selection evaluated on `refmol`.
        """
        self._refmol = refmol
        self._refalnsel = self._refmol.atomselect(refalnstr)
        self._trajalnsel = trajalnstr
        self._centersel = centerstr
        self._atomsel = atomsel
        # pc = Pre-calculated; left as None when no single structure file exists.
        self._pc_trajalnsel = None
        self._pc_atomsel = None
        self._pc_centersel = None

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        structure = Molecule(molfile)
        self._pc_trajalnsel = structure.atomselect(trajalnstr)
        self._pc_atomsel = structure.atomselect(atomsel)
        self._pc_centersel = structure.atomselect(centerstr)
Exemple #12
0
    def __init__(self, sims, refmol, trajalnstr, refalnstr, atomsel, centerstr):
        """Keep the reference molecule and selection strings, caching evaluated selections.

        The three ``_pc_*`` attributes (pc = pre-calculated) remain ``None`` unless
        ``_singleMolfile(sims)`` flags a single structure file; then that file is
        loaded and each selection string is evaluated on it once.
        """
        self._refmol = refmol
        self._refalnsel = self._refmol.atomselect(refalnstr)
        self._trajalnsel, self._centersel, self._atomsel = trajalnstr, centerstr, atomsel
        self._pc_trajalnsel = self._pc_atomsel = self._pc_centersel = None

        is_single, structure_file = _singleMolfile(sims)
        if is_single:
            reference = Molecule(structure_file)
            # Same evaluation order as the attribute list: alignment, atoms, centre.
            for attr, selection in (('_pc_trajalnsel', trajalnstr),
                                    ('_pc_atomsel', atomsel),
                                    ('_pc_centersel', centerstr)):
                setattr(self, attr, reference.atomselect(selection))
Exemple #13
0
    def __init__(self, sims, sel1, sel2, groupsel1, groupsel2,  metric, threshold, pbc, truncate):
        """ Do not call this constructor directly. Use the static `project` method

        Stores every argument on the instance. When ``_singleMolfile`` reports a
        single structure file, both selections are processed once on that structure
        through ``self._processSelection`` and cached in ``precalcsel1`` / ``precalcsel2``.
        """
        self.sel1, self.sel2 = sel1, sel2
        self.groupsel1, self.groupsel2 = groupsel1, groupsel2
        # Left as None when no single structure file exists.
        self.precalcsel1 = None
        self.precalcsel2 = None
        self.metric = metric
        self.threshold = threshold
        self.pbc = pbc
        self.truncate = truncate

        single, molfile = _singleMolfile(sims)
        if not single:
            return
        structure = Molecule(molfile)
        self.precalcsel1 = self._processSelection(structure, sel1, groupsel1)
        self.precalcsel2 = self._processSelection(structure, sel2, groupsel2)
Exemple #14
0
    def viewStates(self, protein=None, ligand=None, nsamples=20):
        """Load sampled frames of every active HMM state into the current viewer.

        For each state in ``self.hmm.active_set`` a Molecule is built from the shared
        structure file, filled with the sampled frames, wrapped and aligned on
        "protein", and sent to the viewer under the name ``"hmm macro <state>"``.

        Parameters
        ----------
        protein : object, optional
            If not None, a "protein" representation is added for each loaded molecule.
            Only checked against None; its value is not used otherwise.
        ligand : str, optional
            If not None, a "ligand" representation is added using this selection,
            coloured by cycling through a fixed colour list.
        nsamples : int
            Passed to ``self.hmm.sample_by_observation_probabilities``.

        Raises
        ------
        RuntimeError
            If ``_singleMolfile`` does not report a single structure file.
        """
        from htmd.projections.metric import _singleMolfile
        from moleculekit.molecule import Molecule
        from moleculekit.vmdviewer import getCurrentViewer

        (single, molfile) = _singleMolfile(self.data.simlist)
        if not single:
            # The previous adjacent literals ("Can" "t ...") concatenated to "Cant".
            raise RuntimeError("Can't visualize states without unique molfile")

        viewer = getCurrentViewer()
        colors = [0, 1, 3, 4, 5, 6, 7, 9]

        print("Active set includes macrostates: {}".format(
            self.hmm.active_set))

        res = self.hmm.sample_by_observation_probabilities(nsamples)
        refmol = Molecule(molfile)

        for i, s in enumerate(self.hmm.active_set):
            mol = Molecule(molfile)
            # Drop the structure's own coordinates so only sampled frames remain.
            mol.coords = []
            mol.box = []

            frames = self.data.rel2sim(res[i])
            for f in frames:
                mol._readTraj(f.sim.trajectory[f.piece],
                              frames=[f.frame],
                              append=True)
            mol.wrap("protein")
            mol.align("protein", refmol=refmol)
            viewer.loadMol(mol, name="hmm macro " + str(s))
            if ligand is not None:
                viewer.rep("ligand",
                           sel=ligand,
                           color=colors[np.mod(i, len(colors))])
            if protein is not None:
                viewer.rep("protein")
            viewer.send("start_sscache")
Exemple #15
0
    def viewStates(self, protein=None, ligand=None, nsamples=20):
        """Load sampled frames of every active HMM state into the current viewer.

        For each state in ``self.hmm.active_set`` a Molecule is built from the shared
        structure file, filled with the sampled frames, wrapped and aligned on
        'protein', and sent to the viewer under the name ``'hmm macro <state>'``.

        Parameters
        ----------
        protein : object, optional
            If not None, a 'protein' representation is added for each loaded molecule.
            Only checked against None; its value is not used otherwise.
        ligand : str, optional
            If not None, a 'ligand' representation is added using this selection,
            coloured by cycling through a fixed colour list.
        nsamples : int
            Passed to ``self.hmm.sample_by_observation_probabilities``.

        Raises
        ------
        RuntimeError
            If ``_singleMolfile`` does not report a single structure file.
        """
        from htmd.projections.metric import _singleMolfile
        from htmd.molecule.molecule import Molecule
        from htmd.vmdviewer import getCurrentViewer
        (single, molfile) = _singleMolfile(self.data.simlist)
        if not single:
            # 'Can''t' is adjacent-literal concatenation in Python and produced "Cant".
            raise RuntimeError("Can't visualize states without unique molfile")

        viewer = getCurrentViewer()
        colors = [0, 1, 3, 4, 5, 6, 7, 9]

        print('Active set includes macrostates: {}'.format(self.hmm.active_set))

        res = self.hmm.sample_by_observation_probabilities(nsamples)
        refmol = Molecule(molfile)

        for i, s in enumerate(self.hmm.active_set):
            mol = Molecule(molfile)
            # Drop the structure's own coordinates so only sampled frames remain.
            mol.coords = []
            mol.box = []

            frames = self.data.rel2sim(res[i])
            for f in frames:
                mol._readTraj(f.sim.trajectory[f.piece], frames=[f.frame], append=True)
            mol.wrap('protein')
            mol.align('protein', refmol=refmol)
            viewer.loadMol(mol, name='hmm macro ' + str(s))
            if ligand is not None:
                viewer.rep('ligand', sel=ligand, color=colors[np.mod(i, len(colors))])
            if protein is not None:
                viewer.rep('protein')
            viewer.send('start_sscache')
Exemple #16
0
    def getStates(self,
                  states=None,
                  statetype='macro',
                  wrapsel='protein',
                  alignsel='name CA',
                  alignmol=None,
                  samplemode='weighted',
                  numsamples=50,
                  simlist=None):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. Defaults to all states of the requested `statetype`.
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames. Set to None to disable aligning
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others. Defaults to a Molecule
            loaded from the shared structure file.
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.
        simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
            Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        NameError
            If `statetype` is not a supported value, if the simulations do not share a single structure file,
            or if the list of states is empty.
        AttributeError
            If the provided `simlist` has a different length than the model's simlist.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Validate the cheap argument first so a typo fails before any model or file access.
        if statetype not in ('macro', 'micro', 'cluster'):
            raise NameError(
                "'statetype' must be either 'macro', 'micro' or 'cluster'")
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        if simlist is None:
            simlist = self.data.simlist
        elif len(simlist) != len(self.data.simlist):
            raise AttributeError(
                'Provided simlist has different number of trajectories than the one used by the model.'
            )

        (single, molfile) = _singleMolfile(simlist)
        if not single:
            raise NameError(
                'Visualizer does not support yet visualization of systems with different structure files. '
                'The simlist should be created with a single molfile (for example a filtered one)'
            )
        if alignmol is None:
            alignmol = Molecule(molfile)
        if states is None:
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            else:
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')

        (_, relframes) = self.sampleStates(states,
                                           numsamples,
                                           statetype=statetype,
                                           samplemode=samplemode)

        from htmd.parallelprogress import ParallelExecutor, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        aprun = ParallelExecutor(n_jobs=1)  # _config['ncpus'])
        mols = aprun(total=len(relframes), description='Getting state Molecules')\
            (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, alignmol, simlist) for rel in relframes)
        return np.array(mols, dtype=object)
Exemple #17
0
        name = os.path.basename(os.path.dirname(foldername))
    return name


if __name__ == '__main__':
    from htmd.home import home
    from glob import glob
    from os.path import join
    from htmd.projections.metric import _singleMolfile

    # Case 1: structures given as a glob of the input directories.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    inputdirs = glob(join(home(dataDir='adaptive'), 'input', '*'))
    sims = simlist(trajdirs, inputdirs)
    x = sims[0].copy()
    assert x == sims[0]
    assert x != sims[1]
    assert len(sims[0].molfile) == 2
    assert _singleMolfile(sims)[0]

    # Case 2: structures given as a glob of the per-simulation structure.pdb files.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    pdbfiles = glob(join(home(dataDir='adaptive'), 'input', '*', 'structure.pdb'))
    sims = simlist(trajdirs, pdbfiles)
    x = sims[0].copy()
    assert x == sims[0]
    assert x != sims[1]
    assert not isinstance(sims[0].molfile, list)
    assert _singleMolfile(sims)[0]

    # Case 3: one explicit structure file shared by all simulations.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    onepdb = join(home(dataDir='adaptive'), 'input', 'e1s1_1', 'structure.pdb')
    sims = simlist(trajdirs, onepdb)
    x = sims[0].copy()
    assert x == sims[0]
    assert x != sims[1]
    assert not isinstance(sims[0].molfile, list)
    assert _singleMolfile(sims)[0]
Exemple #18
0
    return name


if __name__ == '__main__':
    from htmd.home import home
    from glob import glob
    from os.path import join
    from htmd.projections.metric import _singleMolfile

    # Case 1: structures given as a glob of the input directories.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    inputdirs = glob(join(home(dataDir='adaptive'), 'input', '*'))
    sims = simlist(trajdirs, inputdirs)
    x = sims[0].copy()
    assert x == sims[0]
    assert x != sims[1]
    assert len(sims[0].molfile) == 2
    assert _singleMolfile(sims)[0]

    # Case 2: structures given as a glob of the per-simulation structure.pdb files.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    pdbfiles = glob(join(home(dataDir='adaptive'), 'input', '*', 'structure.pdb'))
    sims = simlist(trajdirs, pdbfiles)
    x = sims[0].copy()
    assert x == sims[0]
    assert x != sims[1]
    assert not isinstance(sims[0].molfile, list)
    assert _singleMolfile(sims)[0]

    # Case 3: one explicit structure file shared by all simulations.
    trajdirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    onepdb = join(home(dataDir='adaptive'), 'input', 'e1s1_1', 'structure.pdb')
    sims = simlist(trajdirs, onepdb)
    x = sims[0].copy()
    assert x == sims[0]
Exemple #19
0
    def getStates(self, states=None, statetype='macro', wrapsel='protein', alignsel='name CA', alignmol=None, samplemode='weighted', numsamples=50, simlist=None):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. Defaults to all states of the requested `statetype`.
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others.
            NOTE(review): the code calls ``len()`` on this value and passes it to ``Molecule(...)``, so it
            appears to expect a structure file path here - confirm against callers.
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.
        simlist : simlist
            Optionally pass a different (but matching, i.e. filtered) simlist for creating the Molecules.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        NameError
            If `statetype` is not a supported value, if the simulations do not share a single structure file,
            or if the list of states is empty.
        AttributeError
            If the provided `simlist` has a different length than the model's simlist.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Validate the cheap argument first so a typo fails before any model or file access.
        if statetype not in ('macro', 'micro', 'cluster'):
            raise NameError("'statetype' must be either 'macro', 'micro' or 'cluster'")
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        if simlist is None:
            simlist = self.data.simlist
        elif len(simlist) != len(self.data.simlist):
            raise AttributeError('Provided simlist has different number of trajectories than the one used by the model.')

        (single, molfile) = _singleMolfile(simlist)
        if not single:
            raise NameError('Visualizer does not support yet visualization of systems with different number of atoms')
        if alignmol is None:
            alignmol = molfile
        if states is None:
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            else:
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')
        refmol = None
        if len(alignsel) > 0 and len(alignmol) > 0:
            refmol = Molecule(alignmol)

        (_, relframes) = self.sampleStates(states, [numsamples]*len(states), statetype=statetype, samplemode=samplemode)

        from htmd.parallelprogress import ParallelExecutor, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        aprun = ParallelExecutor(n_jobs=1)  # _config['ncpus'])
        mols = aprun(total=len(relframes), description='Getting state Molecules')\
            (delayed(_loadMols)(self, rel, molfile, wrapsel, alignsel, refmol, simlist) for rel in relframes)
        return np.array(mols, dtype=object)
Exemple #20
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Raises
        ------
        RuntimeError
            If the data is not a Metric and `ndim` exceeds the number of dimensions of the data.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        from tqdm import tqdm
        if ndim is not None:
            self.tic.set_params(dim=ndim)

        keepdata = []
        keepdim = None
        keepdimdesc = None
        if isinstance(
                self.data, Metric
        ):  # Memory efficient TICA projecting trajectories on the fly
            proj = []
            refs = []
            fstep = None

            metr = self.data
            k = -1  # running index over all simulations, used to record dropped ones
            droppedsims = []
            pbar = tqdm(total=len(metr.simulations))
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        droppedsims.append(k)
                        continue
                    if self.dimensions is not None:
                        # Columns outside self.dimensions are kept unprojected.
                        numDimensions = pro[0].shape[1]
                        keepdim = np.setdiff1d(range(numDimensions),
                                               self.dimensions)
                        keepdata.append(pro[0][:, keepdim])
                        proj.append(
                            self.tic.transform(
                                pro[0][:, self.dimensions]).astype(np.float32)
                        )  # Sub-select dimensions for projecting
                    else:
                        proj.append(
                            self.tic.transform(pro[0]).astype(np.float32))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]
                pbar.update(len(projecteddata))
            pbar.close()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
            if self.dimensions is not None:
                from htmd.projections.metric import _singleMolfile
                from htmd.molecule.molecule import Molecule
                (single, molfile) = _singleMolfile(metr.simulations)
                if single:
                    keepdimdesc = metr.getMapping(Molecule(molfile))
                    keepdimdesc = keepdimdesc.iloc[keepdim]
        else:
            if ndim is not None and self.data.numDimensions < ndim:
                raise RuntimeError(
                    'TICA cannot increase the dimensionality of your data. Your data has {} dimensions and you requested {} TICA dimensions'
                    .format(self.data.numDimensions, ndim))

            if self.dimensions is not None:
                keepdim = np.setdiff1d(range(self.data.numDimensions),
                                       self.dimensions)
                keepdata = [x[:, keepdim] for x in self.data.dat]
                if self.data.description is not None:
                    keepdimdesc = self.data.description.iloc[keepdim]
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        # If TICA is done on a subset of dimensions, combine non-projected data with projected data
        if self.dimensions is not None:
            proj = [np.hstack((kept, projected))
                    for kept, projected in zip(keepdata, proj)]

        if ndim is None:
            ndim = self.tic.dimension()
            logger.info(
                'Kept {} dimension(s) to cover 95% of kinetic variance.'.
                format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)
        from pandas import DataFrame, concat
        # One description row per TICA dimension.
        datatica.description = DataFrame({
            'type': ['tica'] * ndim,
            'atomIndexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i + 1) for i in range(ndim)]
        })

        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            # DataFrame.append was removed in pandas 2.0; concat gives the same result.
            datatica.description = concat([keepdimdesc, datatica.description],
                                          ignore_index=True)

        return datatica
Exemple #21
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Raises
        ------
        RuntimeError
            If the data is not a Metric and `ndim` exceeds the number of dimensions of the data.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            self.tic.set_params(dim=ndim)

        keepdata = []
        keepdim = None
        keepdimdesc = None
        if isinstance(self.data, Metric):  # Memory efficient TICA projecting trajectories on the fly
            proj = []
            refs = []
            fstep = None

            metr = self.data
            p = ProgressBar(len(metr.simulations))
            k = -1  # running index over all simulations, used to record dropped ones
            droppedsims = []
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        droppedsims.append(k)
                        continue
                    if self.dimensions is not None:
                        # Columns outside self.dimensions are kept unprojected.
                        numDimensions = pro[0].shape[1]
                        keepdim = np.setdiff1d(range(numDimensions), self.dimensions)
                        keepdata.append(pro[0][:, keepdim])
                        proj.append(self.tic.transform(pro[0][:, self.dimensions]).astype(np.float32))  # Sub-select dimensions for projecting
                    else:
                        proj.append(self.tic.transform(pro[0]).astype(np.float32))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]
                p.progress(len(projecteddata))
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
            if self.dimensions is not None:
                from htmd.projections.metric import _singleMolfile
                from htmd.molecule.molecule import Molecule
                (single, molfile) = _singleMolfile(metr.simulations)
                if single:
                    keepdimdesc = metr.getMapping(Molecule(molfile))
                    keepdimdesc = keepdimdesc.iloc[keepdim]
        else:
            if ndim is not None and self.data.numDimensions < ndim:
                raise RuntimeError('TICA cannot increase the dimensionality of your data. Your data has {} dimensions and you requested {} TICA dimensions'.format(self.data.numDimensions, ndim))

            if self.dimensions is not None:
                keepdim = np.setdiff1d(range(self.data.numDimensions), self.dimensions)
                keepdata = [x[:, keepdim] for x in self.data.dat]
                if self.data.description is not None:
                    keepdimdesc = self.data.description.iloc[keepdim]
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        # If TICA is done on a subset of dimensions, combine non-projected data with projected data
        if self.dimensions is not None:
            proj = [np.hstack((kept, projected)) for kept, projected in zip(keepdata, proj)]

        if ndim is None:
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj), simlist=simlist, ref=ref, fstep=fstep, parent=parent)
        from pandas import DataFrame, concat
        # One description row per TICA dimension.
        datatica.description = DataFrame({'type': ['tica'] * ndim,
                                          'atomIndexes': [-1] * ndim,
                                          'description': ['TICA dimension {}'.format(i+1) for i in range(ndim)]})

        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            # DataFrame.append was removed in pandas 2.0; concat gives the same result.
            datatica.description = concat([keepdimdesc, datatica.description], ignore_index=True)

        return datatica
Exemple #22
0
    def getStates(self,
                  states=None,
                  statetype='macro',
                  wrapsel='protein',
                  alignsel='name CA',
                  alignmol=None,
                  samplemode='weighted',
                  numsamples=50):
        """ Get samples of MSM states in Molecule classes

        Parameters
        ----------
        states : ndarray, optional
            A list of states to visualize. Defaults to all states of the requested `statetype`.
        statetype : ['macro','micro','cluster'], optional
            The type of state to visualize
        wrapsel : str, optional, default='protein'
            A selection to use for wrapping
        alignsel : str, optional, default='name CA'
            A selection used for aligning all frames
        alignmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
            A reference molecule onto which to align all others.
            NOTE(review): the code calls ``len()`` on this value and passes it to ``Molecule(...)``, so it
            appears to expect a structure file path here - confirm against callers.
        samplemode : ['weighted','random'], optional, default='weighted'
            How to obtain the samples from the states
        numsamples : int
            Number of samples (conformations) for each state.

        Returns
        -------
        mols : ndarray of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects
            A list of :class:`Molecule <htmd.molecule.molecule.Molecule>` objects containing the samples of each state

        Raises
        ------
        NameError
            If `statetype` is not a supported value, if the simulations do not share a single structure file,
            or if the list of states is empty.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> mols = model.getStates()
        >>> for m in mols:
        >>>     m.view()
        """
        # Validate the cheap argument first so a typo fails before any model or file access.
        if statetype not in ('macro', 'micro', 'cluster'):
            raise NameError(
                "'statetype' must be either 'macro', 'micro' or 'cluster'")
        self._integrityCheck(postmsm=(statetype != 'cluster'))
        (single, molfile) = _singleMolfile(self.data.simlist)
        if not single:
            raise NameError(
                'Visualizer does not support yet visualization of systems with different number of atoms'
            )
        if alignmol is None:
            alignmol = molfile
        if states is None:
            # The default must follow the requested state type; using macronum for
            # micro/cluster states would silently sample only the first few states.
            if statetype == 'macro':
                states = range(self.macronum)
            elif statetype == 'micro':
                states = range(self.micronum)
            else:
                states = range(self.data.K)
        if len(states) == 0:
            raise NameError('No ' + statetype + ' states exist in the model')
        refmol = None
        if len(alignsel) > 0 and len(alignmol) > 0:
            refmol = Molecule(alignmol)

        (_, relframes) = self.sampleStates(states,
                                           [numsamples] * len(states),
                                           statetype=statetype,
                                           samplemode=samplemode)

        from joblib import Parallel, delayed
        # This loop really iterates over states. sampleStates returns an array of arrays
        # Removed ncpus because it was giving errors on some systems.
        mols = Parallel(n_jobs=1,
                        verbose=11)(delayed(_loadMols)(
                            self, i, rel, molfile, wrapsel, alignsel, refmol)
                                    for i, rel in enumerate(relframes))
        return np.array(mols, dtype=object)