Example #1
0
    def load(self, filename):
        """ Load a :class:`MetricData <htmd.metricdata.MetricData>` object from disk

        The file is unpickled into a mapping and every entry is copied into this object's
        ``__dict__``. The entry stored under the ``'data'`` key is rebuilt as a fresh
        :class:`MetricData <htmd.metricdata.MetricData>` from the ``__dict__`` of the pickled object.

        Parameters
        ----------
        filename : str
            Path to the saved MetricData object

        Examples
        --------
        >>> model = Model()
        >>> model.load('./model.dat')
        """
        import sys
        import pickle
        from htmd.metricdata import MetricData
        try:
            import pandas.indexes
        except ImportError:
            # Alias the module path so that pickles referring to `pandas.indexes` can still be resolved
            import pandas.core.indexes
            sys.modules['pandas.indexes'] = pandas.core.indexes  # Hacky fix for new pandas version

        # NOTE: unpickling can execute arbitrary code; only load files coming from a trusted source.
        # The context manager guarantees the file is closed even if unpickling fails.
        with open(filename, 'rb') as f:
            z = pickle.load(f)

        for k in z:
            if k == 'data':
                # Rebuild the data through MetricData.load instead of reusing the pickled instance
                m = MetricData()
                m.load(z[k].__dict__)
                self.__dict__[k] = m
            else:
                self.__dict__[k] = z[k]
Example #2
0
File: model.py Project: tonigi/htmd
    def load(self, filename):
        """ Load a :class:`MetricData <htmd.metricdata.MetricData>` object from disk

        The file is unpickled into a mapping whose entries are copied one by one into this object's
        ``__dict__``. The ``'data'`` entry is not copied as is: a new
        :class:`MetricData <htmd.metricdata.MetricData>` is created and loaded from the ``__dict__``
        of the pickled object.

        Parameters
        ----------
        filename : str
            Path to the saved MetricData object

        Examples
        --------
        >>> model = Model()
        >>> model.load('./model.dat')
        """
        import sys
        import pickle
        from htmd.metricdata import MetricData
        try:
            import pandas.indexes
        except ImportError:
            # Register the current module under the old name so pickles pointing at it still load
            import pandas.core.indexes
            sys.modules['pandas.indexes'] = pandas.core.indexes  # Hacky fix for new pandas version

        # NOTE: pickle.load may run arbitrary code; the file must come from a trusted source.
        # Using `with` closes the file handle even when unpickling raises.
        with open(filename, 'rb') as f:
            z = pickle.load(f)

        for k in z:
            if k == 'data':
                m = MetricData()
                m.load(z[k].__dict__)
                self.__dict__[k] = m
            else:
                self.__dict__[k] = z[k]
Example #3
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto `ndim` dimensions

        All trajectories are concatenated and clustered with MiniBatchKMeans into `ndim` clusters.
        Every frame is then described, per cluster center, by its mean distance to all centers minus
        its distance to that center, with negative values set to zero.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the projected data

        Example
        -------
        >>> tri = KMeansTri(data)
        >>> datatri = tri.project(5)
        """
        import scipy.spatial.distance as scidist
        from sklearn.cluster import MiniBatchKMeans
        from htmd.metricdata import MetricData

        allframes = np.concatenate(self.data.dat)
        kmeans = MiniBatchKMeans(n_clusters=ndim)
        kmeans.fit(allframes)

        # TODO: Could make it into a loop to waste less memory
        tocenters = scidist.cdist(allframes, kmeans.cluster_centers_)
        framemean = np.mean(tocenters, axis=1, keepdims=True)
        projected = framemean - tocenters
        # Centers further away than the frame's mean distance contribute nothing
        projected[projected < 0] = 0

        return MetricData(
            dat=self.data.deconcatenate(projected),
            ref=self.data.ref,
            simlist=self.data.simlist,
            fstep=self.data.fstep,
            parent=self.data,
        )
Example #4
0
    def newMetricData(self, datasource, trajectories=None, olddata=None):
        """Converts trajectory indexes to a new MetricData object

        Parameters
        ----------
        datasource : object
            Source handed to `self._collectTrajectory`; its `description` and `fstep` attributes are
            copied into the new MetricData.
        trajectories : iterable
            Items passed one at a time to `self._collectTrajectory`. Despite the default value it must
            be iterable: leaving it as None raises a TypeError.
        olddata : MetricData object, optional
            If given, the new data is appended to it and `olddata` itself is returned.

        Returns
        -------
        data : MetricData object
            `olddata` with the new data appended when `olddata` is given, otherwise the new object.
        """
        collected = [self._collectTrajectory(datasource, traj) for traj in trajectories]

        dat, ref, sim = [], [], []
        for trajdat, trajref, trajsim in collected:
            dat.append(trajdat)
            ref.append(trajref)
            sim.append(trajsim)

        newdata = MetricData(dat=dat, ref=ref, simlist=sim, description=datasource.description,
                             fstep=datasource.fstep)
        if olddata is None:
            return newdata

        olddata.append(newdata)  # Merge with old data
        return olddata
Example #5
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto `ndim` dimensions

        The concatenated trajectories are clustered with MiniBatchKMeans into `ndim` clusters. For each
        frame and cluster center the projected value is the frame's mean distance to all centers minus
        its distance to that center, clipped at zero from below.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the projected data

        Example
        -------
        >>> tri = KMeansTri(data)
        >>> datatri = tri.project(5)
        """
        import scipy.spatial.distance as scidist
        from sklearn.cluster import MiniBatchKMeans
        from htmd.metricdata import MetricData

        stacked = np.concatenate(self.data.dat)
        clusterer = MiniBatchKMeans(n_clusters=ndim)
        clusterer.fit(stacked)

        # TODO: Could make it into a loop to waste less memory
        centerdist = scidist.cdist(stacked, clusterer.cluster_centers_)
        meandist = np.mean(centerdist, axis=1, keepdims=True)
        triangle = meandist - centerdist
        triangle[triangle < 0] = 0

        # Fill an empty MetricData attribute by attribute, keeping the source data as its parent
        result = MetricData()
        result.simlist = self.data.simlist
        result.dat = self.data.deconcatenate(triangle)
        result.ref = self.data.ref
        result.parent = self.data
        result.fstep = self.data.fstep
        return result
Example #6
0
    def _metrify(self, sims, skip, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        If `sims` is a single Molecule, it is handed to `self._processTraj` and that result is returned
        directly instead of a MetricData object.

        Parameters
        ----------
        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function, or a single Molecule.
        skip : int
               Skips every x frames.
        update : MetricData object
             A previous MetricData object to extend with the newly projected data. All entries of
             `sims` are projected regardless; the update branch is flagged in the code as wrong
             and unused.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no trajectory produced any projected data.
        """

        if isinstance(sims, Molecule):
            return self._processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        # NOTE(review): `map` shadows the builtin of the same name inside this method
        map = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indices
            map = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        # One slot per simulation; filled from the parallel results below
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)

        # Monkey-patching callback class
        #oldcallback = joblib.parallel.CallBack
        #joblib.parallel.CallBack = CallBack
        #p = ProgressBar(numSim, description='Projecting trajectories')
        from htmd.config import _config
        # The number of parallel jobs is read from the htmd configuration ('ncpus')
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(
            delayed(_processSimOld)(self, i, updList, uniqueMol, uqMol, skip,
                                    deletesims, metrics, ref, fstep)
            for i in range(numSim))
        #joblib.parallel.CallBack = oldcallback

        # Each result is indexed as (metrics, ref, fstep, deletesims) for simulation i
        for i in range(len(results)):
            metrics[i] = results[i][0]
            ref[i] = results[i][1]
            fstep[i] = results[i][2]
            deletesims[i] = results[i][3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories
        emptyM = np.array([True if x is None else False for x in metrics],
                          dtype=bool)
        emptyR = np.array([True if x is None else False for x in ref],
                          dtype=bool)
        # Sanity check: the delete flags must match exactly the simulations with no metrics and no ref
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        metrics = np.delete(metrics, np.where(emptyM)[0])
        ref = np.delete(ref, np.where(emptyM)[0])
        updList = np.delete(updList, np.where(emptyM)[0])

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=map, simlist=updList)
        else:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(
                updList)  # This is wrong but we don't use update anyways

        # The frame step is the most common value over all simulations (including removed ones,
        # whose entries in `fstep` are not deleted above)
        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning(
                'Multiple framesteps were read from the simulations. Taking the statistical mode: '
                + str(data.fstep) +
                'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            )
        else:
            logger.info(
                'Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually setting the MetricData.fstep property.'
                .format(data.fstep))

        return data
Example #7
0
File: tica.py Project: jhprinz/htmd
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will use choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data.
            Its `map` attribute holds one row per TICA dimension.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            # self.tic._dim = ndim  # Old way of doing it. Deprecated since pyEMMA 2.1
            self.tic.set_params(dim=ndim)  # Change to this in 2.1 pyEMMA version

        if isinstance(self.data, Metric):  # Doesn't project on correct number of dimensions
            proj = []
            refs = []
            fstep = None

            metr = self.data
            p = ProgressBar(len(metr.simulations))
            k = -1  # Running index of the simulation over all generated batches
            droppedsims = []
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        # Remember the simulations without projection to remove them from the simlist
                        droppedsims.append(k)
                        continue
                    proj.append(self.tic.transform(pro[0]))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]  # The first frame step found is used for the whole dataset
                p.progress(len(projecteddata))
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
        else:
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        if ndim is None:
            # No explicit request: describe as many dimensions as the TICA object reports.
            # Without this the map construction below would fail on `range(None)`.
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj, dtype=object),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)
        from pandas import DataFrame
        # One descriptive row per TICA dimension; TICA dimensions have no atom indexes (-1)
        datatica.map = DataFrame({
            'type': ['tica'] * ndim,
            'indexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i + 1) for i in range(ndim)]
        })

        return datatica
Example #8
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will use choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            # self.tic._dim = ndim  # Old way of doing it. Deprecated since pyEMMA 2.1
            self.tic.set_params(dim=ndim)  # Change to this in 2.1 pyEMMA version

        if isinstance(self.data, Metric):  # Doesn't project on correct number of dimensions
            proj = []
            refs = []
            fstep = None
            droppedsims = []

            numsims = len(self.data.simulations)
            p = ProgressBar(numsims)
            for i in range(numsims):
                d, r, f = self.data._projectSingle(i)
                if d is None:
                    # Remember the simulations without projection to remove them from the simlist
                    droppedsims.append(i)
                else:
                    if fstep is None:
                        fstep = f  # The first frame step found is used for the whole dataset
                    refs.append(r)
                    proj.append(self.tic.transform(d))
                # Advance for every simulation, dropped ones included, so the bar reaches its total
                p.progress()
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
        else:
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        if ndim is None:
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(self.tic.dimension()))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj, dtype=object),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)
        return datatica
Example #9
0
File: tica.py Project: prokia/htmd
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        If the TICA was restricted to a subset of dimensions (`self.dimensions` is not None), the
        remaining dimensions are kept unprojected and placed in front of the TICA dimensions, both in
        the data and in the description.

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will use choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Raises
        ------
        RuntimeError
            If the data is not a Metric object and has fewer dimensions than the requested `ndim`.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        from tqdm import tqdm
        if ndim is not None:
            self.tic.set_params(dim=ndim)

        keepdata = []
        keepdim = None
        keepdimdesc = None
        if isinstance(self.data, Metric):  # Memory efficient TICA projecting trajectories on the fly
            proj = []
            refs = []
            fstep = None

            metr = self.data
            k = -1  # Running index of the simulation over all generated batches
            droppedsims = []
            pbar = tqdm(total=len(metr.simulations))
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        # Remember the simulations without projection to remove them from the simlist
                        droppedsims.append(k)
                        continue
                    if self.dimensions is not None:
                        numDimensions = pro[0].shape[1]
                        # Columns which are not part of the TICA are carried over untouched
                        keepdim = np.setdiff1d(range(numDimensions), self.dimensions)
                        keepdata.append(pro[0][:, keepdim])
                        # Sub-select dimensions for projecting
                        proj.append(self.tic.transform(pro[0][:, self.dimensions]).astype(np.float32))
                    else:
                        proj.append(self.tic.transform(pro[0]).astype(np.float32))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]  # The first frame step found is used for the whole dataset
                pbar.update(len(projecteddata))
            pbar.close()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
            if self.dimensions is not None:
                from htmd.projections.metric import _singleMolfile
                from htmd.molecule.molecule import Molecule
                (single, molfile) = _singleMolfile(metr.simulations)
                if single:
                    # The description of the kept columns is only looked up for a single shared molfile
                    keepdimdesc = metr.getMapping(Molecule(molfile))
                    keepdimdesc = keepdimdesc.iloc[keepdim]
        else:
            if ndim is not None and self.data.numDimensions < ndim:
                raise RuntimeError(
                    'TICA cannot increase the dimensionality of your data. Your data has {} dimensions and you requested {} TICA dimensions'
                    .format(self.data.numDimensions, ndim))

            if self.dimensions is not None:
                keepdim = np.setdiff1d(range(self.data.numDimensions), self.dimensions)
                keepdata = [x[:, keepdim] for x in self.data.dat]
                if self.data.description is not None:
                    keepdimdesc = self.data.description.iloc[keepdim]
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        # If TICA is done on a subset of dimensions, combine non-projected data with projected data
        if self.dimensions is not None:
            proj = [np.hstack((kept, projected)) for kept, projected in zip(keepdata, proj)]

        if ndim is None:
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)
        from pandas import DataFrame, concat
        # One descriptive row per TICA dimension; TICA dimensions have no atom indexes (-1)
        datatica.description = DataFrame({
            'type': ['tica'] * ndim,
            'atomIndexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i + 1) for i in range(ndim)]
        })

        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            # pandas.concat replaces DataFrame.append, which was removed in pandas 2.0
            datatica.description = concat([keepdimdesc, datatica.description], ignore_index=True)

        return datatica
Example #10
0
File: tica.py Project: jeiros/htmd
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        When `self.dimensions` is not None the TICA only covers those columns; all other columns are
        kept unprojected and are placed before the TICA dimensions in the returned data and in its
        description.

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will use choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Raises
        ------
        RuntimeError
            If the data is not a Metric object and has fewer dimensions than the requested `ndim`.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            self.tic.set_params(dim=ndim)

        keepdata = []
        keepdim = None
        keepdimdesc = None
        if isinstance(self.data, Metric):  # Memory efficient TICA projecting trajectories on the fly
            proj = []
            refs = []
            fstep = None

            metr = self.data
            p = ProgressBar(len(metr.simulations))
            k = -1  # Running index of the simulation over all generated batches
            droppedsims = []
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        # Remember the simulations without projection to remove them from the simlist
                        droppedsims.append(k)
                        continue
                    if self.dimensions is not None:
                        numDimensions = pro[0].shape[1]
                        # Columns which are not part of the TICA are carried over untouched
                        keepdim = np.setdiff1d(range(numDimensions), self.dimensions)
                        keepdata.append(pro[0][:, keepdim])
                        # Sub-select dimensions for projecting
                        proj.append(self.tic.transform(pro[0][:, self.dimensions]).astype(np.float32))
                    else:
                        proj.append(self.tic.transform(pro[0]).astype(np.float32))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]  # The first frame step found is used for the whole dataset
                p.progress(len(projecteddata))
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
            if self.dimensions is not None:
                from htmd.projections.metric import _singleMolfile
                from htmd.molecule.molecule import Molecule
                (single, molfile) = _singleMolfile(metr.simulations)
                if single:
                    # The description of the kept columns is only looked up for a single shared molfile
                    keepdimdesc = metr.getMapping(Molecule(molfile))
                    keepdimdesc = keepdimdesc.iloc[keepdim]
        else:
            if ndim is not None and self.data.numDimensions < ndim:
                raise RuntimeError('TICA cannot increase the dimensionality of your data. Your data has {} dimensions and you requested {} TICA dimensions'.format(self.data.numDimensions, ndim))

            if self.dimensions is not None:
                keepdim = np.setdiff1d(range(self.data.numDimensions), self.dimensions)
                keepdata = [x[:, keepdim] for x in self.data.dat]
                if self.data.description is not None:
                    keepdimdesc = self.data.description.iloc[keepdim]
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        # If TICA is done on a subset of dimensions, combine non-projected data with projected data
        if self.dimensions is not None:
            proj = [np.hstack((kept, projected)) for kept, projected in zip(keepdata, proj)]

        if ndim is None:
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj), simlist=simlist, ref=ref, fstep=fstep, parent=parent)
        from pandas import DataFrame, concat
        # One descriptive row per TICA dimension; TICA dimensions have no atom indexes (-1)
        datatica.description = DataFrame({
            'type': ['tica'] * ndim,
            'atomIndexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i+1) for i in range(ndim)]
        })

        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            # pandas.concat replaces DataFrame.append, which was removed in pandas 2.0
            datatica.description = concat([keepdimdesc, datatica.description], ignore_index=True)

        return datatica
Example #11
0
    def _metrify(self, sims, skip, verbose, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        If `sims` is a single Molecule, it is handed to `self.processTraj` and that result is returned
        directly instead of a MetricData object.

        Parameters
        ----------

        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function, or a single Molecule.

        skip : int
               Skips every x frames.

        verbose : int
              Verbosity toggle. Not used by the body of this method.

        update : MetricData object
             A previous MetricData object to extend with the newly projected data. All entries of
             `sims` are projected regardless.

        Returns
        -------

        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------

        NameError
            If no trajectory produced any projected data.

        """

        if isinstance(sims, Molecule):
            return self.processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        # NOTE(review): `map` shadows the builtin of the same name inside this method
        map = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indices
            map = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deleteSims = np.zeros(numSim, dtype=bool)
        # NOTE(review): np.empty leaves `fstep` uninitialized; entries not written by a worker hold arbitrary values
        fstep = np.empty(numSim)

        #global parpool
        # The return value is discarded: presumably the workers fill `metrics`, `ref`, `deleteSims` and `fstep`
        # in place through the shared memory of the threading backend - TODO confirm in _processSimPyemma.
        # The number of jobs is hard-coded to 6.
        Parallel(n_jobs=6, backend="threading")(delayed(_processSimPyemma)(self, i, updList, uniqueMol, uqMol, skip, deleteSims, metrics, ref, fstep) for i in range(numSim))

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories
        emptyM = [True if np.size(x) == 0 else False for x in metrics]
        emptyR = [True if np.size(x) == 0 else False for x in ref]
        #assert np.all(deleteSims == emptyM)# and np.all(emptyR == emptyM)

        # Only `metrics` decides what is removed; `emptyR` and `deleteSims` are not used below
        metrics = np.delete(metrics, np.where(emptyM))
        ref = np.delete(ref, np.where(emptyM))
        # NOTE(review): `updList` is not filtered, so it can be longer than `metrics` when trajectories were removed
        #updList = np.delete(updList, emptyM)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=map, simlist=updList)
        else:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(updList)

        # The frame step is the most common value over all simulations
        uqfsteps = np.unique(fstep)
        data.fstep = stats.mode(fstep).mode
        if len(uqfsteps) != 1:
            logger.warning('Multiple framesteps were read from the simulations. Taking the statistical mode: ' + str(data.fstep) + 'ns.')
            logger.warning('If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')

        return data
Example #12
0
    def project(self):
        """
        Runs every projection stored in this Metric over all of its simulations.

        When `self.simulations` is a single Molecule, a plain list holding one result per entry of
        `self.projectionlist` is returned instead of a MetricData object.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data.

        Raises
        ------
        RuntimeError
            If no projections have been set.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError(
                'You need to provide projections using the Metric.set method.')

        # A lone Molecule is projected directly, without any parallel machinery
        if isinstance(self.simulations, Molecule):
            molecule = self.simulations
            return [_project(projection, molecule) for projection in self.projectionlist]

        numSim = len(self.simulations)

        # With one molfile shared by all simulations a single Molecule is created up front and
        # every Projection gets the chance to precalculate on it
        (single, molfile) = _singleMolfile(self.simulations)
        if not single:
            uqMol = None
            logger.warning(
                'Cannot calculate description of dimensions due to different topology files for each trajectory.'
            )
        else:
            uqMol = Molecule(molfile)
            for projection in self.projectionlist:
                if isinstance(projection, Projection):
                    projection._precalculate(uqMol)
        mapping = self.getMapping(uqMol)

        logger.debug('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        executor = ParallelExecutor(n_jobs=_config['ncpus'])
        jobs = (delayed(_processSim)(self.simulations[i], self.projectionlist, uqMol, self.skip)
                for i in range(numSim))
        results = executor(total=numSim, description='Projecting trajectories')(jobs)

        # Spread each per-simulation result over one array per field
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for idx, res in enumerate(results):
            metrics[idx] = res[0]
            ref[idx] = res[1]
            fstep[idx] = res[2]
            deletesims[idx] = res[3]

        logger.debug('Finished projecting the trajectories.')

        # Removing empty trajectories
        metrics, ref, updlist, fstep = self._removeEmpty(metrics, ref, deletesims, fstep)

        # Constructing a MetricData object
        data = MetricData(dat=metrics, ref=ref, description=mapping, simlist=updlist)

        # The most common frame step is taken as the frame step of the whole dataset
        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) == 1:
            logger.info(
                'Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually '
                'setting the MetricData.fstep property.'.format(data.fstep))
        else:
            logger.warning(
                'Multiple framesteps [{}] ns were read from the simulations. '
                'Taking the statistical mode: {}ns. '
                'If it looks wrong, you can modify it by manually '
                'setting the MetricData.fstep property.'.format(
                    ', '.join(map(str, uqfsteps)), data.fstep))

        return data
Example #13
0
    def project(self, njobs=None):
        """
        Run every projection registered in this Metric over all of its simulations.

        Parameters
        ----------
        njobs : int
            Number of parallel jobs used to project the trajectories. Several trajectories are loaded at the
            same time, so high values can consume a lot of memory. If None, the ``njobs`` value found in
            htmd.config is used.

        Returns
        -------
        data : MetricData object
               MetricData object holding the projected data. When ``self.simulations`` is a single Molecule, a
               list with one result per projection is returned instead.

        Raises
        ------
        RuntimeError
            If no projections have been registered.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError(
                "You need to provide projections using the Metric.set method.")

        # A lone Molecule is projected in-process, one projection after the other
        if isinstance(self.simulations, Molecule):
            single_mol = self.simulations
            return [_project(proj, single_mol) for proj in self.projectionlist]

        n_sims = len(self.simulations)

        # When all simulations share one topology file, load it a single time and hand it to every
        # Projection through _setCache; otherwise no shared Molecule is available
        shared_mol = None
        has_single_top, top_file = _singleMolfile(self.simulations)
        if not has_single_top:
            logger.warning(
                "Cannot calculate description of dimensions due to different topology files for each trajectory."
            )
        else:
            shared_mol = Molecule(top_file)
            for proj in self.projectionlist:
                if isinstance(proj, Projection):
                    proj._setCache(shared_mol)
        dim_description = self.getMapping(shared_mol)

        logger.debug("Metric: Starting projection of trajectories.")
        from htmd.config import _config

        n_jobs = _config["njobs"] if njobs is None else njobs
        executor = ParallelExecutor(n_jobs=n_jobs)
        tasks = (delayed(_processSim)(self.simulations[sim_idx],
                                      self.projectionlist, shared_mol,
                                      self.skip)
                 for sim_idx in range(n_sims))
        results = executor(total=n_sims, desc="Projecting trajectories")(tasks)

        # Scatter the per-simulation result tuples into one array per field
        metrics = np.empty(n_sims, dtype=object)
        ref = np.empty(n_sims, dtype=object)
        deletesims = np.zeros(n_sims, dtype=bool)
        fstep = np.zeros(n_sims)
        for sim_idx, res in enumerate(results):
            metrics[sim_idx], ref[sim_idx] = res[0], res[1]
            fstep[sim_idx], deletesims[sim_idx] = res[2], res[3]

        logger.debug("Finished projecting the trajectories.")

        # Drop the simulations which were flagged for deletion
        metrics, ref, kept_sims, fstep = self._removeEmpty(
            metrics, ref, deletesims, fstep)

        data = MetricData(dat=metrics,
                          ref=ref,
                          description=dim_description,
                          simlist=kept_sims)

        distinct_fsteps = np.unique(fstep)
        if np.all(np.isnan(distinct_fsteps)):
            # Nothing usable was read, data.fstep is left untouched
            logger.warning(
                "No framestep could be read from the trajectories. Please manually set the MetricData.fstep"
                " property, otherwise calculations in Model and Kinetics classes can fail."
            )
            return data

        data.fstep = float(stats.mode(fstep).mode)
        if len(distinct_fsteps) != 1:
            listed_fsteps = ", ".join(map(str, distinct_fsteps))
            logger.warning(
                "Multiple framesteps [{}] ns were read from the simulations. "
                "Taking the statistical mode: {}ns. "
                "If it looks wrong, you can modify it by manually "
                "setting the MetricData.fstep property.".format(
                    listed_fsteps, data.fstep))
        elif data.fstep == 0:
            logger.warning(
                "A framestep of 0 was read from the trajectories. Please manually set the MetricData.fstep"
                " property, otherwise calculations in Model and Kinetics classes can fail."
            )
        else:
            logger.info(
                "Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually "
                "setting the MetricData.fstep property.".format(data.fstep))

        return data
Example #14
0
    def project(self):
        """
        Applies all projections stored in Metric on all simulations.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data. If ``self.simulations`` is a single
               Molecule, a list with the output of each projection is returned instead.

        Raises
        ------
        NameError
            If no projections were provided, or if no trajectory could be read.
        """
        if len(self.projectionlist) == 0:
            raise NameError('You need to provide projections using the Metric.projection method.')

        if isinstance(self.simulations, Molecule):
            return [proj.project(self.simulations) for proj in self.projectionlist]

        numSim = len(self.simulations)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uqMol = None
        # No dimension mapping is computed by this method; an empty one is handed over to MetricData
        mapping = []
        (single, molfile) = _singleMolfile(self.simulations)
        if single:
            uqMol = Molecule(molfile)
            for proj in self.projectionlist:
                proj._precalculate(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(
                delayed(_processSim)(self.simulations[i], self.projectionlist, uqMol, self.skip) for i in range(numSim))

        # Each result is indexed as (metric, ref, fstep, delete flag)
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for i, res in enumerate(results):
            metrics[i] = res[0]
            ref[i] = res[1]
            fstep[i] = res[2]
            deletesims[i] = res[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories
        emptyM = np.array([x is None for x in metrics], dtype=bool)
        emptyR = np.array([x is None for x in ref], dtype=bool)
        # Internal consistency check: the delete flags must agree with the missing metric/ref entries
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        emptyidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updlist = np.delete(self.simulations, emptyidx)
        # Discard the frame steps of the removed trajectories as well, otherwise their placeholder values
        # take part in the mode below and can trigger a spurious "Multiple framesteps" warning
        fstep = np.delete(fstep, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning('Multiple framesteps were read from the simulations. '
                           'Taking the statistical mode: ' + str(data.fstep) + 'ns. '
                           'If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')
        else:
            logger.info('Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually '
                        'setting the MetricData.fstep property.'.format(data.fstep))

        return data
Example #15
0
    def project(self):
        """
        Applies all projections stored in Metric on all simulations.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data. If ``self.simulations`` is a single
               Molecule, a list with the output of each projection is returned instead.

        Raises
        ------
        NameError
            If no projections were provided, or if no trajectory could be read.
        """
        if len(self.projectionlist) == 0:
            raise NameError(
                'You need to provide projections using the Metric.projection method.'
            )

        if isinstance(self.simulations, Molecule):
            return [
                proj.project(self.simulations) for proj in self.projectionlist
            ]

        numSim = len(self.simulations)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uqMol = None
        # The dimension mapping is not calculated in this method; MetricData receives an empty one
        mapping = []
        (single, molfile) = _singleMolfile(self.simulations)
        if single:
            uqMol = Molecule(molfile)
            for proj in self.projectionlist:
                proj._precalculate(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(
            delayed(_processSim)(self.simulations[i], self.projectionlist,
                                 uqMol, self.skip) for i in range(numSim))

        # Every result is indexed as (metric, ref, fstep, delete flag)
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for i, res in enumerate(results):
            metrics[i] = res[0]
            ref[i] = res[1]
            fstep[i] = res[2]
            deletesims[i] = res[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories
        emptyM = np.array([x is None for x in metrics], dtype=bool)
        emptyR = np.array([x is None for x in ref], dtype=bool)
        # Sanity check: delete flags and missing metric/ref entries have to coincide
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        emptyidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updlist = np.delete(self.simulations, emptyidx)
        # The frame steps of removed trajectories are dropped too, so that they neither take part in
        # the mode computed below nor cause a spurious "Multiple framesteps" warning
        fstep = np.delete(fstep, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        data = MetricData(dat=metrics,
                          ref=ref,
                          map=mapping,
                          simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning(
                'Multiple framesteps were read from the simulations. '
                'Taking the statistical mode: ' + str(data.fstep) + 'ns. '
                'If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            )
        else:
            logger.info(
                'Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually '
                'setting the MetricData.fstep property.'.format(data.fstep))

        return data
Example #16
0
    def project(self):
        """
        Run every projection registered in this Metric over all of its simulations.

        Returns
        -------
        data : MetricData object
               MetricData object holding the projected data. When ``self.simulations`` is a single Molecule, a
               list with one result per projection is returned instead.

        Raises
        ------
        RuntimeError
            If no projections have been registered.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError('You need to provide projections using the Metric.set method.')

        # A lone Molecule is projected in-process, one projection after the other
        if isinstance(self.simulations, Molecule):
            single_mol = self.simulations
            return [_project(proj, single_mol) for proj in self.projectionlist]

        n_sims = len(self.simulations)

        # When all simulations share one topology file, load it a single time and pass it to the
        # _precalculate hook of every Projection; otherwise no shared Molecule is available
        shared_mol = None
        has_single_top, top_file = _singleMolfile(self.simulations)
        if not has_single_top:
            logger.warning('Cannot calculate description of dimensions due to different topology files for each trajectory.')
        else:
            shared_mol = Molecule(top_file)
            for proj in self.projectionlist:
                if isinstance(proj, Projection):
                    proj._precalculate(shared_mol)
        dim_description = self.getMapping(shared_mol)

        logger.debug('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        executor = ParallelExecutor(n_jobs=_config['ncpus'])
        tasks = (delayed(_processSim)(self.simulations[sim_idx], self.projectionlist, shared_mol, self.skip)
                 for sim_idx in range(n_sims))
        results = executor(total=n_sims, description='Projecting trajectories')(tasks)

        # Scatter the per-simulation result tuples into one array per field
        metrics = np.empty(n_sims, dtype=object)
        ref = np.empty(n_sims, dtype=object)
        deletesims = np.zeros(n_sims, dtype=bool)
        fstep = np.zeros(n_sims)
        for sim_idx, res in enumerate(results):
            metrics[sim_idx], ref[sim_idx] = res[0], res[1]
            fstep[sim_idx], deletesims[sim_idx] = res[2], res[3]

        logger.debug('Finished projecting the trajectories.')

        # Drop the simulations which were flagged for deletion
        metrics, ref, kept_sims, fstep = self._removeEmpty(metrics, ref, deletesims, fstep)

        data = MetricData(dat=metrics, ref=ref, description=dim_description, simlist=kept_sims)

        distinct_fsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(distinct_fsteps) == 1:
            logger.info('Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually '
                        'setting the MetricData.fstep property.'.format(data.fstep))
        else:
            listed_fsteps = ', '.join(map(str, distinct_fsteps))
            logger.warning('Multiple framesteps [{}] ns were read from the simulations. '
                           'Taking the statistical mode: {}ns. '
                           'If it looks wrong, you can modify it by manually '
                           'setting the MetricData.fstep property.'.format(listed_fsteps, data.fstep))

        return data
Example #17
0
    def _metrify(self, sims, skip, verbose, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        Parameters
        ----------

        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. A single Molecule is passed straight to
              ``self.processTraj`` and its result is returned.

        skip : int
               Skips every x frames.

        verbose : int
              Verbosity toggle. Not used by this method.

        update : MetricData object
             A previous MetricData object. When given, the new projections are appended to it instead of
             creating a new MetricData. No filtering of already projected simulations is performed.

        Returns
        -------

        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------

        NameError
            If no trajectory produced any data.

        """

        if isinstance(sims, Molecule):
            return self.processTraj(sims)

        # Update checking is not implemented: every simulation in `sims` is projected
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        mapping = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indeces
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deleteSims = np.zeros(numSim, dtype=bool)
        # Zero-initialised (not np.empty) so that a slot which a worker never writes holds a defined value
        fstep = np.zeros(numSim)

        # The return value is ignored: the output arrays are handed to the threaded workers, which are
        # presumably expected to fill them in place (see _processSimPyemma)
        Parallel(n_jobs=6, backend="threading")(
            delayed(_processSimPyemma)(self, i, updList, uniqueMol, uqMol,
                                       skip, deleteSims, metrics, ref, fstep)
            for i in range(numSim))

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories. Slots that were never filled are still None, and np.size(None) is 1,
        # so None has to be checked explicitly
        emptyM = np.array([x is None or np.size(x) == 0 for x in metrics], dtype=bool)
        emptyidx = np.where(emptyM)[0]

        # Prune every per-simulation array with the same indices to keep data, refs, simulation list and
        # frame steps aligned with each other
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updList = np.delete(updList, emptyidx)
        fstep = np.delete(fstep, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)
        else:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            # NOTE(review): MetricData is constructed with `simlist` above; confirm that `simList` is the
            # right attribute name here
            data.simList.extend(updList)

        uqfsteps = np.unique(fstep)
        # Store a plain float instead of the raw scipy mode result
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning(
                'Multiple framesteps were read from the simulations. Taking the statistical mode: '
                + str(data.fstep) + 'ns.')
            logger.warning(
                'If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            )

        return data
Example #18
0
    def _metrify(self, sims, skip, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        Parameters
        ----------

        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. A single Molecule is passed straight to
              ``self._processTraj`` and its result is returned.
        skip : int
               Skips every x frames.
        update : MetricData object
             A previous MetricData object. When given, the new projections are appended to it instead of
             creating a new MetricData. No filtering of already projected simulations is performed.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no trajectory could be read.

        """

        if isinstance(sims, Molecule):
            return self._processTraj(sims)

        # Update checking is not implemented: every simulation in `sims` is projected
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        mapping = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indeces
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)

        from htmd.config import _config
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(
            delayed(_processSimOld)(self, i, updList, uniqueMol, uqMol, skip, deletesims, metrics, ref, fstep)
            for i in range(numSim))

        # Each result is indexed as (metric, ref, fstep, delete flag)
        for i, res in enumerate(results):
            metrics[i] = res[0]
            ref[i] = res[1]
            fstep[i] = res[2]
            deletesims[i] = res[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories
        emptyM = np.array([x is None for x in metrics], dtype=bool)
        emptyR = np.array([x is None for x in ref], dtype=bool)
        # Internal consistency check: the delete flags must agree with the missing metric/ref entries
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        emptyidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updList = np.delete(updList, emptyidx)
        # Discard the frame steps of the removed trajectories as well, otherwise their placeholder values
        # take part in the mode below and can trigger a spurious "Multiple framesteps" warning
        fstep = np.delete(fstep, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)
        else:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(updList)  # This is wrong but we don't use update anyways

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning('Multiple framesteps were read from the simulations. Taking the statistical mode: ' + str(data.fstep) + 'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')
        else:
            logger.info('Frame step {}ns was read from the trajectories. If it looks wrong, redefine it by manually setting the MetricData.fstep property.'.format(data.fstep))

        return data