コード例 #1
0
ファイル: kmeanstri.py プロジェクト: jhprinz/htmd
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto `ndim` dimensions

        All frames are concatenated and clustered with mini-batch k-means into `ndim` clusters. Every
        frame is then described, per cluster center, by how much closer it is to that center than its
        own mean distance to all centers. Centers further away than that mean get a value of zero.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on. Used as the number of clusters.

        Returns
        -------
        projdata : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the projected data

        Example
        -------
        >>> tri = KMeansTri(data)
        >>> datatri = tri.project(5)
        """
        import scipy.spatial.distance as scidist
        from sklearn.cluster import MiniBatchKMeans
        from htmd.metricdata import MetricData

        source = self.data
        frames = np.concatenate(source.dat)
        kmeans = MiniBatchKMeans(n_clusters=ndim)
        kmeans.fit(frames)

        # TODO: Could make it into a loop to waste less memory
        centerdist = scidist.cdist(frames, kmeans.cluster_centers_)
        # Mean distance of each frame to all centers, kept as a column so it broadcasts over the centers
        framemean = np.mean(centerdist, axis=1)[:, None]
        activation = framemean - centerdist
        activation[activation < 0] = 0

        # The new object keeps the bookkeeping of the source data and remembers it as its parent
        projected = MetricData()
        projected.simlist = source.simlist
        projected.dat = source.deconcatenate(activation)
        projected.ref = source.ref
        projected.parent = source
        projected.fstep = source.fstep
        return projected
コード例 #2
0
ファイル: kmeanstri.py プロジェクト: Acellera/htmd
    def project(self, ndim=None):
        """ Builds a triangle k-means projection of the data object given to the constructor

        The frames of all trajectories are pooled, `ndim` cluster centers are fitted with mini-batch
        k-means, and each frame gets one feature per center: its mean distance to all centers minus
        its distance to that center, with negative values set to zero.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on.

        Returns
        -------
        projdata : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the projected data

        Example
        -------
        >>> tri = KMeansTri(data)
        >>> datatri = tri.project(5)
        """
        import scipy.spatial.distance as scidist
        from sklearn.cluster import MiniBatchKMeans
        from htmd.metricdata import MetricData

        allframes = np.concatenate(self.data.dat)
        clusterer = MiniBatchKMeans(n_clusters=ndim)
        clusterer.fit(allframes)

        # TODO: Could make it into a loop to waste less memory
        tocenters = scidist.cdist(allframes, clusterer.cluster_centers_)
        rowmeans = np.mean(tocenters, axis=1)
        tri = rowmeans[:, np.newaxis] - tocenters
        # Centers further away than the frame's mean distance contribute nothing
        negative = tri < 0
        tri[negative] = 0

        result = MetricData()
        result.simlist = self.data.simlist
        # Split the pooled matrix back into one array per trajectory
        result.dat = self.data.deconcatenate(tri)
        result.ref = self.data.ref
        result.parent = self.data
        result.fstep = self.data.fstep
        return result
コード例 #3
0
ファイル: metric.py プロジェクト: xielm12/htmd
    def _metrify(self, sims, skip, verbose, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        Parameters
        ----------
        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. If a Molecule is passed instead, it is
              handed to `processTraj` and that result is returned directly.
        skip : int
               Skips every x frames.
        verbose : int
              Verbosity toggle. Not used inside this method.
        update : MetricData object
             Provide a previous MetricData object and only metrify new trajectories.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no projected data is left after removing the empty trajectories.
        """

        if isinstance(sims, Molecule):
            return self.processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        mapping = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indices
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deleteSims = np.zeros(numSim, dtype=bool)
        # Zero-initialised on purpose: np.empty would leave arbitrary memory in every slot a worker
        # does not write, and that garbage would then take part in the mode calculation below.
        fstep = np.zeros(numSim)

        # The return values of the workers are discarded; presumably they fill the arrays passed in
        # here in place (threading backend shares memory) -- verify against _processSimPyemma.
        Parallel(n_jobs=6, backend="threading")(
            delayed(_processSimPyemma)(self, i, updList, uniqueMol, uqMol,
                                       skip, deleteSims, metrics, ref, fstep)
            for i in range(numSim))

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories. np.where returns a tuple, so take its first element to hand
        # np.delete a plain 1D index array.
        emptyM = np.array([np.size(x) == 0 for x in metrics], dtype=bool)
        emptyIdx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyIdx)
        ref = np.delete(ref, emptyIdx)
        # NOTE(review): updList is not filtered here, so the simlist can be longer than dat when
        # trajectories were dropped -- confirm whether that is intended.

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)
        else:
            # NOTE(review): MetricData is constructed with `simlist` above but extended through
            # `simList` here -- confirm the attribute name before relying on the update path.
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(updList)

        uqfsteps = np.unique(fstep)
        # Store a plain float rather than whatever container scipy returns for the mode
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning(
                'Multiple framesteps were read from the simulations. Taking the statistical mode: '
                + str(data.fstep) + 'ns.')
            logger.warning(
                'If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            )

        return data
コード例 #4
0
ファイル: metric.py プロジェクト: xielm12/htmd
    def project(self):
        """
        Runs every projection stored in this Metric over all of its simulations.

        When `self.simulations` is a single Molecule it is projected directly and a plain list with
        one result per projection is returned instead of a MetricData object.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data.

        Raises
        ------
        NameError
            If no projections were provided, or if no trajectory yielded any data.
        """
        if len(self.projectionlist) == 0:
            raise NameError(
                'You need to provide projections using the Metric.projection method.'
            )

        sims = self.simulations
        if isinstance(sims, Molecule):
            return [proj.project(sims) for proj in self.projectionlist]

        numSim = len(sims)

        # A topology shared by all simulations is loaded once so each projection can precalculate on it
        uqMol = None
        mapping = []
        single, molfile = _singleMolfile(sims)
        if single:
            uqMol = Molecule(molfile)
            for proj in self.projectionlist:
                proj._precalculate(uqMol)

        logger.info('Metric: Starting projection of trajectories.')

        from htmd.config import _config
        runner = Parallel(n_jobs=_config['ncpus'], verbose=11)
        results = runner(
            delayed(_processSim)(sims[i], self.projectionlist, uqMol, self.skip)
            for i in range(numSim))

        # Unpack the per-simulation result tuples into one array per field
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        fstep = np.zeros(numSim)
        deletesims = np.zeros(numSim, dtype=bool)
        for idx, res in enumerate(results):
            metrics[idx] = res[0]
            ref[idx] = res[1]
            fstep[idx] = res[2]
            deletesims[idx] = res[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories; the three emptiness markers have to agree with each other
        emptyM = np.array([m is None for m in metrics], dtype=bool)
        emptyR = np.array([r is None for r in ref], dtype=bool)
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        dropidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, dropidx)
        ref = np.delete(ref, dropidx)
        updlist = np.delete(sims, dropidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) == 1:
            logger.info(
                'Frame step {}ns was read from the trajectories. '
                'If it looks wrong, redefine it by manually '
                'setting the MetricData.fstep property.'.format(data.fstep))
        else:
            message = ('Multiple framesteps were read from the simulations. Taking the statistical mode: '
                       + str(data.fstep)
                       + 'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')
            logger.warning(message)

        return data
コード例 #5
0
ファイル: metric.py プロジェクト: xielm12/htmd
    def _metrify(self, sims, skip, update):
        """
        Projects all trajectories of the given simulations onto the space defined by the Metric* class.

        Parameters
        ----------
        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. A Molecule is instead passed to
              `_processTraj` and that result is returned directly.
        skip : int
               Skips every x frames.
        update : MetricData object
             Provide a previous MetricData object and only metrify new trajectories.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no trajectory yielded any data.
        """

        if isinstance(sims, Molecule):
            return self._processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # With a single molfile shared by all simulations, one Molecule is created up front and reused
        single, molfile = _singleMolfile(updList)
        uniqueMol = 0
        uqMol = []
        mapping = []
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Mapping of metric columns to atom pair indices
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)

        from htmd.config import _config
        runner = Parallel(n_jobs=_config['ncpus'], verbose=11)
        results = runner(
            delayed(_processSimOld)(self, i, updList, uniqueMol, uqMol, skip,
                                    deletesims, metrics, ref, fstep)
            for i in range(numSim))

        # Copy the returned tuples into the per-field arrays
        for idx, res in enumerate(results):
            metrics[idx] = res[0]
            ref[idx] = res[1]
            fstep[idx] = res[2]
            deletesims[idx] = res[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories; all three emptiness markers must agree
        emptyM = np.array([m is None for m in metrics], dtype=bool)
        emptyR = np.array([r is None for r in ref], dtype=bool)
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        dropidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, dropidx)
        ref = np.delete(ref, dropidx)
        updList = np.delete(updList, dropidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if update:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            # This is wrong but we don't use update anyways
            data.simList.extend(updList)
        else:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) == 1:
            logger.info(
                'Frame step {}ns was read from the trajectories. '
                'If it looks wrong, redefine it by manually setting the MetricData.fstep property.'
                .format(data.fstep))
        else:
            message = ('Multiple framesteps were read from the simulations. Taking the statistical mode: '
                       + str(data.fstep)
                       + 'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')
            logger.warning(message)

        return data
コード例 #6
0
ファイル: metric.py プロジェクト: jeiros/htmd
    def project(self):
        """
        Runs every projection stored in this Metric over all of its simulations.

        When `self.simulations` is a single Molecule it is projected directly and a plain list with
        one result per projection is returned instead of a MetricData object.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data.

        Raises
        ------
        RuntimeError
            If no projections were set on the Metric.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError('You need to provide projections using the Metric.set method.')

        sims = self.simulations

        # Projecting single Molecules
        if isinstance(sims, Molecule):
            return [_project(proj, sims) for proj in self.projectionlist]

        numSim = len(sims)

        # A topology shared by all simulations is loaded once so Projection objects can precalculate on it
        uqMol = None
        single, molfile = _singleMolfile(sims)
        if not single:
            logger.warning('Cannot calculate description of dimensions due to different topology files for each trajectory.')
        else:
            uqMol = Molecule(molfile)
            for proj in self.projectionlist:
                if isinstance(proj, Projection):
                    proj._precalculate(uqMol)
        # Requested in both cases; uqMol is None when the topologies differ
        mapping = self.getMapping(uqMol)

        logger.debug('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        aprun = ParallelExecutor(n_jobs=_config['ncpus'])
        runner = aprun(total=numSim, description='Projecting trajectories')
        results = runner(
            delayed(_processSim)(sims[i], self.projectionlist, uqMol, self.skip)
            for i in range(numSim))

        # Unpack the per-simulation result tuples into one array per field
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for idx, res in enumerate(results):
            metrics[idx] = res[0]
            ref[idx] = res[1]
            fstep[idx] = res[2]
            deletesims[idx] = res[3]

        logger.debug('Finished projecting the trajectories.')

        # Removing empty trajectories
        metrics, ref, updlist, fstep = self._removeEmpty(metrics, ref, deletesims, fstep)

        # Constructing a MetricData object
        data = MetricData(dat=metrics, ref=ref, description=mapping, simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) == 1:
            logger.info('Frame step {}ns was read from the trajectories. '
                        'If it looks wrong, redefine it by manually '
                        'setting the MetricData.fstep property.'.format(data.fstep))
        else:
            steplist = ', '.join(str(step) for step in uqfsteps)
            logger.warning('Multiple framesteps [{}] ns were read from the simulations. '
                           'Taking the statistical mode: {}ns. '
                           'If it looks wrong, you can modify it by manually '
                           'setting the MetricData.fstep property.'.format(steplist, data.fstep))

        return data
コード例 #7
0
ファイル: metric.py プロジェクト: tonigi/htmd
    def project(self):
        """
        Applies the projections held by this Metric to all simulations and collects the results.

        A Metric whose `simulations` attribute is a single Molecule is handled separately: each
        projection is applied to that Molecule and the list of results is returned as is.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data.

        Raises
        ------
        RuntimeError
            If the projection list is empty.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError(
                'You need to provide projections using the Metric.set method.')

        # Projecting single Molecules
        if isinstance(self.simulations, Molecule):
            mol = self.simulations
            projected = []
            for proj in self.projectionlist:
                projected.append(_project(proj, mol))
            return projected

        numSim = len(self.simulations)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        (single, molfile) = _singleMolfile(self.simulations)
        uqMol = Molecule(molfile) if single else None
        if single:
            for proj in self.projectionlist:
                if isinstance(proj, Projection):
                    proj._precalculate(uqMol)
        else:
            logger.warning(
                'Cannot calculate description of dimensions due to different topology files for each trajectory.'
            )
        mapping = self.getMapping(uqMol)

        logger.debug('Metric: Starting projection of trajectories.')
        from htmd.config import _config
        executor = ParallelExecutor(n_jobs=_config['ncpus'])
        jobs = (delayed(_processSim)(self.simulations[i], self.projectionlist,
                                     uqMol, self.skip) for i in range(numSim))
        results = executor(total=numSim, description='Projecting trajectories')(jobs)

        # One slot per simulation for each field of the result tuples
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for pos, simresult in enumerate(results):
            metrics[pos] = simresult[0]
            ref[pos] = simresult[1]
            fstep[pos] = simresult[2]
            deletesims[pos] = simresult[3]

        logger.debug('Finished projecting the trajectories.')

        # Removing empty trajectories
        cleaned = self._removeEmpty(metrics, ref, deletesims, fstep)
        metrics, ref, updlist, fstep = cleaned

        # Constructing a MetricData object
        data = MetricData(dat=metrics,
                          ref=ref,
                          description=mapping,
                          simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            allsteps = ', '.join([str(step) for step in uqfsteps])
            logger.warning(
                'Multiple framesteps [{}] ns were read from the simulations. '
                'Taking the statistical mode: {}ns. '
                'If it looks wrong, you can modify it by manually '
                'setting the MetricData.fstep property.'.format(
                    allsteps, data.fstep))
        else:
            logger.info(
                'Frame step {}ns was read from the trajectories. '
                'If it looks wrong, redefine it by manually '
                'setting the MetricData.fstep property.'.format(data.fstep))

        return data
コード例 #8
0
ファイル: metric.py プロジェクト: AdriaPerezCulubret/htmd
    def project(self, njobs=None):
        """
        Runs every projection stored in this Metric over all of its simulations.

        When `self.simulations` is a single Molecule it is projected directly and a plain list with
        one result per projection is returned instead of a MetricData object.

        Parameters
        ----------
        njobs : int
            Number of parallel jobs to spawn for projection of trajectories. Take care that this can use large amounts
            of memory as multiple trajectories are loaded at once.  If None it will use the default from htmd.config.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data. Its `fstep` is left untouched
               when no framestep could be read from the trajectories.

        Raises
        ------
        RuntimeError
            If no projections were set on the Metric.
        """
        if len(self.projectionlist) == 0:
            raise RuntimeError(
                "You need to provide projections using the Metric.set method.")

        sims = self.simulations

        # Projecting single Molecules
        if isinstance(sims, Molecule):
            return [_project(proj, sims) for proj in self.projectionlist]

        numSim = len(sims)

        # A topology shared by all simulations is loaded once and cached on the Projection objects
        uqMol = None
        single, molfile = _singleMolfile(sims)
        if not single:
            logger.warning(
                "Cannot calculate description of dimensions due to different topology files for each trajectory."
            )
        else:
            uqMol = Molecule(molfile)
            for proj in self.projectionlist:
                if isinstance(proj, Projection):
                    proj._setCache(uqMol)
        mapping = self.getMapping(uqMol)

        logger.debug("Metric: Starting projection of trajectories.")
        from htmd.config import _config

        # The configured default is only looked up when the caller did not choose a job count
        if njobs is None:
            njobs = _config["njobs"]
        aprun = ParallelExecutor(n_jobs=njobs)
        runner = aprun(total=numSim, desc="Projecting trajectories")
        results = runner(
            delayed(_processSim)(sims[i], self.projectionlist, uqMol, self.skip)
            for i in range(numSim))

        # Unpack the per-simulation result tuples into one array per field
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for idx, res in enumerate(results):
            metrics[idx] = res[0]
            ref[idx] = res[1]
            fstep[idx] = res[2]
            deletesims[idx] = res[3]

        logger.debug("Finished projecting the trajectories.")

        # Removing empty trajectories
        metrics, ref, updlist, fstep = self._removeEmpty(
            metrics, ref, deletesims, fstep)

        # Constructing a MetricData object
        data = MetricData(dat=metrics,
                          ref=ref,
                          description=mapping,
                          simlist=updlist)

        uqfsteps = np.unique(fstep)
        if np.all(np.isnan(uqfsteps)):
            logger.warning(
                "No framestep could be read from the trajectories. "
                "Please manually set the MetricData.fstep property, "
                "otherwise calculations in Model and Kinetics classes can fail."
            )
            return data

        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            steplist = ", ".join(str(step) for step in uqfsteps)
            logger.warning(
                "Multiple framesteps [{}] ns were read from the simulations. "
                "Taking the statistical mode: {}ns. "
                "If it looks wrong, you can modify it by manually "
                "setting the MetricData.fstep property.".format(
                    steplist, data.fstep))
        elif data.fstep == 0:
            logger.warning(
                "A framestep of 0 was read from the trajectories. "
                "Please manually set the MetricData.fstep property, "
                "otherwise calculations in Model and Kinetics classes can fail."
            )
        else:
            logger.info(
                "Frame step {}ns was read from the trajectories. "
                "If it looks wrong, redefine it by manually "
                "setting the MetricData.fstep property.".format(data.fstep))

        return data
コード例 #9
0
ファイル: metric.py プロジェクト: PabloHN/htmd
    def project(self):
        """
        Applies the projections held by this Metric to all simulations and collects the results.

        A Metric whose `simulations` attribute is a single Molecule is handled separately: each
        projection is applied to that Molecule and the list of results is returned as is.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data.

        Raises
        ------
        NameError
            If the projection list is empty, or if no trajectory yielded any data.
        """
        if len(self.projectionlist) == 0:
            raise NameError('You need to provide projections using the Metric.projection method.')

        if isinstance(self.simulations, Molecule):
            projected = []
            for proj in self.projectionlist:
                projected.append(proj.project(self.simulations))
            return projected

        numSim = len(self.simulations)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        mapping = []
        (single, molfile) = _singleMolfile(self.simulations)
        uqMol = Molecule(molfile) if single else None
        if single:
            for proj in self.projectionlist:
                proj._precalculate(uqMol)

        logger.info('Metric: Starting projection of trajectories.')

        # Plain joblib call; an earlier variant monkey-patched joblib's batch completion callback around it
        from htmd.config import _config
        jobs = (delayed(_processSim)(self.simulations[i], self.projectionlist, uqMol, self.skip)
                for i in range(numSim))
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(jobs)

        # One slot per simulation for each field of the result tuples
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)
        for pos, simresult in enumerate(results):
            metrics[pos] = simresult[0]
            ref[pos] = simresult[1]
            fstep[pos] = simresult[2]
            deletesims[pos] = simresult[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories; the three emptiness markers have to agree with each other
        emptyM = np.array([entry is None for entry in metrics], dtype=bool)
        emptyR = np.array([entry is None for entry in ref], dtype=bool)
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        emptyidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updlist = np.delete(self.simulations, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updlist)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            prefix = 'Multiple framesteps were read from the simulations. Taking the statistical mode: '
            suffix = 'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            logger.warning(prefix + str(data.fstep) + suffix)
        else:
            logger.info('Frame step {}ns was read from the trajectories. '
                        'If it looks wrong, redefine it by manually '
                        'setting the MetricData.fstep property.'.format(data.fstep))

        return data
コード例 #10
0
ファイル: metric.py プロジェクト: PabloHN/htmd
    def _metrify(self, sims, skip, verbose, update):
        """
        Takes a set of trajectory folders and projects all trajectories within them onto the given space defined by the Metric* class.

        Parameters
        ----------
        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. If a Molecule is passed instead, it is
              handed to `processTraj` and that result is returned directly.
        skip : int
               Skips every x frames.
        verbose : int
              Verbosity toggle. Not used inside this method.
        update : MetricData object
             Provide a previous MetricData object and only metrify new trajectories.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no projected data is left after removing the empty trajectories.
        """

        if isinstance(sims, Molecule):
            return self.processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol = 0
        uqMol = []
        mapping = []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indices
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deleteSims = np.zeros(numSim, dtype=bool)
        # Start from zeros: np.empty hands back uninitialised memory, so any slot a worker leaves
        # unwritten would otherwise feed an arbitrary number into the framestep mode below.
        fstep = np.zeros(numSim)

        # The workers' return values are dropped; presumably they write into the arrays passed in
        # (the threading backend shares memory) -- verify against _processSimPyemma.
        Parallel(n_jobs=6, backend="threading")(delayed(_processSimPyemma)(self, i, updList, uniqueMol, uqMol, skip, deleteSims, metrics, ref, fstep) for i in range(numSim))

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories. np.where returns a tuple; index it so np.delete gets a 1D index array.
        emptyM = np.array([np.size(x) == 0 for x in metrics], dtype=bool)
        emptyIdx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyIdx)
        ref = np.delete(ref, emptyIdx)
        # NOTE(review): updList is not filtered here, so the simlist can be longer than dat when
        # trajectories were dropped -- confirm whether that is intended.

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if not update:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)
        else:
            # NOTE(review): MetricData is constructed with `simlist` above but extended through
            # `simList` here -- confirm the attribute name before relying on the update path.
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(updList)

        uqfsteps = np.unique(fstep)
        # Store a plain float rather than whatever container scipy returns for the mode
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            logger.warning('Multiple framesteps were read from the simulations. Taking the statistical mode: ' + str(data.fstep) + 'ns.')
            logger.warning('If it looks wrong, you can modify it by manually setting the MetricData.fstep property.')

        return data
コード例 #11
0
ファイル: metric.py プロジェクト: PabloHN/htmd
    def _metrify(self, sims, skip, update):
        """
        Runs the Metric* projection over every trajectory of the given simulations.

        Parameters
        ----------
        sims : numpy list of structs or Molecule
              A list of structs produced by the simList function. A Molecule is instead passed to
              `_processTraj` and that result is returned directly.
        skip : int
               Skips every x frames.
        update : MetricData object
             Provide a previous MetricData object and only metrify new trajectories.

        Returns
        -------
        data : MetricData object
               Returns a MetricData object containing the projected data and the ref data.

        Raises
        ------
        NameError
            If no trajectory yielded any data.
        """

        if isinstance(sims, Molecule):
            return self._processTraj(sims)

        # [updList, oldList] = checkUpdate(simList, update, verbose);
        updList = sims
        numSim = len(updList)

        # Find out if there is a unique molfile. If there is, initialize a single Molecule to speed up calculations
        uniqueMol, uqMol, mapping = 0, [], []
        (single, molfile) = _singleMolfile(updList)
        if single:
            uniqueMol = 1
            uqMol = Molecule(molfile)
            # Calculating the mapping of metric columns to atom pair indices
            mapping = self._getMapping(uqMol)

        logger.info('Metric: Starting projection of trajectories.')
        metrics = np.empty(numSim, dtype=object)
        ref = np.empty(numSim, dtype=object)
        deletesims = np.zeros(numSim, dtype=bool)
        fstep = np.zeros(numSim)

        from htmd.config import _config
        jobs = (delayed(_processSimOld)(self, i, updList, uniqueMol, uqMol, skip, deletesims, metrics, ref, fstep)
                for i in range(numSim))
        results = Parallel(n_jobs=_config['ncpus'], verbose=11)(jobs)

        # Store each field of the returned tuples in its own array
        for pos, simresult in enumerate(results):
            metrics[pos] = simresult[0]
            ref[pos] = simresult[1]
            fstep[pos] = simresult[2]
            deletesims[pos] = simresult[3]

        logger.info('Finished projecting the trajectories.')

        # Removing empty trajectories; all three emptiness markers must agree
        emptyM = np.array([entry is None for entry in metrics], dtype=bool)
        emptyR = np.array([entry is None for entry in ref], dtype=bool)
        assert np.all(deletesims == emptyM) and np.all(emptyR == emptyM)

        emptyidx = np.where(emptyM)[0]
        metrics = np.delete(metrics, emptyidx)
        ref = np.delete(ref, emptyidx)
        updList = np.delete(updList, emptyidx)

        if len(metrics) == 0:
            raise NameError('No trajectories were read')

        # Constructing a MetricData object
        if update:
            data = update
            data.dat.extend(metrics)
            data.ref.extend(ref)
            data.simList.extend(updList)  # This is wrong but we don't use update anyways
        else:
            data = MetricData(dat=metrics, ref=ref, map=mapping, simlist=updList)

        uqfsteps = np.unique(fstep)
        data.fstep = float(stats.mode(fstep).mode)
        if len(uqfsteps) != 1:
            prefix = 'Multiple framesteps were read from the simulations. Taking the statistical mode: '
            suffix = 'ns. If it looks wrong, you can modify it by manually setting the MetricData.fstep property.'
            logger.warning(prefix + str(data.fstep) + suffix)
        else:
            logger.info('Frame step {}ns was read from the trajectories. '
                        'If it looks wrong, redefine it by manually setting the MetricData.fstep property.'
                        .format(data.fstep))

        return data