Exemple #1
0
 def _getSimlist(self):
     logger.info('Postprocessing new data')
     sims = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', '')),
                    glob(path.join(self.inputpath, '*', '')))
     if self.filter:
         sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)
     return sims
    def setUpClass(self):
        from htmd.simlist import simlist, simfilter
        from glob import glob
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home
        from os.path import join

        sims = simlist(
            glob(join(home(dataDir="adaptive"), "data", "*", "")),
            glob(join(home(dataDir="adaptive"), "input", "*")),
        )
        fsims = simfilter(sims, tempname(), "not water")

        metr = Metric(fsims)
        metr.set(
            MetricDistance(
                "protein and resid 10 and name CA",
                "resname BEN and noh",
                periodic="selections",
                metric="contacts",
                groupsel1="residue",
                threshold=4,
            )
        )
        self.data1 = metr.project()

        metr.set(MetricDihedral())
        self.data2 = metr.project()
Exemple #3
0
    def setUpClass(self):
        from htmd.simlist import simlist, simfilter
        from glob import glob
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home
        from os.path import join

        sims = simlist(glob(join(home(dataDir='adaptive'), 'data', '*', '')),
                       glob(join(home(dataDir='adaptive'), 'input', '*')))
        fsims = simfilter(sims, tempname(), 'not water')

        metr = Metric(fsims)
        metr.set(
            MetricDistance('protein and resid 10 and name CA',
                           'resname BEN and noh',
                           metric='contacts',
                           groupsel1='residue',
                           threshold=4))
        self.data1 = metr.project()

        metr.set(MetricDihedral())
        self.data2 = metr.project()
Exemple #4
0
    def _algorithm(self):
        logger.info('Postprocessing new data')
        sims = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        metr = Metric(sims, skip=self.skip)
        metr.set(self.projection)

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        if self.ticadim > 0:
            # tica = TICA(metr, int(max(2, np.ceil(self.ticalag))))  # gianni: without project it was tooooo slow
            tica = TICA(metr.project(), int(max(2, np.ceil(self.ticalag))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = metr.project()

        datadr.dropTraj(
        )  # Preferably we should do this before any projections. Corrupted sims can affect TICA
        datadr.cluster(
            self.clustmethod(n_clusters=self._numClusters(datadr.numFrames)))
        self._model = Model(datadr)
        self._model.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        relFrames = self._getSpawnFrames(self._model, datadr)
        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #5
0
 def _getSimlist(self):
     logger.info('Postprocessing new data')
     sims = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', '')),
                    glob(path.join(self.inputpath, '*', '')))
     if self.filter:
         sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)
     return sims
 def _getSimlist(self):
     logger.info("Postprocessing new data")
     sims = simlist(
         glob(path.join(self.datapath, "*", "")),
         glob(path.join(self.inputpath, "*", "")),
         glob(path.join(self.inputpath, "*", "")),
     )
     if self.filter:
         sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)
     return sims
Exemple #7
0
    def _algorithm(self):
        logger.info('Postprocessing new data')
        sims = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        metr = Metric(sims, skip=self.skip)
        metr.set(self.projection)

        # if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        if self.ticadim > 0:
            # tica = TICA(metr, int(max(2, np.ceil(self.ticalag))))  # gianni: without project it was tooooo slow
            tica = TICA(metr.project(), int(max(2, np.ceil(self.ticalag))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = metr.project()

        datadr.dropTraj(
        )  # Preferably we should do this before any projections. Corrupted sims can affect TICA
        datadr.cluster(
            self.clustmethod(n_clusters=self._numClusters(datadr.numFrames)))
        model = Model(datadr)
        self._model = model
        self._model.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        # Undirected component
        uc = -model.data.N  # Lower counts should give higher score hence the -
        if self.statetype == 'micro':
            uc = uc[model.cluster_ofmicro]
        if self.statetype == 'macro':
            uc = macroAccumulate(model, uc[model.cluster_ofmicro])

        # Calculating the directed component
        dc = self._calculateDirectedComponent(sims, model.data.St,
                                              model.data.N)
        if self.statetype == 'micro':
            dc = dc[model.cluster_ofmicro]
        if self.statetype == 'macro':
            dc = macroAccumulate(model, dc[model.cluster_ofmicro])

        uc = self._featScale(uc)
        dc = self._featScale(dc)

        reward = dc + self.ucscale * uc

        relFrames = self._getSpawnFrames(reward, self._model, datadr)
        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #8
0
    def _algorithm(self):
        logger.info('Postprocessing new data')
        datalist = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', 'structure.pdb')),
                           glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))  # heuristic
        if K > datadr.numFrames / 3: # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            macronum = np.ceil(datadr.K / 2)
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax-self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #9
0
if __name__ == '__main__':
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from htmd.projections.metricdistance import MetricDistance
    from htmd.projections.metricdihedral import MetricDihedral
    from htmd.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')

    sims = simlist(glob(join(testfolder, 'data', '*', '')),
                   glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    metr.set(
        MetricDistance('protein and resid 10 and name CA',
                       'resname BEN and noh',
                       metric='contacts',
                       groupsel1='residue',
                       threshold=4))
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
Exemple #10
0
    def _algorithm(self):
        logger.info('Postprocessing new data')
        datalist = simlist(
            glob(path.join(self.datapath, '*', '')),
            glob(path.join(self.inputpath, '*', 'structure.pdb')),
            glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist,
                             self.filteredpath,
                             filtersel=self.filtersel)

        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1,
                                  self.metricsel2,
                                  metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        K = int(
            max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
                100))  # heuristic
        if K > datadr.numFrames / 3:  # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            macronum = np.ceil(datadr.K / 2)
            logger.warning(
                'Using less macrostates than requested due to lack of microstates. macronum = '
                + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(),
                                lags=self.lag,
                                nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx,
                                          spawncounts[stateIdx],
                                          statetype='micro',
                                          replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #11
0

if __name__ == '__main__':
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from htmd.projections.metricdistance import MetricDistance
    from htmd.projections.metricdihedral import MetricDihedral
    from htmd.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')

    sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts',
                            groupsel1='residue', threshold=4))
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
    assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct'
    assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct'
    assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'