def setUpClass(self):
        """Project the bundled ``adaptive`` dataset twice and keep both results.

        A simlist is built from the ``data/*/`` and ``input/*`` entries found
        under ``home(dataDir="adaptive")`` and passed through ``simfilter``
        with the selection ``"not water"``. The contact-based
        ``MetricDistance`` projection is stored in ``self.data1`` and the
        ``MetricDihedral`` projection in ``self.data2``.
        """
        from glob import glob
        from os.path import join

        from htmd.simlist import simlist, simfilter
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home

        trajectory_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
        input_entries = glob(join(home(dataDir="adaptive"), "input", "*"))
        all_sims = simlist(trajectory_dirs, input_entries)
        filtered_sims = simfilter(all_sims, tempname(), "not water")

        projector = Metric(filtered_sims)
        contact_projection = MetricDistance(
            "protein and resid 10 and name CA",
            "resname BEN and noh",
            periodic="selections",
            metric="contacts",
            groupsel1="residue",
            threshold=4,
        )
        projector.set(contact_projection)
        self.data1 = projector.project()

        # Re-use the same Metric object with a different projection.
        projector.set(MetricDihedral())
        self.data2 = projector.project()
Exemple #2
0
 def _getSimlist(self):
     """Build the simulation list, filtering it when ``self.filter`` is set.

     ``simlist`` receives three glob results: the sub-folders of
     ``self.datapath`` and, twice, the sub-folders of ``self.inputpath``.
     When ``self.filter`` is truthy the list is passed through ``simfilter``
     with ``self.filteredpath`` and ``self.filtersel``.
     """
     logger.info('Postprocessing new data')
     data_pattern = path.join(self.datapath, '*', '')
     input_pattern = path.join(self.inputpath, '*', '')
     collected = simlist(glob(data_pattern), glob(input_pattern), glob(input_pattern))
     if not self.filter:
         return collected
     return simfilter(collected, self.filteredpath, filtersel=self.filtersel)
Exemple #3
0
 def _getSimlist(self):
     """Return the simlist for the current data, optionally filtered.

     The sub-folders of ``self.datapath`` and of ``self.inputpath`` (the
     latter globbed twice) are handed to ``simlist``; the result goes through
     ``simfilter`` only when ``self.filter`` is truthy.
     """
     logger.info('Postprocessing new data')
     data_pattern = path.join(self.datapath, '*', '')
     input_pattern = path.join(self.inputpath, '*', '')
     result = simlist(glob(data_pattern), glob(input_pattern), glob(input_pattern))
     if self.filter:
         result = simfilter(result, self.filteredpath, filtersel=self.filtersel)
     return result
Exemple #4
0
    def setUpClass(self):
        """Project the bundled ``adaptive`` dataset twice and keep both results.

        A simlist is built from the ``data/*/`` and ``input/*`` entries found
        under ``home(dataDir='adaptive')`` and passed through ``simfilter``
        with the selection ``'not water'``. The contact-based
        ``MetricDistance`` projection is stored in ``self.data1`` and the
        ``MetricDihedral`` projection in ``self.data2``.
        """
        from glob import glob
        from os.path import join

        from htmd.simlist import simlist, simfilter
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home

        trajectory_dirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
        input_entries = glob(join(home(dataDir='adaptive'), 'input', '*'))
        all_sims = simlist(trajectory_dirs, input_entries)
        filtered_sims = simfilter(all_sims, tempname(), 'not water')

        projector = Metric(filtered_sims)
        contact_projection = MetricDistance(
            'protein and resid 10 and name CA',
            'resname BEN and noh',
            metric='contacts',
            groupsel1='residue',
            threshold=4,
        )
        projector.set(contact_projection)
        self.data1 = projector.project()

        # Re-use the same Metric object with a different projection.
        projector.set(MetricDihedral())
        self.data2 = projector.project()
Exemple #5
0
    def _algorithm(self):
        """Analyse the collected data and write inputs for the spawned frames.

        Steps, in order: build the simulation list (filtered when
        ``self.filter`` is set), project it with ``self.projection``,
        optionally reduce it with TICA, call ``dropTraj``, cluster, build a
        Markov model (saved when ``self.save`` is set), then ask
        ``self._getSpawnFrames`` for frames and pass them to
        ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        # simlist receives three glob results: sub-folders of datapath,
        # structure.pdb files inside the inputpath sub-folders, and the
        # inputpath sub-folders themselves.
        sims = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        metr = Metric(sims, skip=self.skip)
        metr.set(self.projection)

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        if self.ticadim > 0:
            # tica = TICA(metr, int(max(2, np.ceil(self.ticalag))))  # gianni: without project it was tooooo slow
            # TICA is given the already-projected data; the lag is ticalag
            # rounded up, with a floor of 2.
            tica = TICA(metr.project(), int(max(2, np.ceil(self.ticalag))))
            datadr = tica.project(self.ticadim)
        else:
            # No dimensionality reduction requested: use the raw projection.
            datadr = metr.project()

        datadr.dropTraj(
        )  # Preferably we should do this before any projections. Corrupted sims can affect TICA
        # The cluster count is derived from the number of frames by _numClusters.
        datadr.cluster(
            self.clustmethod(n_clusters=self._numClusters(datadr.numFrames)))
        self._model = Model(datadr)
        self._model.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            # The file name embeds the value returned by _getEpoch().
            self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        relFrames = self._getSpawnFrames(self._model, datadr)
        # relFrames is concatenated into one array and converted with rel2sim
        # before being written out.
        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
 def _getSimlist(self):
     """Build the simulation list, filtering it when ``self.filter`` is set.

     ``simlist`` receives three glob results: the sub-folders of
     ``self.datapath`` and, twice, the sub-folders of ``self.inputpath``.
     When ``self.filter`` is truthy the list is passed through ``simfilter``
     with ``self.filteredpath`` and ``self.filtersel``.
     """
     logger.info("Postprocessing new data")
     data_pattern = path.join(self.datapath, "*", "")
     input_pattern = path.join(self.inputpath, "*", "")
     collected = simlist(
         glob(data_pattern),
         glob(input_pattern),
         glob(input_pattern),
     )
     if not self.filter:
         return collected
     return simfilter(collected, self.filteredpath, filtersel=self.filtersel)
Exemple #7
0
    def _algorithm(self):
        """Analyse the collected data and write inputs for the spawned frames.

        Steps, in order: build and filter the simulation list, project it
        with a distance metric, call ``dropTraj``, optionally reduce with
        TICA, cluster, build a Markov model, compute spawn counts via
        ``self._criteria`` / ``self._spawn``, sample frames from the selected
        states and pass them to ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        # simlist receives three glob results: sub-folders of datapath,
        # structure.pdb files inside the inputpath sub-folders, and the
        # inputpath sub-folders themselves.
        datalist = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', 'structure.pdb')),
                           glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

        # Two selections -> MetricDistance between them; otherwise
        # MetricSelfDistance on the first selection only.
        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            # TICA lag is 20/skip rounded up, with a floor of 2.
            tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Cluster count: log-based formula with a floor of 100 ...
        K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))  # heuristic
        # ... then capped at one third of the number of frames.
        if K > datadr.numFrames / 3: # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        # If datadr.K drops below 10 after the mergesmall clustering, recluster
        # without mergesmall and sample with replacement later on.
        # NOTE(review): datadr.K is presumably the resulting cluster count - confirm.
        if datadr.K < 10:
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # NOTE(review): np.ceil returns a float, which is then passed as
            # ``nits`` below - confirm the callee accepts a non-int value.
            macronum = np.ceil(datadr.K / 2)
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        # Final macrostate count: number of timescales above the lag, at
        # least 2 and at most self.macronum.
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        # Request as many spawns as there are free slots (nmax - running).
        (spawncounts, prob) = self._spawn(p_i, self.nmax-self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        # Only states with a positive spawn count are sampled.
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #8
0
    def _algorithm(self):
        """Analyse the collected data, score states and write spawn inputs.

        Steps, in order: build the simulation list (filtered when
        ``self.filter`` is set), project it with ``self.projection``,
        optionally reduce it with TICA, call ``dropTraj``, cluster and build
        a Markov model (saved when ``self.save`` is set). A reward is then
        computed as ``dc + self.ucscale * uc`` from a feature-scaled
        undirected (count-based) and directed component, and passed to
        ``self._getSpawnFrames`` to select the frames handed to
        ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        # simlist receives three glob results: sub-folders of datapath,
        # structure.pdb files inside the inputpath sub-folders, and the
        # inputpath sub-folders themselves.
        sims = simlist(glob(path.join(self.datapath, '*', '')),
                       glob(path.join(self.inputpath, '*', 'structure.pdb')),
                       glob(path.join(self.inputpath, '*', '')))
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        metr = Metric(sims, skip=self.skip)
        metr.set(self.projection)

        # if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        if self.ticadim > 0:
            # tica = TICA(metr, int(max(2, np.ceil(self.ticalag))))  # gianni: without project it was tooooo slow
            # TICA is given the already-projected data; the lag is ticalag
            # rounded up, with a floor of 2.
            tica = TICA(metr.project(), int(max(2, np.ceil(self.ticalag))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = metr.project()

        datadr.dropTraj(
        )  # Preferably we should do this before any projections. Corrupted sims can affect TICA
        # The cluster count is derived from the number of frames by _numClusters.
        datadr.cluster(
            self.clustmethod(n_clusters=self._numClusters(datadr.numFrames)))
        model = Model(datadr)
        self._model = model
        self._model.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            # The file name embeds the value returned by _getEpoch().
            self._model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        # Undirected component
        uc = -model.data.N  # Lower counts should give higher score hence the -
        # Only 'micro' and 'macro' statetype values re-index uc; any other
        # value leaves it as computed above.
        if self.statetype == 'micro':
            uc = uc[model.cluster_ofmicro]
        if self.statetype == 'macro':
            uc = macroAccumulate(model, uc[model.cluster_ofmicro])

        # Calculating the directed component
        dc = self._calculateDirectedComponent(sims, model.data.St,
                                              model.data.N)
        # Same statetype handling as for the undirected component.
        if self.statetype == 'micro':
            dc = dc[model.cluster_ofmicro]
        if self.statetype == 'macro':
            dc = macroAccumulate(model, dc[model.cluster_ofmicro])

        # Both components go through _featScale before being combined.
        uc = self._featScale(uc)
        dc = self._featScale(dc)

        # ucscale weights the undirected component relative to the directed one.
        reward = dc + self.ucscale * uc

        relFrames = self._getSpawnFrames(reward, self._model, datadr)
        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #9
0
 def _algorithm(self):
     """Pick random frames from the projected data and respawn from them.

     ``self.nmax - self.running`` frame indices are drawn with
     ``np.random.randint`` over ``[0, data.numFrames)``, converted with
     ``abs2sim`` and handed to ``self._writeInputs``.
     """
     from htmd.projections.metric import Metric
     from htmd.molecule.molecule import Molecule
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     trajectory_dirs = glob(path.join(self.datapath, '*', ''))
     structure_files = glob(path.join(self.inputpath, '*', 'structure.pdb'))
     input_dirs = glob(path.join(self.inputpath, '*', ''))
     sims = simlist(trajectory_dirs, structure_files, input_dirs)

     ca_selection = 'protein and name CA'
     projector = Metric(sims)
     reference = Molecule(sims[0].molfile)
     projector.projection(MetricCoordinate(reference, ca_selection, ca_selection))
     data = projector.project()

     spawn_count = self.nmax - self.running
     frame_indices = np.random.randint(0, data.numFrames, spawn_count)
     self._writeInputs(data.abs2sim(frame_indices))
Exemple #10
0
 def _algorithm(self):
     """Pick random frames from the projected data and respawn from them.

     The simlist is projected with ``MetricCoordinate.project``; then
     ``self.nmax - self.running`` frame indices are drawn with
     ``np.random.randint`` over ``[0, data.numFrames)``, converted with
     ``abs2sim`` and handed to ``self._writeInputs``.
     """
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     trajectory_dirs = glob(path.join(self.datapath, '*', ''))
     structure_files = glob(path.join(self.inputpath, '*', 'structure.pdb'))
     input_dirs = glob(path.join(self.inputpath, '*', ''))
     sims = simlist(trajectory_dirs, structure_files, input_dirs)

     ca_selection = 'protein and name CA'
     data = MetricCoordinate.project(sims, sims[0].molfile, ca_selection, ca_selection)

     spawn_count = self.nmax - self.running
     frame_indices = np.random.randint(0, data.numFrames, spawn_count)
     self._writeInputs(data.abs2sim(frame_indices))
Exemple #11
0
    def _getsimlist(self, folder):
        """Build a simlist from ``<folder>/filtered/*/`` without repeated names.

        ``folder`` is used as a glob pattern. When several matched simulation
        folders share the same final directory name, only the first one
        returned by ``glob`` is kept. The structure file is taken from the
        first path matched by ``glob(folder)``.

        Raises
        ------
        IndexError
            If ``glob(folder)`` matches nothing.
        """
        from htmd.simlist import simlist
        from glob import glob

        # Keep the first folder seen for every simulation name, to avoid
        # problems while merging multiple data sources.
        seen_names = set()
        unique_folders = []
        for candidate in glob(f'{folder}/filtered/*/'):
            sim_name = candidate.split("/")[-2]
            if sim_name not in seen_names:
                seen_names.add(sim_name)
                unique_folders.append(candidate)

        first_match = glob(folder)[0]
        return simlist(unique_folders, f'{first_match}/filtered/filtered.pdb')
Exemple #12
0
def removeCorrupted():
    """Move simulations flagged by ``corruptMetric`` from ``./filtered`` to ``/tmp``.

    A simlist is built from ``./filtered/*/`` with ``./filtered/filtered.pdb``
    and projected with ``corruptMetric``. Every simulation whose projection
    sums to a non-zero value has the directory of its first trajectory file
    moved to ``/tmp/<that directory path>``.

    If the simlist cannot be built the function returns silently.
    """
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from os import path
    from glob import glob
    import shutil

    print("Removing Corrupted Simulations")
    try:
        sims = simlist(glob("./filtered/*/"), "./filtered/filtered.pdb")
    except Exception:
        # Best effort: without a usable simlist there is nothing to check.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return
    met = Metric(sims)
    met.set(corruptMetric)
    dat = met.project()
    for projection, sim in zip(dat.dat, dat.simlist):
        # A non-zero sum of the projection marks the simulation for removal.
        if np.sum(projection):
            sim_dir = path.dirname(sim.trajectory[0])
            shutil.move(sim_dir, f"/tmp/{sim_dir}")
Exemple #13
0
    mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc'))
    ref = mol.copy()
    ref.coords = np.atleast_3d(ref.coords[:, :, 0])
    metr = MetricRmsd(ref, 'protein and name CA')
    data = metr.project(mol)

    lastrmsd = np.array([
        1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261, 1.40294552,
        1.25354612, 1.30127883, 1.40618336, 1.18303752, 1.24414587, 1.34513164,
        1.31932807, 1.34282494, 1.2261436, 1.36359048, 1.26243281, 1.21157813,
        1.26476419, 1.29413617
    ],
                        dtype=np.float32)
    assert np.all(
        np.abs(data[-20:] -
               lastrmsd) < 0.001), 'Coordinates calculation is broken'

    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                    dd + '/generators/1/structure.pdb')
    ref = Molecule(dd + "/generators/1/structure.pdb")

    metr2 = Metric(fsims)
    metr2.set(MetricRmsd(ref, 'protein and name CA'))
    data2 = metr2.project()

    assert data2.trajectories[0].projection.shape == (6, 1)

    pass
Exemple #14
0
            datatica.description = keepdimdesc.append(datatica.description,
                                                      ignore_index=True)

        return datatica


if __name__ == "__main__":
    from htmd.simlist import simlist
    from glob import glob
    from moleculekit.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir="villin")

    sims = simlist(glob(join(testfolder, "*", "")),
                   join(testfolder, "filtered.pdb"))
    met = Metric(sims[0:2])
    met.set(MetricSelfDistance("protein and name CA"))
    data = met.project()
    data.fstep = 0.1

    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units="ns", dimensions=range(2, 10))
    datatica5 = tica5.project(2)
    expected = [
        [3.69098878, -0.33862674, 0.85779184],
        [3.77816105, -0.31887317, 0.87724227],
        [3.83537507, -0.11878026, 0.65236956],
    ]
    assert np.allclose(
Exemple #15
0
            datatica.description = keepdimdesc.append(datatica.description,
                                                      ignore_index=True)

        return datatica


if __name__ == '__main__':
    from htmd.simlist import simlist
    from glob import glob
    from htmd.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='villin')

    sims = simlist(glob(join(testfolder, '*', '')),
                   join(testfolder, 'filtered.pdb'))
    met = Metric(sims[0:2])
    met.set(MetricSelfDistance('protein and name CA'))
    data = met.project()
    data.fstep = 0.1

    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10))
    datatica5 = tica5.project(2)
    expected = [[3.69098878, -0.33862674, 0.85779184],
                [3.77816105, -0.31887317, 0.87724227],
                [3.83537507, -0.11878026, 0.65236956]]
    assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]),
                       np.abs(np.array(expected, dtype=np.float32)),
                       rtol=0,
Exemple #16
0
        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            datatica.description = keepdimdesc.append(datatica.description, ignore_index=True)

        return datatica


if __name__ == '__main__':
    from htmd.simlist import simlist
    from glob import glob
    from htmd.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='villin')

    sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb'))
    met = Metric(sims[0:2])
    met.projection(MetricSelfDistance('protein and name CA'))
    data = met.project()
    data.fstep = 0.1

    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10))
    datatica5 = tica5.project(2)
    expected = [[ 3.69098878, -0.33862674,  0.85779184],
                [ 3.77816105, -0.31887317,  0.87724227],
                [ 3.83537507, -0.11878026,  0.65236956]]
    assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01)
    assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01)
    assert np.all(datatica.description.iloc[[587, 588]].type == 'tica')
Exemple #17
0
                     ])  # None can be replaced by any other "not in b" value


if __name__ == '__main__':
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from htmd.projections.metricdistance import MetricDistance
    from htmd.projections.metricdihedral import MetricDihedral
    from htmd.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')

    sims = simlist(glob(join(testfolder, 'data', '*', '')),
                   glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    metr.set(
        MetricDistance('protein and resid 10 and name CA',
                       'resname BEN and noh',
                       metric='contacts',
                       groupsel1='residue',
                       threshold=4))
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)
Exemple #18
0
    mol = Molecule(path.join(home(), 'data', '1kdx', '1kdx_0.pdb'))
    mol.read(path.join(home(), 'data', '1kdx', '1kdx.dcd'))

    metric = MetricPlumed2(
        ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6'])
    data = metric.project(mol)
    ref = np.array([
        0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392,
        19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669,
        22.484084, 20.893447, 18.791701, 21.833056, 19.901318
    ])
    assert np.all(
        np.abs(ref - data[:, 0]) < 0.01), 'Plumed demo calculation is broken'

    # Simlist
    # datadirs=glob(path.join(home(), 'data', 'adaptive', 'data', '*' )
    # fsims=simlist(glob(path.join(home(), 'data', 'adaptive', 'data', '*', '/')),
    #              path.join(home(), 'data', 'adaptive', 'generators', '1','structure.pdb'))

    fsims = simlist([
        '/home/toni/work/htmd/htmd/htmd/data/adaptive/data/e1s1_1/',
        '/home/toni/work/htmd/htmd/htmd/data/adaptive/data/e1s2_1/'
    ], '/home/toni/work/htmd/htmd/htmd/data/adaptive/generators/1/structure.pdb'
                    )

    metr = Metric(fsims)
    metr.projection(
        MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6']))
    data2 = metr.project()
    print(data2.dat)
Exemple #19
0
            sim = s
            break
    if sim is None:
        raise NameError(f"Could not find parent of simulation {simname}.")
    return sim, prevpiece, prevframe, epo


if __name__ == "__main__":
    import htmd
    import os
    from htmd.simlist import Frame, simlist
    from htmd.util import tempname

    filedir = htmd.home.home() + "/data/adaptive/"
    sims = simlist(
        glob(os.path.join(filedir, "data", "*", "")),
        glob(os.path.join(filedir, "input", "*", "")),
        glob(os.path.join(filedir, "input", "*", "")),
    )

    outf = tempname()
    os.makedirs(outf)

    f = Frame(sims[0], 0, 5)
    _writeInputsFunction(1, f, 2, outf, "input.coor")

    mol = Molecule(sims[0])
    mol.read(os.path.join(outf, "e2s2_e1s1p0f5", "input.coor"))

    shutil.rmtree(outf)
Exemple #20
0
    md.run()
    # Cleaning up
    inputodel = glob(path.join(home(), 'data', 'adaptive', 'input', 'e2*'))
    for i in inputodel:
        shutil.rmtree(i, ignore_errors=True, acemd='/shared/acemd/bin/acemd')
    os.remove(path.join(home(), 'data', 'adaptive', 'input', 'e2_writeinputs.log'))'''

    import htmd
    import os
    import shutil
    from htmd.queues.localqueue import LocalGPUQueue
    from htmd.simlist import Frame, simlist
    from htmd.util import tempname

    filedir = htmd.home.home()+'/data/adaptive/'
    sims = simlist(glob(os.path.join(filedir, 'data', '*', '')),
                   glob(os.path.join(filedir, 'input', '*', '')),
                   glob(os.path.join(filedir, 'input', '*', '')))

    outf = tempname()
    os.makedirs(outf)

    f = Frame(sims[0], 0, 5)
    _writeInputsFunction(1, f, 2, outf, 'input.coor')

    mol = Molecule(sims[0])
    mol.read(os.path.join(outf, 'e2s2_e1s1p0f5', 'input.coor'))

    shutil.rmtree(outf)

Exemple #21
0
    def _algorithm(self):
        """Analyse the collected data and write inputs for the spawned frames.

        Steps, in order: build and filter the simulation list, project it
        with a distance metric, call ``dropTraj``, optionally reduce with
        TICA, cluster, build a Markov model, compute spawn counts via
        ``self._criteria`` / ``self._spawn``, sample frames from the selected
        states and pass them to ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        # simlist receives three glob results: sub-folders of datapath,
        # structure.pdb files inside the inputpath sub-folders, and the
        # inputpath sub-folders themselves.
        datalist = simlist(
            glob(path.join(self.datapath, '*', '')),
            glob(path.join(self.inputpath, '*', 'structure.pdb')),
            glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist,
                             self.filteredpath,
                             filtersel=self.filtersel)

        # Two selections -> MetricDistance between them; otherwise
        # MetricSelfDistance on the first selection only.
        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1,
                                  self.metricsel2,
                                  metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            # TICA lag is 20/skip rounded up, with a floor of 2.
            tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Cluster count: log-based formula with a floor of 100 ...
        K = int(
            max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
                100))  # heuristic
        # ... then capped at one third of the number of frames.
        if K > datadr.numFrames / 3:  # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        # If datadr.K drops below 10 after the mergesmall clustering, recluster
        # without mergesmall and sample with replacement later on.
        # NOTE(review): datadr.K is presumably the resulting cluster count - confirm.
        if datadr.K < 10:
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # NOTE(review): np.ceil returns a float, which is then passed as
            # ``nits`` below - confirm the callee accepts a non-int value.
            macronum = np.ceil(datadr.K / 2)
            logger.warning(
                'Using less macrostates than requested due to lack of microstates. macronum = '
                + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(),
                                lags=self.lag,
                                nits=macronum).get_timescales()
        # Final macrostate count: number of timescales above the lag, at
        # least 2 and at most self.macronum.
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        # Request as many spawns as there are free slots (nmax - running).
        (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        # Only states with a positive spawn count are sampled.
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx,
                                          spawncounts[stateIdx],
                                          statetype='micro',
                                          replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Exemple #22
0
    import htmd.home
    from htmd.simlist import simlist
    from htmd.projections.metricplumed2 import MetricPlumed2
    from htmd.projections.metric import Metric

    try:
        _getPlumedRoot()
    except:
        print("Tests in %s skipped because plumed executable not found." % __file__)
        sys.exit()



    # Simlist
    dd = htmd.home.home(dataDir="adaptive")
    fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                         dd + '/generators/1/structure.pdb')
    metr = Metric(fsims)
    metr.set(MetricPlumed2(
        ['d1: DISTANCE ATOMS=2,3',
         'd2: DISTANCE ATOMS=5,6']))
    data2 = metr.project()


    # One simulation
    testpath=os.path.join(htmd.home.home(), 'data', '1kdx')
    mol = Molecule(os.path.join(testpath, '1kdx_0.pdb'))
    mol.read(os.path.join(htmd.home.home(), 'data', '1kdx', '1kdx.dcd'))

    metric = MetricPlumed2(['d1: DISTANCE ATOMS=1,200',
                            'd2: DISTANCE ATOMS=5,6'])
    data = metric.project(mol)
Exemple #23
0
    return np.array([bind.get(itm, -1) for itm in a])  # None can be replaced by any other "not in b" value


if __name__ == '__main__':
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from htmd.projections.metricdistance import MetricDistance
    from htmd.projections.metricdihedral import MetricDihedral
    from htmd.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')

    sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts',
                            groupsel1='residue', threshold=4))
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
    assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct'
    assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct'
    assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'
Exemple #24
0
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path

    mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb'))
    mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc'))
    ref = mol.copy()
    ref.coords = np.atleast_3d(ref.coords[:, :, 0])
    metr = MetricTMscore(ref, 'protein and name CA')
    data = metr.project(mol)

    lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811,
                       0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426,
                       0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32)
    assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken'


    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')],
                    path.join(dd, 'generators', '1', 'structure.pdb'))
    ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))

    metr2 = Metric(fsims)
    metr2.projection(MetricTMscore(ref, 'protein and name CA'))
    data2 = metr2.project()

    assert data2.trajectories[0].projection.shape == (6, 1)
Exemple #25
0
def analyze_folder(folder=None,
                   out_folder="/tmp",
                   skip=1,
                   metrics=None,
                   clu=500,
                   tica=True,
                   ticadim=5,
                   tica_lag=20,
                   model_lag=10,
                   model_units='ns',
                   macro_N=10,
                   bulk_split=False,
                   fes=True,
                   rg_analysis=True,
                   save=True,
                   data_fstep=None):
    """Analysis script to create a Markov State Model.

    Creates and returns a Markov State Model given a data folder.
    Intended to follow up the evolution of an adaptive sampling run.
    Allows saving the model and several informative plots.

    Parameters
    ----------
    folder : str
        Data folder where adaptive is running. It must contain either a
        ``simlist.npy`` file or a ``filtered`` sub-folder with the
        simulations and ``filtered.pdb``.
    out_folder : str
        Output folder to store derived data
    skip : int
        Number of frames to skip while projecting the MD data
    metrics : [:class: `Metric` object]
        Metric array used to project the data
    clu : int
        Number of clusters to create using the MiniBatchKMeans method.
    tica : bool
        Whether to use TICA (True) or GWPCA (False) for dimensionality
        reduction
    ticadim : int
        Number of dimensions to project the data onto. If falsy, the model
        will be created using the raw projected data
    tica_lag : int, optional
        Lag passed to ``TICA`` (or to ``GWPCA`` when ``tica`` is False)
    model_lag : int
        Lag passed to ``Model.markovModel``, expressed in ``model_units``
    model_units : str, optional
        Units passed to ``Model.markovModel`` together with ``model_lag``
    macro_N : int
        Number of macrostates to split the final Markov State Model. If
        falsy, no Markov model is built and the dependent plots are skipped
    bulk_split : optional
        If truthy, ``create_bulk(model, bulk_split)`` is attempted after the
        Markov model is built; a failure is printed and ignored
    fes : bool, optional
        Currently unused: the free-energy-surface plot is disabled in the
        code below.
    rg_analysis : bool, optional
        If true, a plot with information relative to the radius of gyration
        of the molecule will be created.
    save : bool, optional
        If true, the model will be saved in the outputs folder
    data_fstep : optional
        If truthy, assigned to the ``fstep`` attribute of the projected data
        before clustering

    Returns
    -------
    :class:`Model`
        Final model

    Raises
    ------
    Exception
        If clustering fails; the message is ``"Error "`` followed by the
        original error text and the original exception is chained as cause.
    """
    from htmd.model import Model
    from htmd.molecule.molecule import Molecule
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from sklearn.cluster import MiniBatchKMeans
    from IDP_htmd.IDP_model import plot_RG
    from IDP_htmd.model_utils import create_bulk
    from glob import glob
    import os

    # Creating the output folder stays best-effort, but the message now
    # reflects what actually happened.
    try:
        os.mkdir(out_folder)
    except FileExistsError:
        print("Folder already exists")
    except OSError as err:
        print(f"Could not create {out_folder}: {err}")

    # Prefer a cached simlist; fall back to building it from the filtered data.
    # NOTE(review): allow_pickle=True executes pickled content on load - only
    # safe when simlist.npy comes from a trusted source.
    try:
        fsims = np.load(f"{folder}/simlist.npy", allow_pickle=True)
        print(f"Loaded {folder}/simlist.npy")
    except Exception:
        print("Creating simlist")
        # os.path.join works whether or not ``folder`` ends with a separator,
        # matching the f"{folder}/..." form used for simlist.npy above.
        sims = glob(os.path.join(folder, 'filtered', '*', ''))
        fsims = simlist(sims, os.path.join(folder, 'filtered', 'filtered.pdb'))
    metr = Metric(fsims, skip=skip)
    metr.set(metrics)

    #Check if this gives problems to ITS

    # Re-use the data of a previously saved model when one can be loaded;
    # otherwise project (and optionally reduce) the data from scratch.
    try:
        model = Model(file=f"{out_folder}/model.dat")
        out_data = model.data
        print(f"Loading model: {out_folder}/model.dat")
    except Exception:
        if tica and ticadim:
            from htmd.projections.tica import TICA
            print("Projecting TICA")
            tica_projector = TICA(metr, tica_lag)
            out_data = tica_projector.project(ticadim)
        elif not tica and ticadim:
            from htmd.projections.gwpca import GWPCA
            data = metr.project()
            data.dropTraj()
            print("using GWPCA")
            gwpca = GWPCA(data, tica_lag)
            out_data = gwpca.project(ticadim)
        else:
            print("Not using TICA")
            data = metr.project()
            data.dropTraj()
            out_data = data

    if data_fstep:
        out_data.fstep = data_fstep

    # Clustering errors are re-raised with an "Error " prefix; the original
    # exception is kept as the cause so its traceback is not lost.
    try:
        out_data.cluster(MiniBatchKMeans(n_clusters=clu), mergesmall=5)
    except Exception as e:
        raise Exception("Error " + str(e)) from e

    model = Model(out_data)
    model.plotTimescales(plot=False, save=f"{out_folder}/1_its.png")

    if macro_N:
        model.markovModel(model_lag, macro_N, units=model_units)

        if bulk_split:
            # Bulk splitting is optional: report a failure and carry on.
            try:
                print("Starting bulk splitting")
                create_bulk(model, bulk_split)
            except Exception as e:
                print("Could not perform the bulk splitting")
                print(e)

        model.eqDistribution(plot=False,
                             save=f"{out_folder}/1.2_eqDistribution.png")

        if rg_analysis:
            # Imported under an alias so it does not shadow the boolean
            # ``rg_analysis`` parameter.
            from IDP_htmd.IDP_analysis import rg_analysis as compute_rg
            mol = Molecule(model.data.simlist[0].molfile)
            rg_data = compute_rg(model, skip=skip)
            plot_RG(rg_data, mol, save=f"{out_folder}/1.4_rg.png")

        # Disabled free-energy-surface plot (this is why ``fes`` is unused):
        # if fes and ticadim:
        # model.plotFES(0, 1, temperature=310, states=True,
        #     plot=False, save=f"{out_folder}/1.3_fes.png")

    if save:
        model.save(f"{out_folder}/model.dat")

    return model
Exemple #26
0
    md.datapath = path.join(home(), 'data', 'adaptive', 'data')
    md.run()
    # Cleaning up
    inputodel = glob(path.join(home(), 'data', 'adaptive', 'input', 'e2*'))
    for i in inputodel:
        shutil.rmtree(i, ignore_errors=True, acemd='/shared/acemd/bin/acemd')
    os.remove(path.join(home(), 'data', 'adaptive', 'input', 'e2_writeinputs.log'))'''

    import htmd
    import os
    import shutil
    from htmd.queues.localqueue import LocalGPUQueue
    from htmd.simlist import Frame, simlist
    from htmd.util import tempname

    filedir = htmd.home.home() + '/data/adaptive/'
    sims = simlist(glob(os.path.join(filedir, 'data', '*', '')),
                   glob(os.path.join(filedir, 'input', '*', '')),
                   glob(os.path.join(filedir, 'input', '*', '')))

    outf = tempname()
    os.makedirs(outf)

    f = Frame(sims[0], 0, 5)
    _writeInputsFunction(1, f, 2, outf, 'input.coor')

    mol = Molecule(sims[0])
    mol.read(os.path.join(outf, 'e2s2_e1s1p0f5', 'input.coor'))

    shutil.rmtree(outf)
Exemple #27
0
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path

    mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb'))
    mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc'))
    ref = mol.copy()
    ref.coords = np.atleast_3d(ref.coords[:, :, 0])
    metr = MetricTMscore(ref, 'protein and name CA')
    data = metr.project(mol)

    lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811,
                       0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426,
                       0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32)
    assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken'


    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')],
                    path.join(dd, 'generators', '1', 'structure.pdb'))
    ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))

    metr2 = Metric(fsims)
    metr2.set(MetricTMscore(ref, 'protein and name CA'))
    data2 = metr2.project()

    assert data2.trajectories[0].projection.shape == (6, 1)