Beispiel #1
0
    def _algorithm(self):
        """Analyse the simulations gathered so far and spawn the next batch.

        Steps, in order:

        1. Build the simulation list from ``self.datapath`` / ``self.inputpath``
           and filter it with ``self.filtersel`` into ``self.filteredpath``.
        2. Project with ``MetricDistance`` (when ``metricsel2`` is set) or
           ``MetricSelfDistance``; ``self.skip`` is forwarded to ``Metric``.
        3. Optionally reduce dimensionality with TICA (``self.ticadim > 0``).
        4. Cluster the frames, build a Markov model and choose the number of
           macrostates from the implied timescales.
        5. Score the states with ``self._criteria``, convert the scores into
           per-state spawn counts with ``self._spawn`` for
           ``self.nmax - self.running`` simulations (presumably the number of
           free slots), sample frames from the selected microstates and hand
           them to ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        datalist = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', 'structure.pdb')),
                           glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

        # Two selections -> distances between them; one selection -> self-distances.
        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            # The second TICA argument shrinks as skip grows but never drops below 2.
            tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Heuristic cluster count: at least 100, but capped at a third of the frames.
        K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))  # heuristic
        if K > datadr.numFrames / 3: # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            # Merging small clusters left fewer than 10 states: re-cluster without
            # merging and allow sampling with replacement further down.
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # np.ceil returns a float; the macrostate count is used as ``nits``
            # (a number of timescales) below, so keep it an integer.
            macronum = int(np.ceil(datadr.K / 2))
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        # Keep only as many macrostates as there are timescales above the lag
        # (at least 2, at most the requested number).
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        spawncounts, _ = self._spawn(p_i, self.nmax-self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Beispiel #2
0
 def _algorithm(self):
     """Respawn from randomly chosen frames of the existing simulations.

     All simulations found under ``self.datapath`` are projected with
     ``MetricCoordinate`` (selection ``'protein and name CA'``); the
     projection is only used for its frame count and for mapping the drawn
     frame indices back to simulations. ``self.nmax - self.running`` indices
     are drawn with ``np.random.randint`` and passed to ``self._writeInputs``.
     """
     from htmd.projections.metric import Metric
     from htmd.molecule.molecule import Molecule
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     traj_dirs = glob(path.join(self.datapath, '*', ''))
     structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
     input_dirs = glob(path.join(self.inputpath, '*', ''))
     all_sims = simlist(traj_dirs, structures, input_dirs)

     projector = Metric(all_sims)
     ca_sel = 'protein and name CA'
     reference = Molecule(all_sims[0].molfile)
     projector.projection(MetricCoordinate(reference, ca_sel, ca_sel))
     projected = projector.project()

     # randint's upper bound is exclusive, so indices lie in [0, numFrames).
     picked = np.random.randint(0, projected.numFrames, self.nmax-self.running)
     self._writeInputs(projected.abs2sim(picked))
Beispiel #3
0
 def _algorithm(self):
     """Choose random frames from the collected data and respawn from them.

     The simulation list is projected with ``MetricCoordinate`` so that a
     total frame count and an absolute-index-to-simulation mapping are
     available. ``self.nmax - self.running`` absolute frame indices are then
     drawn at random and written out through ``self._writeInputs``.
     """
     from htmd.projections.metric import Metric
     from htmd.molecule.molecule import Molecule
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     sim_list = simlist(
         glob(path.join(self.datapath, '*', '')),
         glob(path.join(self.inputpath, '*', 'structure.pdb')),
         glob(path.join(self.inputpath, '*', '')),
     )

     metric = Metric(sim_list)
     selection = 'protein and name CA'
     first_structure = Molecule(sim_list[0].molfile)
     metric.projection(MetricCoordinate(first_structure, selection, selection))
     projection = metric.project()

     # Draw absolute frame indices in [0, numFrames) and map them to simulations.
     frame_ids = np.random.randint(0, projection.numFrames,
                                   self.nmax - self.running)
     spawn_frames = projection.abs2sim(frame_ids)
     self._writeInputs(spawn_frames)
Beispiel #4
0
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path

    # --- Part 1: project a single trajectory directly with MetricTMscore ---
    pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
    xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
    mol = Molecule(pdbfile)
    mol.read(xtcfile)

    # The reference keeps only index 0 of the last coords axis (presumably the
    # first frame); atleast_3d restores the third dimension.
    reference = mol.copy()
    reference.coords = np.atleast_3d(reference.coords[:, :, 0])
    data = MetricTMscore(reference, 'protein and name CA').project(mol)

    # Stored reference values for the last 20 entries of the projection.
    lasttm = np.array([
        0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511,
        0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117,
        0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048,
        0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066,
    ], dtype=np.float32)
    deviation = np.abs(data[-20:].flatten() - lasttm)
    assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

    # --- Part 2: the same projection driven through Metric on a simlist ---
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    trajdirs = [path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')]
    structure = path.join(dd, 'generators', '1', 'structure.pdb')
    fsims = simlist(trajdirs, structure)
    ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))

    metr2 = Metric(fsims)
    metr2.projection(MetricTMscore(ref, 'protein and name CA'))
    data2 = metr2.project()

    first_shape = data2.trajectories[0].projection.shape
    assert first_shape == (6, 1)
Beispiel #5
0
        return datatica


if __name__ == '__main__':
    # Self-test: TICA computed on a subset of the projected dimensions must
    # reproduce stored reference values and keep the other dimensions labelled
    # as distances.
    from htmd.simlist import simlist
    from glob import glob
    from htmd.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='villin')
    trajdirs = glob(join(testfolder, '*', ''))
    sims = simlist(trajdirs, join(testfolder, 'filtered.pdb'))

    met = Metric(sims[0:2])
    met.projection(MetricSelfDistance('protein and name CA'))
    data = met.project()
    data.fstep = 0.1

    # The lag is given once as 2 and once as 0.2 with units='ns'; with
    # fstep = 0.1 these are presumably the same lag, and both results are
    # checked against the same reference.
    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10))
    datatica5 = tica5.project(2)

    reference = np.abs(np.array([
        [3.69098878, -0.33862674, 0.85779184],
        [3.77816105, -0.31887317, 0.87724227],
        [3.83537507, -0.11878026, 0.65236956],
    ], dtype=np.float32))
    # Absolute values are compared on both sides, so sign flips are tolerated.
    for result in (datatica, datatica5):
        corner = np.abs(result.trajectories[0].projection[-3:, -3:])
        assert np.allclose(corner, reference, rtol=0, atol=0.01)

    description = datatica.description
    assert np.all(description.iloc[[587, 588]].type == 'tica')
    assert np.all(description.iloc[range(587)].type == 'distance')
    print('In-memory TICA with subset of dimensions passed test.')
Beispiel #6
0
    def _algorithm(self):
        """Post-process the available simulations and write the next inputs.

        The method builds and filters the simulation list, projects it with
        ``MetricDistance`` (if ``metricsel2`` is set) or ``MetricSelfDistance``,
        optionally applies TICA (``self.ticadim > 0``), clusters the frames and
        builds a Markov model. The number of macrostates is chosen from the
        implied timescales. States are then scored with ``self._criteria``,
        ``self._spawn`` turns the scores into per-state spawn counts for
        ``self.nmax - self.running`` simulations (presumably the free slots),
        and frames sampled from those microstates go to ``self._writeInputs``.
        """
        logger.info('Postprocessing new data')
        datalist = simlist(
            glob(path.join(self.datapath, '*', '')),
            glob(path.join(self.inputpath, '*', 'structure.pdb')),
            glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist,
                             self.filteredpath,
                             filtersel=self.filtersel)

        # Two selections -> distances between them; one -> self-distances.
        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1,
                                  self.metricsel2,
                                  metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        data.dropTraj()
        if self.ticadim > 0:
            # The second TICA argument shrinks as skip grows, never below 2.
            tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Heuristic cluster count: at least 100, capped at a third of the frames.
        K = int(
            max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
                100))  # heuristic
        if K > datadr.numFrames / 3:  # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            # Too few states after merging small clusters: re-cluster without
            # merging and allow sampling with replacement further down.
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # np.ceil returns a float; the value is used as ``nits`` (a count
            # of timescales) below, so convert it to an integer.
            macronum = int(np.ceil(datadr.K / 2))
            logger.warning(
                'Using less macrostates than requested due to lack of microstates. macronum = '
                + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(),
                                lags=self.lag,
                                nits=macronum).get_timescales()
        # One macrostate per timescale above the lag: at least 2, at most the
        # requested number.
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        spawncounts, _ = self._spawn(p_i, self.nmax - self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx,
                                          spawncounts[stateIdx],
                                          statetype='micro',
                                          replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Beispiel #7
0
    import numpy as np
    from os import path

    # --- Part 1: project a single trajectory directly with MetricRmsd ---
    pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
    xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
    mol = Molecule(pdbfile)
    mol.read(xtcfile)

    # The reference keeps only index 0 of the last coords axis (presumably the
    # first frame); atleast_3d restores the third dimension.
    reference = mol.copy()
    reference.coords = np.atleast_3d(reference.coords[:, :, 0])
    data = MetricRmsd(reference, 'protein and name CA').project(mol)

    # Stored reference values for the last 20 entries of the projection.
    lastrmsd = np.array([
        1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261,
        1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752,
        1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436,
        1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617,
    ], dtype=np.float32)
    deviation = np.abs(data[-20:] - lastrmsd)
    assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

    # --- Part 2: the same projection driven through Metric on a simlist ---
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    trajdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
    fsims = simlist(trajdirs, dd + '/generators/1/structure.pdb')
    ref = Molecule(dd+"/generators/1/structure.pdb")

    metr2 = Metric(fsims)
    metr2.projection(MetricRmsd(ref, 'protein and name CA'))
    data2 = metr2.project()

    first_shape = data2.trajectories[0].projection.shape
    assert first_shape == (6,1)
Beispiel #8
0
    xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
    mol.read(xtcfile)

    # Reference structure: a copy reduced to index 0 of the last coords axis
    # (presumably the first frame), kept 3-D via atleast_3d.
    reference = mol.copy()
    reference.coords = np.atleast_3d(reference.coords[:, :, 0])
    data = MetricRmsd(reference, 'protein and name CA').project(mol)

    # Stored reference values for the last 20 entries of the projection.
    lastrmsd = np.array(
        [
            1.30797791, 1.29860222, 1.25042927, 1.31319737, 1.27044261,
            1.40294552, 1.25354612, 1.30127883, 1.40618336, 1.18303752,
            1.24414587, 1.34513164, 1.31932807, 1.34282494, 1.2261436,
            1.36359048, 1.26243281, 1.21157813, 1.26476419, 1.29413617,
        ],
        dtype=np.float32,
    )
    deviation = np.abs(data[-20:] - lastrmsd)
    assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

    # Second check: run the same metric through Metric on a two-entry simlist.
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    trajdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
    fsims = simlist(trajdirs, dd + '/generators/1/structure.pdb')
    ref = Molecule(dd + "/generators/1/structure.pdb")

    metr2 = Metric(fsims)
    metr2.projection(MetricRmsd(ref, 'protein and name CA'))
    data2 = metr2.project()

    first_shape = data2.dat[0].shape
    assert first_shape == (6, 1)
Beispiel #9
0
            An array containing the null data.
        """

        trajlen = mol.numFrames
        data = np.zeros((trajlen, self._ndim), dtype=np.float32)

        return data


if __name__ == "__main__":
    # Self-test: projecting with MetricNull(n) must yield n columns for the
    # first trajectory of the "adaptive" test data.
    import htmd.home
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    import htmd.projections.metricnull

    dd = htmd.home.home(dataDir="adaptive")
    trajdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
    fsims = simlist(trajdirs, dd + '/generators/1/structure.pdb')

    # Same order as before: two dimensions first, then one.
    for ndim in (2, 1):
        metric = Metric(fsims)
        metric.projection(htmd.projections.metricnull.MetricNull(ndim))
        projected = metric.project()
        assert projected.trajectories[0].projection.shape == (6, ndim)
Beispiel #10
0
    import doctest
    doctest.testmod()

    # --- Part 1: project a single simulation and compare with stored values ---
    pdbfile = os.path.join(htmd.home(), 'data', '1kdx', '1kdx_0.pdb')
    dcdfile = os.path.join(htmd.home(), 'data', '1kdx', '1kdx.dcd')
    mol = Molecule(pdbfile)
    mol.read(dcdfile)

    # Two PLUMED distance definitions; only the first column (d1) is checked.
    # The original note says MetricPlumed2(['']) can be used instead to test
    # exceptions.
    plumed_lines = ['d1: DISTANCE ATOMS=1,200',
                    'd2: DISTANCE ATOMS=5,6']
    metric = MetricPlumed2(plumed_lines)
    data = metric.project(mol)
    ref = np.array([
        0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392,
        19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669,
        22.484084, 20.893447, 18.791701, 21.833056, 19.901318,
    ])
    deviation = np.abs(ref - data[:, 0])
    assert np.all(deviation < 0.01), 'Plumed demo calculation is broken'

    # --- Part 2: run the metric through Metric on a simlist (no assertion) ---
    dd = htmd.home(dataDir="adaptive")
    trajdirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
    fsims = htmd.simlist(trajdirs, dd + '/generators/1/structure.pdb')
    metr = Metric(fsims)
    simlist_metric = MetricPlumed2(['d1: DISTANCE ATOMS=2,3',
                                    'd2: DISTANCE ATOMS=5,6'])
    metr.projection(simlist_metric)
    data2 = metr.project()
Beispiel #11
0
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path

    # First check: TM-score of every frame against a one-frame reference.
    pdbfile = path.join(home(), 'data', 'metricdistance', 'filtered.pdb')
    xtcfile = path.join(home(), 'data', 'metricdistance', 'traj.xtc')
    mol = Molecule(pdbfile)
    mol.read(xtcfile)

    # Reference: a copy reduced to index 0 of the last coords axis (presumably
    # the first frame), kept 3-D via atleast_3d.
    reference = mol.copy()
    reference.coords = np.atleast_3d(reference.coords[:, :, 0])
    data = MetricTMscore(reference, 'protein and name CA').project(mol)

    # Stored reference values for the last 20 entries of the projection.
    lasttm = np.array(
        [
            0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511,
            0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117,
            0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048,
            0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066,
        ],
        dtype=np.float32,
    )
    deviation = np.abs(data[-20:].flatten() - lasttm)
    assert np.all(deviation < 0.001), 'Coordinates calculation is broken'

    # Second check: run the same metric through Metric on a two-entry simlist.
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    trajdirs = [path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')]
    structure = path.join(dd, 'generators', '1', 'structure.pdb')
    fsims = simlist(trajdirs, structure)
    ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))

    metr2 = Metric(fsims)
    metr2.projection(MetricTMscore(ref, 'protein and name CA'))
    data2 = metr2.project()

    first_shape = data2.dat[0].shape
    assert first_shape == (6, 1)
Beispiel #12
0
            An array containing the null data.
        """

        trajlen = mol.numFrames
        data = np.zeros((trajlen, self._ndim), dtype=np.float32)

        return data


if __name__ == "__main__":
    # Self-test: the number of columns produced through Metric must equal the
    # dimension count given to MetricNull.
    import htmd.home
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    import htmd.projections.metricnull

    dd = htmd.home.home(dataDir="adaptive")
    sim_dirs = [dd + '/data/e1s1_1/', dd + '/data/e1s2_1/']
    structure = dd + '/generators/1/structure.pdb'
    fsims = simlist(sim_dirs, structure)

    # Checked in the original order: 2 dimensions, then 1.
    for n_columns in (2, 1):
        null_metric = htmd.projections.metricnull.MetricNull
        runner = Metric(fsims)
        runner.projection(null_metric(n_columns))
        result = runner.project()
        assert result.trajectories[0].projection.shape == (6, n_columns)
Beispiel #13
0
    # Skip the whole self-test when plumed cannot be located.
    try:
        _getPlumedRoot()
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are not swallowed and misreported
        # as "plumed not found".
        print("Tests in %s skipped because plumed executable not found." %
              __file__)
        sys.exit()

    import doctest
    doctest.testmod()

    # Simlist: run the metric through Metric on two trajectories of the
    # "adaptive" test data (no assertion on the result here).
    dd = htmd.home(dataDir="adaptive")
    fsims = htmd.simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                         dd + '/generators/1/structure.pdb')
    metr = Metric(fsims)
    metr.projection(
        MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6']))
    data2 = metr.project()

    # One simulation: both input files live in the same test directory, so the
    # directory path is built once and reused.
    testpath = os.path.join(htmd.home(), 'data', '1kdx')
    mol = Molecule(os.path.join(testpath, '1kdx_0.pdb'))
    mol.read(os.path.join(testpath, '1kdx.dcd'))

    metric = MetricPlumed2(
        ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6'])
    data = metric.project(mol)
    # Stored reference values (17 entries) for this projection.
    ref = np.array([
        0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392,
        19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669,
        22.484084, 20.893447, 18.791701, 21.833056, 19.901318
    ])
Beispiel #14
0
        if self.dimensions is not None:  # If TICA is done on a subset of dims
            datatica.map = keepdimdesc.append(datatica.map, ignore_index=True)

        return datatica


if __name__ == "__main__":
    # Self-test: run TICA on a subset of the projected dimensions of the
    # "villin" test data and compare against stored reference values.
    from htmd import *
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir="villin")

    # Only the first two simulations of the list are projected.
    sims = simlist(glob(join(testfolder, "*", "")), join(testfolder, "filtered.pdb"))
    met = Metric(sims[0:2])
    met.projection(MetricSelfDistance("protein and name CA"))
    data = met.project()
    data.fstep = 0.1

    # The lag is given once as 2 and once as 0.2 with units="ns"; with
    # fstep = 0.1 these are presumably the same lag -- TODO confirm.
    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units="ns", dimensions=range(2, 10))
    datatica5 = tica5.project(2)
    # Reference values for the bottom-right 3x3 corner of the first trajectory.
    expected = [
        [3.69098878, -0.33862674, 0.85779184],
        [3.77816105, -0.31887317, 0.87724227],
        [3.83537507, -0.11878026, 0.65236956],
    ]
    # Absolute values are compared on both sides, so sign flips are tolerated;
    # the tolerance is purely absolute (rtol=0, atol=0.01).
    assert np.allclose(
        np.abs(datatica.dat[0][-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01
    )