Esempio n. 1
0
    def setUpClass(self):
        """Project the "adaptive" test simulations once for the whole test class.

        The contact projection is stored in ``self.data1`` and the dihedral
        projection in ``self.data2``.
        """
        from glob import glob
        from os.path import join

        from htmd.simlist import simlist, simfilter
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home

        traj_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
        input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))
        # Filter with the "not water" selection; the filtered output goes to
        # a temporary location.
        dry_sims = simfilter(simlist(traj_dirs, input_dirs), tempname(), "not water")

        projector = Metric(dry_sims)
        contact_options = {
            "periodic": "selections",
            "metric": "contacts",
            "groupsel1": "residue",
            "threshold": 4,
        }
        projector.set(
            MetricDistance(
                "protein and resid 10 and name CA",
                "resname BEN and noh",
                **contact_options,
            )
        )
        self.data1 = projector.project()

        # Reuse the same Metric object with a different projection.
        projector.set(MetricDihedral())
        self.data2 = projector.project()
Esempio n. 2
0
    def _algorithm(self):
        """Build a Markov model from the data so far and write the next inputs.

        Collects the simulations (optionally filtered), projects them with
        ``self.projection`` (optionally reduced with TICA), clusters the
        result, builds the Markov model kept in ``self._model`` and writes
        inputs for the frames returned by ``_getSpawnFrames``.
        """
        logger.info('Postprocessing new data')
        traj_dirs = glob(path.join(self.datapath, '*', ''))
        structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
        input_dirs = glob(path.join(self.inputpath, '*', ''))
        sims = simlist(traj_dirs, structures, input_dirs)
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        projector = Metric(sims, skip=self.skip)
        projector.set(self.projection)

        if self.ticadim > 0:
            # TICA receives already-projected data: handing it the Metric
            # object itself was reported to be far too slow.
            projected = projector.project()
            lag = int(max(2, np.ceil(self.ticalag)))
            datadr = TICA(projected, lag).project(self.ticadim)
        else:
            datadr = projector.project()

        # NOTE: preferably this would happen before any projection, since
        # corrupted simulations can affect TICA.
        datadr.dropTraj()
        n_clusters = self._numClusters(datadr.numFrames)
        datadr.cluster(self.clustmethod(n_clusters=n_clusters))

        msm = Model(datadr)
        self._model = msm
        msm.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            msm.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        spawn_frames = self._getSpawnFrames(self._model, datadr)
        self._writeInputs(datadr.rel2sim(np.concatenate(spawn_frames)))
Esempio n. 3
0
    def setUpClass(self):
        """Compute the projections shared by the tests of this class.

        ``self.data1`` receives the contact projection and ``self.data2`` the
        dihedral projection of the filtered "adaptive" test simulations.
        """
        from htmd.simlist import simlist, simfilter
        from glob import glob
        from htmd.projections.metric import Metric
        from moleculekit.projections.metricdistance import MetricDistance
        from moleculekit.projections.metricdihedral import MetricDihedral
        from moleculekit.util import tempname
        from htmd.home import home
        from os.path import join

        def adaptive_glob(*parts):
            # Glob below the directory returned by home(dataDir='adaptive').
            return glob(join(home(dataDir='adaptive'), *parts))

        all_sims = simlist(adaptive_glob('data', '*', ''),
                           adaptive_glob('input', '*'))
        dry_sims = simfilter(all_sims, tempname(), 'not water')

        metr = Metric(dry_sims)
        contacts = MetricDistance('protein and resid 10 and name CA',
                                  'resname BEN and noh',
                                  metric='contacts',
                                  groupsel1='residue',
                                  threshold=4)
        metr.set(contacts)
        self.data1 = metr.project()

        # Second pass over the same simulations with the dihedral projection.
        metr.set(MetricDihedral())
        self.data2 = metr.project()
Esempio n. 4
0
    def _algorithm(self):
        """Build the Markov model, score the states and write the next inputs.

        The reward of each state is the feature-scaled directed component plus
        ``self.ucscale`` times the feature-scaled undirected component
        (negated counts). Spawn frames are chosen by ``_getSpawnFrames``.
        """
        logger.info('Postprocessing new data')
        traj_dirs = glob(path.join(self.datapath, '*', ''))
        structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
        input_dirs = glob(path.join(self.inputpath, '*', ''))
        sims = simlist(traj_dirs, structures, input_dirs)
        if self.filter:
            sims = simfilter(sims, self.filteredpath, filtersel=self.filtersel)

        projector = Metric(sims, skip=self.skip)
        projector.set(self.projection)

        if self.ticadim > 0:
            # TICA receives already-projected data: handing it the Metric
            # object itself was reported to be far too slow.
            projected = projector.project()
            lag = int(max(2, np.ceil(self.ticalag)))
            datadr = TICA(projected, lag).project(self.ticadim)
        else:
            datadr = projector.project()

        # NOTE: preferably this would happen before any projection, since
        # corrupted simulations can affect TICA.
        datadr.dropTraj()
        n_clusters = self._numClusters(datadr.numFrames)
        datadr.cluster(self.clustmethod(n_clusters=n_clusters))

        model = Model(datadr)
        self._model = model
        model.markovModel(self.lag, self._numMacrostates(datadr))
        if self.save:
            model.save('adapt_model_e' + str(self._getEpoch()) + '.dat')

        def per_state(values):
            # Re-index per-cluster values according to self.statetype.
            if self.statetype == 'micro':
                values = values[model.cluster_ofmicro]
            if self.statetype == 'macro':
                values = macroAccumulate(model, values[model.cluster_ofmicro])
            return values

        # Undirected component: counts are negated so that lower counts give
        # a higher score.
        undirected = per_state(-model.data.N)

        # Directed component.
        directed = per_state(
            self._calculateDirectedComponent(sims, model.data.St,
                                             model.data.N))

        undirected = self._featScale(undirected)
        directed = self._featScale(directed)
        reward = directed + self.ucscale * undirected

        spawn_frames = self._getSpawnFrames(reward, self._model, datadr)
        self._writeInputs(datadr.rel2sim(np.concatenate(spawn_frames)))
Esempio n. 5
0
def fitBaselineWithMetrics(projected_simlist,
                           base_simlist,
                           metric,
                           ticalag=25,
                           ticadim=4,
                           ticaunits='frames',
                           tica=False):
    """Project simulations onto a TICA space fitted on a baseline simlist.

    Implements a "MetricB" that returns the TICA transformation of a
    "MetricA":

    1) A baseline TICA is built from ``metric`` computed on ``base_simlist``.
    2) ``metric`` is computed for each trajectory of ``projected_simlist`` and
       transformed with that baseline TICA (``basetica.tic``).

    Parameters
    ----------
    projected_simlist
        Simulation list to project onto the baseline TICA space.
    base_simlist
        Simulation list used to build the baseline TICA.
    metric
        Projection set on the baseline ``Metric``; its ``project(mol)`` method
        is also called for every projected trajectory.
    ticalag : int
        Lag time passed to ``TICA``.
    ticadim : int
        Value set as the ``dim`` parameter of the baseline ``tic`` object.
    ticaunits : str
        Passed to ``TICA`` as ``units``.
    tica : bool
        If true, the baseline TICA object is returned as well.

    Returns
    -------
    projectdata
        The ``dat`` attribute of the projected data.
    basetica
        The baseline ``TICA`` object; only returned when ``tica`` is true.
    """
    from htmd.projections.tica import TICA
    from htmd.projections.metric import Metric

    basetica_metric = Metric(base_simlist)
    basetica_metric.set(metric)
    basetica = TICA(basetica_metric, ticalag, units=ticaunits)
    basetica.tic.set_params(dim=ticadim)

    def metricToTica(mol, metric, tica):
        # Compute the plain metric, then map it with the baseline transform.
        metric_data = metric.project(mol)
        return tica.tic.transform(metric_data)

    tica_metric = Metric(projected_simlist)
    # A (function, extra-arguments) tuple is set as the projection.
    tica_metric.set((metricToTica, (metric, basetica)))
    projectdata = tica_metric.project().dat

    if tica:
        return projectdata, basetica
    return projectdata
Esempio n. 6
0
def get_data(model, metr, skip=1):
    """Return the projection of ``metr`` over the simulations of a model.

    Parameters
    ----------
    model : htmd.model.Model or numpy.ndarray
        Either a Model, whose ``data.simlist`` is used, or the simlist array
        itself.
    metr
        Projection handed to ``Metric.set``.
    skip : int
        Passed to ``Metric`` as ``skip``. Default = 1

    Raises
    ------
    TypeError
        If ``model`` is neither a Model nor a numpy array.
    """
    from htmd.model import Model
    from htmd.projections.metric import Metric

    if not isinstance(model, (Model, np.ndarray)):
        raise TypeError(
            "Model should be either an htmd.model.Model or a simlist")
    sims = model.data.simlist if isinstance(model, Model) else model

    projector = Metric(sims, skip=skip)
    projector.set(metr)
    return projector.project()
Esempio n. 7
0
 def _getGoalData(self, sims):
     """Project ``sims`` with ``self.goalfunction`` and return the result."""
     from htmd.projections.metric import Metric

     logger.debug('Starting projection of directed component')
     goal_metric = Metric(sims, skip=self.skip)
     goal_metric.set(self.goalfunction)
     goal_data = goal_metric.project()
     logger.debug('Finished calculating directed component')
     return goal_data
Esempio n. 8
0
 def _getGoalData(self, sims):
     """Return the goal-function projection of the given simulations.

     The projection honours ``self.skip``; debug messages mark its start and
     end.
     """
     from htmd.projections.metric import Metric

     logger.debug('Starting projection of directed component')
     projector = Metric(sims, skip=self.skip)
     projector.set(self.goalfunction)
     projected = projector.project()
     logger.debug('Finished calculating directed component')
     return projected
Esempio n. 9
0
    def _getData(self, sims):
        """Project ``sims`` with ``self.projection``, optionally through TICA.

        When ``self.ticadim`` is positive the projected data is reduced to
        that many TICA dimensions. ``dropTraj`` is always called on the
        returned data.
        """
        projector = Metric(sims, skip=self.skip)
        projector.set(self.projection)

        if self.ticadim > 0:
            # Project first: passing the Metric object to TICA directly was
            # reported to be far too slow.
            projected = projector.project()
            projected.dropTraj()  # broken trajectories must go before TICA
            # Cap the lag at half the shortest trajectory, but never go
            # below 2.
            half_shortest = np.min(projected.trajLengths) / 2
            lag = int(np.ceil(max(2, min(half_shortest, self.ticalag))))
            datadr = TICA(projected, lag).project(self.ticadim)
        else:
            datadr = projector.project()

        datadr.dropTraj()
        return datadr
Esempio n. 10
0
    def _getData(self, sims):
        """Return the (optionally TICA-reduced) projection of ``sims``.

        Without TICA (``self.ticadim`` not positive) the raw projection is
        returned after ``dropTraj``. With TICA, trajectories are dropped
        before fitting and once more on the reduced data.
        """
        metric = Metric(sims, skip=self.skip)
        metric.set(self.projection)

        if not self.ticadim > 0:
            raw = metric.project()
            raw.dropTraj()
            return raw

        # TICA is fitted on projected data rather than on the Metric object,
        # which was reported to be far too slow.
        raw = metric.project()
        raw.dropTraj()  # drop before TICA to avoid broken trajectories
        # The lag is limited to half the shortest trajectory and is at
        # least 2.
        capped_lag = min(np.min(raw.trajLengths) / 2, self.ticalag)
        ticalag = int(np.ceil(max(2, capped_lag)))
        reduced = TICA(raw, ticalag).project(self.ticadim)
        reduced.dropTraj()
        return reduced
Esempio n. 11
0
    def _algorithm(self):
        """Build an MSM from the filtered data and write the next epoch inputs.

        Steps: collect and filter the simulations, project them with a
        (self-)distance metric, optionally reduce with TICA, cluster, build
        the Markov model, and respawn from microstates selected by
        ``_criteria`` and ``_spawn``.
        """
        logger.info('Postprocessing new data')
        datalist = simlist(glob(path.join(self.datapath, '*', '')), glob(path.join(self.inputpath, '*', 'structure.pdb')),
                           glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist, self.filteredpath, filtersel=self.filtersel)

        # Two selections give a distance metric, one gives a self-distance.
        if getattr(self, 'metricsel2', None) is not None:
            proj = MetricDistance(self.metricsel1, self.metricsel2, metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        data.dropTraj()
        if self.ticadim > 0:
            tica = TICA(data, int(max(2, np.ceil(20/self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        # Heuristic number of clusters: at least 100, capped at a third of
        # the available frames.
        K = int(max(np.round(0.6 * np.log10(datadr.numFrames/1000)*1000+50), 100))
        if K > datadr.numFrames / 3:
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            # Too few clusters were left: re-cluster without mergesmall and
            # sample frames with replacement later on.
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # np.ceil returns a float; the value is used as a count (nits),
            # so it has to be an integer.
            macronum = int(np.ceil(datadr.K / 2))
            logger.warning('Using less macrostates than requested due to lack of microstates. macronum = ' + str(macronum))

        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(), lags=self.lag, nits=macronum).get_timescales()
        # Use as many macrostates as timescales exceed the lag (at least 2),
        # never more than requested.
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax-self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx, spawncounts[stateIdx], statetype='micro', replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Esempio n. 12
0
 def _algorithm(self):
     """Select random frames for respawning.

     Frame indices are drawn with ``np.random.randint`` over all projected
     frames; ``self.nmax - self.running`` indices are drawn.
     """
     from htmd.projections.metric import Metric
     from htmd.molecule.molecule import Molecule
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     traj_dirs = glob(path.join(self.datapath, '*', ''))
     structures = glob(path.join(self.inputpath, '*', 'structure.pdb'))
     input_dirs = glob(path.join(self.inputpath, '*', ''))
     sims = simlist(traj_dirs, structures, input_dirs)

     projector = Metric(sims)
     ca_selection = 'protein and name CA'
     reference = Molecule(sims[0].molfile)
     projector.projection(MetricCoordinate(reference, ca_selection, ca_selection))
     data = projector.project()

     n_spawn = self.nmax-self.running
     chosen = np.random.randint(0, data.numFrames, n_spawn)
     self._writeInputs(data.abs2sim(chosen))
Esempio n. 13
0
    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """Project the chi1 dihedrals of the protein and store them in ``self.chi``.

        Parameters
        ----------
        fstep : float
            Value assigned to the ``fstep`` attribute of the projected data.
        skip : int
            Passed to ``Metric`` as ``skip``.
        """
        protmol = self.mol.copy()
        protmol.filter('protein')
        # One entry per protein CA atom; the selection is made on self.mol.
        caidx = self.mol.atomselect('protein and name CA')

        chis = []
        for residue in self.mol.resid[caidx]:
            ch = Dihedral.chi1(protmol, residue)
            # Residues for which chi1 returns None are skipped.
            if ch is not None:
                chis.append(ch)

        metr = Metric(self.model.data.simlist, skip=skip)
        metr.set(MetricDihedral(chis, sincos=False))
        data = metr.project()
        data.fstep = fstep
        self.chi = data
Esempio n. 14
0
    def _computeChiDihedrals(self, fstep=0.1, skip=1):
        """Compute the chi1 dihedral projection of the model's simulations.

        The result is stored in ``self.chi`` with its ``fstep`` attribute set
        to ``fstep``; ``skip`` is forwarded to ``Metric``.
        """
        protein = self.mol.copy()
        protein.filter('protein')
        ca_atoms = self.mol.atomselect('protein and name CA')

        # chi1 is requested for the resid of every CA atom; None results
        # are left out.
        candidates = (Dihedral.chi1(protein, resid)
                      for resid in self.mol.resid[ca_atoms])
        chis = [dihedral for dihedral in candidates if dihedral is not None]

        projector = Metric(self.model.data.simlist, skip=skip)
        projector.set(MetricDihedral(chis, sincos=False))
        projected = projector.project()
        projected.fstep = fstep
        self.chi = projected
Esempio n. 15
0
 def _algorithm(self):
     """Respawn from randomly drawn frames of the simulations run so far."""
     from htmd.projections.metric import Metric
     from htmd.molecule.molecule import Molecule
     from htmd.projections.metriccoordinate import MetricCoordinate
     from htmd.simlist import simlist

     def in_inputs(*parts):
         # Glob below the input directory.
         return glob(path.join(self.inputpath, *parts))

     sims = simlist(glob(path.join(self.datapath, '*', '')),
                    in_inputs('*', 'structure.pdb'),
                    in_inputs('*', ''))

     metr = Metric(sims)
     # The molecule file of the first simulation is used as the reference.
     coordinate_metric = MetricCoordinate(Molecule(sims[0].molfile),
                                          'protein and name CA',
                                          'protein and name CA')
     metr.projection(coordinate_metric)
     data = metr.project()

     # One absolute frame index per simulation slot that is currently free.
     free_slots = self.nmax - self.running
     frame_indexes = np.random.randint(0, data.numFrames, free_slots)
     simframes = data.abs2sim(frame_indexes)
     self._writeInputs(simframes)
Esempio n. 16
0
def removeCorrupted():
    """Move the folders of corrupted simulations from ./filtered into /tmp.

    Every simulation under ``./filtered`` is projected with
    ``corruptMetric``; a simulation whose projection has a non-zero sum is
    treated as corrupted and its trajectory folder is moved to
    ``/tmp/<folder path>``. If the simulation list cannot be built, nothing
    is moved.
    """
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from os import path
    from glob import glob
    import shutil

    print("Removing Corrupted Simulations")
    try:
        sims = simlist(glob("./filtered/*/"), "./filtered/filtered.pdb")
    except Exception as err:
        # Best effort: without a simlist there is nothing to check. Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit propagate.
        print(f"Could not build the simlist, nothing removed: {err}")
        return
    met = Metric(sims)
    met.set(corruptMetric)
    dat = met.project()
    for projection, sim in zip(dat.dat, dat.simlist):
        if np.sum(projection):
            pt = path.dirname(sim.trajectory[0])
            shutil.move(pt, f"/tmp/{pt}")
Esempio n. 17
0
    def _precalculateData(self, metricData, folder, skip=1, fstep=None):
        """Project the simulations of all analysed epochs with ``metricData``.

        Parameters
        ----------
        metricData
            Projection handed to ``Metric.set``.
        folder
            Unused by this method.
        skip : int
            Passed to ``Metric`` as ``skip``.
        fstep : float, optional
            If truthy, assigned to the ``fstep`` attribute of the result.

        Returns
        -------
        The projected data. In test mode (``self.test``) only the first 100
        simulations are projected.
        """
        from htmd.projections.metric import Metric

        max_epoch = max(self.epoch_analysis)
        # np.concatenate takes the list of per-epoch index sequences as is.
        # Wrapping it in np.array first fails on recent NumPy versions when
        # the epochs hold different numbers of simulations.
        max_epoch_sim = np.concatenate([
            self.epoch_sim_indexes[i] for i in range(1, max_epoch + 1)
            if i in self.epoch_sim_indexes
        ])

        if self.test:
            sims = self._sims[0:100]
        else:
            sims = np.array([self._sims[i] for i in max_epoch_sim])

        metr = Metric(sims, skip=skip)
        metr.set(metricData)
        data = metr.project()
        if fstep:
            data.fstep = fstep
        return data
Esempio n. 18
0
def analyze_folder(folder=None,
                   out_folder="/tmp",
                   skip=1,
                   metrics=None,
                   clu=500,
                   tica=True,
                   ticadim=5,
                   tica_lag=20,
                   model_lag=10,
                   model_units='ns',
                   macro_N=10,
                   bulk_split=False,
                   fes=True,
                   rg_analysis=True,
                   save=True,
                   data_fstep=None):
    """Analysis script to create a Markov State Model

    Creates and returns a Markov State Model given a data folder.
    Intended to follow up the evolution of an adaptive sampling run.
    Allows saving the model and several informative plots.

    Parameters
    ----------
    folder : str
        Data folder where adaptive is running. Required.
    out_folder : str
        Output folder to store derived data
    skip : int
        Number of frames to skip while projecting the MD data
    metrics : [:class: `Metric` object]
        Metric array used to project the data
    clu : int
        Number of clusters to create using the MiniBatchKMeans method.
    tica : bool
        Whether to use TICA (true) or GWPCA (false) for dimensionality
        reduction
    ticadim : int
        Number of dimensions to project the data onto. If None, the model
        will be created using the raw projected data
    tica_lag : int, optional
        Lag passed to TICA or GWPCA
    model_lag : int
        Lag time used to create the model, expressed in `model_units`
    model_units : str, optional
        Units of `model_lag`
    macro_N : int
        Number of macrostates to split the final Markov State Model into.
        If falsy, no Markov model is built
    bulk_split : optional
        If truthy, it is passed to `create_bulk` to split the bulk state;
        a failure there is reported and otherwise ignored
    fes : bool, optional
        Currently unused; the free energy surface plot is disabled
    rg_analysis : bool, optional
        If true, a plot with information relative to the radius of gyration
        of the molecule will be created.
    save : bool, optional
        If true, the model will be saved in the output folder
    data_fstep : float, optional
        If truthy, assigned to the `fstep` attribute of the data before
        clustering

    Returns
    -------
    :class:`Model`
        Final model

    Raises
    ------
    TypeError
        If `folder` is not given.
    Exception
        If the clustering fails (the message is prefixed with "Error ").
    """
    from htmd.model import Model
    from htmd.molecule.molecule import Molecule
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    from sklearn.cluster import MiniBatchKMeans
    from IDP_htmd.IDP_model import plot_RG
    from IDP_htmd.model_utils import create_bulk
    from glob import glob
    import os

    if folder is None:
        raise TypeError("analyze_folder() requires the 'folder' argument")

    if os.path.isdir(out_folder):
        print("Folder already exists")
    os.makedirs(out_folder, exist_ok=True)

    # Paths are joined explicitly so that `folder` works with or without a
    # trailing separator.
    simlist_file = os.path.join(folder, "simlist.npy")
    try:
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # simlist.npy files from a trusted source.
        fsims = np.load(simlist_file, allow_pickle=True)
    except Exception:
        print("Creating simlist")
        sims = glob(os.path.join(folder, 'filtered', '*', ''))
        fsims = simlist(sims, os.path.join(folder, 'filtered', 'filtered.pdb'))
    else:
        print(f"Loaded {simlist_file}")
    metr = Metric(fsims, skip=skip)
    metr.set(metrics)

    # Check if this gives problems to ITS

    # Reuse the data of a previously saved model when one can be loaded.
    model_file = f"{out_folder}/model.dat"
    try:
        out_data = Model(file=model_file).data
    except Exception:
        out_data = None

    if out_data is not None:
        print(f"Loading model: {model_file}")
    elif tica and ticadim:
        from htmd.projections.tica import TICA
        print("Projecting TICA")
        tica_model = TICA(metr, tica_lag)
        out_data = tica_model.project(ticadim)
    elif not tica and ticadim:
        from htmd.projections.gwpca import GWPCA
        data = metr.project()
        data.dropTraj()
        print("using GWPCA")
        gwpca = GWPCA(data, tica_lag)
        out_data = gwpca.project(ticadim)
    else:
        print("Not using TICA")
        data = metr.project()
        data.dropTraj()
        out_data = data

    if data_fstep:
        out_data.fstep = data_fstep
    try:
        out_data.cluster(MiniBatchKMeans(n_clusters=clu), mergesmall=5)
    except Exception as e:
        raise Exception("Error " + str(e)) from e

    model = Model(out_data)
    model.plotTimescales(plot=False, save=f"{out_folder}/1_its.png")

    if macro_N:
        model.markovModel(model_lag, macro_N, units=model_units)

        if bulk_split:
            # Best effort: a failed bulk split must not abort the analysis.
            try:
                print("Starting bulk splitting")
                create_bulk(model, bulk_split)
            except Exception as e:
                print("Could not perform the bulk splitting")
                print(e)

        model.eqDistribution(plot=False,
                             save=f"{out_folder}/1.2_eqDistribution.png")

        if rg_analysis:
            # Imported under another name so the boolean parameter is not
            # shadowed.
            from IDP_htmd.IDP_analysis import rg_analysis as compute_rg
            mol = Molecule(model.data.simlist[0].molfile)
            rg_data = compute_rg(model, skip=skip)
            plot_RG(rg_data, mol, save=f"{out_folder}/1.4_rg.png")

    if save:
        model.save(f"{out_folder}/model.dat")

    return model
Esempio n. 19
0
    def _algorithm(self):
        """Run one epoch: build the MSM, score the action states and respawn.

        The simulations are projected (optionally through TICA) and an MSM is
        built. The states of the chosen action space (``self.actionspace``)
        are then scored with UCB-style values, computed from Q-values that are
        optionally initialised from the goal function. Frames from the
        selected states are written as inputs for the next simulations.

        Returns
        -------
        bool
            False when ``_checkNFrames`` rejects the data, True once the new
            inputs have been written.

        Raises
        ------
        RuntimeError
            If the goal function projects a different number of simulations
            than the MSM projection.
        ValueError
            If simulations have to be spawned but no action state exists.
        """
        from htmd.kinetics import Kinetics
        sims = self._getSimlist()
        metr = Metric(sims, skip=self.skip)
        metr.set(self.projection)

        data = metr.project()
        data.dropTraj()  # Drop before TICA to avoid broken trajectories

        if self.goalfunction is not None:
            goaldata = self._getGoalData(data.simlist)
            if len(data.simlist) != len(goaldata.simlist):
                raise RuntimeError('The goal function was not able to project all trajectories that the MSM projection could. Check for possible errors in the goal function.')
            goaldataconcat = np.concatenate(goaldata.dat)
            if self.save:
                makedirs('saveddata', exist_ok=True)
                goaldata.save(path.join('saveddata', 'e{}_goaldata.dat'.format(self._getEpoch())))

        # TICA is fitted on projected data; passing the Metric object itself
        # was reported to be far too slow.
        if self.ticadim > 0:
            # The lag is capped at half the shortest trajectory and is at
            # least 2.
            ticalag = int(np.ceil(max(2, min(np.min(data.trajLengths) / 2, self.ticalag))))
            tica = TICA(data, ticalag)
            datatica = tica.project(self.ticadim)
            if not self._checkNFrames(datatica):
                return False
            self._createMSM(datatica)
        else:
            if not self._checkNFrames(data):
                return False
            self._createMSM(data)

        confstatdist = self.conformationStationaryDistribution(self._model)

        # Pick the data set whose clusters act as the action states.
        if self.actionspace == 'metric':
            if not data.K:
                data.cluster(self.clustmethod(n_clusters=self._numClusters(data.numFrames)))
            data_q = data.copy()
        elif self.actionspace == 'goal':
            data_q = goaldata.copy()
        elif self.actionspace == 'tica':
            data_q = datatica.copy()
        elif self.actionspace == 'ticapcca':
            data_q = datatica.copy()
            # Replace each cluster index by the macrostate it belongs to.
            for traj in data_q.trajectories:
                traj.cluster = self._model.macro_ofcluster[traj.cluster]
            data_q.K = self._model.macronum

        if self.recluster:
            print('Reclustering with {}'.format(self.reclusterMethod))
            data_q.cluster(self.reclusterMethod)

        numstates = data_q.K
        print('Numstates: {}'.format(numstates))
        currepoch = self._getEpoch()
        q_values = np.zeros(numstates, dtype=np.float32)
        n_values = np.zeros(numstates, dtype=np.int32)

        if self.random:  # If random mode respawn from random action states
            action_sel = np.zeros(numstates, dtype=int)
            N = self.nmax - self._running
            randomactions = np.bincount(np.random.randint(numstates, size=N))
            action_sel[:len(randomactions)] = randomactions
            if self.save_qval:
                makedirs('saveddata', exist_ok=True)
                np.save(path.join('saveddata', 'e{}_actions.npy'.format(currepoch)), action_sel)
            relFrames = self._getSpawnFrames_UCB(action_sel, data_q)
            self._writeInputs(data.rel2sim(np.concatenate(relFrames)))
            return True

        if self.goalfunction is not None:
            # For every cluster in data_q, get the max score and initialize
            if self.goal_preprocess is not None:
                goaldataconcat = self.goal_preprocess(goaldataconcat)
            qstconcat = np.concatenate(data_q.St)
            statemaxes = np.zeros(numstates)
            np.maximum.at(statemaxes, qstconcat, np.squeeze(goaldataconcat))
            if not self.pucb:
                goalenergies = -Kinetics._kB * self.temperature * np.log(1-statemaxes)
                q_values = goalenergies
                # NOTE: needs self.nframes to be set properly.
                n_values += int((self.nframes / self._numClusters(self.nframes)) * self.goal_init)

        rewardtraj = np.arange(data_q.numTrajectories)  # Recalculate reward for all states
        rewards = self.getRewards(rewardtraj, data_q, confstatdist, numstates, self.reward_method, self.reward_mode, self.reward_window)
        for i in range(numstates):
            if len(rewards[i]) == 0:
                continue
            q_values[i] = updatingMean(q_values[i], n_values[i], rewards[i])
        n_values += np.array([len(x) for x in rewards])

        if self.save_qval:
            makedirs('saveddata', exist_ok=True)
            np.save(path.join('saveddata', 'e{}_qval.npy'.format(currepoch)), q_values)
            np.save(path.join('saveddata', 'e{}_nval.npy'.format(currepoch)), n_values)

        if self.pucb:
            ucb_values = np.array([self.count_pucb(q_values[clust], self.exploration, statemaxes[clust], currepoch + 1, n_values[clust]) for clust in range(numstates)])
        else:
            ucb_values = np.array([self.count_ucb(q_values[clust], self.exploration, currepoch + 1, n_values[clust]) for clust in range(numstates)])

        if self.save_qval:
            makedirs('saveddata', exist_ok=True)
            np.save(path.join('saveddata', 'e{}_ucbvals.npy'.format(currepoch)), ucb_values)

        N = self.nmax - self._running
        if self.actionpool <= 0:
            self.actionpool = N

        topactions = np.argsort(-ucb_values)[:self.actionpool]
        if N > 0 and len(topactions) == 0:
            raise ValueError('No action states are available to respawn from.')
        # Sampling without replacement cannot draw more actions than the pool
        # holds, so draw at most len(topactions); the loop below repeats
        # actions until N simulations are assigned.
        action = np.random.choice(topactions, min(N, len(topactions)), replace=False)

        action_sel = np.zeros(numstates, dtype=int)
        action_sel[action] += 1
        while np.sum(action_sel) < N:  # When K is lower than N repeat some actions
            for a in action:
                action_sel[a] += 1
                if np.sum(action_sel) == N:
                    break

        if self.save_qval:
            np.save(path.join('saveddata', 'e{}_actions.npy'.format(currepoch)), action_sel)
        relFrames = self._getSpawnFrames_UCB(action_sel, data_q)
        self._writeInputs(data.rel2sim(np.concatenate(relFrames)))
        return True
Esempio n. 20
0
    # Self-test of MetricTMscore: first on a single molecule, then through
    # the Metric/simlist machinery.
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import numpy as np
    from os import path

    mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb'))
    mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc'))
    # The reference keeps only index 0 of the last coords axis
    # (presumably the first frame - TODO confirm).
    ref = mol.copy()
    ref.coords = np.atleast_3d(ref.coords[:, :, 0])
    metr = MetricTMscore(ref, 'protein and name CA')
    data = metr.project(mol)

    # Expected values for the last 20 projected entries, compared with an
    # absolute tolerance of 0.001.
    lasttm = np.array([0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511, 0.95677591, 0.96544727, 0.96359811,
                       0.95658912, 0.96893117, 0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048, 0.95993426,
                       0.96543296, 0.96806875, 0.96437248, 0.96144066], dtype=np.float32)
    assert np.all(np.abs(data[-20:].flatten() - lasttm) < 0.001), 'Coordinates calculation is broken'


    # Same projection applied to two simulations of the "adaptive" data set.
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    fsims = simlist([path.join(dd, 'data', 'e1s1_1'), path.join(dd, 'data', 'e1s2_1')],
                    path.join(dd, 'generators', '1', 'structure.pdb'))
    ref = Molecule(path.join(dd, 'generators', '1', 'structure.pdb'))

    metr2 = Metric(fsims)
    metr2.projection(MetricTMscore(ref, 'protein and name CA'))
    data2 = metr2.project()

    # The projection of the first trajectory must have shape (6, 1).
    assert data2.trajectories[0].projection.shape == (6, 1)
Esempio n. 21
0
        _getPlumedRoot()
    except:
        print("Tests in %s skipped because plumed executable not found." % __file__)
        sys.exit()



    # Simlist: project two simulations of the "adaptive" data set with two
    # PLUMED DISTANCE variables.
    dd = htmd.home.home(dataDir="adaptive")
    fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                         dd + '/generators/1/structure.pdb')
    metr = Metric(fsims)
    metr.set(MetricPlumed2(
        ['d1: DISTANCE ATOMS=2,3',
         'd2: DISTANCE ATOMS=5,6']))
    data2 = metr.project()


    # One simulation: project a single molecule directly with the metric.
    testpath=os.path.join(htmd.home.home(), 'data', '1kdx')
    mol = Molecule(os.path.join(testpath, '1kdx_0.pdb'))
    mol.read(os.path.join(htmd.home.home(), 'data', '1kdx', '1kdx.dcd'))

    metric = MetricPlumed2(['d1: DISTANCE ATOMS=1,200',
                            'd2: DISTANCE ATOMS=5,6'])
    data = metric.project(mol)
    # Expected values of the first projected column (d1), compared with an
    # absolute tolerance of 0.01.
    ref = np.array([0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392, 19.16376, 20.393544,
                    23.665517, 22.298349, 22.659769, 22.667669, 22.484084, 20.893447, 18.791701,
                    21.833056, 19.901318])
    assert np.all(np.abs(ref - data[:, 0]) < 0.01), 'Plumed demo calculation is broken'
Esempio n. 22
0
    # Self-test of combining projected data sets and dropping dimensions.
    from glob import glob
    from htmd.projections.metric import Metric
    from htmd.projections.metricdistance import MetricDistance
    from htmd.projections.metricdihedral import MetricDihedral
    from htmd.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')

    # Build the simulation list and filter it with the 'not water' selection;
    # the filtered output goes to a temporary location.
    sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    # First projection: contacts; second projection: dihedrals.
    metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts',
                            groupsel1='residue', threshold=4))
    data1 = metr.project()
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions: 897 combined dimensions are expected, the first 9
    # of them being of type 'contact'.
    assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct'
    assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct'
    assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'

    # Testing dimension dropping / keeping
    # A copy is taken before the first 9 dimensions are dropped in place.
    datatmp = data1.copy()
    data1.dropDimensions(range(9))
    assert np.array_equal(data1.description.shape, (888, 3)), 'dropDimensions not working correct'
Esempio n. 23
0
    except:
        print("Tests in %s skipped because plumed executable not found." %
              __file__)
        sys.exit()

    # Run the doctests embedded in this module first.
    import doctest
    doctest.testmod()

    # Simlist
    # Two simulation folders of the 'adaptive' data set share one structure file.
    dd = htmd.home(dataDir="adaptive")
    fsims = htmd.simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                         dd + '/generators/1/structure.pdb')
    metr = Metric(fsims)
    # Project with two PLUMED DISTANCE statements (labels d1 and d2).
    metr.projection(
        MetricPlumed2(['d1: DISTANCE ATOMS=2,3', 'd2: DISTANCE ATOMS=5,6']))
    data2 = metr.project()

    # One simulation
    # Load the 1kdx structure and then read its DCD trajectory into the same Molecule.
    testpath = os.path.join(htmd.home(), 'data', '1kdx')
    mol = Molecule(os.path.join(testpath, '1kdx_0.pdb'))
    mol.read(os.path.join(htmd.home(), 'data', '1kdx', '1kdx.dcd'))

    # Here the projection is applied directly to a Molecule, without a Metric wrapper.
    metric = MetricPlumed2(
        ['d1: DISTANCE ATOMS=1,200', 'd2: DISTANCE ATOMS=5,6'])
    data = metric.project(mol)
    # 17 reference values; presumably one 'd1' distance per frame -- the
    # comparison that uses them is cut off after this fragment.
    ref = np.array([
        0.536674, 21.722393, 22.689391, 18.402114, 23.431387, 23.13392,
        19.16376, 20.393544, 23.665517, 22.298349, 22.659769, 22.667669,
        22.484084, 20.893447, 18.791701, 21.833056, 19.901318
    ])
    assert np.all(
Esempio n. 24
0
File: tica.py Progetto: prokia/htmd

# Self-test executed when the module is run as a script: projects two villin
# simulations onto CA self-distances and checks TICA on a subset of dimensions.
if __name__ == '__main__':
    from htmd.simlist import simlist
    from glob import glob
    from htmd.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='villin')

    # Every sub-folder of the data set is a simulation; all share 'filtered.pdb'.
    sims = simlist(glob(join(testfolder, '*', '')),
                   join(testfolder, 'filtered.pdb'))
    # Only the first two simulations are projected.
    met = Metric(sims[0:2])
    met.set(MetricSelfDistance('protein and name CA'))
    data = met.project()
    # NOTE(review): fstep is presumably the frame step in ns, which would make
    # the 0.2 'ns' lag below equal to the 2-frame lag -- confirm.
    data.fstep = 0.1

    # TICA is restricted to dimensions 2..9; the lag is given once as 2
    # (default units) and once as 0.2 with units='ns'.
    tica = TICA(data, 2, dimensions=range(2, 10))
    datatica = tica.project(2)
    tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10))
    datatica5 = tica5.project(2)
    # Reference values for the last 3 rows x last 3 columns of the first trajectory.
    expected = [[3.69098878, -0.33862674, 0.85779184],
                [3.77816105, -0.31887317, 0.87724227],
                [3.83537507, -0.11878026, 0.65236956]]
    # Absolute values are compared on both sides, so the check ignores signs.
    assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]),
                       np.abs(np.array(expected, dtype=np.float32)),
                       rtol=0,
                       atol=0.01)
    assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]),
                       np.abs(np.array(expected, dtype=np.float32)),
Esempio n. 25
0
File: tica.py Progetto: jeiros/htmd
        return datatica


if __name__ == '__main__':
    # Self-test: run TICA on a subset of the dimensions of an in-memory
    # projection and compare against stored reference values.
    from htmd.simlist import simlist
    from glob import glob
    from htmd.projections.metricdistance import MetricSelfDistance
    from htmd.home import home
    from os.path import join

    villin_dir = home(dataDir='villin')
    traj_dirs = glob(join(villin_dir, '*', ''))
    topology = join(villin_dir, 'filtered.pdb')

    # Only the first two simulations of the list are projected.
    projector = Metric(simlist(traj_dirs, topology)[0:2])
    projector.projection(MetricSelfDistance('protein and name CA'))
    data = projector.project()
    data.fstep = 0.1

    # Same analysis twice: lag given as 2 (default units), then as 0.2 with units='ns'.
    datatica = TICA(data, 2, dimensions=range(2, 10)).project(2)
    datatica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)).project(2)

    reference = np.abs(np.array([[3.69098878, -0.33862674, 0.85779184],
                                 [3.77816105, -0.31887317, 0.87724227],
                                 [3.83537507, -0.11878026, 0.65236956]],
                                dtype=np.float32))
    # Both results are compared sign-insensitively against the same reference:
    # last 3 rows x last 3 columns of the first trajectory.
    for projected in (datatica, datatica5):
        tail = projected.trajectories[0].projection[-3:, -3:]
        assert np.allclose(np.abs(tail), reference, rtol=0, atol=0.01)

    # Rows 587 and 588 of the description must be 'tica', rows 0..586 'distance'.
    description = datatica.description
    assert np.all(description.iloc[[587, 588]].type == 'tica')
    assert np.all(description.iloc[range(587)].type == 'distance')
    print('In-memory TICA with subset of dimensions passed test.')
Esempio n. 26
0
    import numpy as np
    from os import path

    # Load the test structure, then read the trajectory into the same Molecule.
    mol = Molecule(path.join(home(), 'data', 'metricdistance', 'filtered.pdb'))
    mol.read(path.join(home(), 'data', 'metricdistance', 'traj.xtc'))
    # The reference is a copy of mol that keeps only index 0 along the third
    # axis of coords (presumably the first frame -- confirm); atleast_3d
    # restores the third dimension removed by the integer index.
    ref = mol.copy()
    ref.coords = np.atleast_3d(ref.coords[:, :, 0])
    metr = MetricRmsd(ref, 'protein and name CA')
    data = metr.project(mol)

    # Stored reference for the last 20 projected values.
    lastrmsd = np.array([1.30797791,  1.29860222,  1.25042927,  1.31319737,  1.27044261,
                          1.40294552,  1.25354612,  1.30127883,  1.40618336,  1.18303752,
                          1.24414587,  1.34513164,  1.31932807,  1.34282494,  1.2261436 ,
                          1.36359048,  1.26243281,  1.21157813,  1.26476419,  1.29413617], dtype=np.float32)
    # Element-wise tolerance of 0.001 against the reference.
    assert np.all(np.abs(data[-20:] - lastrmsd) < 0.001), 'Coordinates calculation is broken'


    # Second check: the same projection driven through Metric on a simulation list.
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    dd = home(dataDir="adaptive")
    fsims = simlist([dd + '/data/e1s1_1/', dd + '/data/e1s2_1/'],
                         dd + '/generators/1/structure.pdb')
    # The structure file used for the simulation list also serves as the RMSD reference.
    ref = Molecule(dd+"/generators/1/structure.pdb")

    metr2 = Metric(fsims)
    metr2.set(MetricRmsd(ref, 'protein and name CA'))
    data2 = metr2.project()

    # The first trajectory is expected to give a (6, 1) projection
    # (presumably 6 frames x 1 RMSD column -- confirm).
    assert data2.trajectories[0].projection.shape == (6,1)

    pass
Esempio n. 27
0
            An array containing the null data.
        """

        trajlen = mol.numFrames
        data = np.zeros((trajlen, self._ndim), dtype=np.float32)

        return data


if __name__ == "__main__":
    # Self-test: the null projection must produce the requested number of
    # columns for the first trajectory of a small simulation list.
    import htmd.home
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    import htmd.projections.metricnull

    adaptive_dir = htmd.home.home(dataDir="adaptive")
    traj_dirs = [adaptive_dir + '/data/e1s1_1/', adaptive_dir + '/data/e1s2_1/']
    fsims = simlist(traj_dirs, adaptive_dir + '/generators/1/structure.pdb')

    # Checked for 2 dimensions first, then for 1; a fresh Metric is built each time.
    for ndim in (2, 1):
        null_metric = Metric(fsims)
        null_metric.projection(htmd.projections.metricnull.MetricNull(ndim))
        projected = null_metric.project()
        assert projected.trajectories[0].projection.shape == (6, ndim)
Esempio n. 28
0
    from htmd.home import home
    from os.path import join

    # Resolve the folder of the 'adaptive' data set through home().
    testfolder = home(dataDir='adaptive')

    # Simulation folders are taken from data/*/ and structures from input/*/structure.pdb.
    sims = simlist(glob(join(testfolder, 'data', '*', '')),
                   glob(join(testfolder, 'input', '*', 'structure.pdb')))
    # Apply the 'not water' selection; tempname() provides the second argument
    # (presumably a scratch output location -- confirm against simfilter).
    fsims = simfilter(sims, tempname(), 'not water')
    metr = Metric(fsims)
    # First projection: residue-grouped contacts between the CA atom of resid 10
    # and the non-hydrogen atoms of BEN (threshold=4; units not visible here).
    metr.set(
        MetricDistance('protein and resid 10 and name CA',
                       'resname BEN and noh',
                       metric='contacts',
                       groupsel1='residue',
                       threshold=4))
    data1 = metr.project()
    # Second projection: the same Metric object is re-used with a dihedral projection.
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    # The return value is discarded: the assertions below inspect data1 itself.
    data1.combine(data2)

    # Testing dimensions
    # After combining, 897 dimensions are expected in total, the first 9 of type 'contact'.
    assert np.array_equal(data1.description.shape,
                          (897, 3)), 'combine not working correct'
    assert np.array_equal(data1.trajectories[0].projection.shape,
                          (6, 897)), 'combine not working correct'
    assert np.array_equal(
        np.where(data1.description.type == 'contact')[0],
        [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'
Esempio n. 29
0
    def _algorithm(self):
        """Analyse the data collected so far and write inputs for new simulations.

        The steps run in this order:

        1. Build the simulation list from ``self.datapath`` / ``self.inputpath``
           and filter it with ``self.filtersel`` into ``self.filteredpath``.
        2. Project it with ``MetricDistance`` (when ``self.metricsel2`` is set)
           or ``MetricSelfDistance``, using ``self.metricsel1``,
           ``self.metrictype`` and ``self.skip``.
        3. Optionally reduce the projection with TICA (``self.ticadim > 0``).
        4. Cluster with ``self.clustmethod`` and build a Markov model with lag
           ``self.lag`` and at most ``self.macronum`` macrostates.
        5. Turn the model into spawn counts via ``self._criteria`` and
           ``self._spawn`` (``self.nmax - self.running`` is passed as the
           second argument), sample frames from the selected microstates and
           hand them to ``self._writeInputs``.

        Nothing is returned.
        """
        logger.info('Postprocessing new data')
        datalist = simlist(
            glob(path.join(self.datapath, '*', '')),
            glob(path.join(self.inputpath, '*', 'structure.pdb')),
            glob(path.join(self.inputpath, '*', '')))
        filtlist = simfilter(datalist,
                             self.filteredpath,
                             filtersel=self.filtersel)

        # Two selections -> distances between them; one selection -> self-distances.
        if hasattr(self, 'metricsel2') and self.metricsel2 is not None:
            proj = MetricDistance(self.metricsel1,
                                  self.metricsel2,
                                  metric=self.metrictype)
        else:
            proj = MetricSelfDistance(self.metricsel1, metric=self.metrictype)
        metr = Metric(filtlist, skip=self.skip)
        metr.projection(proj)
        data = metr.project()

        #if self.contactsym is not None:
        #    contactSymmetry(data, self.contactsym)

        # NOTE(review): dropTraj() is called without arguments; presumably it
        # discards unusable trajectories -- confirm against its definition.
        data.dropTraj()
        if self.ticadim > 0:
            # TICA lag of 20 divided by the skip, rounded up, but never below 2.
            tica = TICA(data, int(max(2, np.ceil(20 / self.skip))))
            datadr = tica.project(self.ticadim)
        else:
            datadr = data

        K = int(
            max(np.round(0.6 * np.log10(datadr.numFrames / 1000) * 1000 + 50),
                100))  # heuristic
        if K > datadr.numFrames / 3:  # Freaking ugly patches ...
            K = int(datadr.numFrames / 3)

        datadr.cluster(self.clustmethod(n_clusters=K), mergesmall=5)
        replacement = False
        if datadr.K < 10:
            # Fewer than 10 clusters after merging the small ones: cluster
            # again without merging and sample with replacement later on.
            datadr.cluster(self.clustmethod(n_clusters=K))
            replacement = True

        model = Model(datadr)
        macronum = self.macronum
        if datadr.K < macronum:
            # np.ceil returns a float; cast to int because the value is used
            # as a count (``nits`` below) and printed in the warning.
            macronum = int(np.ceil(datadr.K / 2))
            logger.warning(
                'Using less macrostates than requested due to lack of microstates. macronum = '
                + str(macronum))

        # Limit the number of macrostates to the number of implied timescales
        # that exceed the lag time (at least 2, at most self.macronum).
        from pyemma.msm import timescales_msm
        timesc = timescales_msm(datadr.St.tolist(),
                                lags=self.lag,
                                nits=macronum).get_timescales()
        macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2))

        model.markovModel(self.lag, macronum)
        p_i = self._criteria(model, self.method)
        (spawncounts, prob) = self._spawn(p_i, self.nmax - self.running)
        logger.debug('spawncounts {}'.format(spawncounts))
        # Only states that received at least one spawn are sampled.
        stateIdx = np.where(spawncounts > 0)[0]
        _, relFrames = model.sampleStates(stateIdx,
                                          spawncounts[stateIdx],
                                          statetype='micro',
                                          replacement=replacement)
        logger.debug('relFrames {}'.format(relFrames))

        self._writeInputs(datadr.rel2sim(np.concatenate(relFrames)))
Esempio n. 30
0
            An array containing the null data.
        """

        trajlen = mol.numFrames
        data = np.zeros((trajlen, self._ndim), dtype=np.float32)

        return data


if __name__ == "__main__":
    # Self-test: project a two-simulation list with the null metric and
    # check the shape of the first trajectory's projection.
    import htmd.home
    from htmd.simlist import simlist
    from htmd.projections.metric import Metric
    import htmd.projections.metricnull

    base = htmd.home.home(dataDir="adaptive")
    structure = base + '/generators/1/structure.pdb'
    fsims = simlist([base + '/data/e1s1_1/', base + '/data/e1s2_1/'], structure)

    # The 2-dimensional case runs first, then the 1-dimensional one;
    # each uses its own Metric instance.
    for width in (2, 1):
        runner = Metric(fsims)
        runner.projection(htmd.projections.metricnull.MetricNull(width))
        result = runner.project()
        assert result.trajectories[0].projection.shape == (6, width)