Exemple #1
0
 def build_metadata(self, meta):
     """
     Builds an msmbuilder metadata object.

     Parameters
     ----------
     meta : None, pandas.DataFrame, or any input accepted by ``load_meta``
         ``None`` triggers automatic discovery of trajectories under
         ``self.data_folder``; a DataFrame is returned unchanged; any
         other value is handed to ``load_meta``.

     Returns
     -------
     The metadata object, or ``None`` when automatic discovery fails
     (a warning is logged in that case).

     Side effects
     ------------
     On the automatic path the constructed parser is stored on
     ``self.parser``.
     """
     if meta is not None:
         # Caller supplied metadata: use a DataFrame as-is, load anything else.
         if isinstance(meta, pd.DataFrame):
             return meta
         return load_meta(meta)

     try:
         self.parser = GenericParser(
             # Raw string so the regex escapes (\d) are not interpreted as
             # invalid Python string escapes.
             fn_re=r'{}/(e\d+s\d+)_.*/Production.nc'.format(
                 self.data_folder),
             group_names=['sim'],
             group_transforms=[lambda x: x],
             top_fn='',
             step_ps=self.timestep)
         # Hand over the parser that was just stored on the instance.
         return gather_metadata('{}/e*/*nc'.format(self.data_folder),
                                self.parser)
     except Exception:
         # Deliberate best effort: report the failure and let the caller
         # deal with a missing metadata object. KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         logger.warning("Could not automatically build metadata")
         return None
def sample_clusters():
    """Save a random sample of frames for every cluster as a DCD file.

    For each cluster index ``i`` in ``range(int(num_clusters))`` up to 100
    rows are drawn from the module-level ``df`` where
    ``df['Trajectory'] == i``. The corresponding frames are loaded, joined
    into one trajectory, superposed onto ``topology.pdb`` and written to
    ``clusters/rmsd_cluster-<i>.dcd``.

    Relies on the module-level names ``df`` and ``num_clusters``.
    Clusters without any rows are skipped with a printed notice.
    """
    meta = load_meta()
    tops = preload_tops(meta)
    print('Sampling trajectories')
    ref = md.load('topology.pdb')
    for i in range(int(num_clusters)):
        print(i)
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` performs the
        # same boolean-mask / column-label selection.
        members = df.loc[df['Trajectory'] == i, ['Key', 'Time_ps']]
        if members.empty:
            print('No frames found for cluster {}, skipping'.format(i))
            continue
        # Never request more rows than exist, otherwise ``sample`` raises.
        df_smp = members.sample(min(100, len(members)))
        # NOTE(review): 'Time_ps' is used directly as a frame index below;
        # this only holds if frames are spaced 1 ps apart -- confirm.
        inds = zip(df_smp['Key'], df_smp['Time_ps'])

        # Use loc because sample_dimension is nice
        # Reuse the preloaded topologies instead of re-reading the topology
        # file for every single frame.
        traj = md.join(
            md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i,
                          top=tops[meta.loc[traj_i]['top_fn']])
            for traj_i, frame_i in inds
        )

        # Original trajectories include both BT1 and BT2 so need to superpose
        traj.superpose(reference=ref)

        # Save
        traj_fn = "clusters/rmsd_cluster-{}.dcd".format(i)
        backup(traj_fn)
        traj.save(traj_fn)
Exemple #3
0
"""Featurize trajectories as Gaussian-kernel RMSD to centroid structures

{{header}}

Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top

## Load
meta = load_meta()
centroids = md.load("centroids.xtc", top=preload_top(meta))

## Kernel
SIGMA = 0.3  # nm
from msmbuilder.featurizer import RMSDFeaturizer
import numpy as np

# RMSD of every frame to the centroids, passed through a Gaussian kernel
# of width SIGMA, keyed by the trajectory key yielded by itertrajs.
featurizer = RMSDFeaturizer(centroids)
kernel_denominator = 2 * (SIGMA**2)
lfeats = {
    key: np.exp(-featurizer.partial_transform(frames)**2 / kernel_denominator)
    for key, frames in itertrajs(meta)
}
save_trajs(lfeats, 'ftrajs', meta)
from msmbuilder.io import load_meta, preload_tops
from mdtraj import load
from hbonds import HBondFeaturizer
# from msmbuilder.feature_selection import FeatureSelector
from featureselector import FeatureSelector


# Read the metadata table and load each distinct topology once.
meta = load_meta('meta.pandas.pickl')
tops = preload_tops(meta)

# Load every trajectory listed in the metadata with stride=10, reusing the
# preloaded topology that belongs to each row.
trajs = [
    load(record['traj_fn'], top=tops[record['top_fn']], stride=10)
    for _, record in meta.iterrows()
]

def traj_summary(ftrajs):
    """Print how many feature trajectories there are and each one's shape.

    ``ftrajs`` must support ``len()`` and iteration, and every element must
    expose a ``shape`` attribute.
    """
    header = 'Length of ftraj {}'.format(len(ftrajs))
    print(header)

    shape_line = '\t Shape: {}'
    for ftraj in ftrajs:
        print(shape_line.format(ftraj.shape))

def test_HBondFeaturizer(traj_list):
    """Fit an ``HBondFeaturizer(freq=0.0)`` on ``traj_list`` and print a
    summary of the transformed trajectories."""
    featurizer = HBondFeaturizer(freq=0.0)
    featurizer.fit(traj_list)
    traj_summary(featurizer.transform(traj_list))


def test_HBondsFeatExtr(traj_list):
    """Run an ``HBondFeaturizer`` through ``FeatureSelector``.

    Fits the selector on ``traj_list`` and transforms the same list; the
    transformed output is not inspected or returned.
    """
    named_features = [('hbonds', HBondFeaturizer())]
    selector = FeatureSelector(named_features, which_feat=['hbonds'])
    selector.fit(traj_list)
    # The result is discarded; this only exercises fit + transform.
    selector.transform(traj_list)
Exemple #5
0
"""Check for abnormally high rmsd values to a reference structure

{{header}}

Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb

"""

import mdtraj as md

from msmbuilder.io import load_meta, itertrajs, save_trajs

## Load reference structure
ref = md.load("top.pdb")
meta = load_meta()

## Do calculation and save
# One RMSD array per trajectory, keyed by the key yielded by itertrajs.
rmsds = {
    key: md.rmsd(frames, ref)
    for key, frames in itertrajs(meta)
}
save_trajs(rmsds, 'rmsds', meta)
Exemple #6
0
#!/usr/bin/env python
from msmbuilder.dataset import dataset
from msmbuilder.io import save_trajs, load_meta
import argparse
# Command-line interface: <dataset> <meta> [trajs]
parser = argparse.ArgumentParser(
    prog='dataset_h5_to_npy_dir.py',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='''version1''')

parser.add_argument("dataset", help="""An HDF5 dataset""", type=str)
parser.add_argument("meta", help="A metadata pickl file", type=str)
# nargs='?' makes this positional optional; without it argparse requires the
# argument and the declared default can never take effect.
parser.add_argument("trajs",
                    nargs='?',
                    help="The folder in which to store the trajs",
                    type=str,
                    default='trajs')

if __name__ == '__main__':
    args = parser.parse_args()
    meta = load_meta(args.meta)
    ds = dataset(args.dataset)
    # Copy the dataset's (key, value) pairs into a plain dict for save_trajs.
    trajs = dict(ds.items())
    save_trajs(trajs, args.trajs, meta)
#!/bin/env python

from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser
import numpy as np
import mdtraj as md
from msmbuilder.featurizer import DihedralFeaturizer, AtomPairsFeaturizer
from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic
from multiprocessing import Pool
import contextlib

# Load the trajectory metadata table and preload the topologies it refers to.
meta = load_meta("meta.pandas.pickl")
tops = preload_tops(meta)

# Hard-coded atom numbers, one per entry, in ascending order.
# NOTE(review): judging by the name these are presumably the alpha-carbon
# atoms of the system under study -- confirm against the topology, including
# whether they are 0-based atom indices or 1-based serial numbers.
alpha_carbon_number = np.array([
    9, 26, 40, 59, 80, 92, 104, 111, 118, 139, 151, 170, 187, 194, 215, 234,
    253, 270, 289, 306, 320, 346, 358, 374, 386, 403, 419, 434, 453, 462, 474,
    490, 502, 516, 527, 538, 548, 567, 586, 605, 616, 628, 647, 669, 686, 708,
    720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941,
    948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133,
    1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324,
    1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516,
    1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713,
    1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909,
    1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126,
    2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310,
    2327, 2338
])
# Number of atoms listed in the table above.
num = len(alpha_carbon_number)

atompair = []
for i in range(num):