Beispiel #1
0
def load_metadata(traj_dir, top):
    """
    Gather metadata for the trajectories in a directory and save it.

    Every ``*.dcd`` path inside ``traj_dir`` is handed to a ``GenericParser``
    built from a file-name pattern with five captured groups, named
    ``PDB``, ``Temp``, ``Pressure``, ``Prod_Round`` and ``Act_Site``.  The
    gathered result is passed to ``save_meta`` and then returned.

    :param traj_dir: directory containing trajectories
    :param top: topology file name
    :return: metadata data frame
    """
    # Raw string: the pattern uses the regex escapes \w and \. which are not
    # valid Python string escapes and raise a warning in a plain literal.
    re_pattern = r'(\w+)-([0-9]{3})k-([0-9])atm-prod([0-9]+\.[0-9]+).*BT([0-9]+)*'
    captured_group_names = [
        'PDB', 'Temp', 'Pressure', 'Prod_Round', 'Act_Site'
    ]
    # One transform per captured group, in the same order as the names above.
    captured_group_transforms = [identity, float, float, identity, int]
    time_step = 1  # in picoseconds
    file_type = 'dcd'

    parser = GenericParser(re_pattern,
                           group_names=captured_group_names,
                           group_transforms=captured_group_transforms,
                           top_fn=top,
                           step_ps=time_step)
    meta = gather_metadata(os.path.join(traj_dir, "*.{}".format(file_type)),
                           parser)
    save_meta(meta)
    return meta
"""Find trajectories and associated metadata

msmbuilder autogenerated template version 2
created 2017-05-30T15:16:59.066163
please cite msmbuilder in any publications


"""

from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser

# Build the file-name parser: trajectory naming scheme, shared topology file
# and the step_ps value it should use.
parser = NumberedRunsParser(traj_fmt="trajectory-{run}.xtc",
                            top_fn="top.pdb",
                            step_ps=50)

# Gather metadata for every path matching the glob, then save it.
meta = gather_metadata("trajs/*.xtc", parser)
save_meta(meta)
Beispiel #3
0
from msmbuilder.io import NumberedRunsParser, gather_metadata
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.preprocessing import MinMaxScaler
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
from sklearn.pipeline import Pipeline
import os
from ..adaptive import create_folder

# Suppress all logging calls of severity CRITICAL and below.
logging.disable(logging.CRITICAL)

parser = NumberedRunsParser(
    traj_fmt='run-{run}.nc',
    top_fn='data_app/runs/structure.prmtop',
    step_ps=200,
)
# NOTE: the joined glob evaluates to 'data_app/runs//*nc' (doubled slash);
# it is kept exactly as written.
meta = gather_metadata('/'.join(['data_app/runs/', '*nc']), parser)

# Pipeline steps, applied in the order listed.
model = Pipeline([
    ('feat', DihedralFeaturizer()),
    ('scaler', MinMaxScaler()),
    ('tICA', tICA(lag_time=1, n_components=4)),
    ('clusterer', MiniBatchKMeans(n_clusters=5)),
    ('msm', MarkovStateModel(lag_time=1, n_timescales=4)),
])

spawns = [(0, 1)]
epoch = 1


class TestAppBase:
    def __init__(self):
        self.app = App(generator_folder='data_app/generators',
"""Find trajectories and associated metadata

{{header}}

Meta
----
depends:
  - trajs
  - top.pdb
"""

from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser

# Build the file-name parser: trajectory naming scheme, shared topology file
# and the step_ps value it should use.
parser = NumberedRunsParser(traj_fmt="trajectory-{run}.xtc",
                            top_fn="top.pdb",
                            step_ps=50)

# Gather metadata for every path matching the glob, then save it.
meta = gather_metadata("trajs/*.xtc", parser)
save_meta(meta)
Beispiel #5
0
from msmbuilder.io import GenericParser, save_meta, gather_metadata
from os.path import join
#
# File name parsing and metadata
#
def identity(x):
    """Return ``x`` unchanged (a no-op transform for a captured group)."""
    return x

# Raw string: the pattern uses the regex escape \w, which is not a valid
# Python string escape and raises a warning in a plain literal.
re_pattern = r'(\w+)-([0-9]+)-as([0-9]+)*'
captured_group_names = ['PDB', 'Traj_Num', 'Act_Site']
# One transform per captured group, in the same order as the names above.
captured_group_transforms = [identity, int, int]
time_step = 10  # in picoseconds
file_type = 'nc'

#
#  Gather and save the metadata
#


parser = GenericParser(re_pattern,
                       group_names=captured_group_names,
                       group_transforms=captured_group_transforms,
                       top_fn='proc_traj/2agy-as1.prmtop', step_ps=time_step)

meta = gather_metadata(join('proc_traj', "*.{}".format(file_type)), parser)
save_meta(meta)

from msmbuilder.feature_extraction import FunctionFeaturizer
Beispiel #6
0
# Helper functions
#


def identity(x):
    """Return ``x`` unchanged (a no-op transform for a captured group)."""
    return x


#
# File name parsing and metadata
#


# Raw string: the pattern uses the regex escapes \w and \. which are not
# valid Python string escapes and raise a warning in a plain literal.
re_pattern = r'(\w+)-([0-9]{3})k-([0-9])atm-prod([0-9]+\.[0-9]+).*BT([0-9]+)*'
captured_group_names = ['PDB', 'Temp', 'Pressure', 'Prod_Round', 'Act_Site']
# One transform per captured group, in the same order as the names above.
captured_group_transforms = [identity, float, float, identity, int]
time_step = 1
file_type = 'dcd'

#
#  Gather and save the metadata
#


parser = GenericParser(re_pattern,
                       group_names=captured_group_names,
                       group_transforms=captured_group_transforms,
                       top_fn='topology.pdb', step_ps=time_step)

meta = gather_metadata(os.path.join('traj', "*.{}".format(file_type)), parser)
save_meta(meta)
"""Find trajectories and associated metadata

msmbuilder autogenerated template version 2
created 2017-05-30T15:16:59.066163
please cite msmbuilder in any publications


"""

from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser

# Build the file-name parser: trajectory naming scheme, shared topology file
# and the step_ps value it should use.
parser = NumberedRunsParser(traj_fmt="trajectory-{run}.xtc",
                            top_fn="../Data/top.pdb",
                            step_ps=50)

# Gather metadata for every path matching the glob, then save it.
meta = gather_metadata("../Data/trajs/*.xtc", parser)
save_meta(meta)
Beispiel #8
0
alpha = 0.5
rmsd_target = '/scratch/jap12009/msm/fast/try1/monomer2_4us_ZN.pdb'
spawn = 10

# Read the round number from round.txt.  The original wrote ``f.close``
# without parentheses, which never closed the file; the ``with`` block
# closes it even if reading fails.
with open('round.txt') as f:
    lines = f.read()
# int() tolerates surrounding whitespace such as a trailing newline.
round_num = int(lines)

# Build the file-name parser, gather metadata for every path matching the
# glob, then save it.
parser = NumberedRunsParser(traj_fmt="trj-{run}.xtc",
                            top_fn="/scratch/jap12009/msm/fast/try1/frame0nw_startingAPO.pdb",
                            step_ps=240)
meta = gather_metadata("/scratch/jap12009/msm/fast/try1/trj/trj-*.xtc", parser)
save_meta(meta)

# Clustering object: num_clusters clusters using the 'rmsd' metric.
kcen = KCenters(n_clusters=num_clusters, metric='rmsd')

## Try to limit RAM usage
def guestimate_stride():
    total_data = meta['nframes'].sum()
    want = kcen.n_clusters * 20
    stride = max(1, total_data // want)
    print("Since we have", total_data, "frames, we're going to stride by",
          stride, "during fitting, because this is probably adequate for",