コード例 #1
0
ファイル: tICA.py プロジェクト: RobertArbon/AADH_Analysis
def featurize(featurizer, meta_data):
    """Featurize every trajectory listed in ``meta_data`` and save the results.

    Parameters
    ----------
    featurizer
        Object exposing ``partial_transform(traj)``. It is applied to each
        loaded trajectory and afterwards saved to ``featurizer.pickl``.
    meta_data
        Metadata table. It must provide ``iterrows()`` and its rows must
        carry the ``traj_fn`` and ``top_fn`` entries.

    Returns
    -------
    dict
        Maps each row index of ``meta_data`` to its featurized trajectory.
    """
    # The body previously referred to ``meta``, a name that is not a parameter
    # of this function; the ``meta_data`` argument is what callers pass in.
    tops = preload_tops(meta_data)

    def feat(irow):
        """Load the trajectory of one ``(index, row)`` pair and featurize it."""
        i, row = irow
        traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
        return i, featurizer.partial_transform(traj)

    feature_trajs = dict(map(feat, meta_data.iterrows()))

    save_trajs(feature_trajs, 'ftrajs', meta_data)
    save_generic(featurizer, 'featurizer.pickl')

    return feature_trajs
コード例 #2
0
        with Pool() as pool:
            feature_trajs = dict(pool.imap_unordered(msmb_feat, args))

        # # Create save objects
        # featurizer = dict([(x[0], x[2]) for x in feature_trajs])
        # feature_trajs = dict([(x[0], x[1]) for x in feature_trajs])

        selector = VarianceThreshold()
        selector.fit([traj for traj in feature_trajs.values()])
        ftrajs = {}
        for k, v in feature_trajs.items():
            ftrajs[k] = np.squeeze(selector.transform([v]))

        # SAVE
        save_trajs(ftrajs, 'featurized_trajectories/{}-ftraj'.format(name), meta)
        save_generic(feat, 'featurized_trajectories/{}-featurizer.pickl'.format(name))


    # pyEMMA FEATURIZERS
    featurizers = [('angles', 'add_angles', angles),
                   ('dihedrals', 'add_dihedrals', dihedrals)]

    for name, feat, indices in featurizers:
        print('Featurizing {}'.format(name))
        args = zip(meta.iterrows(), [feat] * meta.shape[0], [tops] * meta.shape[0],
                   [indices]*meta.shape[0])

        # Fit features
        with Pool() as pool:
            feature_trajs = dict(pool.imap_unordered(pyemma_feat, args))
コード例 #3
0
"""Make a microstate MSM

msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.116944
please cite msmbuilder in any publications
"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.msm import MarkovStateModel

## Load
meta, ktrajs = load_trajs('ktrajs')

## Fit
# A single model is estimated from all trajectories in ``ktrajs`` at once.
msm = MarkovStateModel(lag_time=2, n_timescales=10, verbose=False)
msm.fit(list(ktrajs.values()))

## Transform
# Each trajectory is then transformed on its own, under its original key.
microktrajs = {
    key: msm.partial_transform(sequence)
    for key, sequence in ktrajs.items()
}

## Save
print(msm.summarize())
save_generic(msm, 'msm.pickl')
save_trajs(microktrajs, 'microktrajs', meta)
コード例 #4
0
from utilities import plot_box

if __name__ == '__main__':

    # Load
    meta, feature_trajs = load_trajs('ftraj')

    # Select scaler
    featurizer = RobustScaler()

    # Fit on all trajectories together, then scale each one separately.
    # Only ``fit`` is needed here: the transformed output of the former
    # ``fit_transform`` call was discarded. A concrete list is passed instead
    # of a dict view so the estimator receives an indexable sequence.
    featurizer.fit(list(feature_trajs.values()))
    scaled_trajs = {
        key: featurizer.partial_transform(traj)
        for key, traj in feature_trajs.items()
    }

    # Plot the sorted per-feature variance of a random subsample of frames.
    sample = np.concatenate(list(scaled_trajs.values()))
    # Sampling without replacement raises ValueError if more rows are
    # requested than exist, so cap the sample size at the number of frames.
    n_sample = min(1000, sample.shape[0])
    sample = sample[np.random.choice(sample.shape[0], n_sample, replace=False), :]
    variance = np.var(sample, axis=0)
    ord_var = np.sort(variance)
    fig, ax = plt.subplots()
    ax.plot(range(ord_var.shape[0]), ord_var)
    fig.savefig('ScaledFeatureVariance.png')

    # Save
    save_trajs(scaled_trajs, 'straj', meta)
    save_generic(featurizer, 'scaler.pickl')
コード例 #5
0
ファイル: rmsd.py プロジェクト: xy21hb/msmbuilder
"""Check for abnormally high rmsd values to a reference structure

{{header}}

Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb

"""

import mdtraj as md

from msmbuilder.io import load_meta, itertrajs, save_trajs

## Load reference structure
ref = md.load("top.pdb")
meta = load_meta()

## Do calculation and save
# One RMSD array per trajectory, stored under the key yielded by itertrajs.
rmsds = dict(
    (key, md.rmsd(trajectory, ref))
    for key, trajectory in itertrajs(meta)
)
save_trajs(rmsds, 'rmsds', meta)
コード例 #6
0
        vec = msm.left_eigenvectors_
        n_states = vec.shape[
            0]  # may be less than 200 as T may be non-ergodic.
        fig, axes = plt.subplots(nrows=m, sharex=True)
        for i in range(m):
            for j in range(m):
                mask = pcca_mapping == j
                axes[i].bar(np.arange(n_states)[mask],
                            vec[mask, i],
                            label='PCCA State {}'.format(j),
                            align='center')
            axes[i].yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
            axes[i].legend()
            axes[i].set_ylabel('Cluster projection')

        plt.xlabel('Cluster')
        plt.savefig('figures/rmsd_msm_left_eigenvectors-pcca.png',
                    transparent=True)

    # Transforms:
    msm_traj = {}
    pcca_traj = {}
    for k, v in ctraj_dict.items():
        print(k)
        msm_traj[k] = msm.partial_transform(np.squeeze(v), mode='fill')
        pcca_traj[k] = pcca.partial_transform(np.squeeze(v), mode='fill')

    save_trajs(msm_traj, 'msm-traj-200', meta)
    save_generic(msm, 'msm-200.pickl')
    save_trajs(pcca_traj, 'pcca-2-traj', meta)
    save_generic(pcca, 'pcca-2.pickl')
コード例 #7
0
{{header}}

Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top

## Load
meta = load_meta()
centroids = md.load("centroids.xtc", top=preload_top(meta))

## Kernel
SIGMA = 0.3  # nm
from msmbuilder.featurizer import RMSDFeaturizer
import numpy as np

featurizer = RMSDFeaturizer(centroids)
lfeats = {}
for i, traj in itertrajs(meta):
    # RMSD features relative to the centroids, passed through a Gaussian
    # kernel: exp(-d**2 / (2 * SIGMA**2)).
    lfeat = featurizer.partial_transform(traj)
    lfeats[i] = np.exp(-lfeat**2 / (2 * (SIGMA**2)))
save_trajs(lfeats, 'ftrajs', meta)
コード例 #8
0
please cite msmbuilder in any publications


"""
import mdtraj as md

from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic
from multiprocessing import Pool

## Load
meta = load_meta()
tops = preload_tops(meta)
dihed_feat = DihedralFeaturizer()


## Featurize logic
def feat(irow):
    """Load the trajectory of one metadata row and dihedral-featurize it.

    ``irow`` is an ``(index, row)`` pair as produced by ``meta.iterrows()``.
    Returns ``(index, features)`` so the results can be collected in a dict.
    """
    index, row = irow
    traj_path = row['traj_fn']
    topology = tops[row['top_fn']]
    trajectory = md.load(traj_path, top=topology)
    return index, dihed_feat.partial_transform(trajectory)


## Do it in parallel
with Pool() as pool:
    # Results arrive in arbitrary order; the index in each pair keys the dict.
    dihed_trajs = {
        index: features
        for index, features in pool.imap_unordered(feat, meta.iterrows())
    }

## Save
save_trajs(dihed_trajs, 'ftrajs', meta)
save_generic(dihed_feat, 'featurizer.pickl')
コード例 #9
0
from msmbuilder.io import load_trajs, save_trajs
import numpy as np
from multiprocessing import Pool
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
from sklearn.neighbors.kde import KernelDensity
from scipy.signal import argrelextrema

# Don't prune these: they are loaded and re-saved unchanged under the
# pruned_trajectories directory.
for feature in ('angles', 'bonds', 'contacts'):
    loaded = load_trajs('featurized_trajectories/{}-ftraj'.format(feature))
    meta, ftraj = loaded
    save_trajs(ftraj, 'pruned_trajectories/{}-ftraj'.format(feature), meta)

# Prune these:

for feature in ['dihedrals']:
    meta, ftraj_dict = load_trajs(
        'featurized_trajectories/{}-ftraj'.format(feature))
    ftraj = np.concatenate([traj for traj in ftraj_dict.values()])
    cos_idx = np.arange(0, ftraj.shape[1] - 1, 2).reshape(-1, 1)
    variance = ftraj[:, cos_idx].var(axis=0).reshape(-1, 1)

    # Do KDE and split the data
    num_splits = 3
    bandwidths = np.linspace(.01, .10, num=100)
    x = np.linspace(0, .5, 1000).reshape(-1, 1)
    for bw in bandwidths:
コード例 #10
0
            return i, traj


        traj_dict = dict(map(traj_load, meta.iterrows()))
        all_trajs = [traj for traj in traj_dict.values()]

        cluster = LandmarkAgglomerative(n_clusters=200, n_landmarks=int(totframes /200), linkage='ward', metric='rmsd')
        cluster.fit(all_trajs)
        # TODO will this work?
        args = [(k,v,cluster) for k, v in traj_dict.items()]

        with Pool() as pool:
            all_ctrajs_dict = dict(pool.imap_unordered(clust, args))

        save_generic(cluster, 'cluster-200')
        save_trajs(all_ctrajs_dict, 'ctraj-200', meta)

    long_ctrajs = [np.squeeze(traj) for traj in all_ctrajs_dict.values() if traj.shape[0] > 1000]
    all_ctrajs = [np.squeeze(traj) for traj in all_ctrajs_dict.values()]

    lags = np.concatenate((np.arange(200, 1000, 200),np.arange(1000, 5000, 500)))
    all_msms = []

    for lag in lags:
        print('Fitting lag {}'.format(lag))
        if lag > 1000:
            trajs = long_ctrajs
        else:
            trajs = all_ctrajs

        msm = MarkovStateModel(lag_time=int(lag), n_timescales=100)
コード例 #11
0
ファイル: tica.py プロジェクト: jadeshi/msmbuilder-1
"""Reduce dimensionality with tICA

{{header}}
Meta
----
depends:
  - ftrajs
  - meta.pandas.pickl
"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True)
meta, ftrajs = load_trajs("ftrajs")

## Fit
# The model is fitted on all feature trajectories together.
tica.fit(ftrajs.values())

## Transform
# Project every trajectory separately, keeping its key.
ttrajs = {
    key: tica.partial_transform(features)
    for key, features in ftrajs.items()
}

## Save
save_trajs(ttrajs, "ttrajs", meta)
save_generic(tica, "tica.pickl")
コード例 #12
0
# Featurize logic
def feat(irow):
    """Compute two CA-based dihedral angles for one trajectory.

    ``irow`` is an ``(index, row)`` pair; the row supplies ``traj_fn`` and
    ``top_fn``. The trajectory is loaded with the matching entry of the
    module-level ``tops`` mapping. Returns ``(index, diheds)`` where
    ``diheds`` is the result of ``md.compute_dihedrals`` for the two atom
    quadruplets built below.
    """
    i, row = irow
    print('Loading traj {}'.format(row['traj_fn']))
    traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
    top = traj.topology

    def first_ca(res):
        # NOTE(review): ``res - 1`` presumably converts a 1-based residue
        # number into mdtraj's 0-based ``resid`` -- confirm.
        selection = top.select('resid {} and name CA'.format(res - 1))
        return selection[0]

    # Four CA atoms per dihedral, in the order the residues are listed.
    ctni_atoms = [first_ca(res) for res in (292, 329, 337, 383)]
    ctnt_atoms = [first_ca(res) for res in (162, 184, 185, 237)]

    quadruplets = np.array([ctni_atoms, ctnt_atoms])
    return i, md.compute_dihedrals(traj, quadruplets)


if __name__ == '__main__':
    meta = load_meta()
    # ``feat`` looks up ``tops`` as a module-level name, so it has to be
    # bound here before the worker pool is created.
    tops = preload_tops(meta)

    with Pool() as pool:
        # Results arrive in arbitrary order; each pair carries its row index.
        dtrajs = {
            index: diheds
            for index, diheds in pool.imap_unordered(feat, meta.iterrows())
        }
    save_trajs(dtrajs, 'dtrajs', meta)
コード例 #13
0
ファイル: featurize.py プロジェクト: Eigenstate/msmbuilder
{{header}}

Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top

## Load
meta = load_meta()
centroids = md.load("centroids.xtc", top=preload_top(meta))

## Kernel
SIGMA = 0.3  # nm
from msmbuilder.featurizer import RMSDFeaturizer
import numpy as np

featurizer = RMSDFeaturizer(centroids)
lfeats = {}
for i, traj in itertrajs(meta):
    # Gaussian kernel applied to the RMSD features of this trajectory:
    # exp(-d ** 2 / (2 * SIGMA ** 2)).
    lfeat = featurizer.partial_transform(traj)
    lfeats[i] = np.exp(-lfeat ** 2 / (2 * (SIGMA ** 2)))
save_trajs(lfeats, 'ftrajs', meta)
コード例 #14
0
ファイル: featurize.py プロジェクト: Eigenstate/msmbuilder
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic
from multiprocessing import Pool

## Load
meta = load_meta()
tops = preload_tops(meta)
dihed_feat = DihedralFeaturizer()


## Featurize logic
def feat(irow):
    """Featurize the trajectory described by one ``(index, row)`` pair.

    The row's ``traj_fn`` is loaded with the topology stored in ``tops``
    under the row's ``top_fn``; the pair ``(index, features)`` is returned.
    """
    index, row = irow
    traj_path = row['traj_fn']
    topology = tops[row['top_fn']]
    loaded = md.load(traj_path, top=topology)
    return index, dihed_feat.partial_transform(loaded)


## Do it in parallel
with Pool() as pool:
    # Unordered results are fine: every pair is keyed by its own row index.
    dihed_trajs = {
        index: features
        for index, features in pool.imap_unordered(feat, meta.iterrows())
    }

## Save
save_trajs(dihed_trajs, 'ftrajs', meta)
save_generic(dihed_feat, 'featurizer.pickl')
コード例 #15
0
ファイル: cluster.py プロジェクト: xy21hb/msmbuilder
"""Cluster tICA results

{{header}}

Meta
----
depends:
 - ttrajs
 - meta.pandas.pickl
"""
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.cluster import MiniBatchKMeans

## Load
meta, ttrajs = load_trajs('ttrajs')

## Fit
# Only the first ``dim`` columns of each trajectory are used for clustering.
dim = 5
kmeans = MiniBatchKMeans(n_clusters=500)
kmeans.fit([traj[:, :dim] for traj in ttrajs.values()])

## Transform
# Assign each trajectory separately, using the same column slice as the fit.
ktrajs = {
    key: kmeans.partial_transform(traj[:, :dim])
    for key, traj in ttrajs.items()
}

## Save
print(kmeans.summarize())
save_trajs(ktrajs, 'ktrajs', meta)
save_generic(kmeans, 'kmeans.pickl')
コード例 #16
0
    clf = GridSearchCV(estimator=model,
                       param_grid=param_grid,
                       cv=cv_iter,
                       n_jobs=2)
    clf.fit(X)

    # Save results
    results = pd.DataFrame(clf.cv_results_)
    save_generic(results, '{}-grid-search-results.pickl'.format(feature_name))

    # Print Results
    print("Best parameters set found on development set:")
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

    # Fit best estimator to data
    tica = clf.best_estimator_
    ttrajs = {}
    for k, v in ftrajs.items():
        ttrajs[k] = tica.partial_transform(v)

    # Save
    save_trajs(ttrajs, '{}-ttrajs'.format(feature_name), meta)
    save_generic(tica, '{}-tica.pickl'.format(feature_name))
コード例 #17
0
# cluster
# NOTE(review): `totframes`, `trajs`, `traj_dict` and `meta` are not assigned
# anywhere in this excerpt; they are assumed to be defined earlier in the file.
print('Attempting to cluster')
num_clusters = 20
# The number of landmarks is `totframes` / 100, truncated to an int.
cluster = LandmarkAgglomerative(n_clusters=num_clusters,
                                n_landmarks=int(totframes / 100),
                                linkage='ward',
                                metric='rmsd')
cluster.fit(trajs)

# Disabled variant: predicted labels per trajectory and padded each result
# with -1 up to `nframes` entries.
# print('Fitting cluster labels')
# ctraj = {}
# for k, v in traj_dict.items():
#     v = cluster.partial_predict(v)
#     diff = nframes-v.shape[0]
#     v = np.append(v, np.zeros(diff)-1)
#     ctraj[k] = v

# Convert to DF for plotting and sampling.
# df = to_dataframe(ctraj, nframes, dt=1)

# Predict cluster labels for every trajectory, keeping the original keys.
print('Fitting cluster labels for MSM')
ctraj = {}
for k, v in traj_dict.items():
    ctraj[k] = cluster.partial_predict(v)

# Save dataframe
# NOTE(review): the only visible assignment to `df` is commented out above, so
# the next call raises NameError unless `df` is defined elsewhere in the file.
save_generic(df, 'clusters/rmsd_cluster_trajectory.pickl')
save_trajs(ctraj, 'ftraj', meta)
コード例 #18
0
#!/usr/bin/env python
from msmbuilder.dataset import dataset
from msmbuilder.io import save_trajs, load_meta
import argparse
# Command-line interface: an HDF5 dataset, a metadata pickle and an optional
# output folder for the converted trajectories.
parser = argparse.ArgumentParser(
    prog='dataset_h5_to_npy_dir.py',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='''version1''')

parser.add_argument("dataset", help="""An HDF5 dataset""", type=str)
parser.add_argument("meta", help="A metadata pickl file", type=str)
# nargs='?' is required for the default to take effect: argparse ignores
# ``default`` on a positional argument unless the argument is optional.
# Invocations that pass the folder explicitly behave exactly as before.
parser.add_argument("trajs",
                    nargs='?',
                    help="The folder in which to store the trajs",
                    type=str,
                    default='trajs')

if __name__ == '__main__':
    args = parser.parse_args()
    meta = load_meta(args.meta)
    ds = dataset(args.dataset)
    # Copy every (key, value) pair of the dataset into a plain dict so it can
    # be written out with save_trajs.
    trajs = dict(ds.items())
    save_trajs(trajs, args.trajs, meta)
コード例 #19
0
if __name__ == '__main__':

    # Load
    meta = load_meta()
    tops = preload_tops(meta)

    # Select featurizer
    feature_name = 'Positions'
    reference = md.load('topology.pdb')
    featurizer = RawPositionsFeaturizer(ref_traj=reference)

    # One (row, featurizer, topologies) work item per metadata row.
    args = ((irow, featurizer, tops) for irow in meta.iterrows())

    # Do it in parallel
    with Pool() as pool:
        feature_trajs = dict(pool.imap_unordered(feat, args))

    # Plot unscaled features, using every 100th frame of each trajectory.
    thinned = [fx[::100] for fx in feature_trajs.values()]
    ftrajs = np.concatenate(thinned)
    fig, ax = plt.subplots(figsize=(15, 5))
    plot_box(ax, fxx=ftrajs, feature_name='Unscaled {}'.format(feature_name))
    fig.tight_layout()
    fig.savefig("Unscaled-{}-box.pdf".format(feature_name))

    ## Save
    save_trajs(feature_trajs, 'Unscaled-{}-ftraj'.format(feature_name), meta)
    save_generic(featurizer,
                 'Unscaled-{}-featurizer.pickl'.format(feature_name))
コード例 #20
0
ファイル: microstate.py プロジェクト: Eigenstate/msmbuilder
"""Make a microstate MSM

{{header}}
"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.msm import MarkovStateModel

## Load
meta, ktrajs = load_trajs('ktrajs')

## Fit
# The model is estimated once, from every trajectory in ``ktrajs``.
msm = MarkovStateModel(lag_time=2, n_timescales=10, verbose=False)
msm.fit(list(ktrajs.values()))

## Transform
# Per-trajectory transform; keys are carried over unchanged.
microktrajs = dict(
    (key, msm.partial_transform(sequence))
    for key, sequence in ktrajs.items()
)

## Save
print(msm.summarize())
save_generic(msm, 'msm.pickl')
save_trajs(microktrajs, 'microktrajs', meta)
コード例 #21
0
ファイル: tica.py プロジェクト: RobertArbon/AADH_Analysis
"""Reduce dimensionality with tICA

msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.125259
please cite msmbuilder in any publications

"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True)
meta, ftrajs = load_trajs("ftrajs")

## Fit
# Fit once on the whole collection of feature trajectories.
tica.fit(ftrajs.values())

## Transform
# Then project each trajectory individually under its original key.
ttrajs = dict(
    (key, tica.partial_transform(features))
    for key, features in ftrajs.items()
)

## Save
save_trajs(ttrajs, 'ttrajs', meta)
save_generic(tica, 'tica.pickl')
コード例 #22
0
ファイル: 1-feature.py プロジェクト: smutaogroup/VVD_analysis
    720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941,
    948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133,
    1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324,
    1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516,
    1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713,
    1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909,
    1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126,
    2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310,
    2327, 2338
])
num = len(alpha_carbon_number)

# Every unordered pair (i < j) of the entries in alpha_carbon_number, in the
# same order a nested i/j loop would produce them.
atompair = [
    [alpha_carbon_number[i], alpha_carbon_number[j]]
    for i in range(num)
    for j in range(i + 1, num)
]
dist_feat = AtomPairsFeaturizer(pair_indices=atompair)  ## Distance featurizer


def feat2(irow):
    """Load one trajectory and compute its atom-pair features.

    ``irow`` is an ``(index, row)`` pair; the row's ``traj_fn`` is loaded with
    the topology stored in ``tops`` under the row's ``top_fn``. Returns
    ``(index, features)`` as produced by ``dist_feat.partial_transform``.
    """
    index, row = irow
    traj_path = row['traj_fn']
    topology = tops[row['top_fn']]
    trajectory = md.load(traj_path, top=topology)
    return index, dist_feat.partial_transform(trajectory)


# The pool is closed on exit from the block; results arrive in arbitrary
# order and are keyed by the row index returned from feat2.
with contextlib.closing(Pool(processes=32)) as pool:
    dist_trajs = {
        index: features
        for index, features in pool.imap_unordered(feat2, meta.iterrows())
    }

save_trajs(dist_trajs, 'alpha_carbon', meta)