def test_that_all_featurizers_run():
    """Smoke test: run ``transform`` for several featurizers on one dataset.

    Nothing is asserted about the outputs; the test passes as long as every
    featurizer can be constructed and its ``transform`` call does not raise.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Factories rather than instances, so each featurizer is only built
    # right before it is run.
    # ContactFeaturizer is intentionally absent: it doesn't work on
    # ALA dipeptide.
    factories = (
        lambda: AtomPairsFeaturizer(pair_indices),
        lambda: SuperposeFeaturizer(np.arange(15), reference),
        lambda: DihedralFeaturizer(["phi", "psi"]),
        lambda: VonMisesFeaturizer(["phi", "psi"]),
        lambda: RMSDFeaturizer(reference),
    )
    for make_featurizer in factories:
        make_featurizer().transform(trajectories)
Example #2
0
def calculate_distances():
    """Compute pairwise atom distances for all ``traj*xtc`` files and save them.

    Every file matching ``traj*xtc`` in the current directory is loaded (in
    sorted filename order) with ``structure.gro`` as topology. All pairs of
    atoms that are not sodium/chloride are featurized with
    ``AtomPairsFeaturizer`` and one ``out_<i>.npy`` file is written per
    trajectory, where ``<i>`` is the trajectory's position in the sorted list.

    Raises:
        IndexError: if no file matches ``traj*xtc`` (the atom selection is
            taken from the first trajectory).
    """
    print("Calculating distances...")
    traj_files = sorted(glob.glob("traj*xtc"))
    traj = [md.load(filename, top='structure.gro') for filename in traj_files]

    # Compare element symbols case-insensitively. An exact match against
    # 'NA' / 'CL' never fires for symbols spelled 'Na' / 'Cl', which would
    # silently keep the ions in the selection.
    # NOTE(review): assumes `md` is mdtraj, which capitalizes element symbols
    # as 'Na' / 'Cl' -- confirm.
    excluded_symbols = ('NA', 'CL')
    indices = [
        atom.index
        for atom in traj[0].topology.atoms
        if atom.element.symbol.upper() not in excluded_symbols
    ]
    # Alternative selection (alpha carbons only):
    #     indices = traj[0].topology.select('name==CA')

    pairs = list(combinations(indices, 2))
    features = AtomPairsFeaturizer(pairs)
    transformed_data = features.fit_transform(traj)
    for i, distances in enumerate(transformed_data):
        np.save('out_' + str(i) + '.npy', distances)
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    The subset passed to ``SubsetAtomPairs`` covers every index into
    ``pair_indices``; its output is compared trajectory by trajectory against
    the output of ``AtomPairsFeaturizer`` built from the same pairs.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    expected = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=every_pair)
    actual = subset_featurizer.transform(trajectories)

    # The return value of `eq` is ignored, so the check relies on `eq`
    # raising for unequal inputs -- NOTE(review): confirm.
    for got, want in zip(actual, expected):
        eq(got, want)
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    Trajectories come from ``fetch_alanine_dipeptide()``. The subset passed to
    ``SubsetAtomPairs`` covers every index into ``pair_indices``; its output
    is compared trajectory by trajectory against ``AtomPairsFeaturizer``.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    expected = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=every_pair)
    actual = subset_featurizer.transform(trajectories)

    # The return value of `eq` is ignored, so the check relies on `eq`
    # raising for unequal inputs -- NOTE(review): confirm.
    for got, want in zip(actual, expected):
        eq(got, want)
Example #5
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    The subset passed to ``SubsetAtomPairs`` covers every index into
    ``pair_indices``; its output is compared trajectory by trajectory against
    the output of ``AtomPairsFeaturizer`` built from the same pairs.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    expected = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=every_pair)
    actual = subset_featurizer.transform(trajectories)

    # The return value of `eq` is ignored, so the check relies on `eq`
    # raising for unequal inputs -- NOTE(review): confirm.
    for got, want in zip(actual, expected):
        eq(got, want)
def test_AtomPairsFeaturizer_describe_features():
    """Spot-check ``describe_features`` against ``md.compute_distances``.

    One trajectory is picked at random from the module-level ``trajectories``
    and featurized over all 2-combinations of the module-level ``atom_ind``.
    For 25 randomly drawn feature columns, the atom indices reported by
    ``describe_features`` are fed to ``md.compute_distances`` and the result
    must equal the featurizer's column exactly.
    """
    pair_list = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(pair_list)

    traj_index = np.random.randint(len(trajectories))
    chosen = trajectories[traj_index]
    features = feat.transform([chosen])
    described = pd.DataFrame(feat.describe_features(chosen))

    for _ in range(25):
        column = np.random.choice(len(described))
        pair = described.iloc[column].atominds
        recomputed = md.compute_distances(chosen, [pair])

        matches = features[0][:, column] == recomputed.flatten()
        assert matches.all()
Example #7
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    Trajectories come from ``fetch_alanine_dipeptide()``. The subset passed to
    ``SubsetAtomPairs`` covers every index into ``pair_indices``; its output
    is compared trajectory by trajectory against ``AtomPairsFeaturizer``.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    expected = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=every_pair)
    actual = subset_featurizer.transform(trajectories)

    # The return value of `eq` is ignored, so the check relies on `eq`
    # raising for unequal inputs -- NOTE(review): confirm.
    for got, want in zip(actual, expected):
        eq(got, want)
def test_AtomPairsFeaturizer_describe_features():
    """Spot-check ``describe_features`` against ``md.compute_distances``.

    One trajectory is picked at random from the module-level ``trajectories``
    and featurized over all 2-combinations of the module-level ``atom_ind``.
    For 25 randomly drawn feature columns, the atom indices reported by
    ``describe_features`` are fed to ``md.compute_distances`` and the result
    must equal the featurizer's column exactly.
    """
    pair_list = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(pair_list)

    traj_index = np.random.randint(len(trajectories))
    chosen = trajectories[traj_index]
    features = feat.transform([chosen])
    described = pd.DataFrame(feat.describe_features(chosen))

    for _ in range(25):
        column = np.random.choice(len(described))
        pair = described.iloc[column].atominds
        recomputed = md.compute_distances(chosen, [pair])

        matches = features[0][:, column] == recomputed.flatten()
        assert matches.all()
def test_SubsetAtomPairs_3():
    """A two-pair subset must NOT reproduce the full AtomPairsFeaturizer output.

    ``SubsetAtomPairs`` is restricted to subset indices 0 and 1, and its
    output is compared with the full ``AtomPairsFeaturizer`` output via
    ``eq``. The test passes only if that comparison raises ``AssertionError``.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    full_features = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=np.array([0, 1]))
    subset_features = subset_featurizer.transform(trajectories)

    mismatch_detected = False
    try:
        for got, want in zip(subset_features, full_features):
            eq(got, want)
    except AssertionError:
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
Example #10
0
def test_SubsetAtomPairs_3():
    """A two-pair subset must NOT reproduce the full AtomPairsFeaturizer output.

    ``SubsetAtomPairs`` is restricted to subset indices 0 and 1, and its
    output is compared with the full ``AtomPairsFeaturizer`` output via
    ``eq``. The test passes only if that comparison raises ``AssertionError``.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Baseline features from the plain atom-pair featurizer.
    full_features = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    subset_featurizer = SubsetAtomPairs(pair_indices, reference,
                                        subset=np.array([0, 1]))
    subset_features = subset_featurizer.transform(trajectories)

    mismatch_detected = False
    try:
        for got, want in zip(subset_features, full_features):
            eq(got, want)
    except AssertionError:
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
Example #11
0
def build_dataset():
    """Featurize strided MetEnkephalin trajectories with 200 random atom pairs.

    All ``(i, j)`` pairs with ``j < i`` over the atoms of the first trajectory
    are enumerated and shuffled with NumPy's global RNG seeded to 0; the first
    200 pairs are kept. Each trajectory is sliced with ``[::10]`` before
    being transformed.

    Returns:
        The result of ``AtomPairsFeaturizer.transform`` on the sliced
        trajectories.
    """
    trajs = MetEnkephalin().get().trajectories

    n_atoms = trajs[0].n_atoms
    pairs = [(i, j) for i in range(n_atoms) for j in range(i)]

    # Fixed seed so the same pairs are selected on every call.
    np.random.seed(0)
    np.random.shuffle(pairs)
    n_pairs = 200

    featurizer = AtomPairsFeaturizer(pairs[:n_pairs])
    strided = [traj[::10] for traj in trajs]
    return featurizer.transform(strided)
def load_met():
    """Load MetEnkephalin and compute atom-pair and dihedral features.

    Atom pairs are all ``(j, i)`` with ``j < i < 75``.

    Returns:
        Tuple ``(X, Y)``: the ``fit_transform`` outputs of
        ``AtomPairsFeaturizer`` and of a default ``DihedralFeaturizer``.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    n_atoms = 75  # hard-coded atom count used to enumerate the pairs
    pairs = [(j, i) for i in range(n_atoms) for j in range(i)]
    pair_features = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    dihedral_features = DihedralFeaturizer().fit_transform(trajs)
    return pair_features, dihedral_features
def load_fs():
    """Load MinimalFsPeptide and compute atom-pair and dihedral features.

    Atom pairs are all ``(j, i)`` with ``j < i < 264``.

    Returns:
        Tuple ``(X, Y)``: the ``fit_transform`` outputs of
        ``AtomPairsFeaturizer`` and of a default ``DihedralFeaturizer``.
    """
    from msmbuilder.example_datasets import MinimalFsPeptide
    trajs = MinimalFsPeptide().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    n_atoms = 264  # hard-coded atom count used to enumerate the pairs
    pairs = [(j, i) for i in range(n_atoms) for j in range(i)]
    pair_features = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    dihedral_features = DihedralFeaturizer().fit_transform(trajs)
    return pair_features, dihedral_features
Example #14
0
def test_that_all_featurizers_run():
    """Smoke test: run ``transform`` for several featurizers on one dataset.

    Nothing is asserted about the outputs of the featurizers; each one only
    has to be constructible and its ``transform`` call must not raise.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    reference = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference)

    # Factories rather than instances, so each featurizer is only built
    # right before it is run.
    # ContactFeaturizer is intentionally absent: it doesn't work on
    # ALA dipeptide.
    factories = (
        lambda: AtomPairsFeaturizer(pair_indices),
        lambda: SuperposeFeaturizer(np.arange(15), reference),
        lambda: DihedralFeaturizer(["phi", "psi"]),
        lambda: VonMisesFeaturizer(["phi", "psi"]),
        lambda: RMSDFeaturizer(reference),
    )
    for make_featurizer in factories:
        make_featurizer().transform(trajectories)
    plt.ylabel('%s tIC' % (str(tIC_b)))
    plt.title('tICA Heatmap (log color scale)')
    plt.colorbar()
    plt.savefig(opath)
    plt.close()


##################### main program

######################## inputs
# Indices (starting from 0) of the atom pairs of interest, one
# "atom1 atom2" pair per row of pairlist.txt.
atom_pairs = np.loadtxt('pairlist.txt', dtype=int)
xtc_file_dir = 'trajectories/'  # folder containing the xtc files

featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)

# Trajectory file names, one per line of "trajlist". The file is read inside
# a `with` block so the handle is closed as soon as the names are loaded.
with open("trajlist") as trajlist_file:
    traj_list_array = [line.strip() for line in trajlist_file]
# Function-call form of print: valid on Python 3 and prints the same text on
# Python 2 for a single argument.
print(traj_list_array)  # trajectory names

#################### calculate the pairwise distances for tica
ticadist = []
for trajfile in traj_list_array:
    xyz = dataset(xtc_file_dir + trajfile, topology='test.pdb')
    temp = featurizer.fit_transform(xyz)
    # Only the first element of the featurizer output is kept per file: the
    # pairwise distances between the atoms of interest.
    ticadist.append(temp[0])
Example #16
0
n_splits = 5
temp_num = 0

# Read the list of feature files up front so the file handle is closed
# deterministically instead of being left open for the whole loop.
with open(pairwise_distances_files_list) as features_list_file:
    features_files = features_list_file.readlines()

# temp_num is the 1-based position of the feature file in the list; it is
# used to name the per-feature-list result directory.
for temp_num, features_file in enumerate(features_files, start=1):
    features_path = features_file.strip()
    print(
        '----------------------------------------------------------------------------------------'
    )
    print("now we are handling the feature file:", features_path)
    atom_pairs = np.loadtxt(features_path, dtype='int')
    print("the features we are handling are:\n", atom_pairs)

    sub_resultdir = resultdir + '/feature_list' + str(temp_num) + '/'
    if not os.path.exists(sub_resultdir):
        os.makedirs(sub_resultdir)

    featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)
    data = featurizing_the_conformations(featurizer, trajectory_dir,
                                         traj_list_array, pdb_name)

    # 5-fold cross validation over the trajectories, without shuffling.
    cv = KFold(n_splits=n_splits, shuffle=False)
    for fold, (train_index, test_index) in enumerate(
            cv.split(traj_list_array), start=1):
        print("now we are handling fold %d" % (fold))
        print("training data:", [traj_list_array[i] for i in train_index])
        print("testing data", [traj_list_array[i] for i in test_index])

        # Featurized trajectories for this fold, selected by the same indices.
        train_data = [data[i] for i in train_index]
        test_data = [data[i] for i in test_index]
    atom_pair_list,
    dtype=int)  #import the pairwise distance index file as integer type
# Trajectory names, one per line of the list file. Reading inside a `with`
# block closes the file handle as soon as the names are loaded.
with open(trajname_list) as trajname_file:
    traj_list_array = [line.strip() for line in trajname_file]

# In[157]:

# Step 1.0: tICA
# Select kinetically slow variables via tICA (time-lagged independent
# component analysis). tICA finds the linear combinations of the input
# features that maximize the normalized time-lagged correlation.
# In this example, the pairwise distances of all heavy atoms are used as the
# input features for tICA.

# Input: trajectories; output: tICA projections.
# Prepare the data for tICA.
featurizer = AtomPairsFeaturizer(
    pair_indices=atom_pairs)  # in this example, we use pairwise distances
pairdist4tica = featurizing_the_conformations(featurizer, trajectory_dir,
                                              traj_list_array, pdb_name)
print(
    "now we have prepared the data for tICA: the pairwise distances for all frames in all trajectories"
)

# Run tICA.
# The tICA lag time must be specified in advance; you can play with this
# number!
tica_model = tICA(lag_time=10, n_components=2)
# Project the MD data onto the tICA coordinates.
tica_trajs = tica_model.fit_transform(pairdist4tica)
#print("output of tica:", tica_trajs)
# Plot the tICA projections.
draw_tica_projection(resultdir, tica_trajs, 'tica_12.png', 1, 2)
Example #18
0
    720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941,
    948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133,
    1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324,
    1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516,
    1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713,
    1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909,
    1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126,
    2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310,
    2327, 2338
])
num = len(alpha_carbon_number)

# Build every pair [alpha_carbon_number[i], alpha_carbon_number[j]] with
# i < j, ordered by i and then by j.
atompair = []
for i, first in enumerate(alpha_carbon_number):
    for j in range(i + 1, num):
        atompair.append([first, alpha_carbon_number[j]])

# Distance featurizer over all of the pairs built above.
dist_feat = AtomPairsFeaturizer(pair_indices=atompair)


def feat2(irow):
    """Featurize the trajectory described by one ``(index, row)`` pair.

    Args:
        irow: a 2-tuple ``(i, row)``; ``row['traj_fn']`` is the trajectory
            file to load and ``row['top_fn']`` is the key into the
            module-level ``tops`` mapping that selects its topology.

    Returns:
        Tuple ``(i, features)`` where ``features`` is the output of
        ``dist_feat.partial_transform`` for the loaded trajectory.
    """
    index, row = irow
    traj_path = row['traj_fn']
    topology = tops[row['top_fn']]
    loaded = md.load(traj_path, top=topology)
    return index, dist_feat.partial_transform(loaded)


# Apply feat2 to every row of `meta` using 32 worker processes. feat2
# returns (index, features) pairs, so the results can be collected into a
# dict keyed by index regardless of the order in which they arrive.
with contextlib.closing(Pool(processes=32)) as pool:
    indexed_features = pool.imap_unordered(feat2, meta.iterrows())
    dist_trajs = dict(indexed_features)

save_trajs(dist_trajs, 'alpha_carbon', meta)