コード例 #1
0
def test_that_all_featurizers_run():
    """Smoke-test several featurizers on the AlanineDipeptide trajectories.

    Each featurizer is built and its ``transform`` is called on the whole
    trajectory list.  The outputs are discarded, so the only thing verified
    is that none of the calls raises.
    """
    # TODO: include all featurizers, perhaps with generator tests

    trajectories = AlanineDipeptide().get_cached().trajectories
    # First frame of the first trajectory, used as the reference structure.
    reference_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(reference_frame)

    AtomPairsFeaturizer(pair_indices).transform(trajectories)

    SuperposeFeaturizer(np.arange(15), reference_frame).transform(trajectories)

    DihedralFeaturizer(["phi", "psi"]).transform(trajectories)

    VonMisesFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is intentionally left out: per the original note it
    # does not work on ALA dipeptide.

    RMSDFeaturizer(reference_frame).transform(trajectories)
コード例 #2
0
ファイル: tica.py プロジェクト: yabmtm/scripts
def calculate_distances():
    """Compute pairwise atom distances for every ``traj*xtc`` file.

    Files matching ``traj*xtc`` in the current directory are loaded (in
    sorted filename order) with ``structure.gro`` as topology.  Sodium and
    chloride atoms are dropped, every pair of the remaining atoms is fed to
    ``AtomPairsFeaturizer``, and the result for trajectory ``i`` is written
    to ``out_<i>.npy``.

    Raises:
        IndexError: if no trajectory file matches the glob pattern.
    """
    print("Calculating distances...")
    traj_files = sorted(glob.glob("traj*xtc"))
    traj = [md.load(filename, top='structure.gro') for filename in traj_files]

    # Compare symbols case-insensitively.  The previous test against the
    # all-caps strings 'NA' and 'CL' cannot match mixed-case element symbols
    # such as 'Na' / 'Cl', so the ions were never actually filtered out.
    excluded_symbols = {'NA', 'CL'}
    indices = [atom.index for atom in traj[0].topology.atoms
               if atom.element.symbol.upper() not in excluded_symbols]
    # Alternative selection kept from the original as a hint:
    #     traj[0].topology.select('name==CA')

    pairs = list(combinations(indices, 2))
    features = AtomPairsFeaturizer(pairs)
    transformed_data = features.fit_transform(traj)
    for i, distances in enumerate(transformed_data):
        np.save('out_' + str(i) + '.npy', distances)
コード例 #3
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    Both featurizers transform the same trajectories and the outputs are
    handed pairwise to ``eq``.  The value returned by ``any`` is discarded,
    so the check relies entirely on ``eq`` raising on a mismatch
    (presumably it is an assertion helper -- confirm against its import).
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    reference_features = AtomPairsFeaturizer(pair_indices).transform(
        trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, first_frame,
                                        subset=every_pair)
    subset_features = subset_featurizer.transform(trajectories)

    # map() over two sequences stops at the shorter one, exactly like zip().
    any(list(map(eq, subset_features, reference_features)))
コード例 #4
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    Trajectories come from ``fetch_alanine_dipeptide()["trajectories"]``.
    Outputs of the two featurizers are passed pairwise to ``eq``; the result
    of ``any`` is not used, so a failure can only surface if ``eq`` itself
    raises (presumably an assertion helper -- confirm against its import).
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    baseline = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    full_subset = np.arange(len(pair_indices))
    candidate = SubsetAtomPairs(
        pair_indices, first_frame, subset=full_subset
    ).transform(trajectories)

    comparisons = [eq(got, want) for got, want in zip(candidate, baseline)]
    any(comparisons)
コード例 #5
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    The per-trajectory outputs of both featurizers are passed pairwise to
    ``eq``.  Nothing is asserted on the return values here, so the test
    depends on ``eq`` raising when the arrays differ (presumably an
    assertion helper -- confirm against its import).
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    full_featurizer = AtomPairsFeaturizer(pair_indices)
    expected = full_featurizer.transform(trajectories)

    n_pairs = len(pair_indices)
    subset_featurizer = SubsetAtomPairs(pair_indices, frame0,
                                        subset=np.arange(n_pairs))
    observed = subset_featurizer.transform(trajectories)

    # map() over two sequences stops at the shorter one, exactly like zip().
    any(list(map(eq, observed, expected)))
コード例 #6
0
def test_AtomPairsFeaturizer_describe_features():
    """Spot-check ``describe_features`` against ``md.compute_distances``.

    Relies on ``atom_ind`` and ``trajectories`` defined outside this
    function.  One trajectory is picked at random; for 25 randomly chosen
    feature columns the distance is recomputed from the ``atominds`` entry
    of the description and must equal the featurizer output exactly.
    """
    all_pairs = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(all_pairs)

    chosen_traj = trajectories[np.random.randint(len(trajectories))]
    features = feat.transform([chosen_traj])
    description = pd.DataFrame(feat.describe_features(chosen_traj))

    for _ in range(25):
        column = np.random.choice(len(description))
        pair = description.iloc[column].atominds
        recomputed = md.compute_distances(chosen_traj, [pair])

        assert (features[0][:, column] == recomputed.flatten()).all()
コード例 #7
0
def test_SubsetAtomPairs_2():
    """Compare SubsetAtomPairs (all pairs selected) with AtomPairsFeaturizer.

    Uses ``fetch_alanine_dipeptide()["trajectories"]`` as input.  The two
    feature lists are compared element by element through ``eq``; since the
    result of ``any`` is thrown away, the test can only fail if ``eq``
    raises (presumably an assertion helper -- confirm against its import).
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    reference = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    select_all = np.arange(len(pair_indices))
    subset_featurizer = SubsetAtomPairs(pair_indices, frame0,
                                        subset=select_all)
    result = subset_featurizer.transform(trajectories)

    outcomes = [eq(x, x0) for x, x0 in zip(result, reference)]
    any(outcomes)
コード例 #8
0
def test_AtomPairsFeaturizer_describe_features():
    """Check that described atom pairs reproduce the featurizer's distances.

    ``atom_ind`` and ``trajectories`` are taken from the enclosing scope.
    A random trajectory is featurized, then 25 random rows of the
    ``describe_features`` table are verified by recomputing the distance
    for the row's ``atominds`` with ``md.compute_distances``.
    """
    pair_list = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(pair_list)

    traj_number = np.random.randint(len(trajectories))
    sampled = trajectories[traj_number]
    feature_matrix = feat.transform([sampled])[0]
    table = pd.DataFrame(feat.describe_features(sampled))

    n_checks = 25
    for _ in range(n_checks):
        row_number = np.random.choice(len(table))
        atoms = table.iloc[row_number].atominds
        distances = md.compute_distances(sampled, [atoms])

        assert (feature_matrix[:, row_number] == distances.flatten()).all()
コード例 #9
0
def test_SubsetAtomPairs_3():
    """Check that a two-pair subset does NOT match the full pair features.

    ``SubsetAtomPairs`` is restricted to ``subset=np.array([0, 1])`` and its
    output is compared with the full ``AtomPairsFeaturizer`` output through
    ``eq``.  That comparison is expected to raise ``AssertionError``; if it
    does not, the test fails with its own ``AssertionError``.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    full_features = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    two_pairs = np.array([0, 1])
    subset_features = SubsetAtomPairs(
        pair_indices, first_frame, subset=two_pairs
    ).transform(trajectories)

    mismatch_detected = False
    try:
        # map() over two sequences stops at the shorter one, like zip().
        any(list(map(eq, subset_features, full_features)))
    except AssertionError:
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
コード例 #10
0
def test_SubsetAtomPairs_3():
    """Verify that comparing subset features with full features fails.

    Only pairs 0 and 1 are selected in ``SubsetAtomPairs``.  Passing its
    output and the full ``AtomPairsFeaturizer`` output to ``eq`` must raise
    ``AssertionError``; otherwise this test raises one itself.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    reference_featurizer = AtomPairsFeaturizer(pair_indices)
    reference = reference_featurizer.transform(trajectories)

    reduced_featurizer = SubsetAtomPairs(pair_indices, frame0,
                                         subset=np.array([0, 1]))
    reduced = reduced_featurizer.transform(trajectories)

    raised = False
    try:
        results = [eq(got, want) for got, want in zip(reduced, reference)]
        any(results)
    except AssertionError:
        raised = True

    if not raised:
        raise AssertionError("Did not raise an assertion!")
コード例 #11
0
def build_dataset():
    """Return atom-pair features for 200 shuffled pairs of MetEnkephalin.

    All ``(i, j)`` pairs with ``j < i`` over the atoms of the first
    trajectory are shuffled after seeding NumPy's global RNG with 0, the
    first 200 are kept, and every trajectory is subsampled to each 10th
    frame before being transformed.

    Note: calling this reseeds the global ``np.random`` state.
    """
    trajs = MetEnkephalin().get().trajectories

    n_atoms = trajs[0].n_atoms
    pairs = [(i, j) for i in range(n_atoms) for j in range(i)]

    np.random.seed(0)
    np.random.shuffle(pairs)
    n_pairs = 200

    featurizer = AtomPairsFeaturizer(pairs[:n_pairs])
    strided_trajs = [traj[::10] for traj in trajs]
    return featurizer.transform(strided_trajs)
コード例 #12
0
def load_met():
    """Load MetEnkephalin and return ``(X, Y)`` feature sets.

    ``X`` is the ``AtomPairsFeaturizer`` output for every ``(j, i)`` pair
    with ``j < i < 75``; ``Y`` is the output of a default
    ``DihedralFeaturizer``.  The type of ``MetEnkephalin`` is printed as a
    side effect.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    # 75 is hard-coded; presumably the atom count of the system -- confirm.
    n_atoms = 75
    pairs = [(j, i) for i in range(n_atoms) for j in range(i)]
    pair_featurizer = AtomPairsFeaturizer(pairs)
    X = pair_featurizer.fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    dihedral_featurizer = DihedralFeaturizer()
    Y = dihedral_featurizer.fit_transform(trajs)

    return X, Y
コード例 #13
0
def load_fs():
    """Load MinimalFsPeptide and return ``(X, Y)`` feature sets.

    ``X`` is the ``AtomPairsFeaturizer`` output for every ``(j, i)`` pair
    with ``j < i < 264``; ``Y`` is the output of a default
    ``DihedralFeaturizer``.
    """
    from msmbuilder.example_datasets import MinimalFsPeptide
    trajs = MinimalFsPeptide().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    # 264 is hard-coded; presumably the atom count of the system -- confirm.
    n_atoms = 264
    pairs = [(j, i) for i in range(n_atoms) for j in range(i)]
    pair_featurizer = AtomPairsFeaturizer(pairs)
    X = pair_featurizer.fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    dihedral_featurizer = DihedralFeaturizer()
    Y = dihedral_featurizer.fit_transform(trajs)

    return X, Y
コード例 #14
0
def test_that_all_featurizers_run():
    """Run ``transform`` for a fixed list of featurizers as a smoke test.

    Nothing is asserted about the outputs; the test passes as long as
    constructing each featurizer and transforming the AlanineDipeptide
    trajectories does not raise.
    """
    # TODO: include all featurizers, perhaps with generator tests

    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    # Factories are called lazily so that each featurizer is constructed
    # immediately before its own transform, never all up front.
    factories = (
        lambda: AtomPairsFeaturizer(pair_indices),
        lambda: SuperposeFeaturizer(np.arange(15), first_frame),
        lambda: DihedralFeaturizer(["phi", "psi"]),
        lambda: VonMisesFeaturizer(["phi", "psi"]),
        # ContactFeaturizer is omitted: per the original note it does not
        # work on ALA dipeptide.
        lambda: RMSDFeaturizer(first_frame),
    )
    for make_featurizer in factories:
        make_featurizer().transform(trajectories)
コード例 #15
0
    plt.ylabel('%s tIC' % (str(tIC_b)))
    plt.title('tICA Heatmap (log color scale)')
    plt.colorbar()
    plt.savefig(opath)
    plt.close()


##################### main program

######################## inputs
# Atom index pairs of interest, read from 'pairlist.txt' as integers
# (per the original note: 0-based indices, one "atom1 atom2" pair per line).
atom_pairs = np.loadtxt('pairlist.txt', dtype=int)
xtc_file_dir = 'trajectories/'  # folder holding the xtc files

featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)

# Read the trajectory names, one per line.  The with-block guarantees the
# file is closed, which the bare `for line in open(...)` did not.
with open("trajlist") as trajlist_file:
    traj_list_array = [line.strip() for line in trajlist_file]
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(traj_list_array)  # trajectory names

#################### calculate the pairwise distances for tICA
ticadist = []
for trajfile in traj_list_array:
    xyz = dataset(xtc_file_dir + trajfile, topology='test.pdb')
    temp = featurizer.fit_transform(xyz)
    # Keep the first element of the transformed output: the pairwise
    # distances between the atoms of interest for this trajectory file.
    ticadist.append(temp[0])
コード例 #16
0
n_splits = 5

# Read the list of feature files up front inside a with-block so the handle
# is closed; iterating over a bare open(...) left it open.
with open(pairwise_distances_files_list) as list_file:
    features_files = [line.strip() for line in list_file]

# temp_num is the 1-based position of the feature file in the list.
for temp_num, features_file in enumerate(features_files, start=1):
    print(
        '----------------------------------------------------------------------------------------'
    )
    print("now we are handling the feature file:", features_file)
    atom_pairs = np.loadtxt(features_file, dtype='int')
    print("the features we are handling are:\n", atom_pairs)

    # One result directory per feature file; exist_ok avoids the race
    # between checking for the directory and creating it.
    sub_resultdir = resultdir + '/feature_list' + str(temp_num) + '/'
    os.makedirs(sub_resultdir, exist_ok=True)

    featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)
    data = featurizing_the_conformations(featurizer, trajectory_dir,
                                         traj_list_array, pdb_name)

    # K-fold split over trajectories, without shuffling.
    cv = KFold(n_splits=n_splits, shuffle=False)
    for fold, (train_index, test_index) in enumerate(
            cv.split(traj_list_array), start=1):
        print("now we are handling fold %d" % (fold))
        print("training data:", [traj_list_array[i] for i in train_index])
        print("testing data", [traj_list_array[i] for i in test_index])

        train_data = [data[i] for i in train_index]
        test_data = [data[i] for i in test_index]
コード例 #17
0
    atom_pair_list,
    dtype=int)  #import the pairwise distance index file as integer type
# Read the trajectory names, one per line.  The with-block closes the file,
# which the bare `for line in open(...)` did not.
with open(trajname_list) as trajname_file:
    traj_list_array = [line.strip() for line in trajname_file]

# In[157]:

# Step 1.0: tICA
# Select kinetically slow variables via tICA (time-lagged independent
# component analysis).  tICA finds the linear combinations of the input
# features that maximize the normalized time-lagged correlation.
# In this example the input features are pairwise atom distances (per the
# original note: between all heavy atoms).

# Input: trajectories; output: tICA projections.
# Prepare the data for tICA.
featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)
pairdist4tica = featurizing_the_conformations(featurizer, trajectory_dir,
                                              traj_list_array, pdb_name)
print(
    "now we have prepared the data for tICA: the pairwise distances for all frames in all trajectories"
)

# Run tICA.  The lag time must be chosen in advance; try other values.
tica_model = tICA(lag_time=10, n_components=2)
# Project the MD data onto the tICA coordinates.
tica_trajs = tica_model.fit_transform(pairdist4tica)
#print("output of tica:", tica_trajs)
# Plot the tICA projections.
draw_tica_projection(resultdir, tica_trajs, 'tica_12.png', 1, 2)
コード例 #18
0
ファイル: 1-feature.py プロジェクト: smutaogroup/VVD_analysis
    720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941,
    948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133,
    1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324,
    1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516,
    1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713,
    1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909,
    1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126,
    2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310,
    2327, 2338
])
num = len(alpha_carbon_number)

# Every unordered pair of entries of alpha_carbon_number, as two-element
# lists, ordered by first index and then second index.
atompair = [
    [alpha_carbon_number[first], alpha_carbon_number[second]]
    for first in range(num)
    for second in range(first + 1, num)
]
dist_feat = AtomPairsFeaturizer(pair_indices=atompair)  ## Distance featurizer


def feat2(irow):
    """Featurize one trajectory described by an ``(index, row)`` pair.

    ``row['traj_fn']`` is loaded with ``md.load`` using the topology looked
    up in ``tops`` under ``row['top_fn']``, then passed through
    ``dist_feat.partial_transform``.

    Returns:
        ``(index, features)`` with the index passed through unchanged.
    """
    index, record = irow
    traj_path = record['traj_fn']
    topology = tops[record['top_fn']]
    trajectory = md.load(traj_path, top=topology)
    return index, dist_feat.partial_transform(trajectory)


# Featurize every row of `meta` in a pool of 32 worker processes.  Results
# arrive in arbitrary order and are keyed by the index that feat2 returns.
worker_pool = Pool(processes=32)
with contextlib.closing(worker_pool):
    indexed_features = worker_pool.imap_unordered(feat2, meta.iterrows())
    dist_trajs = dict(indexed_features)

save_trajs(dist_trajs, 'alpha_carbon', meta)