def test_that_all_featurizers_run():
    """Smoke test: build several featurizers and run ``transform`` on each.

    Nothing is asserted about the output; the test only fails if building
    a featurizer or calling ``transform`` raises.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    # Zero-argument factories so each featurizer is built immediately
    # before it is used, one after another.
    featurizer_factories = (
        lambda: AtomPairsFeaturizer(pair_indices),
        lambda: SuperposeFeaturizer(np.arange(15), first_frame),
        lambda: DihedralFeaturizer(["phi", "psi"]),
        lambda: VonMisesFeaturizer(["phi", "psi"]),
        # msmbuilder.featurizer.ContactFeaturizer() is deliberately left
        # out: it doesn't work on ALA dipeptide.
        lambda: RMSDFeaturizer(first_frame),
    )
    for make_featurizer in featurizer_factories:
        make_featurizer().transform(trajectories)
def test_SubsetAtomPairs_1():
    """Compare SubsetAtomPairs with AtomPairsFeaturizer on the full pair set.

    The subset is set to every index in ``pair_indices``, and the per-trajectory
    outputs of the two featurizers are passed pairwise to ``eq``.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    reference = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    subset_featurizer = SubsetAtomPairs(pair_indices, first_frame)
    subset_featurizer.subset = np.arange(len(pair_indices))
    observed = subset_featurizer.transform(trajectories)

    # The return value of eq is not used; presumably eq raises
    # AssertionError on a mismatch -- TODO confirm against its definition.
    for got, want in zip(observed, reference):
        eq(got, want)
def test_SubsetAtomPairs_1():
    """Compare SubsetAtomPairs with AtomPairsFeaturizer on the full pair set.

    Trajectories come from ``fetch_alanine_dipeptide()``. The subset is set
    to every index in ``pair_indices``, and the per-trajectory outputs of
    the two featurizers are passed pairwise to ``eq``.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    reference = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    subset_featurizer = SubsetAtomPairs(pair_indices, first_frame)
    subset_featurizer.subset = np.arange(len(pair_indices))
    observed = subset_featurizer.transform(trajectories)

    # The return value of eq is not used; presumably eq raises
    # AssertionError on a mismatch -- TODO confirm against its definition.
    for got, want in zip(observed, reference):
        eq(got, want)
def build_dataset():
    """Featurize MetEnkephalin with 200 pseudo-randomly chosen atom pairs.

    Every ``(i, j)`` pair with ``j < i`` is enumerated from the atom count
    of the first trajectory, shuffled after seeding NumPy's global RNG with
    0, and the first 200 pairs are kept. Each trajectory is subsampled to
    every 10th frame before being transformed.

    Returns
    -------
    The result of ``AtomPairsFeaturizer.transform`` on the subsampled
    trajectories.
    """
    trajs = MetEnkephalin().get().trajectories
    n_atoms = trajs[0].n_atoms
    pairs = [(i, j) for i in range(n_atoms) for j in range(i)]

    # Fixed seed so the same pairs are selected on every call; note this
    # reseeds the module-level NumPy RNG.
    np.random.seed(0)
    np.random.shuffle(pairs)

    n_pairs = 200
    featurizer = AtomPairsFeaturizer(pairs[:n_pairs])
    return featurizer.transform([traj[::10] for traj in trajs])
def test_AtomPairsFeaturizer_describe_features():
    """Check that ``describe_features`` rows line up with the feature columns.

    For 25 randomly drawn rows of the description table, the distance for
    that row's ``atominds`` is recomputed with ``md.compute_distances`` and
    must equal the corresponding column of the transformed features.
    """
    pair_list = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(pair_list)

    # One randomly selected trajectory is used for the whole test.
    traj = trajectories[np.random.randint(len(trajectories))]
    features = feat.transform([traj])
    description = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        row = np.random.choice(len(description))
        pair = description.iloc[row].atominds
        expected = md.compute_distances(traj, [pair])
        assert (features[0][:, row] == expected.flatten()).all()
def load_met():
    """Load MetEnkephalin and return atom-pair and dihedral features.

    Returns
    -------
    (X, Y) : tuple
        ``X`` is ``AtomPairsFeaturizer.fit_transform`` over every pair
        ``(j, i)`` with ``j < i < 75``; ``Y`` is
        ``DihedralFeaturizer().fit_transform`` with default settings.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    from msmbuilder.featurizer import AtomPairsFeaturizer, DihedralFeaturizer

    # Diagnostic output emitted by the original implementation; kept as-is.
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    # 75 is hard-coded; presumably the atom count of the system -- TODO confirm.
    pairs = [(j, i) for i in range(75) for j in range(i)]
    pair_features = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    dihedral_features = DihedralFeaturizer().fit_transform(trajs)
    return pair_features, dihedral_features
def load_fs():
    """Load MinimalFsPeptide and return atom-pair and dihedral features.

    Returns
    -------
    (X, Y) : tuple
        ``X`` is ``AtomPairsFeaturizer.fit_transform`` over every pair
        ``(j, i)`` with ``j < i < 264``; ``Y`` is
        ``DihedralFeaturizer().fit_transform`` with default settings.
    """
    from msmbuilder.example_datasets import MinimalFsPeptide
    from msmbuilder.featurizer import AtomPairsFeaturizer, DihedralFeaturizer

    trajs = MinimalFsPeptide().get().trajectories

    # 264 is hard-coded; presumably the atom count of the system -- TODO confirm.
    pairs = [(j, i) for i in range(264) for j in range(i)]
    pair_features = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    dihedral_features = DihedralFeaturizer().fit_transform(trajs)
    return pair_features, dihedral_features
def test_SubsetAtomPairs_3():
    """Check that a two-element subset does NOT reproduce the full output.

    SubsetAtomPairs is built with ``subset=np.array([0, 1])`` and its output
    is compared, trajectory by trajectory, with the full AtomPairsFeaturizer
    output. The comparison is expected to raise AssertionError; the test
    fails if it does not.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    reference = AtomPairsFeaturizer(pair_indices).transform(trajectories)

    subset_featurizer = SubsetAtomPairs(
        pair_indices, first_frame, subset=np.array([0, 1]))
    observed = subset_featurizer.transform(trajectories)

    mismatch_detected = False
    try:
        for got, want in zip(observed, reference):
            eq(got, want)
    except AssertionError:
        # Expected path: the restricted subset must differ from the full set.
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
atom_pair_list, dtype=int) #import the pairwise distance index file as integer type traj_list_array = [] for line in open(trajname_list): traj_list_array.append(line.strip()) # In[157]: #step 1.0: tICA #Select kinetic slow variables via tICA (time-lagged independent component analysis) #tICA finds the linear combination of the input features that maximizing the normalized time-lagged correlation matrix #In this example, we use pairwise distance of all heavy atoms as the input features for tICA. #input: trajectories, output: tICA projections #prepare data for tICA featurizer = AtomPairsFeaturizer( pair_indices=atom_pairs) #In this example, we use pairwise distances pairdist4tica = featurizing_the_conformations(featurizer, trajectory_dir, traj_list_array, pdb_name) print( "now we have prepared the data for tICA: the pairwise distances for all frames in all trajectories" ) #run tICA tica_model = tICA( lag_time=10, n_components=2 ) #tica lagged should be pre-specified, you can play with this number! tica_trajs = tica_model.fit_transform( pairdist4tica) #projected the MD data onto tica coordinates #print("output of tica:", tica_trajs) #plot the tica projections draw_tica_projection(resultdir, tica_trajs, 'tica_12.png', 1, 2)
plt.ylabel('%s tIC' % (str(tIC_b))) plt.title('tICA Heatmap (log color scale)') plt.colorbar() plt.savefig(opath) plt.close() #####################begin to main program #################3#######inputs atom_pairs = np.loadtxt( 'pairlist.txt', dtype=int ) #indexes for the atom pairs you are interestd(index starts from 0): atom1 atom2 xtc_file_dir = 'trajectories/' #folder to put xtc featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs) traj_list_array = [] for line in open("trajlist"): traj_list_array.append(line.strip()) print traj_list_array #trajectory name ####################calculate the pairwise distances for tica ticadist = [] for trajfile in traj_list_array: xyz = dataset(xtc_file_dir + trajfile, topology='test.pdb') temp = featurizer.fit_transform(xyz) ticadist.append( temp[0] ) #now we have the pairwise distance between the atoms of interest
720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941, 948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133, 1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324, 1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516, 1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713, 1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909, 1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126, 2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310, 2327, 2338 ]) num = len(alpha_carbon_number) atompair = [] for i in range(num): for j in range(i + 1, num): atompair += [[alpha_carbon_number[i], alpha_carbon_number[j]]] dist_feat = AtomPairsFeaturizer(pair_indices=atompair) ## Distance featurizer def feat2(irow): i, row = irow traj = md.load(row['traj_fn'], top=tops[row['top_fn']]) feat_traj = dist_feat.partial_transform(traj) return i, feat_traj with contextlib.closing(Pool(processes=32)) as pool: dist_trajs = dict(pool.imap_unordered(feat2, meta.iterrows())) save_trajs(dist_trajs, 'alpha_carbon', meta)