def test_that_all_featurizers_run():
    """Smoke test: build several featurizers and run ``transform`` on each.

    The cached alanine dipeptide trajectories are used as input. Nothing is
    asserted about the outputs; the test passes as long as no constructor or
    ``transform`` call raises.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajs = AlanineDipeptide().get_cached().trajectories
    # First element of the first trajectory, used as the reference structure.
    reference = trajs[0][0]
    _, pairs = get_atompair_indices(reference)

    AtomPairsFeaturizer(pairs).transform(trajs)
    SuperposeFeaturizer(np.arange(15), reference).transform(trajs)
    DihedralFeaturizer(["phi", "psi"]).transform(trajs)
    VonMisesFeaturizer(["phi", "psi"]).transform(trajs)

    # ContactFeaturizer is deliberately left out: it doesn't work on
    # ALA dipeptide.

    RMSDFeaturizer(reference).transform(trajs)
def calculate_distances():
    """Compute atom-pair features for every ``traj*xtc`` file and save them.

    Every file matching ``traj*xtc`` in the current working directory is
    loaded (in sorted name order) with ``structure.gro`` as topology. All
    pairwise combinations of the atoms that are not sodium or chloride are
    passed to ``AtomPairsFeaturizer``, and the result for trajectory ``i`` is
    written to ``out_<i>.npy``.

    Raises
    ------
    IndexError
        If no trajectory file matches ``traj*xtc`` (the atom selection is
        taken from the first loaded trajectory).
    """
    print("Calculating distances...")
    traj_files = sorted(glob.glob("traj*xtc"))
    traj = [md.load(filename, top='structure.gro') for filename in traj_files]

    # Element symbols are conventionally capitalised as 'Na'/'Cl' (as in
    # mdtraj's element table), so an exact match against 'NA'/'CL' never
    # excludes the ions. Compare case-insensitively instead.
    excluded_symbols = {'NA', 'CL'}
    indices = [
        a.index for a in traj[0].topology.atoms
        if a.element.symbol.upper() not in excluded_symbols
    ]
    # indices = traj[i].topology.select('name==CA')

    pairs = list(combinations(indices, 2))
    features = AtomPairsFeaturizer(pairs)
    transformed_data = features.fit_transform(traj)

    for i, per_traj_features in enumerate(transformed_data):
        np.save('out_' + str(i) + '.npy', per_traj_features)
def test_SubsetAtomPairs_2():
    """Compare ``SubsetAtomPairs`` with every pair selected to ``AtomPairsFeaturizer``.

    Both featurizers are run on the cached alanine dipeptide trajectories and
    the per-trajectory outputs are passed pairwise to ``eq`` (which is
    expected to raise on a mismatch).
    """
    trajs = AlanineDipeptide().get_cached().trajectories
    reference = trajs[0][0]
    _, pairs = get_atompair_indices(reference)

    expected = AtomPairsFeaturizer(pairs).transform(trajs)

    subset_featurizer = SubsetAtomPairs(
        pairs, reference, subset=np.arange(len(pairs)))
    actual = subset_featurizer.transform(trajs)

    for got, want in zip(actual, expected):
        eq(got, want)
def test_SubsetAtomPairs_2():
    """Compare ``SubsetAtomPairs`` with every pair selected to ``AtomPairsFeaturizer``.

    Both featurizers are run on the trajectories returned by
    ``fetch_alanine_dipeptide`` and the per-trajectory outputs are passed
    pairwise to ``eq`` (which is expected to raise on a mismatch).
    """
    trajs = fetch_alanine_dipeptide()["trajectories"]
    reference = trajs[0][0]
    _, pairs = get_atompair_indices(reference)

    expected = AtomPairsFeaturizer(pairs).transform(trajs)

    subset_featurizer = SubsetAtomPairs(
        pairs, reference, subset=np.arange(len(pairs)))
    actual = subset_featurizer.transform(trajs)

    for got, want in zip(actual, expected):
        eq(got, want)
def test_AtomPairsFeaturizer_describe_features():
    """Spot-check that ``describe_features`` rows match ``transform`` columns.

    A randomly chosen trajectory is featurized with all pairwise combinations
    of the module-level ``atom_ind``. For 25 randomly drawn feature indices,
    the atom pair reported by ``describe_features`` is fed to
    ``md.compute_distances`` and the result must equal the corresponding
    feature column exactly.
    """
    all_pairs = list(itertools.combinations(atom_ind, 2))
    feat = AtomPairsFeaturizer(all_pairs)

    traj = trajectories[np.random.randint(len(trajectories))]
    feature_matrix = feat.transform([traj])[0]
    description = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        column = np.random.choice(len(description))
        pair = description.iloc[column].atominds
        direct = md.compute_distances(traj, [pair])
        assert (feature_matrix[:, column] == direct.flatten()).all()
def test_SubsetAtomPairs_3():
    """Check that a two-pair subset does NOT reproduce the full featurization.

    ``SubsetAtomPairs`` restricted to ``subset=[0, 1]`` is compared against
    ``AtomPairsFeaturizer`` on all pairs; the ``eq`` comparison is required
    to raise ``AssertionError``. If it does not, the test itself fails.
    """
    trajs = AlanineDipeptide().get_cached().trajectories
    reference = trajs[0][0]
    _, pairs = get_atompair_indices(reference)

    full_output = AtomPairsFeaturizer(pairs).transform(trajs)

    subset_featurizer = SubsetAtomPairs(
        pairs, reference, subset=np.array([0, 1]))
    subset_output = subset_featurizer.transform(trajs)

    mismatch_detected = False
    try:
        for got, want in zip(subset_output, full_output):
            eq(got, want)
    except AssertionError:
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
def build_dataset():
    """Return atom-pair features for a subsampled MetEnkephalin dataset.

    All ``(i, j)`` pairs with ``j < i`` over the atoms of the first
    trajectory are shuffled with the global NumPy RNG (re-seeded to 0 here),
    and the first 200 are used as features. Each trajectory is subsampled to
    every 10th frame before being transformed.
    """
    trajs = MetEnkephalin().get().trajectories

    pairs = [(i, j) for i in range(trajs[0].n_atoms) for j in range(i)]

    # Fixed seed so the selected pairs are the same on every call.
    np.random.seed(0)
    np.random.shuffle(pairs)

    n_pairs = 200
    featurizer = AtomPairsFeaturizer(pairs[:n_pairs])
    return featurizer.transform([traj[::10] for traj in trajs])
def load_met():
    """Load MetEnkephalin and return its atom-pair and dihedral features.

    Returns
    -------
    X, Y
        ``X`` is the ``AtomPairsFeaturizer`` output for every ``(j, i)`` pair
        with ``j < i < 75``; ``Y`` is the output of a default
        ``DihedralFeaturizer``. Both come from ``fit_transform`` on the
        dataset's trajectories.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer, DihedralFeaturizer

    # Pairs over atom indices 0..74, lower index first.
    pairs = [(j, i) for i in range(75) for j in range(i)]

    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
def load_fs():
    """Load MinimalFsPeptide and return its atom-pair and dihedral features.

    Returns
    -------
    X, Y
        ``X`` is the ``AtomPairsFeaturizer`` output for every ``(j, i)`` pair
        with ``j < i < 264``; ``Y`` is the output of a default
        ``DihedralFeaturizer``. Both come from ``fit_transform`` on the
        dataset's trajectories.
    """
    from msmbuilder.example_datasets import MinimalFsPeptide
    trajs = MinimalFsPeptide().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer, DihedralFeaturizer

    # Pairs over atom indices 0..263, lower index first.
    pairs = [(j, i) for i in range(264) for j in range(i)]

    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
plt.ylabel('%s tIC' % (str(tIC_b))) plt.title('tICA Heatmap (log color scale)') plt.colorbar() plt.savefig(opath) plt.close() #####################begin to main program #################3#######inputs atom_pairs = np.loadtxt( 'pairlist.txt', dtype=int ) #indexes for the atom pairs you are interestd(index starts from 0): atom1 atom2 xtc_file_dir = 'trajectories/' #folder to put xtc featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs) traj_list_array = [] for line in open("trajlist"): traj_list_array.append(line.strip()) print traj_list_array #trajectory name ####################calculate the pairwise distances for tica ticadist = [] for trajfile in traj_list_array: xyz = dataset(xtc_file_dir + trajfile, topology='test.pdb') temp = featurizer.fit_transform(xyz) ticadist.append( temp[0] ) #now we have the pairwise distance between the atoms of interest
# For each feature-definition file listed in `pairwise_distances_files_list`,
# featurize all trajectories with the atom pairs it contains and split the
# per-trajectory data into train/test sets with K-fold cross validation.
n_splits = 5

# Read the list up front so the file handle is closed before the featurization
# work starts (iterating over a bare open() never closed it explicitly).
with open(pairwise_distances_files_list) as feature_list_file:
    feature_list_lines = feature_list_file.readlines()

temp_num = 0  # remains 0 when the list file is empty
for temp_num, features_file in enumerate(feature_list_lines, start=1):
    print(
        '----------------------------------------------------------------------------------------'
    )
    features_path = features_file.strip()
    print("now we are handling the feature file:", features_path)
    atom_pairs = np.loadtxt(features_path, dtype='int')
    print("the features we are handling are:\n", atom_pairs)

    # One result directory per feature file, numbered from 1.
    sub_resultdir = resultdir + '/feature_list' + str(temp_num) + '/'
    if not os.path.exists(sub_resultdir):
        os.makedirs(sub_resultdir)

    featurizer = AtomPairsFeaturizer(pair_indices=atom_pairs)
    data = featurizing_the_conformations(featurizer, trajectory_dir,
                                         traj_list_array, pdb_name)

    # 5-fold cross validation, exclusive; folds are formed over trajectories.
    cv = KFold(n_splits=n_splits, shuffle=False)
    fold = 0
    for fold, (train_index, test_index) in enumerate(
            cv.split(traj_list_array), start=1):
        print("now we are handling fold %d" % (fold))
        print("training data:", [traj_list_array[i] for i in train_index])
        print("testing data", [traj_list_array[i] for i in test_index])
        train_data = [data[i] for i in train_index]
        test_data = [data[i] for i in test_index]
atom_pair_list, dtype=int) #import the pairwise distance index file as integer type traj_list_array = [] for line in open(trajname_list): traj_list_array.append(line.strip()) # In[157]: #step 1.0: tICA #Select kinetic slow variables via tICA (time-lagged independent component analysis) #tICA finds the linear combination of the input features that maximizing the normalized time-lagged correlation matrix #In this example, we use pairwise distance of all heavy atoms as the input features for tICA. #input: trajectories, output: tICA projections #prepare data for tICA featurizer = AtomPairsFeaturizer( pair_indices=atom_pairs) #In this example, we use pairwise distances pairdist4tica = featurizing_the_conformations(featurizer, trajectory_dir, traj_list_array, pdb_name) print( "now we have prepared the data for tICA: the pairwise distances for all frames in all trajectories" ) #run tICA tica_model = tICA( lag_time=10, n_components=2 ) #tica lagged should be pre-specified, you can play with this number! tica_trajs = tica_model.fit_transform( pairdist4tica) #projected the MD data onto tica coordinates #print("output of tica:", tica_trajs) #plot the tica projections draw_tica_projection(resultdir, tica_trajs, 'tica_12.png', 1, 2)
720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941, 948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133, 1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324, 1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516, 1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713, 1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909, 1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126, 2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310, 2327, 2338 ]) num = len(alpha_carbon_number) atompair = [] for i in range(num): for j in range(i + 1, num): atompair += [[alpha_carbon_number[i], alpha_carbon_number[j]]] dist_feat = AtomPairsFeaturizer(pair_indices=atompair) ## Distance featurizer def feat2(irow): i, row = irow traj = md.load(row['traj_fn'], top=tops[row['top_fn']]) feat_traj = dist_feat.partial_transform(traj) return i, feat_traj with contextlib.closing(Pool(processes=32)) as pool: dist_trajs = dict(pool.imap_unordered(feat2, meta.iterrows())) save_trajs(dist_trajs, 'alpha_carbon', meta)