# tICA -> k-means -> MSM pipeline for the Src system.
#
# Loads featurized trajectories (one .npy per trajectory), projects them onto
# time-lagged independent components, clusters the projection into
# microstates, and builds a Markov state model from the cluster labels.
import glob
import pickle

import numpy as np
from msmbuilder.decomposition import tICA
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import io
import msmbuilder.cluster

lag_time = 10          # tICA lag time (frames)
n_clusters = 2000      # number of k-means microstates
sys = 'Src-'           # system-name prefix for output files
n_timescales = 10      # number of MSM implied timescales to compute
lagTime = 50           # MSM lag time in frames (comment said 5 ns -- confirm step size)
# BUGFIX: n_components was used below but never defined (NameError).
n_components = 10      # TODO(review): confirm the intended number of tICs

# Load the featurized trajectories.
dataset = []
for fname in glob.glob('highRMSF_phi_psi/*.npy'):
    dataset.append(np.array(np.load(fname)))

# Fit tICA and project the data onto the tICs.
tica = tICA(n_components=n_components, lag_time=lag_time)
tica.fit(dataset)
tica_traj = tica.transform(dataset)
# BUGFIX: close the pickle file instead of leaking the handle.
with open(sys + '_tICs_' + str(n_components) + '.pkl', 'wb') as fh:
    pickle.dump(tica, fh)

# Cluster the tICA projection into microstates.
states = msmbuilder.cluster.KMeans(n_clusters=n_clusters)
states.fit(tica_traj)
io.dump(states, sys + '_tICs_' + str(n_components) +
        'nCluster_' + str(n_clusters) + '.pkl')

# Build the MSM from the microstate assignments.
# BUGFIX: the original called msm.fit_transform(cl.labels_), but `cl` is never
# defined in this script; the fitted k-means object is named `states`.
msm = MarkovStateModel(lag_time=lagTime, n_timescales=n_timescales)
msm.fit_transform(states.labels_)
io.dump(msm, 'MSM' + sys)
# Z-score-normalize each feature using global statistics, then run tICA and
# k-means clustering on the normalized features.
# NOTE(review): `List` (trajectory basenames), `np`, `msmbuilder` and `io`
# must be defined/imported earlier in the file -- they are not visible here.

# First pass: stack all trajectories to get global per-feature mean/std.
dataset = []
for name in List:
    dataset.append(np.load(name + ".npy"))
# tran_data[f] is the concatenated time series of feature f across all trajs.
tran_data = np.transpose(np.vstack(dataset))

# Second pass: normalize each trajectory feature-by-feature.
n_features = 8  # assumes 8 feature columns per frame -- TODO confirm
dataset = []
for name in List:
    traj = np.array(np.load(name + ".npy"))
    tran_traj = np.transpose(traj)
    # FIX: the original reused loop variable `i` for both the trajectory loop
    # and this feature loop, shadowing the outer index.
    for feat in range(n_features):
        tran_traj[feat] = ((tran_traj[feat] - np.mean(tran_data[feat]))
                           / np.std(tran_data[feat]))
    dataset.append(np.transpose(tran_traj))

# Perform time-lagged independent component analysis on the normalized features
from msmbuilder.decomposition import tICA
tica = tICA(n_components=4, lag_time=1)
tica.fit(dataset)
tica_traj = tica.transform(dataset)
np.save('tica_traj', tica_traj)

# Perform k-means clustering based on the first 4 tICs
states = msmbuilder.cluster.KMeans(n_clusters=200)
states.fit(tica_traj)

# Save the clustered files
io.dump(states, 'clustering_tica.pkl')
# Build the feature dataset for the 2OIQ system, remember which .npy file
# landed at which (1-based) position in the dataset, then cluster the
# features with k-means and persist the model.
#
# Featurization command used upstream, for the record:
#   msmb AtomPairsFeaturizer --out mainnode-pair_indices_stride20-2
#     --pair_indices AtomIndices.txt --top A2.prmtop
#     --trjs 'MD*/*.mdcrd' --stride 20
import numpy as np
from msmbuilder.utils import io
import msmbuilder.cluster
import glob
import pickle

name_sys = '2OIQ'

dataset = []
inf = {}  # maps each .npy path -> its 1-based position in `dataset`
for path in sorted(glob.glob('featurizes_RMSD+drugDist/*.npy')):
    dataset.append(np.load(path))
    inf[path] = len(dataset)
    print(path)
    print(len(dataset))

# Persist the file -> index mapping (pickled, despite the .txt suffix).
with open('maping_'+name_sys+'.txt', 'wb') as handle:
    pickle.dump(inf, handle)

# To read the mapping back later:
#   with open('file.txt', 'rb') as handle:
#       b = pickle.loads(handle.read())

states = msmbuilder.cluster.KMeans(n_clusters=500)
states.fit(dataset)
io.dump(states, 'clustering_'+name_sys+'_db.pkl')
# Cluster featurized trajectories with k-means, then emit an adaptive-sampling
# output file describing the least-populated clusters.
# NOTE(review): relies on `np`, `glob`, `pickle`, `msmbuilder.cluster` and
# `adptvSampling` being imported earlier in the file -- not visible here.
from msmbuilder.utils import io

myn_clusters = 200   # number of k-means microstates
n_samples = 50       # frames to pick per under-populated cluster

dataset = []
# NOTE(review): 'readme' is opened (and truncated) but never written;
# closed immediately instead of leaking the handle -- confirm it is needed.
inf = open('readme', 'w')
inf.close()

# Load every featurized trajectory (one .npy per trajectory), in sorted order.
for fname in sorted(glob.glob('2OIQ-ftrz/*.npy')):
    dataset.append(np.load(fname))
    print(fname)  # BUGFIX: was the Python 2 statement `print i`

states = msmbuilder.cluster.KMeans(n_clusters=myn_clusters)
states.fit(dataset)
io.dump(states, 'clustering.pkl')

# Reload the saved model.
# BUGFIX: close the pickle file instead of leaking the handle.
with open('clustering.pkl', 'rb') as fh:
    cluster = pickle.load(fh)
l = cluster.labels_

# The address should be the address of trajectories corresponding to dataset;
# sorted so the trajectory order matches `dataset` above.
T = sorted(glob.glob('rawTrj/MD1-rwTrj/*.mdcrd'))

# Write the output file, which has the information about population of each
# cluster, trajectory name and frame number of corresponding frame.
adptvSampling.writeOPF_lessPop(l, T, myn_clusters, n_samples)
# Based on information in the output file, build the cpptraj input file; the
# topology name you give it should be common for all trajectories.
# Implied-timescale scan: build an MSM at each candidate lag time and record
# the ten slowest timescales for a convergence plot.
# NOTE(review): assumes `np`, `MarkovStateModel`, `io`, `plt` and the fitted
# clustering object `cl` are defined earlier in the file -- confirm.
n_timescales=10
stepS = 1.2  # presumably ns per frame, used to convert lags to ns -- TODO confirm
lag_times=[1, 2, 3,4, 5,6, 7,8, 9,10,11,12,13,14,15,16,17]
l = len(lag_times)
# ts[k, j] = k-th implied timescale of the MSM built at lag_times[j]
ts=np.zeros([10,l])
# Lag times converted to nanoseconds (plain list, for plotting).
ns_lt=np.ndarray.tolist(stepS*np.array(lag_times))
index = 0
for i in lag_times:
    msm=MarkovStateModel(lag_time=i, n_timescales=n_timescales)
    msm.fit_transform(cl.labels_)
    ts[:,index]=msm.timescales_
    index=index+1
    io.dump(msm,'MSM'+str(i)+'.pkl')  # one saved MSM per lag time
# Alternative: reload previously saved MSMs instead of refitting.
"""
for i in lag_times:
    msm = io.load('MSM'+str(i)+'.pkl')
    ts[:,index]=msm.timescales_
    index=index+1
"""
# Plot the timescales; k holds the ordinal suffix ("1st", "2nd", ...) per curve.
fig, ax = plt.subplots(1,1)
for i in range(10):
    j=i+1
    if j==1:
        k='st'
    elif j==2:
        # NOTE(review): chunk is truncated here in this view.
# Implied-timescale scan over doubled lag times (2..40): build an MSM at each
# lag and record the ten slowest timescales for a convergence plot.
# NOTE(review): assumes `np`, `MarkovStateModel`, `io`, `plt`, `stepS`,
# `n_timescales` and the fitted clustering `cl` are defined earlier -- confirm.
lag_times=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
lag_times=[ lag * 2 for lag in lag_times ]
l = len(lag_times)
### Plot the ten slowest timescales
# ts[k, j] = k-th implied timescale of the MSM built at lag_times[j]
ts=np.zeros([10,l])
# Lag times converted to nanoseconds via the step size (plain list).
ns_lt=np.ndarray.tolist(stepS*np.array(lag_times))
index = 0
for i in lag_times:
    msm=MarkovStateModel(lag_time=i, n_timescales=n_timescales)
    msm.fit_transform(cl.labels_)
    ts[:,index]=msm.timescales_
    index=index+1
    io.dump(msm,'MSM'+str(i)+'.pkl')  # one saved MSM per lag time
fig, ax = plt.subplots(1,1)
ax.set_xlim(0,80)
ax.set_ylim(10,10000)
# Build the ordinal suffix ("1st", "2nd", "3rd", "4th", ...) for each curve.
for i in range(10):
    j=i+1
    if j==1:
        k='st'
    elif j==2:
        k='nd'
    elif j==3:
        k='rd'
    elif j>3:
        # NOTE(review): chunk is truncated here in this view.
# NOTE(review): this chunk begins mid-expression -- the list below is the tail
# of an atom-pair index list passed to a featurizer call whose opening is
# outside this view; `scheme='ca'` presumably selects C-alpha distances.
[157, 291], [162, 420], [124, 439], [48, 171], [241, 320],
[52, 147], [29, 436], [53, 445], [125, 155], [294, 392],
[99, 189], [230, 274], [92, 128], [91, 151], [97, 338],
[356, 425], [200, 444], [351, 439], [138, 197], [42, 169],
[234, 425], [110, 331], [36, 43], [240, 347], [160, 245],
[6, 50], [293, 396], [287, 299], [25, 158], [13, 233],
[22, 321], [210, 369], [29, 204], [230, 421], [256, 275],
[205, 424], [237, 313], [117, 146], [34, 63], [377, 443]], scheme='ca')
# Convert the featurized distances to nested lists and save one .npy per
# trajectory, named after the trajectory file's basename.
# NOTE(review): `dist`, `file`, `os`, `np`, `glob` come from outside this
# view; `dir` and `file` shadow builtins -- left unchanged here.
ftr = [np.ndarray.tolist(dist[0][i][:]) for i in range(len(dist[0]))]
dir = os.path.dirname(file)
filename = file.replace(dir + '/', '', 1)
np.save(filename + '.npy', ftr)
from msmbuilder.utils import io
# Gather every per-trajectory .npy into one dataset, dump it as a single
# pickle, then delete the intermediate .npy files.
dataset = []
for i in sorted(glob.glob('*.npy')):
    a = np.load(i)
    dataset.append(a)
io.dump(dataset, 'dataset_nark.best_nonredu.pkl')
cmd = 'rm *.npy'
os.system(cmd)