# ===== Beispiel #1 (example snippet 1; stray "0" below the header was a scraped vote count) =====
# --- tICA / clustering / MSM pipeline for one system ---
lag_time = 10        # tICA lag time (frames)
n_clusters = 2000    # number of k-means microstates
sys = 'Src-'         # system-name prefix for output files
                     # NOTE(review): shadows the stdlib `sys` module — rename if sys is needed later
n_timescales = 10    # number of implied timescales kept by the MSM
lagTime = 50         # MSM lag time; 5 ns per the original comment (frame step presumably 0.1 ns — confirm)

# Load the featurized trajectories (one .npy array per trajectory).
dataset = []
import glob
for path in glob.glob('highRMSF_phi_psi/*.npy'):
    dataset.append(np.asarray(np.load(path)))

# Build the tICA model and project every trajectory onto the tICs.
# `n_components` is assumed to be defined earlier in the file — TODO confirm.
tica = tICA(n_components=n_components, lag_time=lag_time)
tica.fit(dataset)
tica_traj = tica.transform(dataset)
# FIX: the original passed an unclosed `open(...)` handle to pickle.dump
# (file-handle leak); use a context manager instead.
with open(sys + '_tICs_' + str(n_components) + '.pkl', 'wb') as handle:
    pickle.dump(tica, handle)

# Cluster the tICA projections into microstates.
states = msmbuilder.cluster.KMeans(n_clusters=n_clusters)
states.fit(tica_traj)
io.dump(
    states, sys + '_tICs_' + str(n_components) + 'nCluster_' +
    str(n_clusters) + '.pkl')

# Build the MSM from the microstate assignments.
# BUG FIX: the original called `msm.fit_transform(cl.labels_)`, but `cl` is
# never defined in this snippet — the fitted k-means estimator is `states`.
msm = MarkovStateModel(lag_time=lagTime, n_timescales=n_timescales)
msm.fit_transform(states.labels_)
io.dump(msm, 'MSM' + sys)
# Load every feature file and stack them so each row of `tran_data` is one
# feature across all frames of all trajectories.
# `List` (the trajectory/file basenames) is assumed to be defined earlier — TODO confirm.
dataset = []
for name in List:
    dataset.append(np.load(name + ".npy"))
tran_data = np.transpose(np.vstack(dataset))

# Preprocessing: z-score the first 8 features with the global mean/std.
# FIX: the statistics are loop-invariant, so compute them once instead of
# recomputing np.mean/np.std for every trajectory.
feat_mean = np.array([np.mean(tran_data[k]) for k in range(8)])
feat_std = np.array([np.std(tran_data[k]) for k in range(8)])

dataset = []
for name in List:
    # BUG-PRONE in the original: the inner `for i in range(8)` loop reused
    # and shadowed the outer loop variable `i`. Broadcasting removes the
    # inner loop entirely.
    traj = np.transpose(np.array(np.load(name + ".npy")))
    traj[:8] = (traj[:8] - feat_mean[:, None]) / feat_std[:, None]
    dataset.append(np.transpose(traj))

# Time-lagged independent component analysis on the normalized features:
# project every trajectory onto the 4 slowest tICs (lag of one frame),
# and save the projections for later inspection.
from msmbuilder.decomposition import tICA

tica = tICA(n_components=4, lag_time=1)
tica_traj = tica.fit_transform(dataset)
np.save('tica_traj', tica_traj)

# k-means in tIC space using the first 4 tICs (200 microstates),
# then persist the fitted clustering model.
states = msmbuilder.cluster.KMeans(n_clusters=200)
states.fit(tica_traj)
io.dump(states, 'clustering_tica.pkl')
# ===== Beispiel #3 (example snippet 3; stray "0" below the header was a scraped vote count) =====
# msmb AtomPairsFeaturizer --out mainnode-pair_indices_stride20-2 --pair_indices AtomIndices.txt --top A2.prmtop --trjs 'MD*/*.mdcrd' --stride 20
import numpy as np
from msmbuilder.utils import io
import msmbuilder.cluster
import glob
import pickle

name_sys = '2OIQ'

# Load every feature file; `inf` maps each file name to its 1-based position
# in `dataset`, so cluster labels can later be traced back to source files.
dataset = []
inf = {}
for count, fname in enumerate(sorted(glob.glob('featurizes_RMSD+drugDist/*.npy')), 1):
    dataset.append(np.load(fname))
    inf[fname] = count
    print(fname)
    print(count)

# Persist the file-to-index mapping (pickled, despite the .txt suffix).
with open('maping_' + name_sys + '.txt', 'wb') as handle:
    pickle.dump(inf, handle)

"""
with open('file.txt', 'rb') as handle:
  b = pickle.loads(handle.read())
"""

# k-means into 500 microstates, then persist the fitted model.
states = msmbuilder.cluster.KMeans(n_clusters=500)
states.fit(dataset)

io.dump(states, 'clustering_' + name_sys + '_db.pkl')
from msmbuilder.utils import io

# Cluster the featurized data, then select frames for adaptive sampling.
myn_clusters = 200   # number of k-means clusters
n_samples = 50       # frames to pick per (least-populated) cluster

# NOTE(review): the original did `inf = open('readme','w')` and never wrote
# to or closed the handle (resource leak). The truncate/create side effect is
# preserved in case downstream steps expect an empty 'readme' file.
open('readme', 'w').close()

dataset = []
for i in sorted(glob.glob('2OIQ-ftrz/*.npy')):
    a = np.load(i)
    dataset.append(a)
    # BUG FIX: `print i` is Python 2 statement syntax and a SyntaxError in
    # Python 3; the rest of the file uses the print() function.
    print(i)

states = msmbuilder.cluster.KMeans(n_clusters=myn_clusters)
states.fit(dataset)

io.dump(states, 'clustering.pkl')

# Reload the saved clustering and pull out the per-frame state labels.
# FIX: close the pickle file via a context manager (original leaked it).
with open('clustering.pkl', 'rb') as handle:
    cluster = pickle.load(handle)
l = cluster.labels_

# The address should be the address of trajectories corresponding to dataset.
T = sorted(glob.glob('rawTrj/MD1-rwTrj/*.mdcrd'))

# Write the output file, which has the information about population of each
# cluster, trajectory name and frame number of the corresponding frame.
# `adptvSampling` is assumed to be imported elsewhere in the file — TODO confirm.
adptvSampling.writeOPF_lessPop(l, T, myn_clusters, n_samples)

# Based on information in the output file, build the cpptraj input file; as
# you give it the topology name, it should be common for all trajectories.
# ===== Beispiel #5 (example snippet 5; stray "0" below the header was a scraped vote count) =====
# Implied-timescale scan: build one MSM per candidate lag time and record its
# slowest timescales, for a lag-time convergence plot.
n_timescales = 10
stepS = 1.2          # presumably the frame step in ns — TODO confirm
lag_times = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
l = len(lag_times)

# FIX: size the timescale matrix from n_timescales instead of hard-coding 10,
# so changing n_timescales above cannot desynchronize the array shape.
ts = np.zeros([n_timescales, l])

# Lag times converted to nanoseconds (x axis of the plot).
ns_lt = np.ndarray.tolist(stepS * np.array(lag_times))

# `cl` is assumed to be a fitted clustering estimator defined earlier whose
# .labels_ are the per-frame microstate assignments — TODO confirm.
for index, lt in enumerate(lag_times):
    msm = MarkovStateModel(lag_time=lt, n_timescales=n_timescales)
    msm.fit_transform(cl.labels_)
    ts[:, index] = msm.timescales_
    io.dump(msm, 'MSM' + str(lt) + '.pkl')

"""
for i in lag_times:
    msm = io.load('MSM'+str(i)+'.pkl')
    ts[:,index]=msm.timescales_
    index=index+1
"""

# Figure for the implied-timescale plot.
fig, ax = plt.subplots(1,1)

# NOTE(review): this loop is truncated in the source — the elif chain is cut
# off mid-branch at a snippet boundary. It appears to build ordinal suffixes
# ('1st', '2nd', ...) for labelling the ten slowest timescales; recover the
# full body before running this block.
for i in range(10):
  j=i+1
  if j==1:
    k='st'
  elif j==2:
# Repeat the implied-timescale scan on a wider, doubled lag-time grid
# (reuses n_timescales, stepS and `cl` from the scan above).
lag_times = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
             11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
lag_times = [lag * 2 for lag in lag_times]
l = len(lag_times)

### Plot the ten slowest timescales
# FIX: size the matrix from n_timescales instead of the hard-coded 10.
ts = np.zeros([n_timescales, l])

# Lag times in nanoseconds (x axis of the plot).
ns_lt = np.ndarray.tolist(stepS * np.array(lag_times))

for index, lt in enumerate(lag_times):
    msm = MarkovStateModel(lag_time=lt, n_timescales=n_timescales)
    msm.fit_transform(cl.labels_)
    ts[:, index] = msm.timescales_
    io.dump(msm, 'MSM' + str(lt) + '.pkl')

fig, ax = plt.subplots(1, 1)

# Axis window for the timescale plot (ns on x, implied timescales on y).
ax.set_xlim(0, 80)
ax.set_ylim(10, 10000)

# NOTE(review): this loop is truncated in the source — the final branch
# `elif j>3:` has no body (snippet boundary). It builds ordinal suffixes
# ('1st', '2nd', '3rd', presumably 'th' for the rest — TODO confirm) for
# labelling the ten slowest timescales; recover the full body before running.
for i in range(10):
  j=i+1
  if j==1:
    k='st'
  elif j==2:
    k='nd'
  elif j==3:
    k='rd'
  elif j>3:
# ===== Beispiel #7 (example snippet 7; stray "0" below the header was a scraped vote count) =====
# NOTE(review): the snippet that follows begins mid-expression (the head of the
# featurizer call and the enclosing loop are outside this excerpt).
                                         [157, 291], [162, 420], [124, 439],
                                         [48, 171], [241, 320], [52, 147],
                                         [29, 436], [53, 445], [125, 155],
                                         [294, 392], [99, 189], [230, 274],
                                         [92, 128], [91, 151], [97, 338],
                                         [356, 425], [200, 444], [351, 439],
                                         [138, 197], [42, 169], [234, 425],
                                         [110, 331], [36, 43], [240, 347],
                                         [160, 245], [6, 50], [293, 396],
                                         [287, 299], [25, 158], [13, 233],
                                         [22, 321], [210, 369], [29, 204],
                                         [230, 421], [256, 275], [205, 424],
                                         [237, 313], [117, 146], [34, 63],
                                         [377, 443]],
                               scheme='ca')
    ftr = [np.ndarray.tolist(dist[0][i][:]) for i in range(len(dist[0]))]
    dir = os.path.dirname(file)
    filename = file.replace(dir + '/', '', 1)
    np.save(filename + '.npy', ftr)

from msmbuilder.utils import io

# Gather every per-file feature array into one dataset, archive it as a
# single pickle, then remove the now-redundant .npy files.
dataset = []
npy_files = sorted(glob.glob('*.npy'))
for path in npy_files:
    dataset.append(np.load(path))

io.dump(dataset, 'dataset_nark.best_nonredu.pkl')

# FIX: delete via os.remove instead of `os.system('rm *.npy')` — portable
# (no shell dependency) and removes exactly the files that were archived.
for path in npy_files:
    os.remove(path)