def test_plot_voronoi():
    """Smoke test: fitting a 15-cluster KMeans on the sample data and
    plotting its Voronoi diagram should yield a matplotlib Subplot."""
    model = KMeans(n_clusters=15)
    model.fit([data])

    axis = plot_voronoi(model)

    assert isinstance(axis, Subplot)
# Example #2
    def test_plot_voronoi(self):
        """Smoke test: plot_voronoi with axis labels on a fitted
        15-cluster KMeans should return a SubplotBase instance."""
        model = KMeans(n_clusters=15)
        model.fit([data])

        axis = plot_voronoi(model, xlabel='x', ylabel='y')

        assert isinstance(axis, SubplotBase)
# Example #3
def cluster_msm(sequences, n_states, lag_times):
    """For each cluster count in `n_states`: fit a KMeans model on
    `sequences`, pickle it, then build a MarkovStateModel at every lag
    time in `lag_times`, pickling each MSM and finally the stacked
    implied timescales (one row of 5 timescales per lag time).
    """
    for n in n_states:
        kmeans = KMeans(n_clusters=n)
        kmeans.fit(sequences)
        io.dump(kmeans, str(n) + 'n_cl.pkl')
        # seed row of zeros; removed again after stacking below
        timescales = np.zeros(5)
        for lag in lag_times:
            msm = MarkovStateModel(lag_time=lag, verbose=False, n_timescales=5)
            msm.fit(kmeans.labels_)
            timescales = np.vstack((timescales, msm.timescales_))
            io.dump(msm, str(n) + 'n_' + str(lag) + 'lt_msm.pkl')
        # drop the zeros seed row before saving
        timescales = np.delete(timescales, (0), axis=0)
        io.dump(timescales, str(n) + 'n_timescales.pkl')
# Example #4
def cluster_project_wrapper(proj_folder, feature_dict, n_states):
    """Cluster a project's feature trajectories into `n_states` microstates.

    If cached pickles exist under `proj_folder` they are reused:
    `assignments.pkl` short-circuits everything; `cluster_mdl.pkl` skips
    only the fit.  Otherwise a fresh KMeans model is fitted on all
    trajectories.  The model and per-trajectory assignments are pickled
    and returned.

    Parameters
    ----------
    proj_folder : path (string) to the project directory
    feature_dict : mapping of trajectory key -> feature array
    n_states : number of clusters

    Returns
    -------
    (cluster_mdl, assignments) : the fitted model and a dict mapping each
    trajectory key to its cluster assignments.
    """
    # Fixed: non-standard 5-space indentation replaced with PEP 8's 4 spaces,
    # and non-idiomatic `.keys()` iteration replaced throughout.
    if os.path.exists(proj_folder + "/assignments.pkl"):
        return (verboseload(proj_folder + "/cluster_mdl.pkl"),
                verboseload(proj_folder + "/assignments.pkl"))
    elif os.path.exists(proj_folder + "/cluster_mdl.pkl"):
        cluster_mdl = verboseload(proj_folder + "/cluster_mdl.pkl")
    else:
        cluster_mdl = KMeans(n_clusters=n_states)
        # fit on the list of per-trajectory feature arrays
        cluster_mdl.fit(list(feature_dict.values()))

    # one assignment array per trajectory, built as a dict comprehension
    assignments = {key: cluster_mdl.transform([feature_dict[key]])
                   for key in feature_dict}

    verbosedump(cluster_mdl, proj_folder + "/cluster_mdl.pkl")
    verbosedump(assignments, proj_folder + "/assignments.pkl")
    return cluster_mdl, assignments
# Example #5
def cluster_project_wrapper(proj_folder, feature_dict, n_states):
    """Cluster a project's feature trajectories into `n_states` microstates,
    reusing pickled results under `proj_folder` when present.

    Returns the fitted KMeans model and a dict mapping each trajectory key
    to its per-frame cluster assignments; both are pickled as a side effect.
    """
    assignments_path = proj_folder + "/assignments.pkl"
    model_path = proj_folder + "/cluster_mdl.pkl"

    # Fully cached: load and return immediately.
    if os.path.exists(assignments_path):
        return verboseload(model_path), verboseload(assignments_path)

    if os.path.exists(model_path):
        cluster_mdl = verboseload(model_path)
    else:
        cluster_mdl = KMeans(n_clusters=n_states)
        cluster_mdl.fit([feature_dict[key] for key in feature_dict.keys()])

    assignments = {}
    for key in feature_dict.keys():
        assignments[key] = cluster_mdl.transform([feature_dict[key]])

    verbosedump(cluster_mdl, model_path)
    verbosedump(assignments, assignments_path)
    return cluster_mdl, assignments
def cluster():
    '''
    This function performs K-means clustering on the tICA space and saves
    assignment files for each trajectory.  Cluster centers are also saved
    at the `microstate_centers.txt` file.

    Relies on module-level globals defined elsewhere in the file:
    n_states, start_traj, end_traj, proj_path, traj_name, out_path.

    Returns
    -------
    (cluster_centers_, ev0, ev1) : the KMeans centers plus the first two
    tIC projections concatenated over all trajectories.
    '''
    cluster = KMeans(n_clusters=n_states, n_jobs=-1, verbose=0,
                     max_iter=100, tol=0.0001)
    dataset, ev0, ev1 = [], [], []
    # print statements converted to the single-argument parenthesized form,
    # which is valid in both Python 2 and Python 3.
    print("Loading projected data...")
    for i in tqdm(range(start_traj, end_traj + 1)):
        a = io.loadh('%s/traj%d_%s.h5' % (proj_path, i, traj_name))['arr_0']
        a = a[:, 0:2]  # keep only the first two tICs
        dataset.append(a)
        # BUG FIX: these two lines were tab-indented while the rest of the
        # body uses spaces — a TabError under Python 3; re-indented.
        ev0.extend(a[:, 0])
        ev1.extend(a[:, 1])
    print("Clustering %d datapoints..." % len(ev0))
    cluster.fit(dataset)
    for i in range(start_traj, end_traj + 1):
        np.savetxt('%s/assigns_%d.txt' % (out_path, i),
                   np.array(cluster.labels_[i - start_traj]), fmt='%d')
    np.savetxt('%s/microstate_centers.txt' % out_path,
               np.array(cluster.cluster_centers_))
    print("Saved microstate assignments and microstate centers at %s" % out_path)
    return cluster.cluster_centers_, np.array(ev0), np.array(ev1)
# Example #7
topFile = 'NarK-strip.pdb'

# Load every projected .npy trajectory in sorted order, keeping the file
# names alongside the data so assignments can be traced back to files.
dataset = []
ls = []
for fname in sorted(glob.glob('*.npy')):
    arr = np.array(np.load(fname))
    dataset.append(arr)
    ls.append(fname)
    print(fname)
np.save('list', ls)

#trajs = [np.load('data.npy')]
# make cluster of the tICs trajectories
cluster = KMeans(n_clusters=myn_clusters)
cluster.fit(dataset)
l = cluster.labels_

# Sorted list of the stripped trajectory files, matching the .npy order.
T = sorted(glob.glob('*strip.mdcrd'))

# Write the output file, which has the information about population of each
# cluster, trajectory name and frame number of the corresponding frame.
asFunctions.writeOPF(l, T, myn_clusters, n_samples)

# Based on information in the output file, build the cpptraj input file
asFunctions.CpptrajInGen_commonTop(topFile)
#pickle.dump( cluster , open( "tICCluster.pkl", "wb"))
# Example #8
    # --- cross-validated MSM scoring over candidate cluster counts ---
    # NOTE(review): this is the interior of a larger function; n_clusters,
    # tica_data, nFolds, results and sub_sampling_data come from the
    # enclosing scope, which is outside this view — confirm there.
    lagtime = 50  # MSM lag time (frames) used for every model below

    for n in n_clusters:
        kmeans = KMeans(n_clusters=n, n_jobs=-1)
        print "Clustering data to %d clusters..." % n
        for fold in range(nFolds):
            train_data = []
            test_data = []
            # Build the train/test split for this fold, trajectory by
            # trajectory: each trajectory is KFold-split and only the
            # slice matching the current fold index is kept.
            for i in range(len(tica_data)):
                cv = KFold(len(tica_data[i]), n_folds=nFolds)
                for current_fold, (train_index, test_index) in enumerate(cv):
                    if current_fold == fold:
                        train_data.append(tica_data[i][train_index])
                        test_data.append(tica_data[i][test_index])
            # Fit KMeans on a strided subsample of the training data to
            # keep the fit cheap, then assign all frames with predict.
            reduced_train_data = sub_sampling_data(train_data, stride=100)
            kmeans.fit(reduced_train_data)
            assignments_train = kmeans.predict(train_data)
            assignments_test = kmeans.predict(test_data)
            # Score an MSM built on the training assignments against the
            # held-out assignments (GMRQ-style train/test scores).
            msm = MarkovStateModel(lag_time=lagtime)
            msm.fit(assignments_train)
            train_score = msm.score_
            test_score = msm.score(assignments_test)

            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'n_states': n,
                'fold': fold,
                'timescales': msm.timescales_
            })