def test_plot_voronoi():
    """Smoke test: fitting a 15-cluster KMeans on the sample data and
    plotting its Voronoi diagram should yield a matplotlib Subplot."""
    model = KMeans(n_clusters=15)
    model.fit([data])

    axis = plot_voronoi(model)

    assert isinstance(axis, Subplot)
# Example #2
    def test_plot_voronoi(self):
        """Smoke test: plot_voronoi with axis labels on a fitted
        15-cluster KMeans should return a SubplotBase instance."""
        model = KMeans(n_clusters=15)
        model.fit([data])

        axis = plot_voronoi(model, xlabel='x', ylabel='y')

        assert isinstance(axis, SubplotBase)
# Example #3
def cluster_msm(sequences, n_states, lag_times):
    """For each cluster count in `n_states`: fit a KMeans model on
    `sequences`, pickle it, then build a MarkovStateModel at every lag
    time in `lag_times`, pickling each MSM and finally the stacked
    implied timescales (one row of 5 timescales per lag time).
    """
    for n in n_states:
        kmeans = KMeans(n_clusters=n)
        kmeans.fit(sequences)
        io.dump(kmeans, str(n) + 'n_cl.pkl')
        # seed row of zeros; removed again after stacking below
        timescales = np.zeros(5)
        for lag in lag_times:
            msm = MarkovStateModel(lag_time=lag, verbose=False, n_timescales=5)
            msm.fit(kmeans.labels_)
            timescales = np.vstack((timescales, msm.timescales_))
            io.dump(msm, str(n) + 'n_' + str(lag) + 'lt_msm.pkl')
        # drop the zeros seed row before saving
        timescales = np.delete(timescales, (0), axis=0)
        io.dump(timescales, str(n) + 'n_timescales.pkl')
# Example #4
def cluster_project_wrapper(proj_folder, feature_dict, n_states):
    """Cluster a project's feature trajectories into `n_states` microstates.

    If cached pickles exist under `proj_folder` they are reused:
    `assignments.pkl` short-circuits everything; `cluster_mdl.pkl` skips
    only the fit.  Otherwise a fresh KMeans model is fitted on all
    trajectories.  The model and per-trajectory assignments are pickled
    and returned.

    Parameters
    ----------
    proj_folder : path (string) to the project directory
    feature_dict : mapping of trajectory key -> feature array
    n_states : number of clusters

    Returns
    -------
    (cluster_mdl, assignments) : the fitted model and a dict mapping each
    trajectory key to its cluster assignments.
    """
    # Fixed: non-standard 5-space indentation replaced with PEP 8's 4 spaces,
    # and non-idiomatic `.keys()` iteration replaced throughout.
    if os.path.exists(proj_folder + "/assignments.pkl"):
        return (verboseload(proj_folder + "/cluster_mdl.pkl"),
                verboseload(proj_folder + "/assignments.pkl"))
    elif os.path.exists(proj_folder + "/cluster_mdl.pkl"):
        cluster_mdl = verboseload(proj_folder + "/cluster_mdl.pkl")
    else:
        cluster_mdl = KMeans(n_clusters=n_states)
        # fit on the list of per-trajectory feature arrays
        cluster_mdl.fit(list(feature_dict.values()))

    # one assignment array per trajectory, built as a dict comprehension
    assignments = {key: cluster_mdl.transform([feature_dict[key]])
                   for key in feature_dict}

    verbosedump(cluster_mdl, proj_folder + "/cluster_mdl.pkl")
    verbosedump(assignments, proj_folder + "/assignments.pkl")
    return cluster_mdl, assignments
# Example #5
def cluster_project_wrapper(proj_folder, feature_dict, n_states):
    """Cluster a project's feature trajectories into `n_states` microstates,
    reusing pickled results under `proj_folder` when present.

    Returns the fitted KMeans model and a dict mapping each trajectory key
    to its per-frame cluster assignments; both are pickled as a side effect.
    """
    assignments_path = proj_folder + "/assignments.pkl"
    model_path = proj_folder + "/cluster_mdl.pkl"

    # Fully cached: load and return immediately.
    if os.path.exists(assignments_path):
        return verboseload(model_path), verboseload(assignments_path)

    if os.path.exists(model_path):
        cluster_mdl = verboseload(model_path)
    else:
        cluster_mdl = KMeans(n_clusters=n_states)
        cluster_mdl.fit([feature_dict[key] for key in feature_dict.keys()])

    assignments = {}
    for key in feature_dict.keys():
        assignments[key] = cluster_mdl.transform([feature_dict[key]])

    verbosedump(cluster_mdl, model_path)
    verbosedump(assignments, assignments_path)
    return cluster_mdl, assignments
def cluster():
    '''
    This function performs K-means clustering on the tICA space and saves
    assignment files for each trajectory.  Cluster centers are also saved
    at the `microstate_centers.txt` file.

    Relies on module-level globals defined elsewhere in the file:
    n_states, start_traj, end_traj, proj_path, traj_name, out_path.

    Returns
    -------
    (cluster_centers_, ev0, ev1) : the KMeans centers plus the first two
    tIC projections concatenated over all trajectories.
    '''
    cluster = KMeans(n_clusters=n_states, n_jobs=-1, verbose=0,
                     max_iter=100, tol=0.0001)
    dataset, ev0, ev1 = [], [], []
    # print statements converted to the single-argument parenthesized form,
    # which is valid in both Python 2 and Python 3.
    print("Loading projected data...")
    for i in tqdm(range(start_traj, end_traj + 1)):
        a = io.loadh('%s/traj%d_%s.h5' % (proj_path, i, traj_name))['arr_0']
        a = a[:, 0:2]  # keep only the first two tICs
        dataset.append(a)
        # BUG FIX: these two lines were tab-indented while the rest of the
        # body uses spaces — a TabError under Python 3; re-indented.
        ev0.extend(a[:, 0])
        ev1.extend(a[:, 1])
    print("Clustering %d datapoints..." % len(ev0))
    cluster.fit(dataset)
    for i in range(start_traj, end_traj + 1):
        np.savetxt('%s/assigns_%d.txt' % (out_path, i),
                   np.array(cluster.labels_[i - start_traj]), fmt='%d')
    np.savetxt('%s/microstate_centers.txt' % out_path,
               np.array(cluster.cluster_centers_))
    print("Saved microstate assignments and microstate centers at %s" % out_path)
    return cluster.cluster_centers_, np.array(ev0), np.array(ev1)
# Example #7
topFile = 'NarK-strip.pdb'

# Load every projected .npy trajectory in sorted order, keeping the file
# names alongside the data so assignments can be traced back to files.
dataset = []
ls = []
for fname in sorted(glob.glob('*.npy')):
    arr = np.array(np.load(fname))
    dataset.append(arr)
    ls.append(fname)
    print(fname)
np.save('list', ls)

#trajs = [np.load('data.npy')]
# make cluster of the tICs trajectories
cluster = KMeans(n_clusters=myn_clusters)
cluster.fit(dataset)
l = cluster.labels_

# Sorted list of the stripped trajectory files, matching the .npy order.
T = sorted(glob.glob('*strip.mdcrd'))

# Write the output file, which has the information about population of each
# cluster, trajectory name and frame number of the corresponding frame.
asFunctions.writeOPF(l, T, myn_clusters, n_samples)

# Based on information in the output file, build the cpptraj input file
asFunctions.CpptrajInGen_commonTop(topFile)
#pickle.dump( cluster , open( "tICCluster.pkl", "wb"))
# Example #8
    # --- cross-validated MSM scoring over candidate cluster counts ---
    # NOTE(review): this is the interior of a larger function; n_clusters,
    # tica_data, nFolds, results and sub_sampling_data come from the
    # enclosing scope, which is outside this view — confirm there.
    lagtime = 50  # MSM lag time (frames) used for every model below

    for n in n_clusters:
        kmeans = KMeans(n_clusters=n, n_jobs=-1)
        print "Clustering data to %d clusters..." % n
        for fold in range(nFolds):
            train_data = []
            test_data = []
            # Build the train/test split for this fold, trajectory by
            # trajectory: each trajectory is KFold-split and only the
            # slice matching the current fold index is kept.
            for i in range(len(tica_data)):
                cv = KFold(len(tica_data[i]), n_folds=nFolds)
                for current_fold, (train_index, test_index) in enumerate(cv):
                    if current_fold == fold:
                        train_data.append(tica_data[i][train_index])
                        test_data.append(tica_data[i][test_index])
            # Fit KMeans on a strided subsample of the training data to
            # keep the fit cheap, then assign all frames with predict.
            reduced_train_data = sub_sampling_data(train_data, stride=100)
            kmeans.fit(reduced_train_data)
            assignments_train = kmeans.predict(train_data)
            assignments_test = kmeans.predict(test_data)
            # Score an MSM built on the training assignments against the
            # held-out assignments (GMRQ-style train/test scores).
            msm = MarkovStateModel(lag_time=lagtime)
            msm.fit(assignments_train)
            train_score = msm.score_
            test_score = msm.score(assignments_test)

            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'n_states': n,
                'fold': fold,
                'timescales': msm.timescales_
            })