Ejemplo n.º 1
0
    print "Clustering."
# Fit k-means with 1200 clusters on `reduced_data` and save the cluster centers.
kmeans = KMeans(n_clusters=1200).fit(reduced_data)
Gen_fn = "Gens.npy"
np.save(Gen_fn, kmeans.cluster_centers_)
if verbose:
    print("Wrote: %s" % Gen_fn)

# Persist the fitted clustering model under its own directory.
model_dir = "kmeans_model_n_1200"
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
model_fn = os.path.join(model_dir, 'kmeans-combined.pkl')
joblib.dump(kmeans, model_fn)
if verbose:
    print("Saved cluster model to %s" % model_fn)
    print("Assigning..")

# Assign `tica_data` to the fitted clusters and save the result.
assignments = kmeans.predict(tica_data)
np.save('Assignments.npy', assignments)
# Report success only after the file has actually been written.
if verbose:
    print("Wrote assignments")

if verbose:
    print("Building MSMs:")
# Fit one Markov state model per lag time and collect its timescales.
lagtimes = [1, 10, 20, 30, 40, 50, 100, 150, 200]
msmts = []
for lagtime in lagtimes:
    if verbose:
        print("\tLagtime: %d" % lagtime)
    msm = MarkovStateModel(lag_time=lagtime).fit(assignments)
    msmts.append(msm.timescales_)
# Output file names; presumably consumed by code beyond the visible part of
# this script -- TODO confirm.
lagtime_fn = "lagtimes.txt"
msmts_fn = "ImpliedTimescales.npy"
Ejemplo n.º 2
0
    # Compute the K-fold index splits of every trajectory once. KFold is built
    # with default arguments, so the splits do not depend on the fold being
    # evaluated or on the cluster count.
    # NOTE(review): assumes the legacy scikit-learn KFold (no shuffling by
    # default), which makes the splits deterministic -- confirm.
    fold_splits = [list(KFold(len(traj), n_folds=nFolds)) for traj in tica_data]

    for n in n_clusters:
        kmeans = KMeans(n_clusters=n, n_jobs=-1)
        print("Clustering data to %d clusters..." % n)
        for fold in range(nFolds):
            # Gather, per trajectory, the frames of this fold's training and
            # held-out partitions.
            train_data = []
            test_data = []
            for traj, splits in zip(tica_data, fold_splits):
                train_index, test_index = splits[fold]
                train_data.append(traj[train_index])
                test_data.append(traj[test_index])

            # Fit the clustering on the output of sub_sampling_data (called
            # with stride=100), then assign the full train and test sets.
            reduced_train_data = sub_sampling_data(train_data, stride=100)
            kmeans.fit(reduced_train_data)
            assignments_train = kmeans.predict(train_data)
            assignments_test = kmeans.predict(test_data)

            # Fit the MSM on the training assignments and score both sets.
            msm = MarkovStateModel(lag_time=lagtime)
            msm.fit(assignments_train)
            train_score = msm.score_
            test_score = msm.score(assignments_test)

            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'n_states': n,
                'fold': fold,
                'timescales': msm.timescales_
            })

    # Build the DataFrame only after every cluster count has been processed;
    # converting inside the loop would turn `results` into a DataFrame before
    # the next iteration tries to append a record to it.
    results = pd.DataFrame(results)