def test_kcenters_8():
    """Fit KCenters on float32 and float64 copies of one dataset and compare.

    Two models built with the same ``n_clusters`` and ``random_state`` are
    fit on the single- and double-precision copies.  Their cluster centers,
    distances, labels and predictions are compared with ``eq``, the float32
    distances are checked to be NaN-free, and the float32 model's inertia is
    compared with the second element returned by
    ``libdistance.assign_nearest``.
    """
    samples = np.random.RandomState(1).randn(100, 2)
    X32 = samples.astype(np.float32)
    X64 = samples.astype(np.float64)

    model32 = KCenters(n_clusters=10, random_state=0).fit([X32])
    model64 = KCenters(n_clusters=10, random_state=0).fit([X64])

    # Fitted attributes must agree between the two precisions.
    eq(model32.cluster_centers_, model64.cluster_centers_)
    eq(model32.distances_[0], model64.distances_[0])
    eq(model32.labels_[0], model64.labels_[0])

    # No distance of the float32 fit may be NaN.
    assert not np.any(np.isnan(model32.distances_[0]))

    # predict() agrees across precisions and with the labels stored by fit().
    eq(model32.predict([X32])[0], model64.predict([X64])[0])
    eq(model32.predict([X32])[0], model32.labels_[0])

    inertia32 = float(model32.inertia_)
    reference = libdistance.assign_nearest(
        X32, model32.cluster_centers_, "euclidean")[1]
    eq(inertia32, reference)
def test_kcenters_3():
    """Test predict() using euclidean distance.

    The labels from ``fit_predict`` must equal those from a subsequent
    ``predict`` on the same data, and both must equal the index of the
    closest cluster center found from the full ``cdist`` distance matrix.
    """
    model = KCenters(n_clusters=10)
    points = np.random.randn(100, 2)

    fit_labels = model.fit_predict([points])
    predicted = model.predict([points])
    eq(fit_labels[0], predicted[0])

    # Brute-force reference: distance from every point to every center.
    pairwise = scipy.spatial.distance.cdist(points, model.cluster_centers_)
    nearest = np.argmin(pairwise, axis=1)
    eq(predicted[0], nearest)
def test_kcenters_4():
    """Test predict() using a non-euclidean distance.

    Because of the way the code is structured, a non-euclidean metric takes
    a different path than the euclidean case, so it is tested separately
    here with ``metric='cityblock'``.
    """
    clusterer = KCenters(n_clusters=10, metric='cityblock')
    points = np.random.randn(100, 2)

    fit_labels = clusterer.fit_predict([points])
    predicted = clusterer.predict([points])
    eq(fit_labels[0], predicted[0])

    # Brute-force reference using the same metric as the model.
    pairwise = scipy.spatial.distance.cdist(
        points, clusterer.cluster_centers_, metric='cityblock')
    nearest = np.argmin(pairwise, axis=1)
    eq(predicted[0], nearest)
draw_tica_projection_cross_validation( sub_resultdir, 'Fold_%d_tica_lagtime_%d_train_data_proj_tIC13.png' % (fold, tica_correlation_time), train_data_projection, test_data_projection, 1, 3) for n_tics in n_tics_range: for n_Micro in n_Micro_range: print("parameters: fold-", fold, ',tica_lagtime-', tica_correlation_time, ',n_tics-', n_tics, ',n_Micro-', n_Micro) kcenters = KCenters(n_clusters=n_Micro, metric='euclidean', random_state=0) kcenters.fit(train_data_projection) train_data_sequence = kcenters.predict( train_data_projection) test_data_sequence = kcenters.predict(test_data_projection) msm = MarkovStateModel( n_timescales=3, lag_time=100, reversible_type='transpose', verbose=False, sliding_window=True, ergodic_cutoff='on') #the parameters may change msm.fit(train_data_sequence) train_score = msm.score(train_data_sequence) test_score = msm.score(test_data_sequence) f1 = open( sub_resultdir + '/Fold_%d_tica_lagtime_%d_ntics_%d_nMicro_%d_gmrq.summary' % (fold, tica_correlation_time, n_tics, n_Micro), 'w')