def test_chunksize(self):
    """Check that the chunksize given to a pipeline reaches every stage.

    A chain is built from ``api.source``, ``api.tica`` and
    ``api.cluster_mini_batch_kmeans``; the pipeline created from it with an
    explicit ``chunksize`` must report that value itself and on each element
    of its chain.
    """
    expected = 1001
    source = api.source(self.traj_files, top=self.pdb_file)
    stages = [source, api.tica(), api.cluster_mini_batch_kmeans()]
    pipe = api.pipeline(stages, chunksize=expected)

    assert pipe.chunksize == expected
    # every stage of the chain must have picked up the pipeline's chunksize
    for stage in pipe._chain:
        assert stage.chunksize == expected
def test_3gaussian_1d_singletraj(self):
    """Cluster one 1D trajectory drawn from three Gaussians.

    The samples are shifted by -2, 0 and +2, so the resulting cluster
    centers are expected to cover all three regions: below -1, between
    -1 and 1, and above 1.
    """
    # generate 1D data from three gaussians (means -2, 0 and +2)
    X = np.hstack([
        np.random.randn(200) - 2.0,
        np.random.randn(300),
        np.random.randn(400) + 2.0,
    ])
    kmeans = cluster_mini_batch_kmeans(X, batch_size=0.5, k=100, max_iter=10000)
    cc = kmeans.clustercenters

    # One check per Gaussian. The previous thresholds (cc < 1.0 and
    # cc > -1.0) were both implied by the middle check, so the outer
    # Gaussians were never actually verified.
    assert np.any(cc < -1.0)
    assert np.any((cc > -1.0) & (cc < 1.0))
    assert np.any(cc > 1.0)
def test_3gaussian_2d_multitraj(self):
    """Cluster three 2D trajectories, one per Gaussian.

    Only the first coordinate carries data (Gaussians shifted by -2, 0 and
    +2); the second coordinate is zero everywhere. The cluster centers are
    expected to cover all three regions along the first coordinate.
    """
    # generate 2D data: first coordinate from three gaussians, second is zero
    X1 = np.zeros((200, 2))
    X1[:, 0] = np.random.randn(200) - 2.0
    X2 = np.zeros((300, 2))
    X2[:, 0] = np.random.randn(300)
    X3 = np.zeros((400, 2))
    X3[:, 0] = np.random.randn(400) + 2.0
    X = [X1, X2, X3]

    kmeans = cluster_mini_batch_kmeans(X, batch_size=0.5, k=100, max_iter=10000)
    cc = kmeans.clustercenters

    # Only look at the first coordinate: the second one is identically zero
    # in the input, which made every check on the full array trivially true.
    # NOTE(review): assumes clustercenters has shape (n_centers, n_dims) --
    # confirm against the estimator.
    first_coord = cc[:, 0]

    # One check per Gaussian; the previous thresholds (cc < 1.0 and
    # cc > -1.0) did not test the outer Gaussians at all.
    assert np.any(first_coord < -1.0)
    assert np.any((first_coord > -1.0) & (first_coord < 1.0))
    assert np.any(first_coord > 1.0)
def test_resume(self):
    """Check that the iteration can be continued by passing centers.

    A first run is limited to a single iteration from deliberately poor
    initial centers. A second call to ``estimate`` starts from the centers
    of that run, and its result must be closer to the reference centers
    (-2, 0, 2) than the starting point was.
    """
    # start from centers that are far off
    start_centers = np.array([[1, 2, 3]]).T
    clustering = cluster_mini_batch_kmeans(
        self.X, clustercenters=start_centers, max_iter=1, k=3)

    centers_before = clustering.clustercenters
    clustering.estimate(self.X, clustercenters=centers_before, max_iter=50)
    centers_after = clustering.clustercenters

    # distances to the reference are evaluated only after the second run
    reference = np.array([[-2, 0, 2]]).T
    err_before = np.linalg.norm(reference - centers_before)
    err_after = np.linalg.norm(reference - centers_after)

    message = 'resume_centers=%s, new_centers=%s' % (centers_before, centers_after)
    self.assertLess(err_after, err_before, message)