Exemple #1
0
 def test_chunksize(self):
     """Check that the chunksize given to the pipeline reaches every stage.

     Builds a source -> TICA -> mini-batch k-means chain, wraps it in a
     pipeline with an explicit chunksize, and verifies that both the
     pipeline and each element of its chain report that value.
     """
     expected = 1001
     source = api.source(self.traj_files, top=self.pdb_file)
     stages = [source, api.tica(), api.cluster_mini_batch_kmeans()]
     pipe = api.pipeline(stages, chunksize=expected)

     assert pipe.chunksize == expected
     # every element of the chain must report the pipeline's chunksize
     for stage in pipe._chain:
         assert stage.chunksize == expected
Exemple #2
0
 def test_3gaussian_1d_singletraj(self):
     """Cluster 1D samples from three Gaussians and check center coverage.

     The data is a single trajectory of 900 standard-normal samples
     shifted by -2, 0 and +2. After clustering with 100 centers, at least
     one center must lie in each of the regions (-inf, -1), (-1, 1) and
     (1, inf), i.e. near each of the three Gaussians.
     """
     # generate 1D data from three gaussians centered at -2, 0 and +2
     X = [
         np.random.randn(200) - 2.0,
         np.random.randn(300),
         np.random.randn(400) + 2.0
     ]
     X = np.hstack(X)
     kmeans = cluster_mini_batch_kmeans(X,
                                        batch_size=0.5,
                                        k=100,
                                        max_iter=10000)
     cc = kmeans.clustercenters
     # One assertion per gaussian. The outer thresholds are -1 and +1 so
     # that each check is independent of the middle one; with `cc < 1.0`
     # and `cc > -1.0` the outer checks would be implied by the middle
     # check and would verify nothing about the outer gaussians.
     assert np.any(cc < -1.0)
     assert np.any((cc > -1.0) & (cc < 1.0))
     assert np.any(cc > 1.0)
Exemple #3
0
 def test_3gaussian_2d_multitraj(self):
     """Cluster three 2D trajectories and check center coverage along x.

     Each trajectory holds standard-normal samples in its first coordinate
     (shifted by -2, 0 and +2 respectively) and zeros in its second
     coordinate. After clustering with 100 centers, at least one center
     must have its first coordinate in each of the regions (-inf, -1),
     (-1, 1) and (1, inf).
     """
     # generate 2D data whose first coordinate comes from three gaussians;
     # the second coordinate stays zero
     X1 = np.zeros((200, 2))
     X1[:, 0] = np.random.randn(200) - 2.0
     X2 = np.zeros((300, 2))
     X2[:, 0] = np.random.randn(300)
     X3 = np.zeros((400, 2))
     X3[:, 0] = np.random.randn(400) + 2.0
     X = [X1, X2, X3]
     kmeans = cluster_mini_batch_kmeans(X,
                                        batch_size=0.5,
                                        k=100,
                                        max_iter=10000)
     cc = kmeans.clustercenters
     # Only the first coordinate carries signal. Checking the whole array
     # would let the second column (zero in every input sample) satisfy
     # the middle assertion by itself.
     # assumes clustercenters has shape (k, 2) -- TODO confirm
     cc_x = cc[:, 0]
     # One assertion per gaussian, with non-overlapping regions so that
     # no check is implied by another.
     assert np.any(cc_x < -1.0)
     assert np.any((cc_x > -1.0) & (cc_x < 1.0))
     assert np.any(cc_x > 1.0)
Exemple #4
0
    def test_resume(self):
        """Check that estimation can be resumed from previously found centers.

        Runs a single iteration from deliberately poor starting centers,
        then continues for up to 50 iterations from the resulting centers
        and asserts that the distance to the reference centers
        (-2, 0, 2) has decreased.
        """
        # start far away from the reference centers
        start = np.array([[1, 2, 3]]).T
        cl = cluster_mini_batch_kmeans(self.X,
                                       clustercenters=start,
                                       max_iter=1,
                                       k=3)

        # centers after the single initial iteration
        resume_centers = cl.clustercenters
        # continue the iteration from where the first run stopped
        cl.estimate(self.X, clustercenters=resume_centers, max_iter=50)
        new_centers = cl.clustercenters

        reference = np.array([[-2, 0, 2]]).T
        error_before = np.linalg.norm(reference - resume_centers)
        error_after = np.linalg.norm(reference - new_centers)

        message = ('resume_centers=%s, new_centers=%s' %
                   (resume_centers, new_centers))
        self.assertLess(error_after, error_before, message)