Beispiel #1
0
    def test_3gaussian_1d_singletraj(self):
        """Cluster a 1D mixture of three gaussians and check centers and seeding.

        For each initialization strategy the test checks that

        * at least one center lies left of -1, one between -1 and 1, and one
          right of 1 (one region per gaussian),
        * two runs with ``fixed_seed=True`` produce identical centers,
        * a run with ``fixed_seed=False`` differs from the fixed-seed run in at
          least one of four attempts.
        """
        # 1D samples from unit-variance gaussians centered at -2, 0 and +2
        X = np.hstack([np.random.randn(100) - 2.0,
                       np.random.randn(100),
                       np.random.randn(100) + 2.0])

        for init_strategy in ['kmeans++', 'uniform']:
            msg = "failed for init_strategy=%s" % init_strategy
            kmeans = cluster_kmeans(X, k=10, init_strategy=init_strategy)
            cc = kmeans.clustercenters
            # The outer checks used to read ``cc < 1.0`` and ``cc > -1.0``; both
            # are implied by the middle check, so the outer gaussians were never
            # actually tested.
            assert np.any(cc < -1.0), msg
            assert np.any((cc > -1.0) & (cc < 1.0)), msg
            assert np.any(cc > 1.0), msg

            # test fixed seed
            km1 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=True)
            km2 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=True)
            np.testing.assert_array_equal(km1.clustercenters, km2.clustercenters,
                                          "should yield same centers with fixed seed")

            # test that not-fixed seed yields different results; an unseeded run
            # may coincide by chance, so allow a few attempts
            differs = False
            for _ in range(4):
                km3 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=False)
                try:
                    # only the comparison is guarded, so an AssertionError raised
                    # by the estimator itself is not mistaken for "different"
                    np.testing.assert_array_equal(km1.clustercenters, km3.clustercenters)
                except AssertionError:
                    differs = True
                    break
            self.assertTrue(differs, 'using a fixed seed compared to a not fixed one made no difference!')
Beispiel #2
0
    def test_minrmsd_assignment(self):
        """Check minRMSD k-means assignments against a manual nearest-center search.

        Two estimators are built with identical arguments and must agree on the
        first discrete trajectory and on the cluster centers. The first
        discrete trajectory is then recomputed frame by frame with the
        ``minRMSD`` metric implementation and compared to the estimator's.
        """
        rng = np.random.RandomState(123)
        data = rng.uniform(-50, 50, size=(500, 3 * 15))
        n_clusters = 15
        kmeans_kwargs = dict(metric='minRMSD',
                             max_iter=0,
                             fixed_seed=32,
                             init_strategy='kmeans++',
                             n_jobs=1)
        kmeans = cluster_kmeans([data], n_clusters, **kmeans_kwargs)
        kmeans2 = cluster_kmeans([data], n_clusters, **kmeans_kwargs)

        # identical arguments (including the seed) must give identical results
        np.testing.assert_array_equal(kmeans.dtrajs[0], kmeans2.dtrajs[0])
        np.testing.assert_array_almost_equal(kmeans.clustercenters,
                                             kmeans2.clustercenters)
        np.testing.assert_equal(kmeans.metric, 'minRMSD')

        # manual assignment: index of the closest center for every frame
        rmsd = deeptime.clustering.metrics['minRMSD']
        expected_dtraj = [
            np.argmin([rmsd.compute_metric(frame, center)
                       for center in kmeans.clustercenters])
            for frame in data
        ]
        np.testing.assert_array_equal(expected_dtraj, kmeans.dtrajs[0])
Beispiel #3
0
    def test_add_element(self):
        """Adding pipeline elements must not change outputs of earlier elements."""
        # empty pipeline; run=False so nothing is parametrized automatically
        pipe = api.pipeline([], run=False)

        # first element: the trajectory reader
        reader = api.source(self.traj_files, top=self.pdb_file)
        pipe.add_element(reader)
        pipe.parametrize()
        reader_before = reader.get_output()

        # second element: k-means
        kmeans = api.cluster_kmeans(k=15)
        pipe.add_element(kmeans)
        pipe.parametrize()
        kmeans_before = kmeans.get_output()

        # fetch the reader output again, then append one more clustering
        reader_after = reader.get_output()
        pipe.add_element(api.cluster_kmeans(k=2))
        pipe.parametrize()
        kmeans_after = kmeans.get_output()

        # the first two entries of each intermediate output must be unchanged
        for before, after in ((reader_before, reader_after),
                              (kmeans_before, kmeans_after)):
            for idx in (0, 1):
                np.testing.assert_array_equal(before[idx], after[idx])
Beispiel #4
0
    def test_check_convergence_serial_parallel(self):
        """ check serial and parallel version of kmeans converge to the same centers.

        Artificial data set is created with 6 disjoint point blobs, to ensure the parallel and the serial version
        converge to the same result. If the blobs would overlap we can not guarantee this, because the parallel version
        can potentially converge to a closer point, which is chosen in a non-deterministic way (multiple threads).

        Each init strategy gets up to ``max_attempts`` tries with a fresh random seed. If none of the
        attempts for a strategy agrees, the last comparison error is re-raised so the test fails.
        """
        k = 6
        max_iter = 50
        max_attempts = 3
        from pyemma.coordinates.clustering.tests.util import make_blobs
        data = make_blobs(n_samples=500, random_state=45, centers=k, cluster_std=0.5, shuffle=False)[0]
        for strat in ('uniform', 'kmeans++'):
            last_error = None
            # since this can fail in like one of 100 runs, we repeat until success.
            for _ in range(max_attempts):
                seed = random.randint(0, 2**32-1)
                cl_serial = cluster_kmeans(data, k=k, n_jobs=1, fixed_seed=seed, max_iter=max_iter, init_strategy=strat)
                cl_parallel = cluster_kmeans(data, k=k, n_jobs=2, fixed_seed=seed, max_iter=max_iter, init_strategy=strat)
                try:
                    np.testing.assert_allclose(cl_serial.clustercenters, cl_parallel.clustercenters, atol=1e-4)
                except AssertionError as err:
                    last_error = err
                else:
                    break
            else:
                # Every attempt for this strategy disagreed. The previous loop
                # simply ended here without asserting, so the test could never fail.
                raise last_error
Beispiel #5
0
    def test_add_element(self):
        """Adding pipeline elements must not change outputs of earlier elements.

        The pipeline is grown step by step (reader, k-means with k=15, k-means
        with k=2); the reader and first k-means outputs captured before a later
        element was added must equal the outputs fetched afterwards.
        """
        # start with empty pipeline without auto-parametrization
        p = api.pipeline([], run=False)
        # add some reader
        reader = api.source(self.traj_files, top=self.pdb_file)
        p.add_element(reader)
        p.parametrize()
        # get the result immediately
        out1 = reader.get_output()

        # add some kmeans
        kmeans = api.cluster_kmeans(k=15)
        p.add_element(kmeans)
        p.parametrize()
        # get the result immediately
        kmeans1 = kmeans.get_output()

        # get reader output again
        out2 = reader.get_output()
        # use the same factory name as above instead of the inconsistent
        # ``api.kmeans`` spelling
        p.add_element(api.cluster_kmeans(k=2))
        p.parametrize()

        # get kmeans output again
        kmeans2 = kmeans.get_output()
        # check if add_element changes the intermediate results
        np.testing.assert_array_equal(out1[0], out2[0])
        np.testing.assert_array_equal(out1[1], out2[1])
        np.testing.assert_array_equal(kmeans1[0], kmeans2[0])
        np.testing.assert_array_equal(kmeans1[1], kmeans2[1])
Beispiel #6
0
    def test_with_pg_data_not_in_memory(self):
        """Clustering a file-backed reader must report 'creating data array' on stderr."""
        import pkg_resources
        import pyemma

        # directory holding the bundled BPTI test trajectories
        path = pkg_resources.resource_filename('pyemma.coordinates.tests', 'data') + os.path.sep
        xtc_names = ('bpti_001-033.xtc', 'bpti_034-066.xtc', 'bpti_067-100.xtc')
        pdb_file = os.path.join(path, 'bpti_ca.pdb')
        traj_files = [os.path.join(path, name) for name in xtc_names]
        reader = pyemma.coordinates.source(traj_files, top=pdb_file)

        # capture stderr while progress bars are enabled
        with settings(show_progress_bars=True):
            with Capturing(which='stderr') as out:
                cluster_kmeans(reader)
        captured = '\n'.join(out)
        self.assertIn('creating data array', captured)
Beispiel #7
0
    def testDtraj(self):
        """Check the dtraj dtype and that ``save_dtrajs`` writes one file per trajectory."""
        self.k = 5
        self.dim = 100
        # two random trajectories with 30 and 37 frames
        self.data = [np.random.random((n_frames, self.dim)) for n_frames in (30, 37)]
        self.kmeans = cluster_kmeans(data=self.data, k=self.k, max_iter=100)

        assert self.kmeans.dtrajs[0].dtype == self.kmeans.output_type()

        prefix, extension = "test", ".dtraj"
        with TemporaryDirectory() as outdir:
            self.kmeans.save_dtrajs(trajfiles=None,
                                    prefix=prefix,
                                    output_dir=outdir,
                                    extension=extension)

            n_trajs = self.kmeans.data_producer.number_of_trajectories()
            # os.stat raises if an expected "<prefix>_<index><extension>" file is missing
            for idx in range(n_trajs):
                os.stat(os.path.join(outdir, "%s_%i%s" % (prefix, idx, extension)))
Beispiel #8
0
 def test_save_dtrajs(self):
     """Smoke test: parametrize a discretizer and save its dtrajs to ``self.dest_dir``."""
     trajectory_reader = source(self.trajfiles, top=self.topfile)
     discretizer = Discretizer(trajectory_reader, cluster=cluster_kmeans(k=2))
     discretizer.parametrize()
     discretizer.save_dtrajs(output_dir=self.dest_dir)
     # the listing is not inspected; the call only fails if the directory is unreadable
     dtrajs = os.listdir(self.dest_dir)
Beispiel #9
0
 def test_no_transform(self):
     """Build a reader + clustering pipeline per clustering type and fetch its output."""
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     clusterings = (
         (api.cluster_kmeans, {'k': 10}),
         (api.cluster_regspace, {'dmin': 10}),
         (api.cluster_uniform_time, {}),
     )
     for make_clustering, kwargs in clusterings:
         pipe = api.pipeline([reader_xtc, make_clustering(**kwargs)])
         # presumably _chain[-1] is the clustering stage that was just appended
         pipe._chain[-1].get_output()
Beispiel #10
0
 def setUp(self):
     """Create two random trajectories and a k-means estimator fitted on them."""
     self.k = 5
     self.dim = 100
     # trajectories with 30 and 37 frames of ``self.dim`` uniform random features
     self.data = [np.random.random((n_frames, self.dim)) for n_frames in (30, 37)]
     self.kmeans = cluster_kmeans(data=self.data, k=self.k, max_iter=100)
Beispiel #11
0
    def test_non_converged_keep_memory(self):
        """After two single-iteration runs the estimator is unconverged and keeps its chunks."""
        k = 3
        # pick k random rows of self.X as starting centers
        picked = np.random.choice(1000, size=k)
        initial_centers = np.atleast_2d(self.X[picked]).T

        cl = cluster_kmeans(self.X, clustercenters=initial_centers, k=k, max_iter=1, keep_data=True)

        # continue from the current centers for one more iteration
        cl.estimate(self.X, clustercenters=cl.clustercenters, max_iter=1)
        assert not cl.converged
        assert hasattr(cl, '_in_memory_chunks')
Beispiel #12
0
 def test_3gaussian_1d_singletraj(self):
     """Cluster a 1D mixture of three gaussians and check one center per region."""
     # 1D samples from unit-variance gaussians centered at -2, 0 and +2
     X = np.hstack([np.random.randn(100) - 2.0,
                    np.random.randn(100),
                    np.random.randn(100) + 2.0])
     kmeans = cluster_kmeans(X, k=10)
     cc = kmeans.clustercenters
     # The outer checks used to read ``cc < 1.0`` and ``cc > -1.0``, which the
     # middle check already implies; test the outer gaussians explicitly.
     assert np.any(cc < -1.0)
     assert np.any((cc > -1.0) & (cc < 1.0))
     assert np.any(cc > 1.0)
Beispiel #13
0
    def test_save_dtrajs(self):
        """Smoke test: parametrize a discretizer and save its dtrajs to ``self.dest_dir``.

        The unused ``pairs`` array of residue index combinations was dropped;
        nothing in this test consumed it.
        """
        reader = source(self.trajfiles, top=self.topfile)

        cluster = cluster_kmeans(k=2)
        d = Discretizer(reader, cluster=cluster)
        d.parametrize()
        d.save_dtrajs(output_dir=self.dest_dir)
        # listing the directory only verifies that it exists and is readable
        os.listdir(self.dest_dir)
Beispiel #14
0
    def test_kmeans_converge_outlier_to_equilibrium_state(self):
        """With one perturbed cube corner, both centers must end inside [-1, 1] per coordinate."""
        initial_centers_equilibrium = np.array([[2, 0, 0], [-2, 0, 0]])
        # corners of the cube [-1, 1]^3, the first one moved to y=1.5
        X = np.array([
            [1, 1.5, 1], [1, 1, -1],
            [1, -1, -1], [-1, -1, -1],
            [-1, 1, 1], [-1, -1, 1],
            [-1, 1, -1], [1, -1, 1],
        ], dtype=np.float32)
        kmeans = cluster_kmeans(X, k=2, clustercenters=initial_centers_equilibrium, max_iter=500, n_jobs=1)

        centers = kmeans.clustercenters
        assert np.all(np.abs(centers) <= 1)
Beispiel #15
0
 def test_kmeans_convex_hull(self):
     """With k=1 the single cluster center must lie inside the convex hull of the points."""
     points = [
         [-212129 / 100000, -20411 / 50000, 2887 / 5000],
         [-212129 / 100000, 40827 / 100000, -5773 / 10000],
         [-141419 / 100000, -5103 / 3125, 2887 / 5000],
         [-141419 / 100000, 1 / 50000, -433 / 250],
         [-70709 / 50000, 3 / 100000, 17321 / 10000],
         [-70709 / 50000, 163301 / 100000, -5773 / 10000],
         [-70709 / 100000, -204121 / 100000, -5773 / 10000],
         [-70709 / 100000, -15309 / 12500, -433 / 250],
         [-17677 / 25000, -122471 / 100000, 17321 / 10000],
         [-70707 / 100000, 122477 / 100000, 17321 / 10000],
         [-70707 / 100000, 102063 / 50000, 2887 / 5000],
         [-17677 / 25000, 30619 / 25000, -433 / 250],
         [8839 / 12500, -15309 / 12500, -433 / 250],
         [35357 / 50000, 102063 / 50000, 2887 / 5000],
         [8839 / 12500, -204121 / 100000, -5773 / 10000],
         [70713 / 100000, -122471 / 100000, 17321 / 10000],
         [70713 / 100000, 30619 / 25000, -433 / 250],
         [35357 / 50000, 122477 / 100000, 17321 / 10000],
         [106067 / 50000, -20411 / 50000, 2887 / 5000],
         [141423 / 100000, -5103 / 3125, 2887 / 5000],
         [141423 / 100000, 1 / 50000, -433 / 250],
         [8839 / 6250, 3 / 100000, 17321 / 10000],
         [8839 / 6250, 163301 / 100000, -5773 / 10000],
         [106067 / 50000, 40827 / 100000, -5773 / 10000],
     ]
     kmeans = cluster_kmeans(np.asarray(points, dtype=np.float32), k=1)
     res = kmeans.clustercenters
     # Hyperplane inequalities ``normal . x + offset >= 0``, one (normal, offset)
     # pair per row. If they are all fulfilled, the center lies within the convex hull.
     half_spaces = (
         ([-11785060650000, -6804069750000, -4811167325000], 25000531219381),
         ([-1767759097500, 1020624896250, 721685304875], 3749956484003),
         ([-70710363900000, -40824418500000, 57734973820000], 199998509082907),
         ([70710363900000, 40824418500000, -57734973820000], 199998705841169),
         ([70710363900000, -40824995850000, -28867412195000], 149999651832937),
         ([-35355181950000, 20412497925000, -28867282787500], 100001120662259),
         ([23570121300000, 13608139500000, 9622334650000], 49998241292257),
         ([0, 577350000, -204125000], 1060651231),
         ([35355181950000, -20412497925000, 28867282787500], 99997486799779),
         ([0, 72168750, 51030625], 176771554),
         ([0, -288675000, 102062500], 530329843),
         ([0, 0, 250], 433),
         ([0, -144337500, -102061250], 353560531),
         ([0, 0, -10000], 17321),
     )
     for normal, offset in half_spaces:
         self.assertGreaterEqual(np.inner(np.array(normal, dtype=float), res) + offset, 0)
Beispiel #16
0
    def test_3gaussian_1d_singletraj(self):
        """Cluster a seeded 1D mixture of three gaussians over a grid of settings.

        For every combination of init strategy and ``fixed_seed`` the test
        checks that the centers are finite and cover all three regions, that
        two estimators with the same settings start from the same initial
        centers, and that both reach the same centers once converged.
        """
        # generate 1D data from three gaussians
        from pyemma.util.contexts import numpy_random_seed
        with numpy_random_seed(42):
            X = [np.random.randn(200)-2.0,
                 np.random.randn(200),
                 np.random.randn(200)+2.0]
        X = np.hstack(X)
        k = 50
        from pyemma._base.estimator import param_grid
        grid = param_grid({'init_strategy': ['uniform', 'kmeans++'], 'fixed_seed': [True, 463498]})
        for param in grid:
            init_strategy = param['init_strategy']
            fixed_seed = param['fixed_seed']
            msg = "failed for init_strategy=%s" % init_strategy
            kmeans = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            cc = kmeans.clustercenters
            self.assertTrue(np.all(np.isfinite(cc)), "cluster centers borked for strat %s" % init_strategy)
            # The outer checks used to read ``cc < 1.0`` and ``cc > -1.0``, which
            # the middle check already implies; test the outer gaussians explicitly.
            assert np.any(cc < -1.0), msg
            assert np.any((cc > -1.0) & (cc < 1.0)), msg
            assert np.any(cc > 1.0), msg

            km1 = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            km2 = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            self.assertEqual(len(km1.clustercenters), k)
            self.assertEqual(len(km2.clustercenters), k)
            self.assertEqual(km1.fixed_seed, km2.fixed_seed)

            # check initial centers (after kmeans++, uniform init) are equal.
            np.testing.assert_equal(km1.initial_centers_, km2.initial_centers_)

            # keep iterating from the current centers until both estimators report convergence
            while not km1.converged:
                km1.estimate(X=X, clustercenters=km1.clustercenters, keep_data=True)
            while not km2.converged:
                km2.estimate(X=X, clustercenters=km2.clustercenters, keep_data=True)

            # the optimization must have moved the centers away from the initial guess
            assert np.linalg.norm(km1.clustercenters - km1.initial_centers_) > 0
            np.testing.assert_allclose(km1.clustercenters, km2.clustercenters,
                                       err_msg="should yield same centers with fixed seed=%s for strategy %s, Initial centers=%s"
                                               % (fixed_seed, init_strategy, km2.initial_centers_), atol=1e-6)
Beispiel #17
0
 def test_kmeans_equilibrium_state(self):
     """With k=1 on the eight corners of a cube, the single center must be the origin."""
     initial_centers_equilibrium = [np.array([0, 0, 0])]
     # the eight corners of the cube [-1, 1]^3
     X = np.array([
         [1, 1, 1], [1, 1, -1],
         [1, -1, -1], [-1, -1, -1],
         [-1, 1, 1], [-1, -1, 1],
         [-1, 1, -1], [1, -1, 1],
     ], dtype=np.float32)
     kmeans = cluster_kmeans(X, k=1)
     self.assertEqual(1, len(kmeans.clustercenters), 'If k=1, there should be only one output center.')
     msg = 'Type=' + str(type(kmeans)) + '. ' + \
           'In an equilibrium state the resulting centers should not be different from the initial centers.'
     centers_match = np.array_equal(initial_centers_equilibrium[0], kmeans.clustercenters[0])
     self.assertTrue(centers_match, msg)
Beispiel #18
0
    def test_inefficient_args_log(self):
        """Re-estimating after ``keep_data=False`` must log a warning containing 'inefficient'."""
        from pyemma.util.testing_tools import MockLoggingHandler
        handler = MockLoggingHandler()
        cl = cluster_kmeans(self.X, max_iter=1, keep_data=False)
        # record everything the estimator logs from here on
        cl.logger.addHandler(handler)
        cl.estimate(self.X, max_iter=1, clustercenters=cl.clustercenters)

        found = any('inefficient' in msg for msg in handler.messages['warning'])
        assert found
Beispiel #19
0
 def test_3gaussian_1d_singletraj(self):
     """Cluster a 1D mixture of three gaussians and check one center per region."""
     # generate 1D data from three gaussians centered at -2, 0 and +2
     X = [
         np.random.randn(100) - 2.0,
         np.random.randn(100),
         np.random.randn(100) + 2.0
     ]
     X = np.hstack(X)
     kmeans = cluster_kmeans(X, k=10)
     cc = kmeans.clustercenters
     # The outer bounds were ``cc < 1.0`` / ``cc > -1.0`` before; the middle
     # check implies both, so the left and right gaussians were never tested.
     left = cc < -1.0
     middle = (cc > -1.0) & (cc < 1.0)
     right = cc > 1.0
     assert np.any(left)
     assert np.any(middle)
     assert np.any(right)
Beispiel #20
0
 def test_3gaussian_2d_multitraj(self):
     """Cluster three 2D trajectories and check one center per gaussian.

     Each trajectory has 100 frames; the first coordinate is drawn from a
     unit-variance gaussian (centered at -2, 0 and +2 respectively) and the
     second coordinate is identically zero.
     """
     # generate 2D data whose first coordinate comes from three gaussians
     X = []
     for shift in (-2.0, 0.0, 2.0):
         traj = np.zeros((100, 2))
         traj[:, 0] = np.random.randn(100) + shift
         X.append(traj)
     kmeans = cluster_kmeans(X, k=10)
     # Only the first coordinate carries information. Checking the full
     # center array passed trivially because the zero second coordinate
     # already satisfies every bound. Assumes clustercenters is (k, dim).
     cc = kmeans.clustercenters[:, 0]
     assert np.any(cc < -1.0)
     assert np.any((cc > -1.0) & (cc < 1.0))
     assert np.any(cc > 1.0)
Beispiel #21
0
 def test_3gaussian_2d_multitraj(self):
     """Cluster three 2D trajectories and check one center per gaussian.

     The first coordinate of each trajectory is drawn from a unit-variance
     gaussian (centered at -2, 0 and +2); the second coordinate stays zero.
     """
     # generate 2D data whose first coordinate comes from three gaussians
     X1 = np.zeros((100, 2))
     X1[:, 0] = np.random.randn(100) - 2.0
     X2 = np.zeros((100, 2))
     X2[:, 0] = np.random.randn(100)
     X3 = np.zeros((100, 2))
     X3[:, 0] = np.random.randn(100) + 2.0
     X = [X1, X2, X3]
     kmeans = cluster_kmeans(X, k=10)
     # Restrict the checks to the first coordinate: the all-zero second
     # coordinate made the former whole-array checks pass unconditionally.
     # Assumes clustercenters has shape (k, dim).
     first_coord = kmeans.clustercenters[:, 0]
     assert np.any(first_coord < -1.0)
     assert np.any((first_coord > -1.0) & (first_coord < 1.0))
     assert np.any(first_coord > 1.0)
    def test_transformer_iterator_random_access(self):
        """The strided iterator must yield exactly the requested frames per trajectory.

        For chunk sizes 0..4 the frames yielded for trajectories 0, 1 and 2 are
        counted and compared to the number of random-access indices the
        iterator reports for that trajectory.
        """
        kmeans = coor.cluster_kmeans(self.data, k=2)
        kmeans.in_memory = True

        for chunksize in range(5):
            kmeans.chunksize = chunksize
            frames_seen = {0: 0, 1: 0, 2: 0}
            it = kmeans.iterator(stride=self.stride)
            # each item is indexed as (trajectory index, chunk)
            for item in it:
                frames_seen[item[0]] += len(item[1])
            for itraj in list(frames_seen):
                expected = len(it.ra_indices_for_traj(itraj))
                assert frames_seen[itraj] == expected, \
                    "Expected to get exactly %s elements of trajectory %s, but got %s for chunksize=%s" \
                    % (expected, itraj, frames_seen[itraj], chunksize)
Beispiel #23
0
 def test_is_parametrized(self):
     """A pipeline built with run=False is unparametrized until ``parametrize`` is called."""
     # one stage of every transformer type
     stages = [
         api.source(self.traj_files, top=self.pdb_file),
         api.tica(),
         api.pca(),
         api.cluster_kmeans(k=50),
         api.cluster_regspace(dmin=50),
         api.cluster_uniform_time(k=20),
     ]
     pipe = api.pipeline(stages, run=False)
     self.assertFalse(pipe._is_parametrized(), "If run=false, the pipeline should not be parametrized.")
     pipe.parametrize()
     self.assertTrue(pipe._is_parametrized(), "If parametrized was called, the pipeline should be parametrized.")
Beispiel #24
0
    def test_syntetic_trivial(self):
        """Four constant blocks of data must be recovered exactly as the four centers."""
        levels = (30.0, 60.0, 90.0, 120.0)
        block_len = 10000
        # 40000 x 4 array: consecutive blocks of 10000 rows filled with one level each
        test_data = np.zeros((40000, 4))
        for block, level in enumerate(levels):
            test_data[block * block_len:(block + 1) * block_len, :] = level

        expected = tuple(np.array([level] * 4) for level in levels)
        cl = cluster_kmeans(test_data, k=4)
        # every expected center must match some estimated center exactly
        found = [
            any(np.all(center == e) for center in cl.clustercenters)
            for e in expected
        ]

        assert np.all(found)
Beispiel #25
0
    def test_3gaussian_1d_singletraj(self):
        """Cluster a 1D mixture of three gaussians and check centers and seeding.

        For each initialization strategy the test checks that the centers
        cover all three regions, that ``fixed_seed=True`` and a user-defined
        integer seed both give reproducible centers, and that an estimator
        created with ``fixed_seed=False`` does not report 42 as its seed.
        """
        # generate 1D data from three gaussians centered at -2, 0 and +2
        X = np.hstack([
            np.random.randn(100) - 2.0,
            np.random.randn(100),
            np.random.randn(100) + 2.0
        ])

        for init_strategy in ['kmeans++', 'uniform']:
            msg = "failed for init_strategy=%s" % init_strategy
            kmeans = cluster_kmeans(X, k=10, init_strategy=init_strategy)
            cc = kmeans.clustercenters
            # The outer checks used to read ``cc < 1.0`` and ``cc > -1.0``, which
            # the middle check already implies; test the outer gaussians explicitly.
            assert np.any(cc < -1.0), msg
            assert np.any((cc > -1.0) & (cc < 1.0)), msg
            assert np.any(cc > 1.0), msg

            # test fixed seed
            km1 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=True)
            km2 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=True)
            np.testing.assert_array_equal(
                km1.clustercenters, km2.clustercenters,
                "should yield same centers with fixed seed")

            # check a user defined seed
            seed = random.randint(0, 2**32 - 1)
            km1 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=seed)
            km2 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=seed)
            self.assertEqual(km1.fixed_seed, km2.fixed_seed)
            np.testing.assert_array_equal(
                km1.clustercenters, km2.clustercenters,
                "should yield same centers with fixed seed")

            # a non-fixed seed must not be reported as 42
            # (presumably the seed used for fixed_seed=True -- TODO confirm)
            km3 = cluster_kmeans(X, k=10, init_strategy=init_strategy, fixed_seed=False)
            self.assertNotEqual(km3.fixed_seed, 42)
Beispiel #26
0
    def test_replace_data_source(self):
        """Swapping the pipeline's data source must change the clustering output."""
        xtc_source = api.source(self.traj_files, top=self.pdb_file)
        memory_source = DataInMemory(data=self.generated_data)

        kmeans = api.cluster_kmeans(k=10)
        assert hasattr(kmeans, '_chunks')
        pipe = api.pipeline([xtc_source, kmeans])
        output_xtc = kmeans.get_output()

        # replace source
        print(memory_source)
        pipe.set_element(0, memory_source)
        assert hasattr(kmeans, '_chunks')
        pipe.parametrize()
        output_memory = kmeans.get_output()

        unchanged = np.array_equal(output_xtc, output_memory)
        self.assertFalse(
            unchanged,
            "Data source changed, so should the resulting clusters.")
Beispiel #27
0
    def test_resume(self):
        """Check that the iteration can be continued by passing centers.

        After one iteration from poor starting centers, resuming for up to 50
        more iterations must bring the centers closer to (-2, 0, 2), measured
        by the norm of the difference array.
        """
        start = np.array([[20, 42, -29]]).T
        cl = cluster_kmeans(self.X, clustercenters=start,
                            max_iter=1, k=3, keep_data=True)

        resume_centers = cl.clustercenters
        cl.estimate(self.X, clustercenters=resume_centers, max_iter=50)
        new_centers = cl.clustercenters

        true = np.array([-2, 0, 2])
        # NOTE(review): if the centers are shaped (k, 1) these differences
        # broadcast to a (k, k) array -- confirm this is intended.
        diff = np.linalg.norm(true - resume_centers)
        diff_next = np.linalg.norm(true - new_centers)

        self.assertLess(diff_next, diff, 'resume_centers=%s, new_centers=%s' % (resume_centers, new_centers))
Beispiel #28
0
 def setUp(self):
     """Prepare two random trajectories and fit a k-means estimator on them."""
     self.k = 5
     self.dim = 100
     short_traj = np.random.random((30, self.dim))
     long_traj = np.random.random((37, self.dim))
     self.data = [short_traj, long_traj]
     self.kmeans = cluster_kmeans(data=self.data, k=self.k, max_iter=100)
Beispiel #29
0
 def test_no_transform(self):
     """Fetch the output of the last pipeline stage for three clustering types."""
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)

     kmeans_pipe = api.pipeline([reader_xtc, api.cluster_kmeans(k=10)])
     kmeans_pipe._chain[-1].get_output()

     regspace_pipe = api.pipeline([reader_xtc, api.cluster_regspace(dmin=10)])
     regspace_pipe._chain[-1].get_output()

     uniform_time_pipe = api.pipeline([reader_xtc, api.cluster_uniform_time()])
     uniform_time_pipe._chain[-1].get_output()
Beispiel #30
0
 def test_skip(self):
     """Smoke test: k-means accepts the ``skip`` argument."""
     samples = np.random.rand(100, 3)
     cluster_kmeans(samples, skip=42)
Beispiel #31
0
 def test_with_pg(self):
     """Smoke test: k-means runs with progress bars switched on."""
     from pyemma.util.contexts import settings
     with settings(show_progress_bars=True):
         samples = np.random.rand(100, 3)
         cluster_kmeans(samples)
Beispiel #32
0
 def test_with_pg(self):
     """Clustering an in-memory array must not print 'creating data array' to stderr."""
     with settings(show_progress_bars=True):
         with Capturing(which='stderr') as output:
             cluster_kmeans(np.random.rand(100, 3))
     captured = '\n'.join(output)
     self.assertNotIn('creating data array', captured)
Beispiel #33
0
 def test_skip(self):
     """With ``skip=42`` the first dtraj has 42 frames fewer than the input."""
     samples = np.random.rand(100, 3)
     cl = cluster_kmeans(samples, skip=42)
     first_dtraj = cl.dtrajs[0]
     assert len(first_dtraj) == 100 - 42
Beispiel #34
0
 def test_negative_seed(self):
     """Ensure a negative seed is converted to a non-negative one."""
     samples = np.random.random((10, 3))
     km = cluster_kmeans(samples, k=2, fixed_seed=-1)
     self.assertGreaterEqual(km.fixed_seed, 0)
Beispiel #35
0
 def test_seed_too_large(self):
     """Ensure a seed of 2**32 is reduced to a value below 2**32."""
     samples = np.random.random((10, 3))
     km = cluster_kmeans(samples, k=2, fixed_seed=2**32)
     assert km.fixed_seed < 2**32