Example #1
    def _transform_array(self, X):
        """get closest index of point in :attr:`clustercenters` to x."""
        X = np.require(X, dtype=np.float32, requirements='C')
        # for performance reasons we pre-center the cluster centers for minRMSD.
        if self.metric == 'minRMSD' and not self._precentered:
            self._precentered = True

        model = ClusterModel(cluster_centers=self.clustercenters,
                             metric=self.metric)
        dtraj = model.transform(X)
        res = dtraj[:, None]  # always return a column vector in this function
        return res
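For reference, a minimal standalone sketch of the same assignment step outside the wrapper class above, assuming the current deeptime API where cluster_centers is the first ClusterModel constructor argument; the centers and data below are made up for illustration:

import numpy as np
from deeptime.clustering import ClusterModel

# three made-up 2D cluster centers
centers = np.array([[0., 0.], [1., 1.], [2., 2.]], dtype=np.float32)
model = ClusterModel(cluster_centers=centers, metric='euclidean')

X = np.random.uniform(size=(10, 2)).astype(np.float32)
dtraj = model.transform(X)   # index of the closest center for each row of X
res = dtraj[:, None]         # column vector, as returned by _transform_array above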
Example #2
def test_ndim_assignment(ndim, njobs):
    centers = np.random.uniform(size=(15, ndim)).squeeze()
    model = ClusterModel(centers)
    assert_equal(model.dim, ndim)
    data = np.random.uniform(size=(50, ndim)).squeeze()
    dtraj = model.transform(data, n_jobs=njobs)
    if data.ndim == 1:
        data = data[..., None]
    for i in range(len(data)):
        cc = dtraj[i]
        x = data[i]
        dists = np.linalg.norm(model.cluster_centers - x[None, :], axis=1)
        assert_equal(cc, np.argmin(dists))
Example #3
    def cluster(self, n_bins):
        from deeptime.clustering import ClusterModel

        minval = min(np.min(self.data), np.min(self.data_lagged))
        maxval = max(np.max(self.data), np.max(self.data_lagged))

        grid = np.linspace(minval, maxval, num=n_bins, endpoint=True)
        mesh = np.vstack(np.meshgrid(grid, grid, grid)).reshape(3, -1).T
        cm = ClusterModel(len(mesh), mesh)

        dtraj1 = cm.transform(self.data.astype(np.float64))
        traj1 = np.zeros((len(self.data), mesh.shape[0]))
        traj1[np.arange(len(self.data)), dtraj1] = 1.

        dtraj2 = cm.transform(self.data_lagged.astype(np.float64))
        traj2 = np.zeros((len(self.data_lagged), mesh.shape[0]))
        traj2[np.arange(len(self.data_lagged)), dtraj2] = 1.

        return BickleyJetEndpointsDataset3DClustered(traj1, traj2)
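The traj1 and traj2 arrays above are one-hot encodings of the discrete trajectories: row i carries a 1 in the column of the grid cell that frame i was assigned to. A toy sketch of that indexing trick in isolation (values invented for illustration):

import numpy as np

dtraj = np.array([2, 0, 1])                 # toy assignments: 3 frames, 4 cells
onehot = np.zeros((len(dtraj), 4))
onehot[np.arange(len(dtraj)), dtraj] = 1.   # row i gets a 1 in column dtraj[i]
# onehot == [[0, 0, 1, 0], [1, 0, 0, 0], [0, 1, 0, 0]]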
Example #4
    def test_minrmsd_assignments(self):
        # make sure impl is registered
        _ = KmeansClustering(n_clusters=5)
        # now we can import the impl
        impl = deeptime.clustering.metrics['minRMSD']

        from scipy.linalg import expm, norm
        n_clusters = 5
        n_particles = 3
        n_frames_per_cluster = 25

        def rotation_matrix(axis, theta):
            """ rotation matrix
            :param axis: np.ndarray, axis around which to rotate
            :param theta: float, angle in radians
            :return: rotation matrix
            """
            return expm(np.cross(np.eye(3), axis / norm(axis) * theta))

        out = np.zeros((n_clusters * n_frames_per_cluster, 3 * n_particles))
        for i in range(n_clusters):
            # define `n_particles` random particle xyz positions,
            # repeat `n_frames_per_cluster` frames and add noise
            _pos = np.random.choice(np.arange(3 * n_particles),
                                    size=3 * n_particles)
            pos = np.repeat(_pos[None], n_frames_per_cluster,
                            axis=0).astype(float)
            pos += np.random.normal(size=pos.shape, scale=.1)

            # add random rotation and translation for each frame
            rand_rot_trans = np.zeros_like(pos)
            for n, _pos in enumerate(pos):
                r = rotation_matrix(np.array([0, 1, 0]),
                                    np.pi * np.random.rand())
                t = np.array([
                    np.random.normal(),
                    np.random.normal(),
                    np.random.normal()
                ])

                for m in range(n_particles):
                    rand_rot_trans[n, 3 * m:3 * (m + 1)] = \
                        np.dot(r, _pos[3 * m:3 * (m + 1)]) - t

            out[n_frames_per_cluster * i:n_frames_per_cluster * (i + 1)] = rand_rot_trans

        cc = impl.kmeans.init_centers_kmpp(out,
                                           k=n_clusters,
                                           random_seed=-1,
                                           n_threads=1,
                                           callback=None)
        cl = ClusterModel(cc, metric='minRMSD', converged=True)
        assignments = cl.transform(out)
        unique = []
        for i in range(n_clusters):
            unique_in_interval = np.unique(
                assignments[n_frames_per_cluster * i:n_frames_per_cluster * (i + 1)])
            # assert that all frames of each interval share a single assignment
            self.assertEqual(unique_in_interval.shape[0], 1)
            unique.append(unique_in_interval[0])
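The test reaches into the internal metric registry to call the k-means++ initializer directly. For ordinary use, the same steps can be approached through deeptime's public KMeans estimator; a hedged sketch, assuming 'minRMSD' has been registered beforehand (as in the test above) and that KMeans exposes metric and init_strategy parameters:

from deeptime.clustering import KMeans

# k-means with the minRMSD metric and k-means++ initialization on the data built above
estimator = KMeans(n_clusters=5, metric='minRMSD', init_strategy='kmeans++')
clustering = estimator.fit(out).fetch_model()   # fetch_model() yields a ClusterModel
assignments = clustering.transform(out)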
Example #5
    def _estimate(self, iterable, **kwargs):
        ########
        # Calculate clustercenters:
        # 1. choose first datapoint as centroid
        # 2. for all X: calc distances to all clustercenters
        # 3. add new centroid, if min(distance to all other clustercenters) >= dmin
        ########
        # the regular-space estimator accumulates cluster centers incrementally
        used_frames = 0
        regspace = RegularSpace(dmin=self.dmin,
                                max_centers=self.max_centers,
                                metric=self.metric,
                                n_jobs=self.n_jobs)
        it = iterable.iterator(return_trajindex=False,
                               stride=self.stride,
                               chunk=self.chunksize,
                               skip=self.skip)
        try:
            with it:
                for X in it:
                    regspace.partial_fit(X.astype(np.float32,
                                                  order='C',
                                                  copy=False),
                                         n_jobs=self.n_jobs)
                    used_frames += len(X)
            self._converged = True
        except Exception as e:
            if 'MaxCentersReachedException' in e.__class__.__name__:
                self._converged = False
                msg = 'Maximum number of cluster centers reached.' \
                      ' Consider increasing max_centers or choose' \
                      ' a larger minimum distance, dmin.'
                self.logger.warning(msg)
                warnings.warn(msg)
                # pass amount of processed data
                used_data = used_frames / float(it.n_frames_total()) * 100.0
                raise NotConvergedWarning("Used data for centers: %.2f%%" %
                                          used_data)
            else:
                # TODO: ugly workaround until MaxCentersReachedException is moved out of
                #  the metric subpackage to a globally accessible location
                raise
        finally:
            # even if not converged, we store the found centers.
            model = regspace.fetch_model()
            clustercenters = model.cluster_centers.squeeze().reshape(
                -1, iterable.ndim)
            self._inst = ClusterModel(clustercenters, metric=self.metric)
            from types import MethodType

            def _assign(self, data, _, n_jobs):
                out = self.transform(data, n_jobs=n_jobs)
                return out

            self._inst.assign = MethodType(_assign, self._inst)
            self.update_model_params(clustercenters=clustercenters,
                                     n_clusters=len(clustercenters))

            if len(clustercenters) == 1:
                self.logger.warning('Found only one center for the given minimum '
                                    'distance requirement of dmin=%f' % self.dmin)

        return self
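The comment block at the top of _estimate summarizes the regular-space rule that RegularSpace implements. A minimal NumPy sketch of just that rule (Euclidean metric, no chunking, no max_centers handling), using a hypothetical helper name:

import numpy as np

def regular_space_centers(X, dmin):
    """Regular-space centers: a point becomes a new center if it is
    at least dmin away from every center found so far."""
    centers = [X[0]]                          # 1. first data point becomes a center
    for x in X[1:]:
        dists = np.linalg.norm(np.asarray(centers) - x, axis=1)  # 2. distances to all centers
        if dists.min() >= dmin:               # 3. far enough from all existing centers -> new center
            centers.append(x)
    return np.asarray(centers)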