Example #1
from sklearn.decomposition import RandomizedPCA

def _compare_clusters(**datasets):
    for name, dataset in datasets.items():
        # project each dataset onto its first two principal components
        pca = RandomizedPCA(2)
        pca.fit(dataset)
        X = pca.transform(dataset)
        instances = _kmeans()
        for instance in instances:
            instance.fit(dataset)
            # reduce to 2d for visualisation
            draw_cluster_2d(instance, X,
                            filename="%s-kmeans-%s.png" % (name, instance.k))
        ms_instances = _meanshift(dataset)
        for instance in ms_instances:
            instance.fit(dataset)
        # compare cluster-size distributions across all fitted models
        compare_pies(
            [_get_distribution(i) for i in instances] +
            [_get_distribution(i) for i in ms_instances],
            ["KMeans(%s)" % i.k for i in instances] +
            ["MeanShift(%s)" % round(i.bandwidth) for i in ms_instances],
            filename="%s-pie.png" % name)
Example #2
from sklearn.decomposition import RandomizedPCA

def _draw(dataset, filename, title):
    # project the dataset onto its first two principal components
    pca = RandomizedPCA(2)
    pca.fit(dataset)
    X = pca.transform(dataset)
    draw_2d(X, filename, title)  # draw_2d: project-level plotting helper
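
The draw_2d call above is another project-local helper. A minimal assumed implementation using matplotlib, scattering the two principal components and saving the figure (the real helper may differ):

import matplotlib.pyplot as plt

def draw_2d(X, filename, title):
    # X is the (n_samples, 2) PCA projection computed in _draw
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], s=10)
    plt.title(title)
    plt.xlabel("PC 1")
    plt.ylabel("PC 2")
    plt.savefig(filename)
    plt.close()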
Example #3
    def lininit(self):
        """Linear initialisation of the codebook from the principal components.

        X = U Sigma W^T, X^T X = W Sigma^2 W^T, T = X W = U Sigma.
        Lower-rank projections keep just a few of the eigenvectors:
        T(2) = U(2) Sigma(2) = X W(2), where 2 is the number of selected
        eigenvectors.
        The map is initialised from the first two eigenvalues and eigenvectors,
        and each codebook vector is a linear combination of them, with
        coefficients running from -1 to 1 along each direction of the SOM grid.
        Note that here X is the mean-centred data matrix, so X^T X is
        proportional to the covariance of the original data.
        """
        msize = getattr(self, 'mapsize')
        rows = msize[0]
        cols = msize[1]
        nnodes = getattr(self, 'nnodes')

        if np.min(msize) > 1:
            coord = np.zeros((nnodes, 2))
            for i in range(0, nnodes):
                coord[i, 0] = int(i / cols)  # x
                coord[i, 1] = int(i % cols)  # y
            mx = np.max(coord, axis=0)
            mn = np.min(coord, axis=0)
            # rescale the grid coordinates from [0, 1] to [-1, 1]
            coord = (coord - mn) / (mx - mn)
            coord = (coord - .5) * 2
            data = getattr(self, 'data')
            me = np.mean(data, 0)
            data = (data - me)
            codebook = np.tile(me, (nnodes, 1))
            pca = RandomizedPCA(n_components=2)  # Randomized PCA is scalable
            pca.fit(data)
            eigvec = pca.components_
            eigval = pca.explained_variance_
            # normalise each component and scale it by its explained variance
            norms = np.sqrt(np.einsum('ij, ij->i', eigvec, eigvec))
            eigvec = ((eigvec.T / norms) * eigval).T

            for j in range(nnodes):
                for i in range(eigvec.shape[0]):
                    codebook[j, :] = codebook[j, :] + coord[j, i] * eigvec[i, :]
            return np.around(codebook, decimals=6)
        elif np.min(msize) == 1:
            coord = np.zeros((nnodes, 1))
            for i in range(0, nnodes):
                coord[i, 0] = int(i % cols)  # position along the single row
            mx = np.max(coord, axis=0)
            mn = np.min(coord, axis=0)
            # rescale the grid coordinates from [0, 1] to [-1, 1]
            coord = (coord - mn) / (mx - mn)
            coord = (coord - 0.5) * 2
            data = getattr(self, 'data')
            me = np.mean(data, 0)
            data = (data - me)
            codebook = np.tile(me, (nnodes, 1))
            pca = RandomizedPCA(n_components=1)  # Randomized PCA is scalable
            pca.fit(data)
            eigvec = pca.components_
            eigval = pca.explained_variance_
            # normalise the component and scale it by its explained variance
            norms = np.sqrt(np.einsum('ij, ij->i', eigvec, eigvec))
            eigvec = ((eigvec.T / norms) * eigval).T

            for j in range(nnodes):
                for i in range(eigvec.shape[0]):
                    codebook[j, :] = codebook[j, :] + coord[j, i] * eigvec[i, :]
            return np.around(codebook, decimals=6)
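
The docstring above rests on the SVD identities X = U Sigma W^T and T = X W = U Sigma. A small standalone check of those identities on toy data, using plain numpy SVD rather than RandomizedPCA (RandomizedPCA itself was later removed from scikit-learn; PCA(svd_solver='randomized') is the current equivalent):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
X = X - X.mean(axis=0)        # mean-centre, as lininit does

U, S, Wt = np.linalg.svd(X, full_matrices=False)
T = X @ Wt.T                  # project onto the right singular vectors
assert np.allclose(T, U * S)  # T = X W = U Sigma

# keeping only the first two components gives the rank-2 projection T(2)
T2 = X @ Wt[:2].T
assert np.allclose(T2, U[:, :2] * S[:2])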