Exemple #1
0
    def _initialize(self):
        """Set the initial weights, means and covs (with full covariance matrix).

        Writes three attributes on the instance:

        weights: the prior of the clusters (what percentage of data a cluster
            has), normalised so the entries sum to 1.
        means: the mean points of the clusters.
        covs: the covariance matrices of the clusters (one per cluster).

        With ``init == 'random'`` the means are ``K`` distinct samples drawn
        from ``self.X`` and every cluster starts from the covariance of the
        whole data set. With ``init == 'kmeans'`` a ``KMeans`` model is fitted
        first and its centroids / assignments seed the mixture.

        Raises:
            ValueError: if ``self.init`` is neither ``'random'`` nor ``'kmeans'``.
        """
        self.weights = np.ones(self.K)
        if self.init == 'random':
            picked = random.sample(range(self.n_samples), self.K)
            self.means = [self.X[idx] for idx in picked]
            # The cluster count lives on the instance; a bare ``K`` is not
            # defined in this scope and raised NameError. The covariance of
            # the full data set is loop-invariant, so compute it once and give
            # each cluster its own copy.
            data_cov = np.cov(self.X.T)
            self.covs = [data_cov.copy() for _ in range(self.K)]

        elif self.init == 'kmeans':
            # Spend a third of the iteration budget on the KMeans warm start.
            kmeans = KMeans(K=self.K, max_iters=self.max_iters // 3, init='++')
            kmeans.fit(self.X)
            self.assignments = kmeans.predict()
            self.means = kmeans.centroids
            self.covs = []
            # NOTE(review): only labels that actually occur are visited, so a
            # cluster with no assigned points keeps its initial weight of 1 and
            # gets no covariance entry -- confirm KMeans never leaves one empty.
            for i in np.unique(self.assignments):
                members = self.assignments == i
                self.weights[int(i)] = members.sum()
                self.covs.append(np.cov(self.X[members].T))
        else:
            raise ValueError('Unknown type of init parameter')
        # Turn per-cluster counts (or the uniform ones) into probabilities.
        self.weights /= self.weights.sum()
Exemple #2
0
def kmeans_example(plot=False):
    """Run KMeans (``init='++'``) on four synthetic blobs.

    Args:
        plot: when truthy, call the model's ``plot()`` after predicting.
    """
    samples, labels = make_blobs(
        n_samples=500,
        n_features=2,
        centers=4,
        shuffle=True,
        random_state=42,
    )
    # Derive the cluster count from the generated labels.
    n_clusters = len(np.unique(labels))

    model = KMeans(K=n_clusters, max_iters=150, init='++')
    model.fit(samples)
    model.predict()

    if not plot:
        return
    model.plot()
Exemple #3
0
def robust_example(plot=False):
    """Run KMeans (``init="++"``, five clusters) on the ``load_robust`` data.

    Args:
        plot: when truthy, call the model's ``plot()`` after predicting.
    """
    # Only the samples are used; the second value returned by the loader is ignored.
    samples, _ = load_robust()

    model = KMeans(K=5, max_iters=50, init="++")
    model.fit(samples)
    model.predict()

    if not plot:
        return
    model.plot()
    def _initialize(self):
        """Set the initial weights, means and covs (with full covariance matrix).

        Writes three attributes on the instance:

        weights: the prior of the clusters (what percentage of data a cluster
            has), normalised so the entries sum to 1.
        means: the mean points of the clusters.
        covs: the covariance matrices of the clusters (one per cluster).

        Raises:
            ValueError: if ``self.init`` is neither ``'random'`` nor ``'kmeans'``.
        """
        self.weights = np.ones(self.K)
        if self.init == 'random':
            # K distinct sample indices become the starting means.
            seeds = random.sample(range(self.n_samples), self.K)
            self.means = [self.X[s] for s in seeds]
            # Use ``self.K``: a bare ``K`` does not exist in this scope and
            # raised NameError. Every cluster starts from (its own copy of)
            # the covariance of the whole data set.
            full_cov = np.cov(self.X.T)
            self.covs = [full_cov.copy() for _ in range(self.K)]

        elif self.init == 'kmeans':
            # Warm start from KMeans using a third of the iteration budget.
            kmeans = KMeans(K=self.K, max_iters=self.max_iters // 3, init='++')
            kmeans.fit(self.X)
            self.assignments = kmeans.predict()
            self.means = kmeans.centroids
            self.covs = []
            # NOTE(review): labels that never occur are skipped, leaving their
            # weight at the initial 1 and no covariance entry -- confirm
            # KMeans cannot return an empty cluster.
            for i in np.unique(self.assignments):
                in_cluster = self.assignments == i
                self.weights[int(i)] = in_cluster.sum()
                self.covs.append(np.cov(self.X[in_cluster].T))
        else:
            raise ValueError('Unknown type of init parameter')
        # Normalise counts (or the uniform ones) into prior probabilities.
        self.weights /= self.weights.sum()
Exemple #5
0
def KMeans_and_GMM(K):
    """Draw ground truth, KMeans and Gaussian-mixture results in three panels.

    Generates skewed clusters with ``make_clusters``, fits a ``KMeans`` model
    and a ``GaussianMixture`` model (``init='kmeans'``) on them, and plots
    each result on its own axes.

    Args:
        K: number of clusters to generate and to fit.
    """
    COLOR = 'bgrcmyk'

    def colors_for(labels):
        # Map each cluster label to a single-letter colour code.
        return [COLOR[int(label)] for label in labels]

    X, y = make_clusters(skew=True, n_samples=1500, centers=K)
    _, (truth_ax, kmeans_ax, gmm_ax) = plt.subplots(1, 3)
    xs, ys = X[:, 0], X[:, 1]

    # Panel 1: the labels the data was generated with.
    truth_ax.scatter(xs, ys, c=colors_for(y))
    truth_ax.set_title("Ground Truth")

    # Panel 2: KMeans assignments plus the fitted centroids as large markers.
    kmeans = KMeans(K=K, init='++')
    kmeans.fit(X)
    predicted = kmeans.predict()
    centers = np.array(kmeans.centroids)
    kmeans_ax.scatter(xs, ys, c=colors_for(predicted))
    kmeans_ax.scatter(centers[:, 0], centers[:, 1], c=COLOR[:K], marker="o", s=500)
    kmeans_ax.set_title("KMeans")

    # Panel 3: the mixture model draws itself on the axes it is given.
    gmm = GaussianMixture(K=K, init='kmeans')
    gmm.fit(X)
    gmm_ax.set_title("Gaussian Mixture")
    gmm.plot(ax=gmm_ax)
Exemple #6
0
def iris_example(plot=False):
    """Run KMeans (``init="++"``) on the ``load_iris`` data.

    The four feature columns are collapsed into two product features that
    are handed to the model's ``plot`` method.

    Args:
        plot: when truthy, plot the clustering over the reduced 2-D data.
    """
    features, labels = load_iris()
    model = KMeans(K=len(np.unique(labels)), max_iters=50, init="++")
    model.fit(features, labels)
    model.predict()

    # Dimension reduction: one product feature per column pair.
    reduced = np.zeros((model.n_samples, 2))
    stored = model.X
    # Sepal width * length (columns 0 and 1).
    reduced[:, 0] = stored[:, 0] * stored[:, 1]
    # Petal width * length (columns 2 and 3).
    reduced[:, 1] = stored[:, 2] * stored[:, 3]

    if not plot:
        return
    model.plot(reduced)
Exemple #7
0
def KMeans_and_GMM(K):
    """Draw ground truth, KMeans and Gaussian-mixture results in three panels.

    Both models plot themselves via their own ``plot`` methods; only the
    ground-truth panel is drawn here directly.

    Args:
        K: number of clusters to generate and to fit.
    """
    COLOR = 'bgrcmyk'

    X, y = make_clusters(skew=True, n_samples=1500, centers=K)
    _, panels = plt.subplots(1, 3)
    truth_ax, kmeans_ax, gmm_ax = panels

    # Ground truth: colour each point by the label it was generated with.
    true_colors = [COLOR[int(label)] for label in y]
    truth_ax.scatter(X[:, 0], X[:, 1], c=true_colors)
    truth_ax.set_title("Ground Truth")

    # KMeans
    kmeans = KMeans(K=K, init='++')
    kmeans.fit(X)
    kmeans.predict()
    kmeans_ax.set_title("KMeans")
    kmeans.plot(ax=kmeans_ax, holdon=True)

    # Gaussian mixture
    gmm = GaussianMixture(K=K, init='kmeans')
    gmm.fit(X)
    gmm_ax.set_title("Gaussian Mixture")
    gmm.plot(ax=gmm_ax)
def KMeans_and_GMM(K):
    """Compare KMeans and a Gaussian mixture against the ground truth.

    Produces a 1x3 figure: the generated labels, the KMeans result and the
    Gaussian-mixture result (initialised with ``init='kmeans'``).

    Args:
        K: number of clusters to generate and to fit.
    """
    COLOR = 'bgrcmyk'

    X, y = make_clusters(skew=True, n_samples=1500, centers=K)
    _, (ax_truth, ax_km, ax_gm) = plt.subplots(1, 3)

    # Left panel: points coloured by their generating label.
    point_colors = [COLOR[int(lbl)] for lbl in y]
    ax_truth.scatter(X[:, 0], X[:, 1], c=point_colors)
    ax_truth.set_title("Ground Truth")

    # Middle panel: KMeans draws its own result on the supplied axes.
    km_model = KMeans(K=K, init='++')
    km_model.fit(X)
    km_model.predict()
    ax_km.set_title("KMeans")
    km_model.plot(ax=ax_km, holdon=True)

    # Right panel: Gaussian mixture.
    gm_model = GaussianMixture(K=K, init='kmeans')
    gm_model.fit(X)
    ax_gm.set_title("Gaussian Mixture")
    gm_model.plot(ax=ax_gm)
Exemple #9
0
def test_initialization():
    """Check centroid initialisation for unknown and supported ``init`` values."""
    # An unknown scheme must raise ValueError somewhere in this sequence.
    with pytest.raises(ValueError):
        model = KMeans(init='test', K=2)
        model.fit(data)
        model._initialize_cetroids('test')

    # Each supported scheme must yield exactly K centroids.
    for scheme in ('random', '++'):
        model = KMeans(init=scheme, K=2)
        model.fit(data)
        model._initialize_cetroids(scheme)
        assert len(model.centroids) == model.K