def test_respawn(self):
    """Check that training from degenerate centroids still uses all clusters.

    Every initial centroid is placed at the origin, so they all coincide.
    After training, each of the ``k`` cluster indices must be assigned to
    at least one sample (presumably because the trainer respawns empty
    clusters -- the test only checks the outcome).
    """
    n_samples, n_clusters = 100, 5
    data = np.random.randn(n_samples, 2)
    # Identical starting centroids: all zeros.
    start = np.zeros((n_clusters, 2))
    trained = pvml.kmeans_train(data, n_clusters, init_centroids=start)
    labels, _ = pvml.kmeans_inference(data, trained)
    found = np.unique(labels).tolist()
    expected = list(range(n_clusters))
    self.assertListEqual(found, expected)
def test_one_per_class(self):
    """Check k-means when the number of clusters equals the sample count.

    For every ``k`` from 1 to 11, ``k`` random 3-dimensional samples are
    clustered into ``k`` clusters; after normalizing the labels, each
    sample must end up with its own distinct label ``0 .. k-1``.
    """
    for n_clusters in range(1, 12):
        with self.subTest(n_clusters):
            points = np.random.randn(n_clusters, 3)
            trained = pvml.kmeans_train(points, n_clusters)
            labels, _ = pvml.kmeans_inference(points, trained)
            observed = normalize_labels(labels).tolist()
            self.assertListEqual(observed, list(range(n_clusters)))
def _linear(self, k, m):
    """Cluster ``m`` points forming ``k`` groups spaced along the x axis.

    Sample ``i`` belongs to group ``i % k``.  Its position is a point on
    the unit circle shifted by ``10 * group`` along the first coordinate.
    The test passes when the normalized k-means assignments match the
    normalized ground-truth groups.
    """
    truth = np.arange(m) % k
    angles = np.linspace(0, 2 * np.pi, m)
    first = np.cos(angles) + 10 * truth
    second = np.sin(angles)
    samples = np.stack([first, second], axis=1)
    trained = pvml.kmeans_train(samples, k)
    assigned, _ = pvml.kmeans_inference(samples, trained)
    expected = normalize_labels(truth).tolist()
    predicted = normalize_labels(assigned).tolist()
    self.assertListEqual(expected, predicted)
def _sort_centroids(self, X, Y):
    """Reorder ``self.centroids`` using the training labels ``Y``.

    K-means cluster indices do not correspond to the training labels.
    A categorical naive Bayes classifier is trained to predict ``Y`` from
    the cluster index of each sample; the centroids are then permuted by
    the argsort of the class predicted for each cluster index, with the
    aim of minimizing the error.
    """
    cluster_of_sample, _ = pvml.kmeans_inference(X, self.centroids)
    # The cluster index is the single categorical feature of the classifier.
    feature = cluster_of_sample[:, np.newaxis]
    probs, priors = pvml.categorical_naive_bayes_train(feature, Y)
    # Ask the classifier which class it predicts for each cluster index.
    cluster_ids = np.arange(self.k)[:, np.newaxis]
    predicted, _ = pvml.categorical_naive_bayes_inference(
        cluster_ids, probs, priors)
    order = np.argsort(predicted)
    self.centroids = self.centroids[order, :]
def inference(self, X):
    """Run k-means inference on ``X`` with the stored centroids.

    Returns whatever ``pvml.kmeans_inference`` returns, unchanged.
    """
    return pvml.kmeans_inference(X, self.centroids)