Example #1
0
    def setUp(self):
        """Prepare the cosine-distance fixture shared by the tests.

        Stores eight 2-D points (four close to each axis), the expected
        normalized centers of the two groups, the expected assignments and
        score, and a randomly initialised cosine-distance estimator.
        """
        self.points = np.array(
            [[2.5, 0.1], [2, 0.2], [3, 0.1], [4, 0.2],
             [0.1, 2.5], [0.2, 2], [0.1, 3], [0.2, 4]],
            dtype=np.float32)
        self.num_points = self.points.shape[0]

        # Each expected center is the normalized mean of one half of the
        # normalized points (rows 0-3 and rows 4-7 respectively).
        first_group_mean = np.mean(
            normalize(self.points)[0:4, :], axis=0, keepdims=True)
        second_group_mean = np.mean(
            normalize(self.points)[4:, :], axis=0, keepdims=True)
        self.true_centers = np.array(
            [normalize(first_group_mean)[0], normalize(second_group_mean)[0]],
            dtype=np.float32)
        self.true_assignments = np.array([0] * 4 + [1] * 4)

        # Expected score: number of points minus the full contraction
        # (tensordot's default axes) of the normalized points with the
        # center assigned to each point.
        normalized_points = normalize(self.points)
        assigned_centers = self.true_centers[self.true_assignments]
        self.true_score = len(self.points) - np.tensordot(
            normalized_points, assigned_centers)

        self.num_centers = 2
        self.kmeans = kmeans_lib.KMeansClustering(
            self.num_centers,
            initial_clusters=kmeans_lib.KMeansClustering.RANDOM_INIT,
            distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE,
            use_mini_batch=self.use_mini_batch,
            mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
            config=self.config(3))
Example #2
0
 def _kmeans(self, relative_tolerance=None):
   """Create a squared-Euclidean KMeansClustering from the fixture settings.

   Args:
     relative_tolerance: Passed through unchanged as the estimator's
       `relative_tolerance` argument.

   Returns:
     A new `kmeans_lib.KMeansClustering` seeded with 24.
   """
   metric = kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE
   estimator = kmeans_lib.KMeansClustering(
       self.num_centers,
       initial_clusters=self.initial_clusters,
       distance_metric=metric,
       use_mini_batch=self.use_mini_batch,
       mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
       random_seed=24,
       relative_tolerance=relative_tolerance)
   return estimator
Example #3
0
 def test_kmeans_plus_plus_batch_too_small(self):
   """Training must fail when the batch holds fewer points than clusters.

   Five clusters are requested from five points, but the input function is
   asked for batches of only four points.
   """
   samples = np.array(
       [[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]], dtype=np.float32)
   lib = kmeans_lib.KMeansClustering
   estimator = lib(
       num_clusters=samples.shape[0],
       initial_clusters=lib.KMEANS_PLUS_PLUS_INIT,
       distance_metric=lib.SQUARED_EUCLIDEAN_DISTANCE,
       use_mini_batch=True,
       mini_batch_steps_per_iteration=100,
       random_seed=24,
       relative_tolerance=None)
   # NOTE(review): assertRaisesOpError receives the AssertionError class
   # itself rather than a message pattern - confirm this is the intended
   # matcher.
   with self.assertRaisesOpError(AssertionError):
     estimator.train(
         input_fn=self.input_fn(
             batch_size=4, points=samples, randomize=False),
         steps=1)
Example #4
0
 def test_kmeans_plus_plus_batch_just_right(self):
   """One point, one cluster, batch size one: the center equals the point."""
   samples = np.array([[1, 2]], dtype=np.float32)
   lib = kmeans_lib.KMeansClustering
   estimator = lib(
       num_clusters=samples.shape[0],
       initial_clusters=lib.KMEANS_PLUS_PLUS_INIT,
       distance_metric=lib.SQUARED_EUCLIDEAN_DISTANCE,
       use_mini_batch=True,
       mini_batch_steps_per_iteration=100,
       random_seed=24,
       relative_tolerance=None)
   estimator.train(
       input_fn=self.input_fn(batch_size=1, points=samples, randomize=False),
       steps=1)
   # After a single step the only center is expected to match the only point.
   learned_centers = estimator.cluster_centers()
   self.assertAllEqual(samples, learned_centers)
Example #5
0
    def test_predict_kmeans_plus_plus(self):
        """Train on skewed data, then check centers, assignments and score."""
        # Most points are concentrated near one center; KMeans++ is likely to
        # find the less populated centers.
        points = np.array(
            [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
             [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1],
             [-3., -3.1], [-3., -3.1], [-3.2, -3.], [-3., -3.]],
            dtype=np.float32)

        def _normalized_mean(rows):
            # Column-wise mean kept 2-D for normalize(), then unwrapped.
            return normalize(np.mean(rows, axis=0, keepdims=True))[0]

        # Expected centers: one per group of rows (0-1, 2-3, 4-11).
        true_centers = np.array(
            [
                _normalized_mean(normalize(points)[0:2, :]),
                _normalized_mean(normalize(points)[2:4, :]),
                _normalized_mean(normalize(points)[4:, :]),
            ],
            dtype=np.float32)
        true_assignments = [0] * 2 + [1] * 2 + [2] * 8
        # Number of points minus the full contraction of the normalized
        # points with the expected center of each point.
        true_score = len(points) - np.tensordot(
            normalize(points), true_centers[true_assignments])

        def _all_points_input_fn():
            # Feeds every point in one batch, with no second tuple element.
            return (constant_op.constant(points), None)

        def _single_epoch_input_fn():
            # Same data, limited to one epoch so prediction terminates.
            return (input_lib.limit_epochs(constant_op.constant(points),
                                           num_epochs=1), None)

        estimator = kmeans_lib.KMeansClustering(
            3,
            initial_clusters=self.initial_clusters,
            distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE,
            use_mini_batch=self.use_mini_batch,
            mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
            config=self.config(3))
        estimator.train(input_fn=_all_points_input_fn, steps=30)

        # Centers are compared after sorting because cluster order is not
        # fixed by the test.
        centers = normalize(estimator.cluster_centers())
        self.assertAllClose(sorted(centers.tolist()),
                            sorted(true_centers.tolist()),
                            atol=1e-2)

        assignments = list(
            estimator.predict_cluster_index(input_fn=_single_epoch_input_fn))
        self.assertAllClose(centers[assignments],
                            true_centers[true_assignments],
                            atol=1e-2)

        score = estimator.score(input_fn=_all_points_input_fn)
        self.assertAllClose(score, true_score, atol=1e-2)
Example #6
0
 def _fit(self, num_iters=10):
   """Train and score a KMeans++ estimator `num_iters` times, then report.

   Args:
     num_iters: Number of independent train/score rounds. Round `i` uses
       random seed `i * 42`.
   """

   def _points_input_fn():
     # Feeds all of self.points as one constant tensor.
     return (constant_op.constant(self.points), None)

   scores = []
   start = time.time()
   for run in range(num_iters):
     print('Starting tensorflow KMeans: %d' % run)
     retries = int(math.log(self.num_clusters) + 2)
     tf_kmeans = kmeans_lib.KMeansClustering(
         self.num_clusters,
         initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
         kmeans_plus_plus_num_retries=retries,
         random_seed=run * 42,
         relative_tolerance=1e-6,
         config=self.config(3))
     tf_kmeans.train(input_fn=_points_input_fn, steps=50)
     # The centers are fetched but deliberately discarded.
     _ = tf_kmeans.cluster_centers()
     run_score = tf_kmeans.score(input_fn=_points_input_fn)
     scores.append(run_score)
   self._report(num_iters, start, time.time(), scores)
Example #7
0
  def test_monitor(self):
    """Train with no step limit and check the score is within 1% of expected."""
    if self.use_mini_batch:
      # Skipped for mini-batch runs: the loss value can be noisy there.
      return

    metric = kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE
    estimator = kmeans_lib.KMeansClustering(
        self.num_centers,
        initial_clusters=self.initial_clusters,
        distance_metric=metric,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=self.config(14),
        random_seed=12,
        relative_tolerance=1e-4)

    # steps=None forces training to continue until the relative tolerance
    # monitor stops it.
    estimator.train(input_fn=self.input_fn(), steps=None)

    score = estimator.score(
        input_fn=self.input_fn(batch_size=self.num_points))
    self.assertNear(self.true_score, score, self.true_score * 0.01)
Example #8
0
 def test_queues(self):
     """Run one training step of a 5-cluster estimator on the fixture input."""
     estimator = kmeans_lib.KMeansClustering(5)
     estimator.train(input_fn=self.input_fn(), steps=1)
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
from tensorflow.contrib.factorization.python.ops import kmeans

def input_fn_1D(input_1D_):
    """Convert values to a float32 tensor with an extra axis at position 1.

    Args:
        input_1D_: Values accepted by `tf.convert_to_tensor` (presumably a
            1-D sequence, going by the name).

    Returns:
        A 2-tuple: the expanded float32 tensor, and `None`.
    """
    as_float = tf.convert_to_tensor(input_1D_, dtype=tf.float32)
    as_column = tf.expand_dims(as_float, 1)
    return as_column, None

# Sample values to cluster.
input_1D = np.array([1,2,3.0,4,5,126,21,33,6,73.0,2,3,56,98,100,4,8,33,102])

# Build a two-cluster estimator and train it for up to 1000 steps on the
# sample values. `fit` holds whatever train() returns and is queried for
# variables at the end of the script.
k_means_estimator = kmeans.KMeansClustering(num_clusters=2)
fit = k_means_estimator.train(input_fn=lambda: input_fn_1D(input_1D), steps=1000)
clusters_1D = k_means_estimator.cluster_centers()

# Plot the samples (circles) and the learned centers (red squares) along a
# single horizontal line at y = 0.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(input_1D, np.zeros_like(input_1D), s=300, marker='o')
ax1.scatter(clusters_1D, np.zeros_like(clusters_1D), c='r', s=200, marker='s')
plt.show()

# Print every variable name exposed by the trained object with its value.
for var in fit.get_variable_names():
    print(var, "-----> ", fit.get_variable_value(var))