Beispiel #1
0
  def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
    """Checks cosine-distance KMeans with KMeans++ initialization.

    Fits three clusters on twelve 2-D points, then compares the learned
    (normalized) centers, the per-point assignments and the score against
    reference values computed with NumPy.
    """
    # Most points are concentrated near one center. KMeans++ is likely to find
    # the less populated centers.
    points = np.array([[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
                       [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1], [-3., -3.1],
                       [-3., -3.1], [-3.2, -3.], [-3., -3.]]).astype(np.float32)

    def reference_center(begin, end):
      # Normalized mean of the normalized points in rows [begin, end).
      members = normalize(points)[begin:end, :]
      return normalize(np.mean(members, axis=0, keepdims=True))[0]

    true_centers = np.array([reference_center(0, 2),
                             reference_center(2, 4),
                             reference_center(4, None)])
    true_assignments = [0, 0, 1, 1] + [2] * 8
    # With its default axes, tensordot contracts both dimensions, i.e. it sums
    # the dot products of each normalized point with its assigned center.
    similarity = np.tensordot(normalize(points),
                              true_centers[true_assignments])
    true_score = len(points) - similarity

    batch_size = 12
    estimator = KMeans(3,
                       initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
                       distance_metric=kmeans_ops.COSINE_DISTANCE,
                       use_mini_batch=self.use_mini_batch,
                       config=self.config(3))
    estimator.fit(x=points, steps=30, batch_size=batch_size)

    learned_centers = normalize(estimator.clusters())
    # Sort both sides so the comparison does not depend on cluster order.
    self.assertAllClose(sorted(learned_centers.tolist()),
                        sorted(true_centers.tolist()),
                        atol=1e-2)

    predicted = estimator.predict(points, batch_size=batch_size)
    self.assertAllClose(learned_centers[predicted],
                        true_centers[true_assignments], atol=1e-2)

    self.assertAllClose(estimator.score(points, batch_size=batch_size),
                        true_score, atol=1e-2)
    def test_predict_with_cosine_distance(self):
        """Checks cosine-distance KMeans with random initialization.

        Fits two clusters on eight 2-D points and compares the learned
        (normalized) centers, the assignments and the score against
        reference values computed with NumPy.
        """
        points = np.array([[2.5, 3.5], [2, 8], [3, 1], [3, 18], [-2.5, -3.5],
                           [-2, -8], [-3, -1], [-3, -18]]).astype(np.float32)
        # Each reference center is the normalized mean of the normalized
        # points in one half of the data set.
        halves = (slice(0, 4), slice(4, None))
        true_centers = np.array([
            normalize(np.mean(normalize(points)[rows, :], axis=0,
                              keepdims=True))[0]
            for rows in halves
        ])
        true_assignments = [0, 0, 0, 0, 1, 1, 1, 1]
        # With its default axes, tensordot contracts both dimensions, so this
        # is the summed dot product of every point with its assigned center.
        similarity = np.tensordot(normalize(points),
                                  true_centers[true_assignments])
        true_score = len(points) - similarity

        batch_size = 8
        estimator = KMeans(2,
                           initial_clusters=kmeans_ops.RANDOM_INIT,
                           distance_metric=kmeans_ops.COSINE_DISTANCE,
                           use_mini_batch=self.use_mini_batch,
                           config=self.config(3))
        estimator.fit(x=points, steps=30, batch_size=batch_size)

        learned_centers = normalize(estimator.clusters())
        # Column-wise sort on both sides before comparing the centers.
        self.assertAllClose(np.sort(learned_centers, axis=0),
                            np.sort(true_centers, axis=0),
                            atol=1e-2)

        predicted = estimator.predict(points, batch_size=batch_size)
        self.assertAllClose(learned_centers[predicted],
                            true_centers[true_assignments],
                            atol=1e-2)

        self.assertAllClose(estimator.score(points, batch_size=batch_size),
                            true_score, atol=1e-2)
Beispiel #3
0
  def test_predict_with_cosine_distance(self):
    """Checks cosine-distance KMeans with random initialization.

    Two clusters are fitted on eight 2-D points; the learned (normalized)
    centers, the assignments and the score are compared against reference
    values computed with NumPy.
    """
    points = np.array([[2.5, 3.5], [2, 8], [3, 1], [3, 18],
                       [-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]]).astype(
                           np.float32)

    # Reference centers: normalized mean of the normalized points in the
    # first four rows and in the remaining rows, respectively.
    first_mean = np.mean(normalize(points)[0:4, :], axis=0, keepdims=True)
    first_center = normalize(first_mean)[0]
    second_mean = np.mean(normalize(points)[4:, :], axis=0, keepdims=True)
    second_center = normalize(second_mean)[0]
    true_centers = np.array([first_center, second_center])

    true_assignments = [0] * 4 + [1] * 4
    # With its default axes, tensordot contracts both dimensions, giving the
    # summed dot product of every point with its assigned center.
    matched_centers = true_centers[true_assignments]
    true_score = len(points) - np.tensordot(normalize(points), matched_centers)

    model = KMeans(2,
                   initial_clusters=kmeans_ops.RANDOM_INIT,
                   distance_metric=kmeans_ops.COSINE_DISTANCE,
                   use_mini_batch=self.use_mini_batch,
                   config=self.config(3))
    model.fit(x=points, steps=30, batch_size=8)

    fitted = normalize(model.clusters())
    # Column-wise sort on both sides before comparing the centers.
    self.assertAllClose(np.sort(fitted, axis=0),
                        np.sort(true_centers, axis=0), atol=1e-2)

    labels = model.predict(points, batch_size=8)
    self.assertAllClose(fitted[labels],
                        true_centers[true_assignments], atol=1e-2)

    observed_score = model.score(points, batch_size=8)
    self.assertAllClose(observed_score, true_score, atol=1e-2)
Beispiel #4
0
  def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
    """Checks cosine-distance KMeans with KMeans++ initialization.

    Three clusters are fitted on twelve float32 points; learned (normalized)
    centers, assignments and score are compared against NumPy references.
    """
    # Most points are concentrated near one center. KMeans++ is likely to find
    # the less populated centers.
    points = np.array([[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
                       [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1], [-3., -3.1],
                       [-3., -3.1], [-3.2, -3.], [-3., -3.]], dtype=np.float32)
    # Row ranges [lo, hi) of the three groups; None means "to the end".
    row_groups = ((0, 2), (2, 4), (4, None))
    # Each reference center is the normalized mean of one group's
    # normalized points.
    true_centers = np.array(
        [normalize(np.mean(normalize(points)[lo:hi, :], axis=0,
                           keepdims=True))[0]
         for lo, hi in row_groups], dtype=np.float32)
    true_assignments = [0, 0, 1, 1] + [2] * 8
    # With its default axes, tensordot contracts both dimensions, giving the
    # summed dot product of every point with its assigned center.
    matched_centers = true_centers[true_assignments]
    true_score = len(points) - np.tensordot(normalize(points), matched_centers)

    model = KMeans(3,
                   initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
                   distance_metric=kmeans_ops.COSINE_DISTANCE,
                   use_mini_batch=self.use_mini_batch,
                   config=self.config(3))
    model.fit(x=points, steps=30, batch_size=12)

    fitted = normalize(model.clusters())
    # Sort both sides so the comparison does not depend on cluster order.
    self.assertAllClose(sorted(fitted.tolist()),
                        sorted(true_centers.tolist()),
                        atol=1e-2)

    labels = model.predict(points, batch_size=12)
    self.assertAllClose(fitted[labels],
                        true_centers[true_assignments], atol=1e-2)

    observed_score = model.score(points, batch_size=12)
    self.assertAllClose(observed_score, true_score, atol=1e-2)
Beispiel #5
0
 def _fit(self, num_iters=10):
   """Trains and scores a fresh KMeans estimator `num_iters` times.

   Each iteration builds a new estimator with KMeans++ initialization and a
   different `random_seed`, fits it on `self.points`, and records its score.
   The iteration count, start/end wall-clock times and the collected scores
   are then handed to `self._report`.

   Args:
     num_iters: Number of independent fit/score rounds to run.
   """
   start = time.time()
   scores = []
   for iteration in range(num_iters):
     print('Starting tensorflow KMeans: %d' % iteration)
     # Retry count grows with the log of the number of clusters.
     retries = int(math.log(self.num_clusters) + 2)
     estimator = KMeans(
         self.num_clusters,
         initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
         kmeans_plus_plus_num_retries=retries,
         random_seed=iteration * 42,
         config=run_config.RunConfig(tf_random_seed=3))
     estimator.fit(x=self.points,
                   batch_size=self.num_points,
                   steps=50,
                   relative_tolerance=1e-6)
     # The centers are fetched but not used; presumably so that retrieving
     # them is part of the timed work -- TODO confirm.
     estimator.clusters()
     scores.append(estimator.score(self.points))
   end = time.time()
   self._report(num_iters, start, end, scores)
Beispiel #6
0
  def test_monitor(self):
    """Trains without a step limit and checks the resulting score.

    The test is skipped (returns early) unless the batch covers all points.
    The final score must be within 0.5% of `self.true_score`.
    """
    full_batch = self.batch_size == self.num_points
    if not full_batch:
      # TODO(agarwal): Doesn't work with mini-batch.
      return

    estimator = KMeans(self.num_centers,
                       initial_clusters=kmeans_ops.RANDOM_INIT,
                       use_mini_batch=self.use_mini_batch,
                       config=run_config.RunConfig(tf_random_seed=14),
                       random_seed=12)

    # steps=None forces it to train forever until the monitor stops it.
    estimator.fit(x=self.points,
                  steps=None,
                  batch_size=self.batch_size,
                  relative_tolerance=1e-4)

    observed = estimator.score(x=self.points)
    allowed_error = self.true_score * 0.005
    self.assertNear(self.true_score, observed, allowed_error)
Beispiel #7
0
# Imports are grouped ahead of the statements in this snippet.
from tensorflow.contrib.factorization.python.ops import kmeans as kmeans_ops
from tensorflow.contrib.factorization.python.ops.kmeans import \
    KMeansClustering as KMeans

# `make_random_points`, `true_centers`, `num_points`, `num_centers` and
# `RunConfig` are not defined in this snippet; they must already be in scope.
points, _, scores = make_random_points(true_centers, num_points)

# The cluster count is passed positionally, matching every other KMeans call
# in this file, so the call does not depend on the parameter's keyword name
# (the original `num_centers=` keyword is presumably not accepted by the
# constructor -- verify against the installed TensorFlow version).
kmeans = KMeans(num_centers,
                initial_clusters=kmeans_ops.RANDOM_INIT,
                use_mini_batch=False,
                config=RunConfig(tf_random_seed=14),
                random_seed=12)
kmeans.fit(x=points, steps=10, batch_size=8)
clusters = kmeans.clusters()

# Demonstration calls on the fitted estimator; their results are discarded.
kmeans.predict(points, batch_size=128)
kmeans.score(points, batch_size=128)
kmeans.transform(points, batch_size=128)


####################################
# Support vector machine (SVM)
 def input_fn():
     """Builds a fixed three-example input.

     Returns:
       A `(features, labels)` tuple. `features` maps 'example_id',
       'feature1' and 'feature2' to constant tensors holding three examples;
       `labels` is a constant tensor with one label per example.
     """
     features = {
         'example_id': tf.constant(['1', '2', '3']),
         'feature1': tf.constant([[0.0], [1.0], [3.0]]),
         'feature2': tf.constant([[1.0], [-1.2], [1.0]]),
     }
     # The labels must be a single nested list. The previous call,
     # tf.constant([1],[0],[1]), passed [0] and [1] positionally as the
     # dtype and shape arguments instead of as label values.
     labels = tf.constant([[1], [0], [1]])
     return features, labels

 # Real-valued feature columns for the 'feature1' and 'feature2' keys.
 feature1, feature2 = (
     tf.contrib.layers.real_valued_column(column_name)
     for column_name in ('feature1', 'feature2'))