Ejemplo n.º 1
0
    def test_predict_with_cosine_distance(self):
        """Check cosine-distance k-means (random init) on two mirrored groups.

        The last four points are the negation of the first four, so the
        expected clustering is one cluster per half. Verifies the fitted
        centers, the per-point assignments and the score.
        """
        half = np.array([[2.5, 3.5], [2, 8], [3, 1], [3, 18]])
        points = np.concatenate([half, -half]).astype(np.float32)

        # `normalize` is a helper defined elsewhere in the file.
        unit_points = normalize(points)
        true_centers = np.array([
            normalize(np.mean(group, axis=0, keepdims=True))[0]
            for group in (unit_points[0:4, :], unit_points[4:, :])
        ])
        true_assignments = [0, 0, 0, 0, 1, 1, 1, 1]
        # Expected score: point count minus the summed dot products of
        # each normalized point with its expected center.
        expected_dots = np.tensordot(unit_points,
                                     true_centers[true_assignments])
        true_score = len(points) - expected_dots

        batch_size = 8
        kmeans = KMeans(
            2,
            initial_clusters=kmeans_ops.RANDOM_INIT,
            distance_metric=kmeans_ops.COSINE_DISTANCE,
            use_mini_batch=self.use_mini_batch,
            config=self.config(3))
        kmeans.fit(x=points, steps=30, batch_size=batch_size)

        centers = normalize(kmeans.clusters())
        # Sorting along axis 0 orders each column independently, making
        # the comparison insensitive to the order of the clusters.
        learned_sorted = np.sort(centers, axis=0)
        expected_sorted = np.sort(true_centers, axis=0)
        self.assertAllClose(learned_sorted, expected_sorted, atol=1e-2)

        assignments = kmeans.predict(points, batch_size=batch_size)
        self.assertAllClose(centers[assignments],
                            true_centers[true_assignments],
                            atol=1e-2)

        score = kmeans.score(points, batch_size=batch_size)
        self.assertAllClose(score, true_score, atol=1e-2)
Ejemplo n.º 2
0
  def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
    """Check cosine-distance k-means with k-means++ init on skewed data.

    Verifies the fitted centers, the per-point assignments and the score
    against values computed directly with NumPy.
    """
    # Most points are concentrated near one center. KMeans++ is likely to
    # find the less populated centers.
    points = np.array([
        [2.5, 3.5], [2.5, 3.5],
        [-2, 3], [-2, 3],
        [-3, -3], [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1],
        [-3., -3.1], [-3., -3.1], [-3.2, -3.], [-3., -3.],
    ]).astype(np.float32)

    # Mean direction of each expected group (rows 0-1, 2-3 and 4-11).
    first_mean = np.mean(normalize(points)[0:2, :], axis=0, keepdims=True)
    second_mean = np.mean(normalize(points)[2:4, :], axis=0, keepdims=True)
    third_mean = np.mean(normalize(points)[4:, :], axis=0, keepdims=True)
    true_centers = np.array([
        normalize(first_mean)[0],
        normalize(second_mean)[0],
        normalize(third_mean)[0],
    ])
    true_assignments = [0, 0] + [1, 1] + [2] * 8
    # Expected score: point count minus the summed dot products of each
    # normalized point with its expected center.
    expected_dots = np.tensordot(normalize(points),
                                 true_centers[true_assignments])
    true_score = len(points) - expected_dots

    batch_size = 12
    kmeans = KMeans(
        3,
        initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
        distance_metric=kmeans_ops.COSINE_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        config=self.config(3))
    kmeans.fit(x=points, steps=30, batch_size=batch_size)

    centers = normalize(kmeans.clusters())
    # Compare as sorted row lists so the cluster order does not matter.
    learned_rows = sorted(centers.tolist())
    expected_rows = sorted(true_centers.tolist())
    self.assertAllClose(learned_rows, expected_rows, atol=1e-2)

    assignments = kmeans.predict(points, batch_size=batch_size)
    self.assertAllClose(centers[assignments],
                        true_centers[true_assignments],
                        atol=1e-2)

    score = kmeans.score(points, batch_size=batch_size)
    self.assertAllClose(score, true_score, atol=1e-2)
Ejemplo n.º 3
0
  def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
    """Check k-means++ initialization with cosine distance (float32 data).

    Fits three clusters on twelve points and compares the fitted centers,
    assignments and score with values computed directly with NumPy.
    """
    # Most points are concentrated near one center. KMeans++ is likely to
    # find the less populated centers.
    raw_points = [
        [2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2],
        [-2.8, -3.], [-2.9, -3.1], [-3., -3.1], [-3., -3.1], [-3.2, -3.],
        [-3., -3.],
    ]
    points = np.array(raw_points, dtype=np.float32)

    # One expected center per row range: rows 0-1, rows 2-3, rows 4-end.
    unit_points = normalize(points)
    center_rows = []
    for start, stop in ((0, 2), (2, 4), (4, None)):
      group_mean = np.mean(unit_points[start:stop, :], axis=0, keepdims=True)
      center_rows.append(normalize(group_mean)[0])
    true_centers = np.array(center_rows, dtype=np.float32)

    true_assignments = [0] * 2 + [1] * 2 + [2] * 8
    # Expected score: point count minus the summed dot products of each
    # normalized point with its expected center.
    true_score = len(points) - np.tensordot(
        unit_points, true_centers[true_assignments])

    kmeans = KMeans(3,
                    initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
                    distance_metric=kmeans_ops.COSINE_DISTANCE,
                    use_mini_batch=self.use_mini_batch,
                    config=self.config(3))
    kmeans.fit(x=points, steps=30, batch_size=12)

    centers = normalize(kmeans.clusters())
    # Sorted row lists make the check independent of cluster ordering.
    self.assertAllClose(
        sorted(centers.tolist()), sorted(true_centers.tolist()), atol=1e-2)

    assignments = kmeans.predict(points, batch_size=12)
    expected_per_point = true_centers[true_assignments]
    self.assertAllClose(centers[assignments], expected_per_point, atol=1e-2)

    score = kmeans.score(points, batch_size=12)
    self.assertAllClose(score, true_score, atol=1e-2)
Ejemplo n.º 4
0
  def test_predict_with_cosine_distance(self):
    """Check random-init k-means with cosine distance on eight 2-D points.

    The expected clustering puts the first four points in one cluster and
    the last four in the other; centers, assignments and score are all
    compared against values computed directly with NumPy.
    """
    points = np.array(
        [[2.5, 3.5], [2, 8], [3, 1], [3, 18],
         [-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]],
        dtype=np.float32)

    def _unit_mean(rows):
      # Average the given rows, then re-normalize the averaged row.
      return normalize(np.mean(rows, axis=0, keepdims=True))[0]

    unit_points = normalize(points)
    true_centers = np.array([_unit_mean(unit_points[0:4, :]),
                             _unit_mean(unit_points[4:, :])])
    true_assignments = [0] * 4 + [1] * 4
    per_point_centers = true_centers[true_assignments]
    # Expected score: point count minus the summed dot products of each
    # normalized point with its expected center.
    true_score = len(points) - np.tensordot(unit_points, per_point_centers)

    kmeans = KMeans(
        2,
        initial_clusters=kmeans_ops.RANDOM_INIT,
        distance_metric=kmeans_ops.COSINE_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        config=self.config(3))
    kmeans.fit(x=points, steps=30, batch_size=8)

    centers = normalize(kmeans.clusters())
    # Sorting along axis 0 orders each column independently, so the check
    # does not depend on which cluster index ends up with which center.
    self.assertAllClose(
        np.sort(centers, axis=0), np.sort(true_centers, axis=0), atol=1e-2)

    assignments = kmeans.predict(points, batch_size=8)
    self.assertAllClose(centers[assignments], per_point_centers, atol=1e-2)

    score = kmeans.score(points, batch_size=8)
    self.assertAllClose(score, true_score, atol=1e-2)
Ejemplo n.º 5
0
# Build the data set with helpers defined elsewhere; the middle return
# value of make_random_points is not needed here.
true_centers = make_random_centers(num_centers, num_dims)
points, _, scores = make_random_points(true_centers, num_points)


from tensorflow.contrib.factorization.python.ops import kmeans as kmeans_ops
from tensorflow.contrib.factorization.python.ops.kmeans import (
    KMeansClustering as KMeans)

# Configure a full-batch (non mini-batch) estimator with random
# initialization and fixed seeds, then fit it for ten steps.
kmeans = KMeans(
    num_centers=num_centers,
    initial_clusters=kmeans_ops.RANDOM_INIT,
    use_mini_batch=False,
    config=RunConfig(tf_random_seed=14),
    random_seed=12,
)
kmeans.fit(x=points, steps=10, batch_size=8)
clusters = kmeans.clusters()

# Exercise the inference entry points; their results are discarded here.
kmeans.predict(points, batch_size=128)
kmeans.score(points, batch_size=128)
kmeans.transform(points, batch_size=128)


####################################
# Support vector machine (SVM)
 def input_fn():
     """Build the constant features and labels for the SVM example.

     Returns:
         A ``(features, labels)`` tuple. ``features`` is a dict with the
         keys ``'example_id'``, ``'feature1'`` and ``'feature2'``, each a
         ``tf.constant`` holding three entries. ``labels`` is a
         ``tf.constant`` built from ``[[1], [0], [1]]``, one row per
         example.
     """
     features = {
         'example_id': tf.constant(['1', '2', '3']),
         'feature1': tf.constant([[0.0], [1.0], [3.0]]),
         'feature2': tf.constant([[1.0], [-1.2], [1.0]]),
     }
     # tf.constant takes the value as its first argument. Passing the three
     # label lists positionally would bind [0] to `dtype` and [1] to
     # `shape`, so the labels are given as a single nested list.
     labels = tf.constant([[1], [0], [1]])
     return features, labels

 # Declare one real-valued feature column per numeric feature; the column
 # names match the 'feature1' / 'feature2' keys returned by input_fn.
 feature1 = tf.contrib.layers.real_valued_column('feature1')
 feature2 = tf.contrib.layers.real_valued_column('feature2')