예제 #1
0
    def setUp(self):
        """Prepare the two-cluster cosine-distance fixture.

        Stores eight 2-D points (four close to each coordinate axis), the
        expected centers, assignments and score derived from them through
        the `normalize` helper defined elsewhere in this module, and a
        `KMeansClustering` estimator using random initialization and the
        cosine distance metric.
        """
        self.points = np.array(
            [[2.5, 0.1], [2, 0.2], [3, 0.1], [4, 0.2],
             [0.1, 2.5], [0.2, 2], [0.1, 3], [0.2, 4]],
            dtype=np.float32)
        self.num_points = self.points.shape[0]

        def normalized_group_mean(rows):
            # Average the normalized points selected by `rows`, normalize the
            # average again, and drop the axis kept by `keepdims=True`.
            group = normalize(self.points)[rows, :]
            return normalize(np.mean(group, axis=0, keepdims=True))[0]

        first_center = normalized_group_mean(slice(0, 4))
        second_center = normalized_group_mean(slice(4, None))
        self.true_centers = np.array([first_center, second_center],
                                     dtype=np.float32)

        # The first four points belong to center 0, the last four to center 1.
        self.true_assignments = np.array([0] * 4 + [1] * 4)
        assigned_centers = self.true_centers[self.true_assignments]
        total_dot = np.tensordot(normalize(self.points), assigned_centers)
        self.true_score = len(self.points) - total_dot

        self.num_centers = 2
        estimator_options = dict(
            initial_clusters=factorization.RANDOM_INIT,
            distance_metric=factorization.COSINE_DISTANCE,
            use_mini_batch=self.use_mini_batch,
            mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
            config=self.config(3))
        self.kmeans = kmeans_lib.KMeansClustering(self.num_centers,
                                                  **estimator_options)
def main(_):
    """Fit k-means on the training features and report clusters on the test set.

    The arrays loaded from `FLAGS.train_file` and `FLAGS.test_file` are sliced
    by column: column 0 of the test set is used as the label.  When
    `FLAGS.endtoend` is set, features start at column 2 instead of column 1
    and column 1 of the test set is reported as a second clustering
    (presumably cluster ids produced by an end-to-end model -- confirm with
    the code that writes these files).

    Args:
        _: Unused positional argument.
    """
    train_set = np.load(FLAGS.train_file)
    val_set = np.load(FLAGS.test_file)
    num_k = int(FLAGS.num_k)

    # In end-to-end mode one extra leading column precedes the features.
    # (The previous code also sliced unused `labels`/`ks` training columns,
    # the latter with a mistyped `[:1:2]` slice; both were dead and are gone.)
    feature_start = 2 if FLAGS.endtoend else 1

    clusterer = kmeans.KMeansClustering(
        num_clusters=num_k,
        use_mini_batch=False)
    x = train_set[:, feature_start:]
    clusterer.fit(
        input_fn=_input_fn(x.astype(np.float32)), steps=10)

    labels_val = val_set[:, 0:1]
    x_val = val_set[:, feature_start:]
    # predict_cluster_idx is consumed through list(), so the input function is
    # limited to a single epoch.
    predictions = np.array(list(clusterer.predict_cluster_idx(
        input_fn=_input_fn(x_val.astype(np.float32), num_epochs=1))))

    process_clusters('K Cluster', labels_val.flatten(), predictions)

    if FLAGS.endtoend:
        ks_val = val_set[:, 1:2]
        process_clusters('K end-to-end', labels_val.flatten(),
                         ks_val.flatten().astype(int))
def create_experiment_fn(output_dir=None):
    """Assemble the k-means training `Experiment`.

    Args:
        output_dir: Passed to the estimator as its `model_dir`.

    Returns:
        A `tf.contrib.learn.Experiment` wrapping a mini-batch
        `KMeansClustering` estimator.  `_input_fn` feeds both training and
        evaluation, and an export strategy built from `_predict_input_fn`
        keeps the five most recent exports.
    """
    # Flags select the distance metric and the centroid initialization.
    if FLAGS.use_cosine_distance:
        metric = tf.contrib.factorization.COSINE_DISTANCE
    else:
        metric = tf.contrib.factorization.SQUARED_EUCLIDEAN_DISTANCE

    if FLAGS.use_kmeans_plus_plus:
        init_method = tf.contrib.factorization.KMEANS_PLUS_PLUS_INIT
    else:
        init_method = tf.contrib.factorization.RANDOM_INIT

    checkpoint_config = tf.contrib.learn.RunConfig(
        save_checkpoints_secs=FLAGS.save_checkpoints_secs)
    estimator = kmeans_lib.KMeansClustering(
        FLAGS.num_clusters,
        model_dir=output_dir,
        initial_clusters=init_method,
        distance_metric=metric,
        use_mini_batch=True,
        relative_tolerance=FLAGS.relative_tolerance,
        config=checkpoint_config)

    # The CLI debug hook is attached only when FLAGS.debug is set.
    hooks = [tf_debug.LocalCLIDebugHook()] if FLAGS.debug else []

    export_strategy = saved_model_export_utils.make_export_strategy(
        _predict_input_fn, exports_to_keep=5)
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_steps=FLAGS.num_train_steps,
        eval_steps=1,
        eval_input_fn=_input_fn,
        train_input_fn=_input_fn,
        train_monitors=hooks,
        export_strategies=[export_strategy])
def main(_):
    """Cluster the test set with k-means and draw the boxes onto their images.

    Steps:
      1. Read the comma-separated listing at `FLAGS.file_list`.
      2. Fit `KMeansClustering` on the training features and predict a cluster
         index for every test row.
      3. For every listing row whose field 3 equals 1, take the next
         prediction and group the box (fields 4-7: x, y, width, height) under
         the image path in field 1.
      4. Draw each box in its cluster's colour and write the image into
         `FLAGS.output_folder`, with 'leftImg8bit' in the file name replaced
         by 'results'.

    NOTE(review): predictions are matched to listing rows purely by order, so
    this assumes the rows with field 3 == 1 appear in the same order as the
    rows of `FLAGS.test_file` -- confirm against the code that writes both.

    Args:
        _: Unused positional argument.
    """
    # `with` guarantees the listing file handle is closed.
    with open(FLAGS.file_list) as list_file:
        filelist = [line.strip() for line in list_file]

    train_set = np.load(FLAGS.train_file)
    val_set = np.load(FLAGS.test_file)
    num_k = int(FLAGS.num_k)

    # In end-to-end mode one extra leading column precedes the features.
    # (Unused label/`ks` slices, one of them mistyped as `[:1:2]`, were
    # removed.)
    feature_start = 2 if FLAGS.endtoend else 1

    clusterer = kmeans.KMeansClustering(
        num_clusters=num_k,
        use_mini_batch=False)
    x_train = train_set[:, feature_start:]
    clusterer.fit(
        input_fn=_input_fn(x_train.astype(np.float32)), steps=10)

    x_val = val_set[:, feature_start:]
    predictions = np.array(list(clusterer.predict_cluster_idx(
        input_fn=_input_fn(x_val.astype(np.float32), num_epochs=1))))

    # Group (cluster, x, y, w, h) tuples by image path.
    boxes_by_image = {}
    used = 0  # index of the next prediction to consume
    for entry in filelist:
        if not entry:
            # Tolerate blank lines (e.g. a trailing newline) in the listing.
            continue
        fields = entry.split(',')
        if int(fields[3]) != 1:
            continue
        box = (int(predictions[used]),
               int(fields[4]), int(fields[5]),
               int(fields[6]), int(fields[7]))
        boxes_by_image.setdefault(fields[1], []).append(box)
        used += 1

    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for image_path, boxes in boxes_by_image.items():
        image_path = image_path.strip()
        base_name = image_path.split('/')[-1]
        filename = os.path.join(
            FLAGS.output_folder, base_name.replace('leftImg8bit', 'results'))
        img = cv2.imread(image_path)
        for label, x, y, w, h in boxes:
            # The palette has four entries; wrap so that more than four
            # clusters reuse colours instead of raising IndexError.
            color = colors[label % len(colors)]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.imwrite(filename, img)
예제 #5
0
 def _kmeans(self, relative_tolerance=None):
     """Create a randomly initialized `KMeansClustering` estimator.

     Args:
         relative_tolerance: Forwarded unchanged to the estimator.

     Returns:
         A new estimator for `self.num_centers` clusters built with
         `self.config(14)` and `random_seed=10`.
     """
     estimator_options = dict(
         initial_clusters=factorization.RANDOM_INIT,
         use_mini_batch=self.use_mini_batch,
         config=self.config(14),
         random_seed=10,
         relative_tolerance=relative_tolerance)
     return kmeans_lib.KMeansClustering(self.num_centers,
                                        **estimator_options)
예제 #6
0
 def _kmeans(self, relative_tolerance=None):
     """Create a k-means++ initialized, squared-Euclidean estimator.

     Args:
         relative_tolerance: Forwarded unchanged to the estimator.

     Returns:
         A new `KMeansClustering` for `self.num_centers` clusters with
         `random_seed=24` and the fixture's mini-batch settings.
     """
     estimator = kmeans_lib.KMeansClustering(
         self.num_centers,
         initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT,
         distance_metric=factorization.SQUARED_EUCLIDEAN_DISTANCE,
         use_mini_batch=self.use_mini_batch,
         mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
         random_seed=24,
         relative_tolerance=relative_tolerance,
     )
     return estimator
예제 #7
0
    def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(
            self):
        """Requesting 3 clusters from 2 points with random init must fail."""
        two_points = np.array([[2.0, 3.0], [1.6, 8.2]], dtype=np.float32)

        def feed_points():
            # (features, labels) pair; this test supplies no labels.
            return constant_op.constant(two_points), None

        # The raised op error is expected to match 'less'.
        with self.assertRaisesOpError('less'):
            estimator = kmeans_lib.KMeansClustering(
                num_clusters=3, initial_clusters=factorization.RANDOM_INIT)
            estimator.fit(input_fn=feed_points, steps=10)
def generate_cluster(k, data_set):
    """
    Fit a k-means estimator on the given data set and return its clusters.

    :param k number of clusters to generate
    :param data_set input data set, fed to the estimator via `input_fn_1d`
    :return the result of the estimator's `clusters()` after fitting with
        `steps=1000`
    """
    def feed_data():
        return input_fn_1d(data_set)

    estimator = kmeans.KMeansClustering(num_clusters=k)
    estimator.fit(input_fn=feed_data, steps=1000)
    return estimator.clusters()
예제 #9
0
 def test_kmeans_plus_plus_batch_too_small(self):
   """k-means++ init must fail when the batch is smaller than num_clusters.

   One cluster per point (five) is requested while the input function is
   asked for batches of four.
   """
   five_points = np.array(
       [[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]], dtype=np.float32)
   estimator_cls = kmeans_lib.KMeansClustering
   estimator = estimator_cls(
       num_clusters=five_points.shape[0],
       initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
       distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
       use_mini_batch=True,
       mini_batch_steps_per_iteration=100,
       random_seed=24,
       relative_tolerance=None)
   with self.assertRaisesOpError(AssertionError):
     undersized_batches = self.input_fn(
         batch_size=4, points=five_points, randomize=False)
     estimator.fit(input_fn=undersized_batches, steps=1)
예제 #10
0
 def test_kmeans_plus_plus_batch_just_right(self):
   """With one point, one cluster and batch size 1, the point is the cluster."""
   single_point = np.array([[1, 2]], dtype=np.float32)
   estimator_cls = kmeans_lib.KMeansClustering
   estimator = estimator_cls(
       num_clusters=single_point.shape[0],
       initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
       distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
       use_mini_batch=True,
       mini_batch_steps_per_iteration=100,
       random_seed=24,
       relative_tolerance=None)
   exact_batches = self.input_fn(
       batch_size=1, points=single_point, randomize=False)
   estimator.fit(input_fn=exact_batches, steps=1)
   # The fitted clusters must equal the input point exactly.
   self.assertAllEqual(single_point, estimator.clusters())
예제 #11
0
    def test_predict_kmeans_plus_plus(self):
        """Check centers, assignments and score with k-means++ / cosine distance.

        Expected values are derived from the points through the `normalize`
        helper defined elsewhere in this module; every comparison uses an
        absolute tolerance of 1e-2.
        """
        # Most points are concentrated near one center. KMeans++ is likely to
        # find the less populated centers.
        points = np.array(
            [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
             [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1],
             [-3., -3.1], [-3., -3.1], [-3.2, -3.], [-3., -3.]],
            dtype=np.float32)

        def normalized_group_mean(rows):
            # Average the normalized points selected by `rows`, normalize the
            # average again, and drop the axis kept by `keepdims=True`.
            group = normalize(points)[rows, :]
            return normalize(np.mean(group, axis=0, keepdims=True))[0]

        group_rows = (slice(0, 2), slice(2, 4), slice(4, None))
        true_centers = np.array(
            [normalized_group_mean(rows) for rows in group_rows],
            dtype=np.float32)
        true_assignments = [0] * 2 + [1] * 2 + [2] * 8
        true_score = len(points) - np.tensordot(
            normalize(points), true_centers[true_assignments])

        def all_points_fn():
            return constant_op.constant(points), None

        def single_epoch_fn():
            # Limited to one epoch so the prediction iterator terminates.
            limited = input_lib.limit_epochs(constant_op.constant(points),
                                             num_epochs=1)
            return limited, None

        estimator = kmeans_lib.KMeansClustering(
            3,
            initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT,
            distance_metric=factorization.COSINE_DISTANCE,
            use_mini_batch=self.use_mini_batch,
            mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
            config=self.config(3))
        estimator.fit(input_fn=all_points_fn, steps=30)

        # Cluster order is arbitrary, so centers are compared after sorting.
        centers = normalize(estimator.clusters())
        found_sorted = sorted(centers.tolist())
        expected_sorted = sorted(true_centers.tolist())
        self.assertAllClose(found_sorted, expected_sorted, atol=1e-2)

        # Compare the center assigned to each point rather than raw indices.
        assignments = list(
            estimator.predict_cluster_idx(input_fn=single_epoch_fn))
        self.assertAllClose(centers[assignments],
                            true_centers[true_assignments],
                            atol=1e-2)

        score = estimator.score(input_fn=all_points_fn, steps=1)
        self.assertAllClose(score, true_score, atol=1e-2)
예제 #12
0
  def setUp(self):
    """Seed the RNGs, generate clustered points and compute initial means.

    Stores the fixture sizes, the centers from `make_random_centers`, the
    points and assignments from `make_random_points`, and
    `self.initial_means`, the clusters of a `KMeansClustering` estimator
    fitted on those points for 30 steps.
    """
    np.random.seed(3)
    random_seed_lib.set_random_seed(2)

    self.num_centers = 2
    self.num_dims = 2
    self.num_points = 4000
    # A single batch holds every point.
    self.batch_size = self.num_points

    self.true_centers = self.make_random_centers(
        self.num_centers, self.num_dims)
    generated = self.make_random_points(self.true_centers, self.num_points)
    self.points, self.assignments = generated

    def feed_points():
        return constant_op.constant(self.points), None

    # Use initial means from kmeans (just like scikit-learn does).
    seeding_estimator = kmeans.KMeansClustering(
        num_clusters=self.num_centers)
    seeding_estimator.fit(input_fn=feed_points, steps=30)
    self.initial_means = seeding_estimator.clusters()
예제 #13
0
    def setUp(self):
        """Generate random clustered points and a randomly initialized estimator.

        Stores the centers from `make_random_centers`, the points and
        per-point scores from `make_random_points`, the summed score as
        `self.true_score`, and a `KMeansClustering` estimator in
        `self.kmeans`.
        """
        np.random.seed(3)

        self.num_centers = 5
        self.num_dims = 2
        self.num_points = 10000

        self.true_centers = make_random_centers(
            self.num_centers, self.num_dims)
        # The middle element of the returned triple is not needed here.
        generated = make_random_points(self.true_centers, self.num_points)
        self.points, _, self.scores = generated
        self.true_score = np.add.reduce(self.scores)

        estimator_options = dict(
            initial_clusters=factorization.RANDOM_INIT,
            use_mini_batch=self.use_mini_batch,
            config=self.config(14),
            random_seed=10)
        self.kmeans = kmeans_lib.KMeansClustering(self.num_centers,
                                                  **estimator_options)
예제 #14
0
    def test_monitor(self):
        """Fit with `relative_tolerance` and no step limit, then check the score.

        Does nothing when the fixture uses mini-batches.
        """
        if self.use_mini_batch:
            return

        estimator = kmeans_lib.KMeansClustering(
            self.num_centers,
            initial_clusters=factorization.RANDOM_INIT,
            use_mini_batch=self.use_mini_batch,
            config=run_config.RunConfig(tf_random_seed=14),
            random_seed=12)

        # Force it to train forever until the monitor stops it.
        estimator.fit(input_fn=self.input_fn(),
                      steps=None,
                      relative_tolerance=1e-4)

        full_batch_input = self.input_fn(batch_size=self.num_points)
        score = estimator.score(input_fn=full_batch_input, steps=1)
        # Accept a deviation of up to 0.5% of the expected score.
        allowed_error = self.true_score * 0.005
        self.assertNear(self.true_score, score, allowed_error)
예제 #15
0
 def _fit(self, num_iters=10):
   """Fit and score a fresh k-means++ estimator `num_iters` times.

   Each iteration builds a new `KMeansClustering` seeded with `i * 42`, fits
   it on `self.points` for 50 steps, reads its clusters and records its
   score.  The iteration count, start/end times and scores are then handed
   to `self._report`.

   Args:
     num_iters: Number of independent fit/score rounds.
   """
   def feed_points():
     return constant_op.constant(self.points), None

   scores = []
   start = time.time()
   for round_index in range(num_iters):
     print('Starting tensorflow KMeans: %d' % round_index)
     retries = int(math.log(self.num_clusters) + 2)
     estimator = kmeans_lib.KMeansClustering(
         self.num_clusters,
         initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
         kmeans_plus_plus_num_retries=retries,
         random_seed=round_index * 42,
         relative_tolerance=1e-6,
         config=run_config.RunConfig(tf_random_seed=3))
     estimator.fit(input_fn=feed_points, steps=50)
     # The clusters are fetched inside the timed region; the value itself is
     # not used.
     _ = estimator.clusters()
     round_score = estimator.score(input_fn=feed_points, steps=1)
     scores.append(round_score)
   self._report(num_iters, start, time.time(), scores)
예제 #16
0
  def test_monitor(self):
    """Fit until the relative-tolerance monitor stops training, then score.

    NOTE(review): the score is computed but nothing is asserted on it here --
    confirm whether a check against an expected score is missing.
    """
    if self.use_mini_batch:
      # We don't test for use_mini_batch case since the loss value can be noisy.
      return

    estimator_cls = kmeans_lib.KMeansClustering
    estimator = estimator_cls(
        self.num_centers,
        initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
        distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=learn.RunConfig(tf_random_seed=14),
        random_seed=12,
        relative_tolerance=1e-4)

    # Force it to train until the relative tolerance monitor stops it.
    estimator.fit(input_fn=self.input_fn(), steps=None)

    full_batch_input = self.input_fn(batch_size=self.num_points)
    score = estimator.score(input_fn=full_batch_input, steps=1)
예제 #17
0
 def test_queues(self):
     """Run a single fit step of a 5-cluster estimator on `self.input_fn()`."""
     num_clusters = 5
     estimator = kmeans_lib.KMeansClustering(num_clusters)
     estimator.fit(input_fn=self.input_fn(), steps=1)
# __init__(
#     num_clusters,
#     model_dir=None,
#     initial_clusters=RANDOM_INIT,
#     distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
#     random_seed=0,
#     use_mini_batch=True,
#     mini_batch_steps_per_iteration=1,
#     kmeans_plus_plus_num_retries=2,
#     relative_tolerance=None,
#     config=None
# )

# In[16]:

# Build a k-means estimator with 10 clusters; every other constructor argument
# keeps its default (see the signature listed above).
k_means_estimator = kmeans.KMeansClustering(num_clusters=10)

# Start with 1000 steps, then try 10000.
# `input_fn` and `training_digits` are defined elsewhere in this file.
fit = k_means_estimator.fit(input_fn=lambda: input_fn(training_digits),
                            steps=1000)

# In[17]:

# Retrieve the clusters from the fitted estimator.
clusters = k_means_estimator.clusters()

# ### Plot the image representations of the centroids of the clusters
# *Note that these may not be actual images in our training data*

# In[20]:

for i in range(10):