def setUp(self):
    """Build the eight-point, two-cluster cosine-distance fixture.

    The first four points have a small second coordinate and the last four a
    small first coordinate. The expected centers, assignments and score are
    derived from the `normalize`d points, and the estimator under test is
    created with random initialization and cosine distance.
    """
    first_group = [[2.5, 0.1], [2, 0.2], [3, 0.1], [4, 0.2]]
    second_group = [[0.1, 2.5], [0.2, 2], [0.1, 3], [0.2, 4]]
    self.points = np.array(first_group + second_group, dtype=np.float32)
    self.num_points = self.points.shape[0]

    # Each expected center is `normalize` applied to the mean of the
    # `normalize`d points of one group.
    first_center = normalize(
        np.mean(normalize(self.points)[0:4, :], axis=0, keepdims=True))[0]
    second_center = normalize(
        np.mean(normalize(self.points)[4:, :], axis=0, keepdims=True))[0]
    self.true_centers = np.array(
        [first_center, second_center], dtype=np.float32)
    self.true_assignments = np.array([0] * 4 + [1] * 4)

    # Expected score: number of points minus the full contraction of the
    # normalized points with their assigned centers.
    similarity = np.tensordot(
        normalize(self.points), self.true_centers[self.true_assignments])
    self.true_score = len(self.points) - similarity

    self.num_centers = 2
    estimator_options = dict(
        initial_clusters=factorization.RANDOM_INIT,
        distance_metric=factorization.COSINE_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=self.config(3),
    )
    self.kmeans = kmeans_lib.KMeansClustering(
        self.num_centers, **estimator_options)
def main(_):
    """Fit k-means on the training features and report validation clusters.

    Loads the arrays named by ``FLAGS.train_file`` and ``FLAGS.test_file``.
    Column 0 of the validation array is passed to ``process_clusters`` as the
    labels. When ``FLAGS.endtoend`` is set, column 1 holds the values reported
    as 'K end-to-end' and the features start at column 2; otherwise the
    features start at column 1.

    Args:
      _: Unused positional argument.
    """
    train_set = np.load(FLAGS.train_file)
    val_set = np.load(FLAGS.test_file)
    num_k = int(FLAGS.num_k)

    # With --endtoend an extra column sits between the label column and the
    # features, so the feature slice starts one column later.
    first_feature_col = 2 if FLAGS.endtoend else 1

    clusterer = kmeans.KMeansClustering(
        num_clusters=num_k, use_mini_batch=False)

    # Only the feature columns are needed for fitting; the training label and
    # end-to-end columns were never consumed, so they are not sliced out.
    x = train_set[:, first_feature_col:]
    clusterer.fit(input_fn=_input_fn(x.astype(np.float32)), steps=10)

    labels_val = val_set[:, 0:1]
    x_val = val_set[:, first_feature_col:]
    predictions = np.array(list(clusterer.predict_cluster_idx(
        input_fn=_input_fn(x_val.astype(np.float32), num_epochs=1))))

    process_clusters('K Cluster', labels_val.flatten(), predictions)
    if FLAGS.endtoend:
        ks_val = val_set[:, 1:2]
        process_clusters('K end-to-end', labels_val.flatten(),
                         ks_val.flatten().astype(int))
def create_experiment_fn(output_dir=None):
    """Assemble the k-means `Experiment` configured from command-line flags.

    Args:
      output_dir: Passed to the estimator as `model_dir`.

    Returns:
      A `tf.contrib.learn.Experiment` wrapping a mini-batch
      `KMeansClustering` estimator, using `_input_fn` for both training and
      evaluation and `_predict_input_fn` for export.
    """
    if FLAGS.use_cosine_distance:
        distance_metric = tf.contrib.factorization.COSINE_DISTANCE
    else:
        distance_metric = tf.contrib.factorization.SQUARED_EUCLIDEAN_DISTANCE

    if FLAGS.use_kmeans_plus_plus:
        initial_clusters = tf.contrib.factorization.KMEANS_PLUS_PLUS_INIT
    else:
        initial_clusters = tf.contrib.factorization.RANDOM_INIT

    # Create estimator
    estimator = kmeans_lib.KMeansClustering(
        FLAGS.num_clusters,
        model_dir=output_dir,
        initial_clusters=initial_clusters,
        distance_metric=distance_metric,
        use_mini_batch=True,
        relative_tolerance=FLAGS.relative_tolerance,
        config=tf.contrib.learn.RunConfig(
            save_checkpoints_secs=FLAGS.save_checkpoints_secs))

    # The CLI debugger hook is attached only when --debug is set.
    hooks = [tf_debug.LocalCLIDebugHook()] if FLAGS.debug else []

    export_strategy = saved_model_export_utils.make_export_strategy(
        _predict_input_fn, exports_to_keep=5)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_steps=FLAGS.num_train_steps,
        eval_steps=1,
        eval_input_fn=_input_fn,
        train_input_fn=_input_fn,
        train_monitors=hooks,
        export_strategies=[export_strategy])
def main(_):
    """Cluster the validation samples and draw colored boxes on their images.

    Steps:
      1. Read the comma-separated listing named by ``FLAGS.file_list``.
      2. Fit k-means on the feature columns of ``FLAGS.train_file`` and
         predict a cluster index for every row of ``FLAGS.test_file``.
      3. For every listing row whose field 3 equals 1, pair the next
         prediction with the box stored in fields 4-7 (x, y, w, h) and group
         the boxes by the image path in field 1.
      4. Draw each box on its image in the color of its cluster and write the
         result into ``FLAGS.output_folder``, with 'leftImg8bit' in the file
         name replaced by 'results'.

    Args:
      _: Unused positional argument.

    Raises:
      IOError: If an image referenced by the listing cannot be read.
    """
    # Read the whole listing and close the handle right away.
    with open(FLAGS.file_list) as listing:
        filelist = listing.readlines()

    train_set = np.load(FLAGS.train_file)
    val_set = np.load(FLAGS.test_file)
    num_k = int(FLAGS.num_k)

    # With --endtoend an extra column sits between the label column and the
    # features, so the feature slice starts one column later.
    first_feature_col = 2 if FLAGS.endtoend else 1

    clusterer = kmeans.KMeansClustering(
        num_clusters=num_k, use_mini_batch=False)
    x_train = train_set[:, first_feature_col:]
    clusterer.fit(input_fn=_input_fn(x_train.astype(np.float32)), steps=10)

    x_val = val_set[:, first_feature_col:]
    predictions = np.array(list(clusterer.predict_cluster_idx(
        input_fn=_input_fn(x_val.astype(np.float32), num_epochs=1))))

    # Group (label, x, y, w, h) boxes by image path.
    boxes_by_image = {}
    c = 0
    for line in filelist:
        fields = line.strip().split(',')
        if int(fields[3]) != 1:
            continue
        x, y, w, h = (int(fields[i]) for i in (4, 5, 6, 7))
        boxes_by_image.setdefault(fields[1], []).append(
            (int(predictions[c]), x, y, w, h))
        # NOTE(review): the prediction cursor advances only for selected
        # rows, i.e. predictions are assumed to line up with the rows whose
        # field 3 is 1 -- confirm against how the test file was generated.
        c += 1

    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
    # `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
    for image_key, boxes in boxes_by_image.items():
        image_path = image_key.strip()
        out_name = image_path.split('/')[-1].replace('leftImg8bit', 'results')
        filename = os.path.join(FLAGS.output_folder, out_name)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail with a
            # readable message instead of an opaque error from cv2.rectangle.
            raise IOError('Could not read image: ' + image_path)

        for label, x, y, w, h in boxes:
            cv2.rectangle(img, (x, y), (x + w, y + h), colors[label], 2)
        cv2.imwrite(filename, img)
def _kmeans(self, relative_tolerance=None):
    """Create a randomly initialized `KMeansClustering` estimator.

    Args:
      relative_tolerance: Forwarded unchanged to the estimator.

    Returns:
      A new `kmeans_lib.KMeansClustering` for `self.num_centers` clusters.
    """
    estimator_options = dict(
        initial_clusters=factorization.RANDOM_INIT,
        use_mini_batch=self.use_mini_batch,
        config=self.config(14),
        random_seed=10,
        relative_tolerance=relative_tolerance,
    )
    return kmeans_lib.KMeansClustering(self.num_centers, **estimator_options)
def _kmeans(self, relative_tolerance=None):
    """Create a k-means++ initialized, squared-Euclidean estimator.

    Args:
      relative_tolerance: Forwarded unchanged to the estimator.

    Returns:
      A new `kmeans_lib.KMeansClustering` for `self.num_centers` clusters.
    """
    estimator_options = dict(
        initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT,
        distance_metric=factorization.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        random_seed=24,
        relative_tolerance=relative_tolerance,
    )
    return kmeans_lib.KMeansClustering(self.num_centers, **estimator_options)
def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(self):
    """Requesting three clusters for two points must raise an op error.

    The error message is expected to match 'less'.
    """
    points = np.array([[2.0, 3.0], [1.6, 8.2]], dtype=np.float32)

    def two_point_input_fn():
        return (constant_op.constant(points), None)

    with self.assertRaisesOpError('less'):
        estimator = kmeans_lib.KMeansClustering(
            num_clusters=3, initial_clusters=factorization.RANDOM_INIT)
        estimator.fit(input_fn=two_point_input_fn, steps=10)
def generate_cluster(k, data_set):
    """
    Fit a k-means estimator on the given data set and return its clusters.

    :param k number of clusters to generate
    :param data_set input data set, fed to the estimator via `input_fn_1d`
    :return the result of the fitted estimator's `clusters()` call
    """
    def training_input():
        return input_fn_1d(data_set)

    estimator = kmeans.KMeansClustering(num_clusters=k)
    estimator.fit(input_fn=training_input, steps=1000)
    return estimator.clusters()
def test_kmeans_plus_plus_batch_too_small(self):
    """Fitting must raise when the batch is smaller than the cluster count.

    Five clusters are requested (one per point) while the input function is
    asked for batches of four points.
    """
    points = np.array(
        [[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]], dtype=np.float32)
    estimator_cls = kmeans_lib.KMeansClustering
    estimator = estimator_cls(
        num_clusters=points.shape[0],
        initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
        distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=100,
        random_seed=24,
        relative_tolerance=None)
    small_batch_input_fn = self.input_fn(
        batch_size=4, points=points, randomize=False)

    # NOTE(review): an exception class is passed here where a sibling test
    # passes a message pattern ('less') -- confirm this matches what
    # `assertRaisesOpError` expects.
    with self.assertRaisesOpError(AssertionError):
        estimator.fit(input_fn=small_batch_input_fn, steps=1)
def test_kmeans_plus_plus_batch_just_right(self):
    """A single point with a single cluster yields that point as the center."""
    points = np.array([[1, 2]], dtype=np.float32)
    estimator_cls = kmeans_lib.KMeansClustering
    estimator = estimator_cls(
        num_clusters=points.shape[0],
        initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
        distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=100,
        random_seed=24,
        relative_tolerance=None)

    # The batch size equals the number of requested clusters.
    exact_batch_input_fn = self.input_fn(
        batch_size=1, points=points, randomize=False)
    estimator.fit(input_fn=exact_batch_input_fn, steps=1)

    learned_centers = estimator.clusters()
    self.assertAllEqual(points, learned_centers)
def test_predict_kmeans_plus_plus(self):
    """Check centers, assignments and score with k-means++ and cosine distance."""
    # Most points are concentrated near one center. KMeans++ is likely to find
    # the less populated centers.
    points = np.array(
        [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2],
         [-2.8, -3.], [-2.9, -3.1], [-3., -3.1], [-3., -3.1], [-3.2, -3.],
         [-3., -3.]],
        dtype=np.float32)

    # Row ranges of the three groups: two, two and eight points.
    group_rows = (slice(0, 2), slice(2, 4), slice(4, None))
    true_centers = np.array(
        [normalize(
            np.mean(normalize(points)[rows, :], axis=0, keepdims=True))[0]
         for rows in group_rows],
        dtype=np.float32)
    true_assignments = [0] * 2 + [1] * 2 + [2] * 8
    true_score = len(points) - np.tensordot(
        normalize(points), true_centers[true_assignments])

    def all_points_input_fn():
        return (constant_op.constant(points), None)

    def single_epoch_input_fn():
        return (input_lib.limit_epochs(
            constant_op.constant(points), num_epochs=1), None)

    estimator = kmeans_lib.KMeansClustering(
        3,
        initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT,
        distance_metric=factorization.COSINE_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=self.config(3))
    estimator.fit(input_fn=all_points_input_fn, steps=30)

    # Centers are compared after sorting, so their order does not matter.
    centers = normalize(estimator.clusters())
    self.assertAllClose(
        sorted(centers.tolist()), sorted(true_centers.tolist()), atol=1e-2)

    assignments = list(
        estimator.predict_cluster_idx(input_fn=single_epoch_input_fn))
    self.assertAllClose(
        centers[assignments], true_centers[true_assignments], atol=1e-2)

    score = estimator.score(input_fn=all_points_input_fn, steps=1)
    self.assertAllClose(score, true_score, atol=1e-2)
def setUp(self):
    """Seed the RNGs, generate random points and derive initial means.

    The initial means are the clusters of a `KMeansClustering` estimator
    fitted on the generated points for 30 steps.
    """
    np.random.seed(3)
    random_seed_lib.set_random_seed(2)

    self.num_centers, self.num_dims, self.num_points = 2, 2, 4000
    self.batch_size = self.num_points
    self.true_centers = self.make_random_centers(
        self.num_centers, self.num_dims)
    self.points, self.assignments = self.make_random_points(
        self.true_centers, self.num_points)

    def all_points_input_fn():
        return (constant_op.constant(self.points), None)

    # Use initial means from kmeans (just like scikit-learn does).
    initializer = kmeans.KMeansClustering(num_clusters=self.num_centers)
    initializer.fit(input_fn=all_points_input_fn, steps=30)
    self.initial_means = initializer.clusters()
def setUp(self):
    """Generate random points around five centers and build the estimator.

    `self.true_score` is the `np.add.reduce` of the per-point scores returned
    by `make_random_points`.
    """
    np.random.seed(3)

    self.num_centers, self.num_dims, self.num_points = 5, 2, 10000
    self.true_centers = make_random_centers(self.num_centers, self.num_dims)

    generated = make_random_points(self.true_centers, self.num_points)
    self.points, _, self.scores = generated
    self.true_score = np.add.reduce(self.scores)

    estimator_options = dict(
        initial_clusters=factorization.RANDOM_INIT,
        use_mini_batch=self.use_mini_batch,
        config=self.config(14),
        random_seed=10,
    )
    self.kmeans = kmeans_lib.KMeansClustering(
        self.num_centers, **estimator_options)
def test_monitor(self):
    """Training stopped by the tolerance monitor lands near the true score.

    Does nothing when `self.use_mini_batch` is set.
    """
    if self.use_mini_batch:
        return

    estimator = kmeans_lib.KMeansClustering(
        self.num_centers,
        initial_clusters=factorization.RANDOM_INIT,
        use_mini_batch=self.use_mini_batch,
        config=run_config.RunConfig(tf_random_seed=14),
        random_seed=12)

    # Force it to train forever until the monitor stops it.
    estimator.fit(
        input_fn=self.input_fn(), steps=None, relative_tolerance=1e-4)

    full_batch_input_fn = self.input_fn(batch_size=self.num_points)
    score = estimator.score(input_fn=full_batch_input_fn, steps=1)

    # Accept a deviation of 0.5% of the true score.
    allowed_error = self.true_score * 0.005
    self.assertNear(self.true_score, score, allowed_error)
def _fit(self, num_iters=10):
    """Fit and score a fresh k-means++ estimator `num_iters` times.

    Each round uses a different `random_seed`. The collected scores and the
    start/end wall-clock times are handed to `self._report`.

    Args:
      num_iters: Number of independent fit/score rounds.
    """
    def all_points_input_fn():
        return (constant_op.constant(self.points), None)

    scores = []
    start = time.time()
    for round_index in range(num_iters):
        print('Starting tensorflow KMeans: %d' % round_index)
        estimator = kmeans_lib.KMeansClustering(
            self.num_clusters,
            initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
            kmeans_plus_plus_num_retries=int(
                math.log(self.num_clusters) + 2),
            random_seed=round_index * 42,
            relative_tolerance=1e-6,
            config=run_config.RunConfig(tf_random_seed=3))
        estimator.fit(input_fn=all_points_input_fn, steps=50)
        # The clusters are fetched inside the timed region; the value itself
        # is discarded.
        _ = estimator.clusters()
        round_score = estimator.score(input_fn=all_points_input_fn, steps=1)
        scores.append(round_score)
    self._report(num_iters, start, time.time(), scores)
def test_monitor(self):
    """Train until the relative-tolerance monitor stops, then score once."""
    if self.use_mini_batch:
        # We don't test for use_mini_batch case since the loss value can be
        # noisy.
        return

    estimator_cls = kmeans_lib.KMeansClustering
    estimator = estimator_cls(
        self.num_centers,
        initial_clusters=estimator_cls.KMEANS_PLUS_PLUS_INIT,
        distance_metric=estimator_cls.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=learn.RunConfig(tf_random_seed=14),
        random_seed=12,
        relative_tolerance=1e-4)

    # Force it to train until the relative tolerance monitor stops it.
    estimator.fit(input_fn=self.input_fn(), steps=None)

    full_batch_input_fn = self.input_fn(batch_size=self.num_points)
    # NOTE(review): the score is computed but nothing is asserted on it in
    # the visible code -- confirm whether a check was intended here.
    score = estimator.score(input_fn=full_batch_input_fn, steps=1)
def test_queues(self):
    """Fit a five-cluster estimator for one step on `self.input_fn()`."""
    training_input_fn = self.input_fn()
    estimator = kmeans_lib.KMeansClustering(5)
    estimator.fit(input_fn=training_input_fn, steps=1)
# __init__( # num_clusters, # model_dir=None, # initial_clusters=RANDOM_INIT, # distance_metric=SQUARED_EUCLIDEAN_DISTANCE, # random_seed=0, # use_mini_batch=True, # mini_batch_steps_per_iteration=1, # kmeans_plus_plus_num_retries=2, # relative_tolerance=None, # config=None # ) # In[16]: k_means_estimator = kmeans.KMeansClustering(num_clusters=10) # Start with a 1000 steps and then try 10000 fit = k_means_estimator.fit(input_fn=lambda: input_fn(training_digits), steps=1000) # In[17]: clusters = k_means_estimator.clusters() # ### Plot the image representations of the centroids of the clusters # *Note that these may not be actual images in our training data* # In[20]: for i in range(10):