import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# BoxExtractor, get_faster_rcnn_config, get_faster_rcnn_checkpoint, and
# load_image_from_path are provided by this project.


def main(unused_argv):
    with tf.device("/cpu:0"):
        image = load_image_from_path("images/image.jpg")[np.newaxis, ...]
        print(image.dtype)
        g = tf.Graph()
        with g.as_default():
            box_extractor = BoxExtractor(get_faster_rcnn_config())
            inputs = tf.placeholder(tf.float32, shape=image.shape)
            boxes, scores, cropped_inputs = box_extractor(inputs)

        with tf.Session(graph=g) as sess:
            saver = tf.train.Saver(var_list=box_extractor.variables)
            saver.restore(sess, get_faster_rcnn_checkpoint())
            results = sess.run([boxes, scores, cropped_inputs],
                               feed_dict={inputs: image})
            tf.logging.info("Successfully passed test.")

        # Draw each predicted box on the image; boxes are returned as
        # normalized [y1, x1, y2, x2], so scale them back to pixels.
        height = image.shape[1]
        width = image.shape[2]
        fig, ax = plt.subplots(1)
        ax.imshow(image[0, :])
        for i in range(results[0].shape[1]):
            this_box = results[0][0, i, :]
            box_y1 = this_box[0] * height
            box_x1 = this_box[1] * width
            box_y2 = this_box[2] * height
            box_x2 = this_box[3] * width
            rect = patches.Rectangle(
                (box_x1, box_y1), (box_x2 - box_x1), (box_y2 - box_y1),
                linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
        plt.savefig("images/image_boxes.png")
        plt.clf()

        # Save each cropped box as a separate image.
        for i in range(results[0].shape[1]):
            fig, ax = plt.subplots(1)
            ax.imshow(results[2][i, ...].astype(np.uint8))
            plt.savefig("images/box{0}.png".format(i))
            plt.clf()
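# The tests in this file assume a small I/O helper named load_image_from_path
# whose definition is not shown. A minimal sketch, assuming Pillow is
# installed (the actual project helper may differ):
def load_image_from_path(image_path):
    """Loads an image from disk as a float32 RGB array of shape [H, W, 3]."""
    from PIL import Image
    with Image.open(image_path) as handle:
        return np.asarray(handle.convert("RGB"), dtype=np.float32)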
def main(unused_argv):
    image = load_image_from_path("images/image.jpg")[np.newaxis, ...]
    vocab, pretrained_matrix = load_glove(vocab_size=100, embedding_size=50)
    pos, pos_embeddings = get_parts_of_speech(), np.random.normal(
        0, 0.1, [15, 50])
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, shape=image.shape)
        box_extractor = BoxExtractor(get_faster_rcnn_config(), top_k_boxes=16)
        boxes, scores, cropped_inputs = box_extractor(inputs)
        feature_extractor = FeatureExtractor()
        # Mean-pool the spatial ResNet features into one vector per image
        # and one vector per cropped box.
        mean_image_features = tf.reduce_mean(feature_extractor(inputs), [1, 2])
        mean_object_features = tf.reshape(
            tf.reduce_mean(feature_extractor(cropped_inputs), [1, 2]),
            [1, 16, 2048])
        image_captioner = PartOfSpeechImageCaptioner(
            UpDownCell(50), vocab, pretrained_matrix, UpDownCell(50),
            UpDownCell(50), pos, pos_embeddings)
        pos_logits, pos_logits_ids, word_logits, word_logits_ids = image_captioner(
            mean_image_features=mean_image_features,
            mean_object_features=mean_object_features)
        with tf.Session() as sess:
            box_saver = tf.train.Saver(var_list=box_extractor.variables)
            resnet_saver = tf.train.Saver(var_list=feature_extractor.variables)
            box_saver.restore(sess, get_faster_rcnn_checkpoint())
            resnet_saver.restore(sess, get_resnet_v2_101_checkpoint())
            # The captioner has no checkpoint here, so initialize it randomly.
            sess.run(tf.variables_initializer(image_captioner.variables))
            results = sess.run(
                [pos_logits, pos_logits_ids, word_logits, word_logits_ids],
                feed_dict={inputs: image})
            # Check the expected shape of the word logits:
            # [1, 3, sequence_length, 100].
            assert (results[2].shape[0] == 1 and results[2].shape[1] == 3
                    and results[2].shape[3] == 100)
            tf.logging.info("Successfully passed test.")
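# The shape contract asserted above can be reproduced with plain NumPy:
# taking the argmax over the last (vocabulary) axis of a logits tensor
# collapses it to one id per decoding step. word_logits_ids plays this role
# for word_logits, though the model may compute the ids differently, e.g.
# during beam search. Illustrative shapes only; the real sequence length
# depends on the decoder:
logits = np.random.normal(0, 1, [1, 3, 7, 100])  # [batch, 3, steps, vocab]
ids = np.argmax(logits, axis=3)
assert ids.shape == (1, 3, 7)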
def __init__(self):
    """Creates handles to the TensorFlow computational graph."""
    # TensorFlow ops for JPEG decoding.
    self.encoded_jpeg = tf.placeholder(dtype=tf.string)
    self.decoded_jpeg = tf.image.decode_jpeg(self.encoded_jpeg, channels=3)
    self.decoded_jpeg = tf.image.resize_images(
        self.decoded_jpeg, [FLAGS.image_height, FLAGS.image_width])
    # Create the model to extract image boxes.
    self.box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                      trainable=False)
    self.image_tensor = tf.placeholder(
        tf.float32, name='image_tensor',
        shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
    self.boxes, self.scores, self.cropped_images = self.box_extractor(
        self.image_tensor)
    # Create a single TensorFlow Session for all image decoding calls.
    self.sess = tf.Session()
    self.rcnn_saver = tf.train.Saver(var_list=self.box_extractor.variables)
    self.rcnn_saver.restore(self.sess, get_faster_rcnn_checkpoint())
    self.lock = threading.Lock()
    self.attribute_map = get_visual_attributes()
def __init__(self):
    """Creates handles to the TensorFlow computational graph."""
    # TensorFlow ops for JPEG decoding.
    self.encoded_jpeg = tf.placeholder(dtype=tf.string)
    self.decoded_jpeg = tf.image.decode_jpeg(self.encoded_jpeg, channels=3)
    self.decoded_jpeg = tf.image.resize_images(
        self.decoded_jpeg, [FLAGS.image_height, FLAGS.image_width])
    # Create the model to extract image boxes.
    self.box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                      trainable=False)
    self.image_tensor = tf.placeholder(
        tf.float32, name='image_tensor',
        shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
    self.boxes, self.scores, self.cropped_images = self.box_extractor(
        self.image_tensor)
    # Create the model to extract the image features.
    self.feature_extractor = FeatureExtractor(is_training=False,
                                              global_pool=False)
    # Compute the mean ResNet-101 features.
    self.image_features = tf.reduce_mean(
        self.feature_extractor(self.image_tensor), [1, 2])
    feature_batch = tf.shape(self.image_features)[0]
    feature_depth = tf.shape(self.image_features)[1]
    self.object_features = tf.reduce_mean(
        self.feature_extractor(self.cropped_images), [1, 2])
    # The box extractor returns 100 crops per image, stacked along the
    # batch axis; fold them back into [batch, 100, depth].
    self.object_features = tf.reshape(self.object_features,
                                      [feature_batch, 100, feature_depth])
    # Create a single TensorFlow Session for all image decoding calls.
    self.sess = tf.Session()
    rcnn_saver = tf.train.Saver(var_list=self.box_extractor.variables)
    resnet_saver = tf.train.Saver(
        var_list=self.feature_extractor.variables)
    rcnn_saver.restore(self.sess, get_faster_rcnn_checkpoint())
    resnet_saver.restore(self.sess, get_resnet_v2_101_checkpoint())
    # Guard the shared Session against concurrent access.
    self.lock = threading.Lock()
    self.attribute_map = get_visual_attributes()
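# A class built this way usually exposes a thread-safe inference method on
# top of the Session and Lock created in __init__. The method below is a
# hypothetical sketch of such a wrapper, not part of the original class:
def extract_features(self, encoded_jpeg):
    """Decodes one JPEG and returns boxes, image, and object features."""
    with self.lock:
        decoded = self.sess.run(self.decoded_jpeg,
                                feed_dict={self.encoded_jpeg: encoded_jpeg})
        return self.sess.run(
            [self.boxes, self.image_features, self.object_features],
            feed_dict={self.image_tensor: decoded[np.newaxis, ...]})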
def main(unused_argv):
    def _is_valid_num_shards(num_shards):
        """Returns True if num_shards is compatible with FLAGS.num_threads."""
        return num_shards < FLAGS.num_threads or not num_shards % FLAGS.num_threads

    assert _is_valid_num_shards(FLAGS.train_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards")
    assert _is_valid_num_shards(FLAGS.val_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards")
    assert _is_valid_num_shards(FLAGS.test_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards")

    # Create the vocabulary from the GloVe embeddings.
    vocab, _ = load_glove(vocab_size=FLAGS.vocab_size,
                          embedding_size=FLAGS.embedding_size)
    tagger = load_tagger()

    if not tf.gfile.IsDirectory(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    # Load image metadata from caption files.
    mscoco_train_dataset = _load_and_process_metadata(
        FLAGS.train_captions_file, FLAGS.train_image_dir)
    mscoco_val_dataset = _load_and_process_metadata(
        FLAGS.val_captions_file, FLAGS.val_image_dir)

    # Redistribute the MSCOCO data as follows:
    #   train_dataset = 99% of mscoco_train_dataset.
    #   val_dataset = 1% of mscoco_train_dataset (for validation during training).
    #   test_dataset = 100% of mscoco_val_dataset (for final evaluation).
    train_cutoff = int(0.99 * len(mscoco_train_dataset))
    train_dataset = mscoco_train_dataset[:train_cutoff]
    val_dataset = mscoco_train_dataset[train_cutoff:]
    test_dataset = mscoco_val_dataset

    # If needed, crop the dataset to make it smaller.
    max_train_size = len(train_dataset)
    if FLAGS.train_dataset_size < max_train_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(train_dataset)
        train_dataset = train_dataset[:FLAGS.train_dataset_size]

    max_val_size = len(val_dataset)
    if FLAGS.val_dataset_size < max_val_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(val_dataset)
        val_dataset = val_dataset[:FLAGS.val_dataset_size]

    max_test_size = len(test_dataset)
    if FLAGS.test_dataset_size < max_test_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(test_dataset)
        test_dataset = test_dataset[:FLAGS.test_dataset_size]

    # Create the model to extract image boxes.
    box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                 top_k_boxes=FLAGS.top_k_boxes,
                                 trainable=False)
    image_tensor = tf.placeholder(
        tf.float32, name='image_tensor',
        shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
    boxes, scores, cropped_images = box_extractor(image_tensor)

    # Create the model to extract the image features.
    feature_extractor = FeatureExtractor(is_training=False, global_pool=False)

    # Compute the ResNet-101 features.
    image_features = feature_extractor(image_tensor)
    feature_batch = tf.shape(image_features)[0]
    feature_depth = tf.shape(image_features)[3]
    object_features = tf.reduce_mean(feature_extractor(cropped_images), [1, 2])
    object_features = tf.reshape(
        object_features, [feature_batch, FLAGS.top_k_boxes, feature_depth])

    with tf.Session() as sess:
        rcnn_saver = tf.train.Saver(var_list=box_extractor.variables)
        resnet_saver = tf.train.Saver(var_list=feature_extractor.variables)
        rcnn_saver.restore(sess, get_faster_rcnn_checkpoint())
        resnet_saver.restore(sess, get_resnet_v2_101_checkpoint())
        lock = threading.Lock()

        def run_model_fn(images):
            # Serialize access to the shared Session across worker threads.
            lock.acquire()
            r = sess.run([image_features, object_features],
                         feed_dict={image_tensor: images})
            lock.release()
            return r

        _process_dataset("train", train_dataset, vocab, tagger,
                         FLAGS.train_shards, run_model_fn)
        _process_dataset("val", val_dataset, vocab, tagger,
                         FLAGS.val_shards, run_model_fn)
        _process_dataset("test", test_dataset, vocab, tagger,
                         FLAGS.test_shards, run_model_fn)
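# _is_valid_num_shards accepts a shard count that either fits inside the
# thread pool or divides evenly across it. A standalone check of that rule,
# with an illustrative num_threads of 8:
num_threads = 8
for num_shards, expected in [(4, True), (256, True), (12, False)]:
    valid = num_shards < num_threads or not num_shards % num_threads
    assert valid is expected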
attributes: {3}""" tf.logging.set_verbosity(tf.logging.INFO) tf.flags.DEFINE_string("file_pattern", "image.jpg", "") FLAGS = tf.flags.FLAGS if __name__ == "__main__": attribute_map = get_visual_attributes() with tf.Graph().as_default(): list_of_filenames = tf.gfile.Glob(FLAGS.file_pattern) list_of_images = [ load_image_from_path(filename) for filename in list_of_filenames ] box_extractor = BoxExtractor(get_faster_rcnn_config()) attribute_detector = AttributeDetector(1000) feature_extractor = FeatureExtractor() inputs = tf.placeholder(tf.float32, shape=[None, None, None, 3]) resized_inputs = tf.image.resize_images(inputs, [224, 224]) boxes, scores, cropped_inputs = box_extractor(inputs) image_features = tf.reduce_mean(feature_extractor(resized_inputs), [1, 2]) object_features = tf.reduce_mean(feature_extractor(cropped_inputs), [1, 2]) batch_size = tf.shape(image_features)[0] num_boxes = tf.shape(object_features)[0] // batch_size depth = tf.shape(image_features)[1] object_features = tf.reshape(object_features, [batch_size, num_boxes, depth])