Example 1
# Imports assumed by the examples on this page; project-specific helpers
# (BoxExtractor, FeatureExtractor, get_faster_rcnn_config, load_glove, ...)
# come from the examples' own repository.
import random
import threading

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def main(unused_argv):

    with tf.device("/cpu:0"):

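        # Load the test image and add a leading batch dimension.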
        image = load_image_from_path("images/image.jpg")[np.newaxis, ...]
        print(image.dtype)

        g = tf.Graph()
        with g.as_default():
            box_extractor = BoxExtractor(get_faster_rcnn_config())
            inputs = tf.placeholder(tf.float32, shape=image.shape)
            boxes, scores, cropped_inputs = box_extractor(inputs)

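        # Restore the pre-trained Faster R-CNN weights and run the detector.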
        with tf.Session(graph=g) as sess:
            saver = tf.train.Saver(var_list=box_extractor.variables)
            saver.restore(sess, get_faster_rcnn_checkpoint())
            results = sess.run([boxes, scores, cropped_inputs],
                               feed_dict={inputs: image})

        tf.logging.info("Successfully passed test.")

        height = image.shape[1]
        width = image.shape[2]
        fig, ax = plt.subplots(1)
        ax.imshow(image[0, :])

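        # Boxes are in normalized [y1, x1, y2, x2] order; scale each one
        # to pixel coordinates and draw it on the image.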
        for i in range(results[0].shape[1]):

            this_box = results[0][0, i, :]
            box_y1 = this_box[0] * height
            box_x1 = this_box[1] * width
            box_y2 = this_box[2] * height
            box_x2 = this_box[3] * width

            rect = patches.Rectangle((box_x1, box_y1), (box_x2 - box_x1),
                                     (box_y2 - box_y1),
                                     linewidth=1,
                                     edgecolor='r',
                                     facecolor='none')

            ax.add_patch(rect)

        plt.savefig("images/image_boxes.png")
        plt.clf()

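        # Save each cropped box as its own image file.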
        for i in range(results[0].shape[1]):

            fig, ax = plt.subplots(1)
            ax.imshow(results[2][i, ...].astype(np.uint8))
            plt.savefig("images/box{0}.png".format(i))
            plt.clf()
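
For reference, load_image_from_path is not defined on this page. The sketch below is a hypothetical stand-in (assuming PIL is available), matching how the examples index its result; it is not the repository's actual helper:

from PIL import Image

def load_image_from_path(image_path):
    # Read an image file and return it as a float32 array of
    # shape [height, width, 3].
    return np.asarray(Image.open(image_path).convert("RGB"),
                      dtype=np.float32)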
Example 2
def main(unused_argv):

    image = load_image_from_path("images/image.jpg")[np.newaxis, ...]

    vocab, pretrained_matrix = load_glove(vocab_size=100, embedding_size=50)
    pos = get_parts_of_speech()
    pos_embeddings = np.random.normal(0, 0.1, [15, 50])
    with tf.Graph().as_default():

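        # Detect the top 16 boxes, then average-pool ResNet features over
        # the spatial dimensions for the full image and for every crop.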
        inputs = tf.placeholder(tf.float32, shape=image.shape)
        box_extractor = BoxExtractor(get_faster_rcnn_config(), top_k_boxes=16)
        boxes, scores, cropped_inputs = box_extractor(inputs)
        feature_extractor = FeatureExtractor()
        mean_image_features = tf.reduce_mean(feature_extractor(inputs), [1, 2])
        mean_object_features = tf.reshape(
            tf.reduce_mean(feature_extractor(cropped_inputs), [1, 2]),
            [1, 16, 2048])
        image_captioner = PartOfSpeechImageCaptioner(UpDownCell(50), vocab,
                                                     pretrained_matrix,
                                                     UpDownCell(50),
                                                     UpDownCell(50), pos,
                                                     pos_embeddings)
        pos_logits, pos_logits_ids, word_logits, word_logits_ids = image_captioner(
            mean_image_features=mean_image_features,
            mean_object_features=mean_object_features)

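        # Restore the pre-trained detector and ResNet weights; the
        # captioner's own variables are freshly initialized.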
        with tf.Session() as sess:

            box_saver = tf.train.Saver(var_list=box_extractor.variables)
            resnet_saver = tf.train.Saver(var_list=feature_extractor.variables)

            box_saver.restore(sess, get_faster_rcnn_checkpoint())
            resnet_saver.restore(sess, get_resnet_v2_101_checkpoint())
            sess.run(tf.variables_initializer(image_captioner.variables))

            results = sess.run(
                [pos_logits, pos_logits_ids, word_logits, word_logits_ids],
                feed_dict={inputs: image})

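            # Expect word logits with batch size 1, three decoded sequences,
            # and a 100-word vocabulary (dimension 2 is presumably time).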
            assert (results[2].shape[0] == 1 and results[2].shape[1] == 3
                    and results[2].shape[3] == 100)
            tf.logging.info("Successfully passed test.")
Example 3
    def __init__(self):
        """Creates handles to the TensorFlow computational graph."""
        # TensorFlow ops for JPEG decoding.
        self.encoded_jpeg = tf.placeholder(dtype=tf.string)
        self.decoded_jpeg = tf.image.decode_jpeg(self.encoded_jpeg, channels=3)
        self.decoded_jpeg = tf.image.resize_images(
            self.decoded_jpeg, [FLAGS.image_height, FLAGS.image_width])

        # Create the model to extract image boxes.
        self.box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                          trainable=False)
        self.image_tensor = tf.placeholder(
            tf.float32,
            name='image_tensor',
            shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
        self.boxes, self.scores, self.cropped_images = self.box_extractor(
            self.image_tensor)

        # Create a single TensorFlow Session for all image decoding calls.
        self.sess = tf.Session()
        self.rcnn_saver = tf.train.Saver(var_list=self.box_extractor.variables)
        self.rcnn_saver.restore(self.sess, get_faster_rcnn_checkpoint())
        self.lock = threading.Lock()
        self.attribute_map = get_visual_attributes()
Example 4
    def __init__(self):
        """Creates handles to the TensorFlow computational graph."""
        # TensorFlow ops for JPEG decoding.
        self.encoded_jpeg = tf.placeholder(dtype=tf.string)
        self.decoded_jpeg = tf.image.decode_jpeg(self.encoded_jpeg, channels=3)
        self.decoded_jpeg = tf.image.resize_images(
            self.decoded_jpeg, [FLAGS.image_height, FLAGS.image_width])

        # Create the model to extract image boxes
        self.box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                          trainable=False)
        self.image_tensor = tf.placeholder(
            tf.float32,
            name='image_tensor',
            shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
        self.boxes, self.scores, self.cropped_images = self.box_extractor(
            self.image_tensor)

        # Create the model to extract the image features
        self.feature_extractor = FeatureExtractor(is_training=False,
                                                  global_pool=False)
        # Compute the mean ResNet-101 features
        self.image_features = tf.reduce_mean(
            self.feature_extractor(self.image_tensor), [1, 2])
        feature_batch = tf.shape(self.image_features)[0]
        feature_depth = tf.shape(self.image_features)[1]
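        # The box extractor yields a fixed set of 100 boxes per image, so
        # the pooled box features fold back into [batch, 100, depth].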
        self.object_features = tf.reduce_mean(
            self.feature_extractor(self.cropped_images), [1, 2])
        self.object_features = tf.reshape(self.object_features,
                                          [feature_batch, 100, feature_depth])

        # Create a single TensorFlow Session for all image decoding calls.
        self.sess = tf.Session()
        rcnn_saver = tf.train.Saver(var_list=self.box_extractor.variables)
        resnet_saver = tf.train.Saver(
            var_list=self.feature_extractor.variables)
        rcnn_saver.restore(self.sess, get_faster_rcnn_checkpoint())
        resnet_saver.restore(self.sess, get_resnet_v2_101_checkpoint())
        self.lock = threading.Lock()
        self.attribute_map = get_visual_attributes()
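
    # A minimal usage sketch (an assumption, not from the source repository):
    # decode one JPEG and run the detector, holding the lock so concurrent
    # threads never interleave calls on the shared Session.
    def extract_boxes(self, jpeg_bytes):
        with self.lock:
            image = self.sess.run(self.decoded_jpeg,
                                  feed_dict={self.encoded_jpeg: jpeg_bytes})
            return self.sess.run(
                [self.boxes, self.scores, self.cropped_images],
                feed_dict={self.image_tensor: image[np.newaxis, ...]})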
Example 5
def main(unused_argv):
    def _is_valid_num_shards(num_shards):
        """Returns True if num_shards is compatible with FLAGS.num_threads."""
        return num_shards < FLAGS.num_threads or not num_shards % FLAGS.num_threads

    assert _is_valid_num_shards(FLAGS.train_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards")
    assert _is_valid_num_shards(FLAGS.val_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards")
    assert _is_valid_num_shards(FLAGS.test_shards), (
        "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards")
        
    # Create vocabulary from the glove embeddings.
    vocab, _ = load_glove(vocab_size=FLAGS.vocab_size,
                          embedding_size=FLAGS.embedding_size)
    tagger = load_tagger()

    if not tf.gfile.IsDirectory(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    # Load image metadata from caption files.
    mscoco_train_dataset = _load_and_process_metadata(FLAGS.train_captions_file,
                                                      FLAGS.train_image_dir)
    mscoco_val_dataset = _load_and_process_metadata(FLAGS.val_captions_file,
                                                    FLAGS.val_image_dir)

    # Redistribute the MSCOCO data as follows:
    #   train_dataset = 99% of mscoco_train_dataset
    #   val_dataset = 1% of mscoco_train_dataset (for validation during training).
    #   test_dataset = 100% of mscoco_val_dataset (for final evaluation).
    train_cutoff = int(0.99 * len(mscoco_train_dataset))
    train_dataset = mscoco_train_dataset[:train_cutoff]
    val_dataset = mscoco_train_dataset[train_cutoff:]
    test_dataset = mscoco_val_dataset
    
    # If needed, crop the dataset to make it smaller.
    max_train_size = len(train_dataset)
    if FLAGS.train_dataset_size < max_train_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(train_dataset)
        train_dataset = train_dataset[:FLAGS.train_dataset_size]
        
    max_val_size = len(val_dataset)
    if FLAGS.val_dataset_size < max_val_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(val_dataset)
        val_dataset = val_dataset[:FLAGS.val_dataset_size]
        
    max_test_size = len(test_dataset)
    if FLAGS.test_dataset_size < max_test_size:
        # Shuffle the ordering of images. Make the randomization repeatable.
        random.seed(12345)
        random.shuffle(test_dataset)
        test_dataset = test_dataset[:FLAGS.test_dataset_size]

    # Create the model to extract image boxes
    box_extractor = BoxExtractor(get_faster_rcnn_config(),
                                 top_k_boxes=FLAGS.top_k_boxes,
                                 trainable=False)
    image_tensor = tf.placeholder(
        tf.float32,
        name='image_tensor',
        shape=[None, FLAGS.image_height, FLAGS.image_width, 3])
    boxes, scores, cropped_images = box_extractor(image_tensor)
    # Create the model to extract the image features
    feature_extractor = FeatureExtractor(is_training=False, global_pool=False)
    # Compute the ResNet-101 features
    image_features = feature_extractor(image_tensor)
    feature_batch = tf.shape(image_features)[0]
    feature_depth = tf.shape(image_features)[3]
    object_features = tf.reduce_mean(feature_extractor(cropped_images), [1, 2])
    object_features = tf.reshape(
        object_features, [feature_batch, FLAGS.top_k_boxes, feature_depth])

    with tf.Session() as sess:

        rcnn_saver = tf.train.Saver(var_list=box_extractor.variables)
        resnet_saver = tf.train.Saver(var_list=feature_extractor.variables)
        rcnn_saver.restore(sess, get_faster_rcnn_checkpoint())
        resnet_saver.restore(sess, get_resnet_v2_101_checkpoint())
        
        lock = threading.Lock()
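        # The Session is shared across worker threads, so sess.run calls
        # are serialized with a lock.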
        def run_model_fn(images):
            with lock:
                return sess.run([image_features, object_features],
                                feed_dict={image_tensor: images})

        _process_dataset("train", train_dataset, vocab, tagger, FLAGS.train_shards, run_model_fn)
        _process_dataset("val", val_dataset, vocab, tagger, FLAGS.val_shards, run_model_fn)
        _process_dataset("test", test_dataset, vocab, tagger, FLAGS.test_shards, run_model_fn)
Example 6
        object_features = tf.reduce_mean(feature_extractor(cropped_images),
                                         [1, 2])
        batch_size = tf.shape(image_features)[0]
        num_boxes = tf.shape(object_features)[0] // batch_size
        depth = tf.shape(image_features)[1]
        object_features = tf.reshape(object_features,
                                     [batch_size, num_boxes, depth])
        logits, image_detections, object_detections = attribute_detector(
            image_features, object_features)

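        # Each sub-network is restored from its own checkpoint by a
        # dedicated Saver over just that network's variables.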
        saver = tf.train.Saver(var_list=box_extractor.variables)
        feature_extractor_saver = tf.train.Saver(
            var_list=feature_extractor.variables)
        attribute_detector_saver = tf.train.Saver(
            var_list=attribute_detector.variables)

        faster_rcnn_ckpt = get_faster_rcnn_checkpoint()
        feature_extractor_ckpt = get_resnet_v2_101_checkpoint()
        attribute_detector_ckpt, attribute_detector_ckpt_name = (
            get_attribute_detector_checkpoint())

        with tf.Session() as sess:

            assert (feature_extractor_ckpt is not None
                    and attribute_detector_ckpt is not None)
            saver.restore(sess, faster_rcnn_ckpt)
            feature_extractor_saver.restore(sess, feature_extractor_ckpt)
            attribute_detector_saver.restore(sess, attribute_detector_ckpt)

            for i, (name, image) in enumerate(
                    zip(list_of_filenames, list_of_images)):