Example #1
0
def main(unused_argv):
    """Run the box extractor on one image and save visualizations to disk.

    Loads ``images/image.jpg``, restores the box extractor's variables from
    the checkpoint returned by ``get_faster_rcnn_checkpoint()``, runs a single
    forward pass, then writes ``images/image_boxes.png`` (the input image with
    every returned box outlined in red) and one ``images/box{i}.png`` per box
    showing the corresponding cropped input.

    Args:
        unused_argv: Ignored.
    """

    # NOTE(review): in TF1 graph mode tf.device() scopes whichever graph is
    # the default when it is entered, while the ops below are created in `g`;
    # this scope may therefore not pin them to the CPU -- confirm.
    with tf.device("/cpu:0"):

        # Prepend a batch axis of size one.
        image = load_image_from_path("images/image.jpg")[np.newaxis, ...]
        print(image.dtype)

        g = tf.Graph()
        with g.as_default():
            box_extractor = BoxExtractor(get_faster_rcnn_config())
            inputs = tf.placeholder(tf.float32, shape=image.shape)
            boxes, scores, cropped_inputs = box_extractor(inputs)

        with tf.Session(graph=g) as sess:
            saver = tf.train.Saver(var_list=box_extractor.variables)
            saver.restore(sess, get_faster_rcnn_checkpoint())
            # Scores are fetched along with the rest but not visualized.
            box_values, _, crop_values = sess.run(
                [boxes, scores, cropped_inputs], feed_dict={inputs: image})

        tf.logging.info("Successfully passed test.")

        height = image.shape[1]
        width = image.shape[2]
        num_boxes = box_values.shape[1]

        fig, ax = plt.subplots(1)
        ax.imshow(image[0, :])

        for i in range(num_boxes):
            # Box coordinates are treated as (y1, x1, y2, x2) fractions of the
            # image size and scaled to pixels here.
            box = box_values[0, i, :]
            box_y1 = box[0] * height
            box_x1 = box[1] * width
            box_y2 = box[2] * height
            box_x2 = box[3] * width

            ax.add_patch(
                patches.Rectangle((box_x1, box_y1), (box_x2 - box_x1),
                                  (box_y2 - box_y1),
                                  linewidth=1,
                                  edgecolor='r',
                                  facecolor='none'))

        fig.savefig("images/image_boxes.png")
        # Close rather than clear: pyplot keeps every figure it creates alive
        # until it is explicitly closed.
        plt.close(fig)

        for i in range(num_boxes):
            fig, ax = plt.subplots(1)
            ax.imshow(crop_values[i, ...].astype(np.uint8))
            fig.savefig("images/box{0}.png".format(i))
            # One figure per crop; closing it keeps open figures from
            # accumulating across iterations.
            plt.close(fig)
Example #2
0
def main(unused_argv):
    """Smoke-test the part-of-speech image captioner on a single image.

    Builds a graph that feeds ``images/image.jpg`` through the box extractor
    and the feature extractor, passes the spatially averaged image and object
    features to a ``PartOfSpeechImageCaptioner``, restores the checkpoints
    returned by ``get_faster_rcnn_checkpoint()`` and
    ``get_resnet_v2_101_checkpoint()``, initializes the captioner's variables,
    runs one forward pass and asserts on the shape of the word logits.

    Args:
        unused_argv: Ignored.
    """

    # Prepend a batch axis of size one.
    batch_image = load_image_from_path("images/image.jpg")[np.newaxis, ...]

    vocab, pretrained_matrix = load_glove(vocab_size=100, embedding_size=50)
    pos = get_parts_of_speech()
    pos_embeddings = np.random.normal(0, 0.1, [15, 50])

    graph = tf.Graph()
    with graph.as_default():

        inputs = tf.placeholder(tf.float32, shape=batch_image.shape)
        box_extractor = BoxExtractor(get_faster_rcnn_config(), top_k_boxes=16)
        _boxes, _scores, cropped_inputs = box_extractor(inputs)

        # Average the feature maps over axes 1 and 2, first for the whole
        # image and then for the cropped boxes.
        feature_extractor = FeatureExtractor()
        image_feature_map = feature_extractor(inputs)
        mean_image_features = tf.reduce_mean(image_feature_map, [1, 2])
        object_feature_map = feature_extractor(cropped_inputs)
        mean_object_features = tf.reshape(
            tf.reduce_mean(object_feature_map, [1, 2]), [1, 16, 2048])

        # The three cells are created in the same order in which they are
        # handed to the captioner.
        first_cell = UpDownCell(50)
        second_cell = UpDownCell(50)
        third_cell = UpDownCell(50)
        image_captioner = PartOfSpeechImageCaptioner(
            first_cell, vocab, pretrained_matrix, second_cell, third_cell,
            pos, pos_embeddings)
        fetches = list(
            image_captioner(mean_image_features=mean_image_features,
                            mean_object_features=mean_object_features))
        pos_logits, pos_logits_ids, word_logits, word_logits_ids = fetches

        with tf.Session() as sess:

            box_saver = tf.train.Saver(var_list=box_extractor.variables)
            resnet_saver = tf.train.Saver(var_list=feature_extractor.variables)

            box_saver.restore(sess, get_faster_rcnn_checkpoint())
            resnet_saver.restore(sess, get_resnet_v2_101_checkpoint())
            # The captioner has no checkpoint here, so its variables are
            # initialized instead of restored.
            sess.run(tf.variables_initializer(image_captioner.variables))

            outputs = sess.run(
                [pos_logits, pos_logits_ids, word_logits, word_logits_ids],
                feed_dict={inputs: batch_image})

            # Only axes 0, 1 and 3 of the word logits are checked.
            word_logits_shape = outputs[2].shape
            assert (word_logits_shape[0] == 1 and word_logits_shape[1] == 3
                    and word_logits_shape[3] == 100)
            tf.logging.info("Successfully passed test.")
({0:.2f} img/sec) image: {1:05d}
    filename: {2}
    attributes: {3}"""

# Set TensorFlow's logging threshold to INFO.
tf.logging.set_verbosity(tf.logging.INFO)
# String flag "file_pattern" with default "image.jpg" and an empty help text;
# the script below passes its value to tf.gfile.Glob.
tf.flags.DEFINE_string("file_pattern", "image.jpg", "")
# Module-level handle through which the flag values are read.
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":

    attribute_map = get_visual_attributes()
    with tf.Graph().as_default():

        list_of_filenames = tf.gfile.Glob(FLAGS.file_pattern)
        list_of_images = [
            load_image_from_path(filename) for filename in list_of_filenames
        ]
        box_extractor = BoxExtractor(get_faster_rcnn_config())
        attribute_detector = AttributeDetector(1000)
        feature_extractor = FeatureExtractor()

        inputs = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        resized_inputs = tf.image.resize_images(inputs, [224, 224])
        boxes, scores, cropped_inputs = box_extractor(inputs)
        image_features = tf.reduce_mean(feature_extractor(resized_inputs),
                                        [1, 2])
        object_features = tf.reduce_mean(feature_extractor(cropped_inputs),
                                         [1, 2])
        batch_size = tf.shape(image_features)[0]
        num_boxes = tf.shape(object_features)[0] // batch_size
        depth = tf.shape(image_features)[1]