Code Example #1
def main(_):
    with tf.Graph().as_default():
        out_shape=[FLAGS.train_image_size] * 2

        image_input=tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input=tf.placeholder(tf.int32, shape=(2,))

        features, output_shape=\
            textboxes_plusplus_preprocessing.preprocess_for_eval(
                image_input,
                out_shape,
                data_format=FLAGS.data_format,
                output_rgb=False)
        features=tf.expand_dims(features, axis=0) # (1, ?, ?, 3)
        output_shape=tf.expand_dims(output_shape, axis=0) # (1, 2)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/cpu:0'):
                anchor_processor=\
                    anchor_manipulator.AnchorProcessor(
                        positive_threshold=None,
                        ignore_threshold=None,
                        prior_scaling=config.PRIOR_SCALING)
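                # The matching thresholds are only needed when encoding ground
                # truth for training, so presumably they can be left as None
                # for inference.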

                anchor_heights_all_layers,\
                anchor_widths_all_layers,\
                num_anchors_per_location_all_layers=\
                    anchor_processor.get_anchors_size_all_layers(
                        config.ALL_ANCHOR_SCALES,
                        config.ALL_EXTRA_SCALES,
                        config.ALL_ANCHOR_RATIOS,
                        config.NUM_FEATURE_LAYERS)

                # shape=(num_anchors_all_layers,).
                anchors_ymin,\
                anchors_xmin,\
                anchors_ymax,\
                anchors_xmax,\
                _=\
                    anchor_processor.get_all_anchors_all_layers(
                        tf.squeeze(output_shape, axis=0),
                        anchor_heights_all_layers,
                        anchor_widths_all_layers,
                        num_anchors_per_location_all_layers,
                        config.ANCHOR_OFFSETS,
                        config.VERTICAL_OFFSETS,
                        config.ALL_LAYER_SHAPES,
                        config.ALL_LAYER_STRIDES,
                        [0.] * config.NUM_FEATURE_LAYERS,
                        [False] * config.NUM_FEATURE_LAYERS)

                backbone=textboxes_plusplus_net.VGG16Backbone(FLAGS.data_format)
                feature_layers=backbone.forward(features, training=False)
                # shape=(num_features,
                #        bs,
                #        fh,
                #        fw,
                #        num_anchors_per_location * 2 * num_offsets)
                location_predictions, class_predictions=\
                    textboxes_plusplus_net.multibox_head(
                        feature_layers,
                        FLAGS.num_classes,
                        config.NUM_OFFSETS,
                        num_anchors_per_location_all_layers,
                        data_format=FLAGS.data_format)
                if FLAGS.data_format == 'channels_first':
                    class_predictions=\
                        [tf.transpose(pred,
                                      [0, 2, 3, 1])\
                         for pred in class_predictions]
                    location_predictions=\
                        [tf.transpose(pred,
                                      [0, 2, 3, 1])\
                         for pred in location_predictions]
                class_predictions=\
                    [tf.reshape(pred,
                                [-1, FLAGS.num_classes])\
                     for pred in class_predictions]
                location_predictions=\
                    [tf.reshape(pred, [-1, config.NUM_OFFSETS])\
                     for pred in location_predictions]

                class_predictions=tf.concat(class_predictions, axis=0)
                location_predictions=tf.concat(location_predictions, axis=0)
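                # With a single image in the batch, the flattened per-layer
                # tensors concatenate into (num_anchors_all_layers, num_classes)
                # class predictions and (num_anchors_all_layers, NUM_OFFSETS)
                # location predictions.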

                # total_parameters = 0
                # for variable in tf.trainable_variables():
                #     # shape is an array of tf.Dimension
                #     shape = variable.get_shape()
                #     print(shape)
                #     print(len(shape))
                #     variable_parameters = 1
                #     for dim in shape:
                #         print(dim)
                #         variable_parameters *= dim.value
                #     print(variable_parameters)
                #     total_parameters += variable_parameters
                # print(total_parameters)


        with tf.device('/cpu:0'):
            bboxes_pred, quadrilaterals_pred=\
                anchor_processor.decode_anchors(
                    location_predictions,
                    anchors_ymin,
                    anchors_xmin,
                    anchors_ymax,
                    anchors_xmax)
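            # decode_anchors converts the regressed offsets back into absolute
            # bounding-box and quadrilateral coordinates using the anchor
            # geometry and config.PRIOR_SCALING.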
            selected_bboxes,\
            selected_quadrilaterals,\
            selected_scores=\
                bbox_util.parse_by_class(
                    tf.squeeze(output_shape, axis=0),
                    class_predictions,
                    bboxes_pred,
                    quadrilaterals_pred,
                    FLAGS.num_classes,
                    FLAGS.select_threshold,
                    FLAGS.min_size,
                    FLAGS.keep_topk,
                    FLAGS.nms_topk,
                    FLAGS.nms_threshold)

            labels_list=[]
            scores_list=[]
            bboxes_list=[]
            quadrilaterals_list=[]
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
                quadrilaterals_list.append(selected_quadrilaterals[k])
            all_labels=tf.concat(labels_list, axis=0)
            all_scores=tf.concat(scores_list, axis=0)
            all_bboxes=tf.concat(bboxes_list, axis=0)
            all_quadrilaterals=tf.concat(quadrilaterals_list, axis=0)

        saver=tf.train.Saver()
        with tf.Session() as sess:
            init=tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            total_time=0
            # np_image=imread('./demo/' + FLAGS.image_file_name)
            image_file_names=sorted(os.listdir(FLAGS.source_directory))
            for i, image_file_name in enumerate(image_file_names):
                np_image=imread(os.path.join(FLAGS.source_directory, image_file_name))
                start_time=time.time()

                labels_,\
                scores_,\
                bboxes_,\
                quadrilaterals_,\
                output_shape_=\
                    sess.run([all_labels,
                              all_scores,
                              all_bboxes,
                              all_quadrilaterals,
                              output_shape],
                             feed_dict={image_input : np_image,
                                        shape_input : np_image.shape[:-1]})

                elapsed_time=time.time() - start_time
                print('{}: elapsed_time = {}'.format(i + 1, elapsed_time))
                total_time+=elapsed_time

                bboxes_[:, 0]=bboxes_[:, 0] * np_image.shape[0] / output_shape_[0, 0]
                bboxes_[:, 1]=bboxes_[:, 1] * np_image.shape[1] / output_shape_[0, 1]
                bboxes_[:, 2]=bboxes_[:, 2] * np_image.shape[0] / output_shape_[0, 0]
                bboxes_[:, 3]=bboxes_[:, 3] * np_image.shape[1] / output_shape_[0, 1]
                quadrilaterals_[:, 0]=quadrilaterals_[:, 0] * np_image.shape[0] / output_shape_[0, 0]
                quadrilaterals_[:, 1]=quadrilaterals_[:, 1] * np_image.shape[1] / output_shape_[0, 1]
                quadrilaterals_[:, 2]=quadrilaterals_[:, 2] * np_image.shape[0] / output_shape_[0, 0]
                quadrilaterals_[:, 3]=quadrilaterals_[:, 3] * np_image.shape[1] / output_shape_[0, 1]
                quadrilaterals_[:, 4]=quadrilaterals_[:, 4] * np_image.shape[0] / output_shape_[0, 0]
                quadrilaterals_[:, 5]=quadrilaterals_[:, 5] * np_image.shape[1] / output_shape_[0, 1]
                quadrilaterals_[:, 6]=quadrilaterals_[:, 6] * np_image.shape[0] / output_shape_[0, 0]
                quadrilaterals_[:, 7]=quadrilaterals_[:, 7] * np_image.shape[1] / output_shape_[0, 1]
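                # The predictions above live in the coordinate system of the
                # resized network input (output_shape_); the per-axis ratios
                # map them back to the original image resolution.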

                # image_with_bboxes=\
                #     drawing_toolbox.draw_bboxes_on_image(
                #         np_image.copy(),
                #         labels_,
                #         scores_,
                #         bboxes_,
                #         thickness=2)
                # imsave('./demo/' + FLAGS.image_file_name[:-4] + '_bboxes' + '.jpg',
                #        image_with_bboxes)
                image_with_quadrilaterals=\
                    drawing_toolbox.draw_quadrilaterals_on_image(
                        np_image.copy(),
                        labels_,
                        scores_,
                        quadrilaterals_,
                        thickness=2)
                imsave(os.path.join(FLAGS.storage_directory,
                                    image_file_name[:-4] + '_quadrilaterals.jpg'),
                       image_with_quadrilaterals)

                y1, x1, y2, x2,\
                y3, x3, y4, x4=[int(e) for e in quadrilaterals_[0, :]]

                top_left_vertex = [x1, y1]
                top_right_vertex = [x2, y2]
                bottom_left_vertex = [x4, y4]
                bottom_right_vertex = [x3, y3]
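                # The quadrilateral is rectified onto an axis-aligned rectangle
                # sized by the predicted bounding box, so the crop below comes
                # out roughly fronto-parallel.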

                ymin=int(round(bboxes_[0, 0]))
                xmin=int(round(bboxes_[0, 1]))
                ymax=int(round(bboxes_[0, 2]))
                xmax=int(round(bboxes_[0, 3]))

                plate_width = xmax - xmin
                plate_height = ymax - ymin

                pts1 = np.float32([top_left_vertex, top_right_vertex,
                                   bottom_left_vertex, bottom_right_vertex])
                pts2 = np.float32([[0, 0], [plate_width, 0],
                                   [0, plate_height], [plate_width, plate_height]])
            
                M = cv2.getPerspectiveTransform(pts1, pts2)
                cropped_image = cv2.warpPerspective(np_image.copy(), M,
                                                    (plate_width, plate_height))
                imsave(os.path.join(FLAGS.storage_directory,
                                    image_file_name[:-4] + '_cropped.jpg'),
                       cropped_image)
            
            print('total_time: ', total_time)
Code Example #2
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2

        anchor_processor =\
            anchor_manipulator.AnchorProcessor(
                positive_threshold=FLAGS.match_threshold,
                ignore_threshold=FLAGS.neg_threshold,
                prior_scaling=config.PRIOR_SCALING)
        # anchor_processor: Python object

        anchor_heights_all_layers,\
            anchor_widths_all_layers,\
            num_anchors_per_location_all_layers =\
            anchor_processor.get_anchors_size_all_layers(
                config.ALL_ANCHOR_SCALES,
                config.ALL_EXTRA_SCALES,
                config.ALL_ANCHOR_RATIOS,
                config.NUM_FEATURE_LAYERS)
        # anchor_heights_all_layers: [1d-tf.constant tf.float32,
        #                           1d-tf.constant tf.float32,
        #                           ...]
        # anchor_widths_all_layers: [1d-tf.constant tf.float32,
        #                           1d-tf.constant tf.float32,
        #                           ...]
        # num_anchors_per_location_all_layers: [Python int, Python int, ...]

        anchors_ymin,\
            anchors_xmin,\
            anchors_ymax,\
            anchors_xmax,\
            inside_mask =\
            anchor_processor.get_all_anchors_all_layers(
                target_shape,
                anchor_heights_all_layers,
                anchor_widths_all_layers,
                num_anchors_per_location_all_layers,
                config.ANCHOR_OFFSETS,
                config.VERTICAL_OFFSETS,
                config.ALL_LAYER_SHAPES,
                config.ALL_LAYER_STRIDES,
                [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
                [False] * config.NUM_FEATURE_LAYERS)
        # anchors_ymin: 1d-tf.Tensor(num_anchors_all_layers) tf.float32
        # inside_mask: 1d-tf.Tensor(num_anchors_all_layers) tf.bool

        num_anchors_per_layer = []
        for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
            _, _num_anchors_per_layer =\
                anchor_processor.count_num_anchors_per_layer(
                    num_anchors_per_location_all_layers[ind],
                    layer_shape,
                    name='count_num_anchors_per_layer_{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)
        # num_anchors_per_layer = [num_anchors_layer1, num_anchors_layer2, ...]
        # e.g., num_anchors_per_layer = [48 x 48 x 2 x 10, ...]

        def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
            return textboxes_plusplus_preprocessing.preprocess_image(
                image_,
                labels_,
                bboxes_,
                quadrilaterals_,
                target_shape,
                is_training=is_training,
                data_format=FLAGS.data_format,
                output_rgb=False)

        def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
            return anchor_processor.encode_anchors(
                glabels_,
                gbboxes_,
                gquadrilaterals_,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax,
                inside_mask)
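
        # encode_anchors matches ground-truth boxes and quadrilaterals to the
        # anchors using the positive/ignore thresholds configured above and
        # emits per-anchor regression and classification targets.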
        image, _, shape, loc_targets, cls_targets, match_scores =\
            dataset_common.slim_get_batch(
                FLAGS.num_classes,
                batch_size,
                ('train' if is_training else 'val'),
                os.path.join(FLAGS.data_dir, dataset_pattern),
                FLAGS.num_readers,
                FLAGS.num_preprocessing_threads,
                image_preprocessing_fn,
                anchor_encoder_fn,
                num_epochs=FLAGS.train_epochs,
                is_training=is_training)

        global global_anchor_info
        global_anchor_info =\
            {'decode_fn':
             lambda pred: anchor_processor.batch_decode_anchors(
                pred,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax),
             'num_anchors_per_layer': num_anchors_per_layer,
             'num_anchors_per_location_all_layers':
                num_anchors_per_location_all_layers}

        return image,\
            {'shape': shape,  # original shape from .tfrecord files
             'loc_targets': loc_targets,  # [bs, n_anchors, 12]
             'cls_targets': cls_targets,  # [bs, n_anchors]
             'match_scores': match_scores  # [bs, n_anchors]
             }
Code Example #3
def main(_):
    with tf.Graph().as_default():

        def split_image_into_overlapped_images(image, n, r):
            """TODO: Docstring for split_image_into_overlapped_images.

            :image: TODO
            :n: TODO
            :r: TODO
            :returns: TODO

            """
            IH, IW = tf.shape(image)[0], tf.shape(image)[1]
            ny, nx = n
            ry, rx = r
            SH = tf.cast(
                tf.floordiv(tf.cast(IH, tf.float32), (ny - ny * ry + ry)),
                tf.int32)
            SW = tf.cast(
                tf.floordiv(tf.cast(IW, tf.float32), (nx - nx * rx + rx)),
                tf.int32)
            OH = tf.cast(ry * tf.cast(SH, tf.float32), tf.int32)
            OW = tf.cast(rx * tf.cast(SW, tf.float32), tf.int32)
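            # The window height SH solves IH = ny*SH - (ny - 1)*ry*SH
            # = SH*(ny - ny*ry + ry): ny windows, each overlapping its
            # neighbour by OH = ry*SH, exactly tile the image height
            # (and likewise SW for the width).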
            images = []
            offsets = []
            for i in range(ny):
                oy = i * (SH - OH)
                for j in range(nx):
                    ox = j * (SW - OW)
                    offsets.append([oy, ox])
                    images.append(image[oy:oy + SH, ox:ox + SW])
            return [[image, tf.shape(image), offset]
                    for image, offset in zip(images, offsets)]

        output_shape = [FLAGS.image_size] * 2

        input_image = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # nr1 = [(2, 0.7), (4, 0.6), (8, 0.5)]
        # nr2 = [(4, 0.4)]  # no1
        # nr3 = [(4, 0.2)]
        # nr4 = [(4, 0.3)]
        # nr5 = [(4, 0.6)]
        # nr6 = [(4, 0.5)]
        # nr7 = [(8, 0.2)]
        # nr8 = [(8, 0.8)]
        # nr9 = [(8, 0.4)]  # no1
        # nr10 = [(2, 0.8)]
        # nr11 = [(2, 0.2)]
        # nr12 = [(2, 0.4)]
        # nr13 = [(2, 0.6)]  # no1
        # nr14 = [(2, 0.5)]
        # nr15 = [(2, 0.6), (4, 0.4)]  # select_threshold = 0.5
        nr16 = [(2, 0.6), (4, 0.4)]  # select_threshold = 0.95
        images, shapes, offsets =\
            zip(*([[image, shape, o]
                   for n, r in nr16
                   for image, shape, o in split_image_into_overlapped_images(
                       input_image,
                       (n, n),
                       (r, r))] + [[input_image,
                                    tf.shape(input_image), [0, 0]]]))
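        # The untiled input image is appended as a final "window" with zero
        # offset, so detections larger than any sub-window are not lost.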
        # images = [images[0], images[1]]
        # shapes = [shapes[0], shapes[1]]
        # offsets = [offsets[0], offsets[1]]

        oys, oxs = zip(*offsets)
        shapes = tf.stack(shapes)
        oys = tf.stack(oys)
        oxs = tf.stack(oxs)
        oys = tf.expand_dims(oys, -1)
        oxs = tf.expand_dims(oxs, -1)

        features = []
        for image in images:
            features.append(
                textboxes_plusplus_preprocessing.preprocess_for_eval(
                    image,
                    None,
                    None,
                    output_shape,
                    data_format=FLAGS.data_format,
                    output_rgb=False))
        features = tf.stack(features, axis=0)
        output_shape =\
            tf.expand_dims(
                tf.constant(output_shape,
                            dtype=tf.int32),
                axis=0)  # (1, 2)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/cpu:0'):
                anchor_processor =\
                    anchor_manipulator.AnchorProcessor(
                        positive_threshold=None,
                        ignore_threshold=None,
                        prior_scaling=config.PRIOR_SCALING)

                anchor_heights_all_layers,\
                    anchor_widths_all_layers,\
                    num_anchors_per_location_all_layers =\
                    anchor_processor.get_anchors_size_all_layers(
                        config.ALL_ANCHOR_SCALES,
                        config.ALL_EXTRA_SCALES,
                        config.ALL_ANCHOR_RATIOS,
                        config.NUM_FEATURE_LAYERS)
                # anchor_heights_all_layers: [1d-tf.constant tf.float32,
                #                           1d-tf.constant tf.float32,
                #                           ...]
                # anchor_widths_all_layers: [1d-tf.constant tf.float32,
                #                           1d-tf.constant tf.float32,
                #                           ...]
                # num_anchors_per_location_all_layers:
                #   [Python int, Python int, ...]

                anchors_ymin,\
                    anchors_xmin,\
                    anchors_ymax,\
                    anchors_xmax, _ =\
                    anchor_processor.get_all_anchors_all_layers(
                        tf.squeeze(output_shape, axis=0),
                        anchor_heights_all_layers,
                        anchor_widths_all_layers,
                        num_anchors_per_location_all_layers,
                        config.ANCHOR_OFFSETS,
                        config.VERTICAL_OFFSETS,
                        config.ALL_LAYER_SHAPES,
                        config.ALL_LAYER_STRIDES,
                        [0.] * config.NUM_FEATURE_LAYERS,
                        [False] * config.NUM_FEATURE_LAYERS)
                # anchors_ymin: 1d-tf.Tensor(num_anchors_all_layers) tf.float32

                backbone =\
                    textboxes_plusplus_net.VGG16Backbone(FLAGS.data_format)
                feature_layers = backbone.forward(features, training=False)
                # shape = (num_feature_layers,
                #          BS,
                #          FH,
                #          FW,
                #          feature_depth)

                location_predictions, class_predictions =\
                    textboxes_plusplus_net.multibox_head(
                        feature_layers,
                        FLAGS.num_classes,
                        config.NUM_OFFSETS,
                        num_anchors_per_location_all_layers,
                        data_format=FLAGS.data_format)
                # shape = (num_feature_layers,
                #          bs,
                #          fh,
                #          fw,
                #          num_anchors_per_loc * 2 * num_offsets)

                if FLAGS.data_format == 'channels_first':
                    class_predictions =\
                        [tf.transpose(pred,
                                      [0, 2, 3, 1])
                         for pred in class_predictions]
                    location_predictions =\
                        [tf.transpose(pred,
                                      [0, 2, 3, 1])
                         for pred in location_predictions]
                class_predictions =\
                    [tf.reshape(pred,
                                [len(images), -1, FLAGS.num_classes])
                     for pred in class_predictions]
                location_predictions =\
                    [tf.reshape(pred, [len(images), -1, config.NUM_OFFSETS])
                     for pred in location_predictions]
                # shape = (num_feature_layers,
                #          bs,
                #          fh * fw * num_anchors_per_loc * 2,
                #          num_offsets)

                class_predictions = tf.concat(class_predictions, axis=1)
                location_predictions = tf.concat(location_predictions, axis=1)

                # total_parameters = 0
                # for variable in tf.trainable_variables():
                #     # shape is an array of tf.Dimension
                #     shape = variable.get_shape()
                #     print(shape)
                #     print(len(shape))
                #     variable_parameters = 1
                #     for dim in shape:
                #         print(dim)
                #         variable_parameters *= dim.value
                #     print(variable_parameters)
                #     total_parameters += variable_parameters
                # print(total_parameters)

        with tf.device('/cpu:0'):
            bboxes_pred, quadrilaterals_pred =\
                anchor_processor.batch_decode_anchors(
                    location_predictions,
                    anchors_ymin,
                    anchors_xmin,
                    anchors_ymax,
                    anchors_xmax)

            bboxes_ymin =\
                tf.cast(bboxes_pred[:, :, 0] * tf.expand_dims(tf.cast(
                    tf.truediv(shapes[:, 0],
                               output_shape[0, 0]),
                    tf.float32
                ), -1), tf.int32) + oys
            bboxes_xmin =\
                tf.cast(bboxes_pred[:, :, 1] * tf.expand_dims(tf.cast(
                    tf.truediv(shapes[:, 1],
                               output_shape[0, 1]),
                    tf.float32
                ), -1), tf.int32) + oxs
            bboxes_ymax =\
                tf.cast(bboxes_pred[:, :, 2] * tf.expand_dims(tf.cast(
                    tf.truediv(shapes[:, 0],
                               output_shape[0, 0]),
                    tf.float32), -1), tf.int32) + oys
            bboxes_xmax =\
                tf.cast(bboxes_pred[:, :, 3] * tf.expand_dims(tf.cast(
                    tf.truediv(shapes[:, 1],
                               output_shape[0, 1]),
                    tf.float32), -1), tf.int32) + oxs
            bboxes_pred =\
                tf.reshape(
                    tf.stack([bboxes_ymin, bboxes_xmin,
                              bboxes_ymax, bboxes_xmax], -1),
                    shape=[-1, 4])
            quadrilaterals_y1 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 0] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 0],
                                           output_shape[0, 0]),
                                tf.float32), -1), tf.int32) + oys
            quadrilaterals_x1 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 1] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 1],
                                           output_shape[0, 1]),
                                tf.float32), -1), tf.int32) + oxs
            quadrilaterals_y2 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 2] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 0],
                                           output_shape[0, 0]),
                                tf.float32), -1), tf.int32) + oys
            quadrilaterals_x2 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 3] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 1],
                                           output_shape[0, 1]),
                                tf.float32), -1), tf.int32) + oxs
            quadrilaterals_y3 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 4] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 0],
                                           output_shape[0, 0]),
                                tf.float32), -1), tf.int32) + oys
            quadrilaterals_x3 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 5] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 1],
                                           output_shape[0, 1]),
                                tf.float32), -1), tf.int32) + oxs
            quadrilaterals_y4 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 6] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 0],
                                           output_shape[0, 0]),
                                tf.float32), -1), tf.int32) + oys
            quadrilaterals_x4 =\
                tf.cast(
                    quadrilaterals_pred[:, :, 7] * tf.expand_dims(
                        tf.cast(tf.truediv(shapes[:, 1],
                                           output_shape[0, 1]),
                                tf.float32), -1), tf.int32) + oxs
            quadrilaterals_pred =\
                tf.reshape(
                    tf.stack([quadrilaterals_y1,
                              quadrilaterals_x1,
                              quadrilaterals_y2,
                              quadrilaterals_x2,
                              quadrilaterals_y3,
                              quadrilaterals_x3,
                              quadrilaterals_y4,
                              quadrilaterals_x4], -1),
                    shape=[-1, 8])
            class_predictions = tf.reshape(class_predictions,
                                           shape=[-1, FLAGS.num_classes])
            bboxes_pred = tf.cast(bboxes_pred, tf.float32)
            quadrilaterals_pred = tf.cast(quadrilaterals_pred, tf.float32)
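            # parse_by_class presumably expects floating-point coordinates for
            # scoring and NMS, so the integer pixel coordinates are cast back
            # to tf.float32.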

            selected_bboxes,\
                selected_quadrilaterals,\
                selected_scores =\
                bbox_util.parse_by_class(
                    tf.shape(input_image)[:2],
                    class_predictions,
                    bboxes_pred,
                    quadrilaterals_pred,
                    FLAGS.num_classes,
                    FLAGS.select_threshold,
                    FLAGS.min_size,
                    FLAGS.keep_topk,
                    FLAGS.nms_topk,
                    FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            quadrilaterals_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
                quadrilaterals_list.append(selected_quadrilaterals[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)
            all_quadrilaterals = tf.concat(quadrilaterals_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            image_paths =\
                sorted(
                    [path
                     for pattern in FLAGS.input_image_stem_patterns.split(',')
                     for path in Path(FLAGS.input_image_root).glob(pattern)],
                    key=lambda e: int(re.findall(r'(?<=_)\d+(?=\.)',
                                                 e.name)[0]))
            for i, image_path in enumerate(image_paths):
                # image = imread(str(image_path))
                image =\
                    cv2.imread(
                        str(image_path),
                        cv2.IMREAD_IGNORE_ORIENTATION | cv2.IMREAD_COLOR
                    )[:, :, ::-1]
                start_time = time.time()

                labels_,\
                    scores_,\
                    bboxes_,\
                    quadrilaterals_ =\
                    sess.run([all_labels,
                              all_scores,
                              all_bboxes,
                              all_quadrilaterals,
                              ],
                             feed_dict={input_image: image})

                elapsed_time = time.time() - start_time
                print('{}: elapsed_time = {}'.format(i + 1, elapsed_time))
                annotation_file_name =\
                    'task1_' + image_path.name.replace('.jpg', '.txt')
                with open(
                        Path(FLAGS.output_directory).joinpath(
                            annotation_file_name), 'w') as f:
                    num_predicted_text_lines = np.shape(quadrilaterals_)[0]
                    for line_ind in range(num_predicted_text_lines):
                        y1, x1, y2, x2,\
                            y3, x3, y4, x4 =\
                            [int(e) for e in quadrilaterals_[line_ind, :]]
                        score = float(scores_[line_ind])
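                        # All-zero rows are presumably padding produced when
                        # fewer than keep_topk boxes survive NMS; skip them.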
                        if (y1 == 0 and x1 == 0 and y2 == 0 and x2 == 0
                                and y3 == 0 and x3 == 0 and y4 == 0 and x4 == 0
                                and score == 0.0):
                            continue
                        f.write('{},{},{},{},{},{},{},{},{}\n'.format(
                            x1, y1, x2, y2, x3, y3, x4, y4, score))
Code Example #4
def main(_):
	target_shape=[FLAGS.train_image_size] * 2

	anchor_processor=\
		anchor_manipulator.AnchorProcessor(
			positive_threshold=None,
			ignore_threshold=None,
			prior_scaling=config.PRIOR_SCALING)

	anchor_heights_all_layers,\
	anchor_widths_all_layers,\
	num_anchors_per_location_all_layers=\
		anchor_processor.get_anchors_size_all_layers(
			config.ALL_ANCHOR_SCALES,
			config.ALL_EXTRA_SCALES,
			config.ALL_ANCHOR_RATIOS,
			config.NUM_FEATURE_LAYERS)

	# shape=(num_anchors_all_layers,).
	fm=FLAGS.chosen_feature_map
	anchors_ymin,\
	anchors_xmin,\
	anchors_ymax,\
	anchors_xmax=\
		anchor_processor.get_all_anchors_one_layer(
			anchor_heights_all_layers[fm],
			anchor_widths_all_layers[fm],
			num_anchors_per_location_all_layers[fm],
			config.ALL_LAYER_SHAPES[fm],
			config.ALL_LAYER_STRIDES[fm],
			config.ANCHOR_OFFSETS[fm],
			config.VERTICAL_OFFSETS[fm],
			name=None)

	with tf.Session() as sess:
		# shape=(num_anchor_locations_per_feature_map, num_anchors_per_location).
		anchors_ymin,\
		anchors_xmin,\
		anchors_ymax,\
		anchors_xmax=\
			sess.run([anchors_ymin,
					  anchors_xmin,
					  anchors_ymax,
					  anchors_xmax])

		input_image=cv2.imread(FLAGS.image_path)
		input_image=cv2.resize(
			input_image,
			tuple(target_shape),
			interpolation=cv2.INTER_AREA)

		grid_drawn_image=draw_grid(
			image=input_image.copy(),
			grid_shape=config.ALL_LAYER_SHAPES[fm],
			color=(0, 0, 0))
		cv2.imshow('grid_drawn_image', grid_drawn_image)

		num_anchor_locations_per_feature_map=len(anchors_ymin)
		num_anchors_per_location=len(anchors_ymin[0])

		# Indices could be sampled at random, e.g.:
		# location_index=int(random.random() * num_anchor_locations_per_feature_map)
		# anchor_index=int(random.random() * num_anchors_per_location)
		# but fixed indices are used here for a reproducible visualization.
		location_index=4
		anchor_begin_index=0
		anchor_end_index=4

		anchors_ymin=anchors_ymin[location_index][anchor_begin_index:anchor_end_index]
		anchors_xmin=anchors_xmin[location_index][anchor_begin_index:anchor_end_index]
		anchors_ymax=anchors_ymax[location_index][anchor_begin_index:anchor_end_index]
		anchors_xmax=anchors_xmax[location_index][anchor_begin_index:anchor_end_index]

		# anchor=[anchor_ymin, anchor_xmin, anchor_ymax, anchor_xmax]
		anchors=[list(anchor) for anchor in zip(anchors_ymin,
												anchors_xmin,
												anchors_ymax,
												anchors_xmax)]

		anchor_drawn_image=draw_anchors(
			grid_drawn_image.copy(),
			anchors)
		cv2.imshow('anchor_drawn_image', anchor_drawn_image)

		cv2.imwrite('grid_' + str(FLAGS.chosen_feature_map) + '.jpg',
					grid_drawn_image)
		cv2.imwrite('grid_' + str(FLAGS.chosen_feature_map) + '_with_anchors.jpg',
					anchor_drawn_image)

		cv2.waitKey(0)
		cv2.destroyAllWindows()
Code Example #5
def model_fn(features, labels, mode, params):
    file_name = features['file_name']
    file_name = tf.identity(file_name, name='file_name')
    shape = features['shape']
    output_shape = features['output_shape']
    image = features['image']

    anchor_processor = anchor_manipulator.AnchorProcessor(
        positive_threshold=None,
        ignore_threshold=None,
        prior_scaling=config.PRIOR_SCALING)
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[image],
                           reuse=tf.AUTO_REUSE):
        with tf.device('/cpu:0'):
            anchor_heights_all_layers,\
            anchor_widths_all_layers,\
            num_anchors_per_location_all_layers=\
                anchor_processor.get_anchors_size_all_layers(
                    config.ALL_ANCHOR_SCALES,
                    config.ALL_EXTRA_SCALES,
                    config.ALL_ANCHOR_RATIOS,
                    config.NUM_FEATURE_LAYERS)

            anchors_ymin,\
            anchors_xmin,\
            anchors_ymax,\
            anchors_xmax,\
            _=\
                anchor_processor.get_all_anchors_all_layers(
                    tf.squeeze(output_shape, axis=0),
                    anchor_heights_all_layers,
                    anchor_widths_all_layers,
                    num_anchors_per_location_all_layers,
                    config.ANCHOR_OFFSETS,
                    config.VERTICAL_OFFSETS,
                    config.ALL_LAYER_SHAPES,
                    config.ALL_LAYER_STRIDES,
                    [0.] * config.NUM_FEATURE_LAYERS,
                    [False] * config.NUM_FEATURE_LAYERS)

            backbone=\
                textboxes_plusplus_net.VGG16Backbone(params['data_format'])
            feature_layers = backbone.forward(
                image, training=(mode == tf.estimator.ModeKeys.TRAIN))
            location_predictions, class_predictions=\
                textboxes_plusplus_net.multibox_head(
                    feature_layers,
                    params['num_classes'],
                    config.NUM_OFFSETS,
                    num_anchors_per_location_all_layers,
                    data_format=params['data_format'])
            if params['data_format'] == 'channels_first':
                location_predictions=\
                    [tf.transpose(pred, [0, 2, 3, 1])\
                    for pred in location_predictions]
                class_predictions=\
                    [tf.transpose(pred, [0, 2, 3, 1])\
                     for pred in class_predictions]

            location_predictions=\
                [tf.reshape(pred,
                            [tf.shape(image)[0],
                             -1,
                             config.NUM_OFFSETS])\
                 for pred in location_predictions]
            class_predictions=\
                [tf.reshape(pred,
                            [tf.shape(image)[0],
                            -1,
                            params['num_classes']])\
                for pred in class_predictions]

            location_predictions = tf.concat(location_predictions, axis=1)
            class_predictions = tf.concat(class_predictions, axis=1)

            location_predictions = tf.reshape(location_predictions,
                                              [-1, config.NUM_OFFSETS])
            class_predictions = tf.reshape(class_predictions,
                                           [-1, params['num_classes']])
    with tf.device('/cpu:0'):
        bboxes_pred,\
        quadrilaterals_pred=\
            anchor_processor.decode_anchors(
                location_predictions,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax)
        selected_bboxes,\
        selected_quadrilaterals,\
        selected_scores=\
            bbox_util.parse_by_class(
                tf.squeeze(output_shape, axis=0),
                class_predictions,
                bboxes_pred,
                quadrilaterals_pred,
                params['num_classes'],
                params['select_threshold'],
                params['min_size'],
                params['keep_topk'],
                params['nms_topk'],
                params['nms_threshold'])

    labels_list = []
    scores_list = []
    bboxes_list = []
    quadrilaterals_list = []
    for k, v in selected_scores.items():
        labels_list.append(tf.ones_like(v, tf.int32) * k)
        scores_list.append(v)
        bboxes_list.append(selected_bboxes[k])
        quadrilaterals_list.append(selected_quadrilaterals[k])
    all_labels = tf.concat(labels_list, axis=0)
    all_scores = tf.concat(scores_list, axis=0)
    all_bboxes = tf.concat(bboxes_list, axis=0)
    all_quadrilaterals = tf.concat(quadrilaterals_list, axis=0)

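    # tf.py_func embeds the Python drawing helper save_image_with_labels in
    # the graph; multiplying the labels by tf.to_int32(all_scores > 0.3)
    # zeroes out low-confidence detections so they are presumably drawn as
    # background.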
    save_image_op=\
        tf.py_func(save_image_with_labels,
                   [textboxes_plusplus_preprocessing.unwhiten_image(
                       tf.squeeze(image, axis=0),
                       output_rgb=False),
                    all_labels * tf.to_int32(all_scores > 0.3),
                    all_scores,
                    all_bboxes,
                    all_quadrilaterals],
                   tf.int64,
                   stateful=True)
    tf.identity(save_image_op, name='save_image_op')
    predictions=\
        {'file_name': file_name,
         'shape': shape,
         'output_shape': output_shape}
    for class_ind in range(1, params['num_classes']):
        predictions['scores_{}'.format(class_ind)]=\
            tf.expand_dims(selected_scores[class_ind], axis=0)
        predictions['bboxes_{}'.format(class_ind)]=\
            tf.expand_dims(selected_bboxes[class_ind], axis=0)
        predictions['quadrilaterals_{}'.format(class_ind)]=\
            tf.expand_dims(selected_quadrilaterals[class_ind], axis=0)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          prediction_hooks=None,
                                          loss=None,
                                          train_op=None)
    else:
        raise ValueError('This script only supports "PREDICT" mode!')
Code Example #6
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2

        anchor_processor =\
            anchor_manipulator.AnchorProcessor(
                positive_threshold=FLAGS.match_threshold,
                ignore_threshold=FLAGS.neg_threshold,
                prior_scaling=config.PRIOR_SCALING)

        anchor_heights_all_layers,\
            anchor_widths_all_layers,\
            num_anchors_per_location_all_layers =\
            anchor_processor.get_anchors_size_all_layers(
                config.ALL_ANCHOR_SCALES,
                config.ALL_EXTRA_SCALES,
                config.ALL_ANCHOR_RATIOS,
                config.NUM_FEATURE_LAYERS)

        # shape = (num_anchors_all_layers,).
        anchors_ymin,\
            anchors_xmin,\
            anchors_ymax,\
            anchors_xmax,\
            inside_mask =\
            anchor_processor.get_all_anchors_all_layers(
                target_shape,
                anchor_heights_all_layers,
                anchor_widths_all_layers,
                num_anchors_per_location_all_layers,
                config.ANCHOR_OFFSETS,
                config.VERTICAL_OFFSETS,
                config.ALL_LAYER_SHAPES,
                config.ALL_LAYER_STRIDES,
                [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
                [False] * config.NUM_FEATURE_LAYERS)

        num_anchors_per_layer = []
        for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
            _, _num_anchors_per_layer =\
                anchor_processor.count_num_anchors_per_layer(
                    num_anchors_per_location_all_layers[ind],
                    layer_shape,
                    name='count_num_anchors_per_layer_{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)
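        # num_anchors_per_layer = [num_anchors_layer1, num_anchors_layer2, ...]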

        def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
            return textboxes_plusplus_preprocessing.preprocess_image(
                image_,
                labels_,
                bboxes_,
                quadrilaterals_,
                target_shape,
                is_training=is_training,
                data_format=FLAGS.data_format,
                output_rgb=False)

        def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
            return anchor_processor.encode_anchors(
                glabels_,
                gbboxes_,
                gquadrilaterals_,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax,
                inside_mask)

        image, _, shape, loc_targets, cls_targets, match_scores =\
            dataset_common.slim_get_batch(
                FLAGS.num_classes,
                batch_size,
                (dataset_pattern[:-2]),
                os.path.join(FLAGS.data_dir, dataset_pattern),
                FLAGS.num_readers,
                FLAGS.num_preprocessing_threads,
                image_preprocessing_fn,
                anchor_encoder_fn,
                num_epochs=FLAGS.train_epochs,
                is_training=is_training)

        global global_anchor_info
        global_anchor_info =\
            {'decode_fn':
             lambda pred: anchor_processor.batch_decode_anchors(
                pred,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax),
             'num_anchors_per_layer': num_anchors_per_layer,
             'num_anchors_per_location_all_layers':
                num_anchors_per_location_all_layers}

        return image,\
            {'shape': shape,  # original shape from .tfrecord files
             'loc_targets': loc_targets,  # [bs, n_anchors, 12]
             'cls_targets': cls_targets,  # [bs, n_anchors]
             'match_scores': match_scores  # [bs, n_anchors]
             }