def input_fn():
        """Build the training inputs and publish anchor info module-wide.

        Creates anchors for three feature-map layers, hands the
        'xdet_resnet' training preprocessing and the anchor encoder to
        ``dataset_factory.get_dataset``, and stores a decode function together
        with the per-layer anchor counts in the module-level
        ``global_anchor_info`` dict.

        Returns:
            A pair ``(image, targets)`` where ``targets`` is a dict with the
            keys 'shape', 'loc_targets', 'cls_targets' and 'match_scores',
            all taken from the value returned by ``get_dataset``.
        """
        global global_anchor_info

        out_shape = [FLAGS.train_image_size] * 2

        creator = anchor_manipulator_v2.AnchorCreator(
            out_shape,
            layers_shapes=[(24, 24), (12, 12), (6, 6)],
            anchor_scales=[(0.1, ), (0.2, 0.375, 0.55), (0.725, 0.9)],
            extra_anchor_scales=[(0.1414, ), (0.2739, 0.4541, 0.6315),
                                 (0.8078, 0.9836)],
            anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., .5)],
            layer_steps=[16, 32, 64])
        anchors, depth_counts, spatial_counts = creator.get_all_anchors()

        # One entry per anchor layer: anchors per location times locations.
        anchors_per_layer = [
            depth_counts[layer] * spatial_counts[layer]
            for layer in range(len(anchors))
        ]

        codec = anchor_manipulator_v2.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def image_preprocessing_fn(image_, shape_, glabels_, gbboxes_):
            # shape_ is accepted but not forwarded to the preprocessing call.
            if FLAGS.data_format == 'channels_first':
                layout = 'NCHW'
            else:
                layout = 'NHWC'
            preprocess = preprocessing_factory.get_preprocessing(
                'xdet_resnet', is_training=True)
            return preprocess(image_,
                              glabels_,
                              gbboxes_,
                              out_shape=out_shape,
                              data_format=layout)

        def anchor_encoder_fn(glabels_, gbboxes_):
            return codec.encode_all_anchors(glabels_, gbboxes_, anchors,
                                            depth_counts, spatial_counts)

        def decode_predictions(pred):
            return codec.decode_all_anchors(pred, anchors_per_layer)

        batch = dataset_factory.get_dataset(
            FLAGS.dataset_name,
            FLAGS.dataset_split_name,
            FLAGS.data_dir,
            image_preprocessing_fn,
            file_pattern=None,
            reader=None,
            batch_size=FLAGS.batch_size,
            num_readers=FLAGS.num_readers,
            num_preprocessing_threads=FLAGS.num_preprocessing_threads,
            num_epochs=FLAGS.train_epochs,
            anchor_encoder=anchor_encoder_fn)
        image, shape, loc_targets, cls_targets, match_scores = batch

        global_anchor_info = {
            'decode_fn': decode_predictions,
            'num_anchors_per_layer': anchors_per_layer,
            'all_num_anchors_depth': depth_counts,
        }

        targets = {
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores,
        }
        return image, targets
Exemple #2
0
def main(_):
    """Detect objects in every ``*.jpg`` of the test set and save drawings.

    Builds the ``xdet_resnet_v4`` inference graph, restores the checkpoint
    returned by ``get_checkpoint()``, runs each image matched by
    ``FLAGS.test_dataset_path/*.jpg`` through it, draws the selected boxes on
    the image and saves the result as ``output_<file stem>.jpg`` inside
    ``FLAGS.debug_dir``.

    Args:
        _: Unused.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Fed on every run below; none of the tensors fetched in this
        # function visibly depends on it.
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = common_preprocessing.preprocess_for_test(
            image_input,
            out_shape,
            data_format=('NCHW'
                         if FLAGS.data_format == 'channels_first' else 'NHWC'))

        # Add a leading batch dimension of size one.
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator_v2.AnchorCreator(
            out_shape,
            layers_shapes=[(24, 24), (12, 12), (6, 6)],
            anchor_scales=[(0.1, ), (0.2, 0.375, 0.55), (0.725, 0.9)],
            extra_anchor_scales=[(0.1414, ), (0.2739, 0.4541, 0.6315),
                                 (0.8078, 0.9836)],
            anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., .5)],
            layer_steps=[16, 32, 64])
        (all_anchors, all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator_v2.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/gpu:0'):
                backbone = xdet_body_v4.xdet_resnet_v4(FLAGS.resnet_size,
                                                       FLAGS.data_format)
                backbone_outputs = backbone(inputs=features, is_training=False)

                cls_pred, location_pred = xdet_body_v4.xdet_head(
                    backbone_outputs,
                    FLAGS.num_classes,
                    all_num_anchors_depth,
                    False,
                    data_format=FLAGS.data_format)

                # Move channels last so the reshapes below flatten every
                # layer the same way regardless of data_format.
                if FLAGS.data_format == 'channels_first':
                    cls_pred = [
                        tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                    ]
                    location_pred = [
                        tf.transpose(pred, [0, 2, 3, 1])
                        for pred in location_pred
                    ]

                cls_pred = [
                    tf.reshape(pred, [-1, FLAGS.num_classes])
                    for pred in cls_pred
                ]
                location_pred = [
                    tf.reshape(pred, [-1, 4]) for pred in location_pred
                ]

                cls_pred = tf.concat(cls_pred, axis=0)
                location_pred = tf.concat(location_pred, axis=0)

        bboxes_pred = decode_fn(location_pred)
        bboxes_pred = tf.concat(bboxes_pred, axis=0)
        selected_bboxes, selected_scores = parse_by_class(
            cls_pred, bboxes_pred, FLAGS.num_classes, FLAGS.select_threshold,
            FLAGS.min_size, FLAGS.keep_topk, FLAGS.nms_topk,
            FLAGS.nms_threshold)

        # Flatten the per-class results into three aligned tensors; each
        # score gets a label equal to its key in selected_scores.
        labels_list = []
        scores_list = []
        bboxes_list = []
        for k, v in selected_scores.items():
            labels_list.append(tf.ones_like(v, tf.int32) * k)
            scores_list.append(v)
            bboxes_list.append(selected_bboxes[k])
        all_labels = tf.concat(labels_list, axis=0)
        all_scores = tf.concat(scores_list, axis=0)
        all_bboxes = tf.concat(bboxes_list, axis=0)

        # Kept from the original: this directory is created here although
        # nothing in this function writes into it.
        summary_dir = os.path.join(FLAGS.model_dir, 'predict')
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)

        # The drawn images are written to FLAGS.debug_dir, so make sure it
        # exists before the first imsave call. An empty value is left alone
        # (os.path.join then resolves to the current directory).
        if FLAGS.debug_dir and not os.path.exists(FLAGS.debug_dir):
            os.makedirs(FLAGS.debug_dir)

        all_images = tf.gfile.Glob(
            os.path.join(FLAGS.test_dataset_path, '*.jpg'))
        len_images = len(all_images)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            for ind, image_name in enumerate(all_images):
                sys.stdout.write('\r>> Processing image %d/%d' %
                                 (ind + 1, len_images))
                sys.stdout.flush()

                np_image = imread(image_name)

                labels_, scores_, bboxes_ = sess.run(
                    [all_labels, all_scores, all_bboxes],
                    feed_dict={
                        image_input: np_image,
                        shape_input: np_image.shape[:-1]
                    })

                img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                              labels_,
                                                              scores_,
                                                              bboxes_,
                                                              thickness=2)

                # Use the real file stem instead of a fixed [-10:-4] slice,
                # which was only right for six-character file names.
                image_stem = os.path.splitext(
                    os.path.basename(image_name))[0]
                imsave(
                    os.path.join(FLAGS.debug_dir,
                                 'output_{}.jpg'.format(image_stem)),
                    img_to_draw)

            sys.stdout.write('\n')
            sys.stdout.flush()
Exemple #3
0
def main(_):
    """Detect objects in every ``*.jpg`` of the test set and write results.

    Builds the ``xdet_resnet_v5`` inference graph, restores the checkpoint
    returned by ``get_checkpoint()`` and runs each image matched by
    ``FLAGS.test_dataset_path/*.jpg`` through it. For every class index from
    1 to ``FLAGS.num_classes - 1`` the detections are appended to
    ``<FLAGS.model_dir>/predict/results_<class index>.txt``, one line per
    detection: ``<image id> <score> <col1> <col0> <col3> <col2>`` of the
    scaled box.

    Args:
        _: Unused.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = common_preprocessing.preprocess_for_test(
            image_input,
            out_shape,
            data_format=('NCHW'
                         if FLAGS.data_format == 'channels_first' else 'NHWC'))

        # Add a leading batch dimension of size one.
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator_v2.AnchorCreator(
            out_shape,
            layers_shapes=[(20, 20), (10, 10), (5, 5)],
            anchor_scales=[(0.1, ), (0.2, 0.375, 0.55), (0.725, 0.9)],
            extra_anchor_scales=[(0.1414, ), (0.2739, 0.4541, 0.6315),
                                 (0.8078, 0.9836)],
            anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., .5)],
            layer_steps=[16, 32, 64])
        (all_anchors, all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator_v2.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/gpu:0'):
                backbone = xdet_body_v5.xdet_resnet_v5(FLAGS.resnet_size,
                                                       FLAGS.data_format)
                backbone_outputs = backbone(inputs=features, is_training=False)

                cls_pred, location_pred = xdet_body_v5.xdet_head(
                    backbone_outputs,
                    FLAGS.num_classes,
                    all_num_anchors_depth,
                    False,
                    data_format=FLAGS.data_format)

                # Move channels last so the reshapes below flatten every
                # layer the same way regardless of data_format.
                if FLAGS.data_format == 'channels_first':
                    cls_pred = [
                        tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                    ]
                    location_pred = [
                        tf.transpose(pred, [0, 2, 3, 1])
                        for pred in location_pred
                    ]

                cls_pred = [
                    tf.reshape(pred, [-1, FLAGS.num_classes])
                    for pred in cls_pred
                ]
                location_pred = [
                    tf.reshape(pred, [-1, 4]) for pred in location_pred
                ]

                cls_pred = tf.concat(cls_pred, axis=0)
                location_pred = tf.concat(location_pred, axis=0)

        bboxes_pred = decode_fn(location_pred)
        bboxes_pred = tf.concat(bboxes_pred, axis=0)
        selected_bboxes, selected_scores = parse_by_class(
            cls_pred, bboxes_pred, FLAGS.num_classes, FLAGS.select_threshold,
            FLAGS.min_size, FLAGS.keep_topk, FLAGS.nms_topk,
            FLAGS.nms_threshold)

        # Class index 0 is skipped; list position i holds class i + 1.
        scores_list = []
        bboxes_list = []
        for class_ind in range(1, FLAGS.num_classes):
            scores_list.append(selected_scores[class_ind])
            bboxes_list.append(selected_bboxes[class_ind])
        num_result_classes = len(bboxes_list)

        saver = tf.train.Saver()

        summary_dir = os.path.join(FLAGS.model_dir, 'predict')
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)

        all_images = tf.gfile.Glob(
            os.path.join(FLAGS.test_dataset_path, '*.jpg'))
        len_images = len(all_images)
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False,
                                gpu_options=gpu_options)
        with tf.Session(config=config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            # The first image truncates each results file; later images
            # append to it.
            file_mode = 'wt'
            for ind, image_name in enumerate(all_images):
                sys.stdout.write('\r>> Processing image %d/%d' %
                                 (ind + 1, len_images))
                sys.stdout.flush()

                np_image = imread(image_name)
                # shape_input is fetched as well, which hands back the
                # value fed for it.
                detected_results = sess.run(
                    bboxes_list + scores_list + [shape_input],
                    feed_dict={
                        image_input: np_image,
                        shape_input: np_image.shape[:-1]
                    })

                detected_bboxes = detected_results[:num_result_classes]
                detected_scores = detected_results[num_result_classes:-1]
                shape = detected_results[-1]

                # Use the real file stem instead of a fixed [-10:-4] slice,
                # which truncated any image id longer than six characters.
                image_id = os.path.splitext(os.path.basename(image_name))[0]

                for class_ind in range(FLAGS.num_classes - 1):
                    results_path = os.path.join(
                        summary_dir, 'results_{}.txt'.format(class_ind + 1))
                    with open(results_path, file_mode) as f:
                        scores = detected_scores[class_ind]
                        bboxes = detected_bboxes[class_ind]

                        # Columns 0/2 are scaled by the first fed image
                        # dimension and 1/3 by the second, truncated to int.
                        # NOTE(review): the +1 presumably converts to
                        # 1-based pixel coordinates - confirm.
                        for col, extent in ((0, shape[0]), (1, shape[1]),
                                            (2, shape[0]), (3, shape[1])):
                            bboxes[:, col] = (bboxes[:, col] * extent).astype(
                                np.int32, copy=False) + 1

                        # Skip boxes that collapsed to zero or negative size.
                        valid_mask = np.logical_and(
                            (bboxes[:, 2] - bboxes[:, 0] > 0),
                            (bboxes[:, 3] - bboxes[:, 1] > 0))

                        for det_ind in np.flatnonzero(valid_mask):
                            f.write(
                                '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(image_id, scores[det_ind],
                                       bboxes[det_ind, 1], bboxes[det_ind, 0],
                                       bboxes[det_ind, 3], bboxes[det_ind, 2]))

                file_mode = 'at'
            sys.stdout.write('\n')
            sys.stdout.flush()