Ejemplo n.º 1
0
    def eval(self):
        retinanet = build_whole_network.DetectionNetworkRetinaNet(
            cfgs=self.cfgs, is_training=False)

        all_boxes_r = self.eval_with_plac(img_dir=self.args.img_dir,
                                          det_net=retinanet,
                                          image_ext=self.args.image_ext)

        # with open(cfgs.VERSION + '_detections_r.pkl', 'rb') as f2:
        #     all_boxes_r = pickle.load(f2)
        #
        #     print(len(all_boxes_r))

        imgs = os.listdir(self.args.img_dir)
        real_test_imgname_list = [
            i.split(self.args.image_ext)[0] for i in imgs
        ]

        print(10 * "**")
        print('rotation eval:')
        evaler = EVAL(self.cfgs)
        evaler.voc_evaluate_detections(
            all_boxes=all_boxes_r,
            test_imgid_list=real_test_imgname_list,
            test_annotation_path=self.args.test_annotation_path)
Ejemplo n.º 2
0
    def eval(self):
        txt_name = '{}.txt'.format(self.cfgs.VERSION)
        real_test_img_list = self.get_test_image()

        retinanet = build_whole_network.DetectionNetworkRetinaNet(cfgs=self.cfgs,
                                                                  is_training=False)
        self.test_mlt(det_net=retinanet, real_test_img_list=real_test_img_list, txt_name=txt_name)

        if not self.args.show_box:
            os.remove(txt_name)
Ejemplo n.º 3
0
    def main(self):
        with tf.Graph().as_default() as graph, tf.device('/cpu:0'):

            num_gpu = 1
            global_step = slim.get_or_create_global_step()
            lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
            tf.summary.scalar('lr', lr)

            optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
            retinanet = build_whole_network.DetectionNetworkRetinaNet(cfgs=self.cfgs,
                                                                      is_training=True)

            with tf.name_scope('get_batch'):
                if cfgs.IMAGE_PYRAMID:
                    shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                    shortside_len = tf.random_shuffle(shortside_len_list)[0]

                else:
                    shortside_len = cfgs.IMG_SHORT_SIDE_LEN
                img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                    self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                                           shortside_len=shortside_len,
                                           is_training=True)

            # data processing
            inputs_list = []
            for i in range(num_gpu):
                img = tf.expand_dims(img_batch[i], axis=0)
                pretrain_zoo = PretrainModelZoo()
                if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                    img = img / tf.constant([cfgs.PIXEL_STD])

                gtboxes_and_label_r = tf.py_func(backward_convert,
                                                 inp=[gtboxes_and_label_batch[i]],
                                                 Tout=tf.float32)
                gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                gtboxes_and_label_h = get_horizen_minAreaRectangle(gtboxes_and_label_batch[i])
                gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

                num_objects = num_objects_batch[i]
                num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

                img_h = img_h_batch[i]
                img_w = img_w_batch[i]

                inputs_list.append([img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects, img_h, img_w])

            tower_grads = []
            biases_regularizer = tf.no_regularizer
            weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(num_gpu):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('tower_%d' % i):
                            with slim.arg_scope(
                                    [slim.model_variable, slim.variable],
                                    device='/device:CPU:0'):
                                with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                     slim.conv2d_transpose, slim.separable_conv2d,
                                                     slim.fully_connected],
                                                    weights_regularizer=weights_regularizer,
                                                    biases_regularizer=biases_regularizer,
                                                    biases_initializer=tf.constant_initializer(0.0)):

                                    gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(self.get_gtboxes_and_label,
                                                                                          inp=[inputs_list[i][1],
                                                                                               inputs_list[i][2],
                                                                                               inputs_list[i][3]],
                                                                                          Tout=[tf.float32, tf.float32])
                                    gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])
                                    gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                                    img = inputs_list[i][0]
                                    img_shape = inputs_list[i][-2:]
                                    img = tf.image.crop_to_bounding_box(image=img,
                                                                        offset_height=0,
                                                                        offset_width=0,
                                                                        target_height=tf.cast(img_shape[0], tf.int32),
                                                                        target_width=tf.cast(img_shape[1], tf.int32))

                                    outputs = retinanet.build_whole_detection_network(input_img_batch=img,
                                                                                      gtboxes_batch_h=gtboxes_and_label_h,
                                                                                      gtboxes_batch_r=gtboxes_and_label_r,
                                                                                      gpu_id=i)
                                    gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(img_batch=img,
                                                                                              boxes=gtboxes_and_label_h[
                                                                                                    :, :-1],
                                                                                              labels=gtboxes_and_label_h[
                                                                                                     :, -1],
                                                                                              method=0)
                                    gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(img_batch=img,
                                                                                              boxes=gtboxes_and_label_r[
                                                                                                    :, :-1],
                                                                                              labels=gtboxes_and_label_r[
                                                                                                     :, -1],
                                                                                              method=1)
                                    tf.summary.image('Compare/gtboxes_h_gpu:%d' % i, gtboxes_in_img_h)
                                    tf.summary.image('Compare/gtboxes_r_gpu:%d' % i, gtboxes_in_img_r)

                                    if cfgs.ADD_BOX_IN_TENSORBOARD:
                                        detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                            img_batch=img,
                                            boxes=outputs[0],
                                            scores=outputs[1],
                                            labels=outputs[2],
                                            method=1)
                                        tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                    loss_dict = outputs[-1]
                                    total_loss_dict, total_losses = self.loss_dict(loss_dict, num_gpu)

                                    if i == num_gpu - 1:
                                        regularization_losses = tf.get_collection(
                                            tf.GraphKeys.REGULARIZATION_LOSSES)
                                        # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                        total_losses = total_losses + tf.add_n(regularization_losses)

                            tf.get_variable_scope().reuse_variables()
                            grads = optimizer.compute_gradients(total_losses)
                            if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                                grads = slim.learning.clip_gradient_norms(grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                            tower_grads.append(grads)
            self.log_printer(retinanet, optimizer, global_step, tower_grads, total_loss_dict, num_gpu, graph)