def inference(test_dir, inference_save_path):
    """Run the detector on every supported image found in ``test_dir``.

    Args:
        test_dir: Directory scanned (non-recursively) for files whose names
            end with .jpg, .png, .jpeg, .tif or .tiff (case-sensitive).
        inference_save_path: Forwarded unchanged to ``detect`` as its
            ``inference_save_path`` argument.

    Raises:
        AssertionError: If ``test_dir`` contains no file with a supported
            extension.
    """
    supported_exts = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')

    # os.listdir returns names in arbitrary order; sort for reproducible runs.
    test_imgname_list = [
        os.path.join(test_dir, img_name)
        for img_name in sorted(os.listdir(test_dir))
        if img_name.endswith(supported_exts)
    ]

    # Raise explicitly instead of using `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so callers see the same type.
    if not test_imgname_list:
        raise AssertionError(
            'test_dir has no imgs there. Note that, we only support img '
            'format of ({})'.format(', '.join(supported_exts)))

    det_net = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=False)
    detect(det_net=det_net,
           inference_save_path=inference_save_path,
           real_test_imgname_list=test_imgname_list)
def eval(num_imgs, args):
    """Run ``test_dota`` on the images in ``args.test_dir``.

    Unless ``args.show_box`` is set, a progress file named
    ``'<cfgs.VERSION>.txt'`` is read first and every image whose name is
    listed there (one name per line) is skipped; the file is removed once
    ``test_dota`` returns. The file name is also handed to ``test_dota``
    (presumably so it can record finished images there -- confirm against
    ``test_dota``).

    Args:
        num_imgs: Maximum number of images to process, or ``np.inf`` to
            process all of them.
        args: Object providing ``test_dir`` and ``show_box``; it is also
            forwarded unchanged to ``test_dota``.

    Raises:
        AssertionError: If no (remaining) image with a supported extension is
            found in ``args.test_dir``.
    """
    supported_exts = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
    txt_name = '{}.txt'.format(cfgs.VERSION)

    # Lines of the progress file, each still carrying its trailing newline.
    already_tested = set()
    if not args.show_box:
        # Append mode creates the file when it is missing and never truncates
        # an existing one, so no separate existence check is needed.
        with open(txt_name, 'a'):
            pass

        with open(txt_name, 'r') as fr:
            img_filter = fr.readlines()
        print('****************************' * 3)
        print('Already tested imgs:', img_filter)
        print('****************************' * 3)
        already_tested = set(img_filter)

    # os.listdir order is arbitrary; sorting makes the `[:num_imgs]` subset
    # below deterministic. With show_box set, `already_tested` is empty and
    # nothing is filtered out.
    test_imgname_list = [
        os.path.join(args.test_dir, img_name)
        for img_name in sorted(os.listdir(args.test_dir))
        if img_name.endswith(supported_exts)
        and img_name + '\n' not in already_tested
    ]

    # Raise explicitly instead of using `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so callers see the same type.
    if not test_imgname_list:
        raise AssertionError(
            'test_dir has no imgs there. Note that, we only support img '
            'format of ({})'.format(', '.join(supported_exts)))

    if num_imgs == np.inf:
        # A float infinity cannot be used as a slice bound.
        real_test_img_list = test_imgname_list
    else:
        real_test_img_list = test_imgname_list[:num_imgs]

    det_net = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=False)
    test_dota(det_net=det_net,
              real_test_img_list=real_test_img_list,
              args=args,
              txt_name=txt_name)

    if not args.show_box:
        os.remove(txt_name)
def eval(num_imgs, img_dir, image_ext, test_annotation_path, draw_imgs):
    """Detect on the images in ``img_dir`` and score the rotated boxes.

    Args:
        num_imgs: Forwarded to ``eval_with_plac``.
        img_dir: Directory whose entries are evaluated.
        image_ext: Image file extension; each directory entry is cut at the
            first occurrence of this string to obtain its image id.
        test_annotation_path: Forwarded to
            ``voc_eval_r.voc_evaluate_detections``.
        draw_imgs: Forwarded to ``eval_with_plac``.
    """
    detector = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME,
        is_training=False)

    rotated_detections = eval_with_plac(
        img_dir=img_dir,
        det_net=detector,
        num_imgs=num_imgs,
        image_ext=image_ext,
        draw_imgs=draw_imgs)

    # Every directory entry becomes an id, whether or not it ends with
    # image_ext.
    image_ids = []
    for file_name in os.listdir(img_dir):
        image_ids.append(file_name.split(image_ext)[0])

    print(10 * "**")
    print('rotation eval:')
    voc_eval_r.voc_evaluate_detections(
        all_boxes=rotated_detections,
        test_imgid_list=image_ids,
        test_annotation_path=test_annotation_path)
def train():
    """Build the multi-GPU training graph and run the training loop.

    One tower is built per entry in ``cfgs.GPU_GROUP``. Per-tower gradients
    are combined with ``sum_gradients`` when there is more than one tower and
    applied with a momentum optimizer, grouped with an exponential moving
    average over the trainable variables. Summaries are written to
    ``cfgs.SUMMARY_PATH/cfgs.VERSION`` and checkpoints to
    ``cfgs.TRAINED_CKPT/cfgs.VERSION``.

    All settings come from the module-level ``cfgs``; the function takes no
    arguments and returns nothing. If a fetched total loss is NaN the process
    is terminated with ``sys.exit(0)``.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Take the first element of a shuffled copy of the configured
                # short-side lengths, i.e. a random choice among them.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            # One sample per GPU for every unit of cfgs.BATCH_SIZE.
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r3det = build_whole_network_r3det.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        # Data processing: slice the batch into one input set per tower.
        # Each entry is [img, gtboxes_h, gtboxes_r, num_objects, img_h, img_w].
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)

            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        # Losses averaged over towers; used for summaries and console logs.
        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss': tf.constant(0., tf.float32),
            'refine_reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        if cfgs.USE_SUPERVISED_MASK:
            total_loss_dict['mask_loss'] = tf.constant(0., tf.float32)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # slim variables are placed on the CPU while the
                        # tower's ops are placed on its GPU.
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [
                                        slim.conv2d, slim.conv2d_in_plane,
                                        slim.conv2d_transpose,
                                        slim.separable_conv2d,
                                        slim.fully_connected
                                    ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                # Crop the image from its top-left corner to
                                # the (img_h, img_w) delivered with the batch.
                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = r3det.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                # The last output is the dict of loss terms.
                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k, loss in loss_dict.items():
                                    total_losses += loss
                                    total_loss_dict[k] += loss / num_gpu

                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                # Regularization is added on the last tower
                                # only (presumably so it is counted once when
                                # the tower gradients are summed).
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        # Later towers reuse the variables created above.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        # Group scalars by the first word of the loss name, e.g. 'cls/cls_loss'.
        for k, loss in total_loss_dict.items():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), loss)

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            # Scale the gradients of biases and of 'conv_new' variables.
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = r3det.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            # Each step consumes num_gpu samples, so the configured iteration
            # counts are divided by the number of towers.
            max_steps = cfgs.MAX_ITERATION // num_gpu
            save_interval = cfgs.SAVE_WEIGHTS_INTE // num_gpu

            for step in range(max_steps):
                show_info = step % cfgs.SHOW_TRAIN_INFO_INTE == 0
                write_summary = step % cfgs.SMRY_ITER == 0

                if write_summary:
                    # A summary step takes priority over console logging when
                    # both intervals coincide.
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(
                        summary_str, (global_stepnp - 1) * num_gpu)
                    summary_writer.flush()

                elif show_info:
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()
                    _, global_stepnp, total_loss_dict_ = \
                        sess.run([train_op, global_step, total_loss_dict])
                    end = time.time()

                    print('***' * 20)
                    print("""%s: global_step:%d  current_step:%d""" %
                          (training_time,
                           (global_stepnp - 1) * num_gpu, step * num_gpu))
                    print("""per_cost_time:%.3fs""" %
                          ((end - start) / num_gpu))
                    loss_str = ''.join(
                        '%s:%.3f\n' % (k, v)
                        for k, v in total_loss_dict_.items())
                    print(loss_str)

                    if np.isnan(total_loss_dict_['total_losses']):
                        # NOTE(review): the exit status is 0 although
                        # training diverged -- confirm whether wrappers
                        # depend on it before changing it.
                        print('total_losses is NaN, stop training.')
                        sys.exit(0)

                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                # Save periodically and always on the final step.
                if (step > 0 and step % save_interval == 0) \
                        or step >= max_steps - 1:

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        # makedirs also creates a missing cfgs.TRAINED_CKPT.
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')

            coord.request_stop()
            coord.join(threads)
            summary_writer.close()
# Example #5
# 0
def build_detection_graph():
    """Build the inference graph and return the merged detection tensor.

    The graph takes a float32 placeholder named ``input_img`` with the fixed
    shape ``[1, 640, 640, 3]`` and feeds it to the detection network as-is:
    no resizing, mean subtraction or rescaling of the boxes happens in this
    graph.

    Returns:
        The tensor named ``DetResults``. Each row is
        ``[category, score, box[0], ..., box[4]]``; the five box values are
        presumably ``(x_c, y_c, w, h, theta)`` -- confirm against the network.
    """
    # Input is RGB, not BGR.
    input_img = tf.placeholder(dtype=tf.float32,
                               shape=[1, 640, 640, 3],
                               name='input_img')

    network = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME,
        is_training=False)

    detection_boxes, detection_scores, detection_category = \
        network.build_whole_detection_network(input_img_batch=input_img,
                                              gtboxes_batch_h=None,
                                              gtboxes_batch_r=None)

    # Stack the first five box columns as rows, then transpose so that each
    # detection is one row again.
    box_columns = [detection_boxes[:, col] for col in range(5)]
    boxes = tf.transpose(tf.stack(box_columns))

    category_col = tf.reshape(detection_category, [-1, 1])
    score_col = tf.reshape(detection_scores, [-1, 1])
    dets = tf.concat([category_col, score_col, boxes],
                     axis=1,
                     name='DetResults')

    return dets