Example #1
def anno_convert():
    o_txt_dir = '/home/ai-i-hanmingfei/datasets/ODAI-ICPR/split_origin/val/s8_inference_oriented/labelTxt'
    out_dir = \
        '/home/ai-i-hanmingfei/datasets/ODAI-ICPR/split_origin/val/s8_inference_oriented/labelTxt_transferred'
    txt_paths = os.listdir(o_txt_dir)
    mkdir(out_dir)

    # print(txt_paths)
    for txt in txt_paths:
        with open(os.path.join(o_txt_dir, txt), 'r') as h_in:
            print(txt)
            lines = h_in.readlines()
            print(lines)

        # if not os.path.isdir(os.path.join(o_txt_dir, 'tmp')):
        #     os.makedirs(os.path.join(o_txt_dir, 'tmp'))

        with open(os.path.join(out_dir, txt), 'w') as h_out:
            for line in lines:
                anno = line.strip().split(' ')
                print(anno)
                line_tmp = [
                    str(math.ceil(int(anno[i]) * 1024.0 / 600.0))
                    for i in range(8)
                ]
                line_tmp.append(anno[8])
                print(line_tmp)
                h_out.write(' '.join(line_tmp) + '\n')
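As a self-contained illustration of the rescaling above, here is a minimal sketch with a made-up annotation line (the 600 -> 1024 scale factor is the one hard-coded in anno_convert; sample_line is hypothetical):

import math

# One DOTA-style line: 8 polygon coordinates followed by a class name.
sample_line = '10 20 110 20 110 60 10 60 plane'
anno = sample_line.strip().split(' ')
scaled = [str(math.ceil(int(anno[i]) * 1024.0 / 600.0)) for i in range(8)]
scaled.append(anno[8])
print(' '.join(scaled))  # '18 35 188 35 188 103 18 103 plane'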
Example #2
def write_voc_results_file(all_boxes, test_imgid_list, det_save_dir):
    '''
    :param all_boxes: a list; each item represents the detections of one image.
        The detections are an array of shape [-1, 7]: [category, score, x, y, w, h, theta].
        Note: if there are no detections in an image, the detections are [].
    :param test_imgid_list:
    :param det_save_dir:
    :return:
    '''
    for cls, cls_id in NAME_LABEL_MAP.items():
        if cls == 'back_ground':
            continue
        print("Writing {} VOC resutls file".format(cls))

        tools.mkdir(det_save_dir)
        det_save_path = os.path.join(det_save_dir, "det_" + cls + ".txt")
        with open(det_save_path, 'wt') as f:
            for index, img_name in enumerate(test_imgid_list):
                this_img_detections = all_boxes[index]

                this_cls_detections = this_img_detections[
                    this_img_detections[:, 0] == cls_id]
                if this_cls_detections.shape[0] == 0:
                    continue  # this class has no detections in this image
                for a_det in this_cls_detections:
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(img_name, a_det[1], a_det[2], a_det[3],
                                   a_det[4], a_det[5], a_det[6])
                            )  # that is [img_name, score, x, y, w, h, theta]
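A hedged usage sketch for the function above: all_boxes pairs one array per image in test_imgid_list, each row [category, score, x, y, w, h, theta] as the docstring states. The values and names below are made up; note that an image with zero detections should still contribute an empty array with 7 columns so the boolean indexing above works:

import numpy as np

test_imgid_list = ['P0001', 'P0002']
all_boxes = [
    np.array([[1, 0.98, 300.0, 200.0, 80.0, 40.0, -30.0],
              [2, 0.75, 512.0, 512.0, 20.0, 10.0, 15.0]]),
    np.zeros((0, 7)),  # no detections in the second image
]
# write_voc_results_file(all_boxes, test_imgid_list, './det_results')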
def inference(det_net, data_dir):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN)

    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        imgs = os.listdir(data_dir)
        for i, a_img_name in enumerate(imgs):

            # f = open('./res_icdar_r/res_{}.txt'.format(a_img_name.split('.jpg')[0]), 'w')

            raw_img = cv2.imread(os.path.join(data_dir, a_img_name))
            # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()

            # res_r = coordinate_convert.forward_convert(det_boxes_r_, False)
            # res_r = np.array(res_r, np.int32)
            # for r in res_r:
            #     f.write('{},{},{},{},{},{},{},{}\n'.format(r[0], r[1], r[2], r[3],
            #                                                r[4], r[5], r[6], r[7]))
            # f.close()

            det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                np.squeeze(resized_img, 0),
                boxes=det_boxes_r_,
                labels=det_category_r_,
                scores=det_scores_r_)
            save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION)
            tools.mkdir(save_dir)
            cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                        det_detections_r)
            view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1,
                     len(imgs))
Example #4
def clip_image(file_idx, image, boxes_all, width, height, stride_w, stride_h):
    if len(boxes_all) > 0:
        shape = image.shape
        for start_h in range(0, shape[0], stride_h):
            for start_w in range(0, shape[1], stride_w):
                boxes = copy.deepcopy(boxes_all)
                box = np.zeros_like(boxes_all)
                start_h_new = start_h
                start_w_new = start_w
                if start_h + height > shape[0]:
                    start_h_new = shape[0] - height
                if start_w + width > shape[1]:
                    start_w_new = shape[1] - width
                top_left_row = max(start_h_new, 0)
                top_left_col = max(start_w_new, 0)
                bottom_right_row = min(start_h + height, shape[0])
                bottom_right_col = min(start_w + width, shape[1])

                subImage = image[top_left_row:bottom_right_row,
                                 top_left_col:bottom_right_col]

                box[:, 0] = boxes[:, 0] - top_left_col
                box[:, 2] = boxes[:, 2] - top_left_col
                box[:, 4] = boxes[:, 4] - top_left_col
                box[:, 6] = boxes[:, 6] - top_left_col

                box[:, 1] = boxes[:, 1] - top_left_row
                box[:, 3] = boxes[:, 3] - top_left_row
                box[:, 5] = boxes[:, 5] - top_left_row
                box[:, 7] = boxes[:, 7] - top_left_row
                box[:, 8] = boxes[:, 8]
                center_y = 0.25 * (box[:, 1] + box[:, 3] + box[:, 5] +
                                   box[:, 7])
                center_x = 0.25 * (box[:, 0] + box[:, 2] + box[:, 4] +
                                   box[:, 6])

                cond1 = np.intersect1d(
                    np.where(center_y[:] >= 0)[0],
                    np.where(center_x[:] >= 0)[0])
                cond2 = np.intersect1d(
                    np.where(
                        center_y[:] <= (bottom_right_row - top_left_row))[0],
                    np.where(
                        center_x[:] <= (bottom_right_col - top_left_col))[0])
                idx = np.intersect1d(cond1, cond2)
                if len(idx) > 0 and (subImage.shape[0] > 5
                                     and subImage.shape[1] > 5):
                    mkdir(os.path.join(save_dir, 'images'))
                    img = os.path.join(
                        save_dir, 'images', "%s_%04d_%04d.png" %
                        (file_idx, top_left_row, top_left_col))
                    cv2.imwrite(img, subImage)

                    mkdir(os.path.join(save_dir, 'labeltxt'))
                    xml = os.path.join(
                        save_dir, 'labeltxt', "%s_%04d_%04d.xml" %
                        (file_idx, top_left_row, top_left_col))
                    save_to_xml(xml, subImage.shape[0], subImage.shape[1],
                                box[idx, :], class_list)
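The window placement above (shift a window that would overflow back so it ends at the border, then clamp at zero) can be isolated into a small helper; this is a restatement for illustration, not a function from the repo:

def window_origins(total, win, stride):
    # Mirror clip_image's clamping: an overflowing window is shifted back to
    # end exactly at the border, and origins never go negative (covers
    # images smaller than the window).
    return [max(min(start, total - win), 0) for start in range(0, total, stride)]

# e.g. window_origins(1000, 600, 450) -> [0, 400, 400]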
def convert_pascal_to_tfrecord():
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    mkdir(FLAGS.save_dir)

    # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    # writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options)
    writer = tf.python_io.TFRecordWriter(path=save_path)

    with open(FLAGS.label_txt, 'r') as fr:
        lines = fr.readlines()

    real_cnt = 0
    pbar = tqdm(glob.glob(xml_path + '/*.xml'))

    for xml in pbar:
        # to avoid path errors across development platforms
        xml = xml.replace('\\', '/')
        tmp = xml.split('/')[-1].split('.')[0] + "\n"
        if tmp not in lines:
            continue

        img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format
        img_path = image_path + '/' + img_name

        if not os.path.exists(img_path):
            print('{} does not exist!'.format(img_path))
            continue

        img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)
        # assert gtbox_label.shape[0] > 0, 'no box'
        if gtbox_label.shape[0] == 0:
            continue

        # img = np.array(Image.open(img_path))
        img = cv2.imread(img_path)[:, :, ::-1]

        feature = tf.train.Features(
            feature={
                # do not need encode() in linux
                'img_name': _bytes_feature(img_name.encode()),
                # 'img_name': _bytes_feature(img_name),
                'img_height': _int64_feature(img_height),
                'img_width': _int64_feature(img_width),
                'img': _bytes_feature(img.tostring()),
                'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()),
                'num_objects': _int64_feature(gtbox_label.shape[0])
            })

        example = tf.train.Example(features=feature)
        writer.write(example.SerializeToString())
        real_cnt += 1
        pbar.set_description("Conversion progress")

    print('\nConversion is complete! {} images.'.format(real_cnt))
    writer.close()
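To sanity-check the output, the records can be read back with the same TF1 API family. A sketch that assumes gtbox_label was serialized as an int32 array with 9 values per object (8 polygon coordinates plus a label); the dtype and width are assumptions, not confirmed by the snippet above:

import numpy as np
import tensorflow as tf

def iterate_tfrecord(tfrecord_path):
    for record in tf.python_io.tf_record_iterator(tfrecord_path):
        example = tf.train.Example.FromString(record)
        feat = example.features.feature
        h = feat['img_height'].int64_list.value[0]
        w = feat['img_width'].int64_list.value[0]
        img = np.frombuffer(feat['img'].bytes_list.value[0],
                            dtype=np.uint8).reshape(h, w, 3)
        gtboxes = np.frombuffer(feat['gtboxes_and_label'].bytes_list.value[0],
                                dtype=np.int32).reshape(-1, 9)  # assumed layout
        yield feat['img_name'].bytes_list.value[0].decode(), img, gtboxes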
Example #6
def inference(det_net, path):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN)
    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_h_batch=None,
                                                                                      gtboxes_r_batch=None)
    finish_load = time.time()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False

    tmp = []

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # imgs = os.listdir(data_dir)
        # for i, a_img_name in enumerate(imgs):

        #     raw_img = cv2.imread(os.path.join(data_dir,
        #                                       a_img_name))
        raw_img = cv2.imread(path)
        basename = os.path.splitext(os.path.basename(path))[0]
        # print(raw_img)
        # exit()
        start = time.time()
        resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
        det_boxes_r_, det_scores_r_, det_category_r_ = \
            sess.run(
                [img_batch, det_boxes_h, det_scores_h, det_category_h,
                    det_boxes_r, det_scores_r, det_category_r],
                feed_dict={img_plac: raw_img}
            )
        end = time.time()

        save_dir = os.path.join(cfgs.ROTATE_SAVE_PATH, cfgs.VERSION)
        tools.mkdir(save_dir)

        boxes = det_boxes_r_
        category = det_category_r_

        process(boxes, category, np.squeeze(resized_img, 0), basename,
                save_dir)
Example #7
def clip_image(file_idx, image, boxes_all, width, height, stride_w, stride_h):

    boxes_all_5 = backward_convert(boxes_all[:, :8], False)
    print(boxes_all[np.logical_or(boxes_all_5[:, 2] <= 5, boxes_all_5[:, 3] <= 5), :])
    boxes_all = boxes_all[np.logical_and(boxes_all_5[:, 2] > 5, boxes_all_5[:, 3] > 5), :]

    if boxes_all.shape[0] > 0:
        shape = image.shape
        for start_h in range(0, shape[0], stride_h):
            for start_w in range(0, shape[1], stride_w):
                boxes = copy.deepcopy(boxes_all)
                if USE_HEAD:
                    box = np.zeros_like(boxes_all)
                else:
                    box = np.zeros_like(boxes_all[:, :10])
                start_h_new = start_h
                start_w_new = start_w
                if start_h + height > shape[0]:
                    start_h_new = shape[0] - height
                if start_w + width > shape[1]:
                    start_w_new = shape[1] - width
                top_left_row = max(start_h_new, 0)
                top_left_col = max(start_w_new, 0)
                bottom_right_row = min(start_h + height, shape[0])
                bottom_right_col = min(start_w + width, shape[1])

                subImage = image[top_left_row:bottom_right_row, top_left_col: bottom_right_col]

                box[:, 0:7:2] = boxes[:, 0:7:2] - top_left_col
                box[:, 1:8:2] = boxes[:, 1:8:2] - top_left_row

                if USE_HEAD:
                    box[:, 8] = boxes[:, 8] - top_left_col
                    box[:, 9] = boxes[:, 9] - top_left_row

                box[:, -2:] = boxes[:, -2:]

                center_y = 0.25 * (box[:, 1] + box[:, 3] + box[:, 5] + box[:, 7])
                center_x = 0.25 * (box[:, 0] + box[:, 2] + box[:, 4] + box[:, 6])

                cond1 = np.intersect1d(np.where(center_y[:] >= 0)[0], np.where(center_x[:] >= 0)[0])
                cond2 = np.intersect1d(np.where(center_y[:] <= (bottom_right_row - top_left_row))[0],
                                       np.where(center_x[:] <= (bottom_right_col - top_left_col))[0])
                idx = np.intersect1d(cond1, cond2)
                if len(idx) > 0 and (subImage.shape[0] > 5 and subImage.shape[1] > 5):
                    mkdir(os.path.join(save_dir, 'images'))
                    img = os.path.join(save_dir, 'images',
                                       "%s_%04d_%04d.jpg" % (file_idx, top_left_row, top_left_col))
                    cv2.imwrite(img, subImage)

                    mkdir(os.path.join(save_dir, 'labelxml'))
                    xml = os.path.join(save_dir, 'labelxml',
                                       "%s_%04d_%04d.xml" % (file_idx, top_left_row, top_left_col))
                    save_to_xml(xml, "%s_%04d_%04d" % (file_idx, top_left_row, top_left_col),
                                subImage.shape[0], subImage.shape[1], box[idx, :], class_list)
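Note the strided slices above (0:7:2 for the x coordinates, 1:8:2 for the y) vectorize the per-column subtraction written out in Example #4. A quick equivalence check:

import numpy as np

boxes = np.arange(20, dtype=np.float32).reshape(2, 10)  # dummy [x1 y1 ... x4 y4 extra extra]
shifted = boxes.copy()
shifted[:, 0:7:2] -= 5.0   # x1, x2, x3, x4
shifted[:, 1:8:2] -= 3.0   # y1, y2, y3, y4
assert np.allclose(shifted[:, 0:7:2], boxes[:, 0:7:2] - 5.0)
assert np.allclose(shifted[:, 8:], boxes[:, 8:])  # trailing columns untouched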
Example #8
def write_voc_results_file(boxes, labels, scores, img_name, det_save_dir):
    '''
    Append detections to det_<cls>.txt, one line per box:
    [img_name, probability, w, h, x_c, y_c, theta]
    '''
    tools.mkdir(det_save_dir)
    num, _ = boxes.shape
    for i in range(num):
        for cls, cls_id in NAME_LABEL_MAP.items():
            if cls == 'back_ground':
                continue
            if labels[i] == cls_id:
                #print("Writing {} VOC resutls file".format(cls))
                det_save_path = os.path.join(det_save_dir,
                                             "det_" + cls + ".txt")
                with open(det_save_path, 'a') as f:
                    f.write('{:s} {:.6f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(img_name, scores[i], boxes[i][0],
                                   boxes[i][1], boxes[i][2], boxes[i][3],
                                   boxes[i][4])
                            )  # that is [img_name, score, x, y, w, h, theta]
Example #9
def eval_with_plac(det_net, args):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None,
                                                     3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category, detection_boxes_angle = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
        gt_smooth_label=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if isinstance(
            cfgs.IMG_SHORT_SIDE_LEN, list) else [cfgs.IMG_SHORT_SIDE_LEN]
        img_short_side_len_list = [
            img_short_side_len_list[0]
        ] if not args.multi_scale else img_short_side_len_list
        imgs = os.listdir(args.img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(args.image_ext)[0]

            raw_img = cv2.imread(
                os.path.join(args.img_dir, a_img_name + args.image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            for short_size in img_short_side_len_list:
                max_len = cfgs.IMG_MAX_LENGTH
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w),
                                       max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                    sess.run(
                        [img_batch, detection_boxes_angle, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                if len(det_boxes_r_) > 0:
                    det_boxes_r_ = forward_convert(det_boxes_r_, False)
                    det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                    det_boxes_r_[:, 1::2] *= (raw_h / resized_h)

                    for ii in range(len(det_boxes_r_)):
                        box_rotate = det_boxes_r_[ii]
                        box_res_rotate.append(box_rotate)
                        label_res_rotate.append(det_category_r_[ii])
                        score_res_rotate.append(det_scores_r_[ii])
            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []
            threshold = {'car': 0.2, 'plane': 0.3}

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r_),
                        scores=np.array(tmp_score_r),
                        iou_threshold=threshold[LABEL_NAME_MAP[sub_class]],
                        max_output_size=150)
                except Exception:
                    tmp_boxes_r_ = np.array(tmp_boxes_r_)
                    tmp = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    tmp[:, 0:-1] = tmp_boxes_r_
                    tmp[:, -1] = np.array(tmp_score_r)
                    # Note: rotate_gpu_nms computes the IoU of two identical
                    # rectangles as 0, so a tiny jitter is added to break exact ties
                    jitter = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) +
                        np.array(jitter, np.float32),
                        float(threshold[LABEL_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            box_res_rotate_ = np.array(box_res_rotate_)
            score_res_rotate_ = np.array(score_res_rotate_)
            label_res_rotate_ = np.array(label_res_rotate_)

            if args.draw_imgs:
                detected_indices = score_res_rotate_ >= cfgs.VIS_SCORE
                detected_scores = score_res_rotate_[detected_indices]
                detected_boxes = box_res_rotate_[detected_indices]
                detected_boxes = backward_convert(detected_boxes,
                                                  with_label=False)
                detected_categories = label_res_rotate_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.array(raw_img, np.float32),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=False,
                    is_csl=True)

                save_dir = os.path.join('test_ucas_aod', cfgs.VERSION,
                                        'ucas_aod_img_vis')
                tools.mkdir(save_dir)

                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if box_res_rotate_.shape[0] != 0:
                box_res_rotate_ = backward_convert(box_res_rotate_, False)

                x_c, y_c, w, h, theta = box_res_rotate_[:, 0], box_res_rotate_[:, 1], box_res_rotate_[:, 2], \
                                        box_res_rotate_[:, 3], box_res_rotate_[:, 4]

                boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
                dets_r = np.hstack((label_res_rotate_.reshape(-1, 1),
                                    score_res_rotate_.reshape(-1, 1), boxes_r))
                all_boxes_r.append(dets_r)
            else:
                # keep all_boxes_r aligned with the image list when an image has no detections
                all_boxes_r.append(np.zeros((0, 7)))

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r
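For reference, the forward_convert step used above turns 5-parameter rotated boxes [x_c, y_c, w, h, theta] into 8-coordinate polygons. A minimal OpenCV-based sketch of that conversion (the repo's own implementation may differ in angle convention):

import cv2
import numpy as np

def rotated_boxes_to_polygons(boxes_r):
    # boxes_r: [N, 5] as [x_c, y_c, w, h, theta] -> [N, 8] corner coordinates.
    polys = []
    for x_c, y_c, w, h, theta in boxes_r:
        corners = cv2.boxPoints(((x_c, y_c), (w, h), theta))  # [4, 2] float32
        polys.append(corners.reshape(-1))
    return np.array(polys, dtype=np.float32)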
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)

        mask_batch = tf.py_func(
            image_preprocess.get_mask,
            [tf.squeeze(img_batch, 0),
             tf.squeeze(gtboxes_and_label_batch, 0)], [tf.float32])

        gtboxes_and_label = tf.py_func(
            back_forward_convert,
            inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
            Tout=tf.float32)
        gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

        gtboxes_and_label_AreaRectangle = get_horizen_minAreaRectangle(
            gtboxes_and_label)
        gtboxes_and_label_AreaRectangle = tf.reshape(
            gtboxes_and_label_AreaRectangle, [-1, 5])

    with tf.name_scope('draw_gtboxes'):
        gtboxes_in_img = draw_box_with_color(
            img_batch,
            tf.reshape(gtboxes_and_label_AreaRectangle, [-1, 5])[:, :-1],
            text=tf.shape(gtboxes_and_label_AreaRectangle)[0])

        gtboxes_rotate_in_img = draw_box_with_color_rotate(
            img_batch,
            tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
            text=tf.shape(gtboxes_and_label)[0])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_boxes_h, final_scores_h, final_category_h, \
        final_boxes_r, final_scores_r, final_category_r, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_r_batch=gtboxes_and_label,
            gtboxes_h_batch=gtboxes_and_label_AreaRectangle,
            mask_batch=mask_batch[0])

    dets_in_img = draw_boxes_with_categories_and_scores(
        img_batch=img_batch,
        boxes=final_boxes_h,
        labels=final_category_h,
        scores=final_scores_h)

    dets_rotate_in_img = draw_boxes_with_categories_and_scores_rotate(
        img_batch=img_batch,
        boxes=final_boxes_r,
        labels=final_category_r,
        scores=final_scores_r)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss_h = loss_dict['fastrcnn_cls_loss_h']
    fastrcnn_loc_loss_h = loss_dict['fastrcnn_loc_loss_h']
    fastrcnn_cls_loss_r = loss_dict['fastrcnn_cls_loss_r']
    fastrcnn_loc_loss_r = loss_dict['fastrcnn_loc_loss_r']
    fastrcnn_total_loss = fastrcnn_cls_loss_h + fastrcnn_loc_loss_h + fastrcnn_cls_loss_r + fastrcnn_loc_loss_r

    if cfgs.ADD_ATTENTION:
        attention_loss = loss_dict['attention_loss']
        total_loss = rpn_total_loss + fastrcnn_total_loss + attention_loss + weight_decay_loss
    else:
        attention_loss = tf.convert_to_tensor(0)
        total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss_h', fastrcnn_cls_loss_h)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss_h',
                      fastrcnn_loc_loss_h)
    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss_r', fastrcnn_cls_loss_r)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss_r',
                      fastrcnn_loc_loss_r)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)
    if cfgs.ADD_ATTENTION:
        tf.summary.scalar('ATTENTION_LOSS/attention_loss', attention_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/gtboxes_rotate', gtboxes_rotate_in_img)
    tf.summary.image('img/mask', mask_batch)
    tf.summary.image('img/dets', dets_in_img)
    tf.summary.image('img/dets_rotate', dets_rotate_in_img)

    # ---------------------------------------------------------------------------------------------add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # ---------------------------------------------------------------------------------------------compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #
    with tf.Session(config=config) as sess:
        # with tf.Session() as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                    _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                    _fast_rcnn_location_rotate_loss, _fast_rcnn_classification_rotate_loss, \
                    _fast_rcnn_total_loss, _attention_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss, rpn_cls_loss,
                                  rpn_total_loss, fastrcnn_loc_loss_h, fastrcnn_cls_loss_h,
                                  fastrcnn_loc_loss_r, fastrcnn_cls_loss_r, fastrcnn_total_loss,
                                  attention_loss, total_loss, train_op])

                    end = time.time()
                    print(""" {}: step{}    image_name:{} |\t
                                                    rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t
                                                    rpn_total_loss:{} |
                                                    fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t
                                                    fast_rcnn_loc_rotate_loss:{} |\t fast_rcnn_cla_rotate_loss:{} |\t
                                                    fast_rcnn_total_loss:{} |\t attention_loss:{} |\t
                                                    total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss,
                                  _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                                  _fast_rcnn_classification_loss, _fast_rcnn_location_rotate_loss,
                                  _fast_rcnn_classification_rotate_loss, _fast_rcnn_total_loss,
                                  _attention_loss, _total_loss, (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
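The piecewise-constant schedule above drops the learning rate by 10x at each DECAY_STEP boundary. A plain-Python restatement (the step boundaries and base LR below are placeholders, not the repo's configs):

def lr_at(step, decay_steps=(60000, 80000), base_lr=1e-3):
    # Mirrors tf.train.piecewise_constant(boundaries=decay_steps,
    # values=[base_lr, base_lr / 10., base_lr / 100.]); boundaries are
    # inclusive on the left segment, matching the TF semantics.
    if step <= decay_steps[0]:
        return base_lr
    elif step <= decay_steps[1]:
        return base_lr / 10.
    return base_lr / 100.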
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                       is_training=True)

    with tf.name_scope('get_batch'):
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        gtbox_plac = tf.placeholder(dtype=tf.int32, shape=[None, 5])

        img_batch, gtboxes_and_label = preprocess_img(img_plac, gtbox_plac)
        # gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = 0 # tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss



    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img_batch,
                                                                   boxes=gtboxes_and_label[:, :-1],
                                                                   labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                                     boxes=final_bbox,
                                                                                     labels=final_category,
                                                                                     scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(global_step,
                                     boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
                                     values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(gradients,
                                                          cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients



    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):

            img_id, img, gt_info = next_img(step=step)
            training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step],
                                            feed_dict={img_plac: img,
                                                       gtbox_plac: gt_info}
                                            )

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _, global_stepnp, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss],
                        feed_dict={img_plac: img,
                                   gtbox_plac: gt_info})
                    end = time.time()
                    print(""" {}: step{}    image_name:{} |\t
                              rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                              fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                              total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_id), rpnLocLoss, rpnClsLoss,
                                  rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op],
                                                                 feed_dict={img_plac: img,
                                                                            gtbox_plac: gt_info}
                                                                 )
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')
Example #12
def train():

    # get Network Class
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)
    # get batch
    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)

        # convert gtboxes_and_labels
        gtboxes_and_label = tf.py_func(
            back_forward_convert,
            inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
            Tout=tf.float32)
        gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

    # draw ground-truth
    with tf.name_scope('draw_gtboxes'):
        gtboxes_in_img = draw_box_with_color_rotate(
            img_batch,
            tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
            text=tf.shape(gtboxes_and_label)[0])

    # default regularizers
    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):

        # build network
        final_boxes, final_scores, final_category, loss_dict = \
            faster_rcnn.build_whole_detection_network(input_img_batch=img_batch,
                                                      gtboxes_batch=gtboxes_and_label)

    # draw detections
    dets_in_img = draw_boxes_with_categories_and_scores_rotate(
        img_batch=img_batch,
        boxes=final_boxes,
        labels=final_category,
        scores=final_scores)

    # ----------------------------------------------------------------------------------------------------build loss
    # weight decay loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())

    # rpn losses
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    # fastrcnn losses
    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    # total loss
    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/dets', dets_in_img)

    # ___________________________________________________________________________________________________add summary
    # create global step
    global_step = slim.get_or_create_global_step()

    # learning rate setting
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)

    # optimizer with lr
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    # compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    # clipping gradients
    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op, ie. apply gradients
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)

    # summary_op
    summary_op = tf.summary.merge_all()

    # init_op
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # restorer and Saver
    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    # GPU Config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # start a session
    with tf.Session(config=config) as sess:

        # init
        sess.run(init_op)

        # restore
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # A Session object supports multiple threads: several threads can be
        # created in the same Session and run in parallel. Create the thread
        # coordinator and start the (tensor / training-data) enqueue threads;
        # how many enqueue threads are used is defined in
        # read_tfrecord.py->next_batch->tf.train.batch.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        # construct summary writer
        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        # train MAX_ITERATION steps
        for step in range(cfgs.MAX_ITERATION):

            # time of this step
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            # no show & no summary
            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:

                # global_step is added to the run so global_stepnp can be used when saving
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                # show
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:

                    start = time.time()

                    # intermediate values (the losses and img_name_batch) are added to the run
                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                    _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                    _fast_rcnn_total_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss, rpn_cls_loss,
                                  rpn_total_loss, fastrcnn_loc_loss, fastrcnn_cls_loss,
                                  fastrcnn_total_loss, total_loss, train_op])

                    # # show boxes, scores, category infos
                    # final_boxes_r, _final_scores_r, _final_category_r = sess.run([final_boxes_r, final_scores_r, final_category_r])
                    # print('*'*100)
                    # print(_final_boxes_r)
                    # print(_final_scores_r)
                    # print(_final_category_r)

                    end = time.time()

                    # print
                    print(""" {}: step {}: image_name:{} |\t
                                                    rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t
                                                    rpn_total_loss:{} |
                                                    fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t
                                                    fast_rcnn_total_loss:{} |\t
                                                    total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, _global_step, str(_img_name_batch[0]),
                                _rpn_location_loss, _rpn_classification_loss, _rpn_total_loss,
                                _fast_rcnn_location_loss, _fast_rcnn_classification_loss, _fast_rcnn_total_loss,
                                _total_loss, (end - start)))

                # summary
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        # summary_op is added to run
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])

                        # add summary
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            # save
            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                # mkdir
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                # save checkpoint
                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        # the coordinator issues the stop command to all threads
        coord.request_stop()
        # join the started threads back to the main thread and wait for them to finish
        coord.join(threads)
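The Coordinator / queue-runner lifecycle used above, reduced to its skeleton (standard TF1 pattern; sess, train_op and the queue-based input pipeline are assumed to already exist):

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        sess.run(train_op)
except tf.errors.OutOfRangeError:
    pass  # input queues exhausted
finally:
    coord.request_stop()   # ask all enqueue threads to stop
    coord.join(threads)    # wait for them to finish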
Example #13
def train():
    with tf.Graph().as_default():

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,   # 'ship', 'spacenet', 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True)

            gtboxes_and_label = tf.py_func(back_forward_convert,
                                           inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
                                           Tout=tf.float32)
            gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

        with tf.name_scope('draw_gtboxes'):
            gtboxes_in_img = draw_box_with_color(img_batch, tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
                                                 text=tf.shape(gtboxes_and_label_batch)[1])

        # ***********************************************************************************************
        # *                                         shared CNN                                          *
        # ***********************************************************************************************
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # ***********************************************************************************************
        # *                                            rpn                                              *
        # * Note: here the rpn is Feature Pyramid Networks                                              *
        # ***********************************************************************************************
        rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                            inputs=img_batch,
                            gtboxes_and_label=gtboxes_and_label,
                            is_training=True,
                            share_head=cfgs.SHARED_HEADS,
                            share_net=share_net,
                            anchor_ratios=cfgs.ANCHOR_RATIOS,
                            anchor_scales=cfgs.ANCHOR_SCALES,
                            anchor_angles=cfgs.ANCHOR_ANGLES,
                            scale_factors=cfgs.SCALE_FACTORS,  # this parameter will affect the performance
                            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                            level=cfgs.LEVEL,
                            anchor_stride=cfgs.ANCHOR_STRIDE,
                            top_k_nms=cfgs.RPN_TOP_K_NMS,
                            kernel_size=cfgs.KERNEL_SIZE,
                            use_angles_condition=False,
                            anchor_angle_threshold=cfgs.RPN_ANCHOR_ANGLES_THRESHOLD,
                            nms_angle_threshold=cfgs.RPN_NMS_ANGLES_THRESHOLD,
                            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                            # iou>=0.7 is positive box, iou< 0.3 is negative
                            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                            remove_outside_anchors=cfgs.IS_FILTER_OUTSIDE_BOXES,  # whether remove anchors outside
                            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                            scope='')

        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

        rpn_location_loss, rpn_classification_loss, rpn_predict_boxes, rpn_predict_scores = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # score > 0.6 is object
            rpn_object_boxes_indices = tf.reshape(tf.where(tf.greater(rpn_proposals_scores, cfgs.FINAL_SCORE_THRESHOLD)),
                                                  [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices)
            rpn_object_scores = tf.gather(rpn_proposals_scores, rpn_object_boxes_indices)

            rpn_proposals_object_boxes_in_img = draw_boxes_with_scores(img_batch,
                                                                       rpn_object_boxes,
                                                                       scores=rpn_object_scores)

            # rpn_proposals_object_boxes_in_img = draw_box_with_color(img_batch, rpn_object_boxes,
            #                                                         text=tf.shape(rpn_object_boxes)[0])
            rpn_proposals_boxes_in_img = draw_box_with_color(img_batch, rpn_proposals_boxes,
                                                             text=tf.shape(rpn_proposals_boxes)[0])
        # ***********************************************************************************************
        # *                                         Fast RCNN                                           *
        # ***********************************************************************************************

        fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=img_batch,
                                             feature_pyramid=rpn.feature_pyramid,
                                             rpn_proposals_boxes=rpn_proposals_boxes,
                                             rpn_proposals_scores=rpn_proposals_scores,
                                             stop_gradient_for_proposals=False,
                                             img_shape=tf.shape(img_batch),
                                             roi_size=cfgs.ROI_SIZE,
                                             roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                                             scale_factors=cfgs.SCALE_FACTORS,
                                             gtboxes_and_label=gtboxes_and_label,
                                             fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                             top_k_nms=cfgs.FAST_RCNN_TOP_K_NMS,
                                             nms_angle_threshold=cfgs.FAST_RCNN_NMS_ANGLES_THRESHOLD,
                                             use_angle_condition=False,
                                             level=cfgs.LEVEL,
                                             fast_rcnn_maximum_boxes_per_img=100,
                                             fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                                             show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,  # show detections which score >= 0.6
                                             num_classes=cfgs.CLASS_NUM,
                                             fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                                             fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                                             fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                                             # iou>0.5 is positive, iou<0.5 is negative
                                             boxes_angle_threshold=cfgs.FAST_RCNN_BOXES_ANGLES_THRESHOLD,
                                             use_dropout=cfgs.USE_DROPOUT,
                                             weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                                             is_training=True)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(img_batch=img_batch,
                                                                         boxes=fast_rcnn_decode_boxes,
                                                                         labels=detection_category,
                                                                         scores=fast_rcnn_score)

        # train
        total_loss = slim.losses.get_total_loss()

        global_step = slim.get_or_create_global_step()

        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=[np.int64(70000), np.int64(120000)],
                                         values=[cfgs.LR, cfgs.LR/10, cfgs.LR/100])
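        # step decay: LR for the first 70k steps, LR/10 until 120k, LR/100 afterwards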

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

        if cfgs.RPN_TRAIN:
            train_op = slim.learning.create_train_op(rpn_total_loss, optimizer, global_step)
        else:
            train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)
        # train_op = optimizer.minimize(second_classification_loss, global_step)

        # ***********************************************************************************************
        # *                                          Summary                                            *
        # ***********************************************************************************************
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/fast_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)

        # rpn loss and image
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('loss/total_loss', total_loss)

        tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
        tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_object_boxes_in_img)
        # learning_rate
        tf.summary.scalar('learning_rate', lr)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = restore_model.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                start = time.time()

                _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                _fast_rcnn_total_loss, _total_loss, _ = \
                    sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                              rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                              fast_rcnn_total_loss, total_loss, train_op])

                end = time.time()

                if step % 10 == 0:

                    print(""" {}: step{}    image_name:{} |\t
                          rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                          fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                          total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss,
                                  _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                                  _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _total_loss,
                                  (end - start)))

                if (step % 50 == 0) and (step % 10000 != 0):  # regular summaries; checkpoint steps are handled below
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, _global_step)
                    summary_writer.flush()

                if (step > 0 and step % 10000 == 0) or (step == cfgs.MAX_ITERATION - 1):

                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, _global_step)
                    summary_writer.flush()

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'voc_'+str(_global_step)+'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        global_step = slim.get_or_create_global_step()

        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.NUM_GPU*cfgs.BATCH_SIZE)
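        # warmup_lr is not shown in this snippet; presumably it ramps the LR up over
        # the first cfgs.WARM_SETP steps before handing over to the base schedule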
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                           is_training=True,
                                                           batch_size=cfgs.BATCH_SIZE)

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE * cfgs.NUM_GPU,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)

        # data processing
        inputs_list = []
        for i in range(cfgs.NUM_GPU):
            start = i*cfgs.BATCH_SIZE
            end = (i+1)*cfgs.BATCH_SIZE
            img = img_batch[start:end, :, :, :]
            if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[start:end, :, :],
                                                   [cfgs.BATCH_SIZE, -1, 5]), tf.float32)
            num_objects = num_objects_batch[start:end]
            num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32)

            img_h = img_h_batch[start:end]
            img_w = img_w_batch[start:end]
            # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32)
            # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32)

            inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w])

        # put_op_list = []
        # get_op_list = []
        # for i in range(cfgs.NUM_GPU):
        #     with tf.device("/GPU:%s" % i):
        #         area = tf.contrib.staging.StagingArea(
        #             dtypes=[tf.float32, tf.float32, tf.float32])
        #         put_op_list.append(area.put(inputs_list[i]))
        #         get_op_list.append(area.get())

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0., tf.float32),
            'rpn_bbox_loss': tf.constant(0., tf.float32),
            'rpn_ctr_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfgs.NUM_GPU):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label = tf.py_func(get_gtboxes_and_label,
                                                               inp=[inputs_list[i][1], inputs_list[i][2]],
                                                               Tout=tf.float32)
                                gtboxes_and_label = tf.reshape(gtboxes_and_label, [cfgs.BATCH_SIZE, -1, 5])
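                                # get_gtboxes_and_label (a py_func) presumably trims the zero-padded
                                # gt tensor down to the real num_objects boxes per image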

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                h_crop = tf.reduce_max(img_shape[0])
                                w_crop = tf.reduce_max(img_shape[1])
                                img = tf.image.crop_to_bounding_box(image=img,
                                                                    offset_height=0,
                                                                    offset_width=0,
                                                                    target_height=tf.cast(h_crop, tf.int32),
                                                                    target_width=tf.cast(w_crop, tf.int32))

                                outputs = faster_rcnn.build_whole_detection_network(input_img_batch=img,
                                                                                    gtboxes_batch=gtboxes_and_label)
                                gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img[0, :, :, :],
                                                                                               boxes=gtboxes_and_label[0, :, :-1],
                                                                                               labels=gtboxes_and_label[0, :, -1])
                                gtboxes_in_img = tf.expand_dims(gtboxes_in_img, 0)
                                tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                                        img_batch=img[0, :, :, :],
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2])
                                    detections_in_img = tf.expand_dims(detections_in_img, 0)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU

                                total_losses = total_losses / cfgs.NUM_GPU
                                total_loss_dict['total_losses'] += total_losses

                                if i == cfgs.NUM_GPU - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        tf.get_variable_scope().reuse_variables()

                        grads = optimizer.compute_gradients(total_losses)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        # final_gvs = []
        # with tf.variable_scope('Gradient_Mult'):
        #     for grad, var in grads:
        #         scale = 1.
        #         # if '/biases:' in var.name:
        #         #    scale *= 2.
        #         if 'conv_new' in var.name:
        #             scale *= 3.
        #         if not np.allclose(scale, 1.0):
        #             grad = tf.multiply(grad, scale)
        #         final_gvs.append((grad, var))

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

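        # keep an exponential moving average of the weights; grouping it with
        # apply_gradient_op below makes every train step also update the shadow variables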
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = faster_rcnn.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True

        num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            for step in range(cfgs.MAX_ITERATION // num_per_iter):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***'*20)
                        print("""%s: global_step:%d  current_step:%d"""
                              % (training_time, (global_stepnp-1)*num_per_iter, step*num_per_iter))
                        print("""per_cost_time:%.3fs"""
                              % ((end - start) / num_per_iter))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
                            summary_writer.add_summary(summary_str, (global_stepnp-1)*num_per_iter)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_per_iter) == 0) or (step == cfgs.MAX_ITERATION // num_per_iter - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp-1)*num_per_iter) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
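sum_gradients is not defined in this example. Since each tower's loss is already divided by cfgs.NUM_GPU above, the helper only needs to add the per-tower gradients; a minimal sketch, assuming every tower yields gradients for the same variables in the same order:

import tensorflow as tf

def sum_gradients(tower_grads):
    # tower_grads: list (one entry per GPU) of lists of (grad, var) pairs.
    # Sketch only -- the repo's actual helper is not shown here.
    summed = []
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars if g is not None]
        if not grads:
            continue  # no tower produced a gradient for this variable
        grad = tf.add_n(grads)  # losses were pre-divided by NUM_GPU, so summing averages them
        _, var = grads_and_vars[0]  # the variable itself is shared across towers
        summed.append((grad, var))
    return summed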
Exemple #15
0
def eval_with_plac(det_net, real_test_imgname_list, img_root, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(os.path.join(img_root, a_img_name))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                draw_img = np.squeeze(resized_img, 0)
                if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)

                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)

                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + a_img_name,
                            final_detections[:, :, ::-1])

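            # map the detections from resized-image coordinates back to the raw image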
            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w

            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            dets = np.hstack((detected_categories.reshape(-1, 1),
                              detected_scores.reshape(-1, 1),
                              boxes))
            # all_boxes.append(dets)

            # eval txt
            CLASS_VOC = NAME_LABEL_MAP.keys()

            write_handle = {}
            txt_dir = os.path.join('voc2012_eval', cfgs.VERSION, 'results', 'VOC2012', 'Main')
            tools.mkdir(txt_dir)
            for sub_class in CLASS_VOC:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class] = open(os.path.join(txt_dir, 'comp3_det_test_%s.txt' % sub_class), 'a+')

            for det in dets:
                command = '%s %.6f %.6f %.6f %.6f %.6f\n' % (a_img_name.split('/')[-1].split('.')[0],
                                                             det[1],
                                                             det[2], det[3], det[4], det[5])
                write_handle[LABEl_NAME_MAP[det[0]]].write(command)

            for sub_class in CLASS_VOC:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class].close()

            tools.view_bar('%s image cost %.3fs' % (a_img_name, (end - start)), i + 1, len(real_test_imgname_list))
Exemple #16
0
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB, not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=False)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None, gtboxes_r_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        for i, a_img_name in enumerate(imgs):
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir,
                                              a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:

                det_detections_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0),
                                                                      boxes=det_boxes_r_,
                                                                      labels=det_category_r_,
                                                                      scores=det_scores_r_)
                save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION)
                tools.mkdir(save_dir)

                cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                            det_detections_r[:, :, ::-1])

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
            det_boxes_r_ = forward_convert(det_boxes_r_, False)
            det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
            det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
            det_boxes_r_ = back_forward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))

            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs))

        fw2 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')  # pickle needs a binary-mode file
        pickle.dump(all_boxes_r, fw2)
        fw2.close()
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None,
                                                     3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}
                )

            if draw_imgs:
                detected_indices = det_scores_r_ >= cfgs.VIS_SCORE
                detected_scores = det_scores_r_[detected_indices]
                detected_boxes = det_boxes_r_[detected_indices]
                detected_categories = det_category_r_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.squeeze(resized_img, 0),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=True)

                save_dir = os.path.join('test_hrsc', cfgs.VERSION,
                                        'hrsc2016_img_vis')
                tools.mkdir(save_dir)

                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = backward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1), boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r
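forward_convert / backward_convert are helpers from the same codebase that switch between the 5-parameter rotated-box form and 8-point polygons, so the per-axis scale factors can be applied coordinate by coordinate. A rough sketch of the forward direction, assuming OpenCV's ((cx, cy), (w, h), angle) box convention:

import cv2
import numpy as np

def forward_convert_sketch(rboxes):
    # [x_c, y_c, w, h, theta] rows -> flattened 4-corner polygons (x1 y1 ... x4 y4).
    # Sketch only; the repo's helper also supports a with_label flag not shown here.
    polys = []
    for x_c, y_c, w, h, theta in rboxes:
        pts = cv2.boxPoints(((x_c, y_c), (w, h), theta))  # 4x2 corner array
        polys.append(pts.reshape(-1))
    return np.array(polys, dtype=np.float32)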
Exemple #18
0
def inference(det_net, data_dir):
    TIME = 0
    TIME_NUM = 0
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=IS_RESIZE)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_h_batch=None,
                                                                                      gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        imgs = os.listdir(data_dir)
        # print(imgs)
        for i, a_img_name in enumerate(imgs):
            file_text, extension = os.path.splitext(a_img_name)
            if extension != ".jpg" and extension != ".tif" and extension != ".png":
                continue
            # f = open('./res_icdar_r/res_{}.txt'.format(a_img_name.split('.jpg')[0]), 'w')

            raw_img = cv2.imread(os.path.join(data_dir, a_img_name))
            # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_h, det_scores_h, det_category_h,
                     det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()

            TIME += end - start
            TIME_NUM += 1

            if WRITE_VOC:
                boxes = np.array(det_boxes_r_, np.int64)
                scores = np.array(det_scores_r_, np.float32)
                labels = np.array(det_category_r_, np.int32)

                det_save_dir = cfgs.INFERENCE_SAVE_PATH
                write_voc_results_file(boxes, labels, scores,
                                       a_img_name.split('.')[0], det_save_dir)
                write_pixel_results(boxes, labels, scores,
                                    a_img_name.split('.')[0], det_save_dir)

            det_detections_h = draw_box_in_img.draw_box_cv(
                np.squeeze(resized_img, 0),
                boxes=det_boxes_h_,
                labels=det_category_h_,
                scores=det_scores_h_)
            det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                np.squeeze(resized_img, 0),
                boxes=det_boxes_r_,
                labels=det_category_r_,
                scores=det_scores_r_)
            save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION)
            tools.mkdir(save_dir)
            if OUTPUT_H_IMG:
                cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg',
                            det_detections_h)

            cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                        det_detections_r)
            view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1,
                     len(imgs))

        print('avg time:{}'.format(TIME / TIME_NUM))
def test_mlt(det_net, real_test_img_list, gpu_ids, show_box, txt_name):

    save_path = os.path.join('./test_mlt', cfgs.VERSION)
    tools.mkdir(save_path)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(gpu_ids.strip().split(','))

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(gpu_ids.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_id), split_records, det_net,
                             result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for i in range(nr_records):
        res = result_queue.get()
        if res['boxes'].shape[0] == 0:
            fw_txt_dt = open(
                os.path.join(
                    save_path, 'res_{}.txt'.format(res['image_id'].split(
                        '/')[-1].split('.')[0].split('ts_')[1])), 'w')
            fw_txt_dt.close()
            pbar.update(1)

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

            continue
        x1, y1, x2, y2, x3, y3, x4, y4 = res['boxes'][:, 0], res['boxes'][:, 1], res['boxes'][:, 2], res['boxes'][:, 3],\
                                         res['boxes'][:, 4], res['boxes'][:, 5], res['boxes'][:, 6], res['boxes'][:, 7]

        x1, y1 = x1 * res['scales'][0], y1 * res['scales'][1]
        x2, y2 = x2 * res['scales'][0], y2 * res['scales'][1]
        x3, y3 = x3 * res['scales'][0], y3 * res['scales'][1]
        x4, y4 = x4 * res['scales'][0], y4 * res['scales'][1]

        boxes = np.transpose(np.stack([x1, y1, x2, y2, x3, y3, x4, y4]))

        if show_box:
            boxes = backward_convert(boxes, False)
            nake_name = res['image_id'].split('/')[-1]
            draw_path = os.path.join(save_path, nake_name)
            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=boxes,
                labels=res['labels'],
                scores=res['scores'],
                method=1,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)

        else:
            fw_txt_dt = open(
                os.path.join(
                    save_path, 'res_{}.txt'.format(res['image_id'].split(
                        '/')[-1].split('.')[0].split('ts_')[1])), 'w')

            for ii, box in enumerate(boxes):
                line = '%d,%d,%d,%d,%d,%d,%d,%d,%.3f\n' % (
                    box[0], box[1], box[2], box[3], box[4], box[5], box[6],
                    box[7], res['scores'][ii])
                fw_txt_dt.write(line)
            fw_txt_dt.close()

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])

        pbar.update(1)

    for p in procs:
        p.join()
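The worker target used by test_mlt is defined elsewhere; each process pins one GPU, runs detection over its slice of the image list, and pushes one result dict per image into result_queue. A hypothetical sketch of that contract (run_detection is an assumed helper, not part of the snippet):

import os

def worker(gpu_id, records, det_net, result_queue):
    # pin this process to one GPU before any TF graph is built (assumed intent)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    for img_path in records:
        boxes, scores, labels, scales = run_detection(det_net, img_path)  # assumed helper
        result_queue.put({'image_id': img_path, 'boxes': boxes,
                          'scores': scores, 'labels': labels, 'scales': scales})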
Exemple #20
0
def test_dota(det_net, real_test_img_list, args, txt_name):

    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(args.gpus.strip().split(','))

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(args.gpus.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_id), split_records, det_net, args,
                             result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    log_dir = './dcl_log/{}'.format(cfgs.VERSION)
    tools.mkdir(log_dir)
    fw_tsv = open(os.path.join(log_dir, 'dcl_meta.tsv'), 'w')
    # fw_tsv.write("Label\n")
    final_logits = []
    for i in range(nr_records):
        res = result_queue.get()

        if args.show_box:

            nake_name = res['image_id'].split('/')[-1]
            tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
            draw_path = os.path.join(save_path, 'dota_img_vis', nake_name)

            draw_img = np.array(cv2.imread(res['image_id']), np.float32)
            detected_boxes = backward_convert(res['boxes'], with_label=False)

            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            detected_scores = res['scores'][detected_indices]
            detected_boxes = detected_boxes[detected_indices]
            detected_categories = res['labels'][detected_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=detected_boxes,
                labels=detected_categories,
                scores=detected_scores,
                method=1,
                head=np.ones_like(detected_scores) * -1,
                is_csl=True,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)

        else:
            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            res['scores'] = res['scores'][detected_indices]
            res['boxes'] = res['boxes'][detected_indices]
            res['labels'] = res['labels'][detected_indices]
            rboxes = backward_convert(res['boxes'], with_label=False)
            rboxes = coordinate_present_convert(rboxes, -1, False)
            rlogits = res['logits'][detected_indices]

            for ii, rb in enumerate(rboxes):
                fw_tsv.write("%d\n" % (int(rb[-1])))
                final_logits.append(rlogits[ii])

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])

        pbar.update(1)

    for p in procs:
        p.join()

    fw_tsv.close()
    final_logits = np.array(final_logits)
    np.save(os.path.join(log_dir, "final_logits.npy"), final_logits)
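The dcl_meta.tsv labels and final_logits.npy vectors written above line up row for row, which matches what the TensorBoard embedding projector consumes (one metadata label plus one vector per row); that appears to be the intended use of the dcl_log directory.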
Exemple #21
0
def inference(det_net, file_paths, des_folder, h_len, w_len, h_overlap, w_overlap, save_res=False):

    if save_res:
        assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.5) in cfgs.py'

    else:
        assert cfgs.SHOW_SCORE_THRSHOLD <= 0.005, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.00) in cfgs.py'

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    if cfgs.NET_NAME in ['resnet101_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=False)

    det_boxes_h, det_scores_h, det_category_h = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        if not os.path.exists('./tmp.txt'):
            fw = open('./tmp.txt', 'w')
            fw.close()

        fr = open('./tmp.txt', 'r')
        pass_img = fr.readlines()
        fr.close()

        for count, img_path in enumerate(file_paths):
            if img_path + '\n' in pass_img:
                continue
            fw = open('./tmp.txt', 'a+')  # opened after the skip check so skipped images do not leak handles
            start = timer()
            img = cv2.imread(img_path)

            box_res = []
            label_res = []
            score_res = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            if imgH < h_len:
                temp = np.zeros([h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = h_len

            if imgW < w_len:
                temp = np.zeros([imgH, w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = w_len

            for hh in range(0, imgH, h_len - h_overlap):
                if imgH - hh - 1 < h_len:
                    hh_ = imgH - h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, w_len - w_overlap):
                    if imgW - ww - 1 < w_len:
                        ww_ = imgW - w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :]

                    det_boxes_h_, det_scores_h_, det_category_h_ = \
                        sess.run(
                            [det_boxes_h, det_scores_h, det_category_h],
                            feed_dict={img_plac: src_img[:, :, ::-1]}
                        )

                    if len(det_boxes_h_) > 0:
                        for ii in range(len(det_boxes_h_)):
                            box = det_boxes_h_[ii]
                            box[0] = box[0] + ww_
                            box[1] = box[1] + hh_
                            box[2] = box[2] + ww_
                            box[3] = box[3] + hh_
                            box_res.append(box)
                            label_res.append(det_category_h_[ii])
                            score_res.append(det_scores_h_[ii])

            box_res = np.array(box_res)
            label_res = np.array(label_res)
            score_res = np.array(score_res)

            box_res_, label_res_, score_res_ = [], [], []

            h_threshold = {'roundabout': 0.35, 'tennis-court': 0.35, 'swimming-pool': 0.4, 'storage-tank': 0.3,
                           'soccer-ball-field': 0.3, 'small-vehicle': 0.4, 'ship': 0.35, 'plane': 0.35,
                           'large-vehicle': 0.4, 'helicopter': 0.4, 'harbor': 0.3, 'ground-track-field': 0.4,
                           'bridge': 0.3, 'basketball-court': 0.4, 'baseball-diamond': 0.3}

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = box_res[index]
                tmp_label_h = label_res[index]
                tmp_score_h = score_res[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                                     thresh=h_threshold[LABEl_NAME_MAP[sub_class]],
                                     max_output_size=500)

                box_res_.extend(np.array(tmp_boxes_h)[inx])
                score_res_.extend(np.array(tmp_score_h)[inx])
                label_res_.extend(np.array(tmp_label_h)[inx])

            time_elapsed = timer() - start

            if save_res:

                scores = np.array(score_res_)
                labels = np.array(label_res_)
                boxes = np.array(box_res_)
                valid_show = scores > cfgs.SHOW_SCORE_THRSHOLD
                scores = scores[valid_show]
                boxes = boxes[valid_show]
                labels = labels[valid_show]

                det_detections_h = draw_box_in_img.draw_boxes_with_label_and_scores(np.array(img, np.float32),
                                                                                    boxes=np.array(boxes),
                                                                                    labels=np.array(labels),
                                                                                    scores=np.array(scores),
                                                                                    in_graph=False)

                save_dir = os.path.join(des_folder, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + img_path.split('/')[-1].split('.')[0] + '_h.jpg',
                            det_detections_h)

                view_bar('{} cost {}s'.format(img_path.split('/')[-1].split('.')[0],
                                              time_elapsed), count + 1, len(file_paths))

            else:
                # eval txt
                CLASS_DOTA = NAME_LABEL_MAP.keys()

                # Task2
                write_handle_h = {}
                txt_dir_h = os.path.join('txt_output', cfgs.VERSION + '_h')
                tools.mkdir(txt_dir_h)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class] = open(os.path.join(txt_dir_h, 'Task2_%s.txt' % sub_class), 'a+')

                for i, hbox in enumerate(box_res_):
                    command = '%s %.3f %.1f %.1f %.1f %.1f\n' % (img_path.split('/')[-1].split('.')[0],
                                                                 score_res_[i],
                                                                 hbox[0], hbox[1], hbox[2], hbox[3])
                    write_handle_h[LABEl_NAME_MAP[label_res_[i]]].write(command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class].close()

            view_bar('%s cost %.3fs' % (img_path.split('/')[-1].split('.')[0],
                                        time_elapsed), count + 1, len(file_paths))
            fw.write('{}\n'.format(img_path))
            fw.close()
        os.remove('./tmp.txt')
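The nested hh/ww loops above tile the image with stride (len - overlap) and clamp the last window so it ends exactly on the image border. The same rule as a tiny standalone helper (the sizes in the example call are made up):

def window_origins(img_len, win_len, overlap):
    origins = []
    for o in range(0, img_len, win_len - overlap):
        # if the remaining strip is narrower than a window, shift back to the border
        origins.append(img_len - win_len if img_len - o - 1 < win_len else o)
    return origins

# window_origins(1024, 600, 150) -> [0, 424, 424]; duplicate origins mean the
# last stride landed past the border and was clamped back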
Exemple #22
0
def eval_with_plac(img_dir, det_net, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB, not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=False)

    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_h = []
        all_boxes_r = []
        imgs = os.listdir(img_dir)
        for i, a_img_name in enumerate(imgs):

            a_img_name = a_img_name.split(image_ext)[0]
            recs = {}
            recs[a_img_name] = parse_rec(
                os.path.join(test_annotation_path, a_img_name + '.xml'))
            #R = [obj for obj in recs[a_img_name]]
            bbox = np.squeeze(np.array([x['bbox'] for x in recs[a_img_name]]))
            #labels = bbox[:, -1]
            if len(bbox.shape) == 1:
                bbox = np.expand_dims(bbox, axis=0)
            labels = bbox[:, -1]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img,  \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch,
                     det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            det_boxes_r_ = det_boxes_r_[det_scores_r_ >= 0.4]
            det_category_r_ = det_category_r_[det_scores_r_ >= 0.4]
            det_scores_r_ = det_scores_r_[det_scores_r_ >= 0.4]

            keep = nms_rotate.nms_rotate_cpu(det_boxes_r_, det_scores_r_, 0.3,
                                             20)
            det_boxes_r_ = det_boxes_r_[keep]
            det_scores_r_ = det_scores_r_[keep]
            det_category_r_ = det_category_r_[keep]
            # keep only elongated boxes: aspect ratio > 2 or <= 1/2
            ratio = det_boxes_r_[:, 2] / det_boxes_r_[:, 3]
            index = (ratio > 2) | (ratio <= 0.5)
            det_boxes_r_ = det_boxes_r_[index]
            det_scores_r_ = det_scores_r_[index]
            det_category_r_ = det_category_r_[index]

            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                det_detections_h = draw_box_in_img.draw_rotate_box_cv1(
                    np.squeeze(resized_img, 0),
                    boxes=bbox,
                    labels=labels,
                    scores=np.ones(bbox.shape[0]))
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                    np.squeeze(resized_img, 0),
                    boxes=det_boxes_r_,
                    labels=det_category_r_,
                    scores=det_scores_r_)
                save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg',
                            det_detections_h[:, :, ::-1])
                cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                            det_detections_r[:, :, ::-1])

            # xmin, ymin, xmax, ymax = det_boxes_h_[:, 0], det_boxes_h_[:, 1], \
            #                          det_boxes_h_[:, 2], det_boxes_h_[:, 3]

            if det_boxes_r_.shape[0] != 0:
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = back_forward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            # xmin = xmin * raw_w / resized_w
            # xmax = xmax * raw_w / resized_w
            # ymin = ymin * raw_h / resized_h
            # ymax = ymax * raw_h / resized_h

            # boxes_h = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            # dets_h = np.hstack((det_category_h_.reshape(-1, 1),
            #                     det_scores_h_.reshape(-1, 1),
            #                     boxes_h))
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1), boxes_r))
            # all_boxes_h.append(dets_h)
            all_boxes_r.append(dets_r)

            tools.view_bar(
                '{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                len(imgs))

        # pickle needs binary mode ('wb'); text mode 'w' fails under Python 3.
        # Note: all_boxes_h stays empty here because the horizontal branch above is commented out.
        fw1 = open(cfgs.VERSION + '_detections_h.pkl', 'wb')
        fw2 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        pickle.dump(all_boxes_h, fw1)
        pickle.dump(all_boxes_r, fw2)
        fw1.close()
        fw2.close()
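
For reference, a minimal sketch of loading the dumped detections back (the file name and the per-row [category, score, x_c, y_c, w, h, theta] layout follow the code above; the helper itself is illustrative and not part of the original):

import pickle

def load_rotated_detections(version):
    # one list entry per image; each entry is an array of
    # [category, score, x_c, y_c, w, h, theta] rows
    with open(version + '_detections_r.pkl', 'rb') as fr:
        return pickle.load(fr)
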
def test_dota(det_net, real_test_img_list, args, txt_name):

    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(args.gpus.strip().split(','))

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(args.gpus.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_id), split_records, det_net, args,
                             result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for i in range(nr_records):
        res = result_queue.get()

        if args.show_box:

            nake_name = res['image_id'].split('/')[-1]
            tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
            draw_path = os.path.join(save_path, 'dota_img_vis', nake_name)

            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            detected_scores = res['scores'][detected_indices]
            detected_boxes = res['boxes'][detected_indices]
            detected_categories = res['labels'][detected_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=detected_boxes,
                labels=detected_categories,
                scores=detected_scores,
                method=1,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)

        else:
            CLASS_DOTA = NAME_LABEL_MAP.keys()
            write_handle = {}

            tools.mkdir(os.path.join(save_path, 'dota_res'))
            for sub_class in CLASS_DOTA:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class] = open(
                    os.path.join(save_path, 'dota_res',
                                 'Task1_%s.txt' % sub_class), 'a+')

            rboxes = forward_convert(res['boxes'], with_label=False)

            for i, rbox in enumerate(rboxes):
                command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % (
                    res['image_id'].split('/')[-1].split('.')[0],
                    res['scores'][i],
                    rbox[0],
                    rbox[1],
                    rbox[2],
                    rbox[3],
                    rbox[4],
                    rbox[5],
                    rbox[6],
                    rbox[7],
                )
                write_handle[LABEL_NAME_MAP[res['labels'][i]]].write(command)

            for sub_class in CLASS_DOTA:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class].close()

            with open(txt_name, 'a+') as fw:
                fw.write('{}\n'.format(res['image_id'].split('/')[-1]))

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])

        pbar.update(1)

    for p in procs:
        p.join()
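
The worker function is not defined in this example; judging from the consumer loop above, each worker must put one dict per image onto result_queue with the keys 'image_id', 'scores', 'boxes', and 'labels'. A hypothetical stub with that contract (run_inference is a placeholder, not a real function from the repo):

def worker(gpu_id, records, det_net, args, result_queue):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)  # pin this process to one GPU
    for img_path in records:
        # placeholder for the actual per-image detection call
        boxes, scores, labels = run_inference(det_net, img_path)
        result_queue.put({'image_id': img_path,
                          'boxes': boxes,
                          'scores': scores,
                          'labels': labels})
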
def test_coco(det_net, real_test_img_list, eval_data, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # expects RGB, not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_test-dev.json'), 'w')
        coco_det = []
        for i, a_img in enumerate(real_test_img_list):

            raw_img = cv2.imread(os.path.join(eval_data, a_img['file_name']))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv2 reads BGR; the network expects RGB
                )
            end = time.time()

            eval_indices = detected_scores >= 0.01
            detected_scores = detected_scores[eval_indices]
            detected_boxes = detected_boxes[eval_indices]
            detected_categories = detected_categories[eval_indices]

            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                draw_img = np.squeeze(resized_img, 0)
                if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)

                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)

                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + '{}.jpg'.format(a_img['id']),
                            final_detections[:, :, ::-1])

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w

            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(np.stack([xmin, ymin, xmax-xmin, ymax-ymin]))

            # building the per-box json entries can be slow when there are many boxes
            for j, box in enumerate(boxes):
                coco_det.append({'bbox': [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
                                 'score': float(detected_scores[j]), 'image_id': a_img['id'],
                                 'category_id': int(classes_originID[LABEl_NAME_MAP[detected_categories[j]]])})

            tools.view_bar('%s image cost %.3fs' % (a_img['id'], (end - start)), i + 1, len(real_test_img_list))

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
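
Each element appended to coco_det above follows the standard COCO detection-results format, with 'bbox' given as [x, y, width, height] rather than corner coordinates (hence the xmax - xmin / ymax - ymin above); an entry with illustrative values:

# illustrative values only
{'bbox': [13.5, 22.1, 110.4, 96.0], 'score': 0.973, 'image_id': 397133, 'category_id': 1}
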
def eval_coco(det_net, real_test_img_list, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # expects RGB, not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_minival.json'), 'w')
        coco_det = []

        pbar = tqdm(real_test_img_list)
        for a_img in pbar:
            record = json.loads(a_img)
            img_path = os.path.join('/data/COCO/val2017',
                                    record['fpath'].split('_')[-1])
            raw_img = cv2.imread(img_path)
            # raw_img = cv2.imread(record['fpath'])
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv2 reads BGR; the network expects RGB
                )

            eval_indices = detected_scores >= 0.01
            detected_scores = detected_scores[eval_indices]
            detected_boxes = detected_boxes[eval_indices]
            detected_categories = detected_categories[eval_indices]

            if draw_imgs:
                show_indices = detected_scores >= 0.3
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                draw_img = np.squeeze(resized_img, 0)

                if cfgs.NET_NAME in [
                        'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
                ]:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) +
                                np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                    draw_img,
                    boxes=show_boxes,
                    labels=show_categories,
                    scores=show_scores,
                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + record['ID'],
                            final_detections[:, :, ::-1])

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w

            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(
                np.stack([xmin, ymin, xmax - xmin, ymax - ymin]))

            # building the per-box json entries can be slow when there are many boxes
            for j, box in enumerate(boxes):
                coco_det.append({
                    'bbox': [float(box[0]), float(box[1]),
                             float(box[2]), float(box[3])],
                    'score': float(detected_scores[j]),
                    'image_id': int(record['ID'].split('.jpg')[0].split('_000000')[-1]),
                    'category_id': int(classes_originID[LABEL_NAME_MAP[detected_categories[j]]])
                })

            pbar.set_description("Eval image %s" % record['ID'])

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
        return os.path.join(save_path, 'coco_minival.json')
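
The returned json path can be scored with pycocotools; a minimal sketch, assuming a ground-truth annotation file at annotation_path (the scoring step is not part of the original code):

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

def score_coco_json(annotation_path, det_json_path):
    coco_gt = COCO(annotation_path)            # ground-truth annotations
    coco_dt = coco_gt.loadRes(det_json_path)   # detections dumped by eval_coco
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()                      # prints the standard AP table
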
Exemple #26
0
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary

    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
    # optimizer = tf.train.AdamOptimizer(lr)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    # Create session
    # allow_soft_placement=True automatically falls back to CPU for ops that cannot be placed on a GPU
    config = tf.ConfigProto(allow_soft_placement=True)
    # allow_growth lets the GPU allocate memory on demand instead of reserving it all up front
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        # directory where TensorBoard summaries are written
        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
        timer = Timer()
        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:

                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])
                    end = time.time()

                    print('{}: step:{}/{}iter'.format(training_time,
                                                      global_stepnp,
                                                      cfgs.MAX_ITERATION))
                    print('>>> rpn_loc_loss:{:.4f}  |  rpn_cla_loss:{:.4f}  |  rpn_total_loss:{:.4f}\n'
                          '>>> fast_rcnn_loc_loss:{:.4f}  |  fast_rcnn_cla_loss:{:.4f}  |  fast_rcnn_total_loss:{:.4f}\n'
                          '>>> single_time:{:.4f}s  |   total_loss:{:.4f} '\
                          .format(rpnLocLoss, rpnClsLoss,rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss,
                          fastrcnnTotalLoss,(end-start), totalLoss))
                    print(
                        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                    )

                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
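
The piecewise_constant schedule used above switches the learning rate at the two DECAY_STEP boundaries; spelled out with illustrative values (not taken from the config):

# assuming DECAY_STEP = [60000, 80000] and LR = 1e-3 (illustrative values)
# step <= 60000          -> LR        = 1e-3
# 60000 < step <= 80000  -> LR / 10.  = 1e-4
# step > 80000           -> LR / 100. = 1e-5
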
def eval_coco(det_net, real_test_img_list, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # expects RGB, not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH,
                                                     is_resize=False)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_minival_ms.json'), 'w')
        coco_det = []
        for i, a_img in enumerate(real_test_img_list):

            record = json.loads(a_img)
            raw_img = cv2.imread(record['fpath'])
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()

            detected_scores_, detected_boxes_, detected_categories_ = [], [], []

            for ss in [600, 800, 1000, 1200]:  # multi-scale test sizes; could come from cfgs.IMG_SHORT_SIDE_LEN
                img_resize = cv2.resize(raw_img, (ss, ss))

                resized_img, tmp_detected_boxes, tmp_detected_scores, tmp_detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}  # cv2 reads BGR; the network expects RGB
                    )

                eval_indices = tmp_detected_scores >= 0.01
                tmp_detected_scores = tmp_detected_scores[eval_indices]
                tmp_detected_boxes = tmp_detected_boxes[eval_indices]
                tmp_detected_categories = tmp_detected_categories[eval_indices]

                xmin, ymin, xmax, ymax = tmp_detected_boxes[:, 0], tmp_detected_boxes[:, 1], \
                                         tmp_detected_boxes[:, 2], tmp_detected_boxes[:, 3]

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                xmin = xmin * raw_w / resized_w
                xmax = xmax * raw_w / resized_w

                ymin = ymin * raw_h / resized_h
                ymax = ymax * raw_h / resized_h

                resize_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))

                detected_scores_.append(tmp_detected_scores)
                detected_boxes_.append(resize_boxes)
                detected_categories_.append(tmp_detected_categories)

            detected_scores_ = np.concatenate(detected_scores_)
            detected_boxes_ = np.concatenate(detected_boxes_)
            detected_categories_ = np.concatenate(detected_categories_)

            detected_scores, detected_boxes, detected_categories = [], [], []

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(detected_categories_ == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = detected_boxes_[index]
                tmp_label_h = detected_categories_[index]
                tmp_score_h = detected_scores_[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                                     thresh=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                     max_output_size=500)

                detected_boxes.extend(np.array(tmp_boxes_h)[inx])
                detected_scores.extend(np.array(tmp_score_h)[inx])
                detected_categories.extend(np.array(tmp_label_h)[inx])

            detected_scores = np.array(detected_scores)
            detected_boxes = np.array(detected_boxes)
            detected_categories = np.array(detected_categories)

            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                #     draw_img = (raw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                # else:
                #     draw_img = raw_img + np.array(cfgs.PIXEL_MEAN)

                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)

                raw_img = np.array(raw_img, np.float32)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(raw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + record['ID'],
                            final_detections)

            # building the per-box json entries can be slow when there are many boxes
            for j, box in enumerate(detected_boxes):
                coco_det.append({'bbox': [float(box[0]), float(box[1]),
                                          float(box[2] - box[0]), float(box[3] - box[1])],
                                 'score': float(detected_scores[j]),
                                 'image_id': int(record['ID'].split('.jpg')[0].split('_000000')[-1]),
                                 'category_id': int(classes_originID[LABEl_NAME_MAP[detected_categories[j]]])})
            end = time.time()
            tools.view_bar('%s image cost %.3fs' % (record['ID'], (end - start)), i + 1, len(real_test_img_list))

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
        return os.path.join(save_path, 'coco_minival_ms.json')
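
nms.py_cpu_nms itself is not shown in these examples; a typical CPU implementation over [x1, y1, x2, y2, score] rows with the call signature used above would look roughly like this (a sketch, not necessarily the repo's exact code):

import numpy as np

def py_cpu_nms(dets, thresh, max_output_size=500):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0 and len(keep) < max_output_size:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(ovr <= thresh)[0] + 1]  # keep boxes below the IoU threshold
    return keep
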
Exemple #28
0
def inference(det_net,
              file_paths,
              des_folder,
              h_len,
              w_len,
              h_overlap,
              w_overlap,
              save_res=False):

    if save_res:
        assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \
            'please set a score threshold >= 0.5 in cfgs.py (e.g. SHOW_SCORE_THRSHOLD = 0.5)'
    else:
        assert cfgs.SHOW_SCORE_THRSHOLD < 0.005, \
            'please set a score threshold < 0.005 in cfgs.py (e.g. SHOW_SCORE_THRSHOLD = 0.00)'

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=False)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_h_batch=None,
                                                                                      gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')

        for count, img_path in enumerate(file_paths):
            start = timer()
            img = cv2.imread(img_path)

            box_res = []
            label_res = []
            score_res = []
            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            if imgH < h_len:
                temp = np.zeros([h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = h_len

            if imgW < w_len:
                temp = np.zeros([imgH, w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = w_len

            for hh in range(0, imgH, h_len - h_overlap):
                if imgH - hh - 1 < h_len:
                    hh_ = imgH - h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, w_len - w_overlap):
                    if imgW - ww - 1 < w_len:
                        ww_ = imgW - w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :]

                    det_boxes_h_, det_scores_h_, det_category_h_, \
                    det_boxes_r_, det_scores_r_, det_category_r_ = \
                        sess.run(
                            [det_boxes_h, det_scores_h, det_category_h,
                             det_boxes_r, det_scores_r, det_category_r],
                            feed_dict={img_plac: src_img[:, :, ::-1]}
                        )

                    if len(det_boxes_h_) > 0:
                        for ii in range(len(det_boxes_h_)):
                            box = det_boxes_h_[ii]
                            box[0] = box[0] + ww_
                            box[1] = box[1] + hh_
                            box[2] = box[2] + ww_
                            box[3] = box[3] + hh_
                            box_res.append(box)
                            label_res.append(det_category_h_[ii])
                            score_res.append(det_scores_h_[ii])
                    if len(det_boxes_r_) > 0:
                        for ii in range(len(det_boxes_r_)):
                            box_rotate = det_boxes_r_[ii]
                            box_rotate[0] = box_rotate[0] + ww_
                            box_rotate[1] = box_rotate[1] + hh_
                            box_res_rotate.append(box_rotate)
                            label_res_rotate.append(det_category_r_[ii])
                            score_res_rotate.append(det_scores_r_[ii])

            box_res = np.array(box_res)
            label_res = np.array(label_res)
            score_res = np.array(score_res)

            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_, label_res_rotate_, score_res_rotate_ = [], [], []
            box_res_, label_res_, score_res_ = [], [], []

            r_threshold = {
                'roundabout': 0.1,
                'tennis-court': 0.3,
                'swimming-pool': 0.1,
                'storage-tank': 0.2,
                'soccer-ball-field': 0.3,
                'small-vehicle': 0.2,
                'ship': 0.05,
                'plane': 0.3,
                'large-vehicle': 0.1,
                'helicopter': 0.2,
                'harbor': 0.0001,
                'ground-track-field': 0.3,
                'bridge': 0.0001,
                'basketball-court': 0.3,
                'baseball-diamond': 0.3
            }

            h_threshold = {
                'roundabout': 0.35,
                'tennis-court': 0.35,
                'swimming-pool': 0.4,
                'storage-tank': 0.3,
                'soccer-ball-field': 0.3,
                'small-vehicle': 0.4,
                'ship': 0.35,
                'plane': 0.35,
                'large-vehicle': 0.4,
                'helicopter': 0.4,
                'harbor': 0.3,
                'ground-track-field': 0.4,
                'bridge': 0.3,
                'basketball-court': 0.4,
                'baseball-diamond': 0.3
            }

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r = np.array(tmp_boxes_r)
                tmp = np.zeros(
                    [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_r
                tmp[:, -1] = np.array(tmp_score_r)

                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r),
                        scores=np.array(tmp_score_r),
                        iou_threshold=r_threshold[LABEl_NAME_MAP[sub_class]],
                        max_output_size=500)
                except Exception:
                    # rotate_gpu_nms computes the IoU of two identical rectangles as 0,
                    # so jitter the boxes slightly before retrying on the GPU
                    jitter = np.zeros(
                        [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r.shape[0]) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) +
                        np.array(jitter, np.float32),
                        float(r_threshold[LABEl_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = box_res[index]
                tmp_label_h = label_res[index]
                tmp_score_h = score_res[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros(
                    [tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(
                    dets=np.array(tmp, np.float32),
                    thresh=h_threshold[LABEl_NAME_MAP[sub_class]],
                    max_output_size=500)

                box_res_.extend(np.array(tmp_boxes_h)[inx])
                score_res_.extend(np.array(tmp_score_h)[inx])
                label_res_.extend(np.array(tmp_label_h)[inx])

            time_elapsed = timer() - start

            if save_res:
                det_detections_h = draw_box_in_img.draw_box_cv(
                    np.array(img, np.float32) - np.array(cfgs.PIXEL_MEAN),
                    boxes=np.array(box_res_),
                    labels=np.array(label_res_),
                    scores=np.array(score_res_))
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                    np.array(img, np.float32) - np.array(cfgs.PIXEL_MEAN),
                    boxes=np.array(box_res_rotate_),
                    labels=np.array(label_res_rotate_),
                    scores=np.array(score_res_rotate_))
                save_dir = os.path.join(des_folder, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(
                    save_dir + '/' + img_path.split('/')[-1].split('.')[0] +
                    '_h.jpg', det_detections_h)
                cv2.imwrite(
                    save_dir + '/' + img_path.split('/')[-1].split('.')[0] +
                    '_r.jpg', det_detections_r)

                view_bar(
                    '{} cost {}s'.format(
                        img_path.split('/')[-1].split('.')[0], time_elapsed),
                    count + 1, len(file_paths))

            else:
                # eval txt
                CLASS_DOTA = NAME_LABEL_MAP.keys()
                # Task1
                write_handle_r = {}
                write_handle_h_ = {}
                txt_dir_r = os.path.join('txt_output', cfgs.VERSION + '_r')
                txt_dir_h_minAreaRect = os.path.join(
                    'txt_output', cfgs.VERSION + '_h_minAreaRect')
                tools.mkdir(txt_dir_r)
                tools.mkdir(txt_dir_h_minAreaRect)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_r[sub_class] = open(
                        os.path.join(txt_dir_r, 'Task1_%s.txt' % sub_class),
                        'a+')
                    write_handle_h_[sub_class] = open(
                        os.path.join(txt_dir_h_minAreaRect,
                                     'Task2_%s.txt' % sub_class), 'a+')

                rboxes = coordinate_convert.forward_convert(box_res_rotate_,
                                                            with_label=False)

                for i, rbox in enumerate(rboxes):
                    command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0],
                        score_res_rotate_[i],
                        rbox[0],
                        rbox[1],
                        rbox[2],
                        rbox[3],
                        rbox[4],
                        rbox[5],
                        rbox[6],
                        rbox[7],
                    )
                    command_ = '%s %.3f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0],
                        score_res_rotate_[i], min(rbox[::2]), min(
                            rbox[1::2]), max(rbox[::2]), max(rbox[1::2]))
                    write_handle_r[LABEl_NAME_MAP[label_res_rotate_[i]]].write(
                        command)
                    write_handle_h_[LABEl_NAME_MAP[
                        label_res_rotate_[i]]].write(command_)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_r[sub_class].close()

                # Task2
                write_handle_h = {}
                txt_dir_h = os.path.join('txt_output', cfgs.VERSION + '_h')
                tools.mkdir(txt_dir_h)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class] = open(
                        os.path.join(txt_dir_h, 'Task2_%s.txt' % sub_class),
                        'a+')

                for i, hbox in enumerate(box_res_):
                    command = '%s %.3f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0], score_res_[i],
                        hbox[0], hbox[1], hbox[2], hbox[3])
                    write_handle_h[LABEl_NAME_MAP[label_res_[i]]].write(
                        command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class].close()

                view_bar(
                    '{} cost {}s'.format(
                        img_path.split('/')[-1].split('.')[0], time_elapsed),
                    count + 1, len(file_paths))
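
The sliding-window indices in the crop loop above are computed inline and are easy to misread; an equivalent stand-alone helper that yields the same clamped window origins (for illustration only):

def window_origins(img_size, win_len, overlap):
    # step by (win_len - overlap); clamp any window that would cross the border
    origins = []
    for start in range(0, img_size, win_len - overlap):
        origins.append(img_size - win_len if img_size - start - 1 < win_len else start)
    return origins

# window_origins(1000, 600, 200) -> [0, 400, 400]: the clamped origin repeats,
# so the loop above runs the last window twice rather than deduplicating it.
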
Exemple #29
0
def train():

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    # gradients = faster_rcnn.get_gradients(optimizer, total_loss)
    #
    # # enlarge_gradients for bias
    # if cfgs.MUTILPY_BIAS_GRADIENT:
    #     gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)
    #
    # if cfgs.GRADIENT_CLIPPING_BY_NORM:
    #     with tf.name_scope('clip_gradients_YJR'):
    #         gradients = slim.learning.clip_gradient_norms(gradients,
    #                                                       cfgs.GRADIENT_CLIPPING_BY_NORM)
    # # _____________________________________________________________________________________________compute gradients
    #
    #
    #
    # # train_op
    # train_op = optimizer.apply_gradients(grads_and_vars=gradients,
    #                                      global_step=global_step)

    tower_grads = []
    # cfgs.GPU_GROUP is a comma-separated string like '0,1'; count GPUs, not characters
    num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
    for i in range(num_gpu):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('GGpu%d' % i) as scope:
                loss, faster_rcnn, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,\
                fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss = tower_loss(scope)
                tf.get_variable_scope().reuse_variables()
                grads = optimizer.compute_gradients(loss)

                if cfgs.MUTILPY_BIAS_GRADIENT:
                    grads = faster_rcnn.enlarge_gradients_for_bias(grads)

                if cfgs.GRADIENT_CLIPPING_BY_NORM:
                    with tf.name_scope('clip_gradients_YJR'):
                        grads = slim.learning.clip_gradient_norms(
                            grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                tower_grads.append(grads)
    grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(grads, global_step)

    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, loss])

                    end = time.time()
                    print(""" {}: step{}    image_name:{} |\t
                              rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                              fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                              total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_name[0]), rpnLocLoss, rpnClsLoss,
                                  rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
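
average_gradients is referenced but not defined in this example; the conventional multi-tower implementation (as in the classic TensorFlow multi-GPU tutorials; a sketch, not necessarily this repo's version) averages each variable's gradient across towers:

def average_gradients(tower_grads):
    # tower_grads is [[(grad, var), ...] per tower]; zip groups the same var across towers
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads
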
Exemple #30
0
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]

            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                [
                                    slim.conv2d, slim.conv2d_in_plane,
                                    slim.conv2d_transpose,
                                    slim.separable_conv2d, slim.fully_connected
                                ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                loss_dict = outputs[-1]

                                # average each component loss over the GPUs and
                                # accumulate it into the dict used for summaries
                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / num_gpu

                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    # add the regularization (weight-decay)
                                    # losses only once, on the last tower
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        # share variables with the remaining towers
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            # optionally rescale gradients: bias terms get the configured
            # multiplier and the 'conv_new' layers a fixed 3x boost
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        # keep an exponential moving average of all trainable variables;
        # the shadow copies can be restored at evaluation time via
        # variable_averages.variables_to_restore()
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('model restored')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    # plain training step
                    _, global_stepnp = sess.run([train_op, global_step])

                elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    # training step with timing and loss logging
                    start = time.time()
                    _, global_stepnp, total_loss_dict_ = \
                        sess.run([train_op, global_step, total_loss_dict])
                    end = time.time()

                    print('***' * 20)
                    print('%s: global_step:%d  current_step:%d' %
                          (training_time,
                           (global_stepnp - 1) * num_gpu, step * num_gpu))
                    print('per_cost_time:%.3fs' % ((end - start) / num_gpu))
                    loss_str = ''
                    for k in total_loss_dict_.keys():
                        loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                    print(loss_str)

                    # abort if the loss has diverged
                    if np.isnan(total_loss_dict_['total_losses']):
                        sys.exit(0)

                elif step % cfgs.SMRY_ITER == 0:
                    # training step that also writes tensorboard summaries
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(
                        summary_str, (global_stepnp - 1) * num_gpu)
                    summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu)
                        == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_{}model.ckpt'.format(
                            cfgs.DATASET_NAME, (global_stepnp - 1) * num_gpu))
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
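
The sum_gradients helper used above is not shown in this example. Below is a minimal sketch of the usual multi-tower version, assuming one [(grad, var), ...] list per GPU with variables shared across towers (the repository's implementation may differ, e.g. it may sum rather than average):

def sum_gradients(tower_grads):
    # tower_grads: one [(grad, var), ...] list per tower. The variables
    # are shared, so keep the var of the first tower and average the
    # gradients element-wise across towers.
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        averaged.append((grad, grads_and_vars[0][1]))
    return averaged
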
def eval_coco(det_net, real_test_img_list, gpu_ids):
    '''
    Run detection with one worker process per GPU and dump the collected
    detections as a COCO-format json file.

    :param det_net: the detection network to evaluate
    :param real_test_img_list: list of test image records
    :param gpu_ids: comma-separated GPU id string, e.g. '0,1'
    :return: path of the written json file
    '''

    save_path = os.path.join('./eval_coco', cfgs.VERSION)
    tools.mkdir(save_path)
    fw_json_dt = open(os.path.join(save_path, 'coco_minival.json'), 'w')
    coco_det = []

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(gpu_ids.strip().split(','))

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(5000)
    procs = []

    # launch one detection worker per GPU, each handling a contiguous
    # slice of the test images
    for i in range(gpu_num):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_ids.strip().split(',')[i]),
                             split_records, det_net, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for i in range(nr_records):
        res = result_queue.get()

        xmin, ymin, xmax, ymax = res['boxes'][:, 0], res['boxes'][:, 1], \
                                 res['boxes'][:, 2], res['boxes'][:, 3]

        # map the boxes from the resized image back to the original
        # image coordinates
        xmin = xmin * res['scales'][0]
        xmax = xmax * res['scales'][0]

        ymin = ymin * res['scales'][1]
        ymax = ymax * res['scales'][1]

        # COCO expects boxes as [x, y, width, height]
        boxes = np.transpose(np.stack([xmin, ymin, xmax - xmin, ymax - ymin]))

        sort_scores = np.array(res['scores'])
        sort_labels = np.array(res['labels'])
        sort_boxes = np.array(boxes)

        # if len(res['scores']) > cfgs.MAXIMUM_DETECTIONS:
        #     sort_indx = np.argsort(np.array(res['scores']) * -1)[:cfgs.MAXIMUM_DETECTIONS]
        #     # print(sort_indx)
        #     sort_scores = np.array(res['scores'])[sort_indx]
        #     sort_labels = np.array(res['labels'])[sort_indx]
        #     sort_boxes = np.array(boxes)[sort_indx]

        for j, box in enumerate(sort_boxes):
            coco_det.append({
                'bbox': [float(box[0]), float(box[1]),
                         float(box[2]), float(box[3])],
                'score': float(sort_scores[j]),
                'image_id': int(res['image_id'].split('.jpg')[0].split('_000000')[-1]),
                'category_id': int(classes_originID[LABEL_NAME_MAP[sort_labels[j]]])
            })

        pbar.set_description("Eval image %s" % res['image_id'])

        pbar.update(1)

    for p in procs:
        p.join()

    json.dump(coco_det, fw_json_dt)
    fw_json_dt.close()
    return os.path.join(save_path, 'coco_minival.json')
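
The json file that eval_coco returns can then be scored with pycocotools. A short usage sketch; the ground-truth annotation path below is an assumed placeholder:

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('instances_minival2014.json')  # assumed ground-truth json
coco_dt = coco_gt.loadRes(eval_coco(det_net, real_test_img_list, '0,1'))
coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints the standard COCO AP metrics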