def parse_rec(filename):
    """Parse a PASCAL VOC style xml file whose boxes are stored as 4 corners.

    Args:
        filename: path (or file object) accepted by ``ET.parse``.

    Returns:
        A list with one dict per ``<object>`` element:
        ``name``      - text of the ``<name>`` child,
        ``difficult`` - always 0 (the flag stored in the file is ignored),
        ``bbox``      - result of ``coordinate_convert.backward_convert`` applied
                        to a ``1 x 8`` float32 array ``[x1, y1, ..., x4, y4]``
                        with ``with_label=False``.
    """
    corner_tags = ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4')

    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        # The annotation's own pose/truncated/difficult fields are not read;
        # every object is marked as not difficult.
        obj_struct['difficult'] = 0

        bbox = obj.find('bndbox')
        # float() instead of eval(): the text comes from a file on disk and
        # must never be executed as Python. The values end up as float32
        # anyway, so numeric results are unchanged.
        rbox = [float(bbox.find(tag).text) for tag in corner_tags]
        rbox = np.array([rbox], np.float32)
        rbox = coordinate_convert.backward_convert(rbox, with_label=False)
        obj_struct['bbox'] = rbox
        objects.append(obj_struct)

    return objects
def clip_image(file_idx, image, boxes_all, width, height, stride_w, stride_h):
    """Cut ``image`` into overlapping patches and save each patch with its boxes.

    Boxes whose converted width or height (columns 2 and 3 of
    ``backward_convert(boxes_all[:, :8], False)``) is not larger than 5 are
    printed and dropped. For every window position the box coordinates are
    shifted into patch coordinates; boxes whose corner-average centre lies
    inside the patch are written with ``save_to_xml`` next to the patch image.

    Args:
        file_idx: prefix used for the output file names.
        image: array indexed as ``image[row, col]``.
        boxes_all: 2-D array; columns 0..7 are x1, y1, ..., x4, y4 and the last
            two columns are copied through unchanged.
            # presumably label/difficult - TODO confirm against caller
        width, height: patch size.
        stride_w, stride_h: step between window origins.

    Reads the module-level names ``USE_HEAD``, ``save_dir`` and ``class_list``.
    """
    rboxes = backward_convert(boxes_all[:, :8], False)
    print(boxes_all[np.logical_or(rboxes[:, 2] <= 5, rboxes[:, 3] <= 5), :])
    boxes_all = boxes_all[np.logical_and(rboxes[:, 2] > 5, rboxes[:, 3] > 5), :]

    if boxes_all.shape[0] <= 0:
        return

    shape = image.shape
    img_rows, img_cols = shape[0], shape[1]
    for start_h in range(0, img_rows, stride_h):
        for start_w in range(0, img_cols, stride_w):
            src = copy.deepcopy(boxes_all)
            if USE_HEAD:
                shifted = np.zeros_like(boxes_all)
            else:
                shifted = np.zeros_like(boxes_all[:, :10])

            # Windows that would run past the border are pulled back so the
            # patch keeps its full size whenever the image is large enough.
            row_origin = img_rows - height if start_h + height > img_rows else start_h
            col_origin = img_cols - width if start_w + width > img_cols else start_w
            top_left_row = max(row_origin, 0)
            top_left_col = max(col_origin, 0)
            bottom_right_row = min(start_h + height, img_rows)
            bottom_right_col = min(start_w + width, img_cols)

            patch = image[top_left_row:bottom_right_row, top_left_col:bottom_right_col]

            # x coordinates live in the even columns, y in the odd ones.
            shifted[:, 0:7:2] = src[:, 0:7:2] - top_left_col
            shifted[:, 1:8:2] = src[:, 1:8:2] - top_left_row
            if USE_HEAD:
                shifted[:, 8] = src[:, 8] - top_left_col
                shifted[:, 9] = src[:, 9] - top_left_row
            shifted[:, -2:] = src[:, -2:]

            center_y = 0.25 * (shifted[:, 1] + shifted[:, 3] + shifted[:, 5] + shifted[:, 7])
            center_x = 0.25 * (shifted[:, 0] + shifted[:, 2] + shifted[:, 4] + shifted[:, 6])

            inside = np.logical_and(
                np.logical_and(center_y >= 0, center_x >= 0),
                np.logical_and(center_y <= (bottom_right_row - top_left_row),
                               center_x <= (bottom_right_col - top_left_col)))
            idx = np.where(inside)[0]

            if len(idx) == 0:
                continue
            if not (patch.shape[0] > 5 and patch.shape[1] > 5):
                continue

            patch_name = "%s_%04d_%04d" % (file_idx, top_left_row, top_left_col)

            mkdir(os.path.join(save_dir, 'images'))
            img = os.path.join(save_dir, 'images', "%s_%04d_%04d.jpg" % (file_idx, top_left_row, top_left_col))
            cv2.imwrite(img, patch)

            mkdir(os.path.join(save_dir, 'labelxml'))
            xml = os.path.join(save_dir, 'labelxml', "%s_%04d_%04d.xml" % (file_idx, top_left_row, top_left_col))
            save_to_xml(xml, patch_name, patch.shape[0], patch.shape[1],
                        shifted[idx, :], class_list)
def filter_small_gt(gtboxes):
    """Drop ground-truth boxes that are thinner than 5 in either dimension.

    The boxes are converted with ``backward_convert`` (default arguments),
    rows whose column 2 or column 3 is below 5 are removed, and the survivors
    are converted back with ``forward_convert``.

    If nothing survives, the first input box is kept with columns 2 and 3
    both forced to 5, so the result always holds at least one box.

    Args:
        gtboxes: boxes accepted by ``backward_convert``; must contain at
            least one row.

    Returns:
        The output of ``forward_convert`` for the kept boxes.
    """
    rboxes = backward_convert(gtboxes)

    # Same result as filtering on column 2 first and column 3 second.
    big_enough = np.logical_and(rboxes[:, 2] >= 5, rboxes[:, 3] >= 5)
    kept = rboxes[big_enough, :]

    if kept.shape[0] == 0:
        # Fall back to the first box, inflated to the minimum size.
        kept = np.reshape(rboxes[0, :], [-1, 6])
        kept[:, 2] = 5.
        kept[:, 3] = 5.

    return forward_convert(kept)
def worker(gpu_id, images, det_net, args, result_queue):
    """Detect rotated boxes on ``images`` with a sliding window on one GPU.

    The function pins the process to ``gpu_id`` through
    ``CUDA_VISIBLE_DEVICES``, builds the inference graph, restores the
    checkpoint returned by ``det_net.get_restorer()`` and then, per image:

    1. zero-pads the image up to ``args.h_len`` x ``args.w_len`` if smaller,
    2. runs the network on windows of that size stepped by
       ``len - overlap`` (the last window is pulled back to the border),
    3. rescales the detections to window pixels and offsets them by the
       window origin,
    4. applies per-class rotated NMS,
    5. puts a dict with ``boxes``, ``scores``, ``labels`` and ``image_id``
       on ``result_queue``.

    Args:
        gpu_id: value written to ``CUDA_VISIBLE_DEVICES``.
        images: iterable of image paths.
        det_net: object providing ``build_whole_detection_network`` and
            ``get_restorer``.
        args: namespace with ``h_len``, ``w_len``, ``h_overlap``, ``w_overlap``.
        result_queue: queue receiving one result dict per image.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # Per-class IoU thresholds for the rotated NMS. The table is constant,
    # so it is built once instead of once per image.
    threshold = {
        'roundabout': 0.1,
        'tennis-court': 0.3,
        'swimming-pool': 0.1,
        'storage-tank': 0.2,
        'soccer-ball-field': 0.3,
        'small-vehicle': 0.2,
        'ship': 0.05,
        'plane': 0.3,
        'large-vehicle': 0.1,
        'helicopter': 0.2,
        'harbor': 0.0001,
        'ground-track-field': 0.3,
        'bridge': 0.0001,
        'basketball-court': 0.3,
        'baseball-diamond': 0.3
    }

    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model %d ...' % gpu_id)

        for img_path in images:
            # if 'P0016' not in img_path:
            #     continue
            img = cv2.imread(img_path)

            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            # Pad images smaller than one window so a full window always fits.
            if imgH < args.h_len:
                temp = np.zeros([args.h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = args.h_len

            if imgW < args.w_len:
                temp = np.zeros([imgH, args.w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = args.w_len

            for hh in range(0, imgH, args.h_len - args.h_overlap):
                # Pull the last row of windows back so it ends at the border.
                if imgH - hh - 1 < args.h_len:
                    hh_ = imgH - args.h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, args.w_len - args.w_overlap):
                    if imgW - ww - 1 < args.w_len:
                        ww_ = imgW - args.w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + args.h_len), ww_:(ww_ + args.w_len), :]

                    # cv2 loads BGR; the placeholder expects RGB.
                    resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                        sess.run(
                            [img_batch, detection_boxes, detection_scores, detection_category],
                            feed_dict={img_plac: src_img[:, :, ::-1]}
                        )

                    resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                    src_h, src_w = src_img.shape[0], src_img.shape[1]

                    if len(det_boxes_r_) > 0:
                        # Scale in corner form, then go back to the
                        # representation the network produced.
                        det_boxes_r_ = forward_convert(det_boxes_r_, False)
                        det_boxes_r_[:, 0::2] *= (src_w / resized_w)
                        det_boxes_r_[:, 1::2] *= (src_h / resized_h)
                        det_boxes_r_ = backward_convert(det_boxes_r_, False)

                        for ii in range(len(det_boxes_r_)):
                            box_rotate = det_boxes_r_[ii]
                            # Window coordinates -> full image coordinates.
                            box_rotate[0] = box_rotate[0] + ww_
                            box_rotate[1] = box_rotate[1] + hh_
                            box_res_rotate.append(box_rotate)
                            label_res_rotate.append(det_category_r_[ii])
                            score_res_rotate.append(det_scores_r_[ii])

            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r = np.array(tmp_boxes_r)
                # Boxes with the score appended as last column (GPU NMS input).
                tmp = np.zeros(
                    [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_r
                tmp[:, -1] = np.array(tmp_score_r)

                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r),
                        scores=np.array(tmp_score_r),
                        iou_threshold=threshold[LABEL_NAME_MAP[sub_class]],
                        max_output_size=500)
                except Exception:
                    # Fall back to the GPU NMS when the CPU version fails.
                    # `Exception` (not a bare except) so Ctrl-C / SystemExit
                    # still stop the worker.
                    # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                    jitter = np.zeros(
                        [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r.shape[0], ) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) + np.array(jitter, np.float32),
                        float(threshold[LABEL_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            result_dict = {
                'boxes': np.array(box_res_rotate_),
                'scores': np.array(score_res_rotate_),
                'labels': np.array(label_res_rotate_),
                'image_id': img_path
            }
            # NOTE(review): put_nowait raises if the queue is bounded and
            # full; confirm the consumer always keeps up.
            result_queue.put_nowait(result_dict)
def test_dota(det_net, real_test_img_list, args, txt_name):
    """Run DOTA inference on several GPUs and collect the results.

    The image list is split into contiguous chunks, one ``worker`` process
    per id in the comma separated ``args.gpus``. Exactly
    ``len(real_test_img_list)`` results are read from the shared queue.

    * ``args.show_box`` true: detections scoring at least ``cfgs.VIS_SCORE``
      are drawn and saved under ``./test_dota/<VERSION>/dota_img_vis``.
    * otherwise: one line per detection is appended to
      ``./test_dota/<VERSION>/dota_res/Task1_<class>.txt`` and the image file
      name is appended to ``txt_name``. The line format reads
      ``rbox[0]`` .. ``rbox[7]``, so every box needs at least 8 values.

    Args:
        det_net: detection network object forwarded to ``worker``.
        real_test_img_list: list of image paths.
        args: namespace with ``gpus`` and ``show_box`` (also forwarded to
            ``worker``).
        txt_name: path of the progress file listing processed images.
    """
    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_ids = args.gpus.strip().split(',')
    gpu_num = len(gpu_ids)

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(gpu_ids):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker, args=(int(gpu_id), split_records, det_net, args, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for _ in range(nr_records):
        res = result_queue.get()
        image_name = res['image_id'].split('/')[-1]

        if args.show_box:
            tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
            draw_path = os.path.join(save_path, 'dota_img_vis', image_name)

            draw_img = np.array(cv2.imread(res['image_id']), np.float32)
            detected_boxes = backward_convert(res['boxes'], with_label=False)

            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            detected_scores = res['scores'][detected_indices]
            detected_boxes = detected_boxes[detected_indices]
            detected_categories = res['labels'][detected_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                boxes=detected_boxes,
                                                                                labels=detected_categories,
                                                                                scores=detected_scores,
                                                                                method=1,
                                                                                head=np.ones_like(detected_scores) * -1,
                                                                                in_graph=False)
            cv2.imwrite(draw_path, final_detections)
        else:
            CLASS_DOTA = NAME_LABEL_MAP.keys()
            write_handle = {}

            tools.mkdir(os.path.join(save_path, 'dota_res'))
            # try/finally so every file opened so far is closed even if
            # formatting or writing a detection raises.
            try:
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle[sub_class] = open(
                        os.path.join(save_path, 'dota_res', 'Task1_%s.txt' % sub_class), 'a+')

                # rboxes = forward_convert(res['boxes'], with_label=False)

                image_stem = image_name.split('.')[0]
                for box_idx, rbox in enumerate(res['boxes']):
                    command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % (
                        image_stem,
                        res['scores'][box_idx],
                        rbox[0], rbox[1], rbox[2], rbox[3],
                        rbox[4], rbox[5], rbox[6], rbox[7],)
                    write_handle[LABEL_NAME_MAP[res['labels'][box_idx]]].write(command)
            finally:
                for handle in write_handle.values():
                    handle.close()

            with open(txt_name, 'a+') as fw:
                fw.write('{}\n'.format(image_name))

        pbar.set_description("Test image %s" % image_name)
        pbar.update(1)

    for p in procs:
        p.join()
# init_op = tf.group( # tf.global_variables_initializer(), # tf.local_variables_initializer() # ) # # config = tf.ConfigProto() # config.gpu_options.allow_growth = True # # with tf.Session(config=config) as sess: # sess.run(init_op) # # coord = tf.train.Coordinator() # threads = tf.train.start_queue_runners(sess, coord) # # img_name_batch_, img_batch_, gtboxes_and_label_batch_, num_objects_batch_, img_h_batch_, img_w_batch_ \ # = sess.run([img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch]) # # print(img_name_batch_.shape) # print(img_batch_.shape) # print(gtboxes_and_label_batch_.shape) # print(num_objects_batch_.shape) # print(img_h_batch_.shape) # print('debug') # # coord.request_stop() # coord.join(threads) tmp = np.array([[50, 50, 40, 50, -30, 1], [50, 50, 4, 5, -30, 1]]) tmp = forward_convert(tmp) print(filter_small_gt(tmp)) print(backward_convert(filter_small_gt(tmp)))
def eval_with_plac(det_net, args):
    """Run (optionally multi-scale) inference on every image in ``args.img_dir``.

    For each image the network is run once per short-side length, the
    detections are rescaled to the raw image, merged, and reduced with a
    per-class rotated NMS.

    Args:
        det_net: object providing ``build_whole_detection_network`` and
            ``get_restorer``.
        args: namespace with ``img_dir``, ``image_ext``, ``multi_scale`` and
            ``draw_imgs``.

    Returns:
        A list with one array per image, in ``os.listdir`` order. Each row is
        ``[label, score, x_c, y_c, w, h, theta]`` where the five box values
        are columns 0..4 of ``backward_convert`` on the kept boxes. Images
        without detections contribute an array with zero rows and 7 columns.
    """
    # Per-class IoU thresholds for the rotated NMS (constant, built once).
    threshold = {'car': 0.2, 'plane': 0.3}

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category, detection_boxes_angle = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
        gt_smooth_label=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if isinstance(
            cfgs.IMG_SHORT_SIDE_LEN, list) else [cfgs.IMG_SHORT_SIDE_LEN]
        # Single-scale testing only uses the first configured length.
        img_short_side_len_list = [
            img_short_side_len_list[0]
        ] if not args.multi_scale else img_short_side_len_list

        imgs = os.listdir(args.img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(args.image_ext)[0]

            raw_img = cv2.imread(
                os.path.join(args.img_dir, a_img_name + args.image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            for short_size in img_short_side_len_list:
                max_len = cfgs.IMG_MAX_LENGTH
                # Resize so the short side equals short_size, capping the
                # long side at max_len.
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w),
                                       max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                # cv2 loads BGR; the placeholder expects RGB.
                resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                    sess.run(
                        [img_batch, detection_boxes_angle, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                if len(det_boxes_r_) > 0:
                    # Boxes stay in corner form until after NMS.
                    det_boxes_r_ = forward_convert(det_boxes_r_, False)
                    det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                    det_boxes_r_[:, 1::2] *= (raw_h / resized_h)

                    for ii in range(len(det_boxes_r_)):
                        box_rotate = det_boxes_r_[ii]
                        box_res_rotate.append(box_rotate)
                        label_res_rotate.append(det_category_r_[ii])
                        score_res_rotate.append(det_scores_r_[ii])

            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r_),
                        scores=np.array(tmp_score_r),
                        iou_threshold=threshold[LABEL_NAME_MAP[sub_class]],
                        max_output_size=150)
                except Exception:
                    # Fall back to the GPU NMS when the CPU version fails.
                    # `Exception` (not a bare except) so Ctrl-C / SystemExit
                    # are not swallowed.
                    tmp_boxes_r_ = np.array(tmp_boxes_r_)
                    tmp = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    tmp[:, 0:-1] = tmp_boxes_r_
                    tmp[:, -1] = np.array(tmp_score_r)
                    # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                    jitter = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) + np.array(jitter, np.float32),
                        float(threshold[LABEL_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            box_res_rotate_ = np.array(box_res_rotate_)
            score_res_rotate_ = np.array(score_res_rotate_)
            label_res_rotate_ = np.array(label_res_rotate_)

            if args.draw_imgs:
                detected_indices = score_res_rotate_ >= cfgs.VIS_SCORE
                detected_scores = score_res_rotate_[detected_indices]
                detected_boxes = box_res_rotate_[detected_indices]
                detected_boxes = backward_convert(detected_boxes, with_label=False)
                detected_categories = label_res_rotate_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.array(raw_img, np.float32),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=False,
                    is_csl=True)

                save_dir = os.path.join('test_ucas_aod', cfgs.VERSION,
                                        'ucas_aod_img_vis')
                tools.mkdir(save_dir)

                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if box_res_rotate_.shape[0] != 0:
                box_res_rotate_ = backward_convert(box_res_rotate_, False)
            else:
                # np.array([]) is 1-D, so the column slicing below would raise
                # IndexError for an image without detections. Give the empty
                # result an explicit (0, 5) shape instead.
                box_res_rotate_ = np.zeros((0, 5), np.float32)

            x_c, y_c, w, h, theta = box_res_rotate_[:, 0], box_res_rotate_[:, 1], box_res_rotate_[:, 2], \
                                    box_res_rotate_[:, 3], box_res_rotate_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((label_res_rotate_.reshape(-1, 1),
                                score_res_rotate_.reshape(-1, 1), boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):
    """Run single-scale inference on every file listed in ``img_dir``.

    Args:
        img_dir: directory whose entries are evaluated.
        det_net: object providing ``build_whole_detection_network`` and
            ``get_restorer``.
        num_imgs: accepted for interface compatibility; not read here.
        image_ext: extension split off each file name and re-appended when
            the image is loaded.
        draw_imgs: when true, detections scoring at least ``cfgs.VIS_SCORE``
            are drawn on the resized network input and saved under
            ``test_hrsc/<VERSION>/hrsc2016_img_vis``.

    Returns:
        A list with one array per image; each row is
        ``[category, score, x_c, y_c, w, h, theta]`` with the box rescaled to
        the raw image size.
    """
    v1d_backbones = ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = short_side_resize_for_inference_data(
        img_tensor=tf.cast(img_plac, tf.float32),
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in v1d_backbones:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    fetches = [img_batch, detection_boxes, detection_scores, detection_category]

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        pbar = tqdm(os.listdir(img_dir))
        for file_name in pbar:
            a_img_name = file_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            # cv2 loads BGR; the placeholder expects RGB.
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = sess.run(
                fetches, feed_dict={img_plac: raw_img[:, :, ::-1]})

            if draw_imgs:
                keep = det_scores_r_ >= cfgs.VIS_SCORE
                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.squeeze(resized_img, 0),
                    boxes=det_boxes_r_[keep],
                    labels=det_category_r_[keep],
                    scores=det_scores_r_[keep],
                    method=1,
                    in_graph=True)

                save_dir = os.path.join('test_hrsc', cfgs.VERSION, 'hrsc2016_img_vis')
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                # Map boxes from the resized network input to the raw image:
                # scale in corner form, then convert back.
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                corners = forward_convert(det_boxes_r_, False)
                corners[:, 0::2] *= (raw_w / resized_w)
                corners[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = backward_convert(corners, False)

            # Keep x_c, y_c, w, h, theta as the five box columns.
            boxes_r = np.stack([det_boxes_r_[:, col] for col in range(5)], axis=1)
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)
        return all_boxes_r
def worker(gpu_id, images, det_net, result_queue):
    """Detect rotated boxes on whole images (optionally multi-scale) on one GPU.

    The process is pinned to ``gpu_id`` through ``CUDA_VISIBLE_DEVICES``. For
    every image the network runs once per short-side length; detections
    scoring at least ``cfgs.VIS_SCORE`` are rescaled to the raw image in
    corner form, merged, and reduced by a per-class rotated NMS using
    ``cfgs.NMS_IOU_THRESHOLD``. One dict with ``scales``, ``boxes``
    (corner form), ``scores``, ``labels`` and ``image_id`` is put on
    ``result_queue`` per image.

    NOTE(review): the body reads ``args.multi_scale`` although ``args`` is not
    a parameter, so it relies on a module-level ``args`` being visible in the
    worker process - confirm this holds for the start method in use.

    Args:
        gpu_id: value written to ``CUDA_VISIBLE_DEVICES``.
        images: iterable of image paths.
        det_net: object providing ``build_whole_detection_network`` and
            ``get_restorer``.
        result_queue: queue receiving one result dict per image.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category, detection_boxes_angle = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
        gt_smooth_label=None,
        gpu_id=0)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model %d ...' % gpu_id)

        for a_img in images:
            raw_img = cv2.imread(a_img)
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], []

            img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if isinstance(
                cfgs.IMG_SHORT_SIDE_LEN, list) else [cfgs.IMG_SHORT_SIDE_LEN]
            # Single-scale testing only uses the first configured length.
            img_short_side_len_list = [
                img_short_side_len_list[0]
            ] if not args.multi_scale else img_short_side_len_list

            for short_size in img_short_side_len_list:
                max_len = cfgs.IMG_MAX_LENGTH
                # Resize so the short side equals short_size, capping the
                # long side at max_len.
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w),
                                       max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                # cv2 loads BGR; the placeholder expects RGB.
                resized_img, detected_boxes, detected_scores, detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes_angle, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )

                detected_indices = detected_scores >= cfgs.VIS_SCORE
                detected_scores = detected_scores[detected_indices]
                detected_boxes = detected_boxes[detected_indices]
                detected_categories = detected_categories[detected_indices]

                if detected_boxes.shape[0] == 0:
                    continue

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                detected_boxes = forward_convert(detected_boxes, False)
                detected_boxes[:, 0::2] *= (raw_w / resized_w)
                detected_boxes[:, 1::2] *= (raw_h / resized_h)
                # detected_boxes = backward_convert(detected_boxes, False)

                det_boxes_r_all.extend(detected_boxes)
                det_scores_r_all.extend(detected_scores)
                det_category_r_all.extend(detected_categories)

            det_boxes_r_all = np.array(det_boxes_r_all)
            det_scores_r_all = np.array(det_scores_r_all)
            det_category_r_all = np.array(det_category_r_all)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            if det_scores_r_all.shape[0] != 0:
                for sub_class in range(1, cfgs.CLASS_NUM + 1):
                    index = np.where(det_category_r_all == sub_class)[0]
                    if len(index) == 0:
                        continue
                    tmp_boxes_r = det_boxes_r_all[index]
                    tmp_label_r = det_category_r_all[index]
                    tmp_score_r = det_scores_r_all[index]

                    # NMS runs on the converted boxes; the corner-form boxes
                    # are what gets kept in the result.
                    tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                    try:
                        inx = nms_rotate.nms_rotate_cpu(
                            boxes=np.array(tmp_boxes_r_),
                            scores=np.array(tmp_score_r),
                            iou_threshold=cfgs.NMS_IOU_THRESHOLD,
                            max_output_size=5000)
                    except Exception:
                        # Fall back to the GPU NMS when the CPU version
                        # fails. `Exception` (not a bare except) so Ctrl-C /
                        # SystemExit still stop the worker.
                        tmp_boxes_r_ = np.array(tmp_boxes_r_)
                        tmp = np.zeros(
                            [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                        tmp[:, 0:-1] = tmp_boxes_r_
                        tmp[:, -1] = np.array(tmp_score_r)
                        # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                        jitter = np.zeros(
                            [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                        jitter[:, 0] += np.random.rand(
                            tmp_boxes_r_.shape[0], ) / 1000
                        inx = rotate_gpu_nms(
                            np.array(tmp, np.float32) + np.array(jitter, np.float32),
                            float(cfgs.NMS_IOU_THRESHOLD), 0)

                    box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                    score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                    label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            box_res_rotate_ = np.array(box_res_rotate_)
            score_res_rotate_ = np.array(score_res_rotate_)
            label_res_rotate_ = np.array(label_res_rotate_)

            result_dict = {
                'scales': [1, 1],
                'boxes': box_res_rotate_,
                'scores': score_res_rotate_,
                'labels': label_res_rotate_,
                'image_id': a_img
            }
            # NOTE(review): put_nowait raises if the queue is bounded and
            # full; confirm the consumer always keeps up.
            result_queue.put_nowait(result_dict)
def _mlt_result_path(save_path, image_id):
    """Return the ``res_<id>.txt`` path for an image named ``...ts_<id>.<ext>``."""
    return os.path.join(
        save_path,
        'res_{}.txt'.format(image_id.split('/')[-1].split('.')[0].split('ts_')[1]))


def test_mlt(det_net, real_test_img_list, gpu_ids, show_box, txt_name):
    """Run MLT inference on several GPUs and write one result file per image.

    The image list is split into contiguous chunks, one ``worker`` process
    per id in the comma separated ``gpu_ids``. Exactly
    ``len(real_test_img_list)`` results are read from the shared queue.

    * Image without detections: an empty ``res_<id>.txt`` is created and the
      image name is appended to ``txt_name``.
    * ``show_box`` true: the boxes are drawn and the picture is saved under
      ``./test_mlt/<VERSION>``.
    * otherwise: ``x1,y1,...,x4,y4,score`` lines are written to
      ``res_<id>.txt`` and the image name is appended to ``txt_name``.

    Args:
        det_net: detection network object forwarded to ``worker``.
        real_test_img_list: list of image paths; names must contain ``ts_``
            whenever a result file is written.
        gpu_ids: comma separated GPU ids.
        show_box: draw detections instead of writing result files.
        txt_name: path of the progress file listing processed images.
    """
    save_path = os.path.join('./test_mlt', cfgs.VERSION)
    tools.mkdir(save_path)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_id_list = gpu_ids.strip().split(',')
    gpu_num = len(gpu_id_list)

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(gpu_id_list):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_id), split_records, det_net, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for _ in range(nr_records):
        res = result_queue.get()
        image_name = res['image_id'].split('/')[-1]

        if res['boxes'].shape[0] == 0:
            # Still create an (empty) result file for this image.
            with open(_mlt_result_path(save_path, res['image_id']), 'w'):
                pass
            pbar.update(1)

            with open(txt_name, 'a+') as fw:
                fw.write('{}\n'.format(image_name))
            continue

        x1, y1, x2, y2, x3, y3, x4, y4 = res['boxes'][:, 0], res['boxes'][:, 1], res['boxes'][:, 2], res['boxes'][:, 3],\
                                         res['boxes'][:, 4], res['boxes'][:, 5], res['boxes'][:, 6], res['boxes'][:, 7]

        x1, y1 = x1 * res['scales'][0], y1 * res['scales'][1]
        x2, y2 = x2 * res['scales'][0], y2 * res['scales'][1]
        x3, y3 = x3 * res['scales'][0], y3 * res['scales'][1]
        x4, y4 = x4 * res['scales'][0], y4 * res['scales'][1]

        boxes = np.transpose(np.stack([x1, y1, x2, y2, x3, y3, x4, y4]))

        if show_box:
            boxes = backward_convert(boxes, False)
            draw_path = os.path.join(save_path, image_name)
            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=boxes,
                labels=res['labels'],
                scores=res['scores'],
                method=1,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)
        else:
            # `with` closes the file even if formatting a line raises.
            with open(_mlt_result_path(save_path, res['image_id']), 'w') as fw_txt_dt:
                for ii, box in enumerate(boxes):
                    line = '%d,%d,%d,%d,%d,%d,%d,%d,%.3f\n' % (
                        box[0], box[1], box[2], box[3], box[4], box[5],
                        box[6], box[7], res['scores'][ii])
                    fw_txt_dt.write(line)

            with open(txt_name, 'a+') as fw:
                fw.write('{}\n'.format(image_name))

        pbar.set_description("Test image %s" % image_name)
        pbar.update(1)

    for p in procs:
        p.join()
def test_dota(det_net, real_test_img_list, args, txt_name):
    """Run DOTA inference on several GPUs and dump angle labels and logits.

    The image list is split into contiguous chunks, one ``worker`` process
    per id in the comma separated ``args.gpus``. Exactly
    ``len(real_test_img_list)`` results are read from the shared queue.

    * ``args.show_box`` true: detections scoring at least ``cfgs.VIS_SCORE``
      are drawn and saved under ``./test_dota/<VERSION>/dota_img_vis``.
    * otherwise: for every detection scoring at least ``cfgs.VIS_SCORE`` the
      integer part of the last value of the converted box is written as one
      line of ``./dcl_log/<VERSION>/dcl_meta.tsv`` and the matching row of
      ``res['logits']`` is collected; the image name is appended to
      ``txt_name``.

    After all workers have finished the collected logits are saved to
    ``./dcl_log/<VERSION>/final_logits.npy``.

    Args:
        det_net: detection network object forwarded to ``worker``.
        real_test_img_list: list of image paths.
        args: namespace with ``gpus`` and ``show_box`` (also forwarded to
            ``worker``).
        txt_name: path of the progress file listing processed images.
    """
    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_ids = args.gpus.strip().split(',')
    gpu_num = len(gpu_ids)

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(gpu_ids):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_id), split_records, det_net, args, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    log_dir = './dcl_log/{}'.format(cfgs.VERSION)
    tools.mkdir(log_dir)
    final_logits = []

    # `with` guarantees the tsv file is flushed and closed even if handling
    # one of the results raises.
    with open(os.path.join(log_dir, 'dcl_meta.tsv'), 'w') as fw_tsv:
        # fw_tsv.write("Label\n")
        for _ in range(nr_records):
            res = result_queue.get()
            image_name = res['image_id'].split('/')[-1]

            if args.show_box:
                tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
                draw_path = os.path.join(save_path, 'dota_img_vis', image_name)

                draw_img = np.array(cv2.imread(res['image_id']), np.float32)
                detected_boxes = backward_convert(res['boxes'], with_label=False)

                detected_indices = res['scores'] >= cfgs.VIS_SCORE
                detected_scores = res['scores'][detected_indices]
                detected_boxes = detected_boxes[detected_indices]
                detected_categories = res['labels'][detected_indices]

                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                    draw_img,
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    head=np.ones_like(detected_scores) * -1,
                    is_csl=True,
                    in_graph=False)
                cv2.imwrite(draw_path, final_detections)
            else:
                detected_indices = res['scores'] >= cfgs.VIS_SCORE
                res['scores'] = res['scores'][detected_indices]
                res['boxes'] = res['boxes'][detected_indices]
                res['labels'] = res['labels'][detected_indices]

                rboxes = backward_convert(res['boxes'], with_label=False)
                rboxes = coordinate_present_convert(rboxes, -1, False)
                rlogits = res['logits'][detected_indices]

                for ii, rb in enumerate(rboxes):
                    # Last value of the converted box is stored as the label.
                    fw_tsv.write("%d\n" % (int(rb[-1])))
                    final_logits.append(rlogits[ii])

                with open(txt_name, 'a+') as fw:
                    fw.write('{}\n'.format(image_name))

            pbar.set_description("Test image %s" % image_name)
            pbar.update(1)

    for p in procs:
        p.join()

    final_logits = np.array(final_logits)
    np.save(os.path.join(log_dir, "final_logits.npy"), final_logits)
# Count, per class, how many ground-truth boxes are small / medium / large.
# Relies on names defined earlier in the script: raw_data, raw_images_dir,
# class_list, distribution, format_label and backward_convert.
raw_label_dir = os.path.join(raw_data, 'labelTxt')

save_dir = '/data/DOTA/DOTA_TOTAL/train800/'

images = [i for i in os.listdir(raw_images_dir) if 'png' in i]
labels = [i for i in os.listdir(raw_label_dir) if 'txt' in i]

print('find image', len(images))
print('find label', len(labels))

min_length = 1e10
max_length = 1

for idx, img in enumerate(images):
    # NOTE(review): img_data is not used below; the read only costs time
    # unless later code depends on it.
    img_data = cv2.imread(os.path.join(raw_images_dir, img))

    # `with` closes the label file; the bare open(...).readlines() left the
    # handle open until garbage collection.
    with open(os.path.join(raw_label_dir, img.replace('png', 'txt')), 'r') as label_file:
        txt_data = label_file.readlines()
    box = format_label(txt_data)
    box = backward_convert(box)
    for b in box:
        class_name = class_list[int(b[-1])]
        if class_name not in distribution:
            distribution[class_name] = {'s': 0, 'm': 0, 'l': 0}
        # Size is the geometric mean of columns 2 and 3; buckets split at
        # 32 and 96.
        box_size = np.sqrt(b[2] * b[3])
        if box_size < 32:
            distribution[class_name]['s'] += 1
        elif box_size < 96:
            distribution[class_name]['m'] += 1
        else:
            distribution[class_name]['l'] += 1

print(distribution)