def anno_convert(o_txt_dir='/home/ai-i-hanmingfei/datasets/ODAI-ICPR/split_origin/val/s8_inference_oriented/labelTxt',
                 out_dir='/home/ai-i-hanmingfei/datasets/ODAI-ICPR/split_origin/val/s8_inference_oriented/labelTxt_transferred',
                 scale_num=1024.0,
                 scale_den=600.0):
    """Rescale the 8 polygon coordinates of every DOTA-style label file.

    Each line of an input file is expected to look like
    ``x1 y1 x2 y2 x3 y3 x4 y4 <label> ...``; the eight coordinates are
    multiplied by ``scale_num / scale_den`` (rounded up with ``math.ceil``)
    and rewritten under the same file name in ``out_dir``, keeping only the
    first token after the coordinates as the label.

    The directories and the scale were previously hard-coded; they are now
    defaulted parameters, so existing calls ``anno_convert()`` behave the same.

    :param o_txt_dir: directory containing the source label txt files.
    :param out_dir: output directory (created if missing).
    :param scale_num: numerator of the rescale ratio (default 1024).
    :param scale_den: denominator of the rescale ratio (default 600) —
        the original hard-coded 600 -> 1024 resize.
    """
    txt_paths = os.listdir(o_txt_dir)
    # os.makedirs replaces the project-local mkdir() helper; exist_ok makes
    # re-runs idempotent.
    os.makedirs(out_dir, exist_ok=True)
    for txt in txt_paths:
        with open(os.path.join(o_txt_dir, txt), 'r') as h_in:
            print(txt)  # progress: one line per file (noisy per-line prints removed)
            lines = h_in.readlines()
        with open(os.path.join(out_dir, txt), 'w') as h_out:
            for line in lines:
                anno = line.strip().split(' ')
                # same arithmetic order as before (a * num / den) so floating
                # point rounding, and hence ceil results, are unchanged
                line_tmp = [str(math.ceil(int(anno[i]) * scale_num / scale_den))
                            for i in range(8)]
                line_tmp.append(anno[8])  # class label follows the 8 coords
                h_out.write(' '.join(line_tmp) + '\n')
def write_voc_results_file(all_boxes, test_imgid_list, det_save_dir):
    '''
    Dump detections in VOC format: one ``det_<cls>.txt`` file per class.

    :param all_boxes: is a list. each item reprensent the detections of a img.
                      the detections is a array. shape is [-1, 7]. [category, score, x, y, w, h, theta]
                      Note that: if none detections in this img. that the detetions is : []
    :param test_imgid_list: image ids, aligned with all_boxes by index.
    :param det_save_dir: directory receiving the per-class result files.
    :return: None
    '''
    # the output dir is loop-invariant: create it once, not once per class
    tools.mkdir(det_save_dir)
    for cls, cls_id in NAME_LABEL_MAP.items():
        if cls == 'back_ground':
            continue
        print("Writing {} VOC results file".format(cls))  # fixed typo "resutls"

        det_save_path = os.path.join(det_save_dir, "det_" + cls + ".txt")
        with open(det_save_path, 'wt') as f:
            for index, img_name in enumerate(test_imgid_list):
                this_img_detections = all_boxes[index]

                # column 0 holds the category id
                this_cls_detections = this_img_detections[
                    this_img_detections[:, 0] == cls_id]
                if this_cls_detections.shape[0] == 0:
                    continue  # this cls has none detections in this img
                for a_det in this_cls_detections:
                    # that is [img_name, score, x, y, w, h, theta]
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(img_name, a_det[1], a_det[2], a_det[3],
                                   a_det[4], a_det[5], a_det[6]))
def inference(det_net, data_dir):
    """Run rotated-box detection on every image in ``data_dir`` and write the
    visualized results under cfgs.INFERENCE_SAVE_PATH/cfgs.VERSION.
    """
    # preprocessing subgraph: uint8 -> float32, mean subtraction, then resize
    # the short side to the configured inference length
    image_placeholder = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    preprocessed = tf.cast(image_placeholder, tf.float32)
    preprocessed = preprocessed - tf.constant(cfgs.PIXEL_MEAN)
    preprocessed = short_side_resize_for_inference_data(
        img_tensor=preprocessed,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN)

    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=preprocessed, gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    restorer, restore_ckpt = det_net.get_restorer()

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        image_names = os.listdir(data_dir)
        total = len(image_names)
        for idx, name in enumerate(image_names):
            bgr_image = cv2.imread(os.path.join(data_dir, name))

            tic = time.time()
            resized, boxes, scores, categories = sess.run(
                [preprocessed, det_boxes_r, det_scores_r, det_category_r],
                feed_dict={image_placeholder: bgr_image})
            toc = time.time()

            # draw the rotated detections on the (resized) image
            drawn = draw_box_in_img.draw_rotate_box_cv(
                np.squeeze(resized, 0),
                boxes=boxes, labels=categories, scores=scores)

            out_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION)
            tools.mkdir(out_dir)
            cv2.imwrite(out_dir + '/' + name + '_r.jpg', drawn)

            view_bar('{} cost {}s'.format(name, (toc - tic)), idx + 1, total)
def clip_image(file_idx, image, boxes_all, width, height, stride_w, stride_h):
    """Slide a ``width`` x ``height`` window over ``image`` and save every crop
    that contains at least one box center, together with the shifted
    annotations (png crop + xml label via save_to_xml).
    """
    if len(boxes_all) == 0:
        return
    img_h, img_w = image.shape[0], image.shape[1]
    for start_h in range(0, img_h, stride_h):
        for start_w in range(0, img_w, stride_w):
            boxes = copy.deepcopy(boxes_all)
            shifted = np.zeros_like(boxes_all)
            # clamp the window so it never extends past the image border
            row0 = start_h if start_h + height <= img_h else img_h - height
            col0 = start_w if start_w + width <= img_w else img_w - width
            top_left_row = max(row0, 0)
            top_left_col = max(col0, 0)
            bottom_right_row = min(start_h + height, img_h)
            bottom_right_col = min(start_w + width, img_w)
            crop = image[top_left_row:bottom_right_row,
                         top_left_col:bottom_right_col]
            # translate polygon coords into the crop's frame:
            # even columns 0..6 are x, odd columns 1..7 are y, column 8 = label
            shifted[:, 0:7:2] = boxes[:, 0:7:2] - top_left_col
            shifted[:, 1:8:2] = boxes[:, 1:8:2] - top_left_row
            shifted[:, 8] = boxes[:, 8]
            center_y = 0.25 * (shifted[:, 1] + shifted[:, 3] + shifted[:, 5] + shifted[:, 7])
            center_x = 0.25 * (shifted[:, 0] + shifted[:, 2] + shifted[:, 4] + shifted[:, 6])
            # keep boxes whose center point falls inside the crop
            inside = ((center_x >= 0) & (center_y >= 0) &
                      (center_x <= (bottom_right_col - top_left_col)) &
                      (center_y <= (bottom_right_row - top_left_row)))
            idx = np.where(inside)[0]
            if len(idx) > 0 and crop.shape[0] > 5 and crop.shape[1] > 5:
                mkdir(os.path.join(save_dir, 'images'))
                img = os.path.join(
                    save_dir, 'images',
                    "%s_%04d_%04d.png" % (file_idx, top_left_row, top_left_col))
                cv2.imwrite(img, crop)
                mkdir(os.path.join(save_dir, 'labeltxt'))
                xml = os.path.join(
                    save_dir, 'labeltxt',
                    "%s_%04d_%04d.xml" % (file_idx, top_left_row, top_left_col))
                save_to_xml(xml, crop.shape[0], crop.shape[1],
                            shifted[idx, :], class_list)
def convert_pascal_to_tfrecord():
    """Convert a PASCAL-VOC style dataset (xml annotations + images) into a
    single TFRecord file under FLAGS.save_dir.

    Only images whose stem is listed in FLAGS.label_txt and that contain at
    least one gt box are written. Prints the number of converted images.
    """
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    mkdir(FLAGS.save_dir)

    writer = tf.python_io.TFRecordWriter(path=save_path)

    # FIX: the label list file was opened without ever being closed (leaked
    # handle); read it inside a context manager instead. A set also turns the
    # O(n) per-xml membership test below into O(1).
    with open(FLAGS.label_txt, 'r') as fr:
        valid_stems = set(fr.readlines())

    real_cnt = 0
    pbar = tqdm(glob.glob(xml_path + '/*.xml'))
    for xml in pbar:
        # to avoid path error in different development platform
        xml = xml.replace('\\', '/')
        stem = xml.split('/')[-1].split('.')[0]
        # label_txt lines keep their trailing newline, so match against stem+"\n"
        if stem + "\n" not in valid_stems:
            continue
        img_name = stem + FLAGS.img_format
        img_path = image_path + '/' + img_name

        if not os.path.exists(img_path):
            print('{} is not exist!'.format(img_path))
            continue

        img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)
        if gtbox_label.shape[0] == 0:
            continue  # skip images without any gt box

        # cv2 loads BGR; [:, :, ::-1] stores RGB in the record
        img = cv2.imread(img_path)[:, :, ::-1]

        feature = tf.train.Features(
            feature={
                # do not need encode() in linux
                'img_name': _bytes_feature(img_name.encode()),
                'img_height': _int64_feature(img_height),
                'img_width': _int64_feature(img_width),
                'img': _bytes_feature(img.tostring()),
                'gtboxes_and_label': _bytes_feature(gtbox_label.tostring()),
                'num_objects': _int64_feature(gtbox_label.shape[0])
            })
        example = tf.train.Example(features=feature)
        writer.write(example.SerializeToString())
        real_cnt += 1
        pbar.set_description("Conversion progress")

    print('\nConversion is complete! {} images.'.format(real_cnt))
    writer.close()
def inference(det_net, path):
    """Run joint horizontal + rotated detection on the single image at
    ``path`` and hand the rotated results to process() for saving under
    cfgs.ROTATE_SAVE_PATH/cfgs.VERSION.

    :param det_net: detection network object providing
        build_whole_detection_network() and get_restorer().
    :param path: path of the image to run on.
    """
    # preprocess: uint8 -> float32, mean subtraction, short-side resize
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None,
        gtboxes_r_batch=None)

    # (cleanup: removed unused locals `fininsh_load`, `tmp`, and the
    # never-consumed start/end timestamps)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        raw_img = cv2.imread(path)
        basename = os.path.splitext(os.path.basename(path))[0]

        resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
        det_boxes_r_, det_scores_r_, det_category_r_ = \
            sess.run(
                [img_batch, det_boxes_h, det_scores_h, det_category_h,
                 det_boxes_r, det_scores_r, det_category_r],
                feed_dict={img_plac: raw_img}
            )

        save_dir = os.path.join(cfgs.ROTATE_SAVE_PATH, cfgs.VERSION)
        tools.mkdir(save_dir)

        # only the rotated detections are post-processed / saved
        process(det_boxes_r_, det_category_r_, np.squeeze(resized_img, 0),
                basename, save_dir)
def clip_image(file_idx, image, boxes_all, width, height, stride_w, stride_h):
    """Sliding-window cropper for rotated annotations (with optional head-point
    columns 8/9 when USE_HEAD is set); writes jpg crops plus xml labels.
    """
    # drop degenerate boxes whose 5-param width or height is <= 5 px
    boxes_all_5 = backward_convert(boxes_all[:, :8], False)
    print(boxes_all[np.logical_or(boxes_all_5[:, 2] <= 5, boxes_all_5[:, 3] <= 5), :])
    boxes_all = boxes_all[np.logical_and(boxes_all_5[:, 2] > 5, boxes_all_5[:, 3] > 5), :]

    if boxes_all.shape[0] == 0:
        return
    img_h, img_w = image.shape[0], image.shape[1]
    for start_h in range(0, img_h, stride_h):
        for start_w in range(0, img_w, stride_w):
            boxes = copy.deepcopy(boxes_all)
            # with head points the full row is kept; otherwise only 10 columns
            shifted = np.zeros_like(boxes_all) if USE_HEAD else np.zeros_like(boxes_all[:, :10])
            # clamp the window so it never extends past the image border
            row0 = start_h if start_h + height <= img_h else img_h - height
            col0 = start_w if start_w + width <= img_w else img_w - width
            top_left_row = max(row0, 0)
            top_left_col = max(col0, 0)
            bottom_right_row = min(start_h + height, img_h)
            bottom_right_col = min(start_w + width, img_w)
            crop = image[top_left_row:bottom_right_row,
                         top_left_col:bottom_right_col]
            # shift polygon (and head point) coordinates into the crop frame
            shifted[:, 0:7:2] = boxes[:, 0:7:2] - top_left_col
            shifted[:, 1:8:2] = boxes[:, 1:8:2] - top_left_row
            if USE_HEAD:
                shifted[:, 8] = boxes[:, 8] - top_left_col
                shifted[:, 9] = boxes[:, 9] - top_left_row
            shifted[:, -2:] = boxes[:, -2:]
            center_y = 0.25 * (shifted[:, 1] + shifted[:, 3] + shifted[:, 5] + shifted[:, 7])
            center_x = 0.25 * (shifted[:, 0] + shifted[:, 2] + shifted[:, 4] + shifted[:, 6])
            # keep boxes whose center point falls inside the crop
            inside = ((center_x >= 0) & (center_y >= 0) &
                      (center_x <= (bottom_right_col - top_left_col)) &
                      (center_y <= (bottom_right_row - top_left_row)))
            idx = np.where(inside)[0]
            if len(idx) > 0 and crop.shape[0] > 5 and crop.shape[1] > 5:
                mkdir(os.path.join(save_dir, 'images'))
                img = os.path.join(
                    save_dir, 'images',
                    "%s_%04d_%04d.jpg" % (file_idx, top_left_row, top_left_col))
                cv2.imwrite(img, crop)
                mkdir(os.path.join(save_dir, 'labelxml'))
                xml = os.path.join(
                    save_dir, 'labelxml',
                    "%s_%04d_%04d.xml" % (file_idx, top_left_row, top_left_col))
                save_to_xml(xml,
                            "%s_%04d_%04d" % (file_idx, top_left_row, top_left_col),
                            crop.shape[0], crop.shape[1],
                            shifted[idx, :], class_list)
def write_voc_results_file(boxes, labels, scores, img_name, det_save_dir):
    '''
    Append one image's detections to per-class VOC result files
    (``det_<cls>.txt``), one line per detection:
    [img_name, score, then boxes columns 0..4 — presumably
    x_c, y_c, w, h, theta; verify against the box producer].

    FIX: the original opened the output file in append mode once per
    detection and scanned NAME_LABEL_MAP per detection; now each class file
    is opened at most once per call. Written file contents are unchanged,
    and files for absent classes are still not created.
    '''
    tools.mkdir(det_save_dir)
    num, _ = boxes.shape
    for cls, cls_id in NAME_LABEL_MAP.items():
        if cls == 'back_ground':
            continue
        # indices of this image's detections belonging to this class
        hit = [i for i in range(num) if labels[i] == cls_id]
        if not hit:
            continue  # don't create/touch the file when the class is absent
        det_save_path = os.path.join(det_save_dir, "det_" + cls + ".txt")
        with open(det_save_path, 'a') as f:
            for i in hit:
                # that is [img_name, score, x, y, w, h, theta]
                f.write('{:s} {:.6f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                        format(img_name, scores[i], boxes[i][0], boxes[i][1],
                               boxes[i][2], boxes[i][3], boxes[i][4]))
def eval_with_plac(det_net, args):
    """Evaluate rotated detections over every image in args.img_dir.

    Feeds each image (at one or several short-side scales), collects rotated
    detections, applies per-class rotated NMS, and returns a list with one
    array per image whose rows are [label, score, x_c, y_c, w, h, theta].

    :param det_net: detection network providing build_whole_detection_network()
        and get_restorer().
    :param args: namespace with img_dir, image_ext, multi_scale, draw_imgs.
    :return: list of per-image detection arrays (all_boxes_r).
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        # v1d backbones: scale to 0-1 then standardize with mean/std
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category, detection_boxes_angle = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
        gt_smooth_label=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        # normalize IMG_SHORT_SIDE_LEN to a list; use only the first scale
        # unless multi-scale testing was requested
        img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if isinstance(
            cfgs.IMG_SHORT_SIDE_LEN, list) else [cfgs.IMG_SHORT_SIDE_LEN]
        img_short_side_len_list = [
            img_short_side_len_list[0]
        ] if not args.multi_scale else img_short_side_len_list

        imgs = os.listdir(args.img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(args.image_ext)[0]

            raw_img = cv2.imread(
                os.path.join(args.img_dir, a_img_name + args.image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            for short_size in img_short_side_len_list:
                # resize so the short side equals short_size, long side capped
                # at IMG_MAX_LENGTH
                max_len = cfgs.IMG_MAX_LENGTH
                if raw_h < raw_w:
                    new_h, new_w = short_size, min(
                        int(short_size * float(raw_w) / raw_h), max_len)
                else:
                    new_h, new_w = min(int(short_size * float(raw_h) / raw_w),
                                       max_len), short_size
                img_resize = cv2.resize(raw_img, (new_w, new_h))

                # cv2 loads BGR; [:, :, ::-1] feeds the RGB the graph expects
                resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                    sess.run(
                        [img_batch, detection_boxes_angle,
                         detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}
                    )

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[
                    2]

                if len(det_boxes_r_) > 0:
                    # 5-param rotated boxes -> 8-point polygons, then rescale
                    # the polygon coords back to the raw image size
                    det_boxes_r_ = forward_convert(det_boxes_r_, False)
                    det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                    det_boxes_r_[:, 1::2] *= (raw_h / resized_h)

                    for ii in range(len(det_boxes_r_)):
                        box_rotate = det_boxes_r_[ii]
                        box_res_rotate.append(box_rotate)
                        label_res_rotate.append(det_category_r_[ii])
                        score_res_rotate.append(det_scores_r_[ii])

            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_ = []
            label_res_rotate_ = []
            score_res_rotate_ = []

            # per-class NMS IoU thresholds (UCAS-AOD classes)
            threshold = {'car': 0.2, 'plane': 0.3}

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                # polygons -> 5-param boxes for rotated NMS
                tmp_boxes_r_ = backward_convert(tmp_boxes_r, False)

                # NOTE(review): bare `except:` silently falls back to the GPU
                # NMS on any failure — consider narrowing the exception type.
                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r_),
                        scores=np.array(tmp_score_r),
                        iou_threshold=threshold[LABEL_NAME_MAP[sub_class]],
                        max_output_size=150)
                except:
                    # GPU fallback: append scores as an extra column
                    tmp_boxes_r_ = np.array(tmp_boxes_r_)
                    tmp = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    tmp[:, 0:-1] = tmp_boxes_r_
                    tmp[:, -1] = np.array(tmp_score_r)
                    # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                    # so tiny random jitter is added to break exact duplicates
                    jitter = np.zeros(
                        [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) + np.array(jitter, np.float32),
                        float(threshold[LABEL_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            box_res_rotate_ = np.array(box_res_rotate_)
            score_res_rotate_ = np.array(score_res_rotate_)
            label_res_rotate_ = np.array(label_res_rotate_)

            if args.draw_imgs:
                # visualize detections above the VIS_SCORE threshold
                detected_indices = score_res_rotate_ >= cfgs.VIS_SCORE
                detected_scores = score_res_rotate_[detected_indices]
                detected_boxes = box_res_rotate_[detected_indices]
                detected_boxes = backward_convert(detected_boxes,
                                                  with_label=False)
                detected_categories = label_res_rotate_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.array(raw_img, np.float32),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=False,
                    is_csl=True)
                save_dir = os.path.join('test_ucas_aod', cfgs.VERSION,
                                        'ucas_aod_img_vis')
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if box_res_rotate_.shape[0] != 0:
                # polygons back to [x_c, y_c, w, h, theta] for the output
                box_res_rotate_ = backward_convert(box_res_rotate_, False)

            # NOTE(review): if an image yields no detections at all,
            # box_res_rotate_ is empty and the 2-D indexing below would raise —
            # TODO confirm upstream guarantees at least one detection.
            x_c, y_c, w, h, theta = box_res_rotate_[:, 0], box_res_rotate_[:, 1], box_res_rotate_[:, 2], \
                                    box_res_rotate_[:, 3], box_res_rotate_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((label_res_rotate_.reshape(-1, 1),
                                score_res_rotate_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)

        return all_boxes_r
def train():
    """Build the rotated-detection training graph (with attention mask) and
    run the optimization loop, logging to TensorBoard and saving checkpoints.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)

    # per-image attention mask computed in python from image + gt boxes
    mask_batch = tf.py_func(
        image_preprocess.get_mask,
        [tf.squeeze(img_batch, 0), tf.squeeze(gtboxes_and_label_batch, 0)],
        [tf.float32])

    # rotated gt representation: [x_c, y_c, w, h, theta, label]
    gtboxes_and_label = tf.py_func(
        back_forward_convert,
        inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
        Tout=tf.float32)
    gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

    # horizontal min-area rectangles of the rotated gt boxes
    gtboxes_and_label_AreaRectangle = get_horizen_minAreaRectangle(
        gtboxes_and_label)
    gtboxes_and_label_AreaRectangle = tf.reshape(
        gtboxes_and_label_AreaRectangle, [-1, 5])

    with tf.name_scope('draw_gtboxes'):
        gtboxes_in_img = draw_box_with_color(
            img_batch,
            tf.reshape(gtboxes_and_label_AreaRectangle, [-1, 5])[:, :-1],
            text=tf.shape(gtboxes_and_label_AreaRectangle)[0])
        gtboxes_rotate_in_img = draw_box_with_color_rotate(
            img_batch,
            tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
            text=tf.shape(gtboxes_and_label)[0])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
             slim.separable_conv2d, slim.fully_connected],
            weights_regularizer=weights_regularizer,
            biases_regularizer=biases_regularizer,
            biases_initializer=tf.constant_initializer(0.0)):
        final_boxes_h, final_scores_h, final_category_h, \
        final_boxes_r, final_scores_r, final_category_r, loss_dict = \
            faster_rcnn.build_whole_detection_network(
                input_img_batch=img_batch,
                gtboxes_r_batch=gtboxes_and_label,
                gtboxes_h_batch=gtboxes_and_label_AreaRectangle,
                mask_batch=mask_batch[0])

    dets_in_img = draw_boxes_with_categories_and_scores(
        img_batch=img_batch, boxes=final_boxes_h,
        labels=final_category_h, scores=final_scores_h)
    dets_rotate_in_img = draw_boxes_with_categories_and_scores_rotate(
        img_batch=img_batch, boxes=final_boxes_r,
        labels=final_category_r, scores=final_scores_r)

    # ----------------------------------------------------------- build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss_h = loss_dict['fastrcnn_cls_loss_h']
    fastrcnn_loc_loss_h = loss_dict['fastrcnn_loc_loss_h']
    fastrcnn_cls_loss_r = loss_dict['fastrcnn_cls_loss_r']
    fastrcnn_loc_loss_r = loss_dict['fastrcnn_loc_loss_r']
    fastrcnn_total_loss = (fastrcnn_cls_loss_h + fastrcnn_loc_loss_h +
                           fastrcnn_cls_loss_r + fastrcnn_loc_loss_r)

    if cfgs.ADD_ATTENTION:
        attention_loss = loss_dict['attention_loss']
        total_loss = rpn_total_loss + fastrcnn_total_loss + attention_loss + weight_decay_loss
    else:
        # keep a fetchable tensor so the sess.run fetch list below works
        # whether or not attention is enabled
        attention_loss = tf.convert_to_tensor(0)
        total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss_h', fastrcnn_cls_loss_h)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss_h', fastrcnn_loc_loss_h)
    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss_r', fastrcnn_cls_loss_r)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss_r', fastrcnn_loc_loss_r)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)
    if cfgs.ADD_ATTENTION:
        tf.summary.scalar('ATTENTION_LOSS/attention_loss', attention_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/gtboxes_rotate', gtboxes_rotate_in_img)
    tf.summary.image('img/mask', mask_batch)
    tf.summary.image('img/dets', dets_in_img)
    tf.summary.image('img/dets_rotate', dets_rotate_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ------------------------------------------------------ compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)
    if cfgs.MUTILPY_BIAS_GRADIENT:
        # enlarge_gradients for bias
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)
    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # BUG FIX: both session lines were commented out in the original, leaving
    # `sess` undefined for everything below; restore the config-bearing one.
    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                # plain training step
                _, global_stepnp = sess.run([train_op, global_step])
            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                # training step + console report
                start = time.time()
                _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                _fast_rcnn_location_rotate_loss, _fast_rcnn_classification_rotate_loss, \
                _fast_rcnn_total_loss, _attention_loss, _total_loss, _ = \
                    sess.run([global_step, img_name_batch, rpn_location_loss,
                              rpn_cls_loss, rpn_total_loss,
                              fastrcnn_loc_loss_h, fastrcnn_cls_loss_h,
                              fastrcnn_loc_loss_r, fastrcnn_cls_loss_r,
                              fastrcnn_total_loss, attention_loss, total_loss,
                              train_op])
                # FIX: keep the step used for checkpoint names in sync with
                # this branch too (it previously only wrote _global_step)
                global_stepnp = _global_step
                end = time.time()
                print(""" {}: step{} image_name:{} |\t
 rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
 fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t
 fast_rcnn_loc_rotate_loss:{} |\t fast_rcnn_cla_rotate_loss:{} |\t
 fast_rcnn_total_loss:{} |\t attention_loss:{} |\t
 total_loss:{} |\t pre_cost_time:{}s"""
                      .format(training_time, _global_step, str(_img_name_batch[0]),
                              _rpn_location_loss, _rpn_classification_loss,
                              _rpn_total_loss, _fast_rcnn_location_loss,
                              _fast_rcnn_classification_loss,
                              _fast_rcnn_location_rotate_loss,
                              _fast_rcnn_classification_rotate_loss,
                              _fast_rcnn_total_loss, _attention_loss,
                              _total_loss, (end - start)))
            elif step % cfgs.SMRY_ITER == 0:
                # training step + TensorBoard summary
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op])
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print(' weights had been saved')

        coord.request_stop()
        coord.join(threads)
def train():
    """Build the horizontal Faster R-CNN training graph (feed_dict pipeline)
    and run the optimization loop with TensorBoard logging and checkpoints.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        # images and gt boxes arrive via placeholders, fed from next_img below
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        gtbox_plac = tf.placeholder(dtype=tf.int32, shape=[None, 5])
        img_batch, gtboxes_and_label = preprocess_img(img_plac, gtbox_plac)

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
             slim.separable_conv2d, slim.fully_connected],
            weights_regularizer=weights_regularizer,
            biases_regularizer=biases_regularizer,
            biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = \
            faster_rcnn.build_whole_detection_network(
                input_img_batch=img_batch,
                gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------- build loss
    # regularization deliberately disabled here (kept at 0)
    weight_decay_loss = 0  # tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ------------------------------------------------------ compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)
    if cfgs.MUTILPY_BIAS_GRADIENT:
        # enlarge_gradients for bias
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)
    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            img_id, img, gt_info = next_img(step=step)
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                # plain training step
                _, global_stepnp = sess.run([train_op, global_step],
                                            feed_dict={img_plac: img,
                                                       gtbox_plac: gt_info})
            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                # training step + console report
                start = time.time()
                _, global_stepnp, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                    sess.run(
                        [train_op, global_step, rpn_location_loss, rpn_cls_loss,
                         rpn_total_loss, fastrcnn_loc_loss, fastrcnn_cls_loss,
                         fastrcnn_total_loss, total_loss],
                        feed_dict={img_plac: img, gtbox_plac: gt_info})
                end = time.time()
                print(""" {}: step{} image_name:{} |\t
 rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
 fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
 total_loss:{} |\t per_cost_time:{}s"""
                      .format(training_time, global_stepnp, str(img_id),
                              rpnLocLoss, rpnClsLoss, rpnTotalLoss,
                              fastrcnnLocLoss, fastrcnnClsLoss,
                              fastrcnnTotalLoss, totalLoss, (end - start)))
            elif step % cfgs.SMRY_ITER == 0:
                # training step + TensorBoard summary
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op],
                    feed_dict={img_plac: img, gtbox_plac: gt_info})
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                # FIX: os.mkdir fails when cfgs.TRAINED_CKPT itself is missing;
                # os.makedirs creates intermediate dirs and matches the sibling
                # train() in this file.
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print(' weights had been saved')
def train():
    """Build the rotated Faster R-CNN training graph and run the training loop.

    Input comes from queue runners (``next_batch``), so the loop needs no
    feed_dict.  Side effects: TensorBoard summaries under
    cfgs.SUMMARY_PATH/cfgs.VERSION and checkpoints under
    cfgs.TRAINED_CKPT/cfgs.VERSION.
    """
    # get Network Class
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    # get batch
    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)

    # convert gtboxes_and_labels (py_func output has no static shape, hence the reshape)
    gtboxes_and_label = tf.py_func(
        back_forward_convert,
        inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
        Tout=tf.float32)
    gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

    # draw ground-truth
    with tf.name_scope('draw_gtboxes'):
        gtboxes_in_img = draw_box_with_color_rotate(
            img_batch,
            tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
            text=tf.shape(gtboxes_and_label)[0])

    # default regularizers
    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
                         slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        # build network
        final_boxes, final_scores, final_category, loss_dict = \
            faster_rcnn.build_whole_detection_network(input_img_batch=img_batch,
                                                      gtboxes_batch=gtboxes_and_label)

    # draw detections
    dets_in_img = draw_boxes_with_categories_and_scores_rotate(
        img_batch=img_batch,
        boxes=final_boxes,
        labels=final_category,
        scores=final_scores)

    # ----------------------------------------------------------------- build loss
    # weight decay loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())

    # rpn losses
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    # fastrcnn losses
    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    # total loss
    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ----------------------------------------------------------------- summaries
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)
    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)
    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/dets', dets_in_img)

    # create global step
    global_step = slim.get_or_create_global_step()

    # learning rate: step decay at the two cfgs.DECAY_STEP boundaries
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)

    # optimizer with lr
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # -------------------------------------------------------------- compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    # clipping gradients
    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op, ie. apply gradients
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)

    # summary_op
    summary_op = tf.summary.merge_all()

    # init_op
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # restorer and Saver
    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    # GPU Config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # init
        sess.run(init_op)

        # restore
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # The Session object is thread-safe: start the coordinator and the
        # enqueue threads that feed the input queues (the thread count is set
        # in read_tfrecord.py -> next_batch -> tf.train.batch).
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        # construct summary writer
        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        # train MAX_ITERATION steps
        for step in range(cfgs.MAX_ITERATION):
            # time of this step
            training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

            # no show & no summary
            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                # global_step is added to run; global_stepnp is used when saving
                _, global_stepnp = sess.run([train_op, global_step])
            else:
                # show
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()
                    # middle infos are added to run: losses and img_name_batch
                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                    _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                    _fast_rcnn_total_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss, rpn_cls_loss,
                                  rpn_total_loss, fastrcnn_loc_loss, fastrcnn_cls_loss,
                                  fastrcnn_total_loss, total_loss, train_op])
                    # BUG FIX: keep global_stepnp current, so a checkpoint saved on a
                    # "show" step is not named after a stale (or unbound) step value.
                    global_stepnp = _global_step
                    end = time.time()
                    # print
                    print(""" {}: step {}: image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} | fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |\t total_loss:{} |\t pre_cost_time:{}s"""
                          .format(training_time, _global_step, str(_img_name_batch[0]),
                                  _rpn_location_loss, _rpn_classification_loss, _rpn_total_loss,
                                  _fast_rcnn_location_loss, _fast_rcnn_classification_loss,
                                  _fast_rcnn_total_loss, _total_loss, (end - start)))
                # summary
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        # summary_op is added to run
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        # add summary
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            # save
            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                # mkdir
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                # save checkpoint
                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print(' weights had been saved')

        # ask the coordinator to stop all threads
        coord.request_stop()
        # join the started threads back into the main thread and wait for them
        coord.join(threads)
def train():
    """Build the FPN (RPN + rotated Fast R-CNN) training graph and run the
    queue-fed training loop.

    Side effects: TensorBoard summaries under cfgs.SUMMARY_PATH/cfgs.VERSION
    and checkpoints under cfgs.TRAINED_CKPT/cfgs.VERSION.
    """
    with tf.Graph().as_default():

        # ---- input pipeline: ground-truth converted to [x_c, y_c, w, h, theta, label]
        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'ship', 'spacenet', 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True)
            gtboxes_and_label = tf.py_func(back_forward_convert,
                                           inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
                                           Tout=tf.float32)
            # py_func loses the static shape, so pin it back to [-1, 6]
            gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

        with tf.name_scope('draw_gtboxes'):
            gtboxes_in_img = draw_box_with_color(img_batch,
                                                 tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
                                                 text=tf.shape(gtboxes_and_label_batch)[1])

        # ***********************************************************************************************
        # *                                         shared CNN                                          *
        # ***********************************************************************************************
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # ***********************************************************************************************
        # *                                            rpn                                              *
        # *                    Note: here the rpn is Feature Pyramid Networks                           *
        # ***********************************************************************************************
        rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                            inputs=img_batch,
                            gtboxes_and_label=gtboxes_and_label,
                            is_training=True,
                            share_head=cfgs.SHARED_HEADS,
                            share_net=share_net,
                            anchor_ratios=cfgs.ANCHOR_RATIOS,
                            anchor_scales=cfgs.ANCHOR_SCALES,
                            anchor_angles=cfgs.ANCHOR_ANGLES,
                            scale_factors=cfgs.SCALE_FACTORS,  # this parameter will affect the performance
                            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                            level=cfgs.LEVEL,
                            anchor_stride=cfgs.ANCHOR_STRIDE,
                            top_k_nms=cfgs.RPN_TOP_K_NMS,
                            kernel_size=cfgs.KERNEL_SIZE,
                            use_angles_condition=False,
                            anchor_angle_threshold=cfgs.RPN_ANCHOR_ANGLES_THRESHOLD,
                            nms_angle_threshold=cfgs.RPN_NMS_ANGLES_THRESHOLD,
                            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                            # iou>=0.7 is positive box, iou< 0.3 is negative
                            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                            remove_outside_anchors=cfgs.IS_FILTER_OUTSIDE_BOXES,  # whether remove anchors outside
                            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                            scope='')

        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

        rpn_location_loss, rpn_classification_loss, rpn_predict_boxes, rpn_predict_scores = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # score > 0.6 is object
            rpn_object_boxes_indices = tf.reshape(tf.where(tf.greater(rpn_proposals_scores,
                                                                      cfgs.FINAL_SCORE_THRESHOLD)), [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices)
            rpn_object_soxres = tf.gather(rpn_proposals_scores, rpn_object_boxes_indices)

            rpn_proposals_objcet_boxes_in_img = draw_boxes_with_scores(img_batch,
                                                                       rpn_object_boxes,
                                                                       scores=rpn_object_soxres)
            rpn_proposals_boxes_in_img = draw_box_with_color(img_batch, rpn_proposals_boxes,
                                                             text=tf.shape(rpn_proposals_boxes)[0])

        # ***********************************************************************************************
        # *                                         Fast RCNN                                           *
        # ***********************************************************************************************
        fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=img_batch,
                                             feature_pyramid=rpn.feature_pyramid,
                                             rpn_proposals_boxes=rpn_proposals_boxes,
                                             rpn_proposals_scores=rpn_proposals_scores,
                                             stop_gradient_for_proposals=False,
                                             img_shape=tf.shape(img_batch),
                                             roi_size=cfgs.ROI_SIZE,
                                             roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                                             scale_factors=cfgs.SCALE_FACTORS,
                                             gtboxes_and_label=gtboxes_and_label,
                                             fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                             top_k_nms=cfgs.FAST_RCNN_TOP_K_NMS,
                                             nms_angle_threshold=cfgs.FAST_RCNN_NMS_ANGLES_THRESHOLD,
                                             use_angle_condition=False,
                                             level=cfgs.LEVEL,
                                             fast_rcnn_maximum_boxes_per_img=100,
                                             fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                                             show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
                                             # show detections which score >= 0.6
                                             num_classes=cfgs.CLASS_NUM,
                                             fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                                             fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                                             fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                                             # iou>0.5 is positive, iou<0.5 is negative
                                             boxes_angle_threshold=cfgs.FAST_RCNN_BOXES_ANGLES_THRESHOLD,
                                             use_dropout=cfgs.USE_DROPOUT,
                                             weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                                             is_training=True)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(img_batch=img_batch,
                                                                         boxes=fast_rcnn_decode_boxes,
                                                                         labels=detection_category,
                                                                         scores=fast_rcnn_score)

        # train
        # total_loss includes every loss registered in the slim losses collection
        total_loss = slim.losses.get_total_loss()

        global_step = slim.get_or_create_global_step()

        # step-decay learning rate (hard-coded boundaries at 70k / 120k steps)
        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=[np.int64(70000), np.int64(120000)],
                                         values=[cfgs.LR, cfgs.LR/10, cfgs.LR/100])

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

        # when RPN_TRAIN is set, only the RPN branch is optimized
        if cfgs.RPN_TRAIN:
            train_op = slim.learning.create_train_op(rpn_total_loss, optimizer, global_step)
        else:
            train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)

        # ***********************************************************************************************
        # *                                          Summary                                            *
        # ***********************************************************************************************
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/fast_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
        # rpn loss and image
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('loss/total_loss', total_loss)

        tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
        tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_objcet_boxes_in_img)
        # learning_rate
        tf.summary.scalar('learning_rate', lr)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = restore_model.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')
            # start the queue-runner threads that feed the input pipeline
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                start = time.time()

                _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                _fast_rcnn_total_loss, _total_loss, _ = \
                    sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                              rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                              fast_rcnn_total_loss, total_loss, train_op])

                end = time.time()

                if step % 10 == 0:
                    print(""" {}: step{} image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} | fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} | total_loss:{} |\t per_cost_time:{}s"""
                          .format(training_time, _global_step, str(_img_name_batch[0]),
                                  _rpn_location_loss, _rpn_classification_loss, _rpn_total_loss,
                                  _fast_rcnn_location_loss, _fast_rcnn_classification_loss,
                                  _fast_rcnn_total_loss, _total_loss, (end - start)))

                # NOTE(review): this sess.run(summary_op) re-executes the graph
                # (pulls a fresh batch) without running train_op — the summarized
                # batch is not the one just trained on; confirm this is intended.
                if (step % 50 == 0) and (step % 10000 != 0):  # 50
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, _global_step)
                    summary_writer.flush()

                if (step > 0 and step % 10000 == 0) or (step == cfgs.MAX_ITERATION - 1):
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, _global_step)
                    summary_writer.flush()

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        # NOTE(review): os.mkdir fails if cfgs.TRAINED_CKPT itself
                        # does not exist; os.makedirs would be safer.
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'voc_'+str(_global_step)+'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')

            # stop and join the queue-runner threads
            coord.request_stop()
            coord.join(threads)
def train():
    """Multi-GPU training: builds one model tower per GPU, averages the tower
    gradients on the CPU, and maintains an exponential moving average of all
    trainable variables.

    Side effects: summaries under cfgs.SUMMARY_PATH/cfgs.VERSION and
    checkpoints under cfgs.TRAINED_CKPT/cfgs.VERSION.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # global step / warm-up learning-rate schedule live on the CPU
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.NUM_GPU*cfgs.BATCH_SIZE)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                           is_training=True,
                                                           batch_size=cfgs.BATCH_SIZE)

        with tf.name_scope('get_batch'):
            # one big batch is fetched, then sliced per GPU below
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE * cfgs.NUM_GPU,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)

        # data processing: slice the global batch into per-GPU inputs
        inputs_list = []
        for i in range(cfgs.NUM_GPU):
            start = i*cfgs.BATCH_SIZE
            end = (i+1)*cfgs.BATCH_SIZE
            img = img_batch[start:end, :, :, :]

            if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[start:end, :, :],
                                                   [cfgs.BATCH_SIZE, -1, 5]), tf.float32)
            num_objects = num_objects_batch[start:end]
            num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32)

            img_h = img_h_batch[start:end]
            img_w = img_w_batch[start:end]

            inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        # per-loss accumulators, averaged over GPUs as towers are built
        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0., tf.float32),
            'rpn_bbox_loss': tf.constant(0., tf.float32),
            'rpn_ctr_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfgs.NUM_GPU):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # model variables live on the CPU and are shared by all towers
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label = tf.py_func(get_gtboxes_and_label,
                                                               inp=[inputs_list[i][1], inputs_list[i][2]],
                                                               Tout=tf.float32)
                                gtboxes_and_label = tf.reshape(gtboxes_and_label, [cfgs.BATCH_SIZE, -1, 5])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                # crop padding away: keep the max true height/width in this slice
                                h_crop = tf.reduce_max(img_shape[0])
                                w_crop = tf.reduce_max(img_shape[1])
                                img = tf.image.crop_to_bounding_box(image=img,
                                                                    offset_height=0,
                                                                    offset_width=0,
                                                                    target_height=tf.cast(h_crop, tf.int32),
                                                                    target_width=tf.cast(w_crop, tf.int32))

                                outputs = faster_rcnn.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch=gtboxes_and_label)
                                gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
                                    img_batch=img[0, :, :, :],
                                    boxes=gtboxes_and_label[0, :, :-1],
                                    labels=gtboxes_and_label[0, :, -1])
                                gtboxes_in_img = tf.expand_dims(gtboxes_in_img, 0)
                                tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                                        img_batch=img[0, :, :, :],
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2])
                                    detections_in_img = tf.expand_dims(detections_in_img, 0)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                # accumulate this tower's losses (averaged over GPUs)
                                loss_dict = outputs[-1]
                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU
                                total_losses = total_losses / cfgs.NUM_GPU
                                total_loss_dict['total_losses'] += total_losses

                                # regularization is added once, on the last tower only
                                if i == cfgs.NUM_GPU - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        # share variables with the next tower
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        # average gradients across towers (single-GPU case needs no reduction)
        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        # exponential moving average of all trainable variables
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = faster_rcnn.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True

        # number of images consumed per training iteration
        num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            for step in range(cfgs.MAX_ITERATION // num_per_iter):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])
                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()
                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])
                        end = time.time()

                        print('***'*20)
                        print("""%s: global_step:%d  current_step:%d"""
                              % (training_time, (global_stepnp-1)*num_per_iter, step*num_per_iter))
                        print("""per_cost_time:%.3fs"""
                              % ((end - start) / num_per_iter))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)
                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
                            summary_writer.add_summary(summary_str, (global_stepnp-1)*num_per_iter)
                            summary_writer.flush()

                # NOTE(review): the second clause compares against
                # MAX_ITERATION // NUM_GPU while the loop runs to
                # MAX_ITERATION // num_per_iter — with BATCH_SIZE > 1 these
                # differ; confirm the intended final-save condition.
                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_per_iter) == 0) or (step >= cfgs.MAX_ITERATION // cfgs.NUM_GPU - 1):
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp-1)*num_per_iter) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')

            # stop and join the queue-runner threads
            coord.request_stop()
            coord.join(threads)
def eval_with_plac(det_net, real_test_imgname_list, img_root, draw_imgs=False):
    """Run horizontal-box detection on each listed image and append results to
    VOC2012-style 'comp3_det_test_<class>.txt' files.

    :param det_net: detection network providing build_whole_detection_network()
                    and get_restorer()
    :param real_test_imgname_list: image file names (relative to img_root)
    :param img_root: directory containing the test images
    :param draw_imgs: when True, also save visualizations under cfgs.TEST_SAVE_PATH
    Side effect: writes per-class txt files under
    voc2012_eval/<VERSION>/results/VOC2012/Main.
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    # v1d backbones expect 0-1 normalized input; others use mean subtraction
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(os.path.join(img_root, a_img_name))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # undo the input normalization before drawing
                draw_img = np.squeeze(resized_img, 0)
                if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)
                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + a_img_name,
                            final_detections[:, :, ::-1])

            # rescale boxes from resized-image to raw-image coordinates
            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            # dets rows: [category, score, xmin, ymin, xmax, ymax]
            dets = np.hstack((detected_categories.reshape(-1, 1),
                              detected_scores.reshape(-1, 1),
                              boxes))
            # all_boxes.append(dets)

            # eval txt
            CLASS_VOC = NAME_LABEL_MAP.keys()

            # NOTE(review): files are opened in 'a+' per image, so results
            # accumulate across runs — stale files must be deleted before a
            # fresh evaluation; confirm this is intended.
            write_handle = {}
            txt_dir = os.path.join('voc2012_eval', cfgs.VERSION, 'results', 'VOC2012', 'Main')
            tools.mkdir(txt_dir)
            for sub_class in CLASS_VOC:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class] = open(os.path.join(txt_dir, 'comp3_det_test_%s.txt' % sub_class), 'a+')

            for det in dets:
                command = '%s %.6f %.6f %.6f %.6f %.6f\n' % (a_img_name.split('/')[-1].split('.')[0],
                                                             det[1],
                                                             det[2], det[3], det[4], det[5])
                write_handle[LABEl_NAME_MAP[det[0]]].write(command)

            for sub_class in CLASS_VOC:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class].close()

            tools.view_bar('%s image cost %.3fs' % (a_img_name, (end - start)), i + 1, len(real_test_imgname_list))
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):
    """Run rotated-box detection over every image in `img_dir` and pickle the
    per-image results.

    :param img_dir: directory containing the test images
    :param det_net: detection network providing build_whole_detection_network()
                    and get_restorer()
    :param num_imgs: unused here; kept for interface compatibility
    :param image_ext: image filename extension, e.g. '.jpg'
    :param draw_imgs: when True, also save rotated-box visualizations under
                      cfgs.TEST_SAVE_PATH/cfgs.VERSION
    Side effect: writes cfgs.VERSION + '_detections_r.pkl' containing one
    [label, score, x_c, y_c, w, h, theta] array per image.
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not GBR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=False)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None, gtboxes_r_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        for i, a_img_name in enumerate(imgs):
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            # BUG FIX: the fetch list used to contain 7 tensors (img_batch plus
            # 3 horizontal and 3 rotated outputs) but was unpacked into only 4
            # names, raising ValueError on the first image. Only the rotated
            # outputs are consumed below, so fetch exactly those.
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))

            if draw_imgs:
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0),
                                                                      boxes=det_boxes_r_,
                                                                      labels=det_category_r_,
                                                                      scores=det_scores_r_)
                save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                            det_detections_r[:, :, ::-1])

            # map rotated boxes from resized-image back to raw-image coordinates
            # via the 8-point (corner) representation
            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
            det_boxes_r_ = forward_convert(det_boxes_r_, False)
            # NOTE(review): under Python 2 these ratios are integer divisions
            # unless `from __future__ import division` is in effect — confirm.
            det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
            det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
            det_boxes_r_ = back_forward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]
            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            # dets_r rows: [label, score, x_c, y_c, w, h, theta]
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)),
                           i + 1, len(imgs))

    # BUG FIX: pickle requires a binary-mode handle ('w' fails under Python 3);
    # the context manager also closes the file deterministically.
    with open(cfgs.VERSION + '_detections_r.pkl', 'wb') as fw2:
        pickle.dump(all_boxes_r, fw2)
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):
    """Evaluate a rotated-box detector on a directory of images (HRSC2016).

    Builds the inference graph once, restores weights, runs detection per
    image, rescales boxes back to raw-image coordinates and collects them.

    :param img_dir: directory of test images
    :param det_net: network object with build_whole_detection_network()/get_restorer()
    :param num_imgs: unused in this body; kept for caller compatibility
    :param image_ext: image filename extension (e.g. '.jpg')
    :param draw_imgs: if True, save per-image visualizations under test_hrsc/
    :return: list of per-image arrays, rows [category, score, x_c, y_c, w, h, theta]
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    # v1d ResNets were trained with 0-1 scaled, mean/std-normalized input;
    # other backbones only subtract the pixel mean.
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # add batch dimension

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            # feed is flipped BGR->RGB to match the placeholder's expectation
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}
                )

            if draw_imgs:
                # keep only confidently-scored boxes for visualization
                detected_indices = det_scores_r_ >= cfgs.VIS_SCORE
                detected_scores = det_scores_r_[detected_indices]
                detected_boxes = det_boxes_r_[detected_indices]
                detected_categories = det_category_r_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.squeeze(resized_img, 0),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=True)

                save_dir = os.path.join('test_hrsc', cfgs.VERSION, 'hrsc2016_img_vis')
                tools.mkdir(save_dir)

                # drawn image is RGB; flip back to BGR for cv2.imwrite
                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                # rescale detections from resized-image space back to the raw
                # image: 8-point form -> scale x/y -> back to 5-param form
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = backward_convert(det_boxes_r_, False)

                x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                        det_boxes_r_[:, 3], det_boxes_r_[:, 4]

                boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
                # one row per detection: [category, score, x_c, y_c, w, h, theta]
                dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                    det_scores_r_.reshape(-1, 1),
                                    boxes_r))
                all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)

        return all_boxes_r
def inference(det_net, data_dir):
    """Run horizontal+rotated detection on every image in data_dir.

    Saves visualizations under cfgs.INFERENCE_SAVE_PATH/cfgs.VERSION,
    optionally writes VOC/pixel result files (module flag WRITE_VOC),
    and prints the average per-image inference time.

    :param det_net: detection network with build_whole_detection_network()
                    and get_restorer()
    :param data_dir: directory containing .jpg/.tif/.png images
    """
    TIME = 0
    TIME_NUM = 0

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=IS_RESIZE)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None,
        gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        imgs = os.listdir(data_dir)
        for i, a_img_name in enumerate(imgs):
            file_text, extension = os.path.splitext(a_img_name)
            # Skip anything that is not a supported image file.
            if extension not in ('.jpg', '.tif', '.png'):
                continue

            raw_img = cv2.imread(os.path.join(data_dir, a_img_name))

            start = time.time()
            resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_h, det_scores_h, det_category_h,
                     det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            TIME += end - start
            TIME_NUM += 1

            if WRITE_VOC:
                boxes = np.array(det_boxes_r_, np.int64)
                scores = np.array(det_scores_r_, np.float32)
                labels = np.array(det_category_r_, np.int32)
                det_save_dir = cfgs.INFERENCE_SAVE_PATH
                write_voc_results_file(boxes, labels, scores,
                                       a_img_name.split('.')[0], det_save_dir)
                write_pixel_results(boxes, labels, scores,
                                    a_img_name.split('.')[0], det_save_dir)

            det_detections_h = draw_box_in_img.draw_box_cv(
                np.squeeze(resized_img, 0),
                boxes=det_boxes_h_,
                labels=det_category_h_,
                scores=det_scores_h_)
            det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                np.squeeze(resized_img, 0),
                boxes=det_boxes_r_,
                labels=det_category_r_,
                scores=det_scores_r_)
            save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION)
            tools.mkdir(save_dir)
            if OUTPUT_H_IMG:
                cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg',
                            det_detections_h)
            cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                        det_detections_r)
            view_bar('{} cost {}s'.format(a_img_name, (end - start)),
                     i + 1, len(imgs))

        # BUG FIX: guard against ZeroDivisionError when the directory is empty
        # or no file passed the extension filter.
        if TIME_NUM > 0:
            print('avg time:{}'.format(TIME / TIME_NUM))
        else:
            print('no images processed')
def test_mlt(det_net, real_test_img_list, gpu_ids, show_box, txt_name):
    """Multi-GPU MLT test driver.

    Splits the image list evenly across the GPUs in gpu_ids, forks one
    worker process per GPU, then drains the shared result queue: each result
    is either visualized (show_box) or written as an ICDAR-style res_*.txt.
    Processed image ids are appended to txt_name as a resume log.

    :param det_net: detection network passed through to each worker
    :param real_test_img_list: list of image paths to test
    :param gpu_ids: comma-separated GPU id string, e.g. '0,1'
    :param show_box: if True draw boxes instead of writing result txts
    :param txt_name: path of the append-only log of finished images
    """
    save_path = os.path.join('./test_mlt', cfgs.VERSION)
    tools.mkdir(save_path)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(gpu_ids.strip().split(','))
    nr_image = math.ceil(nr_records / gpu_num)  # images per GPU (last may get fewer)

    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(gpu_ids.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker, args=(int(gpu_id), split_records, det_net, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    # Exactly one queue item arrives per input image, so drain nr_records items.
    for i in range(nr_records):
        res = result_queue.get()

        if res['boxes'].shape[0] == 0:
            # No detections: still create an empty res_*.txt so the evaluator
            # sees an entry for this image, and log it as finished.
            fw_txt_dt = open(
                os.path.join(
                    save_path,
                    'res_{}.txt'.format(res['image_id'].split(
                        '/')[-1].split('.')[0].split('ts_')[1])), 'w')
            fw_txt_dt.close()
            pbar.update(1)

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()
            continue

        # Unpack the 8-point quadrilateral corners...
        x1, y1, x2, y2, x3, y3, x4, y4 = res['boxes'][:, 0], res['boxes'][:, 1], res['boxes'][:, 2], res['boxes'][:, 3],\
                                         res['boxes'][:, 4], res['boxes'][:, 5], res['boxes'][:, 6], res['boxes'][:, 7]

        # ...and rescale them from network-input space back to the raw image.
        x1, y1 = x1 * res['scales'][0], y1 * res['scales'][1]
        x2, y2 = x2 * res['scales'][0], y2 * res['scales'][1]
        x3, y3 = x3 * res['scales'][0], y3 * res['scales'][1]
        x4, y4 = x4 * res['scales'][0], y4 * res['scales'][1]

        boxes = np.transpose(np.stack([x1, y1, x2, y2, x3, y3, x4, y4]))

        if show_box:
            # 5-param form for the drawing helper
            boxes = backward_convert(boxes, False)

            nake_name = res['image_id'].split('/')[-1]
            draw_path = os.path.join(save_path, nake_name)
            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=boxes,
                labels=res['labels'],
                scores=res['scores'],
                method=1,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)
        else:
            # ICDAR format: one 'x1,y1,...,x4,y4,score' line per detection.
            fw_txt_dt = open(
                os.path.join(
                    save_path,
                    'res_{}.txt'.format(res['image_id'].split(
                        '/')[-1].split('.')[0].split('ts_')[1])), 'w')

            for ii, box in enumerate(boxes):
                line = '%d,%d,%d,%d,%d,%d,%d,%d,%.3f\n' % (
                    box[0], box[1], box[2], box[3],
                    box[4], box[5], box[6], box[7],
                    res['scores'][ii])
                fw_txt_dt.write(line)
            fw_txt_dt.close()

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])
        pbar.update(1)

    for p in procs:
        p.join()
def test_dota(det_net, real_test_img_list, args, txt_name):
    """Multi-GPU DOTA test driver for the DCL (angle-classification) model.

    Forks one worker per GPU in args.gpus, drains one result per image from
    the queue, and either visualizes detections (args.show_box) or records
    each kept box's discretized angle to dcl_meta.tsv alongside its raw
    logits, which are saved as final_logits.npy at the end.

    :param det_net: detection network passed through to each worker
    :param real_test_img_list: list of image paths
    :param args: parsed CLI args; uses args.gpus and args.show_box
    :param txt_name: append-only log of finished image ids
    """
    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(args.gpus.strip().split(','))
    nr_image = math.ceil(nr_records / gpu_num)  # images per worker

    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(args.gpus.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker, args=(int(gpu_id), split_records, det_net, args, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    log_dir = './dcl_log/{}'.format(cfgs.VERSION)
    tools.mkdir(log_dir)
    # one angle label per kept detection, aligned row-for-row with final_logits
    fw_tsv = open(os.path.join(log_dir, 'dcl_meta.tsv'), 'w')
    # fw_tsv.write("Label\n")
    final_logits = []

    for i in range(nr_records):
        res = result_queue.get()

        if args.show_box:
            nake_name = res['image_id'].split('/')[-1]
            tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
            draw_path = os.path.join(save_path, 'dota_img_vis', nake_name)
            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            # 8-point -> 5-param boxes for drawing
            detected_boxes = backward_convert(res['boxes'], with_label=False)

            # keep only confidently-scored detections in the visualization
            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            detected_scores = res['scores'][detected_indices]
            detected_boxes = detected_boxes[detected_indices]
            detected_categories = res['labels'][detected_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=detected_boxes,
                labels=detected_categories,
                scores=detected_scores,
                method=1,
                head=np.ones_like(detected_scores) * -1,
                is_csl=True,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)
        else:
            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            res['scores'] = res['scores'][detected_indices]
            res['boxes'] = res['boxes'][detected_indices]
            res['labels'] = res['labels'][detected_indices]

            rboxes = backward_convert(res['boxes'], with_label=False)
            # convert angle representation (range -1 flag; see helper for semantics)
            rboxes = coordinate_present_convert(rboxes, -1, False)
            rlogits = res['logits'][detected_indices]

            for ii, rb in enumerate(rboxes):
                # rb[-1] is the (converted) angle; stored as an integer label
                fw_tsv.write("%d\n" % (int(rb[-1])))
                final_logits.append(rlogits[ii])

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])
        pbar.update(1)

    for p in procs:
        p.join()

    fw_tsv.close()
    final_logits = np.array(final_logits)
    np.save(os.path.join(log_dir, "final_logits.npy"), final_logits)
def inference(det_net, file_paths, des_folder, h_len, w_len, h_overlap, w_overlap, save_res=False):
    """Sliding-window horizontal-box inference over large (DOTA) images.

    Each image is tiled into h_len x w_len crops with the given overlaps,
    detections are shifted back to full-image coordinates, then per-class NMS
    merges duplicates from overlapping tiles. Results are either visualized
    (save_res) or appended to DOTA Task2 txt files. ./tmp.txt records
    finished images so an interrupted run can be resumed.

    :param det_net: detection network with build_whole_detection_network()/get_restorer()
    :param file_paths: list of image paths
    :param des_folder: destination folder for visualizations
    :param h_len, w_len: tile height/width in pixels
    :param h_overlap, w_overlap: tile overlaps in pixels
    :param save_res: True -> save drawn images; False -> write eval txts
    """
    # The score threshold doubles as a visualization filter, so the two modes
    # need opposite settings; fail fast if cfgs.py disagrees.
    if save_res:
        assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.5) in cfgs.py'
    else:
        assert cfgs.SHOW_SCORE_THRSHOLD <= 0.005, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.00) in cfgs.py'

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    if cfgs.NET_NAME in ['resnet101_v1d']:
        # v1d backbone expects 0-1 scaled, mean/std-normalized input
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=False)

    det_boxes_h, det_scores_h, det_category_h = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # Resume log: images already listed in ./tmp.txt are skipped below.
        if not os.path.exists('./tmp.txt'):
            fw = open('./tmp.txt', 'w')
            fw.close()

        fr = open('./tmp.txt', 'r')
        pass_img = fr.readlines()
        fr.close()

        for count, img_path in enumerate(file_paths):
            # NOTE(review): fw is opened before the skip check, so the
            # `continue` below leaks this handle; it is only closed at the end
            # of a fully-processed iteration.
            fw = open('./tmp.txt', 'a+')
            if img_path + '\n' in pass_img:
                continue
            start = timer()
            img = cv2.imread(img_path)

            box_res = []
            label_res = []
            score_res = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            # Pad images smaller than one tile up to tile size (zero padding).
            if imgH < h_len:
                temp = np.zeros([h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = h_len

            if imgW < w_len:
                temp = np.zeros([imgH, w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = w_len

            # Slide a h_len x w_len window with the requested overlaps; the
            # last tile in each direction is clamped to end at the image edge.
            for hh in range(0, imgH, h_len - h_overlap):
                if imgH - hh - 1 < h_len:
                    hh_ = imgH - h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, w_len - w_overlap):
                    if imgW - ww - 1 < w_len:
                        ww_ = imgW - w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :]

                    # feed flipped BGR->RGB
                    det_boxes_h_, det_scores_h_, det_category_h_ = \
                        sess.run(
                            [det_boxes_h, det_scores_h, det_category_h],
                            feed_dict={img_plac: src_img[:, :, ::-1]}
                        )

                    if len(det_boxes_h_) > 0:
                        for ii in range(len(det_boxes_h_)):
                            # shift tile-local coordinates to full-image space
                            box = det_boxes_h_[ii]
                            box[0] = box[0] + ww_
                            box[1] = box[1] + hh_
                            box[2] = box[2] + ww_
                            box[3] = box[3] + hh_
                            box_res.append(box)
                            label_res.append(det_category_h_[ii])
                            score_res.append(det_scores_h_[ii])

            box_res = np.array(box_res)
            label_res = np.array(label_res)
            score_res = np.array(score_res)

            box_res_, label_res_, score_res_ = [], [], []

            # Per-class NMS IoU thresholds, hand-tuned for DOTA categories.
            h_threshold = {'roundabout': 0.35, 'tennis-court': 0.35, 'swimming-pool': 0.4,
                           'storage-tank': 0.3, 'soccer-ball-field': 0.3, 'small-vehicle': 0.4,
                           'ship': 0.35, 'plane': 0.35, 'large-vehicle': 0.4, 'helicopter': 0.4,
                           'harbor': 0.3, 'ground-track-field': 0.4, 'bridge': 0.3,
                           'basketball-court': 0.4, 'baseball-diamond': 0.3}

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = box_res[index]
                tmp_label_h = label_res[index]
                tmp_score_h = score_res[index]

                # NMS input format: [x1, y1, x2, y2, score]
                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                                     thresh=h_threshold[LABEl_NAME_MAP[sub_class]],
                                     max_output_size=500)

                box_res_.extend(np.array(tmp_boxes_h)[inx])
                score_res_.extend(np.array(tmp_score_h)[inx])
                label_res_.extend(np.array(tmp_label_h)[inx])

            time_elapsed = timer() - start

            if save_res:
                scores = np.array(score_res_)
                labels = np.array(label_res_)
                boxes = np.array(box_res_)
                valid_show = scores > cfgs.SHOW_SCORE_THRSHOLD
                scores = scores[valid_show]
                boxes = boxes[valid_show]
                labels = labels[valid_show]
                det_detections_h = draw_box_in_img.draw_boxes_with_label_and_scores(np.array(img, np.float32),
                                                                                    boxes=np.array(boxes),
                                                                                    labels=np.array(labels),
                                                                                    scores=np.array(scores),
                                                                                    in_graph=False)
                save_dir = os.path.join(des_folder, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + img_path.split('/')[-1].split('.')[0] + '_h.jpg',
                            det_detections_h)

                view_bar('{} cost {}s'.format(img_path.split('/')[-1].split('.')[0],
                                              time_elapsed), count + 1, len(file_paths))
            else:
                # eval txt
                CLASS_DOTA = NAME_LABEL_MAP.keys()

                # Task2: one append-mode txt per class, shared across images.
                write_handle_h = {}
                txt_dir_h = os.path.join('txt_output', cfgs.VERSION + '_h')
                tools.mkdir(txt_dir_h)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class] = open(os.path.join(txt_dir_h, 'Task2_%s.txt' % sub_class), 'a+')

                for i, hbox in enumerate(box_res_):
                    # DOTA Task2 line: image_id score x1 y1 x2 y2
                    command = '%s %.3f %.1f %.1f %.1f %.1f\n' % (img_path.split('/')[-1].split('.')[0],
                                                                 score_res_[i],
                                                                 hbox[0], hbox[1], hbox[2], hbox[3])
                    write_handle_h[LABEl_NAME_MAP[label_res_[i]]].write(command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class].close()

                view_bar('%s cost %.3fs' % (img_path.split('/')[-1].split('.')[0],
                                            time_elapsed), count + 1, len(file_paths))

            # mark this image as done in the resume log
            fw.write('{}\n'.format(img_path))
            fw.close()

    # all images processed: the resume log is no longer needed
    os.remove('./tmp.txt')
def eval_with_plac(img_dir, det_net, image_ext, draw_imgs=False):
    """Evaluate a rotated-box detector against XML ground truth and pickle results.

    For every image: run detection, filter by score >= 0.4, apply rotated NMS,
    keep only elongated boxes (aspect ratio > 2 or <= 1/2), rescale to raw-image
    coordinates, and append [category, score, x_c, y_c, w, h, theta] rows.

    :param img_dir: directory of test images
    :param det_net: network with build_whole_detection_network()/get_restorer()
    :param image_ext: image filename extension, e.g. '.jpg'
    :param draw_imgs: if True, save GT and detection visualizations
    :return: None; detections are pickled to <VERSION>_detections_{h,r}.pkl
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not GBR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=False)

    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # NOTE: all_boxes_h is never populated below; it is kept (and pickled
        # empty) so downstream readers of the _h pickle keep working.
        all_boxes_h = []
        all_boxes_r = []
        imgs = os.listdir(img_dir)
        for i, a_img_name in enumerate(imgs):
            a_img_name = a_img_name.split(image_ext)[0]

            # Load the ground-truth rotated boxes for visualization.
            recs = {}
            recs[a_img_name] = parse_rec(
                os.path.join(test_annotation_path, a_img_name + '.xml'))
            bbox = np.squeeze(np.array([x['bbox'] for x in recs[a_img_name]]))
            # np.squeeze collapses a single GT box to 1-D; restore the row axis.
            if len(bbox.shape) == 1:
                bbox = np.expand_dims(bbox, axis=0)
            labels = bbox[:, -1]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()

            # Score filter: compute the mask once instead of three times
            # (the original recomputed it per array).
            score_keep = det_scores_r_ >= 0.4
            det_boxes_r_ = det_boxes_r_[score_keep]
            det_category_r_ = det_category_r_[score_keep]
            det_scores_r_ = det_scores_r_[score_keep]

            keep = nms_rotate.nms_rotate_cpu(det_boxes_r_, det_scores_r_, 0.3, 20)
            det_boxes_r_ = det_boxes_r_[keep]
            det_scores_r_ = det_scores_r_[keep]
            det_category_r_ = det_category_r_[keep]

            ##### box_ratio > 2 or < 1/2
            # Keep only clearly elongated boxes; compute the quotient once.
            ratio = det_boxes_r_[:, 2] / det_boxes_r_[:, 3]
            index = (ratio > 2) | (ratio <= 0.5)
            det_boxes_r_ = det_boxes_r_[index]
            det_scores_r_ = det_scores_r_[index]
            det_category_r_ = det_category_r_[index]

            if draw_imgs:
                # Ground truth (suffix _h) and detections (suffix _r).
                det_detections_h = draw_box_in_img.draw_rotate_box_cv1(
                    np.squeeze(resized_img, 0),
                    boxes=bbox,
                    labels=labels,
                    scores=np.ones(bbox.shape[0]))
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                    np.squeeze(resized_img, 0),
                    boxes=det_boxes_r_,
                    labels=det_category_r_,
                    scores=det_scores_r_)
                save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION)
                tools.mkdir(save_dir)
                # flip RGB -> BGR for cv2.imwrite
                cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg',
                            det_detections_h[:, :, ::-1])
                cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                # Rescale detections back to raw-image coordinates:
                # 8-point form -> scale x/y -> back to 5-param form.
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = back_forward_convert(det_boxes_r_, False)

                x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                        det_boxes_r_[:, 3], det_boxes_r_[:, 4]

                boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
                dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                    det_scores_r_.reshape(-1, 1),
                                    boxes_r))
                all_boxes_r.append(dets_r)

            tools.view_bar(
                '{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                len(imgs))

        # BUG FIX: pickle requires binary-mode files ('w' fails on Python 3);
        # the with-blocks also close/flush the files deterministically.
        with open(cfgs.VERSION + '_detections_h.pkl', 'wb') as fw1:
            pickle.dump(all_boxes_h, fw1)
        with open(cfgs.VERSION + '_detections_r.pkl', 'wb') as fw2:
            pickle.dump(all_boxes_r, fw2)
def test_dota(det_net, real_test_img_list, args, txt_name):
    """Multi-GPU DOTA Task1 test driver.

    Forks one worker per GPU in args.gpus, consumes one result per image from
    the shared queue, and either visualizes the detections (args.show_box) or
    appends them to per-class DOTA Task1 txt files. Finished image ids are
    appended to txt_name as a resume log.

    :param det_net: detection network passed through to each worker
    :param real_test_img_list: list of image paths
    :param args: parsed CLI args; uses args.gpus and args.show_box
    :param txt_name: append-only log of finished image ids
    """
    save_path = os.path.join('./test_dota', cfgs.VERSION)

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(args.gpus.strip().split(','))
    nr_image = math.ceil(nr_records / gpu_num)  # images per worker

    result_queue = Queue(500)
    procs = []

    for i, gpu_id in enumerate(args.gpus.strip().split(',')):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker, args=(int(gpu_id), split_records, det_net, args, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    for i in range(nr_records):
        res = result_queue.get()

        if args.show_box:
            nake_name = res['image_id'].split('/')[-1]
            tools.mkdir(os.path.join(save_path, 'dota_img_vis'))
            draw_path = os.path.join(save_path, 'dota_img_vis', nake_name)
            draw_img = np.array(cv2.imread(res['image_id']), np.float32)

            # keep only confidently-scored detections in the visualization
            detected_indices = res['scores'] >= cfgs.VIS_SCORE
            detected_scores = res['scores'][detected_indices]
            detected_boxes = res['boxes'][detected_indices]
            detected_categories = res['labels'][detected_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                draw_img,
                boxes=detected_boxes,
                labels=detected_categories,
                scores=detected_scores,
                method=1,
                in_graph=False)
            cv2.imwrite(draw_path, final_detections)
        else:
            CLASS_DOTA = NAME_LABEL_MAP.keys()
            write_handle = {}

            # one append-mode txt per class, shared across images
            tools.mkdir(os.path.join(save_path, 'dota_res'))
            for sub_class in CLASS_DOTA:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class] = open(
                    os.path.join(save_path, 'dota_res', 'Task1_%s.txt' % sub_class), 'a+')

            # 5-param rotated boxes -> 8-point quadrilaterals for Task1 format
            rboxes = forward_convert(res['boxes'], with_label=False)

            # NOTE(review): this inner `i` shadows the outer loop index;
            # harmless because the outer `i` is reassigned each iteration,
            # but worth renaming.
            for i, rbox in enumerate(rboxes):
                # Task1 line: image_id score x1 y1 x2 y2 x3 y3 x4 y4
                command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % (
                    res['image_id'].split('/')[-1].split('.')[0],
                    res['scores'][i],
                    rbox[0], rbox[1], rbox[2], rbox[3],
                    rbox[4], rbox[5], rbox[6], rbox[7],
                )
                write_handle[LABEL_NAME_MAP[res['labels'][i]]].write(command)

            for sub_class in CLASS_DOTA:
                if sub_class == 'back_ground':
                    continue
                write_handle[sub_class].close()

            fw = open(txt_name, 'a+')
            fw.write('{}\n'.format(res['image_id'].split('/')[-1]))
            fw.close()

        pbar.set_description("Test image %s" % res['image_id'].split('/')[-1])
        pbar.update(1)

    for p in procs:
        p.join()
def test_coco(det_net, real_test_img_list, eval_data, draw_imgs=False):
    """Run detection on COCO test-dev images and write a COCO-format JSON.

    Detections with score >= 0.01 are rescaled to raw-image coordinates,
    converted to [x, y, w, h] and dumped to eval_coco/<VERSION>/coco_test-dev.json.

    :param det_net: network with build_whole_detection_network()/get_restorer()
    :param real_test_img_list: list of dicts with 'file_name' and 'id' keys
    :param eval_data: directory containing the test images
    :param draw_imgs: if True also save visualizations above SHOW_SCORE_THRSHOLD
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    # v1d ResNets expect 0-1 scaled, mean/std-normalized input
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)  # add batch dimension

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_test-dev.json'), 'w')
        coco_det = []
        for i, a_img in enumerate(real_test_img_list):
            raw_img = cv2.imread(os.path.join(eval_data, a_img['file_name']))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()

            # keep low-confidence boxes too: COCO AP integrates over scores
            eval_indices = detected_scores >= 0.01
            detected_scores = detected_scores[eval_indices]
            detected_boxes = detected_boxes[eval_indices]
            detected_categories = detected_categories[eval_indices]

            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # undo the input normalization so the image is displayable
                draw_img = np.squeeze(resized_img, 0)
                if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)
                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)
                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + '{}.jpg'.format(a_img['id']),
                            final_detections[:, :, ::-1])

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            # rescale from resized-image space back to the raw image
            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            # COCO bbox format is [x, y, width, height]
            boxes = np.transpose(np.stack([xmin, ymin, xmax-xmin, ymax-ymin]))

            # cost much time
            for j, box in enumerate(boxes):
                coco_det.append({'bbox': [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
                                 'score': float(detected_scores[j]), 'image_id': a_img['id'],
                                 'category_id': int(classes_originID[LABEl_NAME_MAP[detected_categories[j]]])})

            tools.view_bar('%s image cost %.3fs' % (a_img['id'], (end - start)), i + 1, len(real_test_img_list))

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
def eval_coco(det_net, real_test_img_list, draw_imgs=False):
    """Run detection on a COCO minival list and write a COCO-format JSON.

    Each element of real_test_img_list is a JSON line with 'fpath' and 'ID'
    keys; images are read from /data/COCO/val2017. Detections with score
    >= 0.01 are rescaled to raw-image coordinates and dumped as COCO
    [x, y, w, h] boxes.

    :param det_net: network with build_whole_detection_network()/get_restorer()
    :param real_test_img_list: iterable of JSON-line records
    :param draw_imgs: if True also save visualizations (score >= 0.3)
    :return: path of the written coco_minival.json
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    # v1d ResNets expect 0-1 scaled, mean/std-normalized input
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # add batch dimension

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_minival.json'), 'w')
        coco_det = []

        pbar = tqdm(real_test_img_list)
        for a_img in pbar:
            record = json.loads(a_img)
            # filenames in the record are prefixed; the val2017 file is the
            # part after the last underscore
            img_path = os.path.join('/data/COCO/val2017',
                                    record['fpath'].split('_')[-1])
            raw_img = cv2.imread(img_path)
            # raw_img = cv2.imread(record['fpath'])
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )

            # keep low-confidence boxes too: COCO AP integrates over scores
            eval_indices = detected_scores >= 0.01
            detected_scores = detected_scores[eval_indices]
            detected_boxes = detected_boxes[eval_indices]
            detected_categories = detected_categories[eval_indices]

            if draw_imgs:
                show_indices = detected_scores >= 0.3
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # undo the input normalization so the image is displayable
                draw_img = np.squeeze(resized_img, 0)
                if cfgs.NET_NAME in [
                        'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
                ]:
                    draw_img = (draw_img * np.array(cfgs.PIXEL_STD) +
                                np.array(cfgs.PIXEL_MEAN_)) * 255
                else:
                    draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                    draw_img,
                    boxes=show_boxes,
                    labels=show_categories,
                    scores=show_scores,
                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)
                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + record['ID'],
                            final_detections[:, :, ::-1])

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            # rescale from resized-image space back to the raw image
            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            # COCO bbox format is [x, y, width, height]
            boxes = np.transpose(
                np.stack([xmin, ymin, xmax - xmin, ymax - ymin]))

            # cost much time
            for j, box in enumerate(boxes):
                coco_det.append({
                    'bbox': [
                        float(box[0]), float(box[1]), float(box[2]),
                        float(box[3])
                    ],
                    'score': float(detected_scores[j]),
                    # numeric COCO image id parsed from the filename
                    'image_id': int(record['ID'].split('.jpg')[0].split('_000000')[-1]),
                    'category_id': int(classes_originID[LABEL_NAME_MAP[
                        detected_categories[j]]])
                })

            pbar.set_description("Eval image %s" % record['ID'])

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()

    return os.path.join(save_path, 'coco_minival.json')
def train():
    """Build the Faster R-CNN training graph and run the training loop.

    Reads all hyper-parameters from the project-level ``cfgs`` module,
    periodically prints loss info, writes TensorBoard summaries and saves
    checkpoints under cfgs.TRAINED_CKPT/cfgs.VERSION.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        # each gt row is [xmin, ymin, xmax, ymax, label]
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)
    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    # step-wise learning-rate decay at the two DECAY_STEP boundaries
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
    # optimizer = tf.train.AdamOptimizer(lr)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    # Create session
    # allow_soft_placement=True: ops that cannot run on the GPU automatically
    # fall back to the CPU
    config = tf.ConfigProto(allow_soft_placement=True)
    # grow GPU memory on demand instead of grabbing all of it up front
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        # directory where TensorBoard summaries are saved
        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
        timer = Timer()
        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            # Three cadences: plain step / loss printout / summary write.
            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])
            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch,
                             rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss,
                             fastrcnn_total_loss, total_loss])
                    end = time.time()
                    print('{}: step:{}/{}iter'.format(training_time,
                                                      global_stepnp,
                                                      cfgs.MAX_ITERATION))
                    print('>>> rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f}\n'
                          '>>> fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f}\n'
                          '>>> single_time:{:.4f}s | total_loss:{:.4f} '\
                          .format(rpnLocLoss, rpnClsLoss,rpnTotalLoss,
                                  fastrcnnLocLoss, fastrcnnClsLoss,
                                  fastrcnnTotalLoss,(end-start), totalLoss))
                    print(
                        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                    )
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print(' weights had been saved')

        coord.request_stop()
        coord.join(threads)
def eval_coco(det_net, real_test_img_list, draw_imgs=False):
    """Multi-scale COCO-minival evaluation: detect at several scales, merge
    with per-class NMS, and dump detections as COCO-format JSON.

    :param det_net: detection network object providing
        build_whole_detection_network() and get_restorer().
    :param real_test_img_list: list of JSON-encoded records with 'fpath' and
        'ID' keys.
    :param draw_imgs: if True, save visualizations of detections with score
        >= cfgs.SHOW_SCORE_THRSHOLD under cfgs.TEST_SAVE_PATH.
    :return: path of the written COCO-format result file.
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    # is_resize=False: the image is already resized per-scale outside the
    # graph (cv2.resize below), so the graph only pads/limits it.
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH,
        is_resize=False)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(
            cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_minival_ms.json'), 'w')
        coco_det = []
        for i, a_img in enumerate(real_test_img_list):
            record = json.loads(a_img)
            raw_img = cv2.imread(record['fpath'])
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            detected_scores_, detected_boxes_, detected_categories_ = [], [], []
            # run detection at each test scale; note cv2.resize to (ss, ss)
            # makes the image square, distorting aspect ratio.
            for ss in [600, 800, 1000, 1200]:  # cfgs.IMG_SHORT_SIDE_LEN:
                img_resize = cv2.resize(raw_img, (ss, ss))

                resized_img, tmp_detected_boxes, tmp_detected_scores, tmp_detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}  # cv is BGR. But need RGB
                    )

                eval_indices = tmp_detected_scores >= 0.01
                tmp_detected_scores = tmp_detected_scores[eval_indices]
                tmp_detected_boxes = tmp_detected_boxes[eval_indices]
                tmp_detected_categories = tmp_detected_categories[eval_indices]

                xmin, ymin, xmax, ymax = tmp_detected_boxes[:, 0], tmp_detected_boxes[:, 1], \
                                         tmp_detected_boxes[:, 2], tmp_detected_boxes[:, 3]

                # resized_img carries the batch dim: shape (1, h, w, 3)
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                # map boxes from this scale back to raw-image coordinates so
                # results from all scales can be merged
                xmin = xmin * raw_w / resized_w
                xmax = xmax * raw_w / resized_w
                ymin = ymin * raw_h / resized_h
                ymax = ymax * raw_h / resized_h

                resize_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))

                detected_scores_.append(tmp_detected_scores)
                detected_boxes_.append(resize_boxes)
                detected_categories_.append(tmp_detected_categories)

            detected_scores_ = np.concatenate(detected_scores_)
            detected_boxes_ = np.concatenate(detected_boxes_)
            detected_categories_ = np.concatenate(detected_categories_)

            # per-class NMS over the merged multi-scale detections
            detected_scores, detected_boxes, detected_categories = [], [], []
            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(detected_categories_ == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = detected_boxes_[index]
                tmp_label_h = detected_categories_[index]
                tmp_score_h = detected_scores_[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                                     thresh=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                     max_output_size=500)

                detected_boxes.extend(np.array(tmp_boxes_h)[inx])
                detected_scores.extend(np.array(tmp_score_h)[inx])
                detected_categories.extend(np.array(tmp_label_h)[inx])

            detected_scores = np.array(detected_scores)
            detected_boxes = np.array(detected_boxes)
            detected_categories = np.array(detected_categories)

            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                #     draw_img = (raw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                # else:
                #     draw_img = raw_img + np.array(cfgs.PIXEL_MEAN)
                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)
                raw_img = np.array(raw_img, np.float32)
                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                    raw_img,
                    boxes=show_boxes,
                    labels=show_categories,
                    scores=show_scores,
                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + record['ID'],
                            final_detections)

            # cost much time
            for j, box in enumerate(detected_boxes):
                # COCO bbox format is [x, y, width, height]
                coco_det.append({
                    'bbox': [float(box[0]), float(box[1]),
                             float(box[2] - box[0]), float(box[3] - box[1])],
                    'score': float(detected_scores[j]),
                    'image_id': int(record['ID'].split('.jpg')[0].split('_000000')[-1]),
                    # NOTE(review): LABEl_NAME_MAP spelling (lowercase 'l')
                    # matches the project module — verify against its import.
                    'category_id': int(classes_originID[LABEl_NAME_MAP[detected_categories[j]]])})

            end = time.time()
            tools.view_bar(
                '%s image cost %.3fs' % (record['ID'], (end - start)), i + 1,
                len(real_test_img_list))

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
        return os.path.join(save_path, 'coco_minival_ms.json')
def inference(det_net, file_paths, des_folder, h_len, w_len, h_overlap,
              w_overlap, save_res=False):
    """Sliding-window inference over large (DOTA-style) images.

    Each image is split into overlapping h_len x w_len crops, the detector is
    run per crop, crop-local boxes are shifted back to image coordinates, and
    the merged detections are reduced with per-class NMS.

    :param det_net: detection network object providing
        build_whole_detection_network() and get_restorer().
    :param file_paths: list of image paths to process.
    :param des_folder: destination folder for visualized results (used only
        when save_res is True).
    :param h_len: crop height in pixels.
    :param w_len: crop width in pixels.
    :param h_overlap: vertical overlap between adjacent crops, in pixels.
    :param w_overlap: horizontal overlap between adjacent crops, in pixels.
    :param save_res: True -> draw and save result images under
        des_folder/cfgs.VERSION; False -> append DOTA Task1/Task2 txt results
        under txt_output/.
    """
    if save_res:
        assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.5) in cfgs.py'
    else:
        assert cfgs.SHOW_SCORE_THRSHOLD < 0.005, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.00) in cfgs.py'

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    # is_resize=False: crops are fed at their native h_len x w_len size
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        is_resize=False)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for count, img_path in enumerate(file_paths):
            start = timer()
            img = cv2.imread(img_path)

            box_res = []
            label_res = []
            score_res = []
            box_res_rotate = []
            label_res_rotate = []
            score_res_rotate = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            # zero-pad images smaller than one crop so the window fits
            if imgH < h_len:
                temp = np.zeros([h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = h_len

            if imgW < w_len:
                temp = np.zeros([imgH, w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = w_len

            for hh in range(0, imgH, h_len - h_overlap):
                # clamp the last window so it stays inside the image
                if imgH - hh - 1 < h_len:
                    hh_ = imgH - h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, w_len - w_overlap):
                    if imgW - ww - 1 < w_len:
                        ww_ = imgW - w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :]

                    det_boxes_h_, det_scores_h_, det_category_h_, \
                    det_boxes_r_, det_scores_r_, det_category_r_ = \
                        sess.run(
                            [det_boxes_h, det_scores_h, det_category_h,
                             det_boxes_r, det_scores_r, det_category_r],
                            feed_dict={img_plac: src_img[:, :, ::-1]}
                        )

                    # shift crop-local horizontal boxes to image coordinates
                    if len(det_boxes_h_) > 0:
                        for ii in range(len(det_boxes_h_)):
                            box = det_boxes_h_[ii]
                            box[0] = box[0] + ww_
                            box[1] = box[1] + hh_
                            box[2] = box[2] + ww_
                            box[3] = box[3] + hh_
                            box_res.append(box)
                            label_res.append(det_category_h_[ii])
                            score_res.append(det_scores_h_[ii])
                    # rotated boxes are [x_c, y_c, w, h, theta]: only the
                    # center needs shifting
                    if len(det_boxes_r_) > 0:
                        for ii in range(len(det_boxes_r_)):
                            box_rotate = det_boxes_r_[ii]
                            box_rotate[0] = box_rotate[0] + ww_
                            box_rotate[1] = box_rotate[1] + hh_
                            box_res_rotate.append(box_rotate)
                            label_res_rotate.append(det_category_r_[ii])
                            score_res_rotate.append(det_scores_r_[ii])

            box_res = np.array(box_res)
            label_res = np.array(label_res)
            score_res = np.array(score_res)
            box_res_rotate = np.array(box_res_rotate)
            label_res_rotate = np.array(label_res_rotate)
            score_res_rotate = np.array(score_res_rotate)

            box_res_rotate_, label_res_rotate_, score_res_rotate_ = [], [], []
            box_res_, label_res_, score_res_ = [], [], []

            # per-class NMS IoU thresholds (rotated boxes)
            r_threshold = {
                'roundabout': 0.1, 'tennis-court': 0.3, 'swimming-pool': 0.1,
                'storage-tank': 0.2, 'soccer-ball-field': 0.3,
                'small-vehicle': 0.2, 'ship': 0.05, 'plane': 0.3,
                'large-vehicle': 0.1, 'helicopter': 0.2, 'harbor': 0.0001,
                'ground-track-field': 0.3, 'bridge': 0.0001,
                'basketball-court': 0.3, 'baseball-diamond': 0.3
            }
            # per-class NMS IoU thresholds (horizontal boxes)
            h_threshold = {
                'roundabout': 0.35, 'tennis-court': 0.35,
                'swimming-pool': 0.4, 'storage-tank': 0.3,
                'soccer-ball-field': 0.3, 'small-vehicle': 0.4, 'ship': 0.35,
                'plane': 0.35, 'large-vehicle': 0.4, 'helicopter': 0.4,
                'harbor': 0.3, 'ground-track-field': 0.4, 'bridge': 0.3,
                'basketball-court': 0.4, 'baseball-diamond': 0.3
            }

            # per-class NMS over the merged rotated detections
            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res_rotate == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_r = box_res_rotate[index]
                tmp_label_r = label_res_rotate[index]
                tmp_score_r = score_res_rotate[index]

                tmp_boxes_r = np.array(tmp_boxes_r)
                tmp = np.zeros(
                    [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_r
                tmp[:, -1] = np.array(tmp_score_r)

                try:
                    inx = nms_rotate.nms_rotate_cpu(
                        boxes=np.array(tmp_boxes_r),
                        scores=np.array(tmp_score_r),
                        iou_threshold=r_threshold[LABEl_NAME_MAP[sub_class]],
                        max_output_size=500)
                except:
                    # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms
                    jitter = np.zeros(
                        [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1])
                    jitter[:, 0] += np.random.rand(tmp_boxes_r.shape[0], ) / 1000
                    inx = rotate_gpu_nms(
                        np.array(tmp, np.float32) +
                        np.array(jitter, np.float32),
                        float(r_threshold[LABEl_NAME_MAP[sub_class]]), 0)

                box_res_rotate_.extend(np.array(tmp_boxes_r)[inx])
                score_res_rotate_.extend(np.array(tmp_score_r)[inx])
                label_res_rotate_.extend(np.array(tmp_label_r)[inx])

            # per-class NMS over the merged horizontal detections
            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = box_res[index]
                tmp_label_h = label_res[index]
                tmp_score_h = score_res[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros(
                    [tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(
                    dets=np.array(tmp, np.float32),
                    thresh=h_threshold[LABEl_NAME_MAP[sub_class]],
                    max_output_size=500)

                box_res_.extend(np.array(tmp_boxes_h)[inx])
                score_res_.extend(np.array(tmp_score_h)[inx])
                label_res_.extend(np.array(tmp_label_h)[inx])

            time_elapsed = timer() - start

            if save_res:
                det_detections_h = draw_box_in_img.draw_box_cv(
                    np.array(img, np.float32) - np.array(cfgs.PIXEL_MEAN),
                    boxes=np.array(box_res_),
                    labels=np.array(label_res_),
                    scores=np.array(score_res_))
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(
                    np.array(img, np.float32) - np.array(cfgs.PIXEL_MEAN),
                    boxes=np.array(box_res_rotate_),
                    labels=np.array(label_res_rotate_),
                    scores=np.array(score_res_rotate_))
                save_dir = os.path.join(des_folder, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(
                    save_dir + '/' + img_path.split('/')[-1].split('.')[0] +
                    '_h.jpg', det_detections_h)
                cv2.imwrite(
                    save_dir + '/' + img_path.split('/')[-1].split('.')[0] +
                    '_r.jpg', det_detections_r)

                view_bar(
                    '{} cost {}s'.format(
                        img_path.split('/')[-1].split('.')[0], time_elapsed),
                    count + 1, len(file_paths))
            else:
                # eval txt
                CLASS_DOTA = NAME_LABEL_MAP.keys()

                # Task1: rotated boxes + derived axis-aligned minAreaRect boxes
                write_handle_r = {}
                write_handle_h_ = {}
                txt_dir_r = os.path.join('txt_output', cfgs.VERSION + '_r')
                txt_dir_h_minAreaRect = os.path.join(
                    'txt_output', cfgs.VERSION + '_h_minAreaRect')
                tools.mkdir(txt_dir_r)
                tools.mkdir(txt_dir_h_minAreaRect)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_r[sub_class] = open(
                        os.path.join(txt_dir_r, 'Task1_%s.txt' % sub_class),
                        'a+')
                    write_handle_h_[sub_class] = open(
                        os.path.join(txt_dir_h_minAreaRect,
                                     'Task2_%s.txt' % sub_class), 'a+')

                rboxes = coordinate_convert.forward_convert(box_res_rotate_,
                                                            with_label=False)

                for i, rbox in enumerate(rboxes):
                    command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0],
                        score_res_rotate_[i],
                        rbox[0],
                        rbox[1],
                        rbox[2],
                        rbox[3],
                        rbox[4],
                        rbox[5],
                        rbox[6],
                        rbox[7],
                    )
                    command_ = '%s %.3f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0],
                        score_res_rotate_[i], min(rbox[::2]), min(
                            rbox[1::2]), max(rbox[::2]), max(rbox[1::2]))
                    write_handle_r[LABEl_NAME_MAP[label_res_rotate_[i]]].write(
                        command)
                    write_handle_h_[LABEl_NAME_MAP[
                        label_res_rotate_[i]]].write(command_)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_r[sub_class].close()
                    # BUGFIX: the minAreaRect handles were opened above but
                    # never closed, leaking one descriptor per class per image.
                    write_handle_h_[sub_class].close()

                # Task2: horizontal boxes
                write_handle_h = {}
                txt_dir_h = os.path.join('txt_output', cfgs.VERSION + '_h')
                tools.mkdir(txt_dir_h)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class] = open(
                        os.path.join(txt_dir_h, 'Task2_%s.txt' % sub_class),
                        'a+')

                for i, hbox in enumerate(box_res_):
                    command = '%s %.3f %.1f %.1f %.1f %.1f\n' % (
                        img_path.split('/')[-1].split('.')[0], score_res_[i],
                        hbox[0], hbox[1], hbox[2], hbox[3])
                    write_handle_h[LABEl_NAME_MAP[label_res_[i]]].write(
                        command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class].close()

                view_bar(
                    '{} cost {}s'.format(
                        img_path.split('/')[-1].split('.')[0], time_elapsed),
                    count + 1, len(file_paths))
def train():
    """Multi-GPU training: build one loss tower per GPU, average gradients,
    and run the training loop.

    Assumes cfgs.GPU_GROUP is a comma-separated device string (e.g. "0,1"),
    as used elsewhere in this file. tower_loss(scope) builds the per-GPU
    graph and returns the loss tensors used for logging.
    """
    # ___________________________________________________________________________________________________add summary
    global_step = slim.get_or_create_global_step()
    # step-wise learning-rate decay at the two DECAY_STEP boundaries
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)

    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # BUGFIX: GPU_GROUP is a comma-separated string (see the other train() in
    # this file); len(cfgs.GPU_GROUP) counted characters (commas included),
    # building the wrong number of towers for more than one GPU.
    num_gpu = len(cfgs.GPU_GROUP.strip().split(','))

    tower_grads = []
    for i in range(num_gpu):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('GGpu%d' % i) as scope:
                # tower_loss builds the per-GPU graph; variables are created
                # on the first tower and reused by the rest.
                loss, faster_rcnn, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss, \
                fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss = tower_loss(scope)
                tf.get_variable_scope().reuse_variables()

                grads = optimizer.compute_gradients(loss)
                if cfgs.MUTILPY_BIAS_GRADIENT:
                    grads = faster_rcnn.enlarge_gradients_for_bias(grads)
                if cfgs.GRADIENT_CLIPPING_BY_NORM:
                    with tf.name_scope('clip_gradients_YJR'):
                        grads = slim.learning.clip_gradient_norms(
                            grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                tower_grads.append(grads)

    # synchronous update with gradients averaged across towers
    grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(grads, global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    # grow GPU memory on demand instead of grabbing all of it up front
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            # Three cadences: plain step / loss printout / summary write.
            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])
            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch,
                             rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss,
                             fastrcnn_total_loss, loss])
                    end = time.time()
                    print(""" {}: step{} image_name:{} |\t
                              rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                              fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                              total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_name[0]), rpnLocLoss, rpnClsLoss,
                                  rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print(' weights had been saved')

        coord.request_stop()
        coord.join(threads)
def train():
    """Multi-GPU RetinaNet-style training with warmup LR and EMA weights.

    Builds one tower per GPU (variables pinned to CPU, reused across towers),
    sums tower gradients, optionally multiplies bias gradients, applies an
    exponential moving average over trainable variables, and runs the
    training loop with periodic logging / summaries / checkpoints.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # randomly pick one short-side length per batch
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        # data processing: slice the batch into one input tuple per GPU
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            # rotated gt: [x_c, y_c, w, h, theta, label]
            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            # horizontal gt: [xmin, ymin, xmax, ymax, label]
            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # keep variables on CPU so all towers share them
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                [
                                    slim.conv2d, slim.conv2d_in_plane,
                                    slim.conv2d_transpose,
                                    slim.separable_conv2d,
                                    slim.fully_connected
                                ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.
                                    constant_initializer(0.0)):
                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                # crop away the padding introduced by batching
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                # accumulate per-tower losses (averaged over GPUs)
                                loss_dict = outputs[-1]
                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[
                                        k] += loss_dict[k] / num_gpu
                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                # add regularization on the last tower only
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            # scale bias (and 'conv_new') gradients before applying
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))

            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        # exponential moving average over all trainable variables
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)
            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            # each session step advances global_step by 1 but processes
            # num_gpu images, hence the // num_gpu scaling below
            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])
                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()
                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])
                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d current_step:%d""" %
                              (training_time,
                               (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" %
                              ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                        # abort on NaN loss rather than keep training garbage
                        if np.isnan(total_loss_dict_['total_losses']):
                            sys.exit(0)
                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(
                                summary_str, (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')

            coord.request_stop()
            coord.join(threads)
def eval_coco(det_net, real_test_img_list, gpu_ids):
    """Parallel COCO-minival evaluation: shard images across GPUs via worker
    processes and collect detections through a queue into COCO-format JSON.

    :param det_net: detection network object passed through to worker().
    :param real_test_img_list: list of image records to evaluate.
    :param gpu_ids: comma-separated GPU id string, e.g. "0,1".
    :return: path of the written COCO-format result file.
    """
    save_path = os.path.join('./eval_coco', cfgs.VERSION)
    tools.mkdir(save_path)
    fw_json_dt = open(os.path.join(save_path, 'coco_minival.json'), 'w')
    coco_det = []

    nr_records = len(real_test_img_list)
    pbar = tqdm(total=nr_records)
    gpu_num = len(gpu_ids.strip().split(','))

    nr_image = math.ceil(nr_records / gpu_num)
    result_queue = Queue(5000)
    procs = []

    # one worker process per GPU, each handling a contiguous shard
    for i in range(gpu_num):
        start = i * nr_image
        end = min(start + nr_image, nr_records)
        split_records = real_test_img_list[start:end]
        proc = Process(target=worker,
                       args=(int(gpu_ids.strip().split(',')[i]),
                             split_records, det_net, result_queue))
        print('process:%d, start:%d, end:%d' % (i, start, end))
        proc.start()
        procs.append(proc)

    # drain exactly one result per image, in whatever order workers finish
    for i in range(nr_records):
        res = result_queue.get()

        xmin, ymin, xmax, ymax = res['boxes'][:, 0], res['boxes'][:, 1], \
                                 res['boxes'][:, 2], res['boxes'][:, 3]

        # rescale boxes back to the raw-image coordinate frame
        xmin = xmin * res['scales'][0]
        xmax = xmax * res['scales'][0]
        ymin = ymin * res['scales'][1]
        ymax = ymax * res['scales'][1]

        # COCO bbox format is [x, y, width, height]
        boxes = np.transpose(np.stack([xmin, ymin, xmax - xmin, ymax - ymin]))

        sort_scores = np.array(res['scores'])
        sort_labels = np.array(res['labels'])
        sort_boxes = np.array(boxes)

        # if len(res['scores']) > cfgs.MAXIMUM_DETECTIONS:
        #     sort_indx = np.argsort(np.array(res['scores']) * -1)[:cfgs.MAXIMUM_DETECTIONS]
        #     # print(sort_indx)
        #     sort_scores = np.array(res['scores'])[sort_indx]
        #     sort_labels = np.array(res['labels'])[sort_indx]
        #     sort_boxes = np.array(boxes)[sort_indx]

        for j, box in enumerate(sort_boxes):
            coco_det.append({
                'bbox': [float(box[0]), float(box[1]),
                         float(box[2]), float(box[3])],
                'score': float(sort_scores[j]),
                'image_id': int(res['image_id'].split('.jpg')[0].split('_000000')[-1]),
                'category_id': int(classes_originID[LABEL_NAME_MAP[sort_labels[j]]])})

        pbar.set_description("Eval image %s" % res['image_id'])

        pbar.update(1)

    for p in procs:
        p.join()

    json.dump(coco_det, fw_json_dt)
    fw_json_dt.close()
    return os.path.join(save_path, 'coco_minival.json')