def inference(test_dir, inference_save_path):
    """Run the R3Det detector over every supported image file in ``test_dir``.

    Args:
        test_dir: Directory that is scanned (non-recursively) for images.
            A file is picked up when its name ends with one of
            ``.jpg``, ``.png``, ``.jpeg``, ``.tif`` or ``.tiff``
            (case-sensitive match).
        inference_save_path: Forwarded unchanged to ``detect``.

    Raises:
        AssertionError: If ``test_dir`` contains no file with a supported
            extension.
    """
    supported_exts = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')

    test_imgname_list = [
        os.path.join(test_dir, img_name)
        for img_name in os.listdir(test_dir)
        if img_name.endswith(supported_exts)
    ]

    # The message lists every extension the filter above really accepts;
    # the previous text only named three of the five.
    assert len(test_imgname_list) != 0, \
        'test_dir has no imgs there.' \
        ' Note that, we only support img format of' \
        ' (.jpg, .jpeg, .png, .tif, and .tiff) '

    # Build the network in inference mode and hand everything to detect().
    r3det = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=False)
    detect(det_net=r3det,
           inference_save_path=inference_save_path,
           real_test_imgname_list=test_imgname_list)
def eval(num_imgs, args):
    """Run detection on the images in ``args.test_dir`` via ``test_dota``.

    When ``args.show_box`` is false, a progress file named
    ``'<cfgs.VERSION>.txt'`` is consulted: every line in it is treated as the
    name of an image that has already been processed, and those images are
    skipped. The file is created empty if it does not exist and is removed
    once ``test_dota`` returns.

    NOTE: this function shadows the built-in ``eval``; the name is kept
    because callers depend on it.

    Args:
        num_imgs: Maximum number of images to process, or ``np.inf`` to
            process all of them.
        args: Object providing ``test_dir`` and ``show_box``; it is also
            forwarded to ``test_dota``.

    Raises:
        AssertionError: If no (remaining) image with a supported extension is
            found in ``args.test_dir``.
    """
    supported_exts = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
    txt_name = '{}.txt'.format(cfgs.VERSION)

    # Lines of the progress file (each still carrying its trailing '\n').
    already_tested = set()
    if not args.show_box:
        if not os.path.exists(txt_name):
            # Create an empty progress file so the read below always works.
            with open(txt_name, 'w'):
                pass
        with open(txt_name, 'r') as fr:
            img_filter = fr.readlines()

        print('****************************' * 3)
        print('Already tested imgs:', img_filter)
        print('****************************' * 3)

        # A set gives O(1) membership tests instead of rescanning the list
        # for every file in the directory.
        already_tested = set(img_filter)

    test_imgname_list = [
        os.path.join(args.test_dir, img_name)
        for img_name in os.listdir(args.test_dir)
        if img_name.endswith(supported_exts)
        and (img_name + '\n') not in already_tested
    ]

    # The message lists every extension the filter above really accepts.
    assert len(test_imgname_list) != 0, \
        'test_dir has no imgs there.' \
        ' Note that, we only support img format of' \
        ' (.jpg, .jpeg, .png, .tif, and .tiff) '

    if num_imgs == np.inf:
        real_test_img_list = test_imgname_list
    else:
        real_test_img_list = test_imgname_list[:num_imgs]

    retinanet = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=False)
    test_dota(det_net=retinanet,
              real_test_img_list=real_test_img_list,
              args=args,
              txt_name=txt_name)

    # Deliberately not in a ``finally``: if test_dota fails, the progress file
    # stays on disk (presumably so a rerun can resume -- confirm in test_dota).
    if not args.show_box:
        os.remove(txt_name)
def eval(num_imgs, img_dir, image_ext, test_annotation_path, draw_imgs):
    """Detect on ``img_dir`` and score the rotated boxes with the VOC evaluator.

    Args:
        num_imgs: Forwarded to ``eval_with_plac``.
        img_dir: Directory holding the evaluation images.
        image_ext: Extension string; also used to derive image ids from the
            file names in ``img_dir``.
        test_annotation_path: Forwarded to
            ``voc_eval_r.voc_evaluate_detections``.
        draw_imgs: Forwarded to ``eval_with_plac``.
    """
    detector = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME,
        is_training=False,
    )

    # NOTE: an earlier revision could instead unpickle detections from
    # cfgs.VERSION + '_detections_r.pkl'; that path is currently disabled.
    all_boxes_r = eval_with_plac(
        img_dir=img_dir,
        det_net=detector,
        num_imgs=num_imgs,
        image_ext=image_ext,
        draw_imgs=draw_imgs,
    )

    # Image id = everything before the first occurrence of image_ext
    # in each directory entry.
    real_test_imgname_list = []
    for file_name in os.listdir(img_dir):
        real_test_imgname_list.append(file_name.split(image_ext)[0])

    print(10 * "**")
    print('rotation eval:')
    voc_eval_r.voc_evaluate_detections(
        all_boxes=all_boxes_r,
        test_imgid_list=real_test_imgname_list,
        test_annotation_path=test_annotation_path,
    )
def train():
    """Build the multi-GPU R3Det training graph and run the training loop.

    One tower is built per entry in ``cfgs.GPU_GROUP``; each tower receives
    one image of the batch. Per-tower gradients are combined with
    ``sum_gradients`` and applied by a momentum optimizer, together with an
    exponential moving average over the trainable variables. The session
    loop periodically prints losses, writes summaries and saves checkpoints.
    """
    # Ops default to the CPU; the towers below are pinned to their own GPU.
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        # Number of towers = number of comma-separated ids in GPU_GROUP.
        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Shuffle the configured lengths and take the first entry.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            # The batch is BATCH_SIZE * num_gpu; the loop below indexes
            # entries 0..num_gpu-1, one per tower.
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r3det = build_whole_network_r3det.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        # data processing
        # Collect, per tower: image, horizontal gt boxes, rotated gt boxes,
        # object count, image height and image width.
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            # Only the *_v1d backbones get the extra division by PIXEL_STD.
            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            # Rotated ground truth, reshaped to 6 columns.
            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            # Horizontal ground truth, reshaped to 5 columns.
            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        # Running sums of each loss term, averaged over towers, for logging.
        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss': tf.constant(0., tf.float32),
            'refine_reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        if cfgs.USE_SUPERVISED_MASK:
            total_loss_dict['mask_loss'] = tf.constant(0., tf.float32)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # Variables created through slim are placed on the CPU
                        # even though the tower's ops run on the GPU.
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [
                                        slim.conv2d, slim.conv2d_in_plane,
                                        slim.conv2d_transpose,
                                        slim.separable_conv2d,
                                        slim.fully_connected
                                    ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                # Inputs: h boxes, r boxes and object count
                                # for this tower (see inputs_list layout).
                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                # Last two entries are (img_h, img_w).
                                img_shape = inputs_list[i][-2:]
                                # Crop from the top-left corner to the stored
                                # height/width (presumably to strip batch
                                # padding -- confirm in next_batch).
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = r3det.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)

                                # Ground-truth visualisations for TensorBoard;
                                # the last column is used as the label.
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    # outputs[0..2] are passed as boxes,
                                    # scores and labels respectively.
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                # The last output is the dict of loss terms.
                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[
                                        k] += loss_dict[k] / num_gpu

                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                # Regularization losses are added once, on the
                                # last tower only.
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        # Later towers reuse the variables created by the
                        # first one.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        # Summary tag is '<prefix>/<name>', prefix = text before the first '_'.
        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    # Bias gradients are scaled by the configured factor,
                    # 'conv_new' variables by a further factor of 3.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    # Skip the multiply op when the scale is effectively 1.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        # Moving average of all trainable variables, updated with each step.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = r3det.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            # NOTE(review): request_stop()/join() at the bottom are not in a
            # try/finally, so they are skipped if the loop raises or exits.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            # Restore runs after init_op, so restored values overwrite the
            # freshly initialised ones.
            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            # Each session step consumes num_gpu images, hence the division.
            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                # Plain step: neither a logging nor a summary iteration.
                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    # Logging-only iteration: also fetch the loss values.
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d current_step:%d""" %
                              (training_time, (global_stepnp - 1) * num_gpu,
                               step * num_gpu))
                        print("""per_cost_time:%.3fs""" %
                              ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                        # NOTE(review): a NaN loss terminates the process with
                        # exit status 0 (success) -- confirm this is intended.
                        if np.isnan(total_loss_dict_['total_losses']):
                            sys.exit(0)

                    else:
                        # Summary iteration (takes priority when a step is
                        # both a logging and a summary step).
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(
                                summary_str, (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                # Save at the configured interval and on the final step.
                if (step > 0 and step %
                    (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) or (
                        step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    # NOTE(review): os.mkdir does not create missing parents;
                    # cfgs.TRAINED_CKPT must already exist.
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')

            coord.request_stop()
            coord.join(threads)
def build_detection_graph():
    """Assemble the inference graph and return the packed detection tensor.

    The graph takes a float32 placeholder named ``'input_img'`` with the
    fixed shape ``[1, 640, 640, 3]`` and feeds it to the network as-is:
    no resizing, mean subtraction or rescaling of boxes happens in here.

    Returns:
        A tensor named ``'DetResults'`` with one row per detection, laid out
        as ``[category, score, box[0], box[1], box[2], box[3], box[4]]``
        (the five box values are presumably x_c, y_c, w, h, theta -- confirm
        against ``build_whole_detection_network``).
    """
    # 1. preprocess img
    # is RGB. not BGR
    img_plac = tf.placeholder(
        dtype=tf.float32,
        shape=[1, 640, 640, 3],
        name='input_img',
    )

    det_net = build_whole_network_r3det.DetectionNetwork(
        base_network_name=cfgs.NET_NAME,
        is_training=False,
    )
    net_outputs = det_net.build_whole_detection_network(
        input_img_batch=img_plac,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None,
    )
    detection_boxes, detection_scores, detection_category = net_outputs

    # Take the first five box columns one by one, stack them as rows and
    # transpose back so each detection is a row again.
    box_columns = [detection_boxes[:, col] for col in range(5)]
    boxes = tf.transpose(tf.stack(box_columns))

    category_col = tf.reshape(detection_category, [-1, 1])
    score_col = tf.reshape(detection_scores, [-1, 1])
    dets = tf.concat([category_col, score_col, boxes],
                     axis=1,
                     name='DetResults')
    return dets