def train(): with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = slim.get_or_create_global_step() lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.NUM_GPU*cfgs.BATCH_SIZE) tf.summary.scalar('lr', lr) optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, is_training=True, batch_size=cfgs.BATCH_SIZE) with tf.name_scope('get_batch'): img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \ next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' batch_size=cfgs.BATCH_SIZE * cfgs.NUM_GPU, shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_training=True) # data processing inputs_list = [] for i in range(cfgs.NUM_GPU): start = i*cfgs.BATCH_SIZE end = (i+1)*cfgs.BATCH_SIZE img = img_batch[start:end, :, :, :] if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: img = img / tf.constant([cfgs.PIXEL_STD]) gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[start:end, :, :], [cfgs.BATCH_SIZE, -1, 5]), tf.float32) num_objects = num_objects_batch[start:end] num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32) img_h = img_h_batch[start:end] img_w = img_w_batch[start:end] # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32) # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32) inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w]) # put_op_list = [] # get_op_list = [] # for i in range(cfgs.NUM_GPU): # with tf.device("/GPU:%s" % i): # area = tf.contrib.staging.StagingArea( # dtypes=[tf.float32, tf.float32, tf.float32]) # put_op_list.append(area.put(inputs_list[i])) # get_op_list.append(area.get()) tower_grads = [] biases_regularizer = tf.no_regularizer weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY) total_loss_dict = { 'rpn_cls_loss': tf.constant(0., tf.float32), 'rpn_bbox_loss': tf.constant(0., tf.float32), 'rpn_ctr_loss': tf.constant(0., tf.float32), 'total_losses': tf.constant(0., tf.float32), } with tf.variable_scope(tf.get_variable_scope()): for i in range(cfgs.NUM_GPU): with tf.device('/gpu:%d' % i): with tf.name_scope('tower_%d' % i): with slim.arg_scope( [slim.model_variable, slim.variable], device='/device:CPU:0'): with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected], weights_regularizer=weights_regularizer, biases_regularizer=biases_regularizer, biases_initializer=tf.constant_initializer(0.0)): gtboxes_and_label = tf.py_func(get_gtboxes_and_label, inp=[inputs_list[i][1], inputs_list[i][2]], Tout=tf.float32) gtboxes_and_label = tf.reshape(gtboxes_and_label, [cfgs.BATCH_SIZE, -1, 5]) img = inputs_list[i][0] img_shape = inputs_list[i][-2:] h_crop = tf.reduce_max(img_shape[0]) w_crop = tf.reduce_max(img_shape[1]) img = tf.image.crop_to_bounding_box(image=img, offset_height=0, offset_width=0, target_height=tf.cast(h_crop, tf.int32), target_width=tf.cast(w_crop, tf.int32)) outputs = faster_rcnn.build_whole_detection_network(input_img_batch=img, gtboxes_batch=gtboxes_and_label) gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img[0, :, :, :], boxes=gtboxes_and_label[0, :, :-1], labels=gtboxes_and_label[0, :, -1]) gtboxes_in_img = tf.expand_dims(gtboxes_in_img, 0) tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img) if cfgs.ADD_BOX_IN_TENSORBOARD: detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores( img_batch=img[0, :, :, :], boxes=outputs[0], scores=outputs[1], labels=outputs[2]) detections_in_img = tf.expand_dims(detections_in_img, 0) tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img) loss_dict = outputs[-1] total_losses = 0.0 for k in loss_dict.keys(): total_losses += loss_dict[k] total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU total_losses = total_losses / cfgs.NUM_GPU total_loss_dict['total_losses'] += total_losses if i == cfgs.NUM_GPU - 1: regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses()) total_losses = total_losses + tf.add_n(regularization_losses) tf.get_variable_scope().reuse_variables() grads = optimizer.compute_gradients(total_losses) tower_grads.append(grads) for k in total_loss_dict.keys(): tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k]) if len(tower_grads) > 1: grads = sum_gradients(tower_grads) else: grads = tower_grads[0] # final_gvs = [] # with tf.variable_scope('Gradient_Mult'): # for grad, var in grads: # scale = 1. # # if '/biases:' in var.name: # # scale *= 2. # if 'conv_new' in var.name: # scale *= 3. # if not np.allclose(scale, 1.0): # grad = tf.multiply(grad, scale) # final_gvs.append((grad, var)) apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) train_op = tf.group(apply_gradient_op, variables_averages_op) # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step) summary_op = tf.summary.merge_all() restorer, restore_ckpt = faster_rcnn.get_restorer() saver = tf.train.Saver(max_to_keep=10) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) tfconfig = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False) tfconfig.gpu_options.allow_growth = True num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE with tf.Session(config=tfconfig) as sess: sess.run(init_op) # sess.run(tf.initialize_all_variables()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION) tools.mkdir(summary_path) summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for step in range(cfgs.MAX_ITERATION // num_per_iter): training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0: _, global_stepnp = sess.run([train_op, global_step]) else: if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0: start = time.time() _, global_stepnp, total_loss_dict_ = \ sess.run([train_op, global_step, total_loss_dict]) end = time.time() print('***'*20) print("""%s: global_step:%d current_step:%d""" % (training_time, (global_stepnp-1)*num_per_iter, step*num_per_iter)) print("""per_cost_time:%.3fs""" % ((end - start) / num_per_iter)) loss_str = '' for k in total_loss_dict_.keys(): loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k]) print(loss_str) else: if step % cfgs.SMRY_ITER == 0: _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op]) summary_writer.add_summary(summary_str, (global_stepnp-1)*num_per_iter) summary_writer.flush() if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_per_iter) == 0) or (step >= cfgs.MAX_ITERATION // cfgs.NUM_GPU - 1): save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION) if not os.path.exists(save_dir): os.mkdir(save_dir) save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp-1)*num_per_iter) + 'model.ckpt') saver.save(sess, save_ckpt) print(' weights had been saved') coord.request_stop() coord.join(threads)
def train(): with tf.Graph().as_default(), tf.device('/cpu:0'): num_gpu = len(cfgs.GPU_GROUP.strip().split(',')) global_step = slim.get_or_create_global_step() lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu) tf.summary.scalar('lr', lr) optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) retinanet = build_whole_network.DetectionNetwork( base_network_name=cfgs.NET_NAME, is_training=True) with tf.name_scope('get_batch'): if cfgs.IMAGE_PYRAMID: shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN) shortside_len = tf.random_shuffle(shortside_len_list)[0] else: shortside_len = cfgs.IMG_SHORT_SIDE_LEN img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \ next_batch(dataset_name=cfgs.DATASET_NAME, batch_size=cfgs.BATCH_SIZE * num_gpu, shortside_len=shortside_len, is_training=True) # data processing inputs_list = [] for i in range(num_gpu): img = tf.expand_dims(img_batch[i], axis=0) if cfgs.NET_NAME in [ 'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d' ]: img = img / tf.constant([cfgs.PIXEL_STD]) gtboxes_and_label_r = tf.py_func(backward_convert, inp=[gtboxes_and_label_batch[i]], Tout=tf.float32) gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6]) gtboxes_and_label_h = get_horizen_minAreaRectangle( gtboxes_and_label_batch[i]) gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5]) num_objects = num_objects_batch[i] num_objects = tf.cast(tf.reshape(num_objects, [ -1, ]), tf.float32) img_h = img_h_batch[i] img_w = img_w_batch[i] inputs_list.append([ img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects, img_h, img_w ]) tower_grads = [] biases_regularizer = tf.no_regularizer weights_regularizer = tf.contrib.layers.l2_regularizer( cfgs.WEIGHT_DECAY) total_loss_dict = { 'cls_loss': tf.constant(0., tf.float32), 'reg_loss': tf.constant(0., tf.float32), 'total_losses': tf.constant(0., tf.float32), } with tf.variable_scope(tf.get_variable_scope()): for i in range(num_gpu): with tf.device('/gpu:%d' % i): with tf.name_scope('tower_%d' % i): with slim.arg_scope( [slim.model_variable, slim.variable], device='/device:CPU:0'): with slim.arg_scope( [ slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected ], weights_regularizer=weights_regularizer, biases_regularizer=biases_regularizer, biases_initializer=tf.constant_initializer( 0.0)): gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func( get_gtboxes_and_label, inp=[ inputs_list[i][1], inputs_list[i][2], inputs_list[i][3] ], Tout=[tf.float32, tf.float32]) gtboxes_and_label_h = tf.reshape( gtboxes_and_label_h, [-1, 5]) gtboxes_and_label_r = tf.reshape( gtboxes_and_label_r, [-1, 6]) img = inputs_list[i][0] img_shape = inputs_list[i][-2:] img = tf.image.crop_to_bounding_box( image=img, offset_height=0, offset_width=0, target_height=tf.cast( img_shape[0], tf.int32), target_width=tf.cast( img_shape[1], tf.int32)) outputs = retinanet.build_whole_detection_network( input_img_batch=img, gtboxes_batch_h=gtboxes_and_label_h, gtboxes_batch_r=gtboxes_and_label_r, gpu_id=i) gtboxes_in_img_h = draw_boxes_with_categories( img_batch=img, boxes=gtboxes_and_label_h[:, :-1], labels=gtboxes_and_label_h[:, -1], method=0) gtboxes_in_img_r = draw_boxes_with_categories( img_batch=img, boxes=gtboxes_and_label_r[:, :-1], labels=gtboxes_and_label_r[:, -1], method=1) tf.summary.image( 'Compare/gtboxes_h_gpu:%d' % i, gtboxes_in_img_h) tf.summary.image( 'Compare/gtboxes_r_gpu:%d' % i, gtboxes_in_img_r) if cfgs.ADD_BOX_IN_TENSORBOARD: detections_in_img = draw_boxes_with_categories_and_scores( img_batch=img, boxes=outputs[0], scores=outputs[1], labels=outputs[2], method=1) tf.summary.image( 'Compare/final_detection_gpu:%d' % i, detections_in_img) loss_dict = outputs[-1] total_losses = 0.0 for k in loss_dict.keys(): total_losses += loss_dict[k] total_loss_dict[ k] += loss_dict[k] / num_gpu total_losses /= num_gpu total_loss_dict['total_losses'] += total_losses if i == num_gpu - 1: regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses()) total_losses = total_losses + tf.add_n( regularization_losses) tf.get_variable_scope().reuse_variables() grads = optimizer.compute_gradients(total_losses) if cfgs.GRADIENT_CLIPPING_BY_NORM is not None: grads = slim.learning.clip_gradient_norms( grads, cfgs.GRADIENT_CLIPPING_BY_NORM) tower_grads.append(grads) for k in total_loss_dict.keys(): tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k]) if len(tower_grads) > 1: grads = sum_gradients(tower_grads) else: grads = tower_grads[0] if cfgs.MUTILPY_BIAS_GRADIENT is not None: final_gvs = [] with tf.variable_scope('Gradient_Mult'): for grad, var in grads: scale = 1. if '/biases:' in var.name: scale *= cfgs.MUTILPY_BIAS_GRADIENT if 'conv_new' in var.name: scale *= 3. if not np.allclose(scale, 1.0): grad = tf.multiply(grad, scale) final_gvs.append((grad, var)) apply_gradient_op = optimizer.apply_gradients( final_gvs, global_step=global_step) else: apply_gradient_op = optimizer.apply_gradients( grads, global_step=global_step) variable_averages = tf.train.ExponentialMovingAverage( 0.9999, global_step) variables_averages_op = variable_averages.apply( tf.trainable_variables()) train_op = tf.group(apply_gradient_op, variables_averages_op) # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step) summary_op = tf.summary.merge_all() restorer, restore_ckpt = retinanet.get_restorer() saver = tf.train.Saver(max_to_keep=5) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) tfconfig = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) tfconfig.gpu_options.allow_growth = True with tf.Session(config=tfconfig) as sess: sess.run(init_op) # sess.run(tf.initialize_all_variables()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION) tools.mkdir(summary_path) summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for step in range(cfgs.MAX_ITERATION // num_gpu): training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0: _, global_stepnp = sess.run([train_op, global_step]) else: if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0: start = time.time() _, global_stepnp, total_loss_dict_ = \ sess.run([train_op, global_step, total_loss_dict]) end = time.time() print('***' * 20) print("""%s: global_step:%d current_step:%d""" % (training_time, (global_stepnp - 1) * num_gpu, step * num_gpu)) print("""per_cost_time:%.3fs""" % ((end - start) / num_gpu)) loss_str = '' for k in total_loss_dict_.keys(): loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k]) print(loss_str) if np.isnan(total_loss_dict_['total_losses']): sys.exit(0) else: if step % cfgs.SMRY_ITER == 0: _, global_stepnp, summary_str = sess.run( [train_op, global_step, summary_op]) summary_writer.add_summary( summary_str, (global_stepnp - 1) * num_gpu) summary_writer.flush() if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1): save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION) if not os.path.exists(save_dir): os.mkdir(save_dir) save_ckpt = os.path.join( save_dir, '{}_'.format(cfgs.DATASET_NAME) + str( (global_stepnp - 1) * num_gpu) + 'model.ckpt') saver.save(sess, save_ckpt) print(' weights had been saved') coord.request_stop() coord.join(threads)
def train(): with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = slim.get_or_create_global_step() lr = tf.train.piecewise_constant(global_step, boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1]), np.int64(cfgs.DECAY_STEP[2])], values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100., cfgs.LR / 1000.]) tf.summary.scalar('lr', lr) optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, is_training=True) with tf.name_scope('get_batch'): num_gpu = len(cfgs.GPU_GROUP.strip().split(',')) img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \ next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' batch_size=cfgs.BATCH_SIZE * num_gpu, shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_training=True) # gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5]) # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: # img_batch = img_batch / tf.constant([cfgs.PIXEL_STD]) # data processing inputs_list = [] for i in range(num_gpu): # img_name = img_name_batch[i] img = tf.expand_dims(img_batch[i], axis=0) gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[i], [-1, 9]), tf.float32) num_objects = num_objects_batch[i] num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32) img_h = img_h_batch[i] img_w = img_w_batch[i] # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32) # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32) inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w]) tower_grads = [] biases_regularizer = tf.no_regularizer weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY) total_loss_dict = { 'rpn_cls_loss': tf.constant(0., tf.float32), 'rpn_loc_loss': tf.constant(0., tf.float32), 'fastrcnn_cls_loss_h': tf.constant(0., tf.float32), 'fastrcnn_loc_loss_h': tf.constant(0., tf.float32), 'fastrcnn_cls_loss_r': tf.constant(0., tf.float32), 'fastrcnn_loc_loss_r': tf.constant(0., tf.float32), 'total_losses': tf.constant(0., tf.float32), } with tf.variable_scope(tf.get_variable_scope()): for i in range(num_gpu): with tf.device('/gpu:%d' % i): with tf.name_scope('tower_%d' % i): with slim.arg_scope( [slim.model_variable, slim.variable], device='/device:CPU:0'): with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected], weights_regularizer=weights_regularizer, biases_regularizer=biases_regularizer, biases_initializer=tf.constant_initializer(0.0)): gtboxes_and_label = tf.py_func(get_gtboxes_and_label, inp=[inputs_list[i][1], inputs_list[i][2]], Tout=tf.float32) gtboxes_and_label = tf.py_func(back_forward_convert, inp=[gtboxes_and_label], Tout=tf.float32) gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6]) gtboxes_and_label_AreaRectangle = get_horizen_minAreaRectangle(gtboxes_and_label) gtboxes_and_label_AreaRectangle = tf.reshape(gtboxes_and_label_AreaRectangle, [-1, 5]) img = inputs_list[i][0] img_shape = inputs_list[i][-2:] img = tf.image.crop_to_bounding_box(image=img, offset_height=0, offset_width=0, target_height=tf.cast(img_shape[0], tf.int32), target_width=tf.cast(img_shape[1], tf.int32)) final_boxes_h, final_scores_h, final_category_h, \ final_boxes_r, final_scores_r, final_category_r, loss_dict = faster_rcnn.build_whole_detection_network( input_img_batch=img, gtboxes_r_batch=gtboxes_and_label, gtboxes_h_batch=gtboxes_and_label_AreaRectangle) if cfgs.ADD_BOX_IN_TENSORBOARD: dets_in_img = draw_boxes_with_categories_and_scores(img_batch=img, boxes=final_boxes_h, labels=final_category_h, scores=final_scores_h) dets_rotate_in_img = draw_boxes_with_categories_and_scores_rotate(img_batch=img, boxes=final_boxes_r, labels=final_category_r, scores=final_scores_r) tf.summary.image('Compare/final_detection_rotate_gpu:%d' % i, dets_rotate_in_img) total_losses = 0.0 for k in loss_dict.keys(): total_losses += loss_dict[k] total_loss_dict[k] += loss_dict[k] / num_gpu total_losses = total_losses / num_gpu total_loss_dict['total_losses'] += total_losses if i == num_gpu - 1: regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses()) total_losses = total_losses + tf.add_n(regularization_losses) tf.get_variable_scope().reuse_variables() grads = optimizer.compute_gradients(total_losses) # average loss tower_grads.append(grads) for k in total_loss_dict.keys(): tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k]) if len(tower_grads) > 1: grads = sum_gradients(tower_grads) else: grads = tower_grads[0] apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) train_op = tf.group(apply_gradient_op, variables_averages_op) # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step) summary_op = tf.summary.merge_all() restorer, restore_ckpt = faster_rcnn.get_restorer() saver = tf.train.Saver(max_to_keep=10) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) tfconfig = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False) #tfconfig.gpu_options.allow_growth = True tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.7 with tf.Session(config=tfconfig) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION) tools.mkdir(summary_path) summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) for step in range(cfgs.MAX_ITERATION // num_gpu): training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0: _, global_stepnp = sess.run([train_op, global_step]) else: if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0: start = time.time() _, global_stepnp, total_loss_dict_ = \ sess.run([train_op, global_step, total_loss_dict]) end = time.time() print('***'*20) print("""%s: global_step:%d current_step:%d""" % (training_time, (global_stepnp-1)*num_gpu, step*num_gpu)) print("""per_cost_time:%.3fs""" % ((end - start) / num_gpu)) loss_str = '' for k in total_loss_dict_.keys(): loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k]) print(loss_str) else: if step % cfgs.SMRY_ITER == 0: _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op]) summary_writer.add_summary(summary_str, (global_stepnp-1)*num_gpu) summary_writer.flush() if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1): save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION) if not os.path.exists(save_dir): os.mkdir(save_dir) save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp-1)*num_gpu) + 'model.ckpt') saver.save(sess, save_ckpt) print(' weights had been saved') coord.request_stop() coord.join(threads)