name='im_info_input') rpn_data, roi_data, rpn_cls_score_reshape, rpn_bbox_pred, conv5_3, cls_score, cls_pred, bbox_pred = vgg16( data_tensor, input_gt_box_tensor, input_im_info_tensor) model_path = '/home/give/PycharmProjects/MyFasterRCNN/parameters' from lib.fast_rcnn.config import cfg_from_file, cfg_from_list from RPN.train import parse_args args = parse_args() if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) imdb = get_imdb('voc_2007_trainval') roidb = get_training_roidb(imdb) roidb = filter_roidb(roidb) bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) data_layer = get_data_layer(roidb, imdb.num_classes) blobs = data_layer.forward() print np.shape(blobs['data']) with tf.Session() as sess: init_op = tf.global_variables_initializer() sess.run(init_op) # restore model saver = tf.train.Saver() ckpt = tf.train.get_checkpoint_state(model_path) if ckpt and ckpt.model_checkpoint_path: print 'load model from ', ckpt.model_checkpoint_path saver.restore(sess, ckpt.model_checkpoint_path) print blobs.keys() print np.shape(blobs['gt_boxes']) rpn_rois_values, cls_pred_value, bbox_pred_value = sess.run(
print('gt_heights:', gt_heights, '\nex_heights:', ex_heights) target_v = np.vstack((target_dvc, target_dvh)).transpose() target_do = (gt_ctr_x - ex_ctr_x) / ex_widths target_o = target_do return target_v, target_o if __name__ == '__main__': from lib.fast_rcnn.train import get_data_layer, get_training_roidb from lib.datasets.factory import get_imdb imdb = get_imdb('voc_2007_trainval') roidb = get_training_roidb(imdb) data_layer = get_data_layer(roidb, 2) DEBUG = True while True: db_inds, blobs = data_layer.forward() # if blobs['im_name']!='auto_50_5768038962_20180628224921_20180629100000_161.jpg': # continue im_name = blobs['im_name'] data = blobs['data'] im_info = blobs['im_info'] gt_boxes = blobs['gt_boxes'] gt_ishard = blobs['gt_ishard'] dontcare_areas = blobs['dontcare_areas'] rpn_cls_score = np.ones((1, 30, 62, 20))
def _conv_param_variables(layer_names):
    """Return ``[w0, b0, w1, b1, ...]`` for the given conv variable scopes."""
    variables = []
    for name in layer_names:
        with tf.variable_scope(name, reuse=True):
            variables.append(tf.get_variable('weights'))
            variables.append(tf.get_variable('biases'))
    return variables


def _print_debug_stats(sess, vggModel, feed_dict_obj, layer_names,
                       param_vars):
    """Print max/min of intermediate activations and conv weights."""
    extra = [
        ('rpn_conv', vggModel.rpn_conv),
        ('lstm_o', vggModel.lstm_o),
        ('lstm_bilstm', vggModel.lstm_bilstm),
        ('lstm_fc', vggModel.lstm_fc),
        ('rpn bbox score ', vggModel.rpn_bbox_score),
        ('rpn bbox pre', vggModel.rpn_bbox_pred),
        ('rpn_labels', vggModel.rpn_data[0]),
        ('rpn_bbox_targets', vggModel.rpn_data[1]),
        ('rpn_bbox_inside_weights', vggModel.rpn_data[2]),
        ('rpn_bbox_outside_weights', vggModel.rpn_data[3]),
        ('rpn_cls_prob', vggModel.rpn_cls_prob),
    ]
    fetches = [getattr(vggModel, name) for name in layer_names]
    fetches.extend(tensor for _, tensor in extra)
    outputs = sess.run(fetches, feed_dict=feed_dict_obj)
    parameters_output = sess.run(param_vars, feed_dict=feed_dict_obj)

    for i, name in enumerate(layer_names):
        print('%s max is %.4f, min is %.4f' %
              (name, np.max(outputs[i]), np.min(outputs[i])))
        # param_vars interleaves weights and biases, so layer i's weights
        # live at index 2 * i (its biases at 2 * i + 1).
        weights = parameters_output[2 * i]
        print('responding parameters max is %.4f, min is %.4f' %
              (np.max(weights), np.min(weights)))
    for (label, _), values in zip(extra, outputs[len(layer_names):]):
        print('%s %s %s' % (label, np.max(values), np.min(values)))


def train(max_step=50000, pretrained_model=None, restore=None,
          output_dir='./trained_model',
          cfg_file='/home/give/PycharmProjects/MyCTPN/ctpn/text.yml',
          img_dir='/home/give/Game/OCR/data/ICDAR2017/img',
          txt_dir='/home/give/Game/OCR/data/ICDAR2017/txt'):
    """Build the VGGTrainModel losses and run the training loop.

    Args:
        max_step: Iteration index at which training stops.
        pretrained_model: Optional weights passed to ``load`` with
            ``ignore_missing=True``. Used only when ``restore`` is None.
        restore: Optional checkpoint passed to ``load``; its return value
            is used as the starting iteration. Used only when
            ``pretrained_model`` is None.
        output_dir: Directory handed to ``save`` for snapshots.
        cfg_file: Configuration file handed to ``cfg_from_file``.
        img_dir: First positional argument of ``Dataset``.
        txt_dir: Second positional argument of ``Dataset``.

    Raises:
        Exception: If loading ``pretrained_model`` or ``restore`` fails.
        AssertionError: If the total loss becomes NaN.

    NOTE(review): when both ``pretrained_model`` and ``restore`` are given,
    neither branch runs and nothing is loaded -- confirm this is intended.
    """
    cfg_from_file(cfg_file)
    cfg.TRAIN.DISPLAY = 1
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)
    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    data_layer = get_data_layer(roidb, imdb.num_classes)

    vggModel = VGGTrainModel(trainable=True)
    dataset = Dataset(img_dir, txt_dir)
    train_generator = dataset.train_generator

    (total_loss, model_loss, rpn_regression_l1_loss, rpn_cross_entropy_loss,
     regularization_loss) = vggModel.build_loss()
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)

    global_step = tf.Variable(0, trainable=False)
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
    else:
        opt = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

    with_clip = True
    if with_clip:
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(total_loss, tvars), 10.0)
        train_op = opt.apply_gradients(list(zip(grads, tvars)),
                                       global_step=global_step)
    else:
        train_op = opt.minimize(total_loss, global_step=global_step)

    layer_names = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2',
        'conv5_3',
    ]
    fetch_list = [
        train_op, total_loss, model_loss, rpn_regression_l1_loss,
        rpn_cross_entropy_loss, regularization_loss,
    ]

    start = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=5)

        if pretrained_model is not None and restore is None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(pretrained_model))
                load(pretrained_model, sess, saver, ignore_missing=True)
            except Exception as err:
                # Keep the generic Exception type callers may rely on, but
                # surface the underlying failure instead of hiding it.
                raise Exception(
                    'Check your pretrained model {:s}: {!r}'.format(
                        pretrained_model, err))
        if pretrained_model is None and restore is not None:
            try:
                print(('Loading pretrained model '
                       'weights from {:s}').format(restore))
                global_step_restore = load(restore, sess, saver)
                sess.run(global_step.assign(global_step_restore))
                start = global_step_restore
            except Exception as err:
                raise Exception(
                    'Check your pretrained model {:s}: {!r}'.format(
                        restore, err))

        summary_op = tf.summary.merge_all()
        fetch_list.append(summary_op)
        # Variable lookups are loop-invariant; resolve them once.
        debug_params = _conv_param_variables(layer_names) if DEBUG else None

        timer = Timer()
        for step in range(start, max_step):
            timer.tic()
            imageGT = next(train_generator)
            # NOTE(review): the result is never fed to the model; the call
            # is kept only in case advancing the data layer matters.
            data_layer.forward()
            feed_dict_obj = {
                vggModel.input_img: imageGT.re_imgs,
                vggModel.input_gt: imageGT.gt_bboxes,
                vggModel.input_img_info: imageGT.im_info,
                vggModel.input_is_hard: imageGT.is_hard,
                vggModel.input_notcare: np.reshape(imageGT.notcare, [-1, 4]),
                vggModel.input_keepprob: 0.5,
            }
            (_, total_loss_val, model_loss_val, rpn_regression_l1_loss_value,
             rpn_cross_entropy_loss_value, regularization_loss_val,
             summary_op_value) = sess.run(fetch_list,
                                          feed_dict=feed_dict_obj)

            if DEBUG:
                _print_debug_stats(sess, vggModel, feed_dict_obj,
                                   layer_names, debug_params)

            _diff_time = timer.toc(average=False)
            if step % cfg.TRAIN.DISPLAY == 0:
                # rpn_loss_box reports the RPN regression loss (it used to
                # print the regularization loss under this label).
                print('iter: %d / %d, total loss: %.4f, model loss: %.4f, '
                      'rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
                      (step, max_step, total_loss_val, model_loss_val,
                       rpn_cross_entropy_loss_value,
                       rpn_regression_l1_loss_value, lr.eval()))
                print('speed: {:.3f}s / iter'.format(_diff_time))

            if np.isnan(total_loss_val):
                label_value, score_value, rpn_cross_entropy_loss_value = \
                    sess.run([
                        vggModel.rpn_data[0],
                        vggModel.rpn_cls_score_reshape,
                        vggModel.rpn_cross_entropy_loss,
                    ], feed_dict=feed_dict_obj)
                print('label value is \n %s' % (label_value,))
                print('score value is \n %s' % (score_value,))
                print('rpn_cross_entropy_loss_value is \n %s' %
                      (rpn_cross_entropy_loss_value,))
                # Raised explicitly so the guard survives ``python -O``.
                raise AssertionError(
                    'total loss is NaN at iter %d' % step)

            if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                save(sess, saver=saver, output_dir=output_dir, prefix='VGG',
                     infix='_', iter_index=step)
def train(rpn_data, roi_data, rpn_cls_score_reshape, rpn_bbox_pred,
          feature_map, cls_score, bbox_pred, input_image_tensor,
          input_gt_box_tensor, input_im_info_tensor, pretrain_model=None):
    """Build the RPN + detection losses and run the training loop.

    Args:
        rpn_data: Indexable; [0] is used as RPN labels (-1 entries are
            dropped), [1..3] as bbox targets / inside / outside weights.
        roi_data: Indexable; [1] is used as class labels, [2..4] as bbox
            targets / inside / outside weights.
        rpn_cls_score_reshape: RPN scores, reshaped here to ``[-1, 2]``.
        rpn_bbox_pred: RPN box regression output.
        feature_map: Unused; kept for interface compatibility.
        cls_score: Logits for the detection classification loss.
        bbox_pred: Detection box regression output.
        input_image_tensor: Placeholder fed with ``blobs['data']``.
        input_gt_box_tensor: Placeholder fed with ``blobs['gt_boxes']``.
        input_im_info_tensor: Placeholder fed with ``blobs['im_info']``.
        pretrain_model: Optional weights passed to ``load`` before training.
    """
    from lib.datasets.factory import get_imdb
    from lib.fast_rcnn.train import get_training_roidb, filter_roidb
    from lib.fast_rcnn.train import get_data_layer
    import lib.roi_data_layer.roidb as rdl_roidb
    from lib.fast_rcnn.config import cfg_from_file, cfg_from_list

    # Apply command-line config overrides BEFORE any cfg.TRAIN.* value is
    # read into the graph; previously they were applied after the learning
    # rate schedule and optimizer were built and so had no effect on them.
    # NOTE(review): assumes the module-level ``cfg`` is the object these
    # helpers mutate -- confirm.
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    output_dir = '/home/give/PycharmProjects/MyFasterRCNN/parameters'
    # Created before the optimizer, as in the original, so the set of
    # variables it captures is unchanged.
    saver = tf.train.Saver(max_to_keep=5)

    # RPN classification loss (fg/bg), ignoring anchors labelled -1.
    rpn_cls_score = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_label = tf.reshape(rpn_data[0], [-1])
    keep = tf.where(tf.not_equal(rpn_label, -1))
    rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, keep), [-1, 2])
    rpn_label = tf.reshape(tf.gather(rpn_label, keep), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=rpn_cls_score, labels=rpn_label))

    # RPN bounding-box regression smooth-L1 loss.
    rpn_bbox_targets = tf.transpose(rpn_data[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(rpn_data[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(rpn_data[3], [0, 2, 3, 1])
    rpn_smooth_l1 = _modified_smooth_l1(
        3.0, rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # Detection head: classification + box regression losses.
    label = tf.reshape(roi_data[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=cls_score, labels=label))
    smooth_l1 = _modified_smooth_l1(1.0, bbox_pred, roi_data[2], roi_data[3],
                                    roi_data[4])
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.001, staircase=True)
    train_op = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM).minimize(
        loss, global_step=global_step)

    imdb = get_imdb('voc_2007_trainval')
    roidb = get_training_roidb(imdb)
    roidb = filter_roidb(roidb)
    rdl_roidb.add_bbox_regression_targets(roidb)
    data_layer = get_data_layer(roidb, imdb.num_classes)

    report_every = 100
    fetches = [train_op, loss, loss_box, cross_entropy, rpn_cross_entropy,
               rpn_loss_box]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if pretrain_model is not None:
            print(('Loading pretrained model '
                   'weights from {:s}').format(pretrain_model))
            load(pretrain_model, sess, saver, True)

        for iter_index in range(args.max_iters):
            # blobs keys used here: data, gt_boxes, im_info
            blobs = data_layer.forward()
            # One run per iteration: the losses are fetched together with
            # the update instead of paying for a second forward pass.
            (_, loss_value, loss_box_value, cross_entropy_value,
             rpn_cross_entropy_value, rpn_loss_box_value) = sess.run(
                 fetches,
                 feed_dict={
                     input_image_tensor: blobs['data'],
                     input_gt_box_tensor: blobs['gt_boxes'],
                     input_im_info_tensor: blobs['im_info'],
                 })

            if iter_index % report_every != 0:
                continue
            print('iter: %d / %d' % (iter_index, args.max_iters))
            print('total loss: %.4f, rpn cross entropy: %.4f, '
                  'rpn_loss_box: %.4f, cross entroopy: %.4f, '
                  'loss box: %.4f' %
                  (loss_value, rpn_cross_entropy_value, rpn_loss_box_value,
                   cross_entropy_value, loss_box_value))
            if iter_index == 0:
                continue  # nothing worth snapshotting yet

            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                        '_iter_{:d}'.format(iter_index + 1) + '.ckpt')
            saver.save(sess, os.path.join(output_dir, filename))