def __init__(self, sess, network, imdb, roidb, output_dir, logdir, pretrained_model=None):
    """Set up solver state: dataset/network handles, bbox-regression stats,
    a checkpoint saver, and a TensorBoard summary writer."""
    # Keep references to the network and dataset objects used during training.
    self.net = network
    self.imdb = imdb
    self.roidb = roidb
    self.output_dir = output_dir
    self.pretrained_model = pretrained_model

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        # Normalization statistics for the regression targets
        # (the roidb is updated in place by the helper).
        means, stds = rdl_roidb.add_bbox_regression_targets(roidb)
        self.bbox_means = means
        self.bbox_stds = stds
    print('done')

    # Checkpointing (V2 format) and TensorBoard event logging.
    self.saver = tf.train.Saver(max_to_keep=100, write_version=tf.train.SaverDef.V2)
    self.writer = tf.summary.FileWriter(logdir=logdir, graph=tf.get_default_graph(), flush_secs=5)
def __init__(self, sess, network, imdb, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper.

    Stores dataset/network handles, optionally computes bounding-box
    regression target normalization stats, and builds the checkpoint savers.
    """
    self.net = network
    self.imdb = imdb
    self.roidb = roidb
    self.output_dir = output_dir
    self.pretrained_model = pretrained_model
    print('Computing bounding-box regression targets...')
    if cfg.ZLRM.TRAIN.BBOX_REG:
        self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    # For checkpoint.
    # (Removed a block of commented-out variable-filtering code that was dead.)
    self.saver = tf.train.Saver(max_to_keep=100)
    # NOTE(review): attribute name kept as-is ("restor") because external
    # callers may reference it; renaming would break the interface.
    self.rpn_restor_saver = tf.train.Saver()
def __init__(self, solver_prototxt, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper around a Caffe SGD solver.

    Computes bbox-regression target stats when enabled, optionally copies
    pretrained weights, parses the solver prototxt, and hands the roidb to
    the data layer.
    """
    self.output_dir = output_dir
    if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
        # RPN can only use precomputed normalization because there are no
        # fixed statistics to compute a priori.
        assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
    if cfg.TRAIN.BBOX_REG:
        # Fixed: Python 2 print statements -> print() calls, consistent with
        # the rest of the file and valid under Python 3.
        print('Computing bounding-box regression targets...')
        self.bbox_means, self.bbox_stds = \
            rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')
    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        print('Loading pretrained model weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)
    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)
    self.solver.net.layers[0].set_roidb(roidb)
def __init__(self, sess, network, imdb, roidb, output_dir, logdir, pretrained_model=None):
    """Initialize the SolverWrapper: keep dataset/network handles, prepare
    bbox-regression normalization, and set up checkpointing plus summaries."""
    self.net, self.imdb, self.roidb = network, imdb, roidb
    self.output_dir = output_dir
    self.pretrained_model = pretrained_model

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        stats = rdl_roidb.add_bbox_regression_targets(roidb)
        self.bbox_means, self.bbox_stds = stats
    print('done')

    # Checkpoints in the V2 format, keeping a deep history of snapshots.
    self.saver = tf.train.Saver(max_to_keep=100, write_version=tf.train.SaverDef.V2)
    # Event writer for TensorBoard; flushes every five seconds.
    self.writer = tf.summary.FileWriter(logdir=logdir, graph=tf.get_default_graph(), flush_secs=5)
def __init__(self, sess, network, imdb, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper (variant without a summary writer)."""
    self.net = network
    self.imdb = imdb
    self.roidb = roidb
    self.output_dir = output_dir
    self.pretrained_model = pretrained_model

    print('Computing bounding-box regression targets...')
    if cfg.ZLRM.TRAIN.BBOX_REG:
        targets = rdl_roidb.add_bbox_regression_targets(roidb)
        self.bbox_means, self.bbox_stds = targets
    print('done')

    # One saver for periodic checkpoints, one reserved for restoring RPN weights.
    self.saver = tf.train.Saver(max_to_keep=100)
    self.rpn_restor_saver = tf.train.Saver()
def __init__(self, sess, saver, network, imdb, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper.

    Stores dataset/network handles, optionally computes bounding-box
    regression normalization, and keeps the externally created saver.
    """
    self.net = network
    self.imdb = imdb
    self.roidb = roidb
    self.output_dir = output_dir
    self.pretrained_model = pretrained_model
    # Fixed: Python 2 print statements -> print() calls, consistent with the
    # rest of the file and valid under Python 3.
    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    # For checkpoint.
    self.saver = saver
def __init__(self, solver_prototxt, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper around a Caffe SGD solver.

    Always computes bbox-regression target stats, optionally copies
    pretrained weights, parses the solver prototxt, and hands the roidb to
    the data layer.
    """
    self.output_dir = output_dir
    # Fixed: Python 2 print statements -> print() calls.
    print('Computing bounding-box regression targets...')
    self.bbox_means, self.bbox_stds = \
        rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        # Fixed: print('...').format(...) called .format on print's return
        # value (None) -> AttributeError under Python 3. Format first.
        print('Loading pretrained model weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)
    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)
    self.solver.net.layers[0].set_roidb(roidb)
def _debug_dump(sess, vggModel, feed_dict_obj):
    """Print min/max activation and parameter ranges for each layer (debug aid)."""
    outputs = sess.run([
        vggModel.input_img, vggModel.conv1_1, vggModel.conv1_2, vggModel.conv2_1,
        vggModel.conv2_2, vggModel.conv3_1, vggModel.conv3_2, vggModel.conv3_3,
        vggModel.conv4_1, vggModel.conv4_2, vggModel.conv4_3, vggModel.conv5_1,
        vggModel.conv5_2, vggModel.conv5_3, vggModel.rpn_conv, vggModel.lstm_o,
        vggModel.lstm_bilstm, vggModel.lstm_fc, vggModel.rpn_bbox_score,
        vggModel.rpn_bbox_pred, vggModel.rpn_data[0], vggModel.rpn_data[1],
        vggModel.rpn_data[2], vggModel.rpn_data[3], vggModel.rpn_cls_prob
    ], feed_dict=feed_dict_obj)
    layer_names = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3',
    ]
    weights_biases = []
    for i in range(13):
        with tf.variable_scope(layer_names[i], reuse=True):
            weights_biases.append(tf.get_variable('weights'))
            weights_biases.append(tf.get_variable('biases'))
    parameters_output = sess.run(weights_biases, feed_dict=feed_dict_obj)
    for i in range(13):
        # outputs[0] is the input image, so activations start at index i + 1.
        print('%s max is %.4f, min is %.4f' % (
            layer_names[i], np.max(outputs[i + 1]), np.min(outputs[i + 1])))
        # NOTE(review): parameters_output interleaves weights and biases, so
        # index [i] mixes the two for i >= 1 — kept as in the original; if
        # only weights were intended, the index should be [2 * i].
        print('responding parameters max is %.4f, min is %.4f' % (
            np.max(parameters_output[i]), np.min(parameters_output[i])))
    print('rpn_conv', np.max(outputs[14]), np.min(outputs[14]))
    print('lstm_o', np.max(outputs[15]), np.min(outputs[15]))
    print('lstm_bilstm', np.max(outputs[16]), np.min(outputs[16]))
    print('lstm_fc', np.max(outputs[17]), np.min(outputs[17]))
    print('rpn bbox score ', np.max(outputs[18]), np.min(outputs[18]))
    print('rpn bbox pre', np.max(outputs[19]), np.min(outputs[19]))
    print('rpn_labels', np.max(outputs[20]), np.min(outputs[20]))
    print('rpn_bbox_targets', np.max(outputs[21]), np.min(outputs[21]))
    print('rpn_bbox_inside_weights', np.max(outputs[22]), np.min(outputs[22]))
    print('rpn_bbox_outside_weights', np.max(outputs[23]), np.min(outputs[23]))
    print('rpn_cls_prob', np.max(outputs[24]), np.min(outputs[24]))


def train(max_step=50000, pretrained_model=None, restore=None, output_dir='./trained_model'):
    """Train the CTPN text-detection model.

    Args:
        max_step: total number of optimization steps to run.
        pretrained_model: path to pretrained weights (mutually exclusive with restore).
        restore: checkpoint path to resume from (restores the global step).
        output_dir: directory where snapshots are written.
    """
    cfg_from_file('/home/give/PycharmProjects/MyCTPN/ctpn/text.yml')
    cfg.TRAIN.DISPLAY = 1
    imdb = get_imdb('voc_2007_trainval')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)
    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    data_layer = get_data_layer(roidb, imdb.num_classes)
    vggModel = VGGTrainModel(trainable=True)
    dataset = Dataset('/home/give/Game/OCR/data/ICDAR2017/img',
                      '/home/give/Game/OCR/data/ICDAR2017/txt')
    train_generator = dataset.train_generator
    val_generator = dataset.val_generator

    total_loss, model_loss, rpn_regression_l1_loss, rpn_cross_entropy_loss, regularization_loss = vggModel.build_loss()
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)

    global_step = tf.Variable(0, trainable=False)
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
    else:
        momentum = cfg.TRAIN.MOMENTUM
        opt = tf.train.MomentumOptimizer(lr, momentum)

    # Global-norm gradient clipping keeps the recurrent part stable.
    with_clip = True
    if with_clip:
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), 10.0)
        train_op = opt.apply_gradients(list(zip(grads, tvars)), global_step=global_step)
    else:
        train_op = opt.minimize(total_loss, global_step=global_step)

    start = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=5)
        if pretrained_model is not None and restore is None:
            try:
                print('Loading pretrained model weights from {:s}'.format(pretrained_model))
                load(pretrained_model, sess, saver, ignore_missing=True)
            except Exception as e:
                # Fixed: bare except -> narrowed and chained so the original
                # failure is not lost.
                raise Exception('Check your pretrained model {:s}'.format(pretrained_model)) from e
        if pretrained_model is None and restore is not None:
            try:
                print('Loading pretrained model weights from {:s}'.format(restore))
                global_step_restore = load(restore, sess, saver)
                sess.run(global_step.assign(global_step_restore))
                start = global_step_restore
            except Exception as e:
                raise Exception('Check your pretrained model {:s}'.format(restore)) from e

        summary_op = tf.summary.merge_all()
        timer = Timer()
        for iter in range(start, max_step):
            timer.tic()
            # Fixed: generator.next() is Python-2-only; next() works everywhere.
            imageGT = next(train_generator)
            blobs = data_layer.forward()
            fetch_list = [
                train_op, total_loss, model_loss, rpn_regression_l1_loss,
                rpn_cross_entropy_loss, regularization_loss, summary_op
            ]
            feed_dict_obj = {
                vggModel.input_img: imageGT.re_imgs,
                vggModel.input_gt: imageGT.gt_bboxes,
                vggModel.input_img_info: imageGT.im_info,
                vggModel.input_is_hard: imageGT.is_hard,
                vggModel.input_notcare: np.reshape(imageGT.notcare, [-1, 4]),
                vggModel.input_keepprob: 0.5
            }
            _, total_loss_val, model_loss_val, rpn_regression_l1_loss_value, rpn_cross_entropy_loss_value, regularization_loss_val, summary_op_value = sess.run(
                fetch_list, feed_dict=feed_dict_obj)

            if DEBUG:
                _debug_dump(sess, vggModel, feed_dict_obj)

            _diff_time = timer.toc(average=False)
            if iter % cfg.TRAIN.DISPLAY == 0:
                # Fixed: the "rpn_loss_box" column previously printed the
                # regularization loss; report the fetched L1 regression loss.
                print('iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
                      (iter, max_step, total_loss_val, model_loss_val,
                       rpn_cross_entropy_loss_value, rpn_regression_l1_loss_value, lr.eval()))
                print('speed: {:.3f}s / iter'.format(_diff_time))

            if np.isnan(total_loss_val):
                # Dump the raw labels/scores to diagnose the divergence before
                # the assert below stops training.
                label_value, score_value, rpn_cross_entropy_loss_value = sess.run(
                    [vggModel.rpn_data[0], vggModel.rpn_cls_score_reshape,
                     vggModel.rpn_cross_entropy_loss],
                    feed_dict=feed_dict_obj)
                print('label value is \n', label_value)
                print('score value is \n', score_value)
                print('rpn_cross_entropy_loss_value is \n', rpn_cross_entropy_loss_value)
            assert not np.isnan(total_loss_val)

            if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                save(sess, saver=saver, output_dir=output_dir, prefix='VGG',
                     infix='_', iter_index=last_snapshot_iter)
def __init__(self, imdb, roidb, rpn_graph, detect_graph, shared_conv_graph, rpn_output_dir, detect_output_dir, shared_conv_rpn_output_dir, shared_conv_detect_output_dir, train_step='train_step_rpn', restore=False, pretrained_model=None):
    """Initialize the SolverWrapper for alternating RPN / detection training.

    Keeps one TF graph per training stage (RPN, detector, shared conv) and
    builds the savers needed to restore each stage's weights from the
    corresponding checkpoint directory.
    """
    self.imdb = imdb
    self.roidb = roidb
    self.rpn_graph = rpn_graph
    self.detect_graph = detect_graph
    self.shared_conv_graph = shared_conv_graph
    self.rpn_output_dir = rpn_output_dir
    self.detect_output_dir = detect_output_dir
    self.shared_conv_rpn_output_dir = shared_conv_rpn_output_dir
    self.shared_conv_detect_output_dir = shared_conv_detect_output_dir
    self.train_step = train_step
    self.restore = restore
    self.pretrained_model = pretrained_model
    # Training op nodes used inside the session, e.g. rpn_train_op, rpn_init.
    self.train_op = {}
    print('Computing bounding-box regression targets...')
    if cfg.ZLRM.TRAIN.BBOX_REG:
        self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
    print('done')
    with self.rpn_graph.as_default():
        self.rpn_restore_saver = tf.train.Saver()
    with self.detect_graph.as_default():
        self.detect_restore_saver = tf.train.Saver()
    with self.shared_conv_graph.as_default():
        if self.restore:
            self.shared_conv_rpn_restore_saver = tf.train.Saver()
            self.shared_conv_detect_restore_saver = tf.train.Saver()
        elif self.train_step == 'train_step_shared_conv_rpn' or self.train_step == 'train_step_shared_conv_detect':
            # Fixed: tf.all_variables() is deprecated; tf.global_variables()
            # is the supported equivalent in TF 1.x.
            tvars = tf.global_variables()
            # Variables whose names also exist in the detection checkpoint.
            detect_ckpt = tf.train.get_checkpoint_state(self.detect_output_dir)
            reader = tf.train.NewCheckpointReader(detect_ckpt.model_checkpoint_path)
            detect_variables = reader.get_variable_to_shape_map()
            tvars_detect = [
                v for v in tvars if (v.name.split(':')[0] in detect_variables)
            ]
            print('tvars_detect', tvars_detect)
            self.shared_conv_rpn_restore_detect_fraction_saver = tf.train.Saver(
                var_list=tvars_detect)
            # RPN-head variables, restored from the RPN checkpoint.
            tvars_rpn = [
                v for v in tvars
                if (v.name.split('/')[0] == 'rpn_conv'
                    or v.name.split('/')[0] == 'rpn_cls_score'
                    or v.name.split('/')[0] == 'rpn_bbox_pred')
            ]
            print('tvars_rpn', tvars_rpn)
            self.shared_conv_rpn_restore_rpn_fraction_saver = tf.train.Saver(
                var_list=tvars_rpn)
            # NOTE(review): these two full savers sit at the end of the elif
            # branch in the original collapsed text — confirm they were not
            # meant to be created unconditionally for every train_step.
            self.shared_conv_rpn_restore_saver = tf.train.Saver()
            self.shared_conv_detect_restore_saver = tf.train.Saver()
def train(rpn_data, roi_data, rpn_cls_score_reshape, rpn_bbox_pred, feature_map, cls_score, bbox_pred, input_image_tensor, input_gt_box_tensor, input_im_info_tensor, pretrain_model=None):
    """Build the Faster R-CNN losses and run the training loop.

    Args:
        rpn_data: RPN targets — [labels, bbox_targets, inside_w, outside_w].
        roi_data: ROI targets — labels at [1], bbox targets/weights at [2..4].
        rpn_cls_score_reshape / rpn_bbox_pred: RPN head outputs.
        cls_score / bbox_pred: detection head outputs.
        input_*_tensor: graph placeholders fed from the data layer blobs.
        pretrain_model: optional path to pretrained weights.
    """
    # Hoisted the imports that were buried inside the session block so the
    # function's dependencies are visible up front.
    from lib.datasets.factory import get_imdb
    from lib.fast_rcnn.train import get_training_roidb, filter_roidb
    from lib.fast_rcnn.train import get_data_layer
    import lib.roi_data_layer.roidb as rdl_roidb
    import numpy as np
    from lib.fast_rcnn.config import cfg_from_file, cfg_from_list

    output_dir = '/home/give/PycharmProjects/MyFasterRCNN/parameters'
    saver = tf.train.Saver(max_to_keep=5)

    # RPN classification loss (fg/bg); entries labeled -1 are ignored.
    rpn_cls_score = tf.reshape(rpn_cls_score_reshape, [-1, 2])
    rpn_label = tf.reshape(rpn_data[0], [-1])
    rpn_cls_score = tf.reshape(
        tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))), [-1, 2])
    rpn_label = tf.reshape(
        tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                       labels=rpn_label))
    # RPN bounding-box regression smooth-L1 loss.
    rpn_bbox_targets = tf.transpose(rpn_data[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(rpn_data[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(rpn_data[3], [0, 2, 3, 1])
    rpn_smooth_l1 = _modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets,
                                        rpn_bbox_inside_weights,
                                        rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # Detection head classification + box regression losses.
    label = tf.reshape(roi_data[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                       labels=label))
    smooth_l1 = _modified_smooth_l1(1.0, bbox_pred, roi_data[2], roi_data[3],
                                    roi_data[4])
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.001, staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
        loss, global_step=global_step)

    with tf.Session() as sess:
        args = parse_args()
        if args.cfg_file is not None:
            cfg_from_file(args.cfg_file)
        if args.set_cfgs is not None:
            cfg_from_list(args.set_cfgs)
        imdb = get_imdb('voc_2007_trainval')
        roidb = get_training_roidb(imdb)
        roidb = filter_roidb(roidb)
        bbox_means, bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb)
        data_layer = get_data_layer(roidb, imdb.num_classes)
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if pretrain_model is not None:
            # Fixed: print('...').format(...) printed the raw template and then
            # raised AttributeError on print's None return under Python 3.
            print('Loading pretrained model weights from {:s}'.format(pretrain_model))
            load(pretrain_model, sess, saver, True)
        for iter_index in range(args.max_iters):
            # blobs keys include: data, gt_boxes, im_info
            blobs = data_layer.forward()
            feed = {
                input_image_tensor: blobs['data'],
                input_gt_box_tensor: blobs['gt_boxes'],
                input_im_info_tensor: blobs['im_info']
            }
            _, rpn_bbox_targets_value, rpn_bbox_inside_weights_value, rpn_bbox_outside_weights_value = sess.run(
                [train_op, rpn_bbox_targets, rpn_bbox_inside_weights,
                 rpn_bbox_outside_weights],
                feed_dict=feed)
            rpn_label_value, loss_value, loss_box_value, cross_entropy_value, rpn_cross_entropy_value, rpn_loss_box_value = sess.run(
                [rpn_label, loss, loss_box, cross_entropy, rpn_cross_entropy,
                 rpn_loss_box],
                feed_dict=feed)
            if iter_index % 100 == 0:
                # Fixed: Python 2 print statements -> print() calls.
                print('iter: %d / %d' % (iter_index, args.max_iters))
                print('total loss: %.4f, rpn cross entropy: %.4f, rpn_loss_box: %.4f, cross entroopy: %.4f, loss box: %.4f' % (
                    loss_value, rpn_cross_entropy_value, rpn_loss_box_value,
                    cross_entropy_value, loss_box_value))
            if iter_index == 0:
                continue
            # NOTE(review): a snapshot is written on EVERY iteration after the
            # first; presumably this should be gated on
            # cfg.TRAIN.SNAPSHOT_ITERS — confirm before changing.
            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix +
                        '_iter_{:d}'.format(iter_index + 1) + '.ckpt')
            filename = os.path.join(output_dir, filename)
            saver.save(sess, filename)