# Module-level setup this function relies on (assumed; FLAGS, model and
# data_provider are defined/imported elsewhere in this repo):
import datetime
import os
import time

import tensorflow as tf

slim = tf.contrib.slim


def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    now = datetime.datetime.now()
    StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(FLAGS.logs_path + StyleTime)
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)

    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
    input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox')
    input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')
    deepnet_output = tf.placeholder(tf.float32, shape=[None, None, None, 2], name='input_label')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    gpu_id = int(FLAGS.gpu)
    with tf.device('/gpu:%d' % gpu_id):
        with tf.name_scope('model_%d' % gpu_id) as scope:
            bbox_pred, cls_pred, cls_prob, deep_network, init_fn = model.model_z(input_image)
            total_loss, model_loss, rpn_cross_entropy, rpn_loss_box, deep_loss = model.loss(
                bbox_pred, cls_pred, input_bbox, input_im_info, deep_network, deepnet_output)
            batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
            grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # The actual train op just waits for the EMA update, gradient step and
    # batch-norm statistics update to finish.
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime, tf.get_default_graph())

    init = tf.global_variables_initializer()

    # variable_restore_op was replaced by init_fn:
    # if FLAGS.pretrained_model_path is not None:
    #     variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
    #                                                          slim.get_trainable_variables(),
    #                                                          ignore_missing_vars=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            restore_step = int(ckpt.split('.')[0].split('_')[-1])
            print("continue training from previous checkpoint {}".format(restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if init_fn is not None:
                init_fn(sess)
            restore_step = 0
            # if FLAGS.pretrained_model_path is not None:
            #     variable_restore_op(sess)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()
        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            ml, dl, tl, _, summary_str = sess.run(
                [model_loss, deep_loss, total_loss, train_op, summary_op],
                feed_dict={input_image: data[0],
                           input_bbox: data[1],
                           input_im_info: data[2],
                           deepnet_output: data[3]})
            summary_writer.add_summary(summary_str, global_step=step)

            if step != 0 and step % FLAGS.decay_steps == 0:
                sess.run(tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate))

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, deep loss {:.4f}, total loss {:.4f}, '
                      '{:.2f} seconds/step, LR: {:.6f}'.format(
                          step, ml, dl, tl, avg_time_per_step, learning_rate.eval()))

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess, filename)
                print('Write model to: {:s}'.format(filename))
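# ---------------------------------------------------------------------------
# For reference: a minimal sketch of the command-line flags this script reads,
# assuming the usual tf.app.flags setup. The defaults below are illustrative
# assumptions, not this repo's actual settings; the real definitions live
# elsewhere in the file/repo.
# ---------------------------------------------------------------------------
# tf.app.flags.DEFINE_string('gpu', '0', 'GPU id passed to CUDA_VISIBLE_DEVICES')
# tf.app.flags.DEFINE_string('logs_path', 'logs/', 'TensorBoard log directory prefix')
# tf.app.flags.DEFINE_string('checkpoint_path', 'checkpoints/', 'where ckpt files are saved')
# tf.app.flags.DEFINE_string('pretrained_model_path', None, 'optional slim checkpoint to warm-start from')
# tf.app.flags.DEFINE_boolean('restore', False, 'resume from the latest checkpoint')
# tf.app.flags.DEFINE_float('learning_rate', 1e-5, 'initial learning rate')
# tf.app.flags.DEFINE_float('moving_average_decay', 0.997, 'EMA decay for trainable variables')
# tf.app.flags.DEFINE_float('decay_rate', 0.1, 'multiplicative lr decay factor')
# tf.app.flags.DEFINE_integer('decay_steps', 30000, 'decay lr every this many steps')
# tf.app.flags.DEFINE_integer('max_steps', 50000, 'total training steps')
# tf.app.flags.DEFINE_integer('num_readers', 4, 'worker processes feeding get_batch')
# tf.app.flags.DEFINE_integer('save_checkpoint_steps', 2000, 'checkpoint interval in steps')
# FLAGS = tf.app.flags.FLAGS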
# Variant of main() for the 1800-label run: single model head (no deep-network
# branch), piecewise-constant lr schedule, and a `mianzhi` suffix on log and
# checkpoint names (`mianzhi`, `lrboundaries` and `lrvalues` are defined
# elsewhere at module level).
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    now = datetime.datetime.now()
    StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(FLAGS.logs_path + StyleTime + mianzhi)
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)

    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
    input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox')
    input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    # learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    # learning_rate = FLAGS.learning_rate
    # xzy: set the lr manually so that loading a pretrained checkpoint does not
    # force the pretrained learning rate (lr = 1e-5, which is too small).
    learning_rate = tf.train.piecewise_constant(
        global_step, lrboundaries, lrvalues)  # xzy: lr schedule added for the 1800-label training run
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    gpu_id = int(FLAGS.gpu)
    with tf.device('/gpu:%d' % gpu_id):
        with tf.name_scope('model_%d' % gpu_id) as scope:
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss(
                bbox_pred, cls_pred, input_bbox, input_im_info)
            batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
            grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime + mianzhi,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # restore_step = int(ckpt.split('/')[-1].split('.')[0].split('_')[-1])
            restore_step = int(
                ckpt.split('/')[-1].split('.')[0].split('_')[1])  # xzy: ckpt naming changed in the 1800-label version
            print("continue training from previous checkpoint {}".format(restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            restore_step = 0
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()
        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={input_image: data[0],
                           input_bbox: data[1],
                           input_im_info: data[2]})
            summary_writer.add_summary(summary_str, global_step=step)

            # xzy: the lr used to be adjusted manually here (the original code
            # decayed it once every 30k steps):
            # if step != 0 and step % FLAGS.decay_steps == 0:
            #     sess.run(tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate))

            # if step % 10 == 0:
            if step % 1 == 0:  # xzy: print every step ('once per epoch')
                # avg_time_per_step = (time.time() - start) / 10
                avg_time_per_step = time.time() - start  # xzy: timing per single step
                start = time.time()
                # xzy: 6.24 update: with the lr schedule added, learning_rate
                # is a tensor again, so it must be eval()ed.
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, '
                      '{:.2f} seconds/step, LR: {:.6f}'.format(
                          step, ml, tl, avg_time_per_step, learning_rate.eval()))
                # print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'.format(
                #     step, ml, tl, avg_time_per_step, learning_rate))  # xzy: manually-set learning rate

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + mianzhi + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess, filename)
                print('Write model to: {:s}'.format(filename))
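# ---------------------------------------------------------------------------
# For reference: semantics of the tf.train.piecewise_constant schedule used
# above. `lrboundaries` and `lrvalues` are module-level lists defined elsewhere
# in this repo; the numbers below are illustrative assumptions only. With
# boundaries [b0, b1] and values [v0, v1, v2], the op yields v0 while
# global_step <= b0, v1 for b0 < global_step <= b1, and v2 afterwards
# (len(values) must equal len(boundaries) + 1).
# ---------------------------------------------------------------------------
# lrboundaries = [20000, 40000]   # steps at which the lr drops
# lrvalues = [1e-4, 1e-5, 1e-6]   # lr used inside each interval
#
# The usual TF1 entry point (assumed; not shown in this excerpt):
# if __name__ == '__main__':
#     tf.app.run()   # parses flags, then calls main()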