def main(argv=None):
    """Train the EAST text detector across one or more GPUs.

    Builds one model tower per entry in the module-level ``gpus`` list,
    averages the tower gradients, and runs the training loop, writing
    checkpoints and summaries under ``FLAGS.checkpoint_path``.

    NOTE(review): this variant is Python-2 only (print statements,
    ``xrange``, ``generator.next()``).
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Fresh run: wipe the checkpoint dir unless we are resuming.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Placeholders: batch and spatial dims are dynamic (None).
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    # RBOX geometry uses 5 channels (4 distances + angle); the other mode uses 8.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the batch evenly across the towers
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                # Batch-norm update ops scoped to this tower.
                # NOTE(review): only the LAST tower's ops survive the loop --
                # confirm this is intentional (it matches upstream EAST).
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Share variables for every tower after the first one.
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates: make the train op depend on EMA, gradient apply, and BN updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print 'continue training from previous checkpoint'
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Batch tuple layout (per the feed_dict below): [0]=images,
        # [2]=score maps, [3]=geo maps, [4]=training masks.
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size * len(gpus))

        start = time.time()
        for step in xrange(FLAGS.max_steps):
            data = data_generator.next()
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
                                            input_geo_maps: data[3],
                                            input_training_masks: data[4]})
            if np.isnan(tl):
                print 'Loss diverged, stop training'
                break

            if step % 10 == 0:
                # Throughput over the last 10 steps.
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size * len(gpus))/(time.time() - start)
                start = time.time()
                print 'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second)

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                # Extra forward/backward pass just to emit summaries.
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_score_maps: data[2],
                                                         input_geo_maps: data[3],
                                                         input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
def main(argv=None):
    """Fine-tune the EAST text detector on the cropped-image dataset.

    Only variables under the ``feature_fusion`` scope receive gradients
    (the backbone is frozen via ``var_list`` in ``compute_gradients``).
    The character-model id is taken from ``FLAGS.checkpoint_path[-2]``
    and passed to the data generators as ``data_path``.

    Fixes applied in review:
      * removed an unconditional ``sess.run(tf.global_variables_initializer())``
        that ran AFTER the checkpoint / pretrained restore and wiped the
        restored weights;
      * the periodic summary pass fed the undefined name ``data0``
        (NameError on step 0) -- it now feeds the current batch;
      * removed an unreachable evaluation branch
        (``step % epoche_size == 100`` is never true for epoche_size == 3),
        dead commented-out code, and leftover debug prints.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Fresh run: wipe the checkpoint dir unless we are resuming.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    # Ground-truth quadrilaterals: [batch, boxes, 4 corners, (x, y)].
    input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Split each batch evenly across the available GPUs.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, f_dat = tower_loss(iis, isms, igms, itms, il, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                # Freeze the backbone: only the feature-fusion head is trained.
                train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    # Restore every variable except the custom first-conv filters.
    variables = slim.get_variables_to_restore()
    var_list = []
    for v in variables:
        if len(v.name.split('/')) == 1:
            var_list.append(v)
        elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'):
            # NOTE(review): ``not v.name.find('custom_filter')`` is True only
            # when the name STARTS with 'custom_filter' (find() == 0) --
            # confirm this is the intended test and not a misuse of find().
            var_list.append(v)
    saver = tf.train.Saver(var_list)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path,
                                      os.path.basename(ckpt_state.model_checkpoint_path))
            print('Continue training from previous checkpoint here {}'.format(model_path))
            saver.restore(sess, model_path)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
        # BUG FIX: an unconditional sess.run(tf.global_variables_initializer())
        # used to execute here, overwriting whatever the restore above loaded.

        training_list = ["D0006-0285025", "D0017-1592006", "D0041-5370006", "D0041-5370026",
                         "D0042-1070001", "D0042-1070002", "D0042-1070003", "D0042-1070004",
                         "D0042-1070005", "D0042-1070006", "D0042-1070007", "D0042-1070008",
                         "D0042-1070009", "D0042-1070010", "D0042-1070015", "D0042-1070012",
                         "D0042-1070013", "D0079-0019007", "D0089-5235001"]
        # Character-model id encoded in the checkpoint path.
        a = FLAGS.checkpoint_path[-2]
        # Count the training crops listed in the annotation file.
        data_size = 0
        with open('Data/cropped_annotations.txt', 'r') as f:
            annotation_file = f.readlines()
        for line in annotation_file:
            if len(line) > 1 and line[:13] == './cropped_img' and line[14:27] in training_list:
                data_size += 1
        print('Char model: ' + a)
        print('Reg constant: ' + str(reg_constant))
        print('Data size: ' + str(data_size))
        epoche_size = 3  # NOTE(review): hard-coded debug value? was data_size / 32
        print('This many steps per epoche: ' + str(epoche_size))

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers, q_size=10,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus),
                                         data_path=a, trainOrVal="train")
        # Kept even though validation is currently disabled: creating the
        # generator spawns its worker processes.
        val_data_generator = icdar.get_batch(num_workers=FLAGS.num_readers, q_size=10,
                                             input_size=FLAGS.input_size,
                                             batch_size=FLAGS.batch_size_per_gpu * len(gpus),
                                             data_path=a, trainOrVal="val")

        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
                                            input_geo_maps: data[3],
                                            input_training_masks: data[4]})
            if step % epoche_size == 0:
                print('Epochs {:.4f}, ml {:.4f}, tl {:.4f}'.format(float(step) / epoche_size, ml, tl))
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break
            if step % epoche_size == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
                # BUG FIX: this summary pass previously fed the undefined name
                # ``data0``; feed the current batch instead.
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_score_maps: data[2],
                                                         input_geo_maps: data[3],
                                                         input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
def main(argv=None):
    """Train the EAST text detector across one or more GPUs (Python 3).

    Builds one model tower per entry in the module-level ``gpus`` list,
    averages tower gradients, and runs the training loop, writing
    checkpoints and summaries under ``FLAGS.checkpoint_path``.

    Review fix: removed leftover debug instrumentation (``print(2333)``
    marked TODO, and a redundant per-step counter ``i`` printed every
    iteration -- ``step`` already carries that information).
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # os.environ['CUDA_VISIBLE_DEVICES'] = "1"
    # Fresh run: wipe the checkpoint dir unless we are resuming.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Placeholders: batch and spatial dims are dynamic (None).
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    # RBOX geometry uses 5 channels (4 distances + angle); the other mode uses 8.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the batch evenly across the towers
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Share variables for every tower after the first one.
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Batch tuple layout (per the feed_dict below): [0]=images,
        # [2]=score maps, [3]=geo maps, [4]=training masks.
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
                                            input_geo_maps: data[3],
                                            input_training_masks: data[4]})
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                # Throughput over the last 10 steps.
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus))/(time.time() - start)
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                # Extra forward/backward pass just to emit summaries.
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_score_maps: data[2],
                                                         input_geo_maps: data[3],
                                                         input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
def main():
    """Train the AlexNet-style convnet character classifier with Keras.

    Loads pretrained weights, compiles with SGD + MSE, and fits from the
    ``icdar`` batch generators, checkpointing improved models and halving
    the learning rate when validation loss plateaus.

    Review fix: ``data_generator.next()`` is Python-2 only; replaced with
    the builtin ``next(data_generator)``, which works on both 2 and 3.
    """
    #-----------------------------------------------------------------
    # 1: Set some necessary parameters
    data_path = 'model/v2_0_convnet_227_weights_epoch05_loss0.0033.h5'
    size = 227
    # Raw label string -> contiguous class index (13 classes).
    labels = {
        '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6,
        '7': 7, '8': 8, '9': 9, '10': 10, '15': 11, '16': 12
    }

    #-----------------------------------------------------------------
    # 2: Build the Keras model
    sgd = SGD(lr=0.01, decay=5e-4, momentum=0.9, nesterov=True)
    model = convnet('alexnet', weights_path=data_path, heatmap=False)
    model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])

    #-----------------------------------------------------------------
    # 4: Instantiate an encoder that can encode ground truth labels into
    # the format needed by the EAST loss function

    #-----------------------------------------------------------------
    # 5: Create the validation set batch generator
    data_generator = icdar.get_batch(num_workers=1, input_size=size, batch_size=1, labels=labels)
    valid_generator = icdar.get_batch(num_workers=1, input_size=size, batch_size=1, labels=labels)
    # Prime the generator (discards one batch).  BUG FIX: was the
    # Python-2-only ``data_generator.next()``.
    next(data_generator)

    #-----------------------------------------------------------------
    # 6: Run training
    model.fit_generator(
        generator=data_generator,
        steps_per_epoch=5000,
        epochs=100,
        callbacks=[
            ModelCheckpoint(
                './model/convnet_227_weights_epoch{epoch:02d}_loss{loss:.4f}.h5',
                monitor='val_loss',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='auto',
                period=1),
            # NOTE(review): ``epsilon`` is the old-Keras name for
            # ``min_delta`` -- kept for compatibility with the pinned version.
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.5,
                              patience=0,
                              epsilon=0.001,
                              cooldown=0)
        ],
        validation_data=valid_generator,
        validation_steps=500)
def train_east(config_yaml):
    """Train a dual-head EAST text detector configured from a YAML file.

    The model emits two score/geometry pairs at different resolutions
    (head 1: 128x128, head 2: 64x64 for 512x512 inputs); the loss is the
    sum of the two per-head losses.  All configuration comes from the
    YAML file at ``config_yaml`` (loaded into ``FLAGS``).
    """
    import sys
    sys.path.append('./detection_model/EAST')
    import time
    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib import slim
    import cv2
    from yacs.config import CfgNode as CN
    import model
    import icdar

    def read_config_file(config_file):
        # Load the configuration from the YAML file via yacs.
        f = open(config_file)
        opt = CN.load_cfg(f)
        return opt

    # TODO: some adaptation handling is still needed here.
    FLAGS = read_config_file(config_yaml)
    # One logical GPU index per entry in the comma-separated gpu_list.
    gpus = list(range(len(FLAGS.gpu_list.split(','))))

    def tower_loss(images, score_maps1, geo_maps1, training_masks1,
                   score_maps2, geo_maps2, training_masks2, reuse_variables=None):
        # Build inference graph; the model returns dicts keyed by head
        # ('F_score1'/'F_score2', 'F_geometry1'/'F_geometry2').
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
            f_score, f_geometry = model.model(images, is_training=True)
        model_loss1 = model.loss(score_maps1, f_score['F_score1'],
                                 geo_maps1, f_geometry['F_geometry1'],
                                 training_masks1)
        model_loss2 = model.loss(score_maps2, f_score['F_score2'],
                                 geo_maps2, f_geometry['F_geometry2'],
                                 training_masks2)
        model_loss = model_loss1 + model_loss2
        total_loss = tf.add_n(
            [model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

        # add summary (only for the first tower)
        if reuse_variables is None:
            # tf.summary.image('input', images)
            # tf.summary.image('score_map', score_maps)
            # tf.summary.image('score_map_pred', f_score * 255)
            # tf.summary.image('geo_map_0', geo_maps[:, :, :, 0:1])
            # tf.summary.image('geo_map_0_pred', f_geometry[:, :, :, 0:1])
            # tf.summary.image('training_masks', training_masks)
            tf.summary.scalar('model_loss1', model_loss1)
            tf.summary.scalar('model_loss2', model_loss2)
            tf.summary.scalar('model_loss', model_loss)
            tf.summary.scalar('total_loss', total_loss)
        return total_loss, model_loss

    def average_gradients(tower_grads):
        # Element-wise mean of per-tower gradients; keeps the variable
        # reference from the first tower.
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            grads = []
            for g, _ in grad_and_vars:
                expanded_g = tf.expand_dims(g, 0)
                grads.append(expanded_g)
            grad = tf.concat(grads, 0)
            grad = tf.reduce_mean(grad, 0)
            v = grad_and_vars[0][1]
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        return average_grads

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Fresh run: wipe the checkpoint dir unless we are resuming.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Fixed-size inputs: 512x512 images; head 1 maps are 1/4 scale (128),
    # head 2 maps are 1/8 scale (64).
    input_images = tf.placeholder(tf.float32, shape=[None, 512, 512, 3], name='input_images')
    input_score_maps1 = tf.placeholder(tf.float32, shape=[None, 128, 128, 1], name='input_score_maps1')
    input_score_maps2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name='input_score_maps2')
    # RBOX geometry uses 5 channels (4 distances + angle); the other mode uses 8.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps1 = tf.placeholder(tf.float32, shape=[None, 128, 128, 5], name='input_geo_maps1')
        input_geo_maps2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 5], name='input_geo_maps2')
    else:
        input_geo_maps1 = tf.placeholder(tf.float32, shape=[None, 128, 128, 8], name='input_geo_maps1')
        input_geo_maps2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 8], name='input_geo_maps2')
    input_training_masks1 = tf.placeholder(tf.float32, shape=[None, 128, 128, 1], name='input_training_masks1')
    input_training_masks2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name='input_training_masks2')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=2000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the batch evenly across the towers
    print('gpu', len(gpus))
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split1 = tf.split(input_score_maps1, len(gpus))
    input_geo_maps_split1 = tf.split(input_geo_maps1, len(gpus))
    input_training_masks_split1 = tf.split(input_training_masks1, len(gpus))
    input_score_maps_split2 = tf.split(input_score_maps2, len(gpus))
    input_geo_maps_split2 = tf.split(input_geo_maps2, len(gpus))
    input_training_masks_split2 = tf.split(input_training_masks2, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms1 = input_score_maps_split1[i]
                igms1 = input_geo_maps_split1[i]
                itms1 = input_training_masks_split1[i]
                isms2 = input_score_maps_split2[i]
                igms2 = input_geo_maps_split2[i]
                itms2 = input_training_masks_split2[i]
                total_loss, model_loss = tower_loss(iis, isms1, igms1, itms1, isms2, igms2, itms2, reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Share variables for every tower after the first one.
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Batch tuple layout (per the feed_dict below): [0]=images,
        # [2]=dict of score maps, [3]=dict of geo maps, [4]=dict of masks,
        # keyed per head ('score_map1'/'score_map2', ...).
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            # print('hello:',data[2]['score_map1'][0].shape)
            # print('hello:',data[2]['score_map2'][0].shape)
            # print('hello:',data[3]['geo_map1'][0].shape)
            # print('hello:',data[3]['geo_map2'][0].shape)
            # debug
            # import cv2
            # print(type(data[0]))
            # cv2.imwrite('input.jpg', data[0][0])
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps1: data[2]['score_map1'],
                    input_geo_maps1: data[3]['geo_map1'],
                    input_training_masks1: data[4]['training_mask1'],
                    input_score_maps2: data[2]['score_map2'],
                    input_geo_maps2: data[3]['geo_map2'],
                    input_training_masks2: data[4]['training_mask2']
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                # Throughput over the last 10 steps.
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                # Extra forward/backward pass just to emit summaries.
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps1: data[2]['score_map1'],
                        input_geo_maps1: data[3]['geo_map1'],
                        input_training_masks1: data[4]['training_mask1'],
                        input_score_maps2: data[2]['score_map2'],
                        input_geo_maps2: data[3]['geo_map2'],
                        input_training_masks2: data[4]['training_mask2']
                    })
                summary_writer.add_summary(summary_str, global_step=step)
def main(argv=None):
    """Train the multi-GPU EAST-style detector, resuming from the newest
    checkpoint in FLAGS.checkpoint_path when FLAGS.restore is set.

    Fixes vs. original: the resume step was derived from an undefined name
    `ckpt` (NameError on every resume); `os.name is 'nt'` compared string
    identity instead of equality; a redundant second `sess.run(init)` and
    dead commented-out code were removed.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    # Create the checkpoint directory; wipe it when starting from scratch.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
                                      name='input_score_maps')
    # RBOX geometry: 4 edge distances + 1 angle; otherwise QUAD: 8 offsets.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Shard every input along the batch axis, one shard per GPU.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Share variables with all towers after the first is built.
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summary_op = tf.summary.merge_all()

    # Keep an exponential moving average of all trainable variables; the
    # train op also triggers EMA and batch-norm statistics updates.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    step = 0
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is not None:
                print('continue training from previous checkpoint')
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)
                print(sess.run(global_step))
                # BUGFIX: the original read an undefined name `ckpt` here.
                # Checkpoint files are named 'model.ckpt-<step>', so the
                # resume step is the numeric suffix of the checkpoint path.
                step = int(ckpt_state.model_checkpoint_path.split('-')[-1]) - 1
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                print("--------------------------------")
                print("---Load the Pretraiend-Weight---")
                print("--------------------------------")
                variable_restore_op(sess)
            # (original re-ran `sess.run(init)` in an else branch here; that
            # was redundant — init already ran above — so it was dropped)

        # Count trainable parameters for logging.
        total_parameters = 0
        for variable in tf.trainable_variables():
            local_parameters = 1
            shape = variable.get_shape()  # static shape of the variable
            for dim in shape:
                local_parameters *= dim.value  # multiply dimension sizes
            total_parameters += local_parameters
        print("-----params-----", total_parameters)

        # BUGFIX: `os.name is 'nt'` compared identity, not equality.
        if os.name == 'nt':
            # presumably worker processes are avoided on Windows — TODO confirm
            workers = 0
        else:
            workers = multiprocessing.cpu_count()
        print(" num of worker : ", workers)

        data_generator = icdar.get_batch(
            num_workers=workers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        while step < FLAGS.max_steps:
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                # Re-run with the same feed to collect the merged summaries.
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)

            step += 1
def main(argv=None):
    """Train the detector on GPU 3 and export the trained graph as a
    TensorFlow SavedModel for serving.

    Fixes vs. original export section: tensor-NAME strings were passed to
    tf.identity / predict_signature_def (both require Tensor objects, so the
    export crashed); an undefined name `signature` was referenced; and the
    model was saved twice to the same export directory (the second
    SavedModelBuilder fails because the directory already exists). Leftover
    debug prints were removed.
    """
    import os
    # Pin the process to a specific physical GPU in PCI bus order.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"

    # Create the checkpoint directory; wipe it when starting from scratch.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
                                      name='input_score_maps')
    # RBOX geometry: 4 edge distances + 1 angle; otherwise QUAD: 8 offsets.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Shard every input along the batch axis, one shard per GPU.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Share variables with all towers after the first is built.
                reuse_variables = True
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summary_op = tf.summary.merge_all()

    # EMA of trainable variables; train_op bundles gradient application,
    # EMA updates and batch-norm statistics updates.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2]['score_map1'] if isinstance(data[2], dict) else data[2],
                    input_geo_maps: data[3]['geo_map1'] if isinstance(data[3], dict) else data[3],
                    input_training_masks: data[4]['training_mask1'] if isinstance(data[4], dict) else data[4]
                }) if False else sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)

        # ---- SavedModel export for serving ----
        export_path = "saving_models_sc_ge_vamshi"
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)
        # BUGFIX: look the output tensors up by name instead of passing the
        # name strings themselves (predict_signature_def requires Tensors).
        graph = tf.get_default_graph()
        f_scores = graph.get_tensor_by_name("feature_fusion/Conv_7/Sigmoid:0")
        f_geometrys = graph.get_tensor_by_name("feature_fusion/concat_3:0")
        prediction_signature = predict_signature_def(
            inputs={'input_images': input_images},
            outputs={
                'f_scores': f_scores,
                'f_geometrys': f_geometrys
            })
        # BUGFIX: save exactly once, with the standard SERVING tag; the
        # original built a second builder on the same (now existing) path.
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            clear_devices=True,
            signature_def_map={'serving_default': prediction_signature},
            main_op=tf.tables_initializer())
        builder.save()
def testThroughput():
    """Smoke-test construction of the ICDAR batch generator.

    Only builds the generator with minimal settings; no batch is drawn
    (the generator is lazy), so this checks construction-time wiring only.
    """
    _generator = icdar.get_batch(num_workers=1, input_size=512, batch_size=1)
def main1(argv=None):
    """Benchmark variant of the training loop.

    The first generated batch is baked into the graph as tf.constant inputs,
    and the same feed batch is reused on every step, so the loop timing
    excludes data loading and measures pure compute throughput.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    # Prepare (or wipe) the checkpoint directory.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Pull one batch and freeze it into the graph as constants (replaces the
    # usual placeholders).
    data_generator = icdar.get_batch(
        num_workers=FLAGS.num_readers,
        input_size=FLAGS.input_size,
        batch_size=FLAGS.batch_size_per_gpu * len(gpus))
    data = next(data_generator)
    input_images = tf.constant(np.asarray(data[0]))
    input_score_maps = tf.constant(np.asarray(data[2]))
    input_geo_maps = tf.constant(np.asarray(data[3]))
    input_training_masks = tf.constant(np.asarray(data[4]))

    # establish gradient descent
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # split the images among the gpus
    images_per_gpu = tf.split(input_images, len(gpus))
    scores_per_gpu = tf.split(input_score_maps, len(gpus))
    geos_per_gpu = tf.split(input_geo_maps, len(gpus))
    masks_per_gpu = tf.split(input_training_masks, len(gpus))

    # Build one loss tower per GPU, accumulating per-tower gradients.
    tower_grads = []
    reuse_variables = None
    for tower_idx, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                total_loss, model_loss = tower_loss(
                    images_per_gpu[tower_idx],
                    scores_per_gpu[tower_idx],
                    geos_per_gpu[tower_idx],
                    masks_per_gpu[tower_idx],
                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True  # later towers share the weights
                tower_grads.append(optimizer.compute_gradients(total_loss))

    # Average gradients across towers and apply them.
    averaged = average_gradients(tower_grads)
    apply_gradient_op = optimizer.apply_gradients(averaged,
                                                  global_step=global_step)
    summary_op = tf.summary.merge_all()

    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # train_op bundles gradient application, EMA and batch-norm updates.
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    # load a pretrained model if it exists
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # One more batch for the feed dict; deliberately NOT refreshed inside
        # the loop so every step processes identical data.
        # NOTE(review): this feeds values into tf.constant tensors — confirm
        # the TF version in use permits feeding constants.
        data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))
        data = next(data_generator)

        start = time.time()
        for step in range(FLAGS.max_steps):
            # single forward/backward pass on the fixed batch
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            # print performance statistics
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
def train():
    """Construct the ICDAR-15 training batch generator.

    NOTE(review): reads bare names `num_readers`, `input_size`,
    `batch_size_per_gpu` and `gpus` rather than FLAGS attributes — confirm
    these exist at module level.
    """
    batch_source = icdar.get_batch(
        num_workers=num_readers,
        training_data_path='path/to_data/icdar15/train/',
        input_size=input_size,
        batch_size=batch_size_per_gpu * len(gpus))
def main(argv=None):
    """Joint text detection + recognition (FOTS-style) training loop."""
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Built here but only consumed by a session config the original had
    # commented out; kept so the graph/runtime state is unchanged.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)

    # Prepare (or wipe) the checkpoint directory.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # --- detection inputs ---
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
                                      name='input_score_maps')
    input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5],
                                    name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')
    # --- recognition inputs ---
    input_transcription = tf.sparse_placeholder(tf.int32,
                                                name='input_transcription')
    input_transform_matrix = tf.placeholder(tf.float32, shape=[None, 6],
                                            name='input_transform_matrix')
    # RoI transform matrices are inputs, not learned parameters.
    input_transform_matrix = tf.stop_gradient(input_transform_matrix)
    input_box_widths = tf.placeholder(tf.int32, shape=[None],
                                      name='input_box_widths')
    # Every sequence length is set to the widest box in the batch.
    input_seq_len = input_box_widths[tf.argmax(
        input_box_widths, 0)] * tf.ones_like(input_box_widths)
    input_box_masks = []
    for mask_idx in range(FLAGS.batch_size_per_gpu):
        input_box_masks.append(
            tf.placeholder(tf.int32, shape=[None],
                           name='input_box_masks_' + str(mask_idx)))

    f_score, f_geometry, recognition_logits = build_graph(
        input_images, input_transform_matrix, input_box_masks,
        input_box_widths, input_seq_len)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Detection loss, recognition loss and their combination.
    d_loss, r_loss, model_loss = compute_loss(
        f_score, f_geometry, recognition_logits, input_score_maps,
        input_geo_maps, input_training_masks, input_transcription,
        input_box_widths)
    tf.summary.scalar('total_loss', model_loss)
    total_loss = tf.add_n(
        [model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    batch_norm_updates_op = tf.group(
        *tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    grads = optimizer.compute_gradients(total_loss)
    apply_gradient_op = optimizer.apply_gradients(grads,
                                                  global_step=global_step)
    summary_op = tf.summary.merge_all()

    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # train_op bundles gradient application, EMA and batch-norm updates.
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        if os.path.isdir(FLAGS.pretrained_model_path):
            # A directory means a checkpoint trained on another dataset.
            print("Restore pretrained model from other datasets")
            ckpt = tf.train.latest_checkpoint(FLAGS.pretrained_model_path)
            variable_restore_op = slim.assign_from_checkpoint_fn(
                ckpt, slim.get_trainable_variables(),
                ignore_missing_vars=True)
        else:
            # A single *.ckpt file: ImageNet-pretrained backbone weights.
            print("Restore pretrained model from imagenet")
            variable_restore_op = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_path,
                slim.get_trainable_variables(),
                ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu)

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            feed = {
                input_images: data[0],
                input_score_maps: data[2],
                input_geo_maps: data[3],
                input_training_masks: data[4],
                input_transform_matrix: data[5],
                input_box_widths: data[7],
                input_transcription: data[8]
            }
            # One box-mask placeholder per image in the batch.
            for mask_idx in range(FLAGS.batch_size_per_gpu):
                feed[input_box_masks[mask_idx]] = data[6][mask_idx]

            dl, rl, tl, _ = sess.run([d_loss, r_loss, total_loss, train_op],
                                     feed_dict=feed)
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (
                    10 * FLAGS.batch_size_per_gpu) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, detect_loss {:.4f}, recognize_loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                        step, dl, rl, tl, avg_time_per_step,
                        avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                dl, rl, tl, _, summary_str = sess.run(
                    [d_loss, r_loss, total_loss, train_op, summary_op],
                    feed_dict=feed)
                summary_writer.add_summary(summary_str, global_step=step)
return None, timer # here we filter some low score boxes by the average score map, this is different from the orginal paper for i, box in enumerate(boxes): mask = np.zeros_like(score_map, dtype=np.uint8) cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1) boxes[i, 8] = cv2.mean(score_map, mask)[0] boxes = boxes[boxes[:, 8] > box_thresh] return boxes, timer if __name__ == "__main__": import icdar import os data_generator = icdar.get_batch(num_workers=1, input_size=256, batch_size=1) input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps') input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps') input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks') input_enhancement_mask = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') # f_score, f_geometry = model.model(input_images, is_training=False) with tf.Session() as sess: # restore_from_dir(sess, FLAGS.checkpoint_east)