Example #1
0
def main(argv=None):
    """Build the multi-GPU EAST training graph and run the training loop.

    All configuration is read from the module-level FLAGS object and the
    module-level `gpus` list; `argv` exists only for tf.app.run
    compatibility and is ignored.  (Python 2 code: print statements,
    xrange, generator .next().)
    """
    import os
    # Restrict TensorFlow to the GPUs named in the flag (comma-separated ids).
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Fresh run: (re)create the checkpoint directory; keep it when restoring.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Graph inputs.  Geometry maps carry 5 channels for RBOX (4 edge
    # distances + angle) and 8 otherwise (QUAD: 4 corner offsets x 2 coords).
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    # Non-trainable step counter, incremented by apply_gradients below.
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    # Stepwise-decaying learning rate: x0.94 every 10k steps.
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)


    # split each input batch evenly across the available GPUs
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    # Build one model tower per GPU; variables are created by the first
    # tower and reused by the rest.
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                # NOTE(review): rebound each iteration, so only the LAST
                # tower's batch-norm update ops end up in train_op.
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    # Average the per-tower gradients and apply once (bumps global_step).
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates: fold gradient step, EMA update and BN statistics
    # into a single no-op train_op via control dependencies.
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    # Optional warm start (e.g. ImageNet backbone); missing vars are skipped.
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print 'continue training from previous checkpoint'
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Background batch generator; one combined batch per step covering
        # all GPUs.
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size * len(gpus))

        start = time.time()
        for step in xrange(FLAGS.max_steps):
            # data layout (from icdar.get_batch): 0=images, 2=score maps,
            # 3=geo maps, 4=training masks -- TODO confirm against icdar.py.
            data = data_generator.next()
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0],
                                                                                input_score_maps: data[2],
                                                                                input_geo_maps: data[3],
                                                                                input_training_masks: data[4]})
            if np.isnan(tl):
                print 'Loss diverged, stop training'
                break

            # Periodic console progress (timing is averaged over 10 steps).
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size * len(gpus))/(time.time() - start)
                start = time.time()
                print 'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second)

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            # Summaries re-run the graph on the same batch (one extra
            # training step whenever a summary is written).
            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0],
                                                                                             input_score_maps: data[2],
                                                                                             input_geo_maps: data[3],
                                                                                             input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
Example #2
0
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))
    #x = tf.placeholder(tf.int16, shape=[None, None, 4, 2])
    #y = tf.split(x, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
		il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, f_dat = tower_loss(iis, isms, igms, itms, il, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)
	        #stuff = tf.split(x,len(gpus))[i]

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    
    variables = slim.get_variables_to_restore()
    #print variables[0].name.split('/')
    #print variables
    var_list = []
    for v in variables:
	if len(v.name.split('/')) == 1:
		var_list.append(v)
	elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'):
		var_list.append(v)
	else:
		pass
    #var_list=[v for v in variables if v.name.split('/')[1] != "conv1"]
    saver = tf.train.Saver(var_list)
    #print var_list
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())
    
    '''
    training_list = ["D0006-0285025", "D0017-1592006", "D0041-5370006", "D0041-5370026", "D0042-1070001", "D0042-1070002", "D0042-1070003", "D0042-1070004", "D0042-1070005", "D0042-1070006", "D0042-1070007", "D0042-1070008", "D0042-1070009", "D0042-1070010", "D0042-1070015", "D0042-1070012", "D0042-1070013", "D0079-0019007", "D0089-5235001"]
    validation_list = ["D0090-5242001", "D0117-5755018", "D0117-5755024", "D0117-5755025", "D0117-5755033"]

    with open('Data/cropped_annotations0.txt', 'r') as f:
            annotation_file = f.readlines()
    val_data0 = []
    val_data1 = []
    train_data0 = []
    train_data1 = []
    labels = []
    trainValTest = 2
    for line in annotation_file:
    	if len(line)>1 and line[:11] == 'cropped_img':
                if (len(labels) > 0):
		    if trainValTest == 0:
			train_data1.append(labels)
		    elif trainValTest == 1: 	
			val_data1.append(labels)
                    labels = []
		    trainValTest = 2
        	if line[12:25] in training_list:
		    file_name = "Data/cropped_img_train/"+line[12:].split(".tiff",1)[0]+".tiff"
		    im = cv2.imread(file_name)[:, :, ::-1]
                    train_data0.append(im.astype(np.float32))
		    trainValTest = 0
		elif line[12:25] in validation_list:
	            file_name = "Data/cropped_img_val/"+line[12:].split(".tiff",1)[0]+".tiff"
                    im = cv2.imread(file_name)[:, :, ::-1]
		    val_data0.append(im.astype(np.float32))
		    trainValTest = 1
        elif trainValTest != 2:
	 	annotation_data = line.split(" ")
                if (len(annotation_data) > 2):
		    x, y = float(annotation_data[0]), float(annotation_data[1])
                    w, h = float(annotation_data[2]), float(annotation_data[3])
                    labels.append([[int(x),int(y-h)],[int(x+w),int(y-h)],[int(x+w),int(y)],[int(x),int(y)]])
    if trainValTest == 0:
	train_data1.append(labels)
    elif trainValTest == 1:
	val_data1.append(labels)
    '''  
    init = tf.global_variables_initializer()
    
    if FLAGS.pretrained_model_path is not None:
        print "hereeeee"
	variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        #reader = tf.train.NewCheckpointReader("./"+FLAGS.checkpoint_path)
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
	    model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Continue training from previous checkpoint here {}'.format(model_path))
            saver.restore(sess, model_path)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
 	sess.run(tf.global_variables_initializer())
	variables_names = [v.name for v in tf.trainable_variables()]
	#print "................."
 	#print variables_names
        #print tf.all_variables()       
        training_list = ["D0006-0285025", "D0017-1592006", "D0041-5370006", "D0041-5370026", "D0042-1070001", "D0042-1070002", "D0042-1070003", "D0042-1070004", "D0042-1070005", "D0042-1070006", "D0042-1070007", "D0042-1070008", "D0042-1070009", "D0042-1070010", "D0042-1070015", "D0042-1070012", "D0042-1070013", "D0079-0019007", "D0089-5235001"]


	a = FLAGS.checkpoint_path[-2]
        data_size = 0
	
        with open('Data/cropped_annotations.txt', 'r') as f:
            annotation_file = f.readlines()
        for line in annotation_file:
            if len(line)>1 and line[:13] == './cropped_img' and line[14:27] in training_list:
                data_size +=1
	print "Char model: " + a
	print "Reg constant: " + str(reg_constant)
	print "Data size: " + str(data_size)
	epoche_size = 3 #ata_size / 32
	print "This many steps per epoche: " + str(epoche_size)
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers, q_size=10,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus), data_path=a, trainOrVal="train")
        #print "getting the data batches"
	val_data_generator = icdar.get_batch(num_workers=FLAGS.num_readers, q_size=10,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus), data_path=a, trainOrVal="val")
	start = time.time()
        epochsA, ml_list, tl_list = [], [], []
        epochsB, train_fscore, val_fscore = [], [], []
	#print "entering model training"
        for step in range(FLAGS.max_steps):
	    print "this is an iteration............"
            data = next(data_generator)
	    #val_data = next(val_data_generator)
	    
	    if (step % epoche_size == 100):
		#print 'Epochs {:.4f}, ml {:.4f}, tl {:.4f}'.format(float(step)/epoche_size, ml, tl) 
		'''
		train_size = len(train_data0)
                TP, FP, FN = 0.0, 0.0, 0.0
                for i in range(train_size / 128):
                    score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: train_data0[128*i: 128*(i+1)]})
                    labels = sess.run(stuff, feed_dict = {x: train_data1[128*i:128*(i+1)]})
                    TP0, FP0, FN0 = evalEAST.evaluate(score, geometry, labels)
                    TP += TP0
                    FP += FP0
                    FN += FN0
                p_train, r_train = TP / (TP + FP), TP / (TP + FN)
                fscore_train = 2 * p_train * r_train / (p_train + r_train)
		'''
                #for i in range(len(data[0])):
		#    count_right_cache = 0
                #score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: data[0]})
		#p_train, r_train, fscore_train = evalEAST.evaluate(score, geometry, data[5])
		#score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: val_data[0]})
                #p_val, r_val, fscore_val = evalEAST.evaluate(score, geometry, val_data[1])
		'''
                for i in range(len(score)):
		    count_right_cache = 0
		    print score[i].shape, geometry[i].shape
	            boxes = detect(score_map=score[i], geo_map=geometry[i])
                    if boxes is not None:
                        boxes = boxes[:, :8].reshape((-1, 4, 2))
                        for box in boxes:
                            box = sort_poly(box.astype(np.int32))
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                                continue
                            count_wrong += 1
                            num_true_pos = len(data[5][i])
                            for i2 in range(num_true_pos):
                                #print box
                                #print label[i][i2]
                                if (checkIOU(box, label[i][i2]) == True):
                                    count_right_cache += 1
                                    count_wrong -= 1
                    count_posNotDetected += num_true_pos - count_right_cache
                    count_right += count_right_cache
                p_train = (float) (count_right) / (float) (count_right + count_wrong)  # TP / TP + FP
                r_train = (float) (count_right) / (float) (count_right + count_posNotDetected)  # TP / TP + FN
                fscore_train = 2 * (p_train * r_train) / (p_train + r_train)
		print "hi"
	
		score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: val_data[0]})
                for i in range(len(score)):
                    count_right_cache = 0
                    #score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: val_data[0][i]})
                    boxes = detect(score_map=score[i], geo_map=geometry[i])
                    if boxes is not None:
                        boxes = boxes[:, :8].reshape((-1, 4, 2))
                        for box in boxes:
                            box = sort_poly(box.astype(np.int32))
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                                continue
                            count_wrong += 1
                            num_true_pos = len(val_data[1][i])
                            for i2 in range(num_true_pos):
                                #print box
                                #print label[i][i2]
                                if (checkIOU(box, label[i][i2]) == True):
                                    count_right_cache += 1
                                    count_wrong -= 1
                    count_posNotDetected += num_true_pos - count_right_cache
                    count_right += count_right_cache
                p_val = (float) (count_right) / (float) (count_right + count_wrong)  # TP / TP + FP
                r_val = (float) (count_right) / (float) (count_right + count_posNotDetected)  # TP / TP + FN
                fscore_val = 2 * (p_val * r_val) / (p_val + r_val)
                #return precision, recall, fscore
		'''
		#    score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: data[0][i]})
		#    fscore_train, p_train, r_train = evalEAST.evaluate(score, geometry, data[5][i])
		#    score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: val_data[0]})
                #    fscore_val, p_val, r_val = evalEAST.evaluate(score, geometry, val_data[1])

		print 'Epochs {:.4f}, train fscore {:.4f}, train p {:.4f}, train r {:.4f}, val fscore {:.4f}, val p {:.4f}, val r {:.4f}'.format(float(step)/epoche_size, fscore_train, p_train, r_train, fscore_val, p_val, r_val)            
               
	    #data0 = np.zeros((32,512,512,39)) 
	    ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0],
                                                                                input_score_maps: data[2],
                                                                                input_geo_maps: data[3],
                                                                                input_training_masks: data[4]})
            print ml, tl
	    if step % epoche_size == 0:
		print 'Epochs {:.4f}, ml {:.4f}, tl {:.4f}'.format(float(step)/epoche_size, ml, tl)	
	        #score2, geometry2, dat2 = sess.run([f_score, f_geometry, f_dat], feed_dict={input_images: data[0], input_labels: abc})
                #p_train, r_train, fscore_train = evalEAST.evaluate(score2, geometry2, dat2)
		#print ".."
                #score2, geometry2 = sess.run([f_score, f_geometry], feed_dict={input_images: val_data[0]})
                #p_val, r_val, fscore_val = evalEAST.evaluate(score2, geometry2, val_data[5])
		#print 'Train fscore {:.4f}, train p {:.4f}, train r {:.4f}, val fscore {:.4f}, val p {:.4f}, val r {:.4f}'.format(fscore_train, p_train, r_train, fscore_val, p_val, r_val) 
            
	    if np.isnan(tl):
                print('Loss diverged, stop training')
                break
                       
	    if step % epoche_size == 0: #FLAGS.save_summary_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
		_, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data0,
                                                                                             input_score_maps: data[2],
                                                                                             input_geo_maps: data[3],
                                                                                             input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
Example #3
0
def main(argv=None):
    """Build the multi-GPU EAST training graph and run the training loop.

    Everything is configured through the module-level FLAGS object and
    the module-level `gpus` list; `argv` is accepted only so tf.app.run
    can call this and is otherwise ignored.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # os.environ['CUDA_VISIBLE_DEVICES'] = "1"

    # Prepare the checkpoint directory: wipe and recreate it unless we
    # are resuming from an existing checkpoint.
    if tf.gfile.Exists(FLAGS.checkpoint_path):
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Network inputs; RBOX geometry uses 5 channels, QUAD uses 8.
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    geo_channels = 5 if FLAGS.geometry == 'RBOX' else 8
    input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, geo_channels], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    # Step counter plus a learning rate that decays x0.94 every 10k steps.
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # One shard of each input per GPU.
    num_towers = len(gpus)
    images_shards = tf.split(input_images, num_towers)
    score_shards = tf.split(input_score_maps, num_towers)
    geo_shards = tf.split(input_geo_maps, num_towers)
    mask_shards = tf.split(input_training_masks, num_towers)

    # Build one tower per GPU; the first creates variables, the rest reuse.
    tower_grads = []
    reuse_variables = None
    for tower_idx, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                total_loss, model_loss = tower_loss(images_shards[tower_idx],
                                                    score_shards[tower_idx],
                                                    geo_shards[tower_idx],
                                                    mask_shards[tower_idx],
                                                    reuse_variables)
                # Rebound every iteration: only the last tower's BN
                # update ops are retained.
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                tower_grads.append(opt.compute_gradients(total_loss))

    apply_gradient_op = opt.apply_gradients(average_gradients(tower_grads),
                                            global_step=global_step)

    summary_op = tf.summary.merge_all()
    # Track exponential moving averages of the trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Fold gradients, EMA update and BN statistics into one train op.
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    # Optional warm start; variables missing from the checkpoint are skipped.
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_path))
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        # Background batch generator covering all GPUs in one batch.
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        i = 1
        for step in range(FLAGS.max_steps):
            print(2333)
            #TODO
            data = next(data_generator)
            i += 1
            print(i)
            # Batch layout: 0=images, 2=score maps, 3=geo maps, 4=masks.
            feed_dict = {input_images: data[0],
                         input_score_maps: data[2],
                         input_geo_maps: data[3],
                         input_training_masks: data[4]}
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict=feed_dict)
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            # Console progress, averaged over the last 10 steps.
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu * len(gpus))/(time.time() - start)
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            # Summary pass re-runs training on the same batch.
            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, global_step=step)
Example #4
0
def main():
    """Fine-tune the AlexNet-style convnet on icdar batches.

    Loads pretrained weights, compiles the model with SGD + MSE, and
    trains via fit_generator with per-epoch checkpointing and
    reduce-LR-on-plateau.
    """
    #-----------------------------------------------------------------
    # 1: Parameters: pretrained weights, input size, class-label mapping.
    data_path = 'model/v2_0_convnet_227_weights_epoch05_loss0.0033.h5'
    size = 227
    # '0'..'10' map to indices 0..10; '15' and '16' collapse to 11 and 12.
    labels = {str(digit): digit for digit in range(11)}
    labels['15'] = 11
    labels['16'] = 12

    #-----------------------------------------------------------------
    # 2: Build and compile the Keras model.
    optimizer = SGD(lr=0.01, decay=5e-4, momentum=0.9, nesterov=True)
    model = convnet('alexnet', weights_path=data_path, heatmap=False)
    model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])

    #-----------------------------------------------------------------
    # 5: Training and validation batch generators (single worker each).
    data_generator = icdar.get_batch(num_workers=1,
                                     input_size=size,
                                     batch_size=1,
                                     labels=labels)
    valid_generator = icdar.get_batch(num_workers=1,
                                      input_size=size,
                                      batch_size=1,
                                      labels=labels)
    # Pull one batch up front to prime the training generator.
    data_generator.next()

    #-----------------------------------------------------------------
    # 6: Run training.
    checkpoint_cb = ModelCheckpoint(
        './model/convnet_227_weights_epoch{epoch:02d}_loss{loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode='auto',
        period=1)
    reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss',
                                     factor=0.5,
                                     patience=0,
                                     epsilon=0.001,
                                     cooldown=0)
    model.fit_generator(generator=data_generator,
                        steps_per_epoch=5000,
                        epochs=100,
                        callbacks=[checkpoint_cb, reduce_lr_cb],
                        validation_data=valid_generator,
                        validation_steps=500)
Example #5
0
File: east.py  Project: happog/FudanOCR
def train_east(config_yaml):
    """Train the two-scale EAST text detector configured by a YAML file.

    Builds a TF1 multi-GPU training graph with two score/geometry output
    branches (128x128 and 64x64 for a fixed 512x512 input), averages the
    per-tower gradients, and runs the training loop with checkpointing,
    summaries, and an exponential moving average over trainable variables.

    Args:
        config_yaml: Path to a yacs-compatible YAML config file; all
            hyper-parameters (gpu_list, checkpoint_path, learning_rate,
            batch_size_per_gpu, max_steps, ...) are read from it as FLAGS.
    """
    import sys
    sys.path.append('./detection_model/EAST')

    import time
    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib import slim
    import cv2
    from yacs.config import CfgNode as CN
    import model
    import icdar

    def read_config_file(config_file):
        # Parse the YAML configuration file into a yacs CfgNode.
        # NOTE(review): the file handle is never closed — leak, but harmless
        # for a one-shot load.
        f = open(config_file)
        opt = CN.load_cfg(f)
        return opt

    # TODO: some adaptation handling is still needed here (original note,
    # translated from Chinese).
    FLAGS = read_config_file(config_yaml)

    # One logical GPU index per comma-separated entry in FLAGS.gpu_list.
    gpus = list(range(len(FLAGS.gpu_list.split(','))))

    def tower_loss(images,
                   score_maps1,
                   geo_maps1,
                   training_masks1,
                   score_maps2,
                   geo_maps2,
                   training_masks2,
                   reuse_variables=None):
        """Build one model tower and return (total_loss, model_loss).

        The network emits two detection scales; each scale gets its own
        EAST loss and the two are summed.  total_loss additionally folds
        in the graph's regularization losses.  Summaries are only added
        for the first tower (reuse_variables is None), which is the one
        that creates the variables.
        """
        # Build inference graph
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
            f_score, f_geometry = model.model(images, is_training=True)

        model_loss1 = model.loss(score_maps1, f_score['F_score1'], geo_maps1,
                                 f_geometry['F_geometry1'], training_masks1)
        model_loss2 = model.loss(score_maps2, f_score['F_score2'], geo_maps2,
                                 f_geometry['F_geometry2'], training_masks2)

        model_loss = model_loss1 + model_loss2

        total_loss = tf.add_n(
            [model_loss] +
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

        # add summary
        if reuse_variables is None:
            #         tf.summary.image('input', images)
            #         tf.summary.image('score_map', score_maps)
            #         tf.summary.image('score_map_pred', f_score * 255)
            #         tf.summary.image('geo_map_0', geo_maps[:, :, :, 0:1])
            #         tf.summary.image('geo_map_0_pred', f_geometry[:, :, :, 0:1])
            #         tf.summary.image('training_masks', training_masks)
            tf.summary.scalar('model_loss1', model_loss1)
            tf.summary.scalar('model_loss2', model_loss2)
            tf.summary.scalar('model_loss', model_loss)
            tf.summary.scalar('total_loss', total_loss)

        return total_loss, model_loss

    def average_gradients(tower_grads):
        """Average each variable's gradient across all towers.

        Assumes every tower lists its (gradient, variable) pairs in the
        same variable order, so zip(*tower_grads) groups per variable.
        """
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            grads = []
            for g, _ in grad_and_vars:
                # Prepend a tower axis so the gradients can be concatenated
                # and mean-reduced in one op.
                expanded_g = tf.expand_dims(g, 0)
                grads.append(expanded_g)

            grad = tf.concat(grads, 0)
            grad = tf.reduce_mean(grad, 0)

            # All towers share variables; take the variable from tower 0.
            v = grad_and_vars[0][1]
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)

        return average_grads

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Start from a clean checkpoint directory unless we intend to resume.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Fixed 512x512 input; branch 1 targets are at stride 4 (128x128) and
    # branch 2 at stride 8 (64x64).
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, 512, 512, 3],
                                  name='input_images')
    input_score_maps1 = tf.placeholder(tf.float32,
                                       shape=[None, 128, 128, 1],
                                       name='input_score_maps1')
    input_score_maps2 = tf.placeholder(tf.float32,
                                       shape=[None, 64, 64, 1],
                                       name='input_score_maps2')
    # RBOX geometry has 4 edge distances + 1 angle; QUAD has 8 coordinates.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps1 = tf.placeholder(tf.float32,
                                         shape=[None, 128, 128, 5],
                                         name='input_geo_maps1')
        input_geo_maps2 = tf.placeholder(tf.float32,
                                         shape=[None, 64, 64, 5],
                                         name='input_geo_maps2')
    else:
        input_geo_maps1 = tf.placeholder(tf.float32,
                                         shape=[None, 128, 128, 8],
                                         name='input_geo_maps1')
        input_geo_maps2 = tf.placeholder(tf.float32,
                                         shape=[None, 64, 64, 8],
                                         name='input_geo_maps2')
    input_training_masks1 = tf.placeholder(tf.float32,
                                           shape=[None, 128, 128, 1],
                                           name='input_training_masks1')
    input_training_masks2 = tf.placeholder(tf.float32,
                                           shape=[None, 64, 64, 1],
                                           name='input_training_masks2')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=2000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the batch evenly across the configured GPUs

    print('gpu', len(gpus))
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split1 = tf.split(input_score_maps1, len(gpus))
    input_geo_maps_split1 = tf.split(input_geo_maps1, len(gpus))
    input_training_masks_split1 = tf.split(input_training_masks1, len(gpus))

    input_score_maps_split2 = tf.split(input_score_maps2, len(gpus))
    input_geo_maps_split2 = tf.split(input_geo_maps2, len(gpus))
    input_training_masks_split2 = tf.split(input_training_masks2, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms1 = input_score_maps_split1[i]
                igms1 = input_geo_maps_split1[i]
                itms1 = input_training_masks_split1[i]

                isms2 = input_score_maps_split2[i]
                igms2 = input_geo_maps_split2[i]
                itms2 = input_training_masks_split2[i]
                total_loss, model_loss = tower_loss(iis, isms1, igms1, itms1,
                                                    isms2, igms2, itms2,
                                                    reuse_variables)

                # Collect this tower's batch-norm update ops; NOTE(review):
                # only the LAST tower's ops end up in batch_norm_updates_op
                # since the variable is overwritten each iteration.
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Variables are created by the first tower; reuse afterwards.
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            # Assumed batch layout (from the feed_dict below): data[0]=images,
            # data[2]=score-map dicts, data[3]=geo-map dicts,
            # data[4]=training-mask dicts — verify against icdar.get_batch.
            data = next(data_generator)

            #             print('hello:',data[2]['score_map1'][0].shape)
            #             print('hello:',data[2]['score_map2'][0].shape)
            #             print('hello:',data[3]['geo_map1'][0].shape)
            #             print('hello:',data[3]['geo_map2'][0].shape)

            # debug
            # import cv2
            #            print(type(data[0]))
            # cv2.imwrite('input.jpg', data[0][0])

            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps1: data[2]['score_map1'],
                    input_geo_maps1: data[3]['geo_map1'],
                    input_training_masks1: data[4]['training_mask1'],
                    input_score_maps2: data[2]['score_map2'],
                    input_geo_maps2: data[3]['geo_map2'],
                    input_training_masks2: data[4]['training_mask2']
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                # NOTE(review): assumes FLAGS.checkpoint_path ends with a
                # path separator — confirm, otherwise the prefix is mangled.
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps1: data[2]['score_map1'],
                        input_geo_maps1: data[3]['geo_map1'],
                        input_training_masks1: data[4]['training_mask1'],
                        input_score_maps2: data[2]['score_map2'],
                        input_geo_maps2: data[3]['geo_map2'],
                        input_training_masks2: data[4]['training_mask2']
                    })
                summary_writer.add_summary(summary_str, global_step=step)
예제 #6
0
def main(argv=None):
    """Build the multi-GPU EAST training graph and run the training loop.

    All configuration comes from the module-level FLAGS.  The checkpoint
    directory is created (or wiped unless FLAGS.restore is set), one model
    tower is built per GPU, per-tower gradients are averaged, and training
    runs with Adam plus an exponential moving average over the trainable
    variables.  On resume, the step counter is recovered from the restored
    checkpoint's filename.

    Args:
        argv: Unused; present for the tf.app.run() entry-point convention.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # Start from a clean checkpoint directory unless we intend to resume.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    elif not FLAGS.restore:
        tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
        tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Input placeholders; spatial dims stay dynamic, batch is split per GPU.
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    # RBOX geometry has 4 edge distances + 1 angle; QUAD has 8 coordinates.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Split each input batch evenly across the available GPUs.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Variables are created by the first tower; reuse afterwards.
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    step = 0
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is not None:
                print('continue training from previous checkpoint')
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)
                print(sess.run(global_step))
                # BUG FIX: the original referenced an undefined name `ckpt`
                # (NameError).  Checkpoint files are named
                # 'model.ckpt-<step>', so recover the step counter from the
                # restored checkpoint path instead.
                step = int(model_path.split('-')[-1]) - 1
            else:
                # No checkpoint found: fall back to fresh init, optionally
                # seeding from the pretrained backbone weights.
                sess.run(init)
                if FLAGS.pretrained_model_path is not None:
                    print("--------------------------------")
                    print("---Load the Pretraiend-Weight---")
                    print("--------------------------------")

                    variable_restore_op(sess)
        else:
            sess.run(init)

        # Report the total trainable-parameter count.
        total_parameters = 0
        for variable in tf.trainable_variables():
            local_parameters = 1
            shape = variable.get_shape()
            for dim in shape:
                local_parameters *= dim.value
            total_parameters += local_parameters
        print("-----params-----", total_parameters)
        # BUG FIX: the original used `os.name is 'nt'`, an identity check
        # against a string literal; use equality.  Windows gets 0 workers
        # because the multiprocessing-based reader is POSIX-only.
        if os.name == 'nt':
            workers = 0
        else:
            workers = multiprocessing.cpu_count()
        print(" num of worker : ", workers)
        data_generator = icdar.get_batch(num_workers=workers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))

        start = time.time()

        while step < FLAGS.max_steps:
            # Assumed batch layout: data[0]=images, data[2]=score maps,
            # data[3]=geo maps, data[4]=training masks — per icdar.get_batch.
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
            step += 1
예제 #7
0
def main(argv=None):
    """Train EAST on a single pinned GPU, then export a SavedModel.

    Builds the standard multi-tower EAST training graph (here the towers
    all land on the one visible device), runs the training loop, and
    finally exports the trained graph under the 'serving_default'
    signature so it can be served with TensorFlow Serving.

    Args:
        argv: Unused; present for the tf.app.run() entry-point convention.
    """
    import os
    # Pin to physical GPU 3 in PCI bus order, overriding FLAGS.gpu_list.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"

    # Start from a clean checkpoint directory unless we intend to resume.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    elif not FLAGS.restore:
        tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
        tf.gfile.MkDir(FLAGS.checkpoint_path)

    # Input placeholders; spatial dims stay dynamic, batch is split per GPU.
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    # RBOX geometry has 4 edge distances + 1 angle; QUAD has 8 coordinates.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Split each input batch evenly across the available GPUs.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Variables are created by the first tower; reuse afterwards.
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            # Assumed batch layout: data[0]=images, data[2]=score maps,
            # data[3]=geo maps, data[4]=training masks — per icdar.get_batch.
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)

        # ---- SavedModel export ------------------------------------------
        # BUG FIXES vs. the original:
        #  * predict_signature_def was given tensor-name *strings* as
        #    outputs (and tf.identity was called on strings); the signature
        #    requires actual graph tensors, so look them up by name.
        #  * add_meta_graph_and_variables referenced an undefined name
        #    `signature` (NameError); use the built prediction_signature.
        #  * The model was exported twice to the same export_path; the
        #    second SavedModelBuilder would fail because the directory
        #    already exists.  Export exactly once.
        export_path = "saving_models_sc_ge_vamshi"
        graph = tf.get_default_graph()
        f_scores = graph.get_tensor_by_name("feature_fusion/Conv_7/Sigmoid:0")
        f_geometrys = graph.get_tensor_by_name("feature_fusion/concat_3:0")

        prediction_signature = predict_signature_def(
            inputs={'input_images': input_images},
            outputs={
                'f_scores': f_scores,
                'f_geometrys': f_geometrys
            })

        builder = tf.saved_model.builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            clear_devices=True,
            signature_def_map={'serving_default': prediction_signature},
            main_op=tf.tables_initializer())
        builder.save()
예제 #8
0
def testThroughput():
    """Smoke-test the data pipeline by building a generator and pulling a batch.

    The original version ended with a bare `data_generator` expression,
    a no-op that never exercised the pipeline; actually fetch one batch
    so the reader workers and augmentation code run.
    """
    data_generator = icdar.get_batch(num_workers=1,
                                     input_size=512,
                                     batch_size=1)
    next(data_generator)
예제 #9
0
def main1(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    # get training data
    data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                     input_size=FLAGS.input_size,
                                     batch_size=FLAGS.batch_size_per_gpu *
                                     len(gpus))
    data = next(data_generator)
    #input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    #input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    input_images = tf.constant(np.asarray(data[0]))
    input_score_maps = tf.constant(np.asarray(data[2]))
    input_geo_maps = tf.constant(np.asarray(data[3]))
    input_training_masks = tf.constant(np.asarray(data[4]))

    #if FLAGS.geometry == 'RBOX':
    #    input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    #else:
    #    input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    #input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    # establish gradient descent
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # split the images among the gpus
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    # train model
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        # for each gpu
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                # take in training data
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]

                #calculate loss
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                # add gradient to update later
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    # update gradients
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summary_op = tf.summary.merge_all()

    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # update batch norm
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    # load a pretrained model if it exists
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))
        data = next(data_generator)

        # train the model for each step
        start = time.time()
        for step in range(FLAGS.max_steps):

            # get data to train
            #import cProfile, pstats, StringIO
            #pr = cProfile.Profile()
            #pr.enable()
            #data = next(data_generator)
            #pr.disable()
            #s = StringIO.StringIO()
            #ps = pstats.Stats(pr, stream=s).sort_stats('cumtime')
            #ps.print_stats()
            #print s.getvalue()

            # do a forward pass
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            # print performance statistics
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
예제 #10
0
def train():
    """Create a training-batch generator over the ICDAR-15 training set.

    Builds a generator via ``icdar.get_batch`` with
    ``batch_size_per_gpu * len(gpus)`` samples per batch drawn from the
    hard-coded training path. The generator is not consumed here and the
    function returns ``None``.
    """
    batch_generator = icdar.get_batch(
        num_workers=num_readers,
        training_data_path='path/to_data/icdar15/train/',
        input_size=input_size,
        batch_size=batch_size_per_gpu * len(gpus))
예제 #11
0
def main(argv=None):
    """Train a joint text detection + recognition (FOTS-style) network.

    Builds placeholders for images, score/geometry maps, training masks,
    RoI-transform matrices, per-image box masks, box widths and sparse
    transcriptions; constructs the graph via ``build_graph``; then runs an
    Adam training loop with EMA shadow variables, periodic checkpointing and
    summaries.

    Args:
        argv: Unused; present so the function can serve as a ``tf.app.run``
            entry point.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    # NOTE(review): gpu_options is currently dead -- the Session line that
    # consumed it is commented out below.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    # Start from a clean checkpoint directory unless resuming a prior run.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    # Sparse labels for the CTC-style transcription loss.
    input_transcription = tf.sparse_placeholder(tf.int32, name='input_transcription')
    
    # Affine transform params for RoI extraction; gradients are blocked so
    # the geometry branch is not trained through the transform.
    input_transform_matrix = tf.placeholder(tf.float32, shape=[None, 6], name='input_transform_matrix')
    input_transform_matrix = tf.stop_gradient(input_transform_matrix)
    input_box_masks = []
    # input_box_mask = tf.placeholder(tf.int32, shape=[None], name='input_box_mask')
    input_box_widths = tf.placeholder(tf.int32, shape=[None], name='input_box_widths')
    # Every sequence length is set to the maximum box width (max element
    # broadcast across the vector).
    input_seq_len = input_box_widths[tf.argmax(input_box_widths, 0)] * tf.ones_like(input_box_widths)
    # input_box_nums = tf.placeholder(tf.int32, name='input_box_nums')
    # input_seq_len = tf.placeholder(tf.int32, shape=[None], name='input_seq_len')

    # One int32 mask placeholder per image in the batch.
    for i in range(FLAGS.batch_size_per_gpu):
        input_box_masks.append(tf.placeholder(tf.int32, shape=[None], name='input_box_masks_' + str(i)))

    # f_score, f_geometry, recognition_logits, dense_decode = build_graph(input_images, input_transform_matrix, input_box_mask, input_box_widths, input_box_nums, input_seq_len)
    f_score, f_geometry, recognition_logits = build_graph(input_images, input_transform_matrix, input_box_masks, input_box_widths, input_seq_len)
    # f_score, f_geometry = build_graph(input_images, input_transform_matrix, input_box_mask, input_box_widths, input_box_nums, input_seq_len)

    # Non-trainable step counter, incremented by apply_gradients below.
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    # Staircase decay: LR multiplied by 0.94 every 10k steps.
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # d_loss, r_loss, model_loss = compute_loss(f_score, f_geometry, recognition_logits, input_score_maps, input_geo_maps, input_training_masks, input_transcription, input_seq_len)
    # d_loss, r_loss, model_loss = compute_loss(f_score, f_geometry, recognition_logits, input_score_maps, input_geo_maps, input_training_masks, input_transcription, input_seq_len)
    # Detection loss, recognition loss, and their combined model loss.
    d_loss, r_loss, model_loss = compute_loss(f_score, f_geometry, recognition_logits, input_score_maps, input_geo_maps, input_training_masks, input_transcription, input_box_widths)
    # total_loss = detect_part.loss(input_score_maps, f_score, input_geo_maps, f_geometry, input_training_masks)
    tf.summary.scalar('total_loss', model_loss)
    # Total loss = model loss plus collected weight-regularization terms.
    total_loss = tf.add_n([model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    # total_loss = model_loss
    batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    # train_op is a no-op that forces EMA update, gradient step and
    # batch-norm updates to run together.
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    # A directory means "latest checkpoint from another dataset"; a file is
    # treated as a single *.ckpt (e.g. an ImageNet backbone).
    if FLAGS.pretrained_model_path is not None:
        if os.path.isdir(FLAGS.pretrained_model_path):
            print("Restore pretrained model from other datasets")
            ckpt = tf.train.latest_checkpoint(FLAGS.pretrained_model_path)
            variable_restore_op = slim.assign_from_checkpoint_fn(ckpt, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
        else: # is *.ckpt
            print("Restore pretrained model from imagenet")
            variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
        # ckpt = tf.train.latest_checkpoint(FLAGS.pretrained_model_path)
        # variable_restore_op = slim.assign_from_checkpoint_fn(ckpt, slim.get_trainable_variables(), ignore_missing_vars=True)
    # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) as sess:
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        
        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu)
        
        """
        data_generator = synth.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu)
        """
        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            # Batch layout (inferred from the feed targets; verify against
            # icdar.get_batch): 0=images, 2=score maps, 3=geo maps,
            # 4=training masks, 5=transform matrices, 6=per-image box masks,
            # 7=box widths, 8=sparse transcriptions.
            inp_dict = {input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4],
                        input_transform_matrix: data[5],
                        input_box_widths: data[7],
                        input_transcription: data[8]}

            for i in range(FLAGS.batch_size_per_gpu):
                inp_dict[input_box_masks[i]] = data[6][i]


            dl, rl, tl,  _ = sess.run([d_loss, r_loss, total_loss, train_op], feed_dict=inp_dict)
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break
            

            # Averages are over the 10 steps since `start` was last reset.
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu)/(time.time() - start)
                start = time.time()
                print('Step {:06d}, detect_loss {:.4f}, recognize_loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, dl, rl, tl, avg_time_per_step, avg_examples_per_second))

            
            # NOTE(review): path concatenation assumes checkpoint_path ends
            # with a separator -- confirm the flag's convention.
            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            # This sess.run executes train_op again, i.e. summary steps take
            # an extra optimization step on the same batch.
            if step % FLAGS.save_summary_steps == 0:
                dl, rl, tl, _, summary_str = sess.run([d_loss, r_loss, total_loss, train_op, summary_op], feed_dict=inp_dict)
                
                summary_writer.add_summary(summary_str, global_step=step)
		return None, timer

	# here we filter some low score boxes by the average score map, this is different from the orginal paper
	for i, box in enumerate(boxes):
		mask = np.zeros_like(score_map, dtype=np.uint8)
		cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
		boxes[i, 8] = cv2.mean(score_map, mask)[0]
	boxes = boxes[boxes[:, 8] > box_thresh]

	return boxes, timer

if __name__ == "__main__":
	import icdar
	import os

	data_generator = icdar.get_batch(num_workers=1,
			input_size=256, batch_size=1)

	input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
		name='input_images')
	input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1],
		name='input_score_maps')
	input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5],
		name='input_geo_maps')
	input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1],
		name='input_training_masks')
	input_enhancement_mask = tf.placeholder(tf.float32, shape=[None, None, None, 3],
		name='input_images')

	# f_score, f_geometry = model.model(input_images, is_training=False)	
	with tf.Session() as sess:
		# restore_from_dir(sess, FLAGS.checkpoint_east)