def get_batch(dataset_dir, num_readers, batch_size, out_shape, net, anchors,
              num_preprocessing_threads, file_pattern='*.tfrecord',
              is_training=True):
    """Build the queue-based input pipeline and return one batch of tensors.

    The records matching `file_pattern` under `dataset_dir` are read through
    a shuffling TF-Slim `DatasetDataProvider`, preprocessed with
    `ssd_vgg_preprocessing.preprocess_image`, encoded against `anchors` with
    `net.bboxes_encode` and finally grouped into batches by `tf.train.batch`.

    Args:
        dataset_dir: directory handed to `sythtextprovider.get_datasets`.
        num_readers: number of parallel readers used by the provider.
        batch_size: batch size; the queue capacities are multiples of it.
        out_shape: target shape forwarded to the preprocessing function.
        net: object exposing `bboxes_encode(labels, bboxes, anchors, num)`.
        anchors: anchor collection; only its length is used directly here.
        num_preprocessing_threads: thread count for `tf.train.batch`.
        file_pattern: pattern of the record files to read.
        is_training: forwarded to the preprocessing function.

    Returns:
        The list `[b_image, b_gclasses, b_glocalisations, b_gscores]`.
    """
    records = sythtextprovider.get_datasets(dataset_dir,
                                            file_pattern=file_pattern)
    reader = slim.dataset_data_provider.DatasetDataProvider(
        records,
        num_readers=num_readers,
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size,
        shuffle=True)

    # 'shape' is requested together with the other items but is not used below.
    image, _shape, labels, bboxes = reader.get(
        ['image', 'shape', 'object/label', 'object/bbox'])

    image, labels, bboxes, num = ssd_vgg_preprocessing.preprocess_image(
        image, labels, bboxes, out_shape, is_training=is_training)

    enc_classes, enc_locs, enc_scores = net.bboxes_encode(
        labels, bboxes, anchors, num)

    # Group sizes used to regroup the flat batch: one entry for the image,
    # then len(anchors) entries for each of the three encoded targets.
    # NOTE(review): presumably bboxes_encode yields one tensor per anchor
    # layer -- confirm against the network implementation.
    n_layers = len(anchors)
    group_sizes = [1, n_layers, n_layers, n_layers]

    flat_inputs = tf_utils.reshape_list(
        [image, enc_classes, enc_locs, enc_scores])
    flat_batch = tf.train.batch(
        flat_inputs,
        batch_size=batch_size,
        num_threads=num_preprocessing_threads,
        capacity=5 * batch_size)

    b_image, b_gclasses, b_glocalisations, b_gscores = tf_utils.reshape_list(
        flat_batch, group_sizes)
    return [b_image, b_gclasses, b_glocalisations, b_gscores]
def get_batch(dataset_dir, num_readers, batch_size, out_shape, net, anchors,
              FLAGS, file_pattern='*.tfrecord', is_training=True,
              shuffe=False):
    """Build the input pipeline and return batched tensors for train or eval.

    Both modes read the records through a TF-Slim `DatasetDataProvider`,
    run `txt_preprocessing.preprocess_image` and encode the boxes with
    `net.bboxes_encode`. They differ in what is batched:

    * training: `tf.train.shuffle_batch` over image + encoded targets;
    * evaluation: `tf.train.batch` with `dynamic_pad=True` over image,
      labels, boxes, the extra `bbox_img` output and the encoded targets.

    Args:
        dataset_dir: directory handed to `sythtextprovider.get_datasets`.
        num_readers: number of parallel readers used by the provider.
        batch_size: batch size; queue capacities are derived from it.
        out_shape: target shape forwarded to the preprocessing function.
        net: object exposing `bboxes_encode(bboxes, anchors, num)`.
        anchors: anchor collection; only its length is used directly here.
        FLAGS: object providing `use_whiten` and `num_preprocessing_threads`.
        file_pattern: pattern of the record files to read.
        is_training: selects the training or the evaluation pipeline.
        shuffe: whether the provider shuffles (name kept for callers).

    Returns:
        Training: `(b_image, b_glocalisations, b_gscores)`.
        Evaluation: `(image, glabels, gbboxes, g_bbox_img, glocalisations,
        gscores)`.
    """
    records = sythtextprovider.get_datasets(dataset_dir,
                                            file_pattern=file_pattern)
    reader = slim.dataset_data_provider.DatasetDataProvider(
        records,
        num_readers=num_readers,
        common_queue_capacity=512 * 16 + 20 * batch_size,
        common_queue_min=512 * 16,
        shuffle=shuffe)

    # 'shape' is requested together with the other items but is not used below.
    image, _shape, labels, bboxes, height, width = reader.get(
        ['image', 'shape', 'object/label', 'object/bbox', 'height', 'width'])

    # The preprocessing call is the same in both modes; only the number of
    # values it hands back differs (four when training, five otherwise).
    preprocessed = txt_preprocessing.preprocess_image(
        image, labels, bboxes, height, width, out_shape,
        use_whiten=FLAGS.use_whiten, is_training=is_training)

    if not is_training:
        image, labels, bboxes, bbox_img, num = preprocessed
        enc_locs, enc_scores = net.bboxes_encode(bboxes, anchors, num)
        n_layers = len(anchors)
        eval_groups = [1, 1, 1, 1, n_layers, n_layers]
        flat_eval = tf.train.batch(
            tf_utils.reshape_list(
                [image, labels, bboxes, bbox_img, enc_locs, enc_scores]),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=50 * batch_size,
            dynamic_pad=True)
        image, glabels, gbboxes, g_bbox_img, glocalisations, gscores = \
            tf_utils.reshape_list(flat_eval, eval_groups)
        return image, glabels, gbboxes, g_bbox_img, glocalisations, gscores

    image, labels, bboxes, num = preprocessed
    enc_locs, enc_scores = net.bboxes_encode(bboxes, anchors, num)
    n_layers = len(anchors)
    train_groups = [1, n_layers, n_layers]
    flat_train = tf.train.shuffle_batch(
        tf_utils.reshape_list([image, enc_locs, enc_scores]),
        batch_size=batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=100 * batch_size,
        min_after_dequeue=50 * batch_size)
    b_image, b_glocalisations, b_gscores = tf_utils.reshape_list(
        flat_train, train_groups)
    return b_image, b_glocalisations, b_gscores
def main(_):
    """Build the TextBoxes training graph and run the TF-Slim training loop.

    The function reads its whole configuration from the module-level `FLAGS`.
    It creates the input queue (provider -> preprocessing -> box encoding ->
    batching -> prefetch queue), one network clone per `FLAGS.num_clones`,
    the summaries, the optional moving averages and the optimizer, and then
    hands everything to `slim.learning.train`.

    Args:
        _: unused positional argument (the function never reads it).

    Raises:
        ValueError: if `FLAGS.dataset_dir` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if want to need multiple GPUs and/or servers in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = sythtextprovider.get_datasets(FLAGS.dataset_dir)

        # Get the TextBoxes network and its anchors.
        text_net = txtbox_300.TextboxNet()
        text_shape = text_net.params.img_shape
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('text_shape ' + str(text_shape))
        text_anchors = text_net.anchors(text_shape)
        print(len(text_anchors))

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get for the network: image, labels, bboxes.
            # ('shape' is fetched as well but not used afterwards.)
            [image, shape, glabels, gbboxes] = provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])

            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes, num = \
                ssd_vgg_preprocessing.preprocess_image(
                    image, glabels, gbboxes, text_shape,
                    is_training=True, data_format='NHWC')

            # Encode groundtruth labels and bboxes.
            print('bboxes num' + str(gbboxes.get_shape()))
            print('glabes' + str(tf.shape(glabels)))
            glocalisations, gscores = \
                text_net.bboxes_encode(gbboxes, text_anchors, num)
            # Group sizes for regrouping the flat batch: the image followed
            # by len(text_anchors) entries for each encoded target.
            batch_shape = [1] + [len(text_anchors)] * 2

            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list([image, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            # NOTE(review): r[10] needs at least 11 batched tensors --
            # confirm the anchor count if the network definition changes.
            print('r shape' + str(r[0]) + str(r[1]) + str(r[10]))
            b_image, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Build one clone: dequeue a batch, run the net, add the losses."""
            # Dequeue batch.
            b_image, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct the network.
            arg_scope = text_net.arg_scope(weight_decay=FLAGS.weight_decay)
            with slim.arg_scope(arg_scope):
                localisations, logits, end_points = \
                    text_net.net(b_image, is_training=True)
            # Add loss function.
            text_net.losses(logits, localisations,
                            b_glocalisations, b_gscores,
                            match_threshold=FLAGS.match_threshold,
                            negative_ratio=FLAGS.negative_ratio,
                            alpha=FLAGS.loss_alpha,
                            label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, dataset.num_samples, global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Compute the total loss and the per-variable gradients of all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        # Evaluating train_tensor runs every update op and yields total_loss.
        train_tensor = control_flow_ops.with_dependencies(
            [update_op], total_loss, name='train_op')

        # Add the summaries created under the first clone's scope.
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options,
                                allow_soft_placement=True)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)
def get_batch(dataset_dir, num_readers, batch_size, out_shape, net, anchors,
              FLAGS, file_pattern='*.tfrecord', is_training=True,
              shuffe=False):
    """Build the input pipeline for the coordinate-based ('cord') encoder.

    Reads 'object/corx' and 'object/cory' from each record, stacks them along
    a new last axis, preprocesses the sample, encodes it with
    `net.bboxes_encode(cord, anchors, num)` and batches the result with
    `tf.train.batch`.

    Args:
        dataset_dir: directory handed to `sythtextprovider.get_datasets`.
        num_readers: number of parallel readers used by the provider.
        batch_size: batch size; queue capacities are multiples of it.
        out_shape: target shape forwarded to the preprocessing function.
        net: object exposing `bboxes_encode(cord, anchors, num)`.
        anchors: anchor collection; only its length is used directly here.
        FLAGS: object providing `num_preprocessing_threads`.
        file_pattern: pattern of the record files to read.
        is_training: selects which tensors are batched and returned.
        shuffe: whether the provider shuffles (name kept for callers).

    Returns:
        Training: `(b_image, b_glocalisations, b_glabels, b_glinks)`.
        Evaluation: `(b_image, b_labels, b_cord, b_glocalisations,
        b_glabels, b_glinks)`.
    """
    records = sythtextprovider.get_datasets(dataset_dir,
                                            file_pattern=file_pattern)
    reader = slim.dataset_data_provider.DatasetDataProvider(
        records,
        num_readers=num_readers,
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size,
        shuffle=shuffe)

    # 'shape' is requested together with the other items but is not used below.
    image, _shape, raw_labels, raw_bboxes, corx, cory = reader.get([
        'image', 'shape', 'object/label', 'object/bbox',
        'object/corx', 'object/cory'
    ])

    # Pair the x and y coordinates along a new trailing axis.
    cord = tf.concat(
        [tf.expand_dims(corx, -1), tf.expand_dims(cory, -1)], -1)

    def _batch(tensors, group_sizes):
        """Batch `tensors` and regroup the flat result by `group_sizes`."""
        flat = tf.train.batch(
            tf_utils.reshape_list(tensors),
            batch_size=batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * batch_size,
        )
        return tf_utils.reshape_list(flat, group_sizes)

    # Preprocessing and encoding are the same calls in both modes; the modes
    # only differ in which tensors end up in the batch.
    image, labels, _bboxes, cord, num = txt_preprocessing.preprocess_image(
        image, raw_labels, raw_bboxes, cord, out_shape,
        is_training=is_training)
    glocalisations, glabels, glinks = net.bboxes_encode(cord, anchors, num)
    n_layers = len(anchors)

    if is_training:
        b_image, b_glocalisations, b_glabels, b_glinks = _batch(
            [image, glocalisations, glabels, glinks],
            [1, n_layers, n_layers, n_layers])
        return b_image, b_glocalisations, b_glabels, b_glinks

    b_image, b_labels, b_cord, b_glocalisations, b_glabels, b_glinks = _batch(
        [image, labels, cord, glocalisations, glabels, glinks],
        [1, 1, 1, n_layers, n_layers, n_layers])
    return b_image, b_labels, b_cord, b_glocalisations, b_glabels, b_glinks
def run():
    """Dump preprocessed samples with their oriented boxes drawn on top.

    Builds the provider -> preprocessing -> `bboxes_encode` graph, then pulls
    up to `show_pic_sum` samples through a session. For every sample the
    image is written to `save_dir`, read back with OpenCV, annotated with
    `draw_polygon` for each box whose label is 1 (default colour) or 0
    (colour `(0, 0, 255)`), and written again to the same file.

    Relies on the module-level names `FLAGS`, `save_dir`, `show_pic_sum` and
    `draw_polygon`.

    Raises:
        ValueError: if `FLAGS.dataset_dir` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    print('-----start test-------')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with tf.device('/GPU:0'):
        dataset = sythtextprovider.get_datasets(FLAGS.dataset_dir)
        print(dataset)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size,
            shuffle=True)
        print('provider:', provider)
        [image, shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
         y4] = provider.get([
             'image', 'shape', 'object/label', 'object/bbox',
             'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
             'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
             'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
             'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
         ])
        print('image:', image)
        print('shape:', shape)
        print('glabel:', glabels)
        print('gboxes:', gbboxes)

        # Stack the four corner coordinates so that each row is one box.
        gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N,4)
        gys = tf.transpose(tf.stack([y1, y2, y3, y4]))
        image = tf.identity(image, 'input_image')
        text_shape = (384, 384)
        image, glabels, gbboxes, gxs, gys = \
            ssd_vgg_preprocessing.preprocess_image(
                image, glabels, gbboxes, gxs, gys, text_shape,
                is_training=True, data_format='NHWC')

        # Split the preprocessed corners back into one tensor per corner.
        x1, x2, x3, x4 = tf.unstack(gxs, axis=1)
        y1, y2, y3, y4 = tf.unstack(gys, axis=1)

        text_net = txtbox_384.TextboxNet()
        text_anchors = text_net.anchors(text_shape)
        e_localisations, e_scores, e_labels = text_net.bboxes_encode(
            glabels, gbboxes, text_anchors, gxs, gys)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)

    # Flip to True locally to dump the encoded labels/scores of each sample.
    debug = False

    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        j = 0  # number of samples processed so far
        all_time = 0
        try:
            while not coord.should_stop() and j < show_pic_sum:
                start_time = time.time()
                (image_sess, label_sess, gbbox_sess,
                 x1_sess, x2_sess, x3_sess, x4_sess,
                 y1_sess, y2_sess, y3_sess, y4_sess,
                 p_localisations, p_scores, p_labels) = sess.run([
                     image, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
                     y4, e_localisations, e_scores, e_labels
                 ])
                all_time += time.time() - start_time

                p_labels_concat = np.concatenate(p_labels)
                p_scores_concat = np.concatenate(p_scores)
                if debug:
                    print(p_labels)
                    print('l_labels:',
                          len(p_labels_concat[p_labels_concat.nonzero()]),
                          p_labels_concat[p_labels_concat.nonzero()])
                    print('p_socres:',
                          len(p_scores_concat[p_scores_concat.nonzero()]),
                          p_scores_concat[p_scores_concat.nonzero()])
                    print('label_sess:',
                          np.array(list(label_sess)).shape,
                          list(label_sess))

                # Round-trip through disk so that drawing happens on the
                # array OpenCV itself decodes from the written file.
                png_path = '{}/{}.png'.format(save_dir, j)
                cv2.imwrite(png_path, np.array(image_sess))
                img_np = cv2.imread(png_path)
                h, w = img_np.shape[:2]

                num_correct = 0
                for i, label in enumerate(list(label_sess)):
                    label = int(label)
                    if label not in (0, 1):
                        continue
                    # Corner coordinates are multiplied by the image size
                    # (presumably they are normalised -- TODO confirm).
                    corners = (x1_sess[i] * w, y1_sess[i] * h,
                               x2_sess[i] * w, y2_sess[i] * h,
                               x3_sess[i] * w, y3_sess[i] * h,
                               x4_sess[i] * w, y4_sess[i] * h)
                    if label == 1:
                        num_correct += 1
                        img_np = draw_polygon(img_np, *corners)
                    else:
                        img_np = draw_polygon(img_np, *corners,
                                              color=(0, 0, 255))

                # NOTE(review): the channel swap presumably compensates for
                # the first imwrite receiving an RGB array -- confirm.
                img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
                cv2.imwrite(
                    '{}'.format(os.path.join(save_dir, str(j) + '.png')),
                    img_np)
                j += 1
                print('correct:', num_correct)
        except tf.errors.OutOfRangeError:
            print('done')
        finally:
            print('done')
            coord.request_stop()
            # Average over the samples actually processed: the loop may stop
            # before show_pic_sum is reached, and dividing by a zero count
            # here would raise and skip the join below.
            average = all_time / j if j else 0.0
            print('all time:', all_time, 'average:', average)
            coord.join(threads=threads)
'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), #'image/object/bbox/label_text' : tf.VarLenFeature(dtype=tf.string), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), }, name='features') # image was saved as uint8, so we have to decode as uint8. image = tf.decode_raw(tfrecord_features['image/encoded'], tf.uint8) shape = tf.cast(tfrecord_features['image/shape'], tf.int64) #image = tf.reshape(image, shape) height = tf.cast(tfrecord_features['image/height'],tf.int64) width = tf.cast(tfrecord_features['image/width'],tf.int64) """ dataset_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/' dataset = get_datasets(dataset_dir) provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=1, common_queue_capacity=20 * 32, common_queue_min=10 * 32, shuffle=True) # Get for SSD network: image, labels, bboxes. [ image, shape, height, width, glabels, gbboxes,
for min_s in min_scala: for max_s in max_scala: scales = [min_s + i * (max_s - min_s) / 6 for i in range(7)] anchor_sizes = [(512 * scales[i], 512 * scales[i] + 50) for i in range(7)] with tf.Graph().as_default(): # build a net params = txtbox512.TextboxNet.default_params params = params._replace(anchor_sizes=anchor_sizes) text_net = txtbox512.TextboxNet(params) text_shape = text_net.params.img_shape print 'text_shape ' + str(text_shape) text_anchors = text_net.anchors(text_shape) ## dataset provider dataset = sythtextprovider.get_datasets('../data/ICDAR2013/', file_pattern='*.tfrecord') data_provider = slim.dataset_data_provider.DatasetDataProvider( dataset, common_queue_capacity=32, common_queue_min=2) [image, shape, glabels, gbboxes] = \ data_provider.get(['image', 'shape', 'object/label', 'object/bbox']) dst_image, glabels, gbboxes,num = \ txt_preprocessing.preprocess_image(image, glabels,gbboxes, text_shape,is_training=True) glocalisations, gscores = \ text_net.bboxes_encode( gbboxes, text_anchors, num)