def __init__(self,
             model_dir,
             in_dir,
             out_dir,
             nms_th_for_all_scale=0.5,
             score_th=0.2,
             scales=([384, 384], [768, 384], [768, 768]),
             min_side_scale=384,
             save_res_path='eval_res.txt'):
    """Build the TextBoxes++ inference graph and restore its weights.

    Args:
        model_dir: Either a directory (the latest checkpoint inside it is
            restored) or the path of a checkpoint to restore directly.
        in_dir: Input path; it must exist.
        out_dir: Output path, stored as ``self.out_dir`` before
            ``self.make_out_dir()`` is called.
        nms_th_for_all_scale: Stored as ``self.nms_th_for_all_scale``.
        score_th: Stored as ``self.score_th``.
        scales: Sequence of two-element scales, stored as
            ``self.text_scales``.
        min_side_scale: Stored as ``self.min_side_scale``;
            ``self.max_side_scale`` is set to twice this value.
        save_res_path: Stored as ``self.save_eval_resut_path``.

    Raises:
        ValueError: If ``in_dir`` does not exist, or if ``model_dir`` is a
            directory that contains no checkpoint.
    """
    if not os.path.exists(in_dir):
        raise ValueError('{} does not existed!!!'.format(in_dir))
    self.in_dir = in_dir
    self.out_dir = out_dir
    self.suffixes = ['.png', '.PNG', '.jpg', '.jpeg']
    # NOTE(review): get_img_path() presumably reads self.in_dir and
    # self.suffixes, so it must run after both are set -- confirm.
    self.img_path, self.img_num = self.get_img_path()

    # Thresholds.
    self.nms_th_for_all_scale = nms_th_for_all_scale
    self.nms_threshold = 0.45
    self.score_th = score_th
    print('self.score_th', self.score_th)
    self.make_out_dir()

    # Input scaling.
    self.text_scales = scales
    self.data_format = 'NHWC'
    self.select_threshold = 0.01
    self.min_side_scale = min_side_scale
    self.max_side_scale = self.min_side_scale * 2  # 384 * 2

    # Behaviour switches.
    self.save_xml_flag = True
    self.save_txt_flag = True
    self.dynamic_scale_flag = False
    self.allow_padding = False
    self.allow_post_processing = False
    self.allow_eval_flag = False
    self.resize_flag = False
    # The attribute name keeps its historical spelling because code outside
    # this method may read it.
    self.save_eval_resut_path = save_res_path
    self.model_path = None

    # Dedicated graph and session for the text detector.
    self.config = tf.ConfigProto(allow_soft_placement=True)
    self.config.gpu_options.allow_growth = True
    self.graph = tf.Graph()
    self.session_text = tf.Session(graph=self.graph, config=self.config)

    with self.session_text.as_default():
        with self.graph.as_default():
            # Placeholders fed at inference time: one image and the
            # two-element scale it should be resized to.
            self.img_text = tf.placeholder(
                tf.float32, shape=(None, None, 3))
            print(len(self.text_scales))
            self.scale_text = tf.placeholder(tf.int32, shape=(2))

            (img_pre_text, label_pre_text, bboxes_pre_text,
             self.bboxes_img_text, xs_text, ys_text) = \
                ssd_vgg_preprocessing.preprocess_for_eval(
                    self.img_text,
                    None,
                    None,
                    None,
                    None,
                    self.scale_text,
                    self.data_format,
                    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)

            # Add the batch dimension expected by the network.
            image_text_4d = tf.expand_dims(img_pre_text, 0)
            image_text_4d = tf.cast(image_text_4d, tf.float32)
            self.image_text_4d = image_text_4d

            self.net_text = txtbox_384.TextboxNet()
            with slim.arg_scope(
                    self.net_text.arg_scope(data_format=self.data_format)):
                (self.predictions_text, self.localisations_text,
                 self.logits_text, self.endpoints_text,
                 self.l_shape) = self.net_text.net(
                     self.image_text_4d,
                     is_training=False,
                     reuse=tf.AUTO_REUSE,
                     update_feat_shapes=True)

            # The saver must be created while self.graph is the default
            # graph so that it picks up the network variables.
            saver_text = tf.train.Saver()

            if os.path.isdir(model_dir):
                # latest_checkpoint() already returns the full checkpoint
                # path (or None), so it must not be joined with model_dir
                # again.
                ckpt_path = tf.train.latest_checkpoint(model_dir)
                if ckpt_path is None:
                    raise ValueError(
                        'No checkpoint found in {}'.format(model_dir))
            else:
                ckpt_path = model_dir
            self.model_path = ckpt_path
            print(model_dir)
            saver_text.restore(self.session_text, ckpt_path)

    logging.info("Textbox++ model initialized.")
# Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img, xs, ys = ssd_vgg_preprocessing.preprocess_for_eval( img_input, None, None, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) image_4d = tf.expand_dims(image_pre, 0) image_4d = tf.cast(image_4d, tf.float32) # Define the txt_box model. reuse = True if 'txt_net' in locals() else None txt_net = txtbox_384.TextboxNet() print(txt_net.params.img_shape) print('reuse:', reuse) with slim.arg_scope(txt_net.arg_scope(data_format=data_format)): predictions, localisations, logits, end_points = txt_net.net( image_4d, is_training=False, reuse=reuse) ckpt_dir = 'model' isess.run(tf.compat.v1.global_variables_initializer()) saver = tf.compat.v1.train.Saver() ckpt_filename = tf.train.latest_checkpoint(ckpt_dir) if ckpt_dir and ckpt_filename:
def _parse_crop_area_range(spec):
    """Parse a ``'min,max'`` string into a two-element list of floats.

    Raises:
        ValueError: If ``spec`` does not contain exactly two comma-separated
            values, or if either value is not a valid float.
    """
    parts = spec.split(',')
    if len(parts) != 2:
        raise ValueError(
            '--training_image_crop_area must be two comma-separated '
            'numbers, got {!r}'.format(spec))
    return [float(parts[0]), float(parts[1])]


def main(_):
    """Build the TextBoxes++ training graph and run the TF-Slim train loop.

    The function reads its whole configuration from ``FLAGS``: it creates the
    input pipeline from the dataset in ``FLAGS.dataset_dir``, builds the
    network clones with their losses, configures the optimizer and summaries,
    and finally hands control to ``slim.learning.train``.

    Raises:
        ValueError: If ``--dataset_dir`` is not set or
            ``--training_image_crop_area`` is not of the form ``min,max``.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    # Sets the threshold for what messages will be logged.
    # (DEBUG / INFO / WARN / ERROR / FATAL)
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Config model_deploy. Keeps the TF-Slim models structure, which is
        # useful if multiple GPUs and/or servers are needed in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        # Create global_step, the training iteration counter.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = TFrecords2Dataset.get_datasets(FLAGS.dataset_dir)

        # Get the TextBoxes++ network and its anchors.
        text_net = txtbox_384.TextboxNet()

        # Stage 2 training using the 768x768 input size.
        if FLAGS.large_training:
            # Replace the input image shape and the feature-map size
            # extracted from each layer associated with a textbox layer.
            text_net.params = text_net.params._replace(
                img_shape=(768, 768),
                feat_shapes=[(96, 96), (48, 48), (24, 24), (12, 12),
                             (10, 10), (8, 8)])

        img_shape = text_net.params.img_shape
        print('img_shape: ' + str(img_shape))

        # Compute the default anchor boxes for the given image shape.
        text_anchors = text_net.anchors(img_shape)

        # Print the training configuration before training.
        tf_utils.print_configuration(FLAGS.__flags, text_net.params,
                                     dataset.data_sources, FLAGS.train_dir)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            # Setting the dataset provider.
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=1000 * FLAGS.batch_size,
                    common_queue_min=300 * FLAGS.batch_size,
                    shuffle=True)

            # Get for the network: image, labels, bboxes and the four
            # corners of every oriented box. 'shape' is fetched but unused.
            [image, _shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
             y4] = provider.get([
                 'image', 'shape', 'object/label', 'object/bbox',
                 'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
                 'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
                 'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
                 'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
             ])
            gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N,4)
            gys = tf.transpose(tf.stack([y1, y2, y3, y4]))

            image = tf.identity(image, 'input_image')

            # Pre-processing image, labels and bboxes.
            training_image_crop_area = _parse_crop_area_range(
                FLAGS.training_image_crop_area)
            image, glabels, gbboxes, gxs, gys = \
                ssd_vgg_preprocessing.preprocess_for_train(
                    image,
                    glabels,
                    gbboxes,
                    gxs,
                    gys,
                    img_shape,
                    data_format='NHWC',
                    crop_area_range=training_image_crop_area)

            # Encode groundtruth labels and bboxes.
            image = tf.identity(image, 'processed_image')
            glocalisations, gscores, glabels = text_net.bboxes_encode(
                glabels, gbboxes, text_anchors, gxs, gys)
            # One entry for the image followed by len(text_anchors) entries
            # for each of the three encoded outputs.
            batch_shape = [1] + [len(text_anchors)] * 3

            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list(
                    [image, glocalisations, gscores, glabels]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_glocalisations, b_gscores, b_glabels = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for
            # all GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list(
                    [b_image, b_glocalisations, b_gscores, b_glabels]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Build one network clone (data parallelism) with its losses."""
            # Dequeue batch.
            b_image, b_glocalisations, b_gscores, b_glabels = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct TextBoxes network.
            arg_scope = text_net.arg_scope(weight_decay=FLAGS.weight_decay)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    text_net.net(b_image, is_training=True)

            # Add loss function.
            text_net.losses(logits,
                            localisations,
                            b_glabels,
                            b_glocalisations,
                            b_gscores,
                            match_threshold=FLAGS.match_threshold,
                            negative_ratio=FLAGS.negative_ratio,
                            alpha=FLAGS.loss_alpha,
                            label_smoothing=FLAGS.label_smoothing,
                            batch_size=FLAGS.batch_size)
            return end_points

        # Gather initial tensorboard summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)

        # Gather update_ops. These contain, for example, the updates for the
        # batch_norm variables created by network_fn.
        # NOTE(review): the collection is read without a scope filter, so
        # with several clones it is not limited to the first one -- confirm
        # this is intended.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for extra losses.
        for loss in tf.get_collection('EXTRA_LOSSES'):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, dataset.num_samples, global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            # Add summaries for learning_rate.
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Compute the total loss and the per-variable gradients.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        # Evaluating train_tensor runs every update op and yields the loss.
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=100,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)

        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,  # output variables to logdir
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)
def run():
    """Dump pre-processed training samples with their boxes drawn on them.

    Reads samples from the dataset in ``FLAGS.dataset_dir``, runs them
    through the training pre-processing and the anchor encoding, and writes
    up to ``show_pic_sum`` annotated images named ``<index>.png`` into
    ``save_dir``. Boxes whose label is 1 are drawn with ``draw_polygon``'s
    default colour; boxes whose label is 0 are drawn with
    ``color=(0, 0, 255)``.

    Raises:
        ValueError: If ``--dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    print('-----start test-------')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with tf.device('/GPU:0'):
        dataset = TFrecords2Dataset.get_datasets(FLAGS.dataset_dir)
        print(dataset)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size,
            shuffle=True)
        print('provider:', provider)

        [image, shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
         y4] = provider.get([
             'image', 'shape', 'object/label', 'object/bbox',
             'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
             'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
             'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
             'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
         ])
        print('image:', image)
        print('shape:', shape)
        print('glabel:', glabels)
        print('gboxes:', gbboxes)

        gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N,4)
        gys = tf.transpose(tf.stack([y1, y2, y3, y4]))
        image = tf.identity(image, 'input_image')

        text_shape = (384, 384)
        image, glabels, gbboxes, gxs, gys = \
            ssd_vgg_preprocessing.preprocess_image(image,
                                                   glabels,
                                                   gbboxes,
                                                   gxs,
                                                   gys,
                                                   text_shape,
                                                   is_training=True,
                                                   data_format='NHWC')

        # Split the pre-processed corners back into one tensor per corner.
        x1, x2, x3, x4 = tf.unstack(gxs, axis=1)
        y1, y2, y3, y4 = tf.unstack(gys, axis=1)

        text_net = txtbox_384.TextboxNet()
        text_anchors = text_net.anchors(text_shape)
        e_localisations, e_scores, e_labels = text_net.bboxes_encode(
            glabels, gbboxes, text_anchors, gxs, gys)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options,
                                allow_soft_placement=True)

        with tf.Session(config=config) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)

            debug = False  # flip to True to print the encoded targets
            j = 0  # number of samples written so far
            all_time = 0
            try:
                while not coord.should_stop() and j < show_pic_sum:
                    start_time = time.time()
                    (image_sess, label_sess, gbbox_sess, x1_sess, x2_sess,
                     x3_sess, x4_sess, y1_sess, y2_sess, y3_sess, y4_sess,
                     p_localisations, p_scores, p_labels) = sess.run([
                         image, glabels, gbboxes, x1, x2, x3, x4, y1, y2,
                         y3, y4, e_localisations, e_scores, e_labels
                     ])
                    all_time += time.time() - start_time

                    p_labels_concat = np.concatenate(p_labels)
                    p_scores_concat = np.concatenate(p_scores)
                    if debug:
                        print(p_labels)
                        print(
                            'l_labels:',
                            len(p_labels_concat[p_labels_concat.nonzero()]),
                            p_labels_concat[p_labels_concat.nonzero()])
                        print(
                            'p_socres:',
                            len(p_scores_concat[p_scores_concat.nonzero()]),
                            p_scores_concat[p_scores_concat.nonzero()])
                        print('label_sess:',
                              np.array(list(label_sess)).shape,
                              list(label_sess))

                    # Round-trip through a PNG file so that the array handed
                    # to the drawing code is whatever cv2.imread returns.
                    img_np = np.array(image_sess)
                    cv2.imwrite('{}/{}.png'.format(save_dir, j), img_np)
                    img_np = cv2.imread('{}/{}.png'.format(save_dir, j))
                    h, w = img_np.shape[:2]

                    num_correct = 0
                    for i, label in enumerate(label_sess):
                        label = int(label)
                        if label not in (0, 1):
                            continue
                        # The corner coordinates are scaled by the image
                        # width and height before drawing.
                        corners = (x1_sess[i] * w, y1_sess[i] * h,
                                   x2_sess[i] * w, y2_sess[i] * h,
                                   x3_sess[i] * w, y3_sess[i] * h,
                                   x4_sess[i] * w, y4_sess[i] * h)
                        if label == 1:
                            num_correct += 1
                            img_np = draw_polygon(img_np, *corners)
                        else:
                            img_np = draw_polygon(img_np,
                                                  *corners,
                                                  color=(0, 0, 255))

                    img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
                    cv2.imwrite(os.path.join(save_dir, str(j) + '.png'),
                                img_np)
                    j += 1
                    print('correct:', num_correct)
            except tf.errors.OutOfRangeError:
                print('done')
            finally:
                print('done')
                coord.request_stop()
                coord.join(threads=threads)

            # Average over the samples actually processed: the loop may stop
            # before show_pic_sum is reached, and show_pic_sum may be 0.
            average = all_time / j if j else 0.0
            print('all time:', all_time, 'average:', average)