def testPascalVocSegTestData(self):
  dataset = data_generator.Dataset(
      dataset_name='pascal_voc_seg',
      split_name='val',
      dataset_dir='research/deeplab/testing/pascal_voc_seg',
      batch_size=1,
      crop_size=[3, 3],  # Use small size for testing.
      min_resize_value=3,
      max_resize_value=3,
      resize_factor=None,
      min_scale_factor=0.01,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      is_training=False,
      model_variant='mobilenet_v2')

  self.assertAllEqual(dataset.num_of_classes, 21)
  self.assertAllEqual(dataset.ignore_label, 255)

  num_of_images = 3
  with self.test_session() as sess:
    iterator = dataset.get_one_shot_iterator()
    for i in range(num_of_images):
      batch = iterator.get_next()
      batch, = sess.run([batch])
      image_attributes = _get_attributes_of_image(i)
      self.assertAllEqual(batch[common.IMAGE][0], image_attributes.image)
      self.assertAllEqual(batch[common.LABEL][0], image_attributes.label)
      self.assertEqual(batch[common.HEIGHT][0], image_attributes.height)
      self.assertEqual(batch[common.WIDTH][0], image_attributes.width)
      self.assertEqual(batch[common.IMAGE_NAME][0],
                       image_attributes.image_name)

    # All data have been read.
    with self.assertRaisesRegexp(tf.errors.OutOfRangeError, ''):
      sess.run([iterator.get_next()])
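# The test above relies on a helper `_get_attributes_of_image` that is not
# shown. A minimal sketch of what it might look like, assuming it returns a
# simple container of precomputed golden values; the field names are taken
# from the assertions above, the container type and placeholder data are
# hypothetical.
import collections

ImageAttributes = collections.namedtuple(
    'ImageAttributes', ['image', 'label', 'height', 'width', 'image_name'])

def _get_attributes_of_image_sketch(index):
  # In the real test these would be golden arrays loaded from test data; the
  # values below are placeholders for illustration only.
  golden = [
      ImageAttributes(image=None, label=None, height=366, width=500,
                      image_name=b'image_0'),
  ]
  if index >= len(golden):
    raise ValueError('Index must be < number of golden images.')
  return golden[index]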
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.vis_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.vis_batch_size,
      crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  train_id_to_eval_id = None
  if dataset.dataset_name == data_generator.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      predictions = tf.squeeze(
          tf.image.resize_images(tf.expand_dims(predictions, 3),
                                 resized_shape,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                 align_corners=True), 3)

    tf.train.get_or_create_global_step()
    if FLAGS.quantize_delay_step >= 0:
      contrib_quantize.create_eval_graph()

    num_iteration = 0
    max_num_iteration = FLAGS.max_number_of_iterations

    checkpoints_iterator = contrib_training.checkpoints_iterator(
        FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
    for checkpoint_path in checkpoints_iterator:
      num_iteration += 1
      tf.logging.info(
          'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      tf.logging.info('Visualizing with model %s', checkpoint_path)

      scaffold = tf.train.Scaffold(init_op=tf.global_variables_initializer())
      session_creator = tf.train.ChiefSessionCreator(
          scaffold=scaffold,
          master=FLAGS.master,
          checkpoint_filename_with_path=checkpoint_path)
      with tf.train.MonitoredSession(
          session_creator=session_creator, hooks=None) as sess:
        batch = 0
        image_id_offset = 0

        while not sess.should_stop():
          tf.logging.info('Visualizing batch %d', batch + 1)
          _process_batch(sess=sess,
                         original_images=samples[common.ORIGINAL_IMAGE],
                         semantic_predictions=predictions,
                         image_names=samples[common.IMAGE_NAME],
                         image_heights=samples[common.HEIGHT],
                         image_widths=samples[common.WIDTH],
                         image_id_offset=image_id_offset,
                         save_dir=save_dir,
                         raw_save_dir=raw_save_dir,
                         train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size
          batch += 1

      tf.logging.info(
          'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      if max_num_iteration > 0 and num_iteration >= max_num_iteration:
        break
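# `_process_batch` is not shown above. A simplified sketch of what it might
# do, assuming it runs the given tensors and writes one raw prediction PNG per
# image; the real DeepLab helper also writes colorized overlays via a dataset
# colormap, which is omitted here for brevity.
import numpy as np
import PIL.Image as Image

def _process_batch_sketch(sess, original_images, semantic_predictions,
                          image_names, image_heights, image_widths,
                          image_id_offset, save_dir, raw_save_dir,
                          train_id_to_eval_id=None):
  (images, predictions, names, heights, widths) = sess.run(
      [original_images, semantic_predictions, image_names,
       image_heights, image_widths])
  for i in range(images.shape[0]):
    # Crop away any padding introduced during preprocessing.
    prediction = predictions[i, :heights[i], :widths[i]]
    if train_id_to_eval_id is not None:
      # Map train ids back to the ids expected by the evaluation server.
      prediction = np.asarray(train_id_to_eval_id)[prediction]
    out = Image.fromarray(prediction.astype(np.uint8))
    out.save('%s/%06d.png' % (raw_save_dir, image_id_offset + i))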
def train():
  # From build_cityscapes_data.py:
  #   example = image_data, filename, height, width, seg_data
  tf.logging.set_verbosity(tf.logging.INFO)
  clone_batch_size = FLAGS.train_batch_size

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.train_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=clone_batch_size,
      crop_size=[int(sz) for sz in FLAGS.train_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      min_scale_factor=FLAGS.min_scale_factor,
      max_scale_factor=FLAGS.max_scale_factor,
      scale_factor_step_size=FLAGS.scale_factor_step_size,
      model_variant=None,
      num_readers=10,
      is_training=True,
      should_shuffle=True,
      should_repeat=True)

  # Reading a batch: keys of samples are
  # ['height', 'width', 'image_name', 'label', 'image'].
  num_classes = dataset.num_of_classes
  samples = dataset.get_one_shot_iterator().get_next()
  in_imgs = samples['image'] / 255
  labels = samples['label']  # channel = 1

  latents, skip = seg_encoder('Encoder', in_imgs, training=True)
  if FLAGS.use_skip_1by1:
    skip = tf.layers.conv2d(inputs=skip, filters=32, kernel_size=[1, 1],
                            strides=(1, 1), use_bias=False, padding="same")
  if not FLAGS.use_skip:
    skip = None
  logits = seg_decoder('Decoder', latents, training=True,
                       num_classes=num_classes, skip=skip)

  # train_loss, _, _ = normal_loss(logits, labels, num_classes,
  #                                dataset.ignore_label)
  train_loss = softmax_cross_entropy_loss_mining(
      logits,
      labels,
      num_classes,
      dataset.ignore_label,
      loss_weight=1.0,
      upsample_logits=False,
      hard_example_mining_step=FLAGS.hard_example_mining_step,
      top_k_percent_pixels=FLAGS.top_k_percent_pixels,
      scope='CI_Loss')

  log_summaries(in_imgs, num_classes, logits, labels, train_loss)

  step = tf.train.get_or_create_global_step()
  main_optimizer = tf.train.AdamOptimizer(
      learning_rate=1e-4)  # 1e-4 / 100k steps / batch size 2 ==> same but lr = 1e-5
  main_step = main_optimizer.minimize(train_loss, global_step=step)
  train_op = tf.group(main_step)

  hooks = [
      tf.train.StopAtStepHook(last_step=FLAGS.last_step),
      tf.train.NanTensorHook(train_loss),
  ]

  step_c = 0
  with tf.train.MonitoredTrainingSession(
      hooks=hooks,
      checkpoint_dir=FLAGS.checkpoint_dir,
      save_checkpoint_secs=300,
      save_summaries_secs=60) as sess:
    while not sess.should_stop():
      sess.run(train_op)
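# `softmax_cross_entropy_loss_mining` above is project-specific. A minimal
# sketch of the hard-example-mining idea its arguments name, assuming
# DeepLab-style top-k pixel mining: keep only the `top_k_percent_pixels`
# largest per-pixel cross-entropy values. The gradual ramp controlled by
# `hard_example_mining_step` is omitted for brevity.
import tensorflow as tf

def top_k_pixel_loss_sketch(logits, labels, num_classes, ignore_label,
                            top_k_percent_pixels=0.25):
  labels = tf.reshape(labels, shape=[-1])
  not_ignore_mask = tf.to_float(tf.not_equal(labels, ignore_label))
  one_hot_labels = tf.one_hot(
      tf.where(tf.equal(labels, ignore_label), tf.zeros_like(labels), labels),
      num_classes)
  pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=one_hot_labels,
      logits=tf.reshape(logits, shape=[-1, num_classes]))
  pixel_losses *= not_ignore_mask  # Zero out ignored pixels.
  num_pixels = tf.size(pixel_losses)
  top_k = tf.to_int32(top_k_percent_pixels * tf.to_float(num_pixels))
  # Average only over the hardest top-k pixels.
  top_k_losses, _ = tf.nn.top_k(pixel_losses, k=top_k, sorted=True)
  return tf.reduce_mean(top_k_losses)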
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape(
        [FLAGS.eval_batch_size,
         int(FLAGS.eval_crop_size[0]),
         int(FLAGS.eval_crop_size[1]),
         3])

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    miou, update_op = tf.metrics.mean_iou(
        predictions, labels, dataset.num_of_classes, weights=weights)
    tf.summary.scalar(predictions_tag, miou)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    tf.contrib.training.evaluate_repeatedly(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        eval_ops=[update_op],
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
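# Why ignore_label pixels are first given weight 0 and only then relabeled to
# 0: tf.metrics.mean_iou needs labels inside [0, num_classes), and the zero
# weight keeps those pixels out of the confusion matrix. A minimal numpy
# sketch of the same weighted-confusion-matrix mIoU computation:
import numpy as np

def mean_iou_sketch(predictions, labels, num_classes, ignore_label):
  valid = (labels != ignore_label)         # weight 1 where the label counts.
  labels = np.where(valid, labels, 0)      # relabel ignored pixels to 0.
  cm = np.zeros((num_classes, num_classes), dtype=np.float64)
  # Accumulate the confusion matrix only over valid pixels.
  np.add.at(cm, (labels[valid], predictions[valid]), 1.0)
  tp = np.diag(cm)
  denom = cm.sum(axis=0) + cm.sum(axis=1) - tp
  iou = tp / np.maximum(denom, 1e-12)
  return iou[denom > 0].mean()  # Average over classes that actually occur.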
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape([
        FLAGS.eval_batch_size,
        int(FLAGS.eval_crop_size[0]),
        int(FLAGS.eval_crop_size[1]),
        3
    ])

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE], model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Calculate IoU for each class.
    metric_map = {}
    iou_v, update_op = iou_each_class.iou(
        predictions, labels, dataset.num_of_classes, weights=weights)
    for index in range(0, dataset.num_of_classes):
      metric_map['class_' + str(index) + '_iou'] = (iou_v[index],
                                                    update_op[index])

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    ######################################################################
    ########### Modified to evaluate all checkpoints in a folder ########
    evaluation.evaluate_repeatedly(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        eval_ops=[update_op],
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)

    #####################################################################
    ########### Read IoU from TensorBoard into a CSV file ###############
    checkpoint_path_list = tf.train.get_checkpoint_state(
        FLAGS.checkpoint_dir).all_model_checkpoint_paths
    iterations = [
        os.path.basename(a).split('-')[1] for a in checkpoint_path_list
    ]
    iou_from_Tensorboard.to_csv(FLAGS.eval_logdir, dataset.num_of_classes,
                                iterations)
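# `iou_each_class.iou` is a project-specific metric. A minimal sketch of a
# per-class IoU metric with the same (values, update_ops) contract, assuming
# its internals accumulate a shared streaming confusion matrix; this is an
# assumption about the helper, not its actual implementation.
import tensorflow as tf

def per_class_iou_sketch(predictions, labels, num_classes, weights=None):
  total_cm = tf.get_variable(
      'total_confusion_matrix', [num_classes, num_classes],
      initializer=tf.zeros_initializer(), trainable=False,
      collections=[tf.GraphKeys.LOCAL_VARIABLES,
                   tf.GraphKeys.METRIC_VARIABLES])
  batch_cm = tf.confusion_matrix(
      labels, predictions, num_classes=num_classes,
      weights=weights, dtype=tf.float32)
  update_cm = tf.assign_add(total_cm, batch_cm)
  tp = tf.diag_part(total_cm)
  denom = (tf.reduce_sum(total_cm, axis=0) +
           tf.reduce_sum(total_cm, axis=1) - tp)
  iou = tp / tf.maximum(denom, 1.0)  # [num_classes] vector of IoUs.
  # One update op per class so callers can index both tensors uniformly.
  return iou, [update_cm] * num_classes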
def main(unused_argv):
  print("logging params")
  print("Learning rate: ", FLAGS.base_learning_rate)
  print("Momentum: ", FLAGS.momentum)
  print("Weight decay: ", FLAGS.weight_decay)
  print("training steps: ", FLAGS.training_number_of_steps)
  print("Dataset name: ", FLAGS.dataset)
  print("Using dataset for training: ", FLAGS.train_split)
  print("Dataset directory: ", FLAGS.dataset_dir)
  print("batch size: ", FLAGS.train_batch_size)
  print("crop size: ", FLAGS.train_crop_size)
  print("Model variant used: ", FLAGS.model_variant)
  print("Train log directory: ", FLAGS.train_logdir)

  train_list = []
  val_list = []
  count = 0
  best_val_mean_iou = 0.718
  dir_path = 'deeplab/best_ckpt/'

  tf.logging.set_verbosity(tf.logging.INFO)
  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  graph = tf.Graph()
  with graph.as_default():
    with tf.device(
        tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
      assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
          'Training batch size not divisible by number of clones (GPUs).')
      # Will be equivalent to train_batch_size for a single clone.
      clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones

      dataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.train_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=clone_batch_size,
          crop_size=[int(sz) for sz in FLAGS.train_crop_size],
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=1,  # check??
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

      train_tensor, summary_op = _train_deeplab_model(
          dataset.get_one_shot_iterator(), dataset.num_of_classes,
          dataset.ignore_label)

      # Soft placement allows placing on CPU ops without GPU implementation.
      session_config = tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False)

      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      init_fn = None
      if FLAGS.tf_initial_checkpoint:
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      scaffold = tf.train.Scaffold(
          init_fn=init_fn,
          summary_op=summary_op,
      )

      stop_hook = tf.train.StopAtStepHook(
          last_step=FLAGS.training_number_of_steps)

      profile_dir = FLAGS.profile_logdir
      if profile_dir is not None:
        tf.gfile.MakeDirs(profile_dir)

      with tf.contrib.tfprof.ProfileContext(
          enabled=profile_dir is not None, profile_dir=profile_dir):
        with tf.train.MonitoredTrainingSession(
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            config=session_config,
            scaffold=scaffold,
            checkpoint_dir=FLAGS.train_logdir,
            summary_dir=FLAGS.train_logdir,
            log_step_count_steps=FLAGS.log_steps,
            save_summaries_steps=FLAGS.save_summaries_secs,
            save_checkpoint_secs=FLAGS.save_interval_secs,
            hooks=[stop_hook]) as sess:
          while not sess.should_stop():
            count += 1
            training_loss = sess.run(train_tensor)
            if np.isnan(training_loss):
              print("learning rate too high. exiting!")
              exit()
            try:
              if count > 5000 and count % 200 == 0:
                train_iou = subprocess.check_output(
                    [sys.executable, "deeplab/vistrain.py"])
                val_iou = subprocess.check_output(
                    [sys.executable, "deeplab/vis.py"])
                val_mean_iou = float(val_iou.decode("utf-8").split('\n')[-2])
                val_list.append(val_mean_iou * 100)
                train_mean_iou = float(
                    train_iou.decode("utf-8").split('\n')[-2]) * 100
                train_list.append(train_mean_iou)
                print("Mean IoU on training dataset: ", train_mean_iou)
                print("Mean IoU on validation dataset: ", val_mean_iou)
                sys.stdout.flush()
                if val_mean_iou > best_val_mean_iou:
                  if os.path.isdir(dir_path):
                    shutil.rmtree(dir_path)
                  print("Validation Mean IoU: ", val_mean_iou)
                  shutil.copytree(FLAGS.train_logdir, dir_path)
                  best_val_mean_iou = val_mean_iou
            except subprocess.CalledProcessError:
              print("Validation script returned non-zero status.")
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(  # Build the dataset.
      dataset_name=FLAGS.dataset,  # Dataset name.
      split_name=FLAGS.vis_split,  # TFRecord split for segmentation; defaults to 'val'.
      dataset_dir=FLAGS.dataset_dir,  # Dataset directory.
      batch_size=FLAGS.vis_batch_size,  # Images processed per batch; defaults to 1.
      crop_size=[int(sz) for sz in FLAGS.vis_crop_size],  # Crop size; defaults to 513,513.
      min_resize_value=FLAGS.min_resize_value,  # None.
      max_resize_value=FLAGS.max_resize_value,  # None.
      resize_factor=FLAGS.resize_factor,  # None.
      model_variant=FLAGS.model_variant,  # Model variant; defaults to mobilenet_v2, xception_65 in this run.
      is_training=False,  # Not training.
      should_shuffle=False,  # Do not shuffle the input data.
      should_repeat=False)  # Do not repeat the dataset.

  train_id_to_eval_id = None
  if dataset.dataset_name == data_generator.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)  # Folder holding the visualized images.
  save_dir = os.path.join(FLAGS.vis_logdir,
                          _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)  # Create the segmentation_results folder.
  raw_save_dir = os.path.join(FLAGS.vis_logdir,
                              _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)  # Create the raw_segmentation_results folder.

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()  # Fetch the data.

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],  # 1024,2048
        atrous_rates=FLAGS.atrous_rates,  # 6,12,18
        output_stride=FLAGS.output_stride)  # 4

    if tuple(FLAGS.eval_scales) == (1.0,):  # Evaluate without rescaling.
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(  # Label prediction, same as eval.
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:  # Multi-scale evaluation.
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    '''
    predictions:
    {'semantic': <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>,
     'semantic_prob': <tf.Tensor 'Softmax:0' shape=(1, 1024, 2048, 19) dtype=float32>}
    '''
    predictions = predictions[common.OUTPUT_TYPE]

    if FLAGS.min_resize_value and FLAGS.max_resize_value:  # None, skipped here.
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions, [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([
          tf.squeeze(samples[common.HEIGHT]),
          tf.squeeze(samples[common.WIDTH])
      ])
      predictions = tf.squeeze(
          tf.image.resize_images(
              tf.expand_dims(predictions, 3),
              resized_shape,
              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
              align_corners=True), 3)

    # Counter: global_step increases by one per batch.
    tf.train.get_or_create_global_step()
    if FLAGS.quantize_delay_step >= 0:  # Defaults to -1.
      contrib_quantize.create_eval_graph()

    num_iteration = 0
    max_num_iteration = FLAGS.max_number_of_iterations  # 0

    checkpoints_iterator = contrib_training.checkpoints_iterator(
        FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
    for checkpoint_path in checkpoints_iterator:
      num_iteration += 1
      tf.logging.info('Starting visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      tf.logging.info('Visualizing with model %s', checkpoint_path)

      scaffold = tf.train.Scaffold(
          init_op=tf.global_variables_initializer())
      session_creator = tf.train.ChiefSessionCreator(
          scaffold=scaffold,
          master=FLAGS.master,
          checkpoint_filename_with_path=checkpoint_path)
      with tf.train.MonitoredSession(session_creator=session_creator,
                                     hooks=None) as sess:
        batch = 0
        image_id_offset = 0

        while not sess.should_stop():
          tf.logging.info('Visualizing batch %d', batch + 1)
          _process_batch(
              sess=sess,
              # <tf.Tensor 'IteratorGetNext:4' shape=(?, ?, ?, 3) dtype=uint8>
              original_images=samples[common.ORIGINAL_IMAGE],
              # <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>
              semantic_predictions=predictions,
              # <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=string>
              image_names=samples[common.IMAGE_NAME],
              # <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=int64>
              image_heights=samples[common.HEIGHT],
              # <tf.Tensor 'IteratorGetNext:5' shape=(?,) dtype=int64>
              image_widths=samples[common.WIDTH],
              image_id_offset=image_id_offset,  # 0
              save_dir=save_dir,  # Where segmentation results are written.
              raw_save_dir=raw_save_dir,
              # Only non-None for Cityscapes.
              train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size  # Id of the image being visualized.
          batch += 1

      tf.logging.info('Finished visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      if max_num_iteration > 0 and num_iteration >= max_num_iteration:
        break
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')
  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  common.outputlogMessage('Training on %s set' % FLAGS.train_split)
  common.outputlogMessage('Dataset: %s' % FLAGS.dataset)
  common.outputlogMessage('train_crop_size: %s' % str(FLAGS.train_crop_size))
  common.outputlogMessage(str(FLAGS.train_crop_size))
  common.outputlogMessage('atrous_rates: %s' % str(FLAGS.atrous_rates))
  common.outputlogMessage('number of classes: %s' % str(FLAGS.num_classes))
  common.outputlogMessage('Ignore label value: %s' % str(FLAGS.ignore_label))

  pid = os.getpid()
  with open('train_py_pid.txt', 'w') as f_obj:
    f_obj.writelines('%d' % pid)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      dataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.train_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=clone_batch_size,
          crop_size=[int(sz) for sz in FLAGS.train_crop_size],
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=4,
          is_training=True,
          should_shuffle=True,
          should_repeat=True,
          num_classes=FLAGS.num_classes,
          ignore_label=FLAGS.ignore_label)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (dataset.get_one_shot_iterator(),
                    {common.OUTPUT_TYPE: dataset.num_of_classes},
                    dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                     first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in tf.model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions.
    if FLAGS.save_summaries_images:
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_of_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                           summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy,
          FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step,
          FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps,
          FLAGS.learning_power,
          FLAGS.slow_start_step,
          FLAGS.slow_start_learning_rate,
          decay_steps=FLAGS.decay_steps,
          end_learning_rate=FLAGS.end_learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

      if FLAGS.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      elif FLAGS.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.adam_learning_rate,
            epsilon=FLAGS.adam_epsilon)
      else:
        raise ValueError('Unknown optimizer')

    if FLAGS.quantize_delay_step >= 0:
      if FLAGS.num_clones > 1:
        raise ValueError('Quantization doesn\'t support multi-clone yet.')
      contrib_quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay_step)

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # Start the training.
    profile_dir = FLAGS.profile_logdir
    if profile_dir is not None:
      tf.gfile.MakeDirs(profile_dir)

    with contrib_tfprof.ProfileContext(
        enabled=profile_dir is not None, profile_dir=profile_dir):
      init_fn = None
      if FLAGS.tf_initial_checkpoint:
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      slim.learning.train(
          train_tensor,
          logdir=FLAGS.train_logdir,
          log_every_n_steps=FLAGS.log_steps,
          master=FLAGS.master,
          number_of_steps=FLAGS.training_number_of_steps,
          is_chief=(FLAGS.task == 0),
          session_config=session_config,
          startup_delay_steps=startup_delay_steps,
          init_fn=init_fn,
          summary_op=summary_op,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
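# `train_utils.get_model_init_fn` restores pretrained weights while optionally
# skipping the logits layers (useful when fine-tuning on a dataset with a
# different number of classes). A minimal sketch of that behavior, assuming
# slim-style variable filtering; this is an approximation, not the helper's
# actual code.
import tensorflow as tf
from tensorflow.contrib import slim

def get_model_init_fn_sketch(train_logdir, initial_checkpoint,
                             initialize_last_layer, last_layers,
                             ignore_missing_vars=True):
  if tf.train.latest_checkpoint(train_logdir):
    return None  # Training already in progress; do not re-initialize.
  exclude = ['global_step']
  if not initialize_last_layer:
    exclude += last_layers  # Keep freshly initialized logits for new classes.
  variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
  return slim.assign_from_checkpoint_fn(
      initial_checkpoint, variables_to_restore,
      ignore_missing_vars=ignore_missing_vars)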
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,  # Number of GPUs.
      clone_on_cpu=FLAGS.clone_on_cpu,  # Defaults to False.
      replica_id=FLAGS.task,  # Task id.
      num_replicas=FLAGS.num_replicas,  # Defaults to 1.
      num_ps_tasks=FLAGS.num_ps_tasks)  # Defaults to 0.

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')
  # Each GPU gets an equal share of the batch.
  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  tf.gfile.MakeDirs(FLAGS.train_logdir)  # Create the training log directory.
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      dataset = data_generator.Dataset(  # Dataset parameters.
          dataset_name=FLAGS.dataset,  # Dataset name, cityscapes here.
          split_name=FLAGS.train_split,  # TFRecord split containing 'train'; defaults to 'train'.
          dataset_dir=FLAGS.dataset_dir,  # Directory holding the TFRecord files.
          batch_size=clone_batch_size,  # Per-GPU batch size after the split.
          crop_size=[int(sz) for sz in FLAGS.train_crop_size],  # Training crop size, 513,513.
          min_resize_value=FLAGS.min_resize_value,  # Defaults to None.
          max_resize_value=FLAGS.max_resize_value,  # Defaults to None.
          resize_factor=FLAGS.resize_factor,  # Defaults to None.
          min_scale_factor=FLAGS.min_scale_factor,  # Scale-augmentation lower bound; defaults to 0.5.
          max_scale_factor=FLAGS.max_scale_factor,  # Scale-augmentation upper bound; defaults to 2.
          scale_factor_step_size=FLAGS.scale_factor_step_size,  # Scale step size; defaults to 0.25, from 0.5 to 2.
          model_variant=FLAGS.model_variant,  # Model variant, xception_65.
          num_readers=4,  # Parallel readers; increase with more GPUs to speed up training.
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      # Counter: global_step increases by one per training batch.
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab  # Build the DeepLab model.
      model_args = (dataset.get_one_shot_iterator(),
                    {common.OUTPUT_TYPE: dataset.num_of_classes},
                    dataset.ignore_label)  # Model arguments.
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                     first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in tf.model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions.
    if FLAGS.save_summaries_images:  # Defaults to False.
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_of_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                           summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(  # Model learning rate.
          FLAGS.learning_policy,  # 'poly' learning policy.
          FLAGS.base_learning_rate,  # 0.0001
          FLAGS.learning_rate_decay_step,  # Decay the learning rate every 2000 steps.
          FLAGS.learning_rate_decay_factor,  # 0.1
          FLAGS.training_number_of_steps,  # Number of training steps, 20000.
          FLAGS.learning_power,  # poly power, 0.9.
          FLAGS.slow_start_step,  # 0
          FLAGS.slow_start_learning_rate,  # 1e-4, learning rate during the slow start.
          decay_steps=FLAGS.decay_steps,  # 0.0
          end_learning_rate=FLAGS.end_learning_rate)  # 0.0
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

      # Training optimizer.
      if FLAGS.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      elif FLAGS.optimizer == 'adam':
        # Adam: adaptive optimizer with second-moment gradient correction.
        optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.adam_learning_rate,
            epsilon=FLAGS.adam_epsilon)
      else:
        raise ValueError('Unknown optimizer')

    if FLAGS.quantize_delay_step >= 0:  # Defaults to -1, skipped.
      if FLAGS.num_clones > 1:
        raise ValueError('Quantization doesn\'t support multi-clone yet.')
      contrib_quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay_step)

    # FLAGS.startup_delay_steps defaults to 15.
    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)  # Compute total_loss.
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      # Gradient multipliers, e.g.
      # {'logits/semantic/biases': 2.0, 'logits/semantic/weights': 1.0}.
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op: apply the computed gradients to the
      # variables, returning the update operation; global_step is incremented.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # Start the training.
    profile_dir = FLAGS.profile_logdir  # Defaults to None.
    if profile_dir is not None:
      tf.gfile.MakeDirs(profile_dir)

    with contrib_tfprof.ProfileContext(
        enabled=profile_dir is not None, profile_dir=profile_dir):
      init_fn = None
      if FLAGS.tf_initial_checkpoint:  # Load pretrained weights.
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      slim.learning.train(
          train_tensor,
          logdir=FLAGS.train_logdir,
          log_every_n_steps=FLAGS.log_steps,
          master=FLAGS.master,
          number_of_steps=FLAGS.training_number_of_steps,
          is_chief=(FLAGS.task == 0),
          session_config=session_config,
          startup_delay_steps=startup_delay_steps,
          init_fn=init_fn,
          summary_op=summary_op,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
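# The 'poly' policy named in the comments above decays the learning rate as a
# polynomial of training progress. A minimal sketch of the formula, assuming
# DeepLab-style defaults (base_lr=1e-4, power=0.9) and ignoring the slow-start
# phase:
def poly_learning_rate_sketch(step, training_number_of_steps,
                              base_learning_rate=1e-4, learning_power=0.9,
                              end_learning_rate=0.0):
  progress = min(float(step) / training_number_of_steps, 1.0)
  return ((base_learning_rate - end_learning_rate) *
          (1 - progress) ** learning_power + end_learning_rate)

# e.g. at step 10000 of 20000: 1e-4 * 0.5**0.9 is roughly 5.36e-5.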
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False,
      non_uniform_sampling=FLAGS.nus_preprocess,
      output_target_sampling=FLAGS.eval_type == "nus")

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    if FLAGS.eval_type == "nus":
      sampling_location = _nus_locations(samples[common.IMAGE], False)
      target_locations = samples[TARGET_SAMPLING]
      mse, update_op = tf.metrics.mean_squared_error(sampling_location,
                                                     target_locations)
      # update_op = tf.Print(update_op, [mse])
      # tf.summary.image("InputImages", samples[common.IMAGE])
      # tf.summary.image("InputLabel", tf.to_float(samples[common.LABEL]) / 19)
      # tf.summary.image("ResViz", viz(sampling_location))
      tf.summary.scalar('mse', mse)
    else:
      crop_size = [int(sz) for sz in FLAGS.eval_crop_size]
      if FLAGS.nus_preprocess:
        crop_size = [FLAGS.nus_sampling_size] * 2
      model_options = common.ModelOptions(
          outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
          crop_size=crop_size,
          atrous_rates=FLAGS.atrous_rates,
          output_stride=FLAGS.output_stride)

      if tuple(FLAGS.eval_scales) == (1.0,):
        tf.logging.info('Performing single-scale test.')
        predictions = model.predict_labels(
            samples[common.IMAGE], model_options,
            image_pyramid=FLAGS.image_pyramid,
            output_logits=FLAGS.nus_preprocess)
      else:
        tf.logging.info('Performing multi-scale test.')
        if FLAGS.quantize_delay_step >= 0:
          raise ValueError(
              'Quantize mode is not supported with multi-scale test.')
        predictions = model.predict_labels_multi_scale(
            samples[common.IMAGE],
            model_options=model_options,
            eval_scales=FLAGS.eval_scales,
            add_flipped_images=FLAGS.add_flipped_images)

      if FLAGS.nus_preprocess:
        with tf.name_scope("nus_interpolation"):
          assert FLAGS.eval_batch_size == 1, "Only support eval_batch_size == 1"
          sampling = samples[SAMPLING]
          logits = predictions[common.OUTPUT_TYPE + "/logits"]
          shape = tf.shape(samples[common.LABEL])[1:3]
          predictions = tf.py_func(
              nus.nus_interpolate,
              [logits[0], sampling[0], shape],
              logits.dtype,
          )[None, ...]
          predictions = tf.argmax(predictions, axis=3)
      else:
        predictions = predictions[common.OUTPUT_TYPE]

      predictions = tf.reshape(predictions, shape=[-1])
      labels = tf.reshape(samples[common.LABEL], shape=[-1])
      weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

      # Set ignore_label regions to label 0, because metrics.mean_iou requires
      # range of labels = [0, dataset.num_classes). Note the ignore_label
      # regions are not evaluated since the corresponding regions contain
      # weights = 0.
      labels = tf.where(
          tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels),
          labels)

      predictions_tag = 'miou'
      for eval_scale in FLAGS.eval_scales:
        predictions_tag += '_' + str(eval_scale)
      if FLAGS.add_flipped_images:
        predictions_tag += '_flipped'

      # Define the evaluation metric.
      miou, update_op = tf.metrics.mean_iou(
          predictions, labels, dataset.num_of_classes, weights=weights)
      miou = tf.Print(miou, ["mIoU", miou])
      tf.summary.scalar(predictions_tag, miou)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    tf.contrib.training.evaluate_repeatedly(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        eval_ops=[update_op],
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(  # Build the validation dataset.
      dataset_name=FLAGS.dataset,  # Dataset name, cityscapes here; defaults to pascal_voc_seg.
      split_name=FLAGS.eval_split,  # TFRecord split containing 'val'; defaults to 'val'.
      dataset_dir=FLAGS.dataset_dir,  # Directory holding the TFRecord files.
      batch_size=FLAGS.eval_batch_size,  # Images per batch; defaults to 1.
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],  # Eval crop size; defaults to 513,513.
      min_resize_value=FLAGS.min_resize_value,  # Defaults to None.
      max_resize_value=FLAGS.max_resize_value,  # Defaults to None.
      resize_factor=FLAGS.resize_factor,  # Defaults to None.
      model_variant=FLAGS.model_variant,  # Model variant, xception_65 in this run.
      num_readers=2,  # Number of parallel readers.
      is_training=False,  # Not training.
      should_shuffle=False,  # Do not shuffle the input data.
      should_repeat=False)  # Do not repeat the dataset.

  tf.gfile.MakeDirs(FLAGS.eval_logdir)  # Create the eval directory.
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()  # One iteration of validation data.
    '''
    samples:
    {'image_name': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=string>,
     'width': <tf.Tensor 'IteratorGetNext:5' shape=(?,) dtype=int64>,
     'image': <tf.Tensor 'IteratorGetNext:1' shape=(?, 1024, 2048, 3) dtype=float32>,
     'height': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=int64>,
     'label': <tf.Tensor 'IteratorGetNext:3' shape=(?, 1024, 2048, 1) dtype=int32>,
     'original_image': <tf.Tensor 'IteratorGetNext:4' shape=(?, ?, ?, 3) dtype=uint8>}
    '''

    model_options = common.ModelOptions(  # Model options.
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},  # {semantic: 19}
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],  # 1024,2048
        atrous_rates=FLAGS.atrous_rates,  # 6,12,18
        output_stride=FLAGS.output_stride)  # 16

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape([
        FLAGS.eval_batch_size,  # Defaults to 1.
        int(FLAGS.eval_crop_size[0]),
        int(FLAGS.eval_crop_size[1]),
        3
    ])

    if tuple(FLAGS.eval_scales) == (1.0,):  # Default: single eval scale of 1.
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(  # Per-pixel prediction.
          samples[common.IMAGE], model_options,
          image_pyramid=FLAGS.image_pyramid)
      '''
      predictions:
      {'semantic': <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>,
       'semantic_prob': <tf.Tensor 'Softmax:0' shape=(1, 1024, 2048, 19) dtype=float32>}
      '''
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])  # Predicted labels.
    labels = tf.reshape(samples[common.LABEL], shape=[-1])  # Ground-truth labels.
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))  # Per-label weights.

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    predictions_tag = 'miou'  # Mean IoU.
    predictions_tag1 = 'accuracy_pixel'  # Pixel accuracy.
    for eval_scale in FLAGS.eval_scales:  # Default: single scale [1.0].
      predictions_tag += '_' + str(eval_scale)
      predictions_tag1 += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:  # Defaults to False: no flipped evaluation.
      predictions_tag += '_flipped'
      predictions_tag1 += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    num_classes = dataset.num_of_classes  # 19
    metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
        labels=labels, predictions=predictions, num_classes=num_classes,
        weights=weights)
    '''
    metric_map:
    {'eval/miou_1.0_overall':
        (<tf.Tensor 'mean_iou/Select_1:0' shape=() dtype=float32>,
         <tf.Tensor 'mean_iou/AssignAdd:0' shape=(19, 19) dtype=float64_ref>)}
    '''
    metric_map['eval/%s_overall_accuracy_' %
                predictions_tag] = tf.metrics.accuracy(
                    labels=labels, predictions=predictions, weights=weights)

    # IoU for each class.
    # tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None,
    # dtype=None, name=None) returns a one-hot tensor: `indices` holds the
    # input values, typically as a matrix; `depth` is the output size.
    one_hot_predictions = tf.one_hot(predictions, num_classes)
    one_hot_predictions = tf.reshape(one_hot_predictions,
                                     [-1, num_classes])  # One-hot predictions.
    one_hot_labels = tf.one_hot(labels, num_classes)
    one_hot_labels = tf.reshape(one_hot_labels,
                                [-1, num_classes])  # One-hot ground truth.
    for c in range(num_classes):
      predictions_tag_c = '%s_class_%d' % (predictions_tag, c)  # miou_1.0_class_c
      predictions_tag_c1 = '%s_class_%d' % (predictions_tag1, c)
      tp, tp_op = tf.metrics.true_positives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      fp, fp_op = tf.metrics.false_positives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      fn, fn_op = tf.metrics.false_negatives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      tn, tn_op = tf.metrics.true_negatives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      tp_fp_fn_op = tf.group(tp_op, fp_op, fn_op)
      iou = tf.where(tf.greater(tp + fn, 0.0),
                     tp / (tp + fn + fp),
                     tf.constant(np.NaN))
      ap = tf.where(tf.greater(tp + fn, 0.0),
                    (tp + tn) / (tp + tn + fn + fp),
                    tf.constant(np.NaN))
      metric_map['eval/%s' % predictions_tag_c] = (iou, tp_fp_fn_op)
      metric_map['eval/%s' % predictions_tag_c1] = (ap, tp_fp_fn_op)

    (metrics_to_values,
     metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)
    '''
    (metrics_to_values, metrics_to_updates) holds one entry per class plus the
    overall mIoU, e.g.:
    ({'eval/miou_1.0_class_0': <tf.Tensor 'Select_1:0' shape=() dtype=float32>,
      ...
      'eval/miou_1.0_overall': <tf.Tensor 'mean_iou/Select_1:0' shape=() dtype=float32>},
     {'eval/miou_1.0_class_0': <tf.Operation 'group_deps' type=NoOp>,
      ...
      'eval/miou_1.0_overall': <tf.Tensor 'mean_iou/AssignAdd:0' shape=(19, 19) dtype=float64_ref>})
    '''

    # tf.Print(input, data, message=None, first_n=None, summarize=None,
    # name=None) takes at least `input` (the tensor passed through unchanged)
    # and `data` (a list of tensors whose values are printed).
    summary_ops = []
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      op = tf.summary.scalar(metric_name, metric_value)  # Log the scalar.
      op = tf.Print(op, [metric_value], metric_name)
      summary_ops.append(op)

    summary_op = tf.summary.merge(summary_ops)
    summary_hook = contrib_training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:  # 0, skipped here.
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:  # -1, skipped here.
      contrib_quantize.create_eval_graph()

    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    contrib_training.evaluate_repeatedly(
        checkpoint_dir=FLAGS.checkpoint_dir,
        master=FLAGS.master,
        eval_ops=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
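# A quick numpy sanity check of the per-class formulas above, as a sketch:
# IoU_c = tp / (tp + fn + fp) and per-class pixel accuracy = (tp + tn) / total,
# both NaN when the class never appears in the ground truth.
import numpy as np

def per_class_stats_sketch(predictions, labels, c):
  pred_c = (predictions == c)
  label_c = (labels == c)
  tp = np.sum(pred_c & label_c)
  fp = np.sum(pred_c & ~label_c)
  fn = np.sum(~pred_c & label_c)
  tn = np.sum(~pred_c & ~label_c)
  iou = tp / float(tp + fn + fp) if (tp + fn) > 0 else float('nan')
  acc = (tp + tn) / float(tp + tn + fn + fp) if (tp + fn) > 0 else float('nan')
  return iou, acc

# Example: predictions = np.array([0, 1, 1]), labels = np.array([0, 1, 2])
# gives class-1 IoU = 1 / (1 + 0 + 1) = 0.5.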
def save_output_samples(checkpoint_dir, best_chekpnt,
                        eval_preprocess_threads=8,
                        eval_crop_size=[1024, 2048]):
  eval_batch_size = 1
  compressed_reconstructed_dir = os.path.join(
      checkpoint_dir, 'compressed_reconstructed_images')
  if not os.path.exists(compressed_reconstructed_dir):
    os.makedirs(compressed_reconstructed_dir)
    logger.info('Creating directory ' + compressed_reconstructed_dir + '/')

  eval_split = 'val'
  num_sample_output = 20
  dataset = data_generator.Dataset(
      dataset_name='cityscapes',
      split_name=eval_split,
      dataset_dir='/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
      batch_size=eval_batch_size,
      crop_size=[int(sz) for sz in eval_crop_size],
      min_resize_value=None,
      max_resize_value=None,
      resize_factor=None,
      model_variant=None,
      num_readers=eval_preprocess_threads,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  samples = dataset.get_one_shot_iterator().get_next()
  in_imgs = samples['image'] / 255
  depth = samples['depth'] / 255
  labels = samples['label']
  num_classes = dataset.num_of_classes

  # =================================== arch
  _, _, _, _, _, _, _, _, logits, _ = \
      build_model(in_imgs, depth, None, num_classes, mode='testing')
  # ===================================
  predictions = tf.argmax(logits, 3)  # batch*H*W*1

  with tf.Session() as sess:
    if best_chekpnt is None:
      latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
      best_chekpnt = latest
    tf.train.Saver().restore(sess, save_path=best_chekpnt)

    for i in range(num_sample_output):
      test_file_name = str(i)
      depth_path = os.path.join(compressed_reconstructed_dir,
                                test_file_name + '_depth' + '.png')
      orig_path = os.path.join(compressed_reconstructed_dir,
                               test_file_name + '_orig' + '.png')
      map_gt_path = os.path.join(compressed_reconstructed_dir,
                                 test_file_name + '_map_gt' + '.png')
      map_pred_path = os.path.join(compressed_reconstructed_dir,
                                   test_file_name + '_map_pred' + '.png')

      p, l, input_img, dep = sess.run(
          [predictions, labels, in_imgs, depth])
      l = np.squeeze(l)
      p = np.squeeze(p)
      input_img = np.squeeze(input_img)
      dep = np.squeeze(dep)
      p[l == 255] = 255

      colored_label = get_dataset_colormap.label_to_color_image(
          l, 'cityscapes')
      colored_pred = get_dataset_colormap.label_to_color_image(
          p, 'cityscapes')

      dep_jet = cv2.applyColorMap(np.uint8(dep * (255 * 2)),
                                  cv2.COLORMAP_JET)
      cv2.imwrite(depth_path, dep_jet)
      colored_pred = np.uint8(colored_pred[:, :, ::-1])
      cv2.imwrite(map_pred_path, colored_pred)
      colored_label = np.uint8(colored_label[:, :, ::-1])
      cv2.imwrite(map_gt_path, colored_label)
      input_img = np.uint8(input_img[:, :, ::-1] * 255)
      cv2.imwrite(orig_path, input_img)
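# The repeated `[:, :, ::-1]` above converts RGB arrays to the BGR channel
# order that cv2.imwrite expects. A minimal demonstration of that slice:
import numpy as np

rgb = np.zeros((2, 2, 3), dtype=np.uint8)
rgb[..., 0] = 255            # Pure red in RGB.
bgr = rgb[:, :, ::-1]        # Same pixels, channel order reversed.
assert bgr[0, 0, 2] == 255   # Red now sits in the last slot of BGR.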
def eval_seg(checkpoint_dir, eval_preprocess_threads=8,
             eval_crop_size=[1024, 2048], eval_logdir='tmp_eval_log/',
             eval_batch_size=1, eval_repeatedly=False,
             eval_interval_secs=300):
  # Note: eval_interval_secs was previously referenced without being defined;
  # the 300 s default here is an assumption.
  eval_split = 'val'
  dataset = data_generator.Dataset(
      dataset_name='cityscapes',
      split_name=eval_split,
      dataset_dir='/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
      batch_size=eval_batch_size,
      crop_size=[int(sz) for sz in eval_crop_size],
      min_resize_value=None,
      max_resize_value=None,
      resize_factor=None,
      model_variant=None,
      num_readers=eval_preprocess_threads,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(eval_logdir)
  logger.info('Evaluating on ' + eval_split + ' set')

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()
    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples['image'].set_shape([
        eval_batch_size,
        int(eval_crop_size[0]),
        int(eval_crop_size[1]), 3
    ])
    num_classes = dataset.num_of_classes
    in_imgs = samples['image'] / 255
    depth = samples['depth'] / 255
    labels = samples['label']

    # =================================== arch
    _, _, _, _, _, _, _, _, logits, _ = \
        build_model(in_imgs, depth, None, num_classes, mode='testing')
    if logits is None:
      highest_val_miou = 0
      best_chekpnt = None
      return highest_val_miou, best_chekpnt
    # ===================================
    predictions = tf.argmax(logits, 3)
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(labels, shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
    # Set ignore_label regions to label 0; they carry zero weight and are
    # therefore excluded from the metric.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    eval_scales = [1.0]
    for eval_scale in eval_scales:
      predictions_tag += '_' + str(eval_scale)

    # Define the evaluation metric: mIOU over classes.
    miou, update_op = tf.metrics.mean_iou(
        predictions, labels, num_classes, weights=weights)
    tf.summary.scalar(predictions_tag, miou)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]
    num_eval_iters = 100000

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
    # Initialized here so both branches return well-defined values.
    best_chekpnt = latest
    highest_val_miou = 0
    if eval_repeatedly:
      logger.info('start evaluating repeatedly')
      tf.contrib.training.evaluate_repeatedly(
          master='',
          checkpoint_dir=checkpoint_dir,
          eval_ops=[update_op],
          max_number_of_evaluations=num_eval_iters,
          hooks=hooks,
          eval_interval_secs=eval_interval_secs)
    else:
      logger.info('start evaluating the last kept checkpoints')
      checkpnts_paths = tf.train.get_checkpoint_state(
          checkpoint_dir=checkpoint_dir).all_model_checkpoint_paths
      for chekpnt_path in checkpnts_paths:
        final_m = tf.contrib.training.evaluate_once(
            checkpoint_path=chekpnt_path,
            master='',
            eval_ops=[update_op],
            final_ops=miou,
            hooks=hooks)
        if final_m > highest_val_miou:
          highest_val_miou = final_m
          best_chekpnt = chekpnt_path
        logger.info(chekpnt_path + ' ==> mIOU ' + str(final_m))
      logger.info('==============================================')
      logger.info('highest_val_miou = ' + str(highest_val_miou))
      logger.info('best_chekpnt = ' + str(best_chekpnt))
      logger.info('==============================================')

  if not eval_repeatedly:
    tf.reset_default_graph()
    save_output_samples(checkpoint_dir, best_chekpnt,
                        eval_preprocess_threads, eval_crop_size)
  return highest_val_miou, best_chekpnt
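
# Example driver for eval_seg (a sketch; the path is hypothetical). With
# eval_repeatedly=False it scores the last kept checkpoints once, returns the
# best one by mIOU, and dumps qualitative samples from it via
# save_output_samples.
if __name__ == '__main__':
  highest_miou, best_ckpt = eval_seg(
      checkpoint_dir='/path/to/train_logdir',  # hypothetical path
      eval_logdir='tmp_eval_log/',
      eval_repeatedly=False)
  logger.info('best checkpoint %s with mIOU %s' %
              (str(best_ckpt), str(highest_miou)))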
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.vis_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.vis_batch_size,
      crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  train_id_to_eval_id = None

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  save_dir1 = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER1)
  tf.gfile.MakeDirs(save_dir)
  tf.gfile.MakeDirs(save_dir1)
  raw_save_dir = os.path.join(FLAGS.vis_logdir,
                              _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  raw_save_dir_label = os.path.join(FLAGS.vis_logdir,
                                    _LABEL_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)
  tf.gfile.MakeDirs(raw_save_dir_label)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    predict = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # the label range to be [0, dataset.num_of_classes). The ignore_label
    # regions are not evaluated since their weights are 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    miou_tf, update_op_tf = tf.metrics.mean_iou(
        predict, labels, dataset.num_of_classes, weights=weights)

    from deeplab import my_metrics
    iou_v, update_op = my_metrics.iou(
        predict, labels, dataset.num_of_classes, weights=weights)

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only support batch_size = 1, since we assume that the dimensions of
      # the original image after tf.squeeze are [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in
      # preprocessing. First, we slice the valid regions (i.e., remove the
      # padded regions) and then we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions, [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([
          tf.squeeze(samples[common.HEIGHT]),
          tf.squeeze(samples[common.WIDTH])
      ])
      predictions = tf.squeeze(
          tf.image.resize_images(
              tf.expand_dims(predictions, 3),
              resized_shape,
              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
              align_corners=True), 3)

    tf.train.get_or_create_global_step()
    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    num_iteration = 0
    max_num_iteration = FLAGS.max_number_of_iterations

    checkpoints_iterator = tf.contrib.training.checkpoints_iterator(
        FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
    for checkpoint_path in checkpoints_iterator:
      num_iteration += 1
      tf.logging.info('Starting visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      tf.logging.info('Visualizing with model %s', checkpoint_path)

      scaffold = tf.train.Scaffold(init_op=tf.global_variables_initializer())
      session_creator = tf.train.ChiefSessionCreator(
          scaffold=scaffold,
          master=FLAGS.master,
          checkpoint_filename_with_path=checkpoint_path)
      with tf.train.MonitoredSession(
          session_creator=session_creator, hooks=None) as sess:
        batch = 0
        image_id_offset = 0
        while not sess.should_stop():
          tf.logging.info('Visualizing batch %d', batch + 1)
          _process_batch(
              sess=sess,
              original_images=samples[common.ORIGINAL_IMAGE],
              semantic_predictions=predictions,
              gt_labels=samples[common.LABEL],
              image_names=samples[common.IMAGE_NAME],
              image_heights=samples[common.HEIGHT],
              image_widths=samples[common.WIDTH],
              image_id_offset=image_id_offset,
              update_op=update_op,
              iou=iou_v,
              save_dir=save_dir,
              save_dir1=save_dir1,
              raw_save_dir=raw_save_dir,
              raw_save_dir_label=raw_save_dir_label,
              mtf=miou_tf,
              utf=update_op_tf,
              train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size
          batch += 1

      tf.logging.info('Finished visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      if max_num_iteration > 0 and num_iteration >= max_num_iteration:
        break

  # The accumulators below are assumed to be module-level lists populated by
  # _process_batch (per-class IoUs for background/stem/callus/shoot).
  mean_eval = np.array(mean_iou_eval)
  print("Mean IoU on validation dataset: ", np.mean(mean_eval))
  print("Mean IoU for background: ", np.nanmean(miou_class1_back))
  print("Mean IoU for stem: ", np.nanmean(miou_class2_stem))
  print("Mean IoU for callus: ", np.nanmean(miou_class3_cal))
  print("Mean IoU for shoot: ", np.nanmean(miou_class4_shoot))

  print("Renaming files.")
  segmentation_res_path = os.path.join(
      './', FLAGS.vis_logdir, _LABEL_SEMANTIC_PREDICTION_SAVE_FOLDER)
  for file in os.listdir(segmentation_res_path):
    src = file
    # Strip the 2-character prefix and the 5-character suffix added when the
    # files were saved, keeping only the image id.
    dst = os.path.join(segmentation_res_path, src[2:-5] + '.png')
    os.rename(os.path.join(segmentation_res_path, src), dst)
  print(np.mean(mean_eval))
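
# _process_batch is defined elsewhere in this codebase; the stub below is a
# sketch reconstructed from the call site above, and every detail of its body
# is an assumption. It runs the metric update ops together with the fetches
# and appends per-class IoUs (background/stem/callus/shoot) to the
# module-level accumulators that are averaged and printed after the loop.
import numpy as np

mean_iou_eval = []
miou_class1_back, miou_class2_stem = [], []
miou_class3_cal, miou_class4_shoot = [], []


def _process_batch(sess, original_images, semantic_predictions, gt_labels,
                   image_names, image_heights, image_widths, image_id_offset,
                   update_op, iou, save_dir, save_dir1, raw_save_dir,
                   raw_save_dir_label, mtf, utf, train_id_to_eval_id=None):
  """Sketch: runs one batch, updates the metrics, records per-class IoUs."""
  (_, _, _, iou_val, _, _) = sess.run(
      [original_images, semantic_predictions, gt_labels, iou, update_op, utf])
  # iou_val is assumed to be the per-class IoU vector from my_metrics.iou.
  mean_iou_eval.append(np.nanmean(iou_val))
  miou_class1_back.append(iou_val[0])
  miou_class2_stem.append(iou_val[1])
  miou_class3_cal.append(iou_val[2])
  miou_class4_shoot.append(iou_val[3])
  # Writing the colored and raw predictions into save_dir / save_dir1 /
  # raw_save_dir / raw_save_dir_label is omitted in this sketch.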
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  graph = tf.Graph()
  with graph.as_default():
    with tf.device(
        tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
      assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
          'Training batch size not divisible by number of clones (GPUs).')
      clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones

      dataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.train_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=clone_batch_size,
          crop_size=FLAGS.train_crop_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=2,
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

      # Validation pipeline; is_training=True so validation samples get the
      # same cropping/scaling as training inputs, but without shuffling or
      # repeating, so one pass covers the split exactly once.
      vdataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.trainval_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=FLAGS.trainval_batch_size,
          crop_size=FLAGS.train_crop_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=2,
          is_training=True,
          should_shuffle=False,
          should_repeat=False)
      viterator = vdataset.get_initializable_iterator()
      next_element = viterator.get_next()

      val_image = tf.placeholder(
          tf.float32,
          shape=(None, FLAGS.train_crop_size[0], FLAGS.train_crop_size[1], 3))
      val_label = tf.placeholder(
          tf.int32,
          shape=(None, FLAGS.train_crop_size[0], FLAGS.train_crop_size[1], 1))

      train_tensor, summary_op = _train_deeplab_model(
          dataset.get_one_shot_iterator(), dataset.num_of_classes,
          dataset.ignore_label)
      val_tensor = _val_loss(
          dataset=vdataset,
          image=val_image,
          label=val_label,
          num_of_classes=vdataset.num_of_classes,
          ignore_label=vdataset.ignore_label)

      # Soft placement allows placing on CPU ops without GPU implementation.
      session_config = tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False)

      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      init_fn = None
      if FLAGS.tf_initial_checkpoint:
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      scaffold = tf.train.Scaffold(
          init_fn=init_fn,
          summary_op=summary_op,
      )

      stop_hook = tf.train.StopAtStepHook(FLAGS.training_number_of_steps)

      # Validation-set bookkeeping.
      epoch = 0
      val_loss_per_epoch = []
      steps_per_epoch = int(dataset.num_samples / FLAGS.train_batch_size)
      saver = tf.train.Saver(max_to_keep=1)

      profile_dir = FLAGS.profile_logdir
      if profile_dir is not None:
        tf.gfile.MakeDirs(profile_dir)

      with tf.contrib.tfprof.ProfileContext(
          enabled=profile_dir is not None, profile_dir=profile_dir):
        # stop_hook is passed here so the step limit is actually enforced
        # (it was previously created but left out of the hooks list).
        with tf.train.MonitoredTrainingSession(
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            config=session_config,
            scaffold=scaffold,
            checkpoint_dir=FLAGS.train_logdir,
            log_step_count_steps=FLAGS.log_steps,
            save_summaries_steps=FLAGS.save_summaries_secs,
            save_checkpoint_secs=FLAGS.save_interval_secs,
            hooks=[stop_hook]) as sess:
          global_step = tf.train.get_global_step()
          while not sess.should_stop():
            step = sess.run(global_step)
            sess.run([train_tensor])

            if step % steps_per_epoch == 0:
              count_validation = 0
              stop_training = False
              val_losses = []
              sess.run(viterator.initializer)
              while True:
                try:
                  val_element = sess.run(next_element)
                  val_loss, val_summary = sess.run(
                      val_tensor,
                      feed_dict={
                          val_image: val_element[common.IMAGE],
                          val_label: val_element[common.LABEL]
                      })
                  val_losses.append(val_loss)
                  count_validation += 1
                  # print('  {} [validation] {} {}'.format(
                  #     count_validation, val_loss,
                  #     val_element[common.IMAGE_NAME]))
                except tf.errors.OutOfRangeError:
                  total_val_loss = sum(val_losses) / len(val_losses)
                  val_loss_per_epoch.append(total_val_loss)
                  print('  {} [validation loss] {}'.format(
                      count_validation * FLAGS.train_batch_size,
                      total_val_loss))
                  print('  {} [current epoch] {}'.format(step, epoch))
                  break

              if epoch > 0:
                min_delta = 0.01
                patience = 8
                # Stop training if the validation loss has not improved for
                # `patience` epochs.
                stop_training = early_stopping(epoch, val_loss_per_epoch,
                                               min_delta, patience, sess,
                                               saver, total_val_loss)
              if stop_training:
                break
              epoch += 1
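
# The two helpers below are referenced above but defined elsewhere; both are
# sketches under assumptions, not the author's implementations.

# _val_loss presumably rebuilds the network on the placeholders with reused
# weights and returns (loss, summary); build_val_logits is a hypothetical
# stand-in for that reuse step.
def _val_loss(dataset, image, label, num_of_classes, ignore_label):
  logits = build_val_logits(image, num_of_classes)  # hypothetical helper
  labels_flat = tf.reshape(label, [-1])
  logits_flat = tf.reshape(logits, [-1, num_of_classes])
  not_ignore = tf.to_float(tf.not_equal(labels_flat, ignore_label))
  one_hot = tf.one_hot(
      tf.where(tf.equal(labels_flat, ignore_label),
               tf.zeros_like(labels_flat), labels_flat), num_of_classes)
  loss = tf.losses.softmax_cross_entropy(one_hot, logits_flat,
                                         weights=not_ignore)
  summary = tf.summary.scalar('val_loss', loss)
  return loss, summary


# early_stopping: returns True once the last `patience` epochs brought no
# improvement of at least `min_delta` over the best earlier loss. The real
# helper presumably also uses `sess` and `saver` to checkpoint the best
# model; that part is omitted here.
def early_stopping(epoch, val_loss_per_epoch, min_delta, patience,
                   sess, saver, total_val_loss):
  if len(val_loss_per_epoch) <= patience:
    return False
  best_before_window = min(val_loss_per_epoch[:-patience])
  recent_best = min(val_loss_per_epoch[-patience:])
  return recent_best > best_before_window - min_delta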
def train(l_args):
  """Trains the model."""
  if l_args.verbose:
    tf.logging.set_verbosity(tf.logging.INFO)

  # Create input data pipeline.
  dataset = data_generator.Dataset(
      dataset_name='cityscapes',
      split_name='train',
      dataset_dir='/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
      batch_size=l_args.batchsize,
      crop_size=[int(sz) for sz in [l_args.patchsize, l_args.patchsize]],
      min_resize_value=None,
      max_resize_value=None,
      resize_factor=None,
      min_scale_factor=0.5,
      max_scale_factor=2.,
      scale_factor_step_size=0.25,
      model_variant=None,
      num_readers=l_args.preprocess_threads,
      is_training=True,
      should_shuffle=True,
      should_repeat=True)

  # Keys of a batch: ['height', 'width', 'image_name', 'label', 'image'].
  num_classes = dataset.num_of_classes
  samples = dataset.get_one_shot_iterator().get_next()
  # num_pixels = l_args.batchsize * l_args.patchsize ** 2
  x = samples['image'] / 255
  depth = samples['depth'] / 255
  labels = samples['label']

  # Build the autoencoder.
  train_loss, train_bpp, train_mse, x_tilde, _, _, _, entropy_bottleneck, \
      seg_logits, seg_loss = build_model(
          x, depth, l_args.lmbda, num_classes, mode='training',
          seg_labels=labels, ignore_label=dataset.ignore_label)

  # Minimize loss and auxiliary loss, and execute the update op.
  step = tf.train.get_or_create_global_step()
  main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
  main_step = main_optimizer.minimize(train_loss, global_step=step)

  if entropy_bottleneck is not None:
    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])
    # Creates a summary of the probability mass function (PMF) estimated in
    # the bottleneck.
    entropy_bottleneck.visualize()
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])
  else:
    train_op = tf.group(main_step)

  log_all_summaries(x, x_tilde, seg_logits, labels, train_loss, train_bpp,
                    train_mse, seg_loss)

  hooks = [
      tf.train.StopAtStepHook(last_step=l_args.last_step),
      tf.train.NanTensorHook(train_loss),
  ]
  with tf.train.MonitoredTrainingSession(
      hooks=hooks,
      checkpoint_dir=l_args.checkpoint_dir,
      save_checkpoint_secs=300,
      save_summaries_secs=60) as sess:
    while not sess.should_stop():
      sess.run(train_op)
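
# log_all_summaries is referenced above but defined elsewhere; a minimal
# sketch of what it plausibly does (scalar summaries for the losses, image
# summaries for input/reconstruction/segmentation), under assumptions:
def log_all_summaries(x, x_tilde, seg_logits, labels, train_loss, train_bpp,
                      train_mse, seg_loss):
  tf.summary.scalar('loss', train_loss)
  tf.summary.scalar('bpp', train_bpp)
  if train_mse is not None:
    tf.summary.scalar('mse', train_mse)
  if seg_loss is not None:
    tf.summary.scalar('seg_loss', seg_loss)
  tf.summary.image('original', tf.saturate_cast(x * 255, tf.uint8),
                   max_outputs=2)
  if x_tilde is not None:
    tf.summary.image('reconstruction',
                     tf.saturate_cast(x_tilde * 255, tf.uint8), max_outputs=2)
  if seg_logits is not None:
    # Scale class ids up so they are visible as grayscale.
    seg_pred = tf.cast(
        tf.expand_dims(tf.argmax(seg_logits, 3), -1) * 10, tf.uint8)
    tf.summary.image('seg_pred', seg_pred, max_outputs=2)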
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()
    model_options = common.ModelOptions(
        model_name=FLAGS.model_name,
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape([
        FLAGS.eval_batch_size,
        int(FLAGS.eval_crop_size[0]),
        int(FLAGS.eval_crop_size[1]), 3
    ])

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE], model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # the label range to be [0, dataset.num_of_classes). The ignore_label
    # regions are not evaluated since their weights are 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    num_classes = dataset.num_of_classes
    metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
        labels=labels, predictions=predictions, num_classes=num_classes,
        weights=weights)

    # IoU for each class.
    one_hot_predictions = tf.one_hot(predictions, num_classes)
    one_hot_predictions = tf.reshape(one_hot_predictions, [-1, num_classes])
    one_hot_labels = tf.one_hot(labels, num_classes)
    one_hot_labels = tf.reshape(one_hot_labels, [-1, num_classes])
    for c in range(num_classes):
      predictions_tag_c = '%s_class_%d' % (predictions_tag, c)
      tp, tp_op = tf.metrics.true_positives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      fp, fp_op = tf.metrics.false_positives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      fn, fn_op = tf.metrics.false_negatives(
          labels=one_hot_labels[:, c],
          predictions=one_hot_predictions[:, c],
          weights=weights)
      tp_fp_fn_op = tf.group(tp_op, fp_op, fn_op)
      iou = tf.where(
          tf.greater(tp + fn, 0.0), tp / (tp + fn + fp),
          tf.constant(np.NaN))
      metric_map['eval/%s' % predictions_tag_c] = (iou, tp_fp_fn_op)

    (metrics_to_values,
     metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)

    summary_ops = []
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      op = tf.summary.scalar(metric_name, metric_value)
      op = tf.Print(op, [metric_value], metric_name)
      summary_ops.append(op)

    summary_op = tf.summary.merge(summary_ops)
    summary_hook = contrib_training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      contrib_quantize.create_eval_graph()

    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    contrib_training.evaluate_repeatedly(
        checkpoint_dir=FLAGS.checkpoint_dir,
        master=FLAGS.master,
        eval_ops=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
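
# The per-class streaming IoU above reduces to tp / (tp + fp + fn). A quick
# NumPy sanity check on made-up toy vectors (not part of the pipeline):
import numpy as np

toy_labels = np.array([0, 0, 1, 1, 2, 2])
toy_preds = np.array([0, 1, 1, 1, 2, 0])
for c in range(3):
  tp = np.sum((toy_labels == c) & (toy_preds == c))
  fp = np.sum((toy_labels != c) & (toy_preds == c))
  fn = np.sum((toy_labels == c) & (toy_preds != c))
  iou = tp / float(tp + fp + fn) if (tp + fn) > 0 else float('nan')
  print('class %d IoU = %.3f' % (c, iou))
# Expected: class 0 -> 0.333, class 1 -> 0.667, class 2 -> 0.500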
def eval(l_args, expDir, lmbda, best_chekpnt, val_miou):
  train_dir = l_args.checkpoint_dir
  metrics_path = os.path.join(train_dir, 'metrics_args.pkl')
  l_args.lmbda = lmbda
  compressed_reconstructed_dir = os.path.join(
      train_dir, 'compressed_reconstructed_images')
  if not os.path.exists(compressed_reconstructed_dir):
    os.makedirs(compressed_reconstructed_dir)

  val_split_size = 500  # the Cityscapes val split has 500 images
  dataset = data_generator.Dataset(
      dataset_name='cityscapes',
      split_name='val',
      dataset_dir='/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
      batch_size=1,  # l_args.batchsize
      crop_size=[int(sz) for sz in [1024, 2048]],
      min_resize_value=None,
      max_resize_value=None,
      resize_factor=None,
      model_variant=None,
      num_readers=l_args.preprocess_threads,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  # Keys of a batch: ['height', 'width', 'image_name', 'label', 'image'].
  num_classes = dataset.num_of_classes
  samples = dataset.get_one_shot_iterator().get_next()
  x = samples['image'] / 255
  depth = samples['depth'] / 255
  labels = samples['label']
  num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

  # ======================== Input image dims should be multiples of 16.
  x_shape = tf.shape(x)
  # Cast back to int32: tf.ceil returns floats, but resize_images expects
  # integer target sizes.
  x_shape = tf.cast(tf.ceil(x_shape / 16) * 16, tf.int32)
  x = tf.image.resize_images(x, (x_shape[1], x_shape[2]))
  # ========================

  # Build the model.
  _, eval_bpp, _, x_hat, y_hat, y, string, _, seg_logits, seg_loss = \
      build_model(x, depth, l_args.lmbda, num_classes, mode='testing')

  # Bring both images back to the 0..255 range.
  x *= 255
  img_file_name = tf.placeholder(tf.string)
  no_reconstruction = False
  if x_hat is None:
    no_reconstruction = True
    save_reconstructed_op = None
  else:
    x_hat_to_save = tf.identity(x_hat[0, :, :, :])
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))
    # Write the reconstructed image out as a PNG file.
    save_reconstructed_op = save_image(img_file_name, x_hat_to_save)

  logger.info('Testing the model on ' + str(val_split_size) +
              ' images and saving the reconstructed images')

  msel, psnrl, msssiml, msssim_dbl, eval_bppl, bppl = [], [], [], [], [], []
  with tf.Session() as sess:
    # Load the model checkpoint; fetch the compressed string and the tensor
    # shapes.
    if best_chekpnt is None:
      best_chekpnt = tf.train.latest_checkpoint(
          checkpoint_dir=l_args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=best_chekpnt)

    for i in range(val_split_size):
      test_file_name = str(i)
      compressed_im_path = os.path.join(
          compressed_reconstructed_dir, test_file_name + '_compressed.bin')
      reconstructed_im_path = os.path.join(
          compressed_reconstructed_dir, test_file_name + '_reconstructed.png')
      im_metrics_path = os.path.join(
          compressed_reconstructed_dir, test_file_name + '_metrics.pkl')
      l_args.output = reconstructed_im_path

      if (i < 50) and not no_reconstruction:
        eval_bpp_, mse_, psnr_, msssim_, num_pixels_, string_, x_shape, y_shape, _ = \
            sess.run(
                [eval_bpp, mse, psnr, msssim, num_pixels, string,
                 tf.shape(x), tf.shape(y), save_reconstructed_op],
                feed_dict={img_file_name: reconstructed_im_path})
      else:
        if eval_bpp is not None:
          if no_reconstruction:
            eval_bpp_, num_pixels_, string_, x_shape, y_shape = sess.run(
                [eval_bpp, num_pixels, string, tf.shape(x), tf.shape(y)],
                feed_dict={img_file_name: reconstructed_im_path})
            mse_ = 0
            psnr_ = 0
            msssim_ = 0
          else:
            eval_bpp_, mse_, psnr_, msssim_, num_pixels_, string_, x_shape, y_shape = \
                sess.run(
                    [eval_bpp, mse, psnr, msssim, num_pixels, string,
                     tf.shape(x), tf.shape(y)],
                    feed_dict={img_file_name: reconstructed_im_path})
        else:
          mse_ = 0
          psnr_ = 0
          msssim_ = 0
          eval_bpp_ = 0
          num_pixels_ = None
          string_ = None
          x_shape = None
          y_shape = None

      if i < 50 and (string_ is not None):
        # Save only the first 50 test samples: write a binary file with the
        # shape information followed by the compressed string.
        with open(compressed_im_path, "wb") as f:
          f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
          f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
          f.write(string_)

      if string_ is not None:
        # Actual bits per pixel including overhead: the 8-byte header holds
        # the four uint16 shape fields written above.
        bpp_ = (8 + len(string_)) * 8 / num_pixels_
      else:
        bpp_ = 0

      print("Mean squared error: {:0.4f}".format(mse_))
      print("PSNR (dB): {:0.2f}".format(psnr_))
      print("Multiscale SSIM: {:0.4f}".format(msssim_))
      print("Multiscale SSIM (dB): {:0.2f}".format(
          -10 * np.log10(1 - msssim_)))
      print("Information content in bpp: {:0.4f}".format(eval_bpp_))
      print("Actual bits per pixel: {:0.4f}".format(bpp_))

      msssim_db_ = -10 * np.log10(1 - msssim_)
      im_metrics = {
          'mse': mse_,
          'psnr': psnr_,
          'msssim': msssim_,
          'msssim_db': msssim_db_,
          'eval_bpp': eval_bpp_,
          'bpp': bpp_
      }
      with open(im_metrics_path, "wb") as fp:
        pickle.dump(im_metrics, fp)
      msel.append(mse_)
      psnrl.append(psnr_)
      msssiml.append(msssim_)
      msssim_dbl.append(msssim_db_)
      eval_bppl.append(eval_bpp_)
      bppl.append(bpp_)

  logger.info('Averaging the metrics and saving them, together with the '
              'experiment args, to metrics_args.pkl')
  mse_ = np.mean(msel)
  psnr_ = np.mean(psnrl)
  msssim_ = np.mean(msssiml)
  eval_bpp_ = np.mean(eval_bppl)
  bpp_ = np.mean(bppl)
  msssim_db_ = np.mean(msssim_dbl)
  logger.info('MSE = ' + str(mse_))
  logger.info('PSNR = ' + str(psnr_))
  logger.info('MS-SSIM = ' + str(msssim_))
  logger.info('MS-SSIM db = ' + str(msssim_db_))
  logger.info('Eval_bpp = ' + str(eval_bpp_))
  logger.info('bpp = ' + str(bpp_))
  logger.info('mIOU = ' + str(val_miou))
  exp_avg_metrics = {
      'mse': mse_,
      'psnr': psnr_,
      'msssim': msssim_,
      'msssim_db': msssim_db_,
      'eval_bpp': eval_bpp_,
      'bpp': bpp_,
      'mIOU': val_miou,
      'chk_pnt': best_chekpnt
  }
  with open(metrics_path, "wb") as fp:
    pickle.dump({
        'exp_avg_metrics': exp_avg_metrics,
        'exp_args': l_args
    }, fp)
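
# save_image is referenced above but defined elsewhere; a minimal sketch of a
# PNG writer for a float image tensor in [0, 1] (an assumption about its
# behavior, mirroring common tensorflow/compression example code):
def save_image(filename, image):
  image = tf.clip_by_value(image, 0, 1)
  image = tf.round(image * 255)
  image = tf.cast(image, tf.uint8)
  return tf.write_file(filename, tf.image.encode_png(image))


# For reference, the .bin container written above can be parsed back like
# this (a sketch: four uint16 shape fields in native byte order, then the
# entropy-coded string):
def read_compressed_bin(path):
  with open(path, 'rb') as f:
    x_shape = np.frombuffer(f.read(4), dtype=np.uint16)  # input H, W
    y_shape = np.frombuffer(f.read(4), dtype=np.uint16)  # latent H, W
    string = f.read()  # remaining bytes: the compressed representation
  return x_shape, y_shape, string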
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  graph = tf.Graph()
  with graph.as_default():
    with tf.device(
        tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
      assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
          'Training batch size not divisible by number of clones (GPUs).')
      clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones

      dataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.train_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=clone_batch_size,
          crop_size=FLAGS.train_crop_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=2,
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

      train_tensor, summary_op = _train_deeplab_model(
          dataset.get_one_shot_iterator(), dataset.num_of_classes,
          dataset.ignore_label)

      # Soft placement allows placing on CPU ops without GPU implementation.
      session_config = tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False)

      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      init_fn = None
      if FLAGS.tf_initial_checkpoint:
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      scaffold = tf.train.Scaffold(
          init_fn=init_fn,
          summary_op=summary_op,
      )

      stop_hook = tf.train.StopAtStepHook(FLAGS.training_number_of_steps)

      profile_dir = FLAGS.profile_logdir
      if profile_dir is not None:
        tf.gfile.MakeDirs(profile_dir)

      with tf.contrib.tfprof.ProfileContext(
          enabled=profile_dir is not None, profile_dir=profile_dir):
        with tf.train.MonitoredTrainingSession(
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            config=session_config,
            scaffold=scaffold,
            checkpoint_dir=FLAGS.train_logdir,
            log_step_count_steps=FLAGS.log_steps,
            save_summaries_steps=FLAGS.save_summaries_secs,
            save_checkpoint_secs=FLAGS.save_interval_secs,
            hooks=[stop_hook]) as sess:
          while not sess.should_stop():
            sess.run([train_tensor])
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=FLAGS.eval_crop_size,
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE], model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # the label range to be [0, dataset.num_of_classes). The ignore_label
    # regions are not evaluated since their weights are 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    # ============ Added by B.A.D. =====================
    indices = tf.squeeze(
        tf.where(tf.less_equal(labels, dataset.num_of_classes - 1)), 1)
    labels = tf.cast(tf.gather(labels, indices), tf.int32)
    predictions = tf.gather(predictions, indices)
    # Gather the weights as well so they stay aligned with the filtered
    # labels/predictions (after the remap above this is a no-op, but it keeps
    # the shapes consistent for tf.metrics.mean_iou).
    weights = tf.gather(weights, indices)
    # ==================================================
    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_of_classes, weights=weights)

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))
    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
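
# Toy check of the ignore-label handling used throughout these eval scripts
# (255 is the Cityscapes ignore label); values are made up:
import tensorflow as tf

toy = tf.constant([1, 2, 255, 0])
toy_weights = tf.to_float(tf.not_equal(toy, 255))
toy_remapped = tf.where(tf.equal(toy, 255), tf.zeros_like(toy), toy)
with tf.Session() as sess:
  w, r = sess.run([toy_weights, toy_remapped])
  print(w)  # [1. 1. 0. 1.] -> the ignored pixel gets zero metric weight
  print(r)  # [1 2 0 0]     -> remapped into [0, num_classes) for mean_iou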