def get_dataset(flags, mode):
    """Build the input pipeline for the given estimator mode.

    Args:
      flags: Parsed flags object carrying dataset / preprocessing options.
      mode: A tf.estimator.ModeKeys value (TRAIN or EVAL).

    Returns:
      An (image, label) tuple of batched tensors.

    Raises:
      ValueError: If `mode` is neither TRAIN nor EVAL.
    """
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset_split = 'train'
    elif mode == tf.estimator.ModeKeys.EVAL:
        dataset_split = 'val'
    else:
        # Was `assert False, 'unknown mode'`; assertions vanish under
        # `python -O`, so raise an explicit error instead.
        raise ValueError('unknown mode')

    FLAGS = flags
    data_config = edict()
    data_config.edge_width = 20
    data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    data_config.edge_class_num = FLAGS.edge_class_num

    img_files, label_files = get_dataset_files(FLAGS.dataset, dataset_split)
    # NOTE(review): is_train=True is passed even in EVAL mode -- confirm that
    # dataset_pipeline does not apply training-only augmentation here.
    dataset_pp = dataset_pipeline(
        data_config, img_files, label_files, is_train=True)
    data_list = dataset_pp.iterator()

    if mode == tf.estimator.ModeKeys.TRAIN:
        samples = input_generator.get(
            (data_list, data_config.ignore_label),
            FLAGS.train_crop_size,
            FLAGS.train_batch_size // FLAGS.num_clones,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            min_scale_factor=FLAGS.min_scale_factor,
            max_scale_factor=FLAGS.max_scale_factor,
            scale_factor_step_size=FLAGS.scale_factor_step_size,
            dataset_split=FLAGS.train_split,
            is_training=True,
            model_variant=FLAGS.model_variant)
    else:  # EVAL -- any other mode was already rejected above.
        samples = input_generator.get(
            (data_list, data_config.ignore_label),
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)
    return samples[common.IMAGE], samples[common.LABEL]
def do_prepare():
    """Construct the visualization input pipeline and prediction tensor.

    Returns:
      A (samples, predictions) pair: the batched input dictionary and the
      semantic-segmentation prediction tensor derived from it.
    """
    dataset = segmentation_dataset.get_dataset(
        A_dataset, A_vis_split, dataset_dir=A_dataset_dir)

    samples = input_generator.get(
        dataset,
        A_vis_crop_size,
        A_vis_batch_size,
        min_resize_value=A_min_resize_value,
        max_resize_value=A_max_resize_value,
        resize_factor=A_resize_factor,
        dataset_split=A_vis_split,
        is_training=False,
        model_variant=A_model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=A_vis_crop_size,
        atrous_rates=A_atrous_rates,
        output_stride=A_output_stride)

    print(samples[common.IMAGE])
    predictions = model.predict_labels(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=A_image_pyramid)
    return samples, predictions[common.OUTPUT_TYPE]
def vis_main(sess):
    """Build the visualization input pipeline and prediction tensor.

    Args:
      sess: A tf.Session. NOTE(review): currently unused in this function;
        kept for interface compatibility with existing callers.

    Returns:
      A (samples, predictions) pair, for consistency with do_prepare().
      (Previously the tensors were built and then silently discarded.)
    """
    dataset = segmentation_dataset.get_dataset(
        A_dataset, A_vis_split, dataset_dir=A_dataset_dir)
    # Removed unused locals `train_id_to_eval_id` (always None) and
    # `save_dir` (os.path.join(work_dir, _SEMANTIC_PREDICTION_SAVE_FOLDER));
    # neither was read anywhere in this function.
    print('min_resize_value: %s' % str(A_min_resize_value))
    print('max_resize_value: %s' % str(A_max_resize_value))

    samples = input_generator.get(
        dataset,
        A_vis_crop_size,
        A_vis_batch_size,
        min_resize_value=A_min_resize_value,
        max_resize_value=A_max_resize_value,
        resize_factor=A_resize_factor,
        dataset_split=A_vis_split,
        is_training=False,
        model_variant=A_model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=A_vis_crop_size,
        atrous_rates=A_atrous_rates,
        output_stride=A_output_stride)

    print(samples[common.IMAGE])
    predictions = model.predict_labels(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=A_image_pyramid)
    predictions = predictions[common.OUTPUT_TYPE]
    return samples, predictions
def test():
    """Smoke-test visualization: build the graph and visualize three images."""
    vis_graph = tf.Graph()
    with vis_graph.as_default():
        dataset = segmentation_dataset.get_dataset(
            A_dataset, A_vis_split, dataset_dir=A_dataset_dir)
        samples = input_generator.get(
            dataset,
            A_vis_crop_size,
            A_vis_batch_size,
            min_resize_value=A_min_resize_value,
            max_resize_value=A_max_resize_value,
            resize_factor=A_resize_factor,
            dataset_split=A_vis_split,
            is_training=False,
            model_variant=A_model_variant)
        # NOTE(review): uses mycommon.ModelOptions while sibling functions use
        # common.ModelOptions -- confirm this difference is intentional.
        model_options = mycommon.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=A_vis_crop_size,
            atrous_rates=A_atrous_rates,
            output_stride=A_output_stride)
        print(samples[common.IMAGE])
        predictions = model.predict_labels(
            samples[common.IMAGE],
            model_options=model_options,
            image_pyramid=A_image_pyramid)
        predictions = predictions[common.OUTPUT_TYPE]

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(
            graph=vis_graph,
            logdir=A_vis_logdir,
            init_op=tf.global_variables_initializer(),
            summary_op=None,
            summary_writer=None,
            global_step=None,
            saver=saver)

        for jpg_path in (
                '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(11).jpg',
                '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(12).jpg',
                '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(13).jpg'):
            vis_one(vis_graph, jpg_path, samples, predictions, sv)
def main(unused_argv):
    """Train DeepLabv3+ with multi-clone deployment and a validation branch.

    Builds the training graph (input queue, model clones, optimizer,
    summaries) plus a validation branch (pixel accuracy / mean IoU on the
    val split), then runs slim training with a custom train_step_fn.
    """
    print("DEEPLABv3+")
    print("SAVE TO " + FLAGS.train_logdir)
    tf.logging.set_verbosity(tf.logging.INFO)

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)
    print("batch_norm: " + str(FLAGS.fine_tune_batch_norm))
    print("initialize_last_layer: " + str(FLAGS.initialize_last_layer))

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information for both train and val splits.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)
    dataset_val = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.val_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)
    tf.logging.info('Validating on %s set', FLAGS.val_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

            # Validation inputs: no scale augmentation, full train batch size.
            samples_val = input_generator.get(
                dataset_val,
                FLAGS.train_crop_size,
                FLAGS.train_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                dataset_split=FLAGS.val_split,
                is_training=False,
                model_variant=FLAGS.model_variant)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config, model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for
            # example, the updates for the batch_norm variables created by
            # model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions.
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        # (A second, redundant histogram loop over slim.get_model_variables()
        # was removed here; the same histograms are already added above.)

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        # Validation branch: predict on the val split and compute metrics.
        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.train_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)
        predictions_val = model.predict_labels(
            samples_val[common.IMAGE],
            model_options,
            image_pyramid=FLAGS.image_pyramid)
        predictions_val = predictions_val[common.OUTPUT_TYPE]
        predictions_val = tf.reshape(predictions_val, shape=[-1])
        labels_val = tf.reshape(samples_val[common.LABEL], shape=[-1])

        # NOTE(review): ignore_label regions are NOT remapped to 0 here, while
        # tf.metrics.mean_iou requires labels in [0, num_classes) -- confirm
        # the val labels contain no ignore_label pixels.
        accuracy_validation = slim.metrics.accuracy(
            tf.to_int32(predictions_val), tf.to_int32(labels_val))
        # BUG FIX: num_classes was hard-coded to 6; use the dataset's actual
        # class count, consistent with model_options above.
        iou, conf_mat = tf.metrics.mean_iou(
            labels_val, predictions_val, num_classes=dataset.num_classes)

        def train_step_fn(session, *args, **kwargs):
            """Wraps slim's train_step to allow periodic validation hooks."""
            total_loss, should_stop = train_step(session, *args, **kwargs)
            if train_step_fn.step % FLAGS.validation_check == 0:
                # Validation run disabled: evaluating the metric here
                # eventually raises OutOfRange on the input queue.
                pass
            train_step_fn.step += 1
            return [total_loss, should_stop]

        train_step_fn.step = 0
        train_step_fn.accuracy_validation = accuracy_validation

        # Start the training.
        slim.learning.train(
            train_tensor,
            train_step_fn=train_step_fn,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=train_utils.get_model_init_fn(
                FLAGS.train_logdir,
                FLAGS.tf_initial_checkpoint,
                FLAGS.initialize_last_layer,
                last_layers,
                ignore_missing_vars=True),
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
    """Evaluate a frozen inference graph image-by-image and log metrics."""
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    if FLAGS.eval_batch_size != 1:
        raise ValueError('Batch size {} is not allowed. '
                         'Inference graph can only be '
                         'evaluated image by image.'.format(
                             FLAGS.eval_batch_size))
    batch_size = 1

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        # Load the frozen inference graph and wire the input pipeline into it.
        graph_def = tf.GraphDef()
        with open(FLAGS.graph_path, "rb") as f:
            graph_def.ParseFromString(f.read())

        samples[common.IMAGE] = tf.cast(samples[common.IMAGE], tf.uint8)
        tf.import_graph_def(
            graph_def, input_map={_INPUT_TENSOR: samples[common.IMAGE]})
        predictions = g.get_tensor_by_name('import/' + _OUTPUT_TENSOR)
        predictions = tf.reshape(predictions, shape=[-1])

        (_, summary_op, metrics_to_updates, confusion_matrix,
         category_iou) = eval.create_metrics(g, samples, dataset, predictions)

        tf.train.get_or_create_global_step()
        sv = tf.train.Supervisor(
            graph=g,
            logdir=FLAGS.eval_logdir,
            init_op=tf.global_variables_initializer(),
            summary_op=None,
            global_step=None,
            saver=None)
        # BUG FIX: guard against log_steps == 0 (ZeroDivisionError in the
        # modulo below) when the dataset has fewer than 10 samples.
        log_steps = max(1, int(math.floor(dataset.num_samples / 10)))

        with sv.managed_session(start_standard_services=False) as sess:
            sv.start_queue_runners(sess)
            for image_number in range(dataset.num_samples):
                if ((image_number + 1) % log_steps == 0 or
                        image_number == dataset.num_samples - 1):
                    tf.logging.info('Evaluation [%d/%d]', image_number + 1,
                                    dataset.num_samples)
                # BUG FIX: Session.run does not accept a py3 dict view;
                # materialize the update ops into a list.
                sess.run([samples[common.IMAGE],
                          list(metrics_to_updates.values())])
            sv.summary_computed(sess, sess.run(summary_op))
            sess.run([confusion_matrix, category_iou])
def main(unused_argv):
    """Train DeepLab: build the multi-clone training graph and run slim training."""
    # Set the logging threshold so per-step training logs are emitted.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    # Idiom fix: floor division instead of int(a / b) (identical for
    # non-negative ints, avoids the float round-trip).
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information (an instance of slim Dataset for the
    # given dataset name / split / directory).
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            # A dictionary of batched tensors for semantic segmentation.
            # Images larger than train_crop_size are randomly cropped during
            # training; scale augmentation runs from min_scale_factor to
            # max_scale_factor in steps of scale_factor_step_size.
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            # Prefetch queue buffering up to `capacity` batched elements.
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones. model_fn(*model_args) is
            # called config.num_clones times; any loss it creates is collected
            # in tf.GraphKeys.LOSSES.
            model_fn = _build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config, model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for
            # example, the updates for the batch_norm variables created by
            # model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions.
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            summary_label = tf.cast(
                graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, common.LABEL)).strip('/')), tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            predictions = tf.cast(
                tf.expand_dims(
                    tf.argmax(
                        graph.get_tensor_by_name(
                            ('%s/%s:0' %
                             (first_clone_scope,
                              common.OUTPUT_TYPE)).strip('/')), 3), -1),
                tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            # Learning rate for the chosen policy ("step" or "poly").
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        # (A second, redundant histogram loop over slim.get_model_variables()
        # was removed here; the same histograms are already added above.)

        with tf.device(config.variables_device()):
            # Compute clone losses and gradients for the given list of Clones.
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables: when
            # fine-tuning from a classification checkpoint, the last layer
            # typically gets a larger (e.g. 10x) learning rate.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op. tf.group()/tf.identity() turn the
            # update list into ops, so the control dependency guarantees
            # total_loss (the train op) is computed only after
            # apply_gradients has run.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation;
        # cap per-process GPU memory use at 90%.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            gpu_options=gpu_options)

        # Start the training. init_fn restores model variables from a
        # checkpoint after init_op has run.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=train_utils.get_model_init_fn(
                FLAGS.train_logdir,
                FLAGS.tf_initial_checkpoint,
                FLAGS.initialize_last_layer,
                last_layers,
                ignore_missing_vars=True),
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
    """Train DeepLab; flag values may be overridden by a JSON config file.

    The config file may also define a 'DatasetDescriptor' entry describing a
    custom dataset (name, split sizes, class count, ignore label).
    """
    dataset_descriptor = None
    if FLAGS.config and os.path.isfile(FLAGS.config):
        with open(FLAGS.config) as f:
            training_config = json.load(f)
            for key in training_config:
                if key in FLAGS:
                    FLAGS[key].value = training_config[key]
                elif key == 'DatasetDescriptor':
                    info = training_config[key]
                    dataset_descriptor = segmentation_dataset.DatasetDescriptor(
                        name=info['name'],
                        splits_to_sizes=info['splits_to_sizes'],
                        num_classes=info['num_classes'],
                        ignore_label=info['ignore_label'],
                    )

    assert FLAGS.dataset_dir, (
        'flag --dataset_dir=None: Flag --dataset_dir must be specified.')
    assert FLAGS.train_logdir, (
        'flag --train_logdir=None: Flag --train_logdir must be specified.')

    tf.logging.set_verbosity(tf.logging.INFO)

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    if dataset_descriptor is None:
        dataset_descriptor = FLAGS.dataset

    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        dataset_descriptor, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config, model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for
            # example, the updates for the batch_norm variables created by
            # model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions.
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        # NOTE(review): this loop duplicates the model-variable histograms
        # added above; kept as-is to preserve behavior.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        # Start the training.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=train_utils.get_model_init_fn(
                FLAGS.train_logdir,
                FLAGS.tf_initial_checkpoint,
                FLAGS.initialize_last_layer,
                last_layers,
                ignore_missing_vars=True),
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
  """Trains DeepLab with tf-slim's multi-clone (multi-GPU) deployment.

  Builds the input pipeline, clones the model across GPUs, sets up the
  momentum optimizer with gradient multipliers for the last layers, and
  runs slim.learning.train until FLAGS.training_number_of_steps.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisble by number of clones (GPUs).')

  # BUG FIX: use floor division. Plain '/' produces a float under Python 3
  # (PEP 238), and the per-clone batch size must be an int; the other
  # entry points in this file already use '//'.
  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default():
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(
          samples, capacity=128 * config.num_clones)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (inputs_queue, {
          common.OUTPUT_TYPE: dataset.num_classes
      }, dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                     first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    # Delay worker startup proportionally to the task id (chief starts first).
    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes()
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      # train_tensor returns the loss but only after all updates have run.
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

    # Save checkpoints regularly.
    saver = tf.train.Saver(max_to_keep=FLAGS.max_to_keep)

    # Start the training.
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_logdir,
        log_every_n_steps=FLAGS.log_steps,
        master=FLAGS.master,
        number_of_steps=FLAGS.training_number_of_steps,
        is_chief=(FLAGS.task == 0),
        session_config=session_config,
        startup_delay_steps=startup_delay_steps,
        init_fn=train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True),
        summary_op=summary_op,
        saver=saver,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
  """Evaluates a DeepLab model: builds the eval graph, then runs slim's
  evaluation loop reporting mIoU, accuracy, and per-class IoU/accuracy.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.eval_split,
                                             dataset_dir=FLAGS.dataset_dir)
  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)
  # NOTE(review): `ckpt` is not used below; evaluation_loop polls
  # FLAGS.checkpoint_dir on its own.
  ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
  # vars_to_restore = get_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path)
  # print([v.name for v in tf.global_variables()])
  # vars_to_restore = [v for v in tf.global_variables() if v.name[:-2] in vars_to_restore]
  with tf.Graph().as_default():
    # Preprocessed eval batch (no augmentation: is_training=False).
    samples = input_generator.get(dataset,
                                  FLAGS.eval_crop_size,
                                  FLAGS.eval_batch_size,
                                  min_resize_value=FLAGS.min_resize_value,
                                  max_resize_value=FLAGS.max_resize_value,
                                  resize_factor=FLAGS.resize_factor,
                                  dataset_split=FLAGS.eval_split,
                                  is_training=False,
                                  model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0, ):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE],
                                         model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    # Flatten predictions and labels so they can feed tf.metrics.
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    # Zero weight excludes ignore_label pixels from every metric below.
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(tf.equal(labels, dataset.ignore_label),
                      tf.zeros_like(labels), labels)

    # Summary tag encodes the eval scales (and flipping) used in this run.
    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    prediction_tag_2 = 'iou_per_class'
    prediction_tag_confMatrix = 'confusion_matrix'

    # segnet iou in numpy
    # NOTE(review): fast_hist/get_hist are only referenced by the
    # commented-out metric_map entries below, so they are currently unused.
    def fast_hist(gt, pred, n_clss):
      # Confusion histogram: bincount of (n_clss * gt + pred) reshaped to
      # an [n_clss, n_clss] matrix.
      # true false mask where gt is valid
      # k = (gt >= 0) & (gt < n_clss)
      # return tf.reshape(tf.bincount(n_clss * tf.cast(gt[k], tf.int8) + pred[k], minlength=n_clss ** 2),
      #                   [n_clss, n_clss])
      print(type(gt))
      print(type(pred))
      return tf.reshape(
          tf.bincount(tf.add(tf.multiply(n_clss, gt),
                             tf.cast(pred, tf.int32)),
                      minlength=n_clss**2), [n_clss, n_clss])

    def get_hist(predictions, labels, num_class, batch_size):
      # Sums the per-image confusion histograms over the batch.
      print(predictions.shape)
      # num_class = predictions.shape[3]
      # batch_size = predictions.shape[0]
      hist = tf.zeros((num_class, num_class), dtype=tf.int32)
      print(labels.shape)
      print(predictions.shape)
      for i in range(batch_size):
        # hist += fast_hist(labels[i], predictions[i], num_class)
        hist += fast_hist(labels[i], predictions[i], num_class)
      return hist

    # Define the evaluation metric.
    metric_map = {}
    metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                      labels,
                                                      dataset.num_classes,
                                                      weights=weights)
    # metric_map[prediction_tag_2] = tf.map_fn(get_hist, [predictions, labels,
    #     dataset.num_classes, FLAGS.eval_batch_size])
    # metric_map[prediction_tag_2] = get_hist(predictions, labels,
    #     num_class=dataset.num_classes, batch_size=FLAGS.eval_batch_size)
    # why does this not work?  (original comment: "wieso geht das nicht?")
    # metric_map[prediction_tag_confMatrix] = tf.contrib.metrics.confusion_matrix(labels, predictions,
    #     num_classes=dataset.num_classes)
    # metric_map['precision'] = tf.contrib.metrics.streaming_precision(predictions, labels)
    metric_map['accuracy'] = tf.metrics.accuracy(labels, predictions)

    # Compute each class's IoU and accuracy (credit: MrZhousf).
    mean_iou_v, update_op = my_metrics.iou(predictions,
                                           labels,
                                           dataset.num_classes,
                                           weights=weights)
    acc_v, update_op2 = my_metrics.acc(predictions,
                                       labels,
                                       dataset.num_classes,
                                       weights=weights)
    # One (value, update) metric pair per class, keyed by index + class name.
    for index in range(0, dataset.num_classes):
      metric_map[str(index)+'_' + segmentation_dataset.get_classname(FLAGS.dataset, index) + '_iou'] =\
          (mean_iou_v[index], update_op[index])
      metric_map[str(index)+'_' + segmentation_dataset.get_classname(FLAGS.dataset, index) + '_acc'] =\
          (acc_v[index], update_op2[index])

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    # Mirror every metric into a printed scalar summary.
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(metric_value,
                                        metric_name,
                                        print_summary=True)

    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    # 0 or negative means "evaluate forever" (evaluation_loop semantics).
    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
  """Trains DeepLab with optional gradient accumulation.

  When FLAGS.batch_iter > 1, gradients from batch_iter consecutive
  mini-batches are accumulated into non-trainable shadow variables and a
  single optimizer step is applied afterwards (single GPU/clone only).
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Normalize batch_iter; accumulation requires exactly one clone/replica.
  if FLAGS.batch_iter < 1:
    FLAGS.batch_iter = 1
  if FLAGS.batch_iter != 1:
    if not (FLAGS.num_clones == 1 and FLAGS.num_replicas == 1):
      raise NotImplementedError(
          "train.py: **NOTE** -- train_utils.train_step_custom may not work with parallel GPUs / clones > 1! Be sure you are only using one GPU."
      )
    print('\ntrain.py: Accumulating gradients over {} iterations\n'.format(
        FLAGS.batch_iter))

  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                         clone_on_cpu=FLAGS.clone_on_cpu,
                                         replica_id=FLAGS.task,
                                         num_replicas=FLAGS.num_replicas,
                                         num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisble by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset,
      FLAGS.train_split,
      dataset_dir=FLAGS.dataset_dir,
  )

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                   capacity=128 *
                                                   config.num_clones)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      if FLAGS.class_balanced_loss:
        print(
            'train.py: class_balanced_loss=True. Reading loss weights from segmentation_dataset.py'
        )
      else:
        print(
            'train.py: class_balanced_loss=False. Setting loss weights to 1.0 for every class.'
        )
        dataset.loss_weight = 1.0

      #_build_deeplab has model args:
      #(inputs_queue, outputs_to_num_classes, ignore_label, loss_weight):
      outputs_to_num_classes = {common.OUTPUT_TYPE: dataset.num_classes}
      model_args = (inputs_queue,\
                    outputs_to_num_classes, dataset.ignore_label,
                    dataset.loss_weight)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                     first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions
    if FLAGS.save_summaries_images:
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                           summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      if FLAGS.batch_iter <= 1:
        # Plain single-step training.
        # batch_iter = 0 signals train_step_custom that no accumulation is
        # active (ACCUM_STEPS below) — presumably; confirm in train_utils.
        FLAGS.batch_iter = 0
        summaries.add(tf.summary.scalar('total_loss', total_loss))
        grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
          train_tensor = tf.identity(total_loss, name='train_op')
        accum_tensor = None
      else:
        ############ Accumulate grads_and_vars op. ####################
        accum_update_ops = list(update_ops)  #.copy()
        # Create (grad, var) list to accumulate gradients in. Inititalize to 0.
        accum_grads_and_vars = [
            (tf.Variable(tf.zeros_like(gv[0]),
                         trainable=False,
                         name=gv[0].name.strip(":0") + "_accum"), gv[1])
            for gv in grads_and_vars
        ]
        assert len(accum_grads_and_vars) == len(grads_and_vars)

        # Running sum of the per-iteration losses, for reporting.
        total_loss_accum = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        accum_loss_update_op = [total_loss_accum.assign_add(total_loss)]
        accum_update_ops.append(accum_loss_update_op)

        ## Accumulate gradients: accum_grad[i] += (grad[i] / FLAGS.batch_iter)
        # scaled gradients.
        accum_ops = [
            accum_grads_and_vars[i][0].assign_add(
                tf.div(gv[0], 1.0 * FLAGS.batch_iter))
            for i, gv in enumerate(grads_and_vars)
        ]
        accum_update_ops.append(accum_ops)

        accum_update_op = tf.group(*accum_update_ops)
        with tf.control_dependencies([accum_update_op]):
          accum_print_ops = []
          if FLAGS.batch_iter_verbose:
            accum_print_ops.extend([
                tf.Print(
                    tf.constant(0), [tf.add(global_step, 1)],
                    message=
                    'train.py: accumulating gradients for step: '),
                #tf.Print(total_loss, [total_loss], message='   step total_loss: ')
                #tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]], message='   '),
            ])
          accum_update_ops.append(accum_print_ops)
          with tf.control_dependencies([tf.group(*accum_print_ops)]):
            # accum_tensor runs one accumulation step and yields the
            # accumulated loss (no weight update).
            accum_tensor = tf.identity(total_loss_accum, name='accum_op')

        ##################### Train op (apply [accumulated] grads and vars) ###############################
        train_update_ops = list(update_ops)  #.copy()
        ## Create gradient update op.
        # Apply gradients from accumulated gradients
        grad_updates = optimizer.apply_gradients(accum_grads_and_vars,
                                                 global_step=global_step)
        train_update_ops.append(grad_updates)

        grad_print_ops = []
        if FLAGS.batch_iter_verbose:
          grad_print_ops.extend([
              # tf.Print(tf.constant(0), [grads_and_vars[0][0], grads_and_vars[0][1]], message='---grads[0] and vars[0]---------\n'),
              #tf.Print(tf.constant(0), [], message=grads_and_vars[0][1].name),
              tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]],
                       message='GRADS BEFORE ZERO: ')
          ])
        train_update_ops.append(grad_print_ops)

        total_loss_accum_average = tf.div(total_loss_accum, FLAGS.batch_iter)
        summaries.add(
            tf.summary.scalar('total_loss', total_loss_accum_average))

        train_update_op = tf.group(*train_update_ops)
        with tf.control_dependencies([train_update_op]):
          # After the weights are updated, zero all accumulators so the
          # next accumulation cycle starts fresh.
          zero_ops = []
          zero_accum_ops = [
              agv[0].assign(tf.zeros_like(agv[0]))
              for agv in accum_grads_and_vars
          ]
          zero_ops.append(zero_accum_ops)
          zero_accum_total_loss_op = [total_loss_accum.assign(0)]
          zero_ops.append(zero_accum_total_loss_op)
          zero_op = tf.group(*zero_ops)
          with tf.control_dependencies([zero_op]):
            grad_print_ops = []
            if FLAGS.batch_iter_verbose:
              grad_print_ops.extend([
                  #tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]], message='GRADS AFTER ZERO ')
              ])
            with tf.control_dependencies(
                [tf.group(*grad_print_ops)]):
              train_tensor = tf.identity(
                  total_loss_accum_average, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or
    # _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    session_config.gpu_options.allow_growth = True

    #train_step_exit = train_utils.train_step_exit
    train_step_custom = train_utils.train_step_custom
    # Non-positive interval means "validate only at the very end".
    if FLAGS.validation_interval <= 0:
      FLAGS.validation_interval = FLAGS.training_number_of_steps
    else:
      print("*** Validation interval: {} ***".format(
          FLAGS.validation_interval))

    # Start the training.
    slim.learning.train(train_tensor,
                        logdir=FLAGS.train_logdir,
                        train_step_fn=train_step_custom(
                            VALIDATION_N=FLAGS.validation_interval,
                            ACCUM_OP=accum_tensor,
                            ACCUM_STEPS=FLAGS.batch_iter),
                        log_every_n_steps=FLAGS.log_steps,
                        master=FLAGS.master,
                        number_of_steps=FLAGS.training_number_of_steps,
                        is_chief=(FLAGS.task == 0),
                        session_config=session_config,
                        startup_delay_steps=startup_delay_steps,
                        init_fn=train_utils.get_model_init_fn(
                            FLAGS.train_logdir,
                            FLAGS.tf_initial_checkpoint,
                            FLAGS.initialize_last_layer,
                            last_layers,
                            ignore_missing_vars=True),
                        summary_op=summary_op,
                        save_summaries_secs=FLAGS.save_summaries_secs,
                        save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
  """Visually verifies the input pipeline: runs the training input queue
  and displays each batch's image, label, colored label and an overlay
  with OpenCV windows (press a key to advance)."""
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                         clone_on_cpu=FLAGS.clone_on_cpu,
                                         replica_id=FLAGS.task,
                                         num_replicas=FLAGS.num_replicas,
                                         num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisble by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.train_split,
                                             dataset_dir=FLAGS.dataset_dir)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                   capacity=128 *
                                                   config.num_clones)
      samples = inputs_queue.dequeue()

      # Add name to input and label nodes so we can add to summary.
      samples[common.IMAGE] = tf.identity(samples[common.IMAGE],
                                          name=common.IMAGE)
      samples[common.LABEL] = tf.identity(samples[common.LABEL],
                                          name=common.LABEL)
      print(samples)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

    init = tf.global_variables_initializer()
    with tf.Session() as session:
      session.run(init)
      # Queue runners feed the prefetch queue until the input is exhausted.
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord)
      print('Start verification process...')
      try:
        while True:
          out_image, out_label = session.run(
              [samples[common.IMAGE], samples[common.LABEL]])
          #write_file("out_label.csv",np.squeeze(out_label[0], axis=2))
          # OpenCV expects BGR, the pipeline yields RGB — convert for display.
          cv2.imshow(
              'out_image',
              cv2.cvtColor(out_image[0] / 255, cv2.COLOR_RGB2BGR))
          # Multiply labels by 100 so small class ids become visible.
          cv2.imshow('out_label',
                     np.asarray(out_label[0] * 100, dtype=np.uint8))
          colored_label = get_dataset_colormap.label_to_color_image(
              np.squeeze(out_label[0]),
              dataset=get_dataset_colormap.get_pascal_name())
          cv2.imshow(
              "colored_label",
              cv2.cvtColor(colored_label.astype(np.uint8),
                           cv2.COLOR_RGB2BGR))

          # 50/50 blend of the input image and the colored label.
          alpha = 0.5
          img_add = cv2.addWeighted(out_image[0], alpha,
                                    colored_label.astype(np.float32),
                                    1 - alpha, 0)
          cv2.imshow("colored_overlap",
                     cv2.cvtColor(img_add, cv2.COLOR_RGB2BGR) / 255)
          # Block until a key is pressed before fetching the next batch.
          cv2.waitKey(0)
      except tf.errors.OutOfRangeError:
        print("end!")
      coord.request_stop()
      coord.join(threads)
def process_one(filepath):
  """Processes a single input file: moves it into place, writes its
  record, builds a fresh DeepLab inference graph, restores the latest
  checkpoint and saves the semantic predictions via my_process_batch."""
  movefile(filepath)
  record.write_record(None, reader)
  # with get_prepare_graph().as_default():
  # samples, predictions = do_prepare()
  #
  # with vis_graph.as_default():
  # tf.train.get_or_create_global_step()
  # sv = get_supervisor()
  # with sv.managed_session(A_master, start_standard_services=False) as sess:
  # do_process_batch(sess, samples, predictions)
  # skel_extract.extract()
  # skel_extract.load()
  # Build the inference graph in its own tf.Graph so repeated calls do not
  # pollute the default graph.
  vis_graph = tf.Graph()
  with vis_graph.as_default():
    dataset = segmentation_dataset.get_dataset(A_dataset,
                                               A_vis_split,
                                               dataset_dir=A_dataset_dir)
    samples = input_generator.get(dataset,
                                  A_vis_crop_size,
                                  A_vis_batch_size,
                                  min_resize_value=A_min_resize_value,
                                  max_resize_value=A_max_resize_value,
                                  resize_factor=A_resize_factor,
                                  dataset_split=A_vis_split,
                                  is_training=False,
                                  model_variant=A_model_variant)
    model_options = mycommon.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=A_vis_crop_size,
        atrous_rates=A_atrous_rates,
        output_stride=A_output_stride)
    print(samples[common.IMAGE])
    predictions = model.predict_labels(samples[common.IMAGE],
                                       model_options=model_options,
                                       image_pyramid=A_image_pyramid)
    predictions = predictions[common.OUTPUT_TYPE]
    tf.train.get_or_create_global_step()
    # NOTE(review): vis_session appears unused — the Supervisor below
    # creates and manages its own session.
    vis_session = tf.Session(graph=vis_graph)
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=vis_graph,
                             logdir=A_vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
  # start_standard_services=False: we start queue runners and restore the
  # checkpoint manually below.
  with sv.managed_session('', start_standard_services=False) as sess:
    sv.start_queue_runners(sess)
    sv.saver.restore(sess, tf.train.latest_checkpoint(A_checkpoint_dir))
    #samples, predictions = do_prepare()
    #tf.train.get_or_create_global_step()
    #do_process_batch(get_session(), samples, predictions)
    save_dir = os.path.join(work_dir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    my_process_batch(sess, samples[common.ORIGINAL_IMAGE], predictions,
                     samples[common.IMAGE_NAME], samples[common.HEIGHT],
                     samples[common.WIDTH], save_dir)
def main(unused_argv):
  """Visualization entry point (debug build).

  Builds the vis graph, restores the latest checkpoint and saves one
  batch of predictions. NOTE(review): this version is instrumented with
  blocking input() prompts and prints for interactive debugging.
  """
  # input('in vis.main:')
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  # FLAGS.dataset: homeplus
  # FLAGS.vis_split: val
  # FLAGS.dataset_dir: tfrecord
  dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.vis_split,
                                             dataset_dir=FLAGS.dataset_dir)
  # input('dataset finish')

  train_id_to_eval_id = None
  # if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
  #   tf.logging.info('Cityscapes requires converting train_id to eval_id.')
  #   train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(FLAGS.vis_logdir,
                              _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  # Interactive debugging pause — blocks until Enter is pressed.
  input('prepare finished')

  g = tf.Graph()
  with g.as_default():
    # get preprocessed data
    print('min_resize_value: %s' % str(FLAGS.min_resize_value))
    print('max_resize_value: %s' % str(FLAGS.max_resize_value))
    input('--')
    print(FLAGS.vis_crop_size)
    print(FLAGS.vis_batch_size)
    print(FLAGS.min_resize_value)
    print(FLAGS.max_resize_value)
    print(FLAGS.resize_factor)
    print(FLAGS.vis_split)
    print(FLAGS.model_variant)
    print(FLAGS.atrous_rates)
    print(FLAGS.output_stride)
    # print(FLAGS)
    samples = input_generator.get(
        dataset,  # a dataset
        FLAGS.vis_crop_size,  # [1505, 2049]
        FLAGS.vis_batch_size,  # 1
        min_resize_value=FLAGS.min_resize_value,  # None
        max_resize_value=FLAGS.max_resize_value,  # None
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.vis_split,  # val
        is_training=False,
        model_variant=FLAGS.model_variant)  # xception 65

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.vis_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
    print(samples[common.IMAGE])
    input('before predict')  # maybe predict
    print(dataset.num_classes)
    print(FLAGS.vis_crop_size)
    print(FLAGS.atrous_rates)
    print(FLAGS.output_stride)
    input()
    # NOTE(review): unlike the eval main, there is no multi-scale else
    # branch — `predictions` would be undefined if FLAGS.eval_scales is
    # not (1.0,); confirm eval_scales is always single-scale here.
    if tuple(FLAGS.eval_scales) == (1.0, ):
      tf.logging.info('Performing single-scale test.')
      # images: A tensor of size [batch, height, width, channels]
      predictions = model.predict_labels(samples[common.IMAGE],
                                         model_options=model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    predictions = predictions[common.OUTPUT_TYPE]
    print(predictions.shape)
    print(predictions.dtype)
    input()
    input('predictions finish')

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      input('not pos')
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we reisze the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions, [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([
          tf.squeeze(samples[common.HEIGHT]),
          tf.squeeze(samples[common.WIDTH])
      ])
      # Nearest-neighbor keeps the integer label values intact.
      predictions = tf.squeeze(
          tf.image.resize_images(
              tf.expand_dims(predictions, 3),
              resized_shape,
              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
              align_corners=True), 3)

    input('before saver')
    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir=FLAGS.vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    print('dataset.num_samples: ' + str(dataset.num_samples))
    print('FLAGS.vis_batch_size: ' + str(FLAGS.vis_batch_size))
    # input()
    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
    last_checkpoint = None

    # Loop to visualize the results when new checkpoint is created.
    # last_checkpoint = slim.evaluation.wait_for_new_checkpoint(FLAGS.checkpoint_dir, last_checkpoint)
    tf.logging.info('Starting visualization at ' +
                    time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    tf.logging.info('Visualizing with model %s', last_checkpoint)

    # start nvidia in opening session
    with sv.managed_session(FLAGS.master,
                            start_standard_services=False) as sess:
      input('in session')
      print(FLAGS.checkpoint_dir)
      sv.start_queue_runners(sess)
      my_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
      sv.saver.restore(sess, my_checkpoint)
      # sv.saver.restore(sess, last_checkpoint)

      tf.logging.info('Visualizing batch %d / %d', 1, num_batches)
      input('before batch')
      # save one prediction png
      _process_batch(sess=sess,
                     original_images=samples[common.ORIGINAL_IMAGE],
                     semantic_predictions=predictions,
                     image_names=samples[common.IMAGE_NAME],
                     image_heights=samples[common.HEIGHT],
                     image_widths=samples[common.WIDTH],
                     image_id_offset=0,
                     save_dir=save_dir,
                     raw_save_dir=raw_save_dir,
                     train_id_to_eval_id=train_id_to_eval_id)
      tf.logging.info('Finished visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
def main(unused_argv):
    """Visualize semantic-segmentation predictions for FLAGS.vis_split.

    Builds the inference graph once, then loops waiting for checkpoints:
    each new checkpoint is restored and `_process_batch` is run over the
    selected batch indices, writing outputs under FLAGS.vis_logdir
    (semantic/raw predictions, plus 'logits' and 'uncertainties' subdirs).

    Graph construction has three optional layers, all driven by FLAGS:
      * input hints (class / dynamic-class / partial-boundary) concatenated
        onto the image as extra channels;
      * single- vs multi-scale prediction;
      * Monte-Carlo-dropout uncertainty: softmax and softmax^2 accumulator
        ops are exposed via `extra_to_run` so _process_batch can average
        over multiple forward passes of the SAME batch (hence the
        samples_placeholders indirection below).

    NOTE(review): this function mutates FLAGS.hint_types in place when
    dynamic hints are preprocessed into plain class hints.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset,
        FLAGS.vis_split,
        dataset_dir=FLAGS.dataset_dir,
        use_input_hints=FLAGS.input_hints,
        hint_types=FLAGS.hint_types)

    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name(
    ) and FLAGS.convert_to_eval_id:
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization: one output directory per artifact type.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)
    logit_save_dir = os.path.join(FLAGS.vis_logdir, 'logits')
    tf.gfile.MakeDirs(logit_save_dir)
    uncertainty_save_dir = os.path.join(FLAGS.vis_logdir, 'uncertainties')
    tf.gfile.MakeDirs(uncertainty_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        # Running samples_orig will grab a new batch
        samples_orig = input_generator.get(
            dataset,
            FLAGS.vis_crop_size,
            FLAGS.vis_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.vis_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        # samples_placeholders will represent a batch of data for the network. The values will be filled
        # by samples_orig. Decoupled so that same batch can be run through network multiple times.
        # See _process_batch.
        samples_placeholders = {}
        for k, v in samples_orig.items():
            samples_placeholders[k] = tf.placeholder(
                dtype=v.dtype, shape=v.shape, name='samples_{}'.format(k))

        # Since original code was written with 'samples' variable, leave original code alone and initialize samples dictionary here
        # The reason we don't use samples = samples_placeholders is because samples is overwritten several times
        # and we need to keep samples_placeholders in its original state in order to fill it with values from samples_orig.
        samples = {k: v for k, v in samples_placeholders.items()}

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if FLAGS.input_hints:  # or if common.HINT in samples.keys():
            if 'dynamic_class_partial_boundary_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic partial boundary class hints, do not use other hint types!'
                print("----")
                print(
                    "eval.py: Partial boundary hints with grid {}x{}.".format(
                        FLAGS.dynamic_class_partial_boundary_hint_B,
                        FLAGS.dynamic_class_partial_boundary_hint_B))
                print("eval.py: Drawing blocks with p {}.".format(
                    FLAGS.dynamic_class_partial_boundary_hint_p))
                if FLAGS.dynamic_class_partial_boundary_full_block:
                    # NOTE(review): this message has no {} placeholder, so the
                    # .format() call is a no-op — likely a copy-paste leftover.
                    print(
                        "eval.py: Keeping entire block instead of masking boundaries."
                        .format(FLAGS.boundary_threshold))
                else:
                    print(
                        "eval.py: Masking with boundary threshold {}.".format(
                            FLAGS.boundary_threshold))
                print("----")

                if FLAGS.dynamic_class_partial_boundary_full_block:
                    # Keep everything: mask of all-ones, same shape as labels.
                    boundary_mask = tf.cast(
                        tf.ones_like(samples[common.LABEL]), tf.uint8)
                else:
                    # Keep only pixels closer to a boundary than the threshold.
                    boundary_mask = tf.cast(
                        tf.less(samples[common.BOUNDARY_DMAP],
                                FLAGS.boundary_threshold), tf.uint8)

                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_partial_boundaries_helper(
                        B=FLAGS.dynamic_class_partial_boundary_hint_B,
                        p=FLAGS.dynamic_class_partial_boundary_hint_p),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                # py_func loses static shape info; restore it from the labels.
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work will be done as usual.
                FLAGS.hint_types = ['class_hint']
            ###

            if 'dynamic_class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic class hints, do not use other hint types!'
                print("----")
                print(
                    "WARNING: Do not use dynamic class hints when simulating crowdsourced points as the points should not change between runs."
                )
                print("vis.py: Drawing hints with geo mean {}.".format(
                    FLAGS.dynamic_class_hint_geo_mean))
                print("vis.py: Masking with boundary threshold {}.".format(
                    FLAGS.boundary_threshold))
                print("----")
                boundary_mask = tf.cast(
                    tf.less(samples[common.BOUNDARY_DMAP],
                            FLAGS.boundary_threshold), tf.uint8)
                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_clicks_helper(
                        geo_mean=FLAGS.dynamic_class_hint_geo_mean),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                # py_func loses static shape info; restore it from the labels.
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work will be done as usual.
                FLAGS.hint_types = ['class_hint']

            # If using class hints, preprocess into num_class binary mask channels
            if 'class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using class hints, do not use other hint types!'
                num_classes = dataset.num_classes
                print('vis.py: num classes is {}'.format(num_classes))
                class_hint_channels_list = []
                for label in range(num_classes):
                    # Multiply by 255 is to bring into same range as image pixels...,
                    # and so feature_extractor mean subtraction will reduce it back to 0,1 range
                    class_hint_channel = tf.to_float(
                        tf.equal(samples[common.HINT], label)) * 255
                    class_hint_channels_list.append(class_hint_channel)
                class_hint_channels = tf.concat(class_hint_channels_list,
                                                axis=-1)
                samples[common.HINT] = class_hint_channels

            # Get hints and concat to image as input into network
            samples[common.HINT] = tf.identity(samples[common.HINT],
                                               name=common.HINT)
            model_inputs = tf.concat(
                [samples[common.IMAGE],
                 tf.to_float(samples[common.HINT])], axis=-1)
        else:
            # Just image is input into network
            model_inputs = samples[common.IMAGE]

        # Outputs of graph construction; which ones are non-None depends on
        # the FLAGS combination chosen below.
        outputs_to_scales_to_logits = None
        logits = None
        predictions = None
        fixed_features = None
        extra_to_run = {}

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            if FLAGS.compute_uncertainty and FLAGS.force_dropout_only_branch:
                # Uncertainty with dropout restricted to the decoder branch:
                # compute decoder features once, then re-run only the (dropout)
                # logits head many times via the fixed_features placeholder.
                fixed_features = model._get_features_after_decoder(
                    images=model_inputs,
                    model_options=model_options,
                    reuse=None,
                    is_training=False,
                    fine_tune_batch_norm=False,
                    force_dropout=True,
                    force_dropout_only_branch=FLAGS.force_dropout_only_branch,
                    keep_prob=FLAGS.keep_prob)
                samples_placeholders['fixed_features'] = tf.placeholder(
                    dtype=fixed_features.dtype, shape=fixed_features.shape)
                logits_from_fixed_features = model._get_branch_logits(
                    samples_placeholders['fixed_features'],
                    model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
                    model_options.atrous_rates,
                    aspp_with_batch_norm=model_options.aspp_with_batch_norm,
                    kernel_size=model_options.logits_kernel_size,
                    reuse=None,
                    scope_suffix=common.OUTPUT_TYPE,
                    keep_prob=FLAGS.keep_prob,
                    force_dropout=True)
                logits_from_fixed_features = tf.image.resize_bilinear(
                    logits_from_fixed_features,
                    size=tf.shape(samples[common.IMAGE])[1:3],
                    align_corners=True)

                softmax_from_fixed_features = tf.nn.softmax(
                    logits_from_fixed_features)
                # Running sums of softmax and softmax^2 across forward passes;
                # the caller feeds the previous sums back through these
                # placeholders (shape must be given statically via FLAGS).
                samples_placeholders['accumulated_softmax'] = tf.placeholder(
                    dtype=softmax_from_fixed_features.dtype,
                    shape=FLAGS.vis_placeholder_size)
                #shape=[1, 1025, 2049, 19])
                #shape=[1, 513, 513, 23])
                samples_placeholders[
                    'accumulated_softmax_sq'] = tf.placeholder(
                        dtype=softmax_from_fixed_features.dtype,
                        shape=FLAGS.vis_placeholder_size)
                #shape=[1, 1025, 2049, 19])
                #shape=[1, 513, 513, 23])
                accumulated_softmax = samples_placeholders[
                    'accumulated_softmax'] + softmax_from_fixed_features
                accumulated_softmax_sq = samples_placeholders[
                    'accumulated_softmax_sq'] + tf.square(
                        softmax_from_fixed_features)
                extra_to_run['accumulated_softmax'] = accumulated_softmax
                extra_to_run['accumulated_softmax_sq'] = accumulated_softmax_sq
            elif FLAGS.save_logits or FLAGS.compute_uncertainty:
                predictions, outputs_to_scales_to_logits = model.predict_labels(
                    # samples[common.IMAGE],
                    model_inputs,
                    model_options=model_options,
                    image_pyramid=FLAGS.image_pyramid,
                    also_return_logits=True,
                    force_dropout=(FLAGS.compute_uncertainty
                                   or FLAGS.force_dropout),
                    force_dropout_only_branch=FLAGS.force_dropout_only_branch,
                    keep_prob=FLAGS.keep_prob)
                assert tuple(FLAGS.eval_scales) == (1.0, )
                assert len(outputs_to_scales_to_logits) == 1
                # Single output type asserted above, so this picks the merged
                # logits of the only entry.
                for output in sorted(outputs_to_scales_to_logits):
                    scales_to_logits = outputs_to_scales_to_logits[output]
                    logits = scales_to_logits[model._MERGED_LOGITS_SCOPE]

                if FLAGS.compute_uncertainty:
                    assert not FLAGS.save_logits
                    # We need full size logits to compute final predition and uncertainty.
                    logits = tf.image.resize_bilinear(
                        logits,
                        size=tf.shape(model_inputs)[1:3],
                        align_corners=True)
                    softmax_logits = tf.nn.softmax(logits)
                    # Same accumulator pattern as the fixed-features branch.
                    samples_placeholders[
                        'accumulated_softmax'] = tf.placeholder(
                            dtype=softmax_logits.dtype,
                            shape=FLAGS.vis_placeholder_size)
                    #shape=[1, 1025, 2049, 19])
                    #shape=[1, 513, 513, 23])
                    samples_placeholders[
                        'accumulated_softmax_sq'] = tf.placeholder(
                            dtype=softmax_logits.dtype,
                            shape=FLAGS.vis_placeholder_size)
                    #shape=[1, 1025, 2049, 19])
                    #shape=[1, 513, 513, 23])
                    accumulated_softmax = samples_placeholders[
                        'accumulated_softmax'] + softmax_logits
                    accumulated_softmax_sq = samples_placeholders[
                        'accumulated_softmax_sq'] + tf.square(softmax_logits)
                    extra_to_run['accumulated_softmax'] = accumulated_softmax
                    extra_to_run[
                        'accumulated_softmax_sq'] = accumulated_softmax_sq
            else:
                predictions = model.predict_labels(
                    # samples[common.IMAGE],
                    model_inputs,
                    model_options=model_options,
                    image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
            if FLAGS.save_logits:
                raise NotImplementedError("Multiscale logits aren't saved")

        if predictions is not None:
            predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            if FLAGS.input_hints:
                #raise Exception("***Unclear if this will work with hints. Look over the code.")
                print(
                    "***Unclear if this will work with hints. Look over the code."
                )
            # Only support batch_size = 1, since we assume the dimensions of original
            # image after tf.squeeze is [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove padded region) and then
            # we reisze the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            # Nearest-neighbor keeps predictions as valid class ids.
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        # Supervisor is used only for session management / queue runners;
        # summaries and checkpoint saving are disabled here.
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)

        if FLAGS.vis_num_batches <= 0:
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
        else:
            num_batches = FLAGS.vis_num_batches

        # Select which batch indices to visualize (optionally shuffled).
        # NOTE(review): relies on Python 2 semantics — range() returns a list
        # here (np.random.shuffle / .extend would fail on a py3 range object).
        if FLAGS.shuffle:
            shuffled_idxs = range(dataset.num_samples)
            np.random.seed(FLAGS.shuffle_seed)
            np.random.shuffle(shuffled_idxs)
        else:
            shuffled_idxs = range(dataset.num_samples)
        idxs_to_keep = shuffled_idxs[FLAGS.start_idx:FLAGS.start_idx +
                                     FLAGS.vis_num_batches]
        if FLAGS.also_vis_first_N > 0:
            idxs_to_keep.extend(shuffled_idxs[0:FLAGS.also_vis_first_N])
        print(sorted(idxs_to_keep)[:10])
        print("There are {} indices to keep.".format(len(idxs_to_keep)))

        # NOTE(review): this recompute overwrites the FLAGS.vis_num_batches
        # value assigned above — the loop below always walks the full epoch and
        # skips batches not in idxs_to_keep. Confirm this is intended.
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))

        last_checkpoint = None
        # Loop to visualize the results when new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0
               or num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1
            last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                FLAGS.checkpoint_dir, last_checkpoint)
            start = time.time()
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            tf.logging.info('Visualizing with model %s', last_checkpoint)

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)
                sv.saver.restore(sess, last_checkpoint)

                image_id_offset = 0
                for batch in range(num_batches):
                    if batch in idxs_to_keep:
                        tf.logging.info('Visualizing batch %d / %d', batch + 1,
                                        num_batches)
                        _process_batch(
                            sess=sess,
                            original_images=samples[common.ORIGINAL_IMAGE],
                            semantic_predictions=predictions,
                            image_names=samples[common.IMAGE_NAME],
                            image_heights=samples[common.HEIGHT],
                            image_widths=samples[common.WIDTH],
                            image_id_offset=image_id_offset,
                            save_dir=save_dir,
                            raw_save_dir=raw_save_dir,
                            save_logits=FLAGS.save_logits,
                            logits=logits,
                            fixed_features=fixed_features,
                            extra_to_run=extra_to_run,
                            logit_save_dir=logit_save_dir,
                            uncertainty_save_dir=uncertainty_save_dir,
                            train_id_to_eval_id=train_id_to_eval_id,
                            samples_orig=samples_orig,
                            samples_placeholders=samples_placeholders,
                            compute_uncertainty=FLAGS.compute_uncertainty,
                            num_forward_passes=FLAGS.
                            compute_uncertainty_iterations)
                    else:
                        # Run batch generator to skip this batch
                        sess.run([samples_orig])
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            # Sleep until the next eval interval, unless this was the final
            # allowed iteration.
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0 and num_iters < FLAGS.max_number_of_iterations:
                time.sleep(time_to_next_eval)
def train(self):
    """Build the multi-clone DeepLab training graph and run slim training.

    Reads configuration from self.flags. Constructs the input pipeline
    (dataset_pipeline + input_generator + prefetch queue), replicates the
    model across FLAGS.num_clones via model_deploy, wires up summaries,
    learning-rate schedule and momentum optimizer, optionally restores
    from FLAGS.tf_initial_checkpoint, and finally hands control to
    slim.learning.train, which blocks until training finishes.

    Side effects: creates FLAGS.train_logdir and writes checkpoints and
    summaries there.
    """
    FLAGS = self.flags
    dataset_split = 'train'
    data_config = edict()
    data_config.edge_width = 20
    data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    data_config.edge_class_num = FLAGS.edge_class_num
    img_files, label_files = get_dataset_files(FLAGS.dataset, dataset_split)

    # Lightweight stand-in for the slim dataset object: only the attributes
    # used below (num_classes / ignore_label / num_samples) are populated.
    dataset = edict()
    dataset_pp = dataset_pipeline(data_config,
                                  img_files,
                                  label_files,
                                  is_train=True)
    dataset.num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
    dataset.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    dataset.num_samples = len(dataset_pp)

    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    # dataset = segmentation_dataset.get_dataset(
    #     FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            data_list = dataset_pp.iterator()
            samples = input_generator.get(
                (data_list, dataset.ignore_label),
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                         capacity=128 *
                                                         config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = self._build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        label_name = ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/')
        print('first clone label name is:', label_name)

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for miou,acc
        labels = graph.get_tensor_by_name(
            ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
        predictions = graph.get_tensor_by_name(
            ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
        # Bring network output up to label resolution before computing metrics.
        predictions = tf.image.resize_bilinear(predictions,
                                               tf.shape(labels)[1:3],
                                               align_corners=True)
        # predictions shape (2, 513, 513, 19/21)
        print('predictions shape', predictions.shape)
        self.get_metric(labels, predictions, 'train')

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # losses = {}
        # for key in [common.OUTPUT_TYPE,common.EDGE]:
        #     losses[key]=graph.get_tensor_by_name(name='losses/%s:0'%key)
        #     summaries.add(tf.summary.scalar('losses/'+key,losses[key]))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
        # NOTE(review): model-variable histograms were already added above;
        # this second loop re-adds the same summaries (set dedup makes it
        # harmless but redundant).
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('losses/total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            # train_tensor evaluates to total_loss but only after all updates
            # (gradients + batch-norm) have run.
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True

        # init_fn=train_utils.get_model_init_fn(
        #     FLAGS.train_logdir,
        #     FLAGS.tf_initial_checkpoint,
        #     FLAGS.initialize_last_layer,
        #     last_layers,
        #     ignore_missing_vars=True)

        # Restore from the initial checkpoint, excluding the global step and
        # (optionally) the logits layers so they can be re-trained.
        exclude_list = ['global_step']
        if not FLAGS.initialize_last_layer:
            exclude_list.extend(last_layers)
        variables_to_restore = slim.get_variables_to_restore(
            exclude=exclude_list)
        init_fn = slim.assign_from_checkpoint_fn(
            model_path=FLAGS.tf_initial_checkpoint,
            var_list=variables_to_restore,
            ignore_missing_vars=True)

        # Commented-out manual training loop kept from the original for
        # reference; slim.learning.train below is the active path.
        # saver = tf.train.Saver()
        # train_writer = tf.summary.FileWriter(FLAGS.train_logdir)
        # sess=tf.Session(config=session_config)
        # init_fn(sess)
        # sess.run(tf.global_variables_initializer())
        # sess.run(tf.local_variables_initializer())
        # tf.train.start_queue_runners(sess)
        #
        # for i in trange(FLAGS.training_number_of_steps):
        #     loss,summary,n_step=sess.run([train_tensor,summary_op,global_step])
        #     train_writer.add_summary(summary,i)
        #     if i%100==1:
        #         tqdm.write('%d/%d global_step=%0.2f, loss=%0.5f'%(i,FLAGS.training_number_of_steps,n_step,loss))
        #
        # saver.save(sess,os.path.join(FLAGS.train_logdir,'model'),global_step=FLAGS.training_number_of_steps)
        # train_writer.close()

        # Start the training.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=init_fn,
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def val(self):
    """Build the eval graph for the 'val' split and run slim's evaluation loop.

    Reads configuration from self.flags. Computes single- or multi-scale
    predictions, then a weighted mean-IoU metric (ignore_label pixels get
    weight 0), and hands off to slim.evaluation.evaluation_loop, which
    repeatedly evaluates checkpoints found in FLAGS.checkpoint_dir and
    writes summaries to FLAGS.eval_logdir.
    """
    FLAGS = self.flags
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    # dataset = segmentation_dataset.get_dataset(
    #     FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
    dataset_split = 'val'
    data_config = edict()
    data_config.edge_width = 20
    data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    data_config.edge_class_num = FLAGS.edge_class_num
    img_files, label_files = get_dataset_files(FLAGS.dataset, dataset_split)
    dataset_pp = dataset_pipeline(data_config,
                                  img_files,
                                  label_files,
                                  is_train=False)
    num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
    ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    num_samples = len(dataset_pp)

    print('eval_logdir is', FLAGS.eval_logdir)
    print('checkpoint dir is', FLAGS.checkpoint_dir)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        data_list = dataset_pp.iterator()
        samples = input_generator.get(
            (data_list, ignore_label),
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = predict_labels(samples[common.IMAGE],
                                         model_options,
                                         image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        # Flatten predictions/labels so mean_iou sees 1-D tensors.
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        # Zero weight on ignore_label pixels excludes them from the metric.
        weights = tf.to_float(tf.not_equal(labels, ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, ignore_label),
                          tf.zeros_like(labels), labels)

        # Metric tag encodes the eval scales (and flip) used.
        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                          labels,
                                                          num_classes,
                                                          weights=weights)

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        num_batches = int(
            math.ceil(num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        # max_number_of_evaluations <= 0 means evaluate forever.
        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
    """Train the pose/shape regression DeepLab variant (multi-clone).

    Builds a multi-GPU training graph from `regression_dataset`, optionally
    a weight-sharing validation tower on a fixed GPU, a large set of image
    and scalar summaries, then runs `slim.learning.train` with a custom
    `train_step_fn` that periodically dumps validation tensors to stdout.
    """
    # Resolve log dirs: restore_logdir defaults to train_logdir unless a
    # separate run name to restore from was given.
    FLAGS.train_logdir = FLAGS.base_logdir + '/' + FLAGS.task_name
    if FLAGS.restore_name == None:
        FLAGS.restore_logdir = FLAGS.train_logdir
    else:
        FLAGS.restore_logdir = FLAGS.base_logdir + '/' + FLAGS.restore_name
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get logging dir ready: create it, or interactively empty a non-empty
    # one when not restoring.
    # NOTE(review): `os.system('rm -rf ...')` driven by raw_input is
    # destructive and shell-injectable via the logdir path — confirm OK
    # for this research setting.
    if not (os.path.isdir(FLAGS.train_logdir)):
        tf.gfile.MakeDirs(FLAGS.train_logdir)
    elif len(os.listdir(FLAGS.train_logdir)) != 0:
        if not (FLAGS.if_restore):
            if_delete_all = raw_input(
                '#### The log folder %s exists and non-empty; delete all logs? [y/n] '
                % FLAGS.train_logdir)
            if if_delete_all == 'y':
                os.system('rm -rf %s/*' % FLAGS.train_logdir)
                print '==== Log folder emptied.'
        else:
            print '==== Log folder exists; not emptying it because we need to restore from it.'
    tf.logging.info('==== Logging in dir:%s; Training on %s set',
                    FLAGS.train_logdir, FLAGS.train_split)

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)  # /device:CPU:0

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    dataset = regression_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.train_split,
                                             dataset_dir=FLAGS.dataset_dir)
    dataset_val = regression_dataset.get_dataset(FLAGS.dataset,
                                                 FLAGS.val_split,
                                                 dataset_dir=FLAGS.dataset_dir)
    print '#### The data has size:', dataset.num_samples, dataset_val.num_samples

    # NOTE(review): hard-coded absolute path to the shape-code matrix;
    # breaks on any other machine — consider promoting to a flag.
    codes = np.load(
        '/ssd2/public/zhurui/Documents/mesh-voxelization/models/cars_64/codes.npy'
    )

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            # Per-dimension range of the shape codes, padded by one extra bin
            # step on each side, then discretized together with the pose
            # ranges into dataset.bin_nums bins per output.
            codes_max = np.amax(codes, axis=1).reshape((-1, 1))
            codes_min = np.amin(codes, axis=1).reshape((-1, 1))
            shape_range = np.hstack(
                (codes_max + (codes_max - codes_min) /
                 (dataset.SHAPE_BINS - 1.),
                 codes_min - (codes_max - codes_min) /
                 (dataset.SHAPE_BINS - 1.)))
            bin_range = [
                np.linspace(r[0], r[1], num=b).tolist() for r, b in zip(
                    np.vstack((dataset.pose_range, shape_range)),
                    dataset.bin_nums)
            ]
            # print np.vstack((dataset.pose_range, shape_range))
            # print bin_range[0]
            # print bin_range[-1]

            # Each output head is either a bin classifier (discrete loss)
            # or a single-channel regressor.
            outputs_to_num_classes = {}
            outputs_to_indices = {}
            for output, bin_num, idx in zip(
                    dataset.output_names, dataset.bin_nums,
                    range(len(dataset.output_names))):
                if FLAGS.if_discrete_loss:
                    outputs_to_num_classes[output] = bin_num
                else:
                    outputs_to_num_classes[output] = 1
                outputs_to_indices[output] = idx
            # Bin-center constants, one [1, bin_num] tensor per output head.
            bin_vals = [tf.constant(value=[bin_range[i]], dtype=tf.float32, shape=[1, dataset.bin_nums[i]], name=name) \
                for i, name in enumerate(dataset.output_names)]
            # print outputs_to_num_classes
            # print spaces_to_indices

            samples = input_generator.get(dataset,
                                          codes,
                                          clone_batch_size,
                                          dataset_split=FLAGS.train_split,
                                          is_training=True,
                                          model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

            samples_val = input_generator.get(
                dataset_val,
                codes,
                clone_batch_size,
                dataset_split=FLAGS.val_split,
                is_training=False,
                model_variant=FLAGS.model_variant)
            inputs_queue_val = prefetch_queue.prefetch_queue(samples_val,
                                                            capacity=128)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            # Trailing positional args are is_training=True, reuse=False.
            model_args = (FLAGS, inputs_queue.dequeue(),
                          outputs_to_num_classes, outputs_to_indices,
                          bin_vals, bin_range, dataset, codes, True, False)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)  # clone_0
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # NOTE(review): validation tower pinned to a hard-coded GPU index;
        # fails on hosts with fewer than 4 GPUs.
        with tf.device('/device:GPU:3'):
            if FLAGS.if_val:
                ## Construct the validation graph; takes one GPU.
                _build_deeplab(FLAGS,
                               inputs_queue_val.dequeue(),
                               outputs_to_num_classes,
                               outputs_to_indices,
                               bin_vals,
                               bin_range,
                               dataset_val,
                               codes,
                               is_training=False,
                               reuse=True)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for images, labels, semantic predictions
        summary_loss_dict = {}
        if FLAGS.save_summaries_images:
            # Tensor-name patterns: training tensors live under the first
            # clone scope (when multi-clone); validation tensors carry the
            # 'val-' prefix.  `pattern` selects which tower is visualized.
            if FLAGS.num_clones > 1:
                pattern_train = first_clone_scope + '/%s:0'
            else:
                pattern_train = '%s:0'
            pattern_val = 'val-%s:0'
            pattern = pattern_val if FLAGS.if_val else pattern_train
            # Visualize up to 3 samples per batch.
            gather_list = [0] if FLAGS.num_clones < 3 else [0, 1, 2]

            summary_mask = graph.get_tensor_by_name(
                pattern % 'not_ignore_mask_in_loss')
            summary_mask = tf.reshape(
                summary_mask, [-1, dataset.height, dataset.width, 1])
            summary_mask_float = tf.to_float(summary_mask)
            summaries.add(
                tf.summary.image(
                    'gt/%s' % 'not_ignore_mask',
                    tf.gather(tf.cast(summary_mask_float * 255., tf.uint8),
                              gather_list)))

            summary_image = graph.get_tensor_by_name(pattern % common.IMAGE)
            summaries.add(
                tf.summary.image('gt/%s' % common.IMAGE,
                                 tf.gather(summary_image, gather_list)))

            summary_image_name = graph.get_tensor_by_name(
                pattern % common.IMAGE_NAME)
            summaries.add(
                tf.summary.text('gt/%s' % common.IMAGE_NAME,
                                tf.gather(summary_image_name, gather_list)))

            summary_image_name = graph.get_tensor_by_name(
                pattern_train % common.IMAGE_NAME)
            summaries.add(
                tf.summary.text('gt/%s_train' % common.IMAGE_NAME,
                                tf.gather(summary_image_name, gather_list)))

            summary_vis = graph.get_tensor_by_name(pattern % 'vis')
            summaries.add(
                tf.summary.image('gt/%s' % 'vis',
                                 tf.gather(summary_vis, gather_list)))

            def scale_to_255(tensor, pixel_scaling=None):
                """Rescale a float tensor to uint8 [0, 255] for image summaries.

                When `pixel_scaling` is None the (offset, scale) pair is
                derived from the tensor's own min/max; otherwise the given
                pair is reused so two tensors share one color scale.
                Returns (uint8_tensor, (offset, scale)).
                """
                tensor = tf.to_float(tensor)
                if pixel_scaling == None:
                    offset_to_zero = tf.reduce_min(tensor)
                    scale_to_255 = tf.div(
                        255., tf.reduce_max(tensor - offset_to_zero))
                else:
                    offset_to_zero, scale_to_255 = pixel_scaling
                summary_tensor_float = tensor - offset_to_zero
                summary_tensor_float = summary_tensor_float * scale_to_255
                summary_tensor_float = tf.clip_by_value(
                    summary_tensor_float, 0., 255.)
                summary_tensor_uint8 = tf.cast(summary_tensor_float,
                                               tf.uint8)
                return summary_tensor_uint8, (offset_to_zero, scale_to_255)

            label_outputs = graph.get_tensor_by_name(
                pattern % 'label_pose_shape_map')
            label_id_outputs = graph.get_tensor_by_name(
                pattern % 'pose_shape_label_id_map')
            logit_outputs = graph.get_tensor_by_name(
                pattern % 'scaled_prob_logits_pose_shape_map')

            summary_rot_diffs = graph.get_tensor_by_name(
                pattern % 'rot_error_map')
            summary_rot_diffs = tf.where(summary_mask, summary_rot_diffs,
                                         tf.zeros_like(summary_rot_diffs))
            summary_rot_diffs_uint8, _ = scale_to_255(summary_rot_diffs)
            summaries.add(
                tf.summary.image(
                    'metrics_map/%s' % 'rot_diffs',
                    tf.gather(summary_rot_diffs_uint8, gather_list)))

            summary_trans_diffs = graph.get_tensor_by_name(
                pattern % 'trans_error_map')
            summary_trans_diffs = tf.where(
                summary_mask, summary_trans_diffs,
                tf.zeros_like(summary_trans_diffs))
            summary_trans_diffs_uint8, _ = scale_to_255(summary_trans_diffs)
            # NOTE(review): this summary feeds the float tensor, not
            # `summary_trans_diffs_uint8`, unlike the rot_diffs path above —
            # looks like a bug; confirm which was intended.
            summaries.add(
                tf.summary.image('metrics_map/%s' % 'trans_diffs',
                                 tf.gather(summary_trans_diffs,
                                           gather_list)))

            shape_id_outputs = graph.get_tensor_by_name(
                pattern % 'shape_id_map')
            shape_id_outputs = tf.where(summary_mask, shape_id_outputs + 1,
                                        tf.zeros_like(shape_id_outputs))
            summary_shape_id_output_uint8, _ = scale_to_255(shape_id_outputs)
            summaries.add(
                tf.summary.image(
                    'shape/shape_id_map',
                    tf.gather(summary_shape_id_output_uint8, gather_list)))

            shape_id_outputs_gt = graph.get_tensor_by_name(
                pattern % 'shape_id_map_gt')
            # NOTE(review): `tf.zeros_like(shape_id_outputs)` references the
            # prediction tensor, not `shape_id_outputs_gt`; works only if
            # both share a shape — presumably a copy-paste slip, confirm.
            shape_id_outputs_gt = tf.where(summary_mask,
                                           shape_id_outputs_gt + 1,
                                           tf.zeros_like(shape_id_outputs))
            summary_shape_id_output_uint8_gt, _ = scale_to_255(
                shape_id_outputs_gt)
            summaries.add(
                tf.summary.image(
                    'shape/shape_id_map_gt',
                    tf.gather(summary_shape_id_output_uint8_gt,
                              gather_list)))

            if FLAGS.if_summary_metrics:
                shape_id_outputs = graph.get_tensor_by_name(
                    pattern % 'shape_id_map_predict')
                summary_shape_id_output = tf.where(
                    summary_mask, shape_id_outputs,
                    tf.zeros_like(shape_id_outputs))
                summary_shape_id_output_uint8, _ = scale_to_255(
                    summary_shape_id_output)
                summaries.add(
                    tf.summary.image(
                        'shape/shape_id_map_predict',
                        tf.gather(summary_shape_id_output_uint8,
                                  gather_list)))

                shape_id_sim_map_train = graph.get_tensor_by_name(
                    pattern_train % 'shape_id_sim_map')
                # shape_id_sim_map_train = tf.where(summary_mask, shape_id_sim_map_train, tf.zeros_like(shape_id_sim_map_train))
                shape_id_sim_map_uint8_train, _ = scale_to_255(
                    shape_id_sim_map_train, pixel_scaling=(0., 255.))
                summaries.add(
                    tf.summary.image(
                        'metrics_map/shape_id_sim_map-trainInv',
                        tf.gather(shape_id_sim_map_uint8_train,
                                  gather_list)))

                shape_id_sim_map = graph.get_tensor_by_name(
                    pattern % 'shape_id_sim_map')
                # shape_id_sim_map = tf.where(summary_mask, shape_id_sim_map, tf.zeros_like(shape_id_sim_map))
                shape_id_sim_map_uint8, _ = scale_to_255(
                    shape_id_sim_map, pixel_scaling=(0., 255.))
                summaries.add(
                    tf.summary.image(
                        'metrics_map/shape_id_sim_map-valInv',
                        tf.gather(shape_id_sim_map_uint8, gather_list)))

            # Per-output-head label/logit/diff images and slice losses.
            for output_idx, output in enumerate(dataset.output_names):
                # # Scale up summary image pixel values for better visualization.
                summary_label_output = tf.gather(label_outputs, [output_idx],
                                                 axis=3)
                summary_label_output = tf.where(
                    summary_mask, summary_label_output,
                    tf.zeros_like(summary_label_output))
                summary_label_output_uint8, pixel_scaling = scale_to_255(
                    summary_label_output)
                summaries.add(
                    tf.summary.image(
                        'output/%s_label' % output,
                        tf.gather(summary_label_output_uint8, gather_list)))

                # Logits reuse the label's pixel scaling so the two images
                # are directly comparable.
                summary_logit_output = tf.gather(logit_outputs, [output_idx],
                                                 axis=3)
                summary_logit_output = tf.where(
                    summary_mask, summary_logit_output,
                    tf.zeros_like(summary_logit_output))
                summary_logit_output_uint8, _ = scale_to_255(
                    summary_logit_output, pixel_scaling)
                summaries.add(
                    tf.summary.image(
                        'output/%s_logit' % output,
                        tf.gather(summary_logit_output_uint8, gather_list)))

                # summary_label_id_output = tf.to_float(tf.gather(label_id_outputs, [output_idx], axis=3))
                # summary_label_id_output = tf.where(summary_mask, summary_label_id_output+1, tf.zeros_like(summary_label_id_output))
                # summary_label_id_output_uint8, _ = scale_to_255(summary_label_id_output)
                # summary_label_id_output_uint8 = tf.identity(summary_label_id_output_uint8, 'tttt'+output)
                # summaries.add(tf.summary.image(
                #     'test/%s_label_id' % output, tf.gather(summary_label_id_output_uint8, gather_list)))

                summary_diff = tf.abs(
                    tf.to_float(summary_label_output_uint8) -
                    tf.to_float(summary_logit_output_uint8))
                summary_diff = tf.where(summary_mask, summary_diff,
                                        tf.zeros_like(summary_diff))
                summaries.add(
                    tf.summary.image(
                        'diff_map/%s_ldiff' % output,
                        tf.gather(tf.cast(summary_diff, tf.uint8),
                                  gather_list)))

                summary_loss = graph.get_tensor_by_name(
                    (pattern % 'loss_slice_reg_').replace(':0', '') +
                    output + ':0')
                summaries.add(
                    tf.summary.scalar(
                        'slice_loss/' +
                        (pattern % 'reg_').replace(':0', '') + output,
                        summary_loss))

                summary_loss = graph.get_tensor_by_name(
                    (pattern % 'loss_slice_cls_').replace(':0', '') +
                    output + ':0')
                summaries.add(
                    tf.summary.scalar(
                        'slice_loss/' +
                        (pattern % 'cls_').replace(':0', '') + output,
                        summary_loss))

            # Scalar total-loss summaries for train (clone-averaged) and,
            # when enabled, the validation tower.
            for pattern in [pattern_train, pattern_val
                            ] if FLAGS.if_val else [pattern_train]:
                add_metrics = ['loss_all_shape_id_cls_metric'
                               ] if FLAGS.if_summary_metrics else []
                for loss_name in [
                        'loss_reg_rot_quat_metric', 'loss_reg_rot_quat',
                        'loss_reg_trans_metric', 'loss_reg_trans',
                        'loss_cls_ALL', 'loss_reg_shape'
                ] + add_metrics:
                    if pattern == pattern_val:
                        summary_loss_avg = graph.get_tensor_by_name(
                            pattern % loss_name)
                        # summary_loss_dict['val-'+loss_name] = summary_loss_avg
                    else:
                        summary_loss_avg = train_utils.get_avg_tensor_from_scopes(
                            FLAGS.num_clones, '%s:0', graph, config,
                            loss_name)
                        # summary_loss_dict['train-'+loss_name] = summary_loss_avg
                    summaries.add(
                        tf.summary.scalar(
                            ('total_loss/' + pattern % loss_name).replace(
                                ':0', ''), summary_loss_avg))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            # optimizer = tf.train.AdamOptimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            print '------ total_loss', total_loss, tf.get_collection(
                tf.GraphKeys.LOSSES, first_clone_scope)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss/train', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            print '////last layers', last_layers

            # Filter trainable variables for last layers ONLY.
            # grads_and_vars = train_utils.filter_gradients(last_layers, grads_and_vars)

            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            # train_tensor returns the loss but only after all updates ran.
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = False

        def train_step_fn(sess, train_op, global_step, train_step_kwargs):
            """Custom slim train step: run the train op, then periodically
            fetch and print validation tensors every
            FLAGS.val_interval_steps steps."""
            train_step_fn.step += 1  # or use global_step.eval(session=sess)

            # calc training losses
            loss, should_stop = slim.learning.train_step(
                sess, train_op, global_step, train_step_kwargs)
            print loss
            # print 'loss: ', loss

            # first_clone_test = graph.get_tensor_by_name(
            #         ('%s/%s:0' % (first_clone_scope, 'shape_map')).strip('/'))
            # test = sess.run(first_clone_test)
            # # print test
            # print 'test: ', test.shape, np.max(test), np.min(test), np.mean(test), test.dtype
            # NOTE(review): this unconditionally discards the stop signal
            # from slim's train_step, so early stopping via
            # train_step_kwargs never fires — confirm intended.
            should_stop = 0

            if FLAGS.if_val and train_step_fn.step % FLAGS.val_interval_steps == 0:
                # first_clone_test = graph.get_tensor_by_name('val-loss_all:0')
                # test = sess.run(first_clone_test)
                print '-- Validating...'
                first_clone_test = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, 'shape_id_map')).strip('/'))
                first_clone_test2 = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, 'shape_id_sim_map')).strip('/'))
                # 'ttttrow:0')
                first_clone_test3 = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope,
                      'not_ignore_mask_in_loss')).strip('/'))
                # 'ttttrow:0')
                test_out, test_out2, test_out3 = sess.run(
                    [first_clone_test, first_clone_test2, first_clone_test3])
                # test_out = test[:, :, :, 3]
                # Boolean-mask both maps down to the non-ignored pixels.
                test_out = test_out[test_out3]
                # test_out2 = test2[:, :, :, 3]
                test_out2 = test_out2[test_out3]
                # print test_out
                print 'shape_id_map: ', test_out.shape, np.max(
                    test_out), np.min(test_out), np.mean(test_out), np.median(
                        test_out), test_out.dtype
                print 'shape_id_sim_map: ', test_out2.shape, np.max(
                    test_out2), np.min(test_out2), np.mean(
                        test_out2), np.median(test_out2), test_out2.dtype
                print 'masks sum: ', test_out3.dtype, np.sum(
                    test_out3.astype(float))
                # assert np.max(test_out) == np.max(test_out2), 'MAtch1!!!'
                # assert np.min(test_out) == np.min(test_out2), 'MAtch2!!!'

                # first_clone_label = graph.get_tensor_by_name(
                #     ('%s/%s:0' % (first_clone_scope, 'pose_map')).strip('/'))  # clone_0/val-loss:0
                # # first_clone_pose_dict = graph.get_tensor_by_name(
                # #     ('%s/%s:0' % (first_clone_scope, 'pose_dict')).strip('/'))
                # first_clone_logit = graph.get_tensor_by_name(
                #     ('%s/%s:0' % (first_clone_scope, 'scaled_regression')).strip('/'))
                # not_ignore_mask = graph.get_tensor_by_name(
                #     ('%s/%s:0' % (first_clone_scope, 'not_ignore_mask_in_loss')).strip('/'))
                # label, logits, mask = sess.run([first_clone_label, first_clone_logit, not_ignore_mask])
                # mask = np.reshape(mask, (-1, FLAGS.train_crop_size[0], FLAGS.train_crop_size[1], dataset.num_classes))
                # print '... shapes, types, loss', label.shape, label.dtype, logits.shape, logits.dtype, loss
                # print 'mask', mask.shape, np.mean(mask)
                # logits[mask==0.] = 0.
                # print 'logits', logits.shape, np.max(logits), np.min(logits), np.mean(logits), logits.dtype
                # for idx in range(6):
                #     print idx, np.max(label[:, :, :, idx]), np.min(label[:, :, :, idx])
                # label = label[:, :, :, 5]
                # print 'label', label.shape, np.max(label), np.min(label), np.mean(label), label.dtype
                # print pose_dict, pose_dict.shape
                # # print 'training....... logits stats: ', np.max(logits), np.min(logits), np.mean(logits)
                # # label_one_piece = label[0, :, :, 0]
                # # print 'training....... label stats', np.max(label_one_piece), np.min(label_one_piece), np.sum(label_one_piece[label_one_piece!=255.])
            return [loss, should_stop]

        train_step_fn.step = 0

        # trainables = [v.name for v in tf.trainable_variables()]
        # alls =[v.name for v in tf.all_variables()]
        # print '----- Trainables %d: '%len(trainables), trainables
        # print '----- All %d: '%len(alls), alls[:10]
        # print '===== ', len(list(set(trainables) - set(alls)))
        # print '===== ', len(list(set(alls) - set(trainables)))

        if FLAGS.if_print_tensors:
            for op in tf.get_default_graph().get_operations():
                print str(op.name)

        # Start the training.
        slim.learning.train(train_tensor,
                            train_step_fn=train_step_fn,
                            logdir=FLAGS.train_logdir,
                            log_every_n_steps=FLAGS.log_steps,
                            master=FLAGS.master,
                            number_of_steps=FLAGS.training_number_of_steps,
                            is_chief=(FLAGS.task == 0),
                            session_config=session_config,
                            startup_delay_steps=startup_delay_steps,
                            init_fn=train_utils.get_model_init_fn(
                                FLAGS.restore_logdir,
                                FLAGS.tf_initial_checkpoint,
                                FLAGS.if_restore,
                                FLAGS.initialize_last_layer,
                                last_layers,
                                ignore_missing_vars=True),
                            summary_op=summary_op,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs)
def train(self):
    """Train the segmentation model with TF-Slim on the 'train' split.

    Builds the multi-clone training graph from `dataset_pipeline` inputs,
    adds image/metric/loss summaries, and runs `slim.learning.train`
    (blocking) for FLAGS.training_number_of_steps steps, logging to
    FLAGS.train_logdir.
    """
    FLAGS = self.flags
    dataset_split = 'train'  # this method always trains on the 'train' file list
    data_config = edict()
    data_config.edge_width = 20
    data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    data_config.edge_class_num = FLAGS.edge_class_num
    img_files, label_files = get_dataset_files(FLAGS.dataset, dataset_split)

    # `dataset` is a plain edict mimicking the slim dataset attributes
    # (num_classes / ignore_label / num_samples) used further below.
    dataset = edict()
    dataset_pp = dataset_pipeline(data_config,
                                  img_files,
                                  label_files,
                                  is_train=True)
    dataset.num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
    dataset.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    dataset.num_samples = len(dataset_pp)

    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    # dataset = segmentation_dataset.get_dataset(
    #     FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)
    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            data_list = dataset_pp.iterator()
            samples = input_generator.get(
                (data_list, dataset.ignore_label),
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = self._build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(
                tf.summary.histogram(model_var.op.name, model_var))

        label_name = ('%s/%s:0' %
                      (first_clone_scope, common.LABEL)).strip('/')
        print('first clone label name is:', label_name)

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3),
                                         -1)
            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for miou,acc
        labels = graph.get_tensor_by_name(
            ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
        predictions = graph.get_tensor_by_name(
            ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
        # Resize logits back to label resolution before taking argmax.
        predictions = tf.image.resize_bilinear(predictions,
                                               tf.shape(labels)[1:3],
                                               align_corners=True)
        labels = tf.reshape(labels, shape=[-1])
        predictions = tf.reshape(tf.argmax(predictions, 3), shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        # Define the evaluation metric.
        # NOTE(review): the update ops of these streaming metrics are
        # discarded (`, _ =`) and never added to update_ops, so the
        # summarized miou/acc values may never accumulate — confirm.
        # Also mean_iou is called positionally as (predictions, labels);
        # mIoU itself is symmetric under that swap.
        metric_map = {}
        metric_map['miou'], _ = tf.metrics.mean_iou(predictions,
                                                    labels,
                                                    dataset.num_classes,
                                                    weights=weights)
        metric_map['acc'], _ = tf.metrics.accuracy(
            labels=labels,
            predictions=predictions,
            weights=tf.reshape(weights, shape=[-1]))
        for x in ['miou', 'acc']:
            summaries.add(
                tf.summary.scalar('metrics/%s' % x, metric_map[x]))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                      first_clone_scope):
            summaries.add(
                tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        # NOTE(review): duplicates the model-variable histograms already
        # added above; harmless (set dedupes identical ops) but redundant.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            # train_tensor yields the loss only after all updates have run.
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)

        # Start the training.
        slim.learning.train(train_tensor,
                            logdir=FLAGS.train_logdir,
                            log_every_n_steps=FLAGS.log_steps,
                            master=FLAGS.master,
                            number_of_steps=FLAGS.training_number_of_steps,
                            is_chief=(FLAGS.task == 0),
                            session_config=session_config,
                            startup_delay_steps=startup_delay_steps,
                            init_fn=train_utils.get_model_init_fn(
                                FLAGS.train_logdir,
                                FLAGS.tf_initial_checkpoint,
                                FLAGS.initialize_last_layer,
                                last_layers,
                                ignore_missing_vars=True),
                            summary_op=summary_op,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
    """Standard DeepLab training entry point (TF-Slim, multi-clone).

    Builds the training graph from `segmentation_dataset`, adds optional
    image/loss summaries, and runs `slim.learning.train` (blocking) for
    FLAGS.training_number_of_steps steps, logging to FLAGS.train_logdir.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')

    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config, model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3),
                                         -1)

            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                      first_clone_scope):
            summaries.add(
                tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            # train_tensor yields the loss only after all updates have run.
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        # Start the training.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=train_utils.get_model_init_fn(
                FLAGS.train_logdir,
                FLAGS.tf_initial_checkpoint,
                FLAGS.initialize_last_layer,
                last_layers,
                ignore_missing_vars=True),
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def main(unused_argv):
    """Evaluates a DeepLab model on the eval split with mean IoU.

    Builds the input pipeline and prediction graph, then hands off to
    slim.evaluation.evaluation_loop, which repeatedly restores the latest
    checkpoint and accumulates the metric over the whole eval set.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):  # NOTE: `(1.0,)` is a tuple.
            tf.logging.info('Performing single-scale test.')
            # predict_labels returns a dict keyed by output type (e.g. semantic
            # prediction); each value is an argmax-over-channels Tensor of size
            # [batch, height, width].
            predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                               image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        # common.OUTPUT_TYPE = 'semantic', common.LABEL = 'label'.
        # `tf.reshape(..., shape=[-1])` flattens predictions and labels from
        # [batch, image_height, image_width, 1] down to one dimension.
        # `tf.not_equal(x, y)` returns an element-wise boolean (x != y) Tensor.
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes]. Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        # `tf.where(cond, a, b)` picks from `a` where cond is True, else from `b`;
        # `tf.zeros_like(labels)` supplies the replacement zeros for pixels whose
        # value equals ignore_label (e.g. 255), merely keeping label values inside
        # the range tf.metrics.mean_iou accepts — the zero weights above already
        # exclude those pixels from the metric.
        labels = tf.where(
            tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

        # Metric tag encodes the evaluation scales (e.g. 'miou_1.0') and whether
        # flipped images were averaged in.
        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        # In `tensorflow/python/ops/metrics_impl.py` IOU is defined as
        # IOU = true_positive / (true_positive + false_positive + false_negative);
        # with the dense predictions flattened to 1-D, true negatives never enter
        # the intersection-over-union computation.
        metric_map = {}
        metric_map[predictions_tag] = tf.metrics.mean_iou(
            predictions, labels, dataset.num_classes, weights=weights)

        # aggregate_metric_map pairs metric names with their (value, update) ops.
        # Here it returns dicts like {'miou_1.0': value_tensor} and
        # {'miou_1.0': update_op}.
        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(
                metric_value, metric_name, print_summary=True)

        # eval_batch_size is typically 1; dataset.num_samples is the number of
        # images in the eval split.
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        # When max_number_of_evaluations > 0 the loop stops after that many
        # evaluations (e.g. 1 from the command line) instead of waiting for new
        # checkpoints forever; see slim/python/slim/evaluation.py.
        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
    """Visualizes model predictions on the vis split and saves them to disk.

    Repeatedly waits for new checkpoints and, for each one, runs the model
    over the whole vis split, writing prediction images (and raw prediction
    arrays) via _process_batch.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.vis_split, dataset_dir=FLAGS.dataset_dir)
    # Cityscapes predictions are produced in train_id space and must be mapped
    # back to eval ids before saving.
    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(
        FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.vis_crop_size,
                                      FLAGS.vis_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.vis_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)
        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            # Only support batch_size = 1, since we assume the dimensions of original
            # image after tf.squeeze is [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove padded region) and then
            # we reisze the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions,
                [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                         tf.squeeze(samples[common.WIDTH])])
            # Nearest-neighbor resizing keeps the outputs valid class ids
            # (no interpolation blending between classes).
            predictions = tf.squeeze(
                tf.image.resize_images(tf.expand_dims(predictions, 3),
                                       resized_shape,
                                       method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                       align_corners=True), 3)

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        # Supervisor is used only for session management; summaries and
        # checkpoint saving are disabled (restore is done manually below).
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)
        num_batches = int(math.ceil(
            dataset.num_samples / float(FLAGS.vis_batch_size)))
        last_checkpoint = None

        # Loop to visualize the results when new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0 or
               num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1
            # Blocks until a checkpoint newer than last_checkpoint appears.
            last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                FLAGS.checkpoint_dir, last_checkpoint)
            start = time.time()
            tf.logging.info(
                'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                             time.gmtime()))
            tf.logging.info('Visualizing with model %s', last_checkpoint)

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)
                sv.saver.restore(sess, last_checkpoint)

                image_id_offset = 0
                for batch in range(num_batches):
                    tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches)
                    _process_batch(sess=sess,
                                   original_images=samples[common.ORIGINAL_IMAGE],
                                   semantic_predictions=predictions,
                                   image_names=samples[common.IMAGE_NAME],
                                   image_heights=samples[common.HEIGHT],
                                   image_widths=samples[common.WIDTH],
                                   image_id_offset=image_id_offset,
                                   save_dir=save_dir,
                                   raw_save_dir=raw_save_dir,
                                   train_id_to_eval_id=train_id_to_eval_id)
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info(
                'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                             time.gmtime()))
            # Throttle so successive passes start at least eval_interval_secs apart.
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
def main(unused_argv):
    """Evaluates mean IoU for a model that may take user hints as extra input.

    When FLAGS.input_hints is set, hint channels (class clicks or partial
    boundary hints) are generated on the fly with tf.py_func and concatenated
    to the RGB image before being fed to the network.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset,
        FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        use_input_hints=FLAGS.input_hints,
        hint_types=FLAGS.hint_types)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    if FLAGS.force_dropout:
        raise Exception("Need to implement force dropout for eval.py")

    with tf.Graph().as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.eval_crop_size,
                                      FLAGS.eval_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.eval_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        if FLAGS.input_hints:
            ###
            # TODO: Can modify this to checkerboard block hints.
            if 'dynamic_class_partial_boundary_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic partial boundary class hints, do not use other hint types!'
                print("----")
                print(
                    "eval.py: Partial boundary hints with grid {}x{}.".format(
                        FLAGS.dynamic_class_partial_boundary_hint_B,
                        FLAGS.dynamic_class_partial_boundary_hint_B))
                print("eval.py: Drawing blocks with p {}.".format(
                    FLAGS.dynamic_class_partial_boundary_hint_p))
                if FLAGS.dynamic_class_partial_boundary_full_block:
                    # NOTE(review): this message has no {} placeholder, so the
                    # .format(...) call is a no-op — presumably copy-pasted from
                    # the branch below.
                    print(
                        "eval.py: Keeping entire block instead of masking boundaries."
                        .format(FLAGS.boundary_threshold))
                else:
                    print(
                        "eval.py: Masking with boundary threshold {}.".format(
                            FLAGS.boundary_threshold))
                print("----")

                # Either keep whole hint blocks, or mask hints down to pixels
                # whose boundary distance map is below boundary_threshold.
                if FLAGS.dynamic_class_partial_boundary_full_block:
                    boundary_mask = tf.cast(
                        tf.ones_like(samples[common.LABEL]), tf.uint8)
                else:
                    boundary_mask = tf.cast(
                        tf.less(samples[common.BOUNDARY_DMAP],
                                FLAGS.boundary_threshold), tf.uint8)

                # Hints are generated in Python via tf.py_func; the static shape
                # is lost in the process, so it is re-asserted from the label
                # tensor below.
                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_partial_boundaries_helper(
                        B=FLAGS.dynamic_class_partial_boundary_hint_B,
                        p=FLAGS.dynamic_class_partial_boundary_hint_p),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work
                # will be done as usual.
                # NOTE(review): mutating FLAGS.hint_types at graph-build time
                # reroutes execution into the 'class_hint' branch below.
                FLAGS.hint_types = ['class_hint']
            ###

            if 'dynamic_class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic class hints, do not use other hint types!'
                print("----")
                print("eval.py: Drawing hints with geo mean {}.".format(
                    FLAGS.dynamic_class_hint_geo_mean))
                print("eval.py: Masking with boundary threshold {}.".format(
                    FLAGS.boundary_threshold))
                print("----")
                # Hints are only kept where the boundary distance map is below
                # the threshold.
                boundary_mask = tf.cast(
                    tf.less(samples[common.BOUNDARY_DMAP],
                            FLAGS.boundary_threshold), tf.uint8)
                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_clicks_helper(
                        geo_mean=FLAGS.dynamic_class_hint_geo_mean),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work
                # will be done as usual.
                FLAGS.hint_types = ['class_hint']

            # If using class hints, preprocess into num_class binary mask channels
            if 'class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using class hints, do not use other hint types!'
                num_classes = dataset.num_classes
                print('eval.py: num classes is {}'.format(num_classes))
                class_hint_channels_list = []
                for label in range(num_classes):
                    # Multiply by 255 is to bring into same range as image pixels...,
                    # and so feature_extractor mean subtraction will reduce it back
                    # to 0,1 range
                    class_hint_channel = tf.to_float(
                        tf.equal(samples[common.HINT], label)) * 255
                    class_hint_channels_list.append(class_hint_channel)
                class_hint_channels = tf.concat(class_hint_channels_list, axis=-1)
                samples[common.HINT] = class_hint_channels

            # Get hints and concat to image as input into network
            samples[common.HINT] = tf.identity(samples[common.HINT],
                                               name=common.HINT)
            model_inputs = tf.concat(
                [samples[common.IMAGE],
                 tf.to_float(samples[common.HINT])], axis=-1)
        else:
            # Just image is input into network
            model_inputs = samples[common.IMAGE]

        print('eval.py: shape {}'.format(model_inputs.get_shape().as_list()))

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]

        #predictions = tf.Print(predictions, predictions.get_shape())
        #from utils import train_utils
        #gen_boundaries = train_utils.generate_boundaries_helper(pixel_shift=1, ignore_label=255, distance_map=True, distance_map_scale=100, set_ignore_regions_to_ignore_value=False)
        #prediction_distance_map, _ = gen_boundaries(tf.to_float(tf.reshape(predictions, [1,1025, 2049])))
        #label_distance_map, _ = gen_boundaries(tf.to_float(tf.reshape(labels, [1, 1025, 2049])))

        # Flatten predictions/labels to 1-D for the metric ops.
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights =
        # 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        # mean iou
        metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                          labels,
                                                          dataset.num_classes,
                                                          weights=weights)
        # boundary distancemap l2
        #metric_map['boundary distance L2'] = tf.metrics.mean_squared_error(
        #prediction_distance_map, label_distance_map)

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations

        # NOTE(review): thread counts are hard-coded — confirm they suit the
        # evaluation machine.
        config = tf.ConfigProto(intra_op_parallelism_threads=16,
                                inter_op_parallelism_threads=1)
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs,
            session_config=config)
def main(unused_argv):
    """Evaluates the model on FLAGS.eval_split, reporting per-category IoU.

    Metric construction (summaries, confusion matrix, per-category IoU) is
    delegated to create_metrics; this function only assembles the input
    pipeline and prediction graph, then drives the evaluation loop.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.eval_split,
                                               dataset_dir=FLAGS.dataset_dir)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    graph = tf.Graph()
    with graph.as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            outputs = model.predict_labels(samples[common.IMAGE],
                                           model_options,
                                           image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            outputs = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        # Flatten the semantic prediction map to 1-D for the metric ops.
        flat_predictions = tf.reshape(outputs[common.OUTPUT_TYPE], shape=[-1])

        (num_batches, summary_op, metrics_to_updates, confusion_matrix,
         category_iou) = create_metrics(graph, samples, dataset,
                                        flat_predictions)

        # A positive flag caps how many evaluations run; otherwise loop forever.
        num_eval_iters = (FLAGS.max_number_of_evaluations
                          if FLAGS.max_number_of_evaluations > 0 else None)

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            summary_op=summary_op,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs,
            final_op=[confusion_matrix, category_iou])
def main(unused_argv):
    """Visualizes predictions on the vis split, with optional fake quantization.

    Waits for new checkpoints and writes prediction images for every batch;
    when FLAGS.is_quant is set, the eval graph is rewritten with fake
    quantization ops before checkpoints are restored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.vis_split,
                                               dataset_dir=FLAGS.dataset_dir)
    # Cityscapes predictions are produced in train_id space and must be mapped
    # back to eval ids before saving.
    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.vis_crop_size,
                                      FLAGS.vis_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.vis_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)
        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            # Only support batch_size = 1, since we assume the dimensions of original
            # image after tf.squeeze is [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove padded region) and then
            # we reisze the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            # Nearest-neighbor resizing keeps the outputs valid class ids.
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)

        if FLAGS.is_quant:
            # Rewrite the graph with fake-quantization ops so checkpoints from
            # quantization-aware training can be restored.
            tf.contrib.quantize.experimental_create_eval_graph(
                weight_bits=FLAGS.weight_bits,
                activation_bits=FLAGS.activation_bits)

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        # Supervisor handles session management only; summaries and automatic
        # checkpointing are disabled (restore is done manually below).
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
        last_checkpoint = None

        # Loop to visualize the results when new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0
               or num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1
            # Blocks until a checkpoint newer than last_checkpoint appears.
            last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                FLAGS.checkpoint_dir, last_checkpoint)
            start = time.time()
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            tf.logging.info('Visualizing with model %s', last_checkpoint)

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)
                sv.saver.restore(sess, last_checkpoint)

                image_id_offset = 0
                for batch in range(num_batches):
                    tf.logging.info('Visualizing batch %d / %d', batch + 1,
                                    num_batches)
                    _process_batch(
                        sess=sess,
                        original_images=samples[common.ORIGINAL_IMAGE],
                        semantic_predictions=predictions,
                        image_names=samples[common.IMAGE_NAME],
                        image_heights=samples[common.HEIGHT],
                        image_widths=samples[common.WIDTH],
                        image_id_offset=image_id_offset,
                        save_dir=save_dir,
                        raw_save_dir=raw_save_dir,
                        train_id_to_eval_id=train_id_to_eval_id)
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            # Throttle so successive passes start at least eval_interval_secs
            # apart.
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
def main(unused_argv):
    """Runs a single evaluation pass (mean IoU) over the eval split.

    Unlike the sibling scripts that use `slim.evaluation.evaluation_loop`,
    this variant calls `slim.evaluation.evaluate_once` against one checkpoint
    and logs the final metric values.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                               image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        # Flatten predictions/labels to 1-D for the metric ops.
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). The remapped pixels carry
        # weight 0 above, so they never contribute to the metric.
        labels = tf.where(
            tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

        # Metric tag encodes the evaluation scales, e.g. 'miou_1.0'.
        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        metric_map[predictions_tag] = tf.metrics.mean_iou(
            predictions, labels, dataset.num_classes, weights=weights)

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(
                metric_value, metric_name, print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        # Single evaluation against one checkpoint. Materialize the dict views
        # as lists: slim expects list-typed eval/final ops, and dict views are
        # not indexable in Python 3.
        # NOTE(review): evaluate_once takes a concrete checkpoint path;
        # FLAGS.checkpoint_dir is passed through unchanged here — confirm it
        # points at a checkpoint file rather than just a directory.
        metric_values = slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            final_op=list(metrics_to_values.values()))

        # Names and values align because both come from metrics_to_values in
        # the same iteration order.
        for metric, value in zip(metrics_to_values.keys(), metric_values):
            tf.logging.info('Metric %s has value %f', metric, value)
def main(unused_argv):
    """Continuously evaluates checkpoints on the eval split using mean IoU."""
    tf.logging.set_verbosity(tf.logging.INFO)

    # Dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):
            tf.logging.info('Performing single-scale test.')
            outputs = model.predict_labels(samples[common.IMAGE], model_options,
                                           image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            outputs = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        # Flatten the semantic prediction map and labels to 1-D.
        flat_predictions = tf.reshape(outputs[common.OUTPUT_TYPE], shape=[-1])
        flat_labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(flat_labels, dataset.ignore_label))

        # metrics.mean_iou requires labels in [0, dataset.num_classes); remap
        # ignore_label pixels to 0 — they carry zero weight above, so they are
        # never actually evaluated.
        flat_labels = tf.where(
            tf.equal(flat_labels, dataset.ignore_label),
            tf.zeros_like(flat_labels), flat_labels)

        # Tag encodes the scales (e.g. 'miou_1.0') plus a flipped marker.
        predictions_tag = 'miou' + ''.join(
            '_' + str(eval_scale) for eval_scale in FLAGS.eval_scales)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric: a single mean-IoU entry.
        metric_map = {
            predictions_tag: tf.metrics.mean_iou(
                flat_predictions, flat_labels, dataset.num_classes,
                weights=weights)
        }

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(
                metric_value, metric_name, print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        # A positive flag caps how many evaluations run; otherwise loop forever.
        num_eval_iters = (FLAGS.max_number_of_evaluations
                          if FLAGS.max_number_of_evaluations > 0 else None)

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
  """Visualizes predictions on the vis split and records per-image metrics.

  Builds the inference graph (single- or multi-scale), restores the single
  checkpoint named by FLAGS.checkpoint_dir, runs _process_batch over every
  batch (which fills one row of the global `perf_all` per image), and finally
  writes the accumulated [m_iou, m_accu, accu] rows to
  <vis_logdir>/per-image-metrics.txt.

  Args:
    unused_argv: leftover CLI args from tf.app.run; ignored.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information (num_classes, ignore_label, sizes).
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.vis_split, dataset_dir=FLAGS.dataset_dir)

  train_id_to_eval_id = None
  if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Shared with _process_batch, which fills one row of perf_all per image.
  global filename_list
  global perf_all
  # Fix: close the filename-list file deterministically; the original used a
  # bare open(...).readlines() and leaked the handle.
  with open(os.path.join(FLAGS.dataset_dir, '../ReOrg/filename-val.txt'),
            encoding='utf-8') as f:
    filename_list = [x.strip() for x in f.readlines()]
  # One row per image: [m_iou, m_accu, accu].
  perf_all = np.empty((len(filename_list), 3))

  # Prepare output directories for color-mapped and raw predictions.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  g = tf.Graph()
  with g.as_default():
    samples = input_generator.get(dataset,
                                  FLAGS.vis_crop_size,
                                  FLAGS.vis_batch_size,
                                  min_resize_value=FLAGS.min_resize_value,
                                  max_resize_value=FLAGS.max_resize_value,
                                  resize_factor=FLAGS.resize_factor,
                                  dataset_split=FLAGS.vis_split,
                                  is_training=False,
                                  model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.vis_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    # Flattened copies for the metric ops; `predictions` itself keeps its
    # spatial shape for visualization below.
    pred_compute = tf.reshape(predictions, shape=[-1])
    label_compute = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(label_compute, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    label_compute = tf.where(
        tf.equal(label_compute, dataset.ignore_label),
        tf.zeros_like(label_compute),
        label_compute)

    # Per-image mean IoU, mean class accuracy, and overall pixel accuracy.
    m_iou, m_accu = compute_m_iou_accu(
        pred_compute, label_compute, dataset.num_classes, weights=weights)
    accu = compute_accu(pred_compute, label_compute, weights=weights)

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      # Nearest-neighbor keeps the label ids intact (no interpolation).
      predictions = tf.squeeze(
          tf.image.resize_images(
              tf.expand_dims(predictions, 3),
              resized_shape,
              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
              align_corners=True), 3)

    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir=FLAGS.vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    # Round up so the last partial batch is still visualized.
    num_batches = int(math.ceil(
        dataset.num_samples / float(FLAGS.vis_batch_size)))

    # NOTE: unlike the checkpoint-watching variant, this path restores exactly
    # the checkpoint named by FLAGS.checkpoint_dir, once.
    last_checkpoint = FLAGS.checkpoint_dir
    tf.logging.info(
        'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                     time.gmtime()))
    tf.logging.info('Visualizing with model %s', last_checkpoint)

    with sv.managed_session(FLAGS.master,
                            start_standard_services=False) as sess:
      sv.start_queue_runners(sess)
      sv.saver.restore(sess, last_checkpoint)

      image_id_offset = 1
      for batch in range(num_batches):
        tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches)
        _process_batch(sess=sess,
                       original_images=samples[common.ORIGINAL_IMAGE],
                       labels=samples[common.LABEL],
                       semantic_predictions=predictions,
                       image_names=samples[common.IMAGE_NAME],
                       image_heights=samples[common.HEIGHT],
                       image_widths=samples[common.WIDTH],
                       perf_metrics=[m_iou, m_accu, accu],
                       image_id_offset=image_id_offset,
                       save_dir=save_dir,
                       raw_save_dir=raw_save_dir,
                       train_id_to_eval_id=train_id_to_eval_id)
        image_id_offset += FLAGS.vis_batch_size

    # Persist the per-image metric rows accumulated by _process_batch.
    np.savetxt(os.path.join(FLAGS.vis_logdir, 'per-image-metrics.txt'),
               perf_all)
    tf.logging.info(
        'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                     time.gmtime()))
def main(unused_argv):
  """Continuously evaluates mean-IoU on the eval split.

  Same as the plain evaluation entry point, but additionally drops pixels
  whose ground-truth label falls outside [0, num_classes) before computing
  the metric, so tf.metrics.mean_iou cannot be fed out-of-range ids.

  Args:
    unused_argv: leftover CLI args from tf.app.run; ignored.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information (num_classes, ignore_label, sizes).
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = input_generator.get(
        dataset, FLAGS.eval_crop_size, FLAGS.eval_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.eval_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    # Flatten predictions and labels to 1-D so per-pixel metrics apply.
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])

    # Zero-weight the ignore_label pixels so they do not affect the metric.
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    # Build a summary tag like 'miou_1.0' (plus '_flipped' if applicable).
    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    # Keep only pixels whose label is a valid class id; any remaining label
    # >= num_classes would crash mean_iou's confusion-matrix computation.
    # (insert by trobr)
    indices = tf.squeeze(tf.where(tf.less_equal(
        labels, dataset.num_classes - 1)), 1)
    labels = tf.cast(tf.gather(labels, indices), tf.int32)
    predictions = tf.gather(predictions, indices)
    # Bug fix: weights must be filtered with the same indices. The original
    # left `weights` at full length, so whenever any out-of-range label was
    # dropped above, weights no longer lined up with labels/predictions and
    # tf.metrics.mean_iou failed on the shape mismatch.
    weights = tf.gather(weights, indices)
    # end of insert
    # NOTE(review): arguments are passed (predictions, labels) although
    # tf.metrics.mean_iou's signature is (labels, predictions); mean IoU is
    # unchanged because the swap only transposes the confusion matrix.
    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_classes, weights=weights)

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    # Print each metric as a scalar summary during evaluation.
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    # Round up so the last partial batch is still evaluated.
    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    # <= 0 means "evaluate forever" (evaluation_loop convention).
    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
def main(unused_argv):
  """Repeatedly visualizes input images and ground-truth masks of the vis split.

  Watches for work in a loop (up to FLAGS.max_number_of_iterations passes,
  sleeping FLAGS.eval_interval_secs between passes) and calls _process_batch
  with the original image, the resized input image, and the label mask.
  No model inference is run in this variant.

  Args:
    unused_argv: leftover CLI args from tf.app.run; ignored.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information (num_classes, ignore_label, sizes).
  dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.vis_split,
                                             dataset_dir=FLAGS.dataset_dir)

  train_id_to_eval_id = None
  if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare output directories for color-mapped and raw outputs.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(FLAGS.vis_logdir,
                              _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  g = tf.Graph()
  with g.as_default():
    # NOTE(review): model_variant is hard-coded to "mobilenet_v2" here, unlike
    # the sibling entry points that pass FLAGS.model_variant — confirm this is
    # intentional.
    samples = input_generator.get(dataset,
                                  FLAGS.vis_crop_size,
                                  FLAGS.vis_batch_size,
                                  min_resize_value=FLAGS.min_resize_value,
                                  max_resize_value=FLAGS.max_resize_value,
                                  resize_factor=FLAGS.resize_factor,
                                  dataset_split=FLAGS.vis_split,
                                  is_training=False,
                                  model_variant="mobilenet_v2")

    # NOTE(review): model_options is built but never consumed below — no
    # prediction op is constructed in this function.
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.vis_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Drop the batch dimension; downstream _process_batch gets single tensors.
    mask = tf.squeeze(samples[common.LABEL])
    original_images = tf.squeeze(samples[common.ORIGINAL_IMAGE])
    images = tf.squeeze(samples[common.IMAGE])

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir=FLAGS.vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    # Round up so the last partial batch is still visualized.
    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))

    # Loop to visualize the results when new checkpoint is created.
    # max_number_of_iterations <= 0 means "loop forever".
    num_iters = 0
    while (FLAGS.max_number_of_iterations <= 0 or
           num_iters < FLAGS.max_number_of_iterations):
      num_iters += 1
      start = time.time()
      tf.logging.info('Starting visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      with sv.managed_session(FLAGS.master,
                              start_standard_services=False) as sess:
        sv.start_queue_runners(sess)
        image_id_offset = 0
        for batch in range(num_batches):
          tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches)
          _process_batch(sess=sess,
                         original_images=original_images,
                         resize_images=images,
                         image_names=samples[common.IMAGE_NAME],
                         mask=mask,
                         image_heights=samples[common.HEIGHT],
                         image_widths=samples[common.WIDTH],
                         image_id_offset=image_id_offset,
                         save_dir=save_dir,
                         raw_save_dir=raw_save_dir,
                         train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size
      tf.logging.info('Finished visualization at ' +
                      time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
      # Sleep out the remainder of the interval before the next pass.
      time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
      if time_to_next_eval > 0:
        time.sleep(time_to_next_eval)