def _make_evaluation_dict(self, resized_groundtruth_masks=False):
    """Builds a single-example evaluation dict from fixed toy tensors.

    One detection (score 0.8, class 0) and one groundtruth box (class 1),
    both with box [0, 0, 1, 1], are attached to an all-zero 1x20x20x3 uint8
    image keyed 'image1'.

    Args:
      resized_groundtruth_masks: When true, the groundtruth instance mask is
        created with shape [1, 10, 10] instead of [1, 20, 20].

    Returns:
      The value returned by `eval_util.result_dict_for_single_example` for
      the image, key, detections and groundtruth built here.
    """
    gt_keys = fields.InputDataFields
    det_keys = fields.DetectionResultFields

    image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
    image_id = tf.constant('image1')

    # Tensors are created in the same order as the dict entries are listed.
    detections = {
        det_keys.detection_boxes: tf.constant([[[0., 0., 1., 1.]]]),
        det_keys.detection_scores: tf.constant([[0.8]]),
        det_keys.detection_classes: tf.constant([[0]]),
        det_keys.detection_masks: tf.ones(shape=[1, 1, 20, 20],
                                          dtype=tf.float32),
        det_keys.num_detections: tf.constant([1]),
    }
    groundtruth = {
        gt_keys.groundtruth_boxes: tf.constant([[0., 0., 1., 1.]]),
        gt_keys.groundtruth_classes: tf.constant([1]),
        gt_keys.groundtruth_instance_masks: tf.ones(shape=[1, 20, 20],
                                                    dtype=tf.uint8),
    }
    if resized_groundtruth_masks:
        # Swap in a mask whose spatial size differs from the image's.
        groundtruth[gt_keys.groundtruth_instance_masks] = tf.ones(
            shape=[1, 10, 10], dtype=tf.uint8)

    return eval_util.result_dict_for_single_example(
        image, image_id, detections, groundtruth)
def _make_evaluation_dict(self, resized_groundtruth_masks=False):
    """Creates the evaluation dict for one hard-coded example.

    The example is an all-zero 1x20x20x3 uint8 image with key 'image1', a
    single detection and a single groundtruth box, both [0, 0, 1, 1].

    Args:
      resized_groundtruth_masks: If true, use a [1, 10, 10] groundtruth
        instance mask rather than the default [1, 20, 20] one.

    Returns:
      The output of `eval_util.result_dict_for_single_example`.
    """
    det = fields.DetectionResultFields
    gt = fields.InputDataFields

    zero_image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
    image_key = tf.constant('image1')

    detections = {}
    detections[det.detection_boxes] = tf.constant([[[0., 0., 1., 1.]]])
    detections[det.detection_scores] = tf.constant([[0.8]])
    detections[det.detection_classes] = tf.constant([[0]])
    detections[det.detection_masks] = tf.ones(
        shape=[1, 1, 20, 20], dtype=tf.float32)
    detections[det.num_detections] = tf.constant([1])

    groundtruth = {}
    groundtruth[gt.groundtruth_boxes] = tf.constant([[0., 0., 1., 1.]])
    groundtruth[gt.groundtruth_classes] = tf.constant([1])
    groundtruth[gt.groundtruth_instance_masks] = tf.ones(
        shape=[1, 20, 20], dtype=tf.uint8)
    if resized_groundtruth_masks:
        # Replace the full-size mask with a smaller one.
        groundtruth[gt.groundtruth_instance_masks] = tf.ones(
            shape=[1, 10, 10], dtype=tf.uint8)

    return eval_util.result_dict_for_single_example(
        zero_image, image_key, detections, groundtruth)
def _make_evaluation_dict(self,
                          resized_groundtruth_masks=False,
                          batch_size=1,
                          max_gt_boxes=None,
                          scale_to_absolute=False):
    """Builds an evaluation dict from fixed toy tensors, optionally batched.

    Each example is an all-zero 20x20x3 uint8 image with one detection and
    one groundtruth box, both [0, 0, 1, 1].

    Args:
      resized_groundtruth_masks: When true, the groundtruth instance mask is
        [1, 10, 10] rather than [1, 20, 20].
      batch_size: Number of examples. Detections are always tiled to this
        size; groundtruth gets a leading batch dimension only when
        `batch_size > 1`.
      max_gt_boxes: Forwarded to
        `eval_util.result_dict_for_batched_example`; unused in the
        single-example path.
      scale_to_absolute: Forwarded to the `eval_util` result-dict function.

    Returns:
      The output of `eval_util.result_dict_for_batched_example` when
      `batch_size > 1`, otherwise that of
      `eval_util.result_dict_for_single_example`.
    """
    gt_keys = fields.InputDataFields
    det_keys = fields.DetectionResultFields

    image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
    if batch_size != 1:
        key = tf.constant([str(i) for i in range(batch_size)])
    else:
        key = tf.constant('image1')

    # Tensors are created in the same order as the dict entries are listed.
    detections = {
        det_keys.detection_boxes: tf.tile(
            tf.constant([[[0., 0., 1., 1.]]]), multiples=[batch_size, 1, 1]),
        det_keys.detection_scores: tf.tile(
            tf.constant([[0.8]]), multiples=[batch_size, 1]),
        det_keys.detection_classes: tf.tile(
            tf.constant([[0]]), multiples=[batch_size, 1]),
        det_keys.detection_masks: tf.tile(
            tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
            multiples=[batch_size, 1, 1, 1]),
        det_keys.num_detections: tf.ones([batch_size]),
    }

    gt_boxes = tf.constant([[0., 0., 1., 1.]])
    gt_classes = tf.constant([1])
    gt_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    if resized_groundtruth_masks:
        gt_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

    is_batched = batch_size > 1
    if is_batched:
        # Add a leading batch dimension and repeat the example.
        gt_boxes = tf.tile(
            tf.expand_dims(gt_boxes, 0), multiples=[batch_size, 1, 1])
        gt_classes = tf.tile(
            tf.expand_dims(gt_classes, 0), multiples=[batch_size, 1])
        gt_masks = tf.tile(
            tf.expand_dims(gt_masks, 0), multiples=[batch_size, 1, 1, 1])

    groundtruth = {
        gt_keys.groundtruth_boxes: gt_boxes,
        gt_keys.groundtruth_classes: gt_classes,
        gt_keys.groundtruth_instance_masks: gt_masks,
    }

    if not is_batched:
        return eval_util.result_dict_for_single_example(
            image, key, detections, groundtruth,
            scale_to_absolute=scale_to_absolute)
    return eval_util.result_dict_for_batched_example(
        image, key, detections, groundtruth,
        scale_to_absolute=scale_to_absolute,
        max_gt_boxes=max_gt_boxes)
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Builds detection and groundtruth tensors for one image/audio example.

    An input dict is dequeued through a prefetch queue, its image and audio
    are run through `model.preprocess`, `model.predict` and
    `model.postprocess`, and the result is combined with the groundtruth.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      tensor_dict: the dictionary returned by
        `eval_util.result_dict_for_single_example`.
    """
    input_fields = fields.InputDataFields

    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()

    # Add a leading batch dimension of 1 to the single dequeued example.
    original_image = tf.expand_dims(input_dict[input_fields.image], 0)
    original_audio = tf.expand_dims(input_dict[input_fields.audio], 0)

    # tf.cast replaces the deprecated tf.to_float.
    # NOTE(review): the second argument to preprocess presumably flags audio
    # input -- confirm against the model's preprocess signature.
    preprocessed_image = model.preprocess(
        tf.cast(original_image, dtype=tf.float32), False)
    preprocessed_audio = model.preprocess(
        tf.cast(original_audio, dtype=tf.float32), True)
    prediction_dict = model.predict(preprocessed_image, preprocessed_audio)
    detections = model.postprocess(prediction_dict)

    groundtruth = None
    if not ignore_groundtruth:
        # These keys must be present in the input dict.
        groundtruth = {
            input_fields.groundtruth_boxes:
                input_dict[input_fields.groundtruth_boxes],
            input_fields.groundtruth_classes:
                input_dict[input_fields.groundtruth_classes],
            input_fields.groundtruth_area:
                input_dict[input_fields.groundtruth_area],
            input_fields.groundtruth_is_crowd:
                input_dict[input_fields.groundtruth_is_crowd],
            input_fields.groundtruth_difficult:
                input_dict[input_fields.groundtruth_difficult],
            input_fields.groundtruth_image_classes:
                input_dict[input_fields.groundtruth_image_classes],
        }
        if input_fields.groundtruth_group_of in input_dict:
            groundtruth[input_fields.groundtruth_group_of] = (
                input_dict[input_fields.groundtruth_group_of])
        # Groundtruth masks are only attached when the model emits masks.
        if fields.DetectionResultFields.detection_masks in detections:
            groundtruth[input_fields.groundtruth_instance_masks] = (
                input_dict[input_fields.groundtruth_instance_masks])

    return eval_util.result_dict_for_single_example(
        original_image,
        input_dict[input_fields.source_id],
        detections,
        groundtruth,
        class_agnostic=(
            fields.DetectionResultFields.detection_classes not in detections),
        scale_to_absolute=True)
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Builds detection and groundtruth tensors for a single example.

    An input dict is dequeued through a prefetch queue, its image is run
    through `model.preprocess`, `model.predict` and `model.postprocess`, and
    the result is combined with the groundtruth.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      tensor_dict: the dictionary returned by
        `eval_util.result_dict_for_single_example`.
    """
    input_fields = fields.InputDataFields

    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()

    # Add a leading batch dimension of 1 to the single dequeued image.
    original_image = tf.expand_dims(input_dict[input_fields.image], 0)
    # tf.cast replaces the deprecated tf.to_float.
    preprocessed_image, true_image_shapes = model.preprocess(
        tf.cast(original_image, dtype=tf.float32))
    prediction_dict = model.predict(preprocessed_image, true_image_shapes)
    detections = model.postprocess(prediction_dict, true_image_shapes)

    groundtruth = None
    if not ignore_groundtruth:
        # These keys must be present in the input dict.
        groundtruth = {
            input_fields.groundtruth_boxes:
                input_dict[input_fields.groundtruth_boxes],
            input_fields.groundtruth_classes:
                input_dict[input_fields.groundtruth_classes],
            input_fields.groundtruth_area:
                input_dict[input_fields.groundtruth_area],
            input_fields.groundtruth_is_crowd:
                input_dict[input_fields.groundtruth_is_crowd],
            input_fields.groundtruth_difficult:
                input_dict[input_fields.groundtruth_difficult],
        }
        if input_fields.groundtruth_group_of in input_dict:
            groundtruth[input_fields.groundtruth_group_of] = (
                input_dict[input_fields.groundtruth_group_of])
        # Groundtruth masks are only attached when the model emits masks.
        if fields.DetectionResultFields.detection_masks in detections:
            groundtruth[input_fields.groundtruth_instance_masks] = (
                input_dict[input_fields.groundtruth_instance_masks])

    return eval_util.result_dict_for_single_example(
        original_image,
        input_dict[input_fields.source_id],
        detections,
        groundtruth,
        class_agnostic=(
            fields.DetectionResultFields.detection_classes not in detections),
        scale_to_absolute=True)
def _extract_predictions_and_losses(model,
                                    create_input_dict_fn,
                                    ignore_groundtruth=False):
    """Builds the detection graph and returns its output and loss tensors.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      prediction_groundtruth_dict: the dictionary returned by
        `eval_util.result_dict_for_single_example` for the postprocessed
        detections and, unless ignored, the groundtruth tensors.
      losses_dict: A dictionary containing detection losses. This is empty
        when ignore_groundtruth is true.
    """
    in_fields = fields.InputDataFields
    out_fields = fields.DetectionResultFields

    tensors = create_input_dict_fn()
    queue = prefetcher.prefetch(tensors, capacity=500)
    tensors = queue.dequeue()

    # Add a leading batch dimension of 1 to the single dequeued image.
    original_image = tf.expand_dims(tensors[in_fields.image], 0)
    preprocessed_image, true_image_shapes = model.preprocess(
        tf.cast(original_image, dtype=tf.float32))
    prediction_dict = model.predict(preprocessed_image, true_image_shapes)
    detections = model.postprocess(prediction_dict, true_image_shapes)

    groundtruth = None
    losses_dict = {}
    if not ignore_groundtruth:
        required = (
            in_fields.groundtruth_boxes,
            in_fields.groundtruth_classes,
            in_fields.groundtruth_area,
            in_fields.groundtruth_is_crowd,
            in_fields.groundtruth_difficult,
        )
        groundtruth = {name: tensors[name] for name in required}
        if in_fields.groundtruth_group_of in tensors:
            groundtruth[in_fields.groundtruth_group_of] = (
                tensors[in_fields.groundtruth_group_of])

        # Masks and keypoints are only supplied when the model predicts them.
        masks_list = None
        if out_fields.detection_masks in detections:
            gt_masks = tensors[in_fields.groundtruth_instance_masks]
            groundtruth[in_fields.groundtruth_instance_masks] = gt_masks
            masks_list = [gt_masks]

        keypoints_list = None
        if out_fields.detection_keypoints in detections:
            gt_keypoints = tensors[in_fields.groundtruth_keypoints]
            groundtruth[in_fields.groundtruth_keypoints] = gt_keypoints
            keypoints_list = [gt_keypoints]

        # Class ids are shifted down by one before one-hot encoding.
        label_id_offset = 1
        one_hot_classes = tf.one_hot(
            tensors[in_fields.groundtruth_classes] - label_id_offset,
            depth=model.num_classes)
        model.provide_groundtruth(
            [tensors[in_fields.groundtruth_boxes]],
            [one_hot_classes],
            groundtruth_masks_list=masks_list,
            groundtruth_keypoints_list=keypoints_list)
        losses_dict.update(model.loss(prediction_dict, true_image_shapes))

    is_class_agnostic = out_fields.detection_classes not in detections
    result_dict = eval_util.result_dict_for_single_example(
        original_image,
        tensors[in_fields.source_id],
        detections,
        groundtruth,
        class_agnostic=is_class_agnostic,
        scale_to_absolute=True)
    return result_dict, losses_dict
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Builds per-frame evaluation dictionaries for an image or a sequence.

    If the input dict carries a 'batch' entry it is removed and handed to
    `_create_detection_op`; otherwise the input dict is dequeued through a
    prefetch queue and each value is wrapped in a 1-tuple. One result dict is
    then produced per frame.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      A list with one `eval_util.result_dict_for_single_example` result per
      frame.
    """
    input_fields = fields.InputDataFields

    input_dict = create_input_dict_fn()
    batch = None
    if 'batch' in input_dict:
        batch = input_dict.pop('batch')
    else:
        prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
        input_dict = prefetch_queue.dequeue()
        # Consistent format for images and videos: a single image becomes a
        # one-frame sequence. Keys are snapshotted so the dict can be
        # updated while looping on both Python 2 and Python 3
        # (dict.iteritems() does not exist on Python 3).
        # NOTE(review): wrapping is applied only on this non-batch path,
        # presumably because batch inputs are already per-frame sequences --
        # confirm against `_create_detection_op`.
        for key in list(input_dict):
            input_dict[key] = (input_dict[key],)

    detections = _create_detection_op(model, input_dict, batch)

    # Print out analysis of the model.
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=(tf.contrib.tfprof.model_analyzer.
                        TRAINABLE_VARS_PARAMS_STAT_OPTIONS))
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    optional_keys = (
        input_fields.groundtruth_area,
        input_fields.groundtruth_is_crowd,
        input_fields.groundtruth_difficult,
        input_fields.groundtruth_group_of,
    )

    num_frames = len(input_dict[input_fields.image])
    ret = []
    for i in range(num_frames):
        original_image = tf.expand_dims(input_dict[input_fields.image][i], 0)

        groundtruth = None
        if not ignore_groundtruth:
            groundtruth = {
                input_fields.groundtruth_boxes:
                    input_dict[input_fields.groundtruth_boxes][i],
                input_fields.groundtruth_classes:
                    input_dict[input_fields.groundtruth_classes][i],
            }
            for opt_key in optional_keys:
                if opt_key in input_dict:
                    groundtruth[opt_key] = input_dict[opt_key][i]
            # Groundtruth masks are only attached when the model emits masks.
            if fields.DetectionResultFields.detection_masks in detections:
                groundtruth[input_fields.groundtruth_instance_masks] = (
                    input_dict[input_fields.groundtruth_instance_masks][i])

        # Slice out frame i and restore a leading dimension of 1.
        detections_frame = {
            key: tf.expand_dims(value[i], 0)
            for key, value in detections.items()
        }
        source_id = (
            batch.key[0]
            if batch is not None else input_dict[input_fields.source_id][i])
        ret.append(
            eval_util.result_dict_for_single_example(
                original_image,
                source_id,
                detections_frame,
                groundtruth,
                class_agnostic=(
                    fields.DetectionResultFields.detection_classes
                    not in detections),
                scale_to_absolute=True))
    return ret
def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Relies on names from the enclosing scope (`detection_model_fn`,
    `use_tpu`, `train_config`, `eval_input_config`, `hparams`).

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` (a `TPUEstimatorSpec` when `use_tpu` is set) that
      encapsulates the model and its serving configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
        labels = unstack_batch(
            labels,
            unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
        labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
        gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
        gt_masks_list = None
        if fields.InputDataFields.groundtruth_instance_masks in labels:
            gt_masks_list = labels[
                fields.InputDataFields.groundtruth_instance_masks]
        gt_keypoints_list = None
        if fields.InputDataFields.groundtruth_keypoints in labels:
            gt_keypoints_list = labels[
                fields.InputDataFields.groundtruth_keypoints]
        detection_model.provide_groundtruth(
            groundtruth_boxes_list=gt_boxes_list,
            groundtruth_classes_list=gt_classes_list,
            groundtruth_masks_list=gt_masks_list,
            groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images,
        features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
        if train_config.fine_tune_checkpoint and hparams.load_pretrained:
            asg_map = detection_model.restore_map(
                from_detection_checkpoint=(
                    train_config.from_detection_checkpoint),
                load_all_detection_checkpoint_vars=(
                    train_config.load_all_detection_checkpoint_vars))
            available_var_map = (
                variables_helper.get_variables_available_in_checkpoint(
                    asg_map, train_config.fine_tune_checkpoint,
                    include_global_step=False))
            if use_tpu:
                # On TPU the checkpoint initialisation is deferred to a
                # scaffold function handed to the TPUEstimatorSpec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(
                    train_config.fine_tune_checkpoint, available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        losses_dict = detection_model.loss(
            prediction_dict,
            features[fields.InputDataFields.true_image_shape])
        # dict.values() works on both Python 2 and 3; itervalues() exists
        # only on Python 2.
        losses = list(losses_dict.values())
        total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        training_optimizer, optimizer_summary_vars = optimizer_builder.build(
            train_config.optimizer)

        if use_tpu:
            training_optimizer = tpu_optimizer.CrossShardOptimizer(
                training_optimizer)

        # Optionally freeze some layers by setting their gradients to be
        # zero.
        trainable_variables = None
        if train_config.freeze_variables:
            trainable_variables = tf.contrib.framework.filter_variables(
                tf.trainable_variables(),
                exclude_patterns=train_config.freeze_variables)

        clip_gradients_value = None
        if train_config.gradient_clipping_by_norm > 0:
            clip_gradients_value = train_config.gradient_clipping_by_norm

        if not use_tpu:
            for var in optimizer_summary_vars:
                tf.summary.scalar(var.op.name, var)
        summaries = [] if use_tpu else None
        train_op = tf.contrib.layers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=None,
            clip_gradients=clip_gradients_value,
            optimizer=training_optimizer,
            variables=trainable_variables,
            summaries=summaries,
            name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
        }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
        # Detection summaries during eval.
        class_agnostic = (
            fields.DetectionResultFields.detection_classes not in detections)
        groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
        # Only the first example of the batch is used.
        eval_dict = eval_util.result_dict_for_single_example(
            tf.expand_dims(
                features[fields.InputDataFields.original_image][0], 0),
            features[inputs.HASH_KEY][0],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=False)

        if class_agnostic:
            category_index = (
                label_map_util.create_class_agnostic_category_index())
        else:
            category_index = (
                label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path))
        detection_and_groundtruth = (
            vis_utils.draw_side_by_side_evaluation_image(
                eval_dict, category_index, max_boxes_to_draw=20,
                min_score_thresh=0.2))
        if not use_tpu:
            tf.summary.image('Detections_Left_Groundtruth_Right',
                             detection_and_groundtruth)

        # Eval metrics on a single image.
        detection_fields = fields.DetectionResultFields()
        input_data_fields = fields.InputDataFields()
        coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
            category_index.values())
        eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
            image_id=eval_dict[input_data_fields.key],
            groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
            groundtruth_classes=eval_dict[
                input_data_fields.groundtruth_classes],
            detection_boxes=eval_dict[detection_fields.detection_boxes],
            detection_scores=eval_dict[detection_fields.detection_scores],
            detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            scaffold_fn=scaffold_fn,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metric_ops,
            export_outputs=export_outputs)
    else:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs=export_outputs)
def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Relies on names from the enclosing scope (`detection_model_fn`,
    `use_tpu`, `train_config`, `eval_config`, `eval_input_config`,
    `configs`, `hparams`).

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
      configurations. A `TPUEstimatorSpec` is returned instead when
      `use_tpu` is set and the mode is not EVAL.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Make sure to set the Keras learning phase. True during training,
    # False for inference.
    tf.keras.backend.set_learning_phase(is_training)
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
        labels = unstack_batch(
            labels,
            unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # For evaling on train data, it is necessary to check whether
        # groundtruth must be unpadded.
        boxes_shape = (
            labels[fields.InputDataFields.groundtruth_boxes]
            .get_shape().as_list())
        unpad_groundtruth_tensors = boxes_shape[1] is not None
        labels = unstack_batch(
            labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
        gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
        gt_masks_list = None
        if fields.InputDataFields.groundtruth_instance_masks in labels:
            gt_masks_list = labels[
                fields.InputDataFields.groundtruth_instance_masks]
        gt_keypoints_list = None
        if fields.InputDataFields.groundtruth_keypoints in labels:
            gt_keypoints_list = labels[
                fields.InputDataFields.groundtruth_keypoints]
        gt_weights_list = None
        if fields.InputDataFields.groundtruth_weights in labels:
            gt_weights_list = labels[
                fields.InputDataFields.groundtruth_weights]
        # Must default to None like the other optional fields: it is passed
        # to provide_groundtruth below even when labels carry no is_crowd
        # entry, which would otherwise raise UnboundLocalError.
        gt_is_crowd_list = None
        if fields.InputDataFields.groundtruth_is_crowd in labels:
            gt_is_crowd_list = labels[
                fields.InputDataFields.groundtruth_is_crowd]
        detection_model.provide_groundtruth(
            groundtruth_boxes_list=gt_boxes_list,
            groundtruth_classes_list=gt_classes_list,
            groundtruth_masks_list=gt_masks_list,
            groundtruth_keypoints_list=gt_keypoints_list,
            groundtruth_weights_list=gt_weights_list,
            groundtruth_is_crowd_list=gt_is_crowd_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images,
        features[fields.InputDataFields.true_image_shape])
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
        detections = detection_model.postprocess(
            prediction_dict,
            features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
        if train_config.fine_tune_checkpoint and hparams.load_pretrained:
            if not train_config.fine_tune_checkpoint_type:
                # train_config.from_detection_checkpoint field is
                # deprecated. For backward compatibility, set
                # train_config.fine_tune_checkpoint_type based on
                # train_config.from_detection_checkpoint.
                if train_config.from_detection_checkpoint:
                    train_config.fine_tune_checkpoint_type = 'detection'
                else:
                    train_config.fine_tune_checkpoint_type = 'classification'
            asg_map = detection_model.restore_map(
                fine_tune_checkpoint_type=(
                    train_config.fine_tune_checkpoint_type),
                load_all_detection_checkpoint_vars=(
                    train_config.load_all_detection_checkpoint_vars))
            available_var_map = (
                variables_helper.get_variables_available_in_checkpoint(
                    asg_map, train_config.fine_tune_checkpoint,
                    include_global_step=False))
            if use_tpu:
                # On TPU the checkpoint initialisation is deferred to a
                # scaffold function handed to the TPUEstimatorSpec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(
                    train_config.fine_tune_checkpoint, available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        losses_dict = detection_model.loss(
            prediction_dict,
            features[fields.InputDataFields.true_image_shape])
        losses = list(losses_dict.values())
        if train_config.add_regularization_loss:
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            if regularization_losses:
                regularization_loss = tf.add_n(regularization_losses,
                                               name='regularization_loss')
                losses.append(regularization_loss)
                losses_dict['Loss/regularization_loss'] = regularization_loss
        total_loss = tf.add_n(losses, name='total_loss')
        losses_dict['Loss/total_loss'] = total_loss

        if 'graph_rewriter_config' in configs:
            graph_rewriter_fn = graph_rewriter_builder.build(
                configs['graph_rewriter_config'], is_training=is_training)
            graph_rewriter_fn()

        # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode
        # once we can write learning rate summaries on TPU without host
        # calls.
        global_step = tf.train.get_or_create_global_step()
        training_optimizer, optimizer_summary_vars = optimizer_builder.build(
            train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if use_tpu:
            training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
                training_optimizer)

        # Optionally freeze some layers by setting their gradients to be
        # zero.
        include_variables = (
            train_config.update_trainable_variables
            if train_config.update_trainable_variables else None)
        exclude_variables = (
            train_config.freeze_variables
            if train_config.freeze_variables else None)
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            include_patterns=include_variables,
            exclude_patterns=exclude_variables)

        clip_gradients_value = None
        if train_config.gradient_clipping_by_norm > 0:
            clip_gradients_value = train_config.gradient_clipping_by_norm

        if not use_tpu:
            for var in optimizer_summary_vars:
                tf.summary.scalar(var.op.name, var)
        summaries = [] if use_tpu else None
        train_op = tf.contrib.layers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=None,
            clip_gradients=clip_gradients_value,
            optimizer=training_optimizer,
            variables=trainable_variables,
            summaries=summaries,
            name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
        }

    eval_metric_ops = None
    scaffold = None
    if mode == tf.estimator.ModeKeys.EVAL:
        class_agnostic = (
            fields.DetectionResultFields.detection_classes not in detections)
        groundtruth = _prepare_groundtruth_for_eval(
            detection_model, class_agnostic)
        use_original_images = (
            fields.InputDataFields.original_image in features)
        eval_images = (
            features[fields.InputDataFields.original_image]
            if use_original_images else
            features[fields.InputDataFields.image])
        # Only the first example of the batch is used.
        eval_dict = eval_util.result_dict_for_single_example(
            eval_images[0:1],
            features[inputs.HASH_KEY][0],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=True)

        if class_agnostic:
            category_index = (
                label_map_util.create_class_agnostic_category_index())
        else:
            category_index = (
                label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path))
        img_summary = None
        if not use_tpu and use_original_images:
            detection_and_groundtruth = (
                vis_utils.draw_side_by_side_evaluation_image(
                    eval_dict, category_index, max_boxes_to_draw=20,
                    min_score_thresh=0.2,
                    use_normalized_coordinates=False))
            img_summary = tf.summary.image(
                'Detections_Left_Groundtruth_Right',
                detection_and_groundtruth)

        # Eval metrics on a single example.
        eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
            eval_config, category_index.values(), eval_dict)
        for loss_key, loss_tensor in losses_dict.items():
            eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
        for var in optimizer_summary_vars:
            eval_metric_ops[var.op.name] = (var, tf.no_op())
        if img_summary is not None:
            eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
                img_summary, tf.no_op())
        eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

        if eval_config.use_moving_averages:
            variable_averages = tf.train.ExponentialMovingAverage(0.0)
            variables_to_restore = variable_averages.variables_to_restore()
            keep_checkpoint_every_n_hours = (
                train_config.keep_checkpoint_every_n_hours)
            saver = tf.train.Saver(
                variables_to_restore,
                keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
            scaffold = tf.train.Scaffold(saver=saver)

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            scaffold_fn=scaffold_fn,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metric_ops,
            export_outputs=export_outputs)
    else:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs=export_outputs,
            scaffold=scaffold)
def _extract_predictions_and_losses(model,
                                    create_input_dict_fn,
                                    ignore_groundtruth=False):
    """Constructs tensorflow detection graph and returns output tensors.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      prediction_groundtruth_dict: A dictionary with postprocessed tensors
        (keyed by standard_fields.DetectionResultsFields) and optional
        groundtruth tensors (keyed by standard_fields.InputDataFields).
      losses_dict: A dictionary containing detection losses. This is empty
        when ignore_groundtruth is true.
    """
    input_fields = fields.InputDataFields

    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()

    # Add a leading batch dimension of 1 to the single dequeued image.
    original_image = tf.expand_dims(input_dict[input_fields.image], 0)
    # tf.cast replaces the deprecated tf.to_float.
    preprocessed_image, true_image_shapes = model.preprocess(
        tf.cast(original_image, dtype=tf.float32))
    prediction_dict = model.predict(preprocessed_image, true_image_shapes)
    detections = model.postprocess(prediction_dict, true_image_shapes)

    groundtruth = None
    losses_dict = {}
    if not ignore_groundtruth:
        # These keys must be present in the input dict.
        groundtruth = {
            input_fields.groundtruth_boxes:
                input_dict[input_fields.groundtruth_boxes],
            input_fields.groundtruth_classes:
                input_dict[input_fields.groundtruth_classes],
            input_fields.groundtruth_area:
                input_dict[input_fields.groundtruth_area],
            input_fields.groundtruth_is_crowd:
                input_dict[input_fields.groundtruth_is_crowd],
            input_fields.groundtruth_difficult:
                input_dict[input_fields.groundtruth_difficult],
        }
        if input_fields.groundtruth_group_of in input_dict:
            groundtruth[input_fields.groundtruth_group_of] = (
                input_dict[input_fields.groundtruth_group_of])

        # Masks and keypoints are only supplied when the model predicts them.
        groundtruth_masks_list = None
        if fields.DetectionResultFields.detection_masks in detections:
            groundtruth[input_fields.groundtruth_instance_masks] = (
                input_dict[input_fields.groundtruth_instance_masks])
            groundtruth_masks_list = [
                input_dict[input_fields.groundtruth_instance_masks]]

        groundtruth_keypoints_list = None
        if fields.DetectionResultFields.detection_keypoints in detections:
            groundtruth[input_fields.groundtruth_keypoints] = (
                input_dict[input_fields.groundtruth_keypoints])
            groundtruth_keypoints_list = [
                input_dict[input_fields.groundtruth_keypoints]]

        # Class ids are shifted down by one before one-hot encoding.
        label_id_offset = 1
        # Optional groundtruth is passed by keyword so the call does not
        # depend on the positional order of provide_groundtruth's optional
        # parameters.
        model.provide_groundtruth(
            [input_dict[input_fields.groundtruth_boxes]],
            [tf.one_hot(
                input_dict[input_fields.groundtruth_classes]
                - label_id_offset,
                depth=model.num_classes)],
            groundtruth_masks_list=groundtruth_masks_list,
            groundtruth_keypoints_list=groundtruth_keypoints_list)
        losses_dict.update(model.loss(prediction_dict, true_image_shapes))

    result_dict = eval_util.result_dict_for_single_example(
        original_image,
        input_dict[input_fields.source_id],
        detections,
        groundtruth,
        class_agnostic=(
            fields.DetectionResultFields.detection_classes not in detections),
        scale_to_absolute=True)
    return result_dict, losses_dict
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Builds the detection ops and one evaluation tensor dict per frame.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries. If
      the returned dictionary has a 'batch' entry it is popped and passed to
      `_create_detection_op`; otherwise the dictionary is routed through a
      prefetch queue.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    A list with one entry per frame, each being the tensor dictionary
    returned by `eval_util.result_dict_for_single_example`.
  """
  input_fields = fields.InputDataFields

  input_dict = create_input_dict_fn()
  batch = None
  if 'batch' in input_dict:
    batch = input_dict.pop('batch')
  else:
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    # Consistent format for images and videos: wrap every value in a 1-tuple
    # so the per-frame indexing below applies to single images as well.
    # `.items()` (rather than the Python 2-only `.iteritems()`) is snapshotted
    # with `list` because values are reassigned while looping.
    for key, value in list(input_dict.items()):
      input_dict[key] = (value,)

  detections = _create_detection_op(model, input_dict, batch)

  # Print out analysis of the model.
  model_analyzer = tf.contrib.tfprof.model_analyzer
  model_analyzer.print_model_analysis(
      tf.get_default_graph(),
      tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
  model_analyzer.print_model_analysis(
      tf.get_default_graph(),
      tfprof_options=model_analyzer.FLOAT_OPS_OPTIONS)

  # Loop-invariant values, computed once.
  class_agnostic = (
      fields.DetectionResultFields.detection_classes not in detections)
  has_masks = fields.DetectionResultFields.detection_masks in detections
  optional_keys = (
      input_fields.groundtruth_area,
      input_fields.groundtruth_is_crowd,
      input_fields.groundtruth_difficult,
      input_fields.groundtruth_group_of,
  )

  num_frames = len(input_dict[input_fields.image])
  ret = []
  for i in range(num_frames):
    original_image = tf.expand_dims(input_dict[input_fields.image][i], 0)

    groundtruth = None
    if not ignore_groundtruth:
      groundtruth = {
          input_fields.groundtruth_boxes:
              input_dict[input_fields.groundtruth_boxes][i],
          input_fields.groundtruth_classes:
              input_dict[input_fields.groundtruth_classes][i],
      }
      # Optional groundtruth entries are copied only when present.
      for opt_key in optional_keys:
        if opt_key in input_dict:
          groundtruth[opt_key] = input_dict[opt_key][i]
      if has_masks:
        groundtruth[input_fields.groundtruth_instance_masks] = (
            input_dict[input_fields.groundtruth_instance_masks][i])

    # Slice out frame i of every detection tensor and restore a leading
    # dimension of size 1.
    detections_frame = {
        key: tf.expand_dims(value[i], 0)
        for key, value in detections.items()
    }

    # With a 'batch' input, the first batch key is used for every frame.
    source_id = (batch.key[0] if batch is not None
                 else input_dict[input_fields.source_id][i])
    ret.append(
        eval_util.result_dict_for_single_example(
            original_image,
            source_id,
            detections_frame,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=True))
  return ret
def _make_evaluation_dict(self,
                          resized_groundtruth_masks=False,
                          batch_size=1,
                          max_gt_boxes=None,
                          scale_to_absolute=False):
  """Builds an evaluation result dict from constant test tensors.

  Args:
    resized_groundtruth_masks: if true, the groundtruth instance masks are
      10x10 instead of 20x20.
    batch_size: number of images; values above 1 use the batched
      `eval_util` entry point with tiled groundtruth.
    max_gt_boxes: forwarded to `result_dict_for_batched_example` (batched
      case only).
    scale_to_absolute: forwarded to the `eval_util` call.

  Returns:
    The dictionary produced by `eval_util.result_dict_for_batched_example`
    when `batch_size > 1`, otherwise by
    `eval_util.result_dict_for_single_example`.
  """
  gt_fields = standard_fields.InputDataFields
  det_fields = standard_fields.DetectionResultFields

  image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
  if batch_size != 1:
    key = tf.constant([str(i) for i in range(batch_size)])
  else:
    key = tf.constant('image1')

  # All but the last image get a full-image box with score 0.5; the last one
  # gets a quarter-image box with score 0.8.
  full_boxes = tf.tile(
      tf.constant([[[0., 0., 1., 1.]]]), multiples=[batch_size - 1, 1, 1])
  last_box = tf.constant([[[0., 0., 0.5, 0.5]]])
  boxes = tf.concat([full_boxes, last_box], axis=0)

  low_scores = tf.tile(tf.constant([[0.5]]), multiples=[batch_size - 1, 1])
  last_score = tf.constant([[0.8]])
  scores = tf.concat([low_scores, last_score], axis=0)

  classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
  masks = tf.tile(
      tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
      multiples=[batch_size, 1, 1, 1])

  gt_boxes = tf.constant([[0., 0., 1., 1.]])
  gt_classes = tf.constant([1])
  gt_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
  num_detections = tf.ones([batch_size])
  if resized_groundtruth_masks:
    gt_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

  batched = batch_size > 1
  if batched:
    # Replicate the single-image groundtruth along a new batch dimension.
    gt_boxes = tf.tile(
        tf.expand_dims(gt_boxes, 0), multiples=[batch_size, 1, 1])
    gt_classes = tf.tile(
        tf.expand_dims(gt_classes, 0), multiples=[batch_size, 1])
    gt_masks = tf.tile(
        tf.expand_dims(gt_masks, 0), multiples=[batch_size, 1, 1, 1])

  detections = {
      det_fields.detection_boxes: boxes,
      det_fields.detection_scores: scores,
      det_fields.detection_classes: classes,
      det_fields.detection_masks: masks,
      det_fields.num_detections: num_detections,
  }
  groundtruth = {
      gt_fields.groundtruth_boxes: gt_boxes,
      gt_fields.groundtruth_classes: gt_classes,
      gt_fields.groundtruth_instance_masks: gt_masks,
  }

  if batched:
    return eval_util.result_dict_for_batched_example(
        image,
        key,
        detections,
        groundtruth,
        scale_to_absolute=scale_to_absolute,
        max_gt_boxes=max_gt_boxes)
  return eval_util.result_dict_for_single_example(
      image,
      key,
      detections,
      groundtruth,
      scale_to_absolute=scale_to_absolute)
def model_fn(features, labels, mode, params=None):
  """Constructs the object detection model.

  Args:
    features: Dictionary of feature tensors, returned from `input_fn`.
    labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
      otherwise None.
    mode: Mode key from tf.estimator.ModeKeys.
    params: Parameter dictionary passed from the estimator.

  Returns:
    An `EstimatorSpec` that encapsulates the model and its serving
      configurations (a `TPUEstimatorSpec` when `use_tpu` is set).
  """
  params = params or {}
  total_loss, train_op, detections, export_outputs = None, None, None, None
  is_training = mode == tf.estimator.ModeKeys.TRAIN
  detection_model = detection_model_fn(is_training=is_training,
                                       add_summaries=(not use_tpu))
  scaffold_fn = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
  elif mode == tf.estimator.ModeKeys.EVAL:
    labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
    # Masks and keypoints are optional groundtruth.
    gt_masks_list = None
    if fields.InputDataFields.groundtruth_instance_masks in labels:
      gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
    gt_keypoints_list = None
    if fields.InputDataFields.groundtruth_keypoints in labels:
      gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
    detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list)

  preprocessed_images = features[fields.InputDataFields.image]
  prediction_dict = detection_model.predict(
      preprocessed_images, features[fields.InputDataFields.true_image_shape])
  detections = detection_model.postprocess(
      prediction_dict, features[fields.InputDataFields.true_image_shape])

  if mode == tf.estimator.ModeKeys.TRAIN:
    if not train_config.fine_tune_checkpoint_type:
      # train_config.from_detection_checkpoint field is deprecated. For
      # backward compatibility, sets finetune_checkpoint_type based on
      # from_detection_checkpoint.
      if train_config.from_detection_checkpoint:
        train_config.fine_tune_checkpoint_type = 'detection'
      else:
        train_config.fine_tune_checkpoint_type = 'classification'
    if train_config.fine_tune_checkpoint and hparams.load_pretrained:
      # fine_tune_checkpoint_type is always set at this point (see above), so
      # no second backward-compatibility check is needed here.
      asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (
          variables_helper.get_variables_available_in_checkpoint(
              asg_map, train_config.fine_tune_checkpoint,
              include_global_step=False))
      if use_tpu:
        # On TPU the checkpoint initialisation is deferred to a scaffold_fn.
        def tpu_scaffold():
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)
          return tf.train.Scaffold()
        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                      available_var_map)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    # `.values()` works on both Python 2 and 3 (`.itervalues()` does not).
    losses = list(losses_dict.values())
    if train_config.add_regularization_loss:
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      if regularization_losses:
        regularization_loss = tf.add_n(regularization_losses,
                                       name='regularization_loss')
        losses.append(regularization_loss)
        if not use_tpu:
          tf.summary.scalar('regularization_loss', regularization_loss)
    total_loss = tf.add_n(losses, name='total_loss')

  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.train.get_or_create_global_step()
    training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

    if use_tpu:
      training_optimizer = tpu_optimizer.CrossShardOptimizer(
          training_optimizer)

    # Optionally freeze some layers by setting their gradients to be zero.
    trainable_variables = None
    if train_config.freeze_variables:
      trainable_variables = tf.contrib.framework.filter_variables(
          tf.trainable_variables(),
          exclude_patterns=train_config.freeze_variables)

    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
      clip_gradients_value = train_config.gradient_clipping_by_norm

    if not use_tpu:
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)
    # An empty list is passed on TPU; None is passed otherwise.
    summaries = [] if use_tpu else None
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(detections)
    }

  eval_metric_ops = None
  if mode == tf.estimator.ModeKeys.EVAL:
    # Detection summaries during eval.
    class_agnostic = (fields.DetectionResultFields.detection_classes
                      not in detections)
    groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
    use_original_images = fields.InputDataFields.original_image in features
    eval_images = (
        features[fields.InputDataFields.original_image]
        if use_original_images else features[fields.InputDataFields.image])
    # Only the first image of the batch is used for the eval dict.
    eval_dict = eval_util.result_dict_for_single_example(
        eval_images[0:1],
        features[inputs.HASH_KEY][0],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=False)

    if class_agnostic:
      category_index = label_map_util.create_class_agnostic_category_index()
    else:
      category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)
    if not use_tpu and use_original_images:
      detection_and_groundtruth = (
          vis_utils.draw_side_by_side_evaluation_image(
              eval_dict, category_index, max_boxes_to_draw=20,
              min_score_thresh=0.2))
      tf.summary.image('Detections_Left_Groundtruth_Right',
                       detection_and_groundtruth)

    # Eval metrics on a single image.
    eval_metrics = eval_config.metrics_set
    if not eval_metrics:
      eval_metrics = ['coco_detection_metrics']
    # Materialise the categories so a list is passed on Python 2 and 3 alike.
    eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_metrics, list(category_index.values()), eval_dict,
        include_metrics_per_category=False)

  if use_tpu:
    # NOTE(review): TPUEstimatorSpec's `eval_metrics` is documented as a
    # (metric_fn, tensors) tuple, while a dict (or None) is passed here --
    # confirm that EVAL on TPU is actually supported by this path.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
  else:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
def model_fn(features, labels, mode, params=None):
  """Constructs the object detection model.

  Args:
    features: Dictionary of feature tensors, returned from `input_fn`.
    labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
      otherwise None.
    mode: Mode key from tf.estimator.ModeKeys.
    params: Parameter dictionary passed from the estimator.

  Returns:
    An `EstimatorSpec` that encapsulates the model and its serving
      configurations (a `TPUEstimatorSpec` when `use_tpu` is set).
  """
  params = params or {}
  total_loss, train_op, detections, export_outputs = None, None, None, None
  is_training = mode == tf.estimator.ModeKeys.TRAIN
  detection_model = detection_model_fn(is_training=is_training,
                                       add_summaries=(not use_tpu))
  scaffold_fn = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
  elif mode == tf.estimator.ModeKeys.EVAL:
    # For evaling on train data, it is necessary to check whether groundtruth
    # must be unpadded.
    boxes_shape = (
        labels[fields.InputDataFields.groundtruth_boxes].get_shape()
        .as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None
    labels = unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
    # Masks, keypoints and is_crowd flags are optional groundtruth.
    gt_masks_list = None
    if fields.InputDataFields.groundtruth_instance_masks in labels:
      gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
    gt_keypoints_list = None
    if fields.InputDataFields.groundtruth_keypoints in labels:
      gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
    # Must be initialised: without this default, labels lacking the is_crowd
    # entry caused an UnboundLocalError in the call below.
    gt_is_crowd_list = None
    if fields.InputDataFields.groundtruth_is_crowd in labels:
      gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
    detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list,
        groundtruth_weights_list=labels[
            fields.InputDataFields.groundtruth_weights],
        groundtruth_is_crowd_list=gt_is_crowd_list)

  preprocessed_images = features[fields.InputDataFields.image]
  prediction_dict = detection_model.predict(
      preprocessed_images, features[fields.InputDataFields.true_image_shape])
  detections = detection_model.postprocess(
      prediction_dict, features[fields.InputDataFields.true_image_shape])

  if mode == tf.estimator.ModeKeys.TRAIN:
    if train_config.fine_tune_checkpoint and hparams.load_pretrained:
      if not train_config.fine_tune_checkpoint_type:
        # train_config.from_detection_checkpoint field is deprecated. For
        # backward compatibility, set train_config.fine_tune_checkpoint_type
        # based on train_config.from_detection_checkpoint.
        if train_config.from_detection_checkpoint:
          train_config.fine_tune_checkpoint_type = 'detection'
        else:
          train_config.fine_tune_checkpoint_type = 'classification'
      asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (
          variables_helper.get_variables_available_in_checkpoint(
              asg_map, train_config.fine_tune_checkpoint,
              include_global_step=False))
      if use_tpu:
        # On TPU the checkpoint initialisation is deferred to a scaffold_fn.
        def tpu_scaffold():
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)
          return tf.train.Scaffold()
        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                      available_var_map)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    # `.values()` works on both Python 2 and 3 (`.itervalues()` does not).
    # The list is taken before the summary entries are added to losses_dict.
    losses = list(losses_dict.values())
    if train_config.add_regularization_loss:
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      if regularization_losses:
        regularization_loss = tf.add_n(regularization_losses,
                                       name='regularization_loss')
        losses.append(regularization_loss)
        losses_dict['Loss/regularization_loss'] = regularization_loss
    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    if 'graph_rewriter_config' in configs:
      graph_rewriter_fn = graph_rewriter_builder.build(
          configs['graph_rewriter_config'], is_training=is_training)
      graph_rewriter_fn()

    # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
    # can write learning rate summaries on TPU without host calls.
    global_step = tf.train.get_or_create_global_step()
    training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

  if mode == tf.estimator.ModeKeys.TRAIN:
    if use_tpu:
      training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
          training_optimizer)

    # Optionally freeze some layers by setting their gradients to be zero.
    trainable_variables = None
    if train_config.freeze_variables:
      trainable_variables = tf.contrib.framework.filter_variables(
          tf.trainable_variables(),
          exclude_patterns=train_config.freeze_variables)

    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
      clip_gradients_value = train_config.gradient_clipping_by_norm

    if not use_tpu:
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)
    # An empty list is passed on TPU; None is passed otherwise.
    summaries = [] if use_tpu else None
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(detections)
    }

  eval_metric_ops = None
  scaffold = None
  if mode == tf.estimator.ModeKeys.EVAL:
    class_agnostic = (fields.DetectionResultFields.detection_classes
                      not in detections)
    groundtruth = _prepare_groundtruth_for_eval(
        detection_model, class_agnostic)
    use_original_images = fields.InputDataFields.original_image in features
    eval_images = (
        features[fields.InputDataFields.original_image]
        if use_original_images else features[fields.InputDataFields.image])
    # Only the first image of the batch is used for the eval dict.
    eval_dict = eval_util.result_dict_for_single_example(
        eval_images[0:1],
        features[inputs.HASH_KEY][0],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True)

    if class_agnostic:
      category_index = label_map_util.create_class_agnostic_category_index()
    else:
      category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)
    img_summary = None
    if not use_tpu and use_original_images:
      detection_and_groundtruth = (
          vis_utils.draw_side_by_side_evaluation_image(
              eval_dict, category_index, max_boxes_to_draw=20,
              min_score_thresh=0.2,
              use_normalized_coordinates=False))
      img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                     detection_and_groundtruth)

    # Eval metrics on a single example.
    eval_metrics = eval_config.metrics_set
    if not eval_metrics:
      eval_metrics = ['coco_detection_metrics']
    # Materialise the categories so a list is passed on Python 2 and 3 alike.
    eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_metrics,
        list(category_index.values()),
        eval_dict,
        include_metrics_per_category=eval_config.include_metrics_per_category)
    for loss_key, loss_tensor in losses_dict.items():
      eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
    # Values that need no update op are paired with a no-op.
    for var in optimizer_summary_vars:
      eval_metric_ops[var.op.name] = (var, tf.no_op())
    if img_summary is not None:
      eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
          img_summary, tf.no_op())
    # Normalise every metric key to `str`.
    eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

    if eval_config.use_moving_averages:
      # The averaging object is only used to obtain the restore mapping.
      variable_averages = tf.train.ExponentialMovingAverage(0.0)
      variables_to_restore = variable_averages.variables_to_restore()
      keep_checkpoint_every_n_hours = (
          train_config.keep_checkpoint_every_n_hours)
      saver = tf.train.Saver(
          variables_to_restore,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
      scaffold = tf.train.Scaffold(saver=saver)

  if use_tpu:
    # NOTE(review): TPUEstimatorSpec's `eval_metrics` is documented as a
    # (metric_fn, tensors) tuple, while a dict (or None) is passed here, and
    # the moving-average `scaffold` is not forwarded on this path -- confirm
    # that EVAL on TPU is actually supported.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
  else:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=scaffold)