def test_build_softmax_score_converter(self):
    """A SOFTMAX score_converter config builds `softmax_with_logit_scale`."""
    proto_text = """
   score_converter: SOFTMAX
 """
    config = post_processing_pb2.PostProcessing()
    text_format.Merge(proto_text, config)

    # build() returns a pair; only the second element (the score converter)
    # is under test here.
    _, converter_fn = post_processing_builder.build(config)

    self.assertEqual(converter_fn.__name__, 'softmax_with_logit_scale')
    def test_build_identity_score_converter(self):
        """An IDENTITY score_converter config builds a pass-through function.

        Checks both the name of the built function and that running it on a
        constant tensor yields values close to the input values.
        """
        proto_text = """
      score_converter: IDENTITY
    """
        config = post_processing_pb2.PostProcessing()
        text_format.Merge(proto_text, config)

        # build() returns a pair; only the second element (the score
        # converter) is under test here.
        _, converter_fn = post_processing_builder.build(config)
        self.assertEqual(converter_fn.__name__, 'identity_with_logit_scale')

        raw_scores = tf.constant([1, 1], tf.float32)
        converted = converter_fn(raw_scores)
        with self.test_session() as sess:
            actual_values = sess.run(converted)
            expected_values = sess.run(raw_scores)
            self.assertAllClose(actual_values, expected_values)
 def test_build_non_max_suppressor_with_correct_parameters(self):
     """batch_non_max_suppression config values reach the built suppressor.

     The built suppressor exposes a `keywords` mapping; each configured value
     is expected under the corresponding keyword name.
     """
     proto_text = """
   batch_non_max_suppression {
     score_threshold: 0.7
     iou_threshold: 0.6
     max_detections_per_class: 100
     max_total_detections: 300
   }
 """
     config = post_processing_pb2.PostProcessing()
     text_format.Merge(proto_text, config)

     # build() returns a pair; only the first element (the suppressor) is
     # under test here.
     nms_fn, _ = post_processing_builder.build(config)

     self.assertEqual(nms_fn.keywords['max_size_per_class'], 100)
     self.assertEqual(nms_fn.keywords['max_total_size'], 300)
     # Thresholds are floats, so compare approximately.
     self.assertAlmostEqual(nms_fn.keywords['score_thresh'], 0.7)
     self.assertAlmostEqual(nms_fn.keywords['iou_thresh'], 0.6)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Constructs a Faster R-CNN or R-FCN detection model from its config.

    The concrete meta-architecture is picked from the type of the second-stage
    box predictor built from the config: an `RfcnBoxPredictor` produces an
    `RFCNMetaArch`; any other predictor produces a `FasterRCNNMetaArch`.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      An `RFCNMetaArch` or a `FasterRCNNMetaArch`, depending on the second-stage
      box predictor.

    Raises:
      ValueError: If `first_stage_nms_iou_threshold` is below 0 or above 1.0,
        or if, when training, `second_stage_batch_size` is greater than
        `first_stage_max_proposals`. Errors raised by the sub-builders called
        here propagate unchanged.
    """
    cfg = frcnn_config

    resizer_fn = image_resizer_builder.build(cfg.image_resizer)
    extractor = _build_faster_rcnn_feature_extractor(
        cfg.feature_extractor,
        is_training,
        inplace_batchnorm_update=cfg.inplace_batchnorm_update)

    # ---- First stage (proposal) components ----
    proposal_anchor_generator = anchor_generator_builder.build(
        cfg.first_stage_anchor_generator)
    proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=cfg.use_matmul_gather_in_matcher)
    proposal_arg_scope_fn = hyperparams_builder.build(
        cfg.first_stage_box_predictor_conv_hyperparams, is_training)

    # Static shapes need the config flag and, outside of training, the
    # separate eval opt-in as well.
    static_shapes = cfg.use_static_shapes and (
        cfg.use_static_shapes_for_eval or is_training)
    # Both samplers share the same static/dynamic decision.
    static_sampling = cfg.use_static_balanced_label_sampler and static_shapes

    proposal_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=cfg.first_stage_positive_balance_fraction,
        is_static=static_sampling)

    max_proposals = cfg.first_stage_max_proposals
    proposal_nms_iou = cfg.first_stage_nms_iou_threshold
    if proposal_nms_iou < 0 or proposal_nms_iou > 1.0:
        raise ValueError('iou_threshold not in [0, 1.0].')
    if is_training and cfg.second_stage_batch_size > max_proposals:
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    proposal_nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=cfg.first_stage_nms_score_threshold,
        iou_thresh=proposal_nms_iou,
        max_size_per_class=max_proposals,
        max_total_size=max_proposals,
        use_static_shapes=static_shapes)

    # ---- Second stage (detection) components ----
    detection_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=cfg.use_matmul_gather_in_matcher)
    box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        cfg.second_stage_box_predictor,
        is_training=is_training,
        num_classes=cfg.num_classes)
    detection_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=cfg.second_stage_balance_fraction,
        is_static=static_sampling)
    detection_nms_fn, detection_score_fn = post_processing_builder.build(
        cfg.second_stage_post_processing)
    classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            cfg.second_stage_classification_loss))
    localization_weight = cfg.second_stage_localization_loss_weight
    classification_weight = cfg.second_stage_classification_loss_weight

    # The hard example miner is optional; None means no miner was configured.
    if cfg.HasField('hard_example_miner'):
        miner = losses_builder.build_hard_example_miner(
            cfg.hard_example_miner,
            classification_weight,
            localization_weight)
    else:
        miner = None

    if cfg.use_matmul_crop_and_resize:
        crop_fn = ops.matmul_crop_and_resize
    else:
        crop_fn = ops.native_crop_and_resize

    # Constructor arguments accepted by both meta-architectures.
    shared_kwargs = dict(
        is_training=is_training,
        num_classes=cfg.num_classes,
        image_resizer_fn=resizer_fn,
        feature_extractor=extractor,
        number_of_stages=cfg.number_of_stages,
        first_stage_anchor_generator=proposal_anchor_generator,
        first_stage_target_assigner=proposal_target_assigner,
        first_stage_atrous_rate=cfg.first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope_fn=proposal_arg_scope_fn,
        first_stage_box_predictor_kernel_size=(
            cfg.first_stage_box_predictor_kernel_size),
        first_stage_box_predictor_depth=cfg.first_stage_box_predictor_depth,
        first_stage_minibatch_size=cfg.first_stage_minibatch_size,
        first_stage_sampler=proposal_sampler,
        first_stage_non_max_suppression_fn=proposal_nms_fn,
        first_stage_max_proposals=max_proposals,
        first_stage_localization_loss_weight=(
            cfg.first_stage_localization_loss_weight),
        first_stage_objectness_loss_weight=(
            cfg.first_stage_objectness_loss_weight),
        second_stage_target_assigner=detection_target_assigner,
        second_stage_batch_size=cfg.second_stage_batch_size,
        second_stage_sampler=detection_sampler,
        second_stage_non_max_suppression_fn=detection_nms_fn,
        second_stage_score_conversion_fn=detection_score_fn,
        second_stage_localization_loss_weight=localization_weight,
        second_stage_classification_loss=classification_loss,
        second_stage_classification_loss_weight=classification_weight,
        hard_example_miner=miner,
        add_summaries=add_summaries,
        crop_and_resize_fn=crop_fn,
        clip_anchors_to_image=cfg.clip_anchors_to_image,
        use_static_shapes=static_shapes,
        resize_masks=cfg.resize_masks)

    if isinstance(box_predictor, rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=box_predictor,
            **shared_kwargs)

    # Crop/max-pool settings and the mask loss weight are only passed to the
    # Faster R-CNN meta-architecture.
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=cfg.initial_crop_size,
        maxpool_kernel_size=cfg.maxpool_kernel_size,
        maxpool_stride=cfg.maxpool_stride,
        second_stage_mask_rcnn_box_predictor=box_predictor,
        second_stage_mask_prediction_loss_weight=(
            cfg.second_stage_mask_prediction_loss_weight),
        **shared_kwargs)
def _build_ssd_model(ssd_config, is_training, add_summaries):
    """Constructs an SSD detection model from its config.

    Each component (feature extractor, box coder, matcher, similarity
    calculator, anchor generator, box predictor, image resizer, post-processing
    and losses) is built from the matching sub-config and handed to
    `SSDMetaArch`.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.

    Returns:
      SSDMetaArch based on the config.

    Raises:
      Nothing directly; errors raised by the sub-builders called here
      propagate unchanged.
    """
    cfg = ssd_config
    class_count = cfg.num_classes

    extractor = _build_ssd_feature_extractor(
        feature_extractor_config=cfg.feature_extractor,
        freeze_batchnorm=cfg.freeze_batchnorm,
        is_training=is_training)

    coder = box_coder_builder.build(cfg.box_coder)
    box_matcher = matcher_builder.build(cfg.matcher)
    similarity_calculator = sim_calc.build(cfg.similarity_calculator)
    anchors = anchor_generator_builder.build(cfg.anchor_generator)

    # Keras feature extractors get a Keras box predictor; all others go
    # through the non-Keras builder.
    if not extractor.is_keras_model:
        predictor = box_predictor_builder.build(
            hyperparams_builder.build, cfg.box_predictor, is_training,
            class_count, cfg.add_background_class)
    else:
        predictor = box_predictor_builder.build_keras(
            conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=cfg.freeze_batchnorm,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=(
                anchors.num_anchors_per_location()),
            box_predictor_config=cfg.box_predictor,
            is_training=is_training,
            num_classes=class_count,
            add_background_class=cfg.add_background_class)

    resizer_fn = image_resizer_builder.build(cfg.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(cfg.post_processing)
    (cls_loss, loc_loss, cls_weight, loc_weight, miner, example_sampler,
     loss_weights_fn) = losses_builder.build(cfg.loss)

    equalization_cfg = ops.EqualizationLossConfig(
        weight=cfg.loss.equalization_loss.weight,
        exclude_prefixes=cfg.loss.equalization_loss.exclude_prefixes)

    assigner = target_assigner.TargetAssigner(
        similarity_calculator,
        box_matcher,
        coder,
        negative_class_weight=cfg.negative_class_weight)

    return ssd_meta_arch.SSDMetaArch(
        is_training=is_training,
        anchor_generator=anchors,
        box_predictor=predictor,
        box_coder=coder,
        feature_extractor=extractor,
        encode_background_as_zeros=cfg.encode_background_as_zeros,
        image_resizer_fn=resizer_fn,
        non_max_suppression_fn=nms_fn,
        score_conversion_fn=score_fn,
        classification_loss=cls_loss,
        localization_loss=loc_loss,
        classification_loss_weight=cls_weight,
        localization_loss_weight=loc_weight,
        normalize_loss_by_num_matches=cfg.normalize_loss_by_num_matches,
        hard_example_miner=miner,
        target_assigner_instance=assigner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=cfg.normalize_loc_loss_by_codesize,
        freeze_batchnorm=cfg.freeze_batchnorm,
        inplace_batchnorm_update=cfg.inplace_batchnorm_update,
        add_background_class=cfg.add_background_class,
        explicit_background_class=cfg.explicit_background_class,
        random_example_sampler=example_sampler,
        expected_loss_weights_fn=loss_weights_fn,
        use_confidences_as_targets=cfg.use_confidences_as_targets,
        implicit_example_weight=cfg.implicit_example_weight,
        equalization_loss_config=equalization_cfg)
def export_tflite_graph(pipeline_config,
                        trained_checkpoint_prefix,
                        output_dir,
                        add_postprocessing_op,
                        max_detections,
                        max_classes_per_detection,
                        detections_per_class=100,
                        use_regular_nms=False):
    """Exports a tflite compatible graph and anchors for ssd detection model.

    Anchors are written to a tensor, and the resulting graph is written to
    output_dir/tflite_graph.pb (binary) and output_dir/tflite_graph.pbtxt
    (text).

    Args:
      pipeline_config: a pipeline.proto object containing the configuration for
        SSD model to export.
      trained_checkpoint_prefix: a file prefix for the checkpoint containing the
        trained parameters of the SSD model.
      output_dir: A directory to write the tflite graph and anchor file to. It
        is created if needed.
      add_postprocessing_op: If add_postprocessing_op is true: frozen graph adds
        a TFLite_Detection_PostProcess custom op.
      max_detections: Maximum number of detections (boxes) to show.
      max_classes_per_detection: Number of classes to display per detection.
      detections_per_class: In regular NonMaxSuppression, number of anchors used
        for NonMaxSuppression per class.
      use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
        of Fast NMS.

    Raises:
      ValueError: if the pipeline config contains a model other than ssd, or if
        the ssd image resizer is anything other than fixed_shape_resizer.
    """
    tf.gfile.MakeDirs(output_dir)
    model_type = pipeline_config.model.WhichOneof('model')
    if model_type != 'ssd':
        raise ValueError('Only ssd models are supported in tflite. '
                         'Found {} in config'.format(model_type))

    ssd_config = pipeline_config.model.ssd
    num_classes = ssd_config.num_classes

    # NOTE(review): the thresholds and scales below are deliberately wrapped in
    # single-element sets, exactly as before. They are forwarded untouched to
    # append_postprocessing_op, which presumably unpacks them -- confirm
    # against that function before changing the container type.
    nms_config = ssd_config.post_processing.batch_non_max_suppression
    nms_score_threshold = {nms_config.score_threshold}
    nms_iou_threshold = {nms_config.iou_threshold}
    box_coder_config = ssd_config.box_coder.faster_rcnn_box_coder
    scale_values = {
        'y_scale': {box_coder_config.y_scale},
        'x_scale': {box_coder_config.x_scale},
        'h_scale': {box_coder_config.height_scale},
        'w_scale': {box_coder_config.width_scale},
    }

    image_resizer_config = ssd_config.image_resizer
    image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
    if image_resizer != 'fixed_shape_resizer':
        # The two literals are concatenated, so the first one needs its own
        # trailing space to keep the words apart.
        raise ValueError(
            'Only fixed_shape_resizer '
            'is supported with tflite. Found {}'.format(image_resizer))
    fixed_shape_config = image_resizer_config.fixed_shape_resizer
    num_channels = _DEFAULT_NUM_CHANNELS
    if fixed_shape_config.convert_to_grayscale:
        num_channels = 1

    # The input placeholder has batch size 1 and the resizer's fixed size.
    image = tf.placeholder(
        tf.float32,
        shape=[1, fixed_shape_config.height, fixed_shape_config.width,
               num_channels],
        name='normalized_input_image_tensor')

    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)
    predicted_tensors = detection_model.predict(image, true_image_shapes=None)
    # The score conversion occurs before the post-processing custom op
    _, score_conversion_fn = post_processing_builder.build(
        ssd_config.post_processing)
    class_predictions = score_conversion_fn(
        predicted_tensors['class_predictions_with_background'])

    with tf.name_scope('raw_outputs'):
        # 'raw_outputs/box_encodings': a float32 tensor of shape
        #  [1, num_anchors, 4] containing the encoded box predictions. Note
        #  that these are raw predictions and no Non-Max suppression is
        #  applied on them and no decode center size boxes is applied to them.
        tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
        # 'raw_outputs/class_predictions': a float32 tensor of shape
        #  [1, num_anchors, num_classes] containing the class scores for each
        #  anchor after applying score conversion.
        tf.identity(class_predictions, name='class_predictions')
    # 'anchors': a float32 tensor of shape
    #   [4, num_anchors] containing the anchors as a constant node.
    tf.identity(
        get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
        name='anchors')

    # Add global step to the graph, so we know the training step number when we
    # evaluate the model.
    tf.train.get_or_create_global_step()

    # graph rewriter
    is_quantized = pipeline_config.HasField('graph_rewriter')
    if is_quantized:
        graph_rewriter_fn = graph_rewriter_builder.build(
            pipeline_config.graph_rewriter, is_training=False)
        graph_rewriter_fn()

    if ssd_config.feature_extractor.HasField('fpn'):
        exporter.rewrite_nn_resize_op(is_quantized)

    # freeze the graph
    saver_kwargs = {}
    if pipeline_config.eval_config.use_moving_averages:
        saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
        # NamedTemporaryFile removes its file once the object is closed, so the
        # handle stays bound to a local for the rest of the function while the
        # checkpoint at that path is read during freezing.
        moving_average_checkpoint = tempfile.NamedTemporaryFile()
        exporter.replace_variable_values_with_moving_averages(
            tf.get_default_graph(), trained_checkpoint_prefix,
            moving_average_checkpoint.name)
        checkpoint_to_use = moving_average_checkpoint.name
    else:
        checkpoint_to_use = trained_checkpoint_prefix

    saver = tf.train.Saver(**saver_kwargs)
    input_saver_def = saver.as_saver_def()
    frozen_graph_def = exporter.freeze_graph_with_def_protos(
        input_graph_def=tf.get_default_graph().as_graph_def(),
        input_saver_def=input_saver_def,
        input_checkpoint=checkpoint_to_use,
        output_node_names=','.join([
            'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
            'anchors'
        ]),
        restore_op_name='save/restore_all',
        filename_tensor_name='save/Const:0',
        clear_devices=True,
        output_graph='',
        initializer_nodes='')

    # Add new operation to do post processing in a custom op (TF Lite only)
    if add_postprocessing_op:
        transformed_graph_def = append_postprocessing_op(
            frozen_graph_def, max_detections, max_classes_per_detection,
            nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
            detections_per_class, use_regular_nms)
    else:
        # Return frozen without adding post-processing custom op
        transformed_graph_def = frozen_graph_def

    # Write the result twice: serialized binary proto and its text form.
    binary_graph = os.path.join(output_dir, 'tflite_graph.pb')
    with tf.gfile.GFile(binary_graph, 'wb') as f:
        f.write(transformed_graph_def.SerializeToString())
    txt_graph = os.path.join(output_dir, 'tflite_graph.pbtxt')
    with tf.gfile.GFile(txt_graph, 'w') as f:
        f.write(str(transformed_graph_def))