Ejemplo n.º 1
0
 def _get_target_assigner(self):
     """Build the TargetAssigner used by this fixture.

     The assigner combines an IoU similarity calculator, an arg-max matcher
     whose matched and unmatched thresholds are both 0.5, and a mean/stddev
     box coder with stddev 0.1.

     Returns:
       The targetassigner.TargetAssigner built from those three components.
     """
     return targetassigner.TargetAssigner(
         region_similarity_calculator.IouSimilarity(),
         argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                      unmatched_threshold=0.5),
         mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1))
Ejemplo n.º 2
0
 def graph_fn(anchor_means, groundtruth_box_corners):
     """Run target assignment for box-only groundtruth.

     Args:
       anchor_means: value wrapped in a box_list.BoxList and used as the
         anchors (presumably a box-corner tensor -- confirm against caller).
       groundtruth_box_corners: value wrapped in a box_list.BoxList and used
         as the groundtruth boxes.

     Returns:
       A tuple (cls_targets, cls_weights, reg_targets, reg_weights): the
       first four of the five values returned by TargetAssigner.assign.
     """
     assigner = targetassigner.TargetAssigner(
         region_similarity_calculator.IouSimilarity(),
         argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                      unmatched_threshold=0.3),
         mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1))
     anchors = box_list.BoxList(anchor_means)
     groundtruth = box_list.BoxList(groundtruth_box_corners)
     # assign() yields five values; the last one is not needed here.
     cls_targets, cls_weights, reg_targets, reg_weights, _ = assigner.assign(
         anchors, groundtruth, unmatched_class_label=None)
     return (cls_targets, cls_weights, reg_targets, reg_weights)
Ejemplo n.º 3
0
 def graph_fn(anchor_means, groundtruth_box_corners,
              groundtruth_keypoints):
     """Run target assignment with a keypoint-aware box coder.

     Args:
       anchor_means: value wrapped in a box_list.BoxList and used as the
         anchors.
       groundtruth_box_corners: value wrapped in a box_list.BoxList and used
         as the groundtruth boxes.
       groundtruth_keypoints: value attached to the groundtruth box list
         under the fields.BoxListFields.keypoints field.

     Returns:
       A tuple (cls_targets, cls_weights, reg_targets, reg_weights): the
       first four of the five values returned by TargetAssigner.assign.
     """
     assigner = targetassigner.TargetAssigner(
         region_similarity_calculator.IouSimilarity(),
         argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                      unmatched_threshold=0.5),
         keypoint_box_coder.KeypointBoxCoder(
             num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0]))
     anchors = box_list.BoxList(anchor_means)
     groundtruth = box_list.BoxList(groundtruth_box_corners)
     # The keypoints must be attached before assignment so the coder sees them.
     groundtruth.add_field(fields.BoxListFields.keypoints,
                           groundtruth_keypoints)
     cls_targets, cls_weights, reg_targets, reg_weights, _ = assigner.assign(
         anchors, groundtruth, unmatched_class_label=None)
     return (cls_targets, cls_weights, reg_targets, reg_weights)
Ejemplo n.º 4
0
        def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels,
                     groundtruth_weights):
            """Run weighted target assignment and return only the weights.

            Args:
              anchor_means: value wrapped in a box_list.BoxList and used as
                the anchors.
              groundtruth_box_corners: value wrapped in a box_list.BoxList and
                used as the groundtruth boxes.
              groundtruth_labels: labels forwarded to TargetAssigner.assign.
              groundtruth_weights: per-groundtruth weights forwarded to
                TargetAssigner.assign.

            Returns:
              A tuple (cls_weights, reg_weights): the second and fourth of the
              five values returned by TargetAssigner.assign.
            """
            # Components are built before the label constant, as in the
            # original sequence of object/op creation.
            components = (
                region_similarity_calculator.IouSimilarity(),
                argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                             unmatched_threshold=0.5),
                mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1),
            )
            # Seven-element float label with a 1 in slot 0, used for anchors
            # that end up unmatched.
            background_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
            assigner = targetassigner.TargetAssigner(*components)

            anchors = box_list.BoxList(anchor_means)
            groundtruth = box_list.BoxList(groundtruth_box_corners)
            _, cls_weights, _, reg_weights, _ = assigner.assign(
                anchors,
                groundtruth,
                groundtruth_labels,
                unmatched_class_label=background_label,
                groundtruth_weights=groundtruth_weights)
            return (cls_weights, reg_weights)
Ejemplo n.º 5
0
    def test_raises_error_on_invalid_groundtruth_labels(self):
        """assign() must raise ValueError for the label tensor built here.

        The test pairs three groundtruth boxes with a label constant built
        from a 1 x 2 x 2 nested list and expects TargetAssigner.assign to
        reject the combination.
        """
        components = (
            region_similarity_calculator.NegSqDistSimilarity(),
            bipartite_matcher.GreedyBipartiteMatcher(),
            mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0),
        )
        background_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
        assigner = targetassigner.TargetAssigner(*components)

        # A single prior box.
        priors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))

        # Three groundtruth boxes.
        gt_corners = [
            [0.0, 0.0, 0.5, 0.5],
            [0.5, 0.5, 0.9, 0.9],
            [0.75, 0, 0.95, 0.27],
        ]
        gt_boxes = box_list.BoxList(tf.constant(gt_corners))
        bad_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)

        with self.assertRaises(ValueError):
            assigner.assign(priors,
                            gt_boxes,
                            bad_labels,
                            unmatched_class_label=background_label)
Ejemplo n.º 6
0
    def _create_model(self,
                      model_fn=ssd_meta_arch.SSDMetaArch,
                      apply_hard_mining=True,
                      normalize_loc_loss_by_codesize=False,
                      add_background_class=True,
                      random_example_sampling=False,
                      weight_regression_loss_by_score=False,
                      use_expected_classification_loss_under_sampling=False,
                      minimum_negative_sampling=1,
                      desired_negative_sampling_ratio=3,
                      use_keras=False,
                      predict_mask=False,
                      use_static_shapes=False,
                      nms_max_size_per_class=5):
        """Assemble a single-class SSD model from mock and fake components.

        Args:
          model_fn: callable used to construct the model; it receives all
            components as keyword arguments.
          apply_hard_mining: if true, a losses.HardExampleMiner is passed to
            the model; otherwise None is passed.
          normalize_loc_loss_by_codesize: forwarded to `model_fn`.
          add_background_class: forwarded to `model_fn`.
          random_example_sampling: if true, a BalancedPositiveNegativeSampler
            with positive_fraction=0.5 is passed to the model; otherwise None.
          weight_regression_loss_by_score: forwarded to the TargetAssigner.
          use_expected_classification_loss_under_sampling: if true, a partial
            of ops.expected_classification_loss_under_sampling is passed to
            the model; otherwise None.
          minimum_negative_sampling: bound into that partial.
          desired_negative_sampling_ratio: bound into that partial.
          use_keras: selects the Keras variants of the mock box predictor and
            the fake feature extractor.
          predict_mask: forwarded to the mock box predictor.
          use_static_shapes: forwarded to the non-max-suppression partial.
          nms_max_size_per_class: used as both `max_size_per_class` and
            `max_total_size` of the non-max-suppression partial.

        Returns:
          A tuple (model, num_classes, num_anchors, code_size), where
          num_classes is 1, num_anchors comes from the mock anchor generator
          and code_size is 4.
        """
        is_training = False
        num_classes = 1
        code_size = 4

        mock_anchor_generator = MockAnchorGenerator2x2()
        predictor_cls = (test_utils.MockKerasBoxPredictor
                         if use_keras else test_utils.MockBoxPredictor)
        mock_box_predictor = predictor_cls(
            is_training, num_classes, predict_mask=predict_mask)
        mock_box_coder = test_utils.MockBoxCoder()
        fake_feature_extractor = (FakeSSDKerasFeatureExtractor()
                                  if use_keras else FakeSSDFeatureExtractor())
        mock_matcher = test_utils.MockMatcher()
        similarity_calc = sim_calc.IouSimilarity()

        def identity_resizer(image):
            # Pass the image through and report its own shape as the true
            # image shape.
            return [tf.identity(image), tf.shape(image)]

        classification_loss = losses.WeightedSigmoidClassificationLoss()
        localization_loss = losses.WeightedSmoothL1LocalizationLoss()
        nms_fn = functools.partial(
            post_processing.batch_multiclass_non_max_suppression,
            score_thresh=-20.0,
            iou_thresh=1.0,
            max_size_per_class=nms_max_size_per_class,
            max_total_size=nms_max_size_per_class,
            use_static_shapes=use_static_shapes)

        # This hard example miner is expected to be a no-op.
        hard_example_miner = (
            losses.HardExampleMiner(num_hard_examples=None, iou_threshold=1.0)
            if apply_hard_mining else None)

        random_example_sampler = (
            sampler.BalancedPositiveNegativeSampler(positive_fraction=0.5)
            if random_example_sampling else None)

        assigner = target_assigner.TargetAssigner(
            similarity_calc,
            mock_matcher,
            mock_box_coder,
            negative_class_weight=1.0,
            weight_regression_loss_by_score=weight_regression_loss_by_score)

        if use_expected_classification_loss_under_sampling:
            expected_loss_fn = functools.partial(
                ops.expected_classification_loss_under_sampling,
                minimum_negative_sampling=minimum_negative_sampling,
                desired_negative_sampling_ratio=desired_negative_sampling_ratio)
        else:
            expected_loss_fn = None

        model = model_fn(
            is_training=is_training,
            anchor_generator=mock_anchor_generator,
            box_predictor=mock_box_predictor,
            box_coder=mock_box_coder,
            feature_extractor=fake_feature_extractor,
            encode_background_as_zeros=False,
            image_resizer_fn=identity_resizer,
            non_max_suppression_fn=nms_fn,
            score_conversion_fn=tf.identity,
            classification_loss=classification_loss,
            localization_loss=localization_loss,
            classification_loss_weight=1.0,
            localization_loss_weight=1.0,
            normalize_loss_by_num_matches=False,
            hard_example_miner=hard_example_miner,
            target_assigner_instance=assigner,
            add_summaries=False,
            normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            add_background_class=add_background_class,
            random_example_sampler=random_example_sampler,
            expected_classification_loss_under_sampling=expected_loss_fn)
        num_anchors = mock_anchor_generator.num_anchors()
        return model, num_classes, num_anchors, code_size
Ejemplo n.º 7
0
    def __init__(self,
                 is_training,
                 anchor_generator,
                 box_predictor,
                 box_coder,
                 feature_extractor,
                 matcher,
                 region_similarity_calculator,
                 encode_background_as_zeros,
                 negative_class_weight,
                 image_resizer_fn,
                 non_max_suppression_fn,
                 score_conversion_fn,
                 classification_loss,
                 localization_loss,
                 classification_loss_weight,
                 localization_loss_weight,
                 normalize_loss_by_num_matches,
                 hard_example_miner,
                 add_summaries=True,
                 normalize_loc_loss_by_codesize=False,
                 freeze_batchnorm=False,
                 inplace_batchnorm_update=False,
                 add_background_class=True,
                 random_example_sampler=None):
        """SSDMetaArch Constructor.

        TODO(rathodv,jonathanhuang): group NMS parameters + score converter
        into a class and loss parameters into a class and write config protos
        for postprocessing and losses.

        Args:
          is_training: A boolean indicating whether the training version of
            the computation graph should be constructed.
          anchor_generator: an anchor_generator.AnchorGenerator object.
          box_predictor: a box_predictor.BoxPredictor object. Its
            `num_classes` attribute is forwarded to the base class
            constructor.
          box_coder: a box_coder.BoxCoder object.
          feature_extractor: a SSDFeatureExtractor object.
          matcher: a matcher.Matcher object.
          region_similarity_calculator: a
            region_similarity_calculator.RegionSimilarityCalculator object.
          encode_background_as_zeros: boolean determining whether background
            targets are to be encoded as an all zeros vector or a one-hot
            vector (where background is the 0th class).
          negative_class_weight: Weight for confidence loss of negative
            anchors.
          image_resizer_fn: a callable for image resizing.  This callable
            always takes a rank-3 image tensor (corresponding to a single
            image) and returns a rank-3 image tensor, possibly with new
            spatial dimensions and a 1-D tensor of shape [3] indicating shape
            of true image within the resized image tensor as the resized
            image tensor could be padded. See
            builders/image_resizer_builder.py.
          non_max_suppression_fn: batch_multiclass_non_max_suppression
            callable that takes `boxes`, `scores` and optional `clip_window`
            inputs (with all other inputs already set) and returns a
            dictionary holding tensors with keys: `detection_boxes`,
            `detection_scores`, `detection_classes` and `num_detections`. See
            `post_processing.batch_multiclass_non_max_suppression` for the
            type and shape of these tensors.
          score_conversion_fn: callable elementwise nonlinearity (that takes
            tensors as inputs and returns tensors).  This is usually used to
            convert logits to probabilities.
          classification_loss: an object_detection.core.losses.Loss object.
          localization_loss: an object_detection.core.losses.Loss object.
          classification_loss_weight: float
          localization_loss_weight: float
          normalize_loss_by_num_matches: boolean
          hard_example_miner: a losses.HardExampleMiner object (can be None)
          add_summaries: boolean (default: True) controlling whether summary
            ops should be added to tensorflow graph.
          normalize_loc_loss_by_codesize: whether to normalize localization
            loss by code size of the box encoder.
          freeze_batchnorm: Whether to freeze batch norm parameters during
            training or not. When training with a small batch size (e.g. 1),
            it is desirable to freeze batch norm update and use pretrained
            batch norm params.
          inplace_batchnorm_update: Whether to update batch norm moving
            average values inplace. When this is false train op must add a
            control dependency on tf.GraphKeys.UPDATE_OPS collection in order
            to update batch norm statistics.
          add_background_class: Whether to add an implicit background class to
            one-hot encodings of groundtruth labels. Set to false if using
            groundtruth labels with an explicit background class or using
            multiclass scores instead of truth in the case of distillation.
          random_example_sampler: a BalancedPositiveNegativeSampler object
            that can perform random example sampling when computing loss. If
            None, random sampling process is skipped. Note that random example
            sampler and hard example miner can both be applied to the model.
            In that case, random sampler will take effect first and hard
            example miner can only process the random sampled examples.
        """
        super(SSDMetaArch,
              self).__init__(num_classes=box_predictor.num_classes)
        self._is_training = is_training
        self._freeze_batchnorm = freeze_batchnorm
        self._inplace_batchnorm_update = inplace_batchnorm_update

        # Needed for fine-tuning from classification checkpoints whose
        # variables do not have the feature extractor scope.
        self._extract_features_scope = 'FeatureExtractor'

        self._anchor_generator = anchor_generator
        self._box_predictor = box_predictor

        self._box_coder = box_coder
        self._feature_extractor = feature_extractor
        self._matcher = matcher
        self._region_similarity_calculator = region_similarity_calculator
        self._add_background_class = add_background_class

        # TODO(jonathanhuang): handle agnostic mode
        # Classification target given to unmatched anchors. Only the constant
        # that is actually used is created, so no orphan op is left in the
        # graph.
        if encode_background_as_zeros:
            # Background is the all-zeros vector of length num_classes + 1.
            unmatched_cls_target = tf.constant((self.num_classes + 1) * [0],
                                               tf.float32)
        else:
            # Background is one-hot with the 1 in slot 0.
            unmatched_cls_target = tf.constant([1] + self.num_classes * [0],
                                               tf.float32)

        self._target_assigner = target_assigner.TargetAssigner(
            self._region_similarity_calculator,
            self._matcher,
            self._box_coder,
            negative_class_weight=negative_class_weight,
            unmatched_cls_target=unmatched_cls_target)

        self._classification_loss = classification_loss
        self._localization_loss = localization_loss
        self._classification_loss_weight = classification_loss_weight
        self._localization_loss_weight = localization_loss_weight
        self._normalize_loss_by_num_matches = normalize_loss_by_num_matches
        self._normalize_loc_loss_by_codesize = normalize_loc_loss_by_codesize
        self._hard_example_miner = hard_example_miner
        self._random_example_sampler = random_example_sampler
        self._parallel_iterations = 16

        self._image_resizer_fn = image_resizer_fn
        self._non_max_suppression_fn = non_max_suppression_fn
        self._score_conversion_fn = score_conversion_fn

        # Anchors are not known at construction time; they start as None.
        self._anchors = None
        self._add_summaries = add_summaries
        self._batched_prediction_tensor_names = []
Ejemplo n.º 8
0
def _build_ssd_model(ssd_config, is_training, add_summaries,
                     add_background_class=True):
    """Construct an SSDMetaArch from an SSD model config.

    Every component (feature extractor, box coder, matcher, similarity
    calculator, box predictor, anchor generator, image resizer,
    post-processing functions and losses) is produced by its builder from the
    matching sub-message of `ssd_config` and then handed to
    ssd_meta_arch.SSDMetaArch.

    Args:
      ssd_config: A ssd.proto object containing the config for the desired
        SSDMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.
      add_background_class: Whether to add an implicit background class to
        one-hot encodings of groundtruth labels. Set to false if using
        groundtruth labels with an explicit background class or using
        multiclass scores instead of truth in the case of distillation.

    Returns:
      SSDMetaArch based on the config.

    Raises:
      Nothing directly; any exception raised by one of the delegated builders
      propagates unchanged.
    """
    extractor = _build_ssd_feature_extractor(
        feature_extractor_config=ssd_config.feature_extractor,
        is_training=is_training)

    coder = box_coder_builder.build(ssd_config.box_coder)
    anchor_matcher = matcher_builder.build(ssd_config.matcher)
    similarity = sim_calc.build(ssd_config.similarity_calculator)
    predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        ssd_config.box_predictor,
        is_training,
        ssd_config.num_classes)
    anchors = anchor_generator_builder.build(ssd_config.anchor_generator)
    resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
    nms_fn, score_fn = post_processing_builder.build(
        ssd_config.post_processing)
    # The loss builder returns six values in this order.
    (cls_loss, loc_loss, cls_loss_weight, loc_loss_weight, miner,
     example_sampler) = losses_builder.build(ssd_config.loss)

    assigner = target_assigner.TargetAssigner(
        similarity,
        anchor_matcher,
        coder,
        negative_class_weight=ssd_config.negative_class_weight,
        weight_regression_loss_by_score=(
            ssd_config.weight_regression_loss_by_score))

    # The expected-loss function is optional and enabled by the config flag.
    if ssd_config.use_expected_classification_loss_under_sampling:
        expected_loss_fn = functools.partial(
            ops.expected_classification_loss_under_sampling,
            minimum_negative_sampling=ssd_config.minimum_negative_sampling,
            desired_negative_sampling_ratio=(
                ssd_config.desired_negative_sampling_ratio))
    else:
        expected_loss_fn = None

    return ssd_meta_arch.SSDMetaArch(
        is_training=is_training,
        anchor_generator=anchors,
        box_predictor=predictor,
        box_coder=coder,
        feature_extractor=extractor,
        encode_background_as_zeros=ssd_config.encode_background_as_zeros,
        image_resizer_fn=resizer_fn,
        non_max_suppression_fn=nms_fn,
        score_conversion_fn=score_fn,
        classification_loss=cls_loss,
        localization_loss=loc_loss,
        classification_loss_weight=cls_loss_weight,
        localization_loss_weight=loc_loss_weight,
        normalize_loss_by_num_matches=ssd_config.normalize_loss_by_num_matches,
        hard_example_miner=miner,
        target_assigner_instance=assigner,
        add_summaries=add_summaries,
        normalize_loc_loss_by_codesize=(
            ssd_config.normalize_loc_loss_by_codesize),
        freeze_batchnorm=ssd_config.freeze_batchnorm,
        inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
        add_background_class=add_background_class,
        random_example_sampler=example_sampler,
        expected_classification_loss_under_sampling=expected_loss_fn)