def _get_target_assigner(self): similarity_calc = region_similarity_calculator.IouSimilarity() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, unmatched_threshold=0.5) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) return targetassigner.TargetAssigner(similarity_calc, matcher, box_coder)
def graph_fn(anchor_means, groundtruth_box_corners): similarity_calc = region_similarity_calculator.IouSimilarity() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, unmatched_threshold=0.3) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) target_assigner = targetassigner.TargetAssigner( similarity_calc, matcher, box_coder) anchors_boxlist = box_list.BoxList(anchor_means) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result return (cls_targets, cls_weights, reg_targets, reg_weights)
def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_keypoints): similarity_calc = region_similarity_calculator.IouSimilarity() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, unmatched_threshold=0.5) box_coder = keypoint_box_coder.KeypointBoxCoder( num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0]) target_assigner = targetassigner.TargetAssigner( similarity_calc, matcher, box_coder) anchors_boxlist = box_list.BoxList(anchor_means) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) groundtruth_boxlist.add_field(fields.BoxListFields.keypoints, groundtruth_keypoints) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result return (cls_targets, cls_weights, reg_targets, reg_weights)
def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels, groundtruth_weights): similarity_calc = region_similarity_calculator.IouSimilarity() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, unmatched_threshold=0.5) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1) unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) target_assigner = targetassigner.TargetAssigner( similarity_calc, matcher, box_coder) anchors_boxlist = box_list.BoxList(anchor_means) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) result = target_assigner.assign( anchors_boxlist, groundtruth_boxlist, groundtruth_labels, unmatched_class_label=unmatched_class_label, groundtruth_weights=groundtruth_weights) (_, cls_weights, _, reg_weights, _) = result return (cls_weights, reg_weights)
def test_raises_error_on_invalid_groundtruth_labels(self): similarity_calc = region_similarity_calculator.NegSqDistSimilarity() matcher = bipartite_matcher.GreedyBipartiteMatcher() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0) unmatched_class_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32) target_assigner = targetassigner.TargetAssigner( similarity_calc, matcher, box_coder) prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]]) priors = box_list.BoxList(prior_means) box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9], [.75, 0, .95, .27]] boxes = box_list.BoxList(tf.constant(box_corners)) groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) with self.assertRaises(ValueError): target_assigner.assign(priors, boxes, groundtruth_labels, unmatched_class_label=unmatched_class_label)
def _create_model(self, model_fn=ssd_meta_arch.SSDMetaArch, apply_hard_mining=True, normalize_loc_loss_by_codesize=False, add_background_class=True, random_example_sampling=False, weight_regression_loss_by_score=False, use_expected_classification_loss_under_sampling=False, minimum_negative_sampling=1, desired_negative_sampling_ratio=3, use_keras=False, predict_mask=False, use_static_shapes=False, nms_max_size_per_class=5): is_training = False num_classes = 1 mock_anchor_generator = MockAnchorGenerator2x2() if use_keras: mock_box_predictor = test_utils.MockKerasBoxPredictor( is_training, num_classes, predict_mask=predict_mask) else: mock_box_predictor = test_utils.MockBoxPredictor( is_training, num_classes, predict_mask=predict_mask) mock_box_coder = test_utils.MockBoxCoder() if use_keras: fake_feature_extractor = FakeSSDKerasFeatureExtractor() else: fake_feature_extractor = FakeSSDFeatureExtractor() mock_matcher = test_utils.MockMatcher() region_similarity_calculator = sim_calc.IouSimilarity() encode_background_as_zeros = False def image_resizer_fn(image): return [tf.identity(image), tf.shape(image)] classification_loss = losses.WeightedSigmoidClassificationLoss() localization_loss = losses.WeightedSmoothL1LocalizationLoss() non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=-20.0, iou_thresh=1.0, max_size_per_class=nms_max_size_per_class, max_total_size=nms_max_size_per_class, use_static_shapes=use_static_shapes) classification_loss_weight = 1.0 localization_loss_weight = 1.0 negative_class_weight = 1.0 normalize_loss_by_num_matches = False hard_example_miner = None if apply_hard_mining: # This hard example miner is expected to be a no-op. hard_example_miner = losses.HardExampleMiner( num_hard_examples=None, iou_threshold=1.0) random_example_sampler = None if random_example_sampling: random_example_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=0.5) target_assigner_instance = target_assigner.TargetAssigner( region_similarity_calculator, mock_matcher, mock_box_coder, negative_class_weight=negative_class_weight, weight_regression_loss_by_score=weight_regression_loss_by_score) expected_classification_loss_under_sampling = None if use_expected_classification_loss_under_sampling: expected_classification_loss_under_sampling = functools.partial( ops.expected_classification_loss_under_sampling, minimum_negative_sampling=minimum_negative_sampling, desired_negative_sampling_ratio=desired_negative_sampling_ratio ) code_size = 4 model = model_fn( is_training=is_training, anchor_generator=mock_anchor_generator, box_predictor=mock_box_predictor, box_coder=mock_box_coder, feature_extractor=fake_feature_extractor, encode_background_as_zeros=encode_background_as_zeros, image_resizer_fn=image_resizer_fn, non_max_suppression_fn=non_max_suppression_fn, score_conversion_fn=tf.identity, classification_loss=classification_loss, localization_loss=localization_loss, classification_loss_weight=classification_loss_weight, localization_loss_weight=localization_loss_weight, normalize_loss_by_num_matches=normalize_loss_by_num_matches, hard_example_miner=hard_example_miner, target_assigner_instance=target_assigner_instance, add_summaries=False, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, freeze_batchnorm=False, inplace_batchnorm_update=False, add_background_class=add_background_class, random_example_sampler=random_example_sampler, expected_classification_loss_under_sampling= expected_classification_loss_under_sampling) return model, num_classes, mock_anchor_generator.num_anchors( ), code_size
def __init__(self, is_training, anchor_generator, box_predictor, box_coder, feature_extractor, matcher, region_similarity_calculator, encode_background_as_zeros, negative_class_weight, image_resizer_fn, non_max_suppression_fn, score_conversion_fn, classification_loss, localization_loss, classification_loss_weight, localization_loss_weight, normalize_loss_by_num_matches, hard_example_miner, add_summaries=True, normalize_loc_loss_by_codesize=False, freeze_batchnorm=False, inplace_batchnorm_update=False, add_background_class=True, random_example_sampler=None): """SSDMetaArch Constructor. TODO(rathodv,jonathanhuang): group NMS parameters + score converter into a class and loss parameters into a class and write config protos for postprocessing and losses. Args: is_training: A boolean indicating whether the training version of the computation graph should be constructed. anchor_generator: an anchor_generator.AnchorGenerator object. box_predictor: a box_predictor.BoxPredictor object. box_coder: a box_coder.BoxCoder object. feature_extractor: a SSDFeatureExtractor object. matcher: a matcher.Matcher object. region_similarity_calculator: a region_similarity_calculator.RegionSimilarityCalculator object. encode_background_as_zeros: boolean determining whether background targets are to be encoded as an all zeros vector or a one-hot vector (where background is the 0th class). negative_class_weight: Weight for confidence loss of negative anchors. image_resizer_fn: a callable for image resizing. This callable always takes a rank-3 image tensor (corresponding to a single image) and returns a rank-3 image tensor, possibly with new spatial dimensions and a 1-D tensor of shape [3] indicating shape of true image within the resized image tensor as the resized image tensor could be padded. See builders/image_resizer_builder.py. non_max_suppression_fn: batch_multiclass_non_max_suppression callable that takes `boxes`, `scores` and optional `clip_window` inputs (with all other inputs already set) and returns a dictionary hold tensors with keys: `detection_boxes`, `detection_scores`, `detection_classes` and `num_detections`. See `post_processing. batch_multiclass_non_max_suppression` for the type and shape of these tensors. score_conversion_fn: callable elementwise nonlinearity (that takes tensors as inputs and returns tensors). This is usually used to convert logits to probabilities. classification_loss: an object_detection.core.losses.Loss object. localization_loss: a object_detection.core.losses.Loss object. classification_loss_weight: float localization_loss_weight: float normalize_loss_by_num_matches: boolean hard_example_miner: a losses.HardExampleMiner object (can be None) add_summaries: boolean (default: True) controlling whether summary ops should be added to tensorflow graph. normalize_loc_loss_by_codesize: whether to normalize localization loss by code size of the box encoder. freeze_batchnorm: Whether to freeze batch norm parameters during training or not. When training with a small batch size (e.g. 1), it is desirable to freeze batch norm update and use pretrained batch norm params. inplace_batchnorm_update: Whether to update batch norm moving average values inplace. When this is false train op must add a control dependency on tf.graphkeys.UPDATE_OPS collection in order to update batch norm statistics. add_background_class: Whether to add an implicit background class to one-hot encodings of groundtruth labels. Set to false if using groundtruth labels with an explicit background class or using multiclass scores instead of truth in the case of distillation. random_example_sampler: a BalancedPositiveNegativeSampler object that can perform random example sampling when computing loss. If None, random sampling process is skipped. Note that random example sampler and hard example miner can both be applied to the model. In that case, random sampler will take effect first and hard example miner can only process the random sampled examples. """ super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) self._is_training = is_training self._freeze_batchnorm = freeze_batchnorm self._inplace_batchnorm_update = inplace_batchnorm_update # Needed for fine-tuning from classification checkpoints whose # variables do not have the feature extractor scope. self._extract_features_scope = 'FeatureExtractor' self._anchor_generator = anchor_generator self._box_predictor = box_predictor self._box_coder = box_coder self._feature_extractor = feature_extractor self._matcher = matcher self._region_similarity_calculator = region_similarity_calculator self._add_background_class = add_background_class # TODO(jonathanhuang): handle agnostic mode # weights unmatched_cls_target = None unmatched_cls_target = tf.constant([1] + self.num_classes * [0], tf.float32) if encode_background_as_zeros: unmatched_cls_target = tf.constant((self.num_classes + 1) * [0], tf.float32) self._target_assigner = target_assigner.TargetAssigner( self._region_similarity_calculator, self._matcher, self._box_coder, negative_class_weight=negative_class_weight, unmatched_cls_target=unmatched_cls_target) self._classification_loss = classification_loss self._localization_loss = localization_loss self._classification_loss_weight = classification_loss_weight self._localization_loss_weight = localization_loss_weight self._normalize_loss_by_num_matches = normalize_loss_by_num_matches self._normalize_loc_loss_by_codesize = normalize_loc_loss_by_codesize self._hard_example_miner = hard_example_miner self._random_example_sampler = random_example_sampler self._parallel_iterations = 16 self._image_resizer_fn = image_resizer_fn self._non_max_suppression_fn = non_max_suppression_fn self._score_conversion_fn = score_conversion_fn self._anchors = None self._add_summaries = add_summaries self._batched_prediction_tensor_names = []
def _build_ssd_model(ssd_config, is_training, add_summaries, add_background_class=True): """Builds an SSD detection model based on the model config. Args: ssd_config: A ssd.proto object containing the config for the desired SSDMetaArch. is_training: True if this model is being built for training purposes. add_summaries: Whether to add tf summaries in the model. add_background_class: Whether to add an implicit background class to one-hot encodings of groundtruth labels. Set to false if using groundtruth labels with an explicit background class or using multiclass scores instead of truth in the case of distillation. Returns: SSDMetaArch based on the config. Raises: ValueError: If ssd_config.type is not recognized (i.e. not registered in model_class_map). """ num_classes = ssd_config.num_classes # Feature extractor feature_extractor = _build_ssd_feature_extractor( feature_extractor_config=ssd_config.feature_extractor, is_training=is_training) box_coder = box_coder_builder.build(ssd_config.box_coder) matcher = matcher_builder.build(ssd_config.matcher) region_similarity_calculator = sim_calc.build( ssd_config.similarity_calculator) encode_background_as_zeros = ssd_config.encode_background_as_zeros negative_class_weight = ssd_config.negative_class_weight ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build, ssd_config.box_predictor, is_training, num_classes) anchor_generator = anchor_generator_builder.build( ssd_config.anchor_generator) image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer) non_max_suppression_fn, score_conversion_fn = post_processing_builder.build( ssd_config.post_processing) (classification_loss, localization_loss, classification_weight, localization_weight, hard_example_miner, random_example_sampler) = losses_builder.build(ssd_config.loss) normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize weight_regression_loss_by_score = ssd_config.weight_regression_loss_by_score target_assigner_instance = target_assigner.TargetAssigner( region_similarity_calculator, matcher, box_coder, negative_class_weight=negative_class_weight, weight_regression_loss_by_score=weight_regression_loss_by_score) expected_classification_loss_under_sampling = None if ssd_config.use_expected_classification_loss_under_sampling: expected_classification_loss_under_sampling = functools.partial( ops.expected_classification_loss_under_sampling, minimum_negative_sampling=ssd_config.minimum_negative_sampling, desired_negative_sampling_ratio=ssd_config.desired_negative_sampling_ratio) ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch return ssd_meta_arch_fn( is_training=is_training, anchor_generator=anchor_generator, box_predictor=ssd_box_predictor, box_coder=box_coder, feature_extractor=feature_extractor, encode_background_as_zeros=encode_background_as_zeros, image_resizer_fn=image_resizer_fn, non_max_suppression_fn=non_max_suppression_fn, score_conversion_fn=score_conversion_fn, classification_loss=classification_loss, localization_loss=localization_loss, classification_loss_weight=classification_weight, localization_loss_weight=localization_weight, normalize_loss_by_num_matches=normalize_loss_by_num_matches, hard_example_miner=hard_example_miner, target_assigner_instance=target_assigner_instance, add_summaries=add_summaries, normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize, freeze_batchnorm=ssd_config.freeze_batchnorm, inplace_batchnorm_update=ssd_config.inplace_batchnorm_update, add_background_class=add_background_class, random_example_sampler=random_example_sampler, expected_classification_loss_under_sampling=expected_classification_loss_under_sampling)