def create_target_assigner(reference, stage=None,
                           negative_class_weight=1.0,
                           unmatched_cls_target=None):
    """Builds one of the pre-configured standard target assigners.

    Args:
      reference: string naming the type of TargetAssigner. The supported
        values are 'Multibox', 'FasterRCNN' and 'FastRCNN'.
      stage: string denoting the stage: {proposal, detection}. It is not
        consulted when reference is 'FastRCNN'.
      negative_class_weight: classification weight to be associated to
        negative anchors (default: 1.0).
      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
        which is consistent with the classification target for each anchor
        (and can be empty for scalar targets). This shape must thus be
        compatible with the groundtruth labels that are passed to the Assign
        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If
        set to None, unmatched_cls_target is set to be 0 for each anchor.

    Returns:
      TargetAssigner: the assigner for the requested combination.

    Raises:
      ValueError: if the reference+stage combination is not supported.
    """
    if reference == 'FastRCNN':
        similarity = sim_calc.IouSimilarity()
        matcher = argmax_matcher.ArgMaxMatcher(
            matched_threshold=0.5,
            unmatched_threshold=0.1,
            force_match_for_each_row=False,
            negatives_lower_than_unmatched=False)
        coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    elif reference == 'FasterRCNN' and stage in ('proposal', 'detection'):
        # Both FasterRCNN stages share the similarity measure and the box
        # coder scaling; only the matcher configuration differs.
        similarity = sim_calc.IouSimilarity()
        if stage == 'proposal':
            matcher = argmax_matcher.ArgMaxMatcher(
                matched_threshold=0.7,
                unmatched_threshold=0.3,
                force_match_for_each_row=True)
        else:
            # Uses all proposals with IOU < 0.5 as candidate negatives.
            matcher = argmax_matcher.ArgMaxMatcher(
                matched_threshold=0.5,
                negatives_lower_than_unmatched=True)
        coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=[10.0, 10.0, 5.0, 5.0])
    elif reference == 'Multibox' and stage == 'proposal':
        similarity = sim_calc.NegSqDistSimilarity()
        matcher = bipartite_matcher.GreedyBipartiteMatcher()
        coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    else:
        raise ValueError('No valid combination of reference and stage.')

    return TargetAssigner(
        similarity,
        matcher,
        coder,
        negative_class_weight=negative_class_weight,
        unmatched_cls_target=unmatched_cls_target)
def create_target_assigner(reference, stage=None,
                           negative_class_weight=1.0,
                           use_matmul_gather=False):
    """Builds one of the pre-configured standard target assigners.

    Args:
      reference: string naming the type of TargetAssigner. The supported
        values are 'Multibox', 'FasterRCNN' and 'FastRCNN'.
      stage: string denoting the stage: {proposal, detection}. It is not
        consulted when reference is 'FastRCNN'.
      negative_class_weight: classification weight to be associated to
        negative anchors (default: 1.0).
      use_matmul_gather: whether to use matrix multiplication based gather,
        which is better suited for TPUs. It is forwarded to every
        ArgMaxMatcher built here; the 'Multibox' matcher does not receive it.

    Returns:
      TargetAssigner: the assigner for the requested combination.

    Raises:
      ValueError: if the reference+stage combination is not supported.
    """
    if reference == 'FastRCNN':
        similarity = sim_calc.IouSimilarity()
        matcher = argmax_matcher.ArgMaxMatcher(
            matched_threshold=0.5,
            unmatched_threshold=0.1,
            force_match_for_each_row=False,
            negatives_lower_than_unmatched=False,
            use_matmul_gather=use_matmul_gather)
        coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    elif reference == 'FasterRCNN' and stage in ('proposal', 'detection'):
        # Both FasterRCNN stages share the similarity measure and the box
        # coder scaling; only the matcher configuration differs.
        similarity = sim_calc.IouSimilarity()
        if stage == 'proposal':
            matcher = argmax_matcher.ArgMaxMatcher(
                matched_threshold=0.7,
                unmatched_threshold=0.3,
                force_match_for_each_row=True,
                use_matmul_gather=use_matmul_gather)
        else:
            # Uses all proposals with IOU < 0.5 as candidate negatives.
            matcher = argmax_matcher.ArgMaxMatcher(
                matched_threshold=0.5,
                negatives_lower_than_unmatched=True,
                use_matmul_gather=use_matmul_gather)
        coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=[10.0, 10.0, 5.0, 5.0])
    elif reference == 'Multibox' and stage == 'proposal':
        similarity = sim_calc.NegSqDistSimilarity()
        matcher = bipartite_matcher.GreedyBipartiteMatcher()
        coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    else:
        raise ValueError('No valid combination of reference and stage.')

    return TargetAssigner(
        similarity,
        matcher,
        coder,
        negative_class_weight=negative_class_weight)
def encode_labels(gt_boxes, gt_labels):
    """Assigns groundtruth boxes and classes to the default anchors.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of
        a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      encoded_classes: a tensor with shape [num_anchors, 1].
      encoded_boxes: a tensor with shape [num_anchors, 4].
      num_positives: scalar float32 tensor counting the anchors whose match
        result is not -1.
    """
    # The same threshold is used for both the matched and unmatched bounds.
    threshold = ssd_constants.MATCH_THRESHOLD
    iou = region_similarity_calculator.IouSimilarity()
    anchor_matcher = argmax_matcher.ArgMaxMatcher(
        matched_threshold=threshold,
        unmatched_threshold=threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)

    anchors = box_list.BoxList(
        tf.convert_to_tensor(DefaultBoxes()('ltrb')))
    groundtruth = box_list.BoxList(gt_boxes)

    assigner = target_assigner.TargetAssigner(iou, anchor_matcher, coder)
    encoded_classes, _, encoded_boxes, _, matches = assigner.assign(
        anchors, groundtruth, gt_labels)

    # Count every anchor whose match result differs from -1.
    is_matched = tf.not_equal(matches.match_results, -1)
    num_matched_boxes = tf.reduce_sum(tf.cast(is_matched, tf.float32))
    return encoded_classes, encoded_boxes, num_matched_boxes
def build(box_coder_config):
    """Creates the box coder described by a box coder config.

    Args:
      box_coder_config: A box_coder.proto object containing the config for
        the desired box coder.

    Returns:
      BoxCoder based on the config.

    Raises:
      ValueError: If the config is not a box_coder_pb2.BoxCoder, or if none
        of the supported oneof fields is set.
    """
    if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
        raise ValueError(
            'box_coder_config not of type box_coder_pb2.BoxCoder.')

    # Resolve the populated oneof field once and dispatch on its name.
    selected = box_coder_config.WhichOneof('box_coder_oneof')

    if selected == 'faster_rcnn_box_coder':
        cfg = box_coder_config.faster_rcnn_box_coder
        return faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=[
                cfg.y_scale,
                cfg.x_scale,
                cfg.height_scale,
                cfg.width_scale,
            ])

    if selected == 'mean_stddev_box_coder':
        return mean_stddev_box_coder.MeanStddevBoxCoder()

    if selected == 'square_box_coder':
        cfg = box_coder_config.square_box_coder
        return square_box_coder.SquareBoxCoder(
            scale_factors=[
                cfg.y_scale,
                cfg.x_scale,
                cfg.length_scale,
            ])

    raise ValueError('Empty box coder.')
def build(box_coder_config):
    """Creates the box coder described by a box coder config.

    Args:
      box_coder_config: A box_coder_pb2.BoxCoder message whose
        'box_coder_oneof' field selects the coder to build.

    Returns:
      The FasterRcnnBoxCoder, KeypointBoxCoder, MeanStddevBoxCoder or
      SquareBoxCoder matching the populated oneof field.

    Raises:
      ValueError: If the config is not a box_coder_pb2.BoxCoder, or if none
        of the supported oneof fields is set.
    """
    if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
        raise ValueError(
            'box_coder_config not of type box_coder_pb2.BoxCoder.')

    # Resolve the populated oneof field once and dispatch on its name.
    selected = box_coder_config.WhichOneof('box_coder_oneof')

    if selected == 'faster_rcnn_box_coder':
        cfg = box_coder_config.faster_rcnn_box_coder
        return faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=[
                cfg.y_scale,
                cfg.x_scale,
                cfg.height_scale,
                cfg.width_scale,
            ])

    if selected == 'keypoint_box_coder':
        cfg = box_coder_config.keypoint_box_coder
        return keypoint_box_coder.KeypointBoxCoder(
            cfg.num_keypoints,
            scale_factors=[
                cfg.y_scale,
                cfg.x_scale,
                cfg.height_scale,
                cfg.width_scale,
            ])

    if selected == 'mean_stddev_box_coder':
        return mean_stddev_box_coder.MeanStddevBoxCoder(
            stddev=box_coder_config.mean_stddev_box_coder.stddev)

    if selected == 'square_box_coder':
        cfg = box_coder_config.square_box_coder
        return square_box_coder.SquareBoxCoder(
            scale_factors=[
                cfg.y_scale,
                cfg.x_scale,
                cfg.length_scale,
            ])

    raise ValueError('Empty box coder.')
def __init__(self,
             nonbackground_classes: int,
             loc_weight: float = 1.,
             conf_weight: float = 1.,
             nms_redund_threshold: float = 0.2,
             min_score_threshold: float = 0.01,
             top_k_per_class: int = 100,
             ohem: bool = False,
             neg_loss_weight: float = 1.,
             obb: bool = False):
    """Stores the configuration and picks the box coder.

    Args:
      nonbackground_classes: number of classes excluding background;
        `num_classes` is set to this value plus one.
      loc_weight: stored as `_loc_weight`.
      conf_weight: stored as `_conf_weight`.
      nms_redund_threshold: stored as `_nms_redund_threshold`.
      min_score_threshold: stored as `_min_score_threshold`.
      top_k_per_class: stored as `_top_k_per_class`.
      ohem: stored as `_ohem`.
      neg_loss_weight: stored as `_neg_loss_weight`.
      obb: when true an OrientedBoxCoder is used, otherwise a
        FasterRcnnBoxCoder with default arguments.
    """
    # Class bookkeeping: one extra slot on top of the non-background classes.
    self.num_nonbackground_classes = nonbackground_classes
    self.num_classes = nonbackground_classes + 1

    # The box coder depends on whether oriented boxes were requested.
    self.obb = obb
    if obb:
        self.box_coder = oriented_box_coder.OrientedBoxCoder()
    else:
        self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    # Remaining settings are stored unchanged.
    self._ohem = ohem
    self._neg_loss_weight = neg_loss_weight
    self._loc_weight = loc_weight
    self._conf_weight = conf_weight
    self._nms_redund_threshold = nms_redund_threshold
    self._min_score_threshold = min_score_threshold
    self._top_k_per_class = top_k_per_class
def graph_fn(rel_codes, anchors):
    """Decodes `rel_codes` against `anchors` with scale factors [2, 3, 4, 5].

    Args:
      rel_codes: encoded boxes passed straight to the coder's `decode`.
      anchors: anchor coordinates, wrapped in a BoxList before decoding.

    Returns:
      The value returned by `get()` on the decoded box list.
    """
    anchor_boxlist = box_list.BoxList(anchors)
    scaled_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    decoded = scaled_coder.decode(rel_codes, anchor_boxlist)
    return decoded.get()
def create_coloc_target_assigner(reference, stage=None,
                                 positive_class_weight=1.0,
                                 negative_class_weight=1.0,
                                 unmatched_cls_target=None,
                                 k_shot=1,
                                 independent_matching=False):
    """Builds the target assigner used for co-localization.

    Args:
      reference: string referencing the type of TargetAssigner. Only
        'RCNNAttention' is supported.
      stage: string denoting the stage. Only 'coloc' is supported.
      positive_class_weight: classification weight to be associated to
        positive anchors (default: 1.0).
      negative_class_weight: classification weight to be associated to
        negative anchors (default: 1.0).
      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
        which is consistent with the classification target for each anchor
        (and can be empty for scalar targets). This shape must thus be
        compatible with the groundtruth labels that are passed to the Assign
        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If
        set to None, unmatched_cls_target is set to be 0 for each anchor.
      k_shot: forwarded unchanged to KShotMatcher (default: 1).
      independent_matching: forwarded unchanged to KShotMatcher
        (default: False).

    Returns:
      TargetAssigner: desired target assigner.

    Raises:
      ValueError: if combination reference+stage is invalid.
    """
    if not (reference == 'RCNNAttention' and stage == 'coloc'):
        raise ValueError('No valid combination of reference and stage.')

    iou_similarity = sim_calc.IouSimilarity()
    # Uses all proposals with IOU < 0.5 as candidate negatives.
    argmax = argmax_matcher.ArgMaxMatcher(
        matched_threshold=0.5,
        negatives_lower_than_unmatched=True)
    matcher = KShotMatcher(iou_similarity, argmax, k_shot,
                           independent_matching)

    # We do not use the similarity calculator inside the target assigner.
    # Matches will be decided before calling the target_assigner, so a
    # placeholder calculator that compares nothing is handed over instead.
    class _DummySimilarity(sim_calc.RegionSimilarityCalculator):

        def _compare(self, boxlist1, boxlist2):
            return None

    placeholder_similarity = _DummySimilarity()
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[10.0, 10.0, 5.0, 5.0])

    return target_assign.TargetAssigner(
        placeholder_similarity,
        matcher,
        coder,
        positive_class_weight=positive_class_weight,
        negative_class_weight=negative_class_weight,
        unmatched_cls_target=unmatched_cls_target)
def graph_fn(boxes, anchors):
    """Encodes `boxes` relative to `anchors` with scale factors [2, 3, 4, 5].

    Args:
      boxes: box coordinates, wrapped in a BoxList before encoding.
      anchors: anchor coordinates, wrapped in a BoxList before encoding.

    Returns:
      The relative codes produced by the coder's `encode`.
    """
    box_boxlist = box_list.BoxList(boxes)
    anchor_boxlist = box_list.BoxList(anchors)
    scaled_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    return scaled_coder.encode(box_boxlist, anchor_boxlist)
def accuracy_function(self, logits, labels, data_type):
    """Returns the ops to measure the mean precision of the model.

    Args:
      logits: model output; split along axis 2 into 4 location values and
        `self.label_num` class scores per box.
      labels: tensor split along axis 1 into
        `ssd_constants.MAX_NUM_EVAL_BOXES` rows of boxes+classes and one row
        holding the source id and raw shape.
      data_type: unused by this implementation.

    Returns:
      A dict keyed by `constants.UNREDUCED_ACCURACY_OP_PREFIX` plus the
      `ssd_constants` names PRED_BOXES, PRED_SCORES, SOURCE_ID and RAW_SHAPE,
      holding the decoded boxes, softmax scores, source ids and raw shapes.

    Raises:
      ImportError: if `ssd_dataloader` or the `object_detection` modules
        cannot be imported; the original import failure is chained as the
        cause.
    """
    try:
        import ssd_dataloader  # pylint: disable=g-import-not-at-top
        from object_detection.box_coders import faster_rcnn_box_coder  # pylint: disable=g-import-not-at-top
        from object_detection.core import box_coder  # pylint: disable=g-import-not-at-top
        from object_detection.core import box_list  # pylint: disable=g-import-not-at-top
    except ImportError as err:
        # Adjacent literals are concatenated verbatim, so each fragment must
        # carry its own trailing space where words would otherwise run
        # together.
        raise ImportError('To use the COCO dataset, you must clone the '
                          'repo https://github.com/tensorflow/models and add '
                          'tensorflow/models and tensorflow/models/research to '
                          'the PYTHONPATH, and compile the protobufs by '
                          'following https://github.com/tensorflow/models/blob/'
                          'master/research/object_detection/g3doc/installation.md'
                          '#protobuf-compilation ; To evaluate using COCO '
                          'metric, download and install Python COCO API from '
                          'https://github.com/cocodataset/cocoapi') from err

    # Unpack model output back to locations and confidence scores of
    # predictions.
    # pred_locs: relative locations (coordinates) of objects in all SSD boxes
    # shape: [batch_size, NUM_SSD_BOXES, 4]
    # pred_labels: confidence scores of objects being of all categories
    # shape: [batch_size, NUM_SSD_BOXES, label_num]
    pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)

    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)
    anchors = box_list.BoxList(
        tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
    pred_boxes = box_coder.batch_decode(
        encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)

    pred_scores = tf.nn.softmax(pred_labels, axis=2)

    boxes_classes, id_shape = tf.split(
        labels, [ssd_constants.MAX_NUM_EVAL_BOXES, 1], 1)
    # TODO(haoyuzhang): maybe use these values for visualization.
    gt_boxes, gt_classes = tf.split(boxes_classes, [4, 1], 2)  # pylint: disable=unused-variable

    id_shape = tf.squeeze(id_shape, 1)
    source_id, raw_shape, _ = tf.split(id_shape, [1, 3, 1], 1)
    source_id = tf.squeeze(source_id, 1)

    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_BOXES): pred_boxes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_SCORES): pred_scores,
        # TODO(haoyuzhang): maybe use these values for visualization.
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.SOURCE_ID): source_id,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.RAW_SHAPE): raw_shape
    }
def test_very_small_Width_nan_after_encoding(self):
    """Encoding a near-degenerate box yields the fixed expected codes.

    The box's first and third coordinates are almost identical; the encoded
    result is compared against hard-coded finite values.
    """
    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
    box_boxlist = box_list.BoxList(
        tf.constant([[10.0, 10.0, 10.0000001, 20.0]]))
    anchor_boxlist = box_list.BoxList(
        tf.constant([[15.0, 12.0, 30.0, 18.0]]))

    default_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = default_coder.encode(box_boxlist, anchor_boxlist)

    with self.test_session() as sess:
        rel_codes_out = sess.run(rel_codes)
        self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
    """Decoding with the default coder reproduces the expected boxes."""
    rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                 [-0.083333, -0.222222, -0.693147, -1.098612]]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0],
                      [0.2, 0.1, 0.5, 0.4]]
    anchor_boxlist = box_list.BoxList(
        tf.constant([[15.0, 12.0, 30.0, 18.0],
                     [0.1, 0.0, 0.7, 0.9]]))

    default_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    decoded = default_coder.decode(rel_codes, anchor_boxlist)

    with self.test_session() as sess:
        boxes_out = sess.run(decoded.get())
        self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
    """Decoding with scale factors [2, 3, 4, 5] reproduces the expected boxes."""
    rel_codes = [[-1., -1.25, -1.62186, -0.911608],
                 [-0.166667, -0.666667, -2.772588, -5.493062]]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0],
                      [0.2, 0.1, 0.5, 0.4]]
    anchor_boxlist = box_list.BoxList(
        tf.constant([[15.0, 12.0, 30.0, 18.0],
                     [0.1, 0.0, 0.7, 0.9]]))

    scaled_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    decoded = scaled_coder.decode(rel_codes, anchor_boxlist)

    with self.test_session() as sess:
        boxes_out = sess.run(decoded.get())
        self.assertAllClose(boxes_out, expected_boxes)
def __init__(self):
    """Prepares the default anchor boxes and the target assigner.

    Sets `self.default_boxes` to a BoxList of the 'ltrb' default boxes and
    `self.assigner` to a TargetAssigner built from an IoU similarity, an
    argmax matcher and a Faster R-CNN box coder.
    """
    # The same threshold is used for both the matched and unmatched bounds.
    threshold = ssd_constants.MATCH_THRESHOLD
    iou = region_similarity_calculator.IouSimilarity()
    anchor_matcher = argmax_matcher.ArgMaxMatcher(
        matched_threshold=threshold,
        unmatched_threshold=threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)

    raw_default_boxes = DefaultBoxes()('ltrb')
    self.default_boxes = box_list.BoxList(
        tf.convert_to_tensor(raw_default_boxes))
    self.assigner = target_assigner.TargetAssigner(
        iou, anchor_matcher, coder)
def test_get_correct_relative_codes_after_encoding(self):
    """Encoding with the default coder yields the expected relative codes."""
    print('\n=================================================================')
    print('test_get_correct_relative_codes_after_encoding')

    expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                          [-0.083333, -0.222222, -0.693147, -1.098612]]
    # Coordinates are given as [y1, x1, y2, x2].
    box_boxlist = box_list.BoxList(
        tf.constant([[10.0, 10.0, 20.0, 15.0],
                     [0.2, 0.1, 0.5, 0.4]]))
    anchor_boxlist = box_list.BoxList(
        tf.constant([[15.0, 12.0, 30.0, 18.0],
                     [0.1, 0.0, 0.7, 0.9]]))

    default_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = default_coder.encode(box_boxlist, anchor_boxlist)

    with self.test_session() as sess:
        rel_codes_out = sess.run(rel_codes)
        self.assertAllClose(rel_codes_out, expected_rel_codes)
def graph_fn(boxes, anchors):
    """Encodes `boxes` relative to `anchors` with a default-configured coder.

    Args:
      boxes: box coordinates, wrapped in a BoxList before encoding.
      anchors: anchor coordinates, wrapped in a BoxList before encoding.

    Returns:
      The relative codes produced by the coder's `encode`.
    """
    box_boxlist = box_list.BoxList(boxes)
    anchor_boxlist = box_list.BoxList(anchors)
    default_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    return default_coder.encode(box_boxlist, anchor_boxlist)
def main(out_dir,
         image_dir,
         annotations_path,
         SSD_class,
         desired_categories=None,
         win_set=None,
         min_coverage=.3,
         num_out_files=30):
    """Writes SSD training examples to sharded TFRecord files.

    Every window yielded by the Preprocessor is resized, its groundtruth is
    turned into SSD targets, and the serialized example is written
    round-robin to `examples_<i>.tfrecord` inside `out_dir`. A `meta.json`
    holding the example count and the class list is written at the end.

    Args:
      out_dir: output directory; created if it does not exist.
      image_dir: forwarded to the Preprocessor.
      annotations_path: path handed to `get_annotations`.
      SSD_class: object providing `get_input_dims()`, `get_anchors()` and
        `get_unmatched_class_target(num_classes)`.
      desired_categories: category names to keep. When empty or None a
        built-in list of 15 categories is used.
      win_set: forwarded to the Preprocessor.
      min_coverage: forwarded to the Preprocessor (default: 0.3).
      num_out_files: number of TFRecord shards to write (default: 30).

    Raises:
      AssertionError: if the number of resolved category ids differs from
        the number of requested categories.
    """
    os.makedirs(out_dir, exist_ok=True)
    annotations = get_annotations(annotations_path)

    if not desired_categories:
        desired_categories = [
            'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
            'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
            'basketball-court', 'storage-tank', 'soccer-ball-field',
            'roundabout', 'harbor', 'swimming-pool', 'helicopter'
        ]
    desired_ids = construct_desired_ids(desired_categories,
                                        annotations['categories'])
    assert len(desired_ids) == len(desired_categories)

    # construct dictionaries containing info about images
    (images_dict, file_name_dict) = construct_dicts(annotations)

    # create category index in the correct format for retraining and detection
    category_index = construct_category_index(annotations, desired_categories)
    label_id_offsets = calculate_label_id_offsets(category_index)
    num_nonbackground_classes = len(desired_categories)

    input_dims = SSD_class.get_input_dims()
    default_boxes = SSD_class.get_anchors()
    unmatched_class_target = SSD_class.get_unmatched_class_target(
        num_nonbackground_classes)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    filenames = [
        os.path.join(out_dir, f"examples_{i}.tfrecord")
        for i in range(num_out_files)
    ]

    preprocessor = Preprocessor(images_dict,
                                file_name_dict,
                                image_dir,
                                annotations,
                                category_index,
                                win_set=win_set,
                                min_coverage=min_coverage)

    count = 0
    writers = []
    try:
        # Writers are opened inside the try block so that any writer already
        # created is closed even if a later open, or the processing loop,
        # raises.
        for path in filenames:
            writers.append(tf.io.TFRecordWriter(path))

        for (window_np, gt_boxes, gt_classes) in preprocessor.iterate():
            window_np = tf.image.resize(window_np, input_dims)

            [gt_classes] = map_category_ids_to_index(label_id_offsets,
                                                     [gt_classes])
            gt_classes = tf.constant(gt_classes, dtype=tf.int32)
            gt_classes = tf.one_hot(gt_classes,
                                    num_nonbackground_classes,
                                    dtype=tf.float32)
            # Prepend an all-zero column in front of the one-hot classes
            # (presumably the background slot -- confirm against the model).
            zeros = tf.zeros([gt_classes.shape[0], 1], gt_classes.dtype)
            gt_classes = tf.concat([zeros, gt_classes], axis=1)

            targets_set = compute_ssd_targets([gt_boxes], [gt_classes],
                                              default_boxes, box_coder,
                                              unmatched_class_target)
            # Unpacking fails loudly unless exactly five targets came back.
            [cls_targets, cls_weights, reg_targets, reg_weights,
             matched] = [tf.squeeze(t) for t in targets_set]

            serialized = serialize_ssd_example(window_np, cls_targets,
                                               cls_weights, reg_targets,
                                               reg_weights, matched)
            # Spread examples over the shards round-robin.
            writers[count % len(writers)].write(serialized)
            count += 1
    finally:
        for w in writers:
            w.close()

    out_meta = os.path.join(out_dir, "meta.json")
    with open(out_meta, "w") as w:
        json_string = json.dumps(
            {
                "num_examples": count,
                "classes": list(desired_categories)
            },
            indent=2)
        w.write(json_string + "\n")
def graph_fn(rel_codes, anchors):
    """Decodes `rel_codes` against `anchors` with a default-configured coder.

    Args:
      rel_codes: encoded boxes passed straight to the coder's `decode`.
      anchors: anchor coordinates, wrapped in a BoxList before decoding.

    Returns:
      The value returned by `get()` on the decoded box list.
    """
    anchor_boxlist = box_list.BoxList(anchors)
    default_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    decoded = default_coder.decode(rel_codes, anchor_boxlist)
    return decoded.get()