def __init__(self, anchor_ratios=(0.5, 1, 2), **kwargs):
    super().__init__(
        2,
        SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE,
                                      from_logits=True),
        L1Loss(reduction=tf.keras.losses.Reduction.NONE),
        multiples=len(anchor_ratios),
        kernel_initializer_classification_head=initializers.RandomNormal(stddev=0.01),
        kernel_initializer_box_prediction_head=initializers.RandomNormal(stddev=0.01),
        **kwargs)

    # Force each ground truth to match at least one anchor
    matcher = Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=True)
    self.target_assigner = TargetAssigner(IoUSimilarity(),
                                          matcher,
                                          encode_boxes_faster_rcnn,
                                          dtype=self._compute_dtype)

    anchor_strides = (4, 8, 16, 32, 64)
    anchor_sizes = (32, 64, 128, 256, 512)
    self._anchor_ratios = anchor_ratios
    # Precompute a deterministic grid of anchors for each layer of the pyramid.
    # We will extract a subpart of the anchors according to the shape of the feature maps.
    self._anchors = [
        Anchors(stride, size, self._anchor_ratios)
        for stride, size in zip(anchor_strides, anchor_sizes)
    ]
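# A minimal standalone sketch (not the library's Matcher) of what the
# [0.3, 0.7] thresholds with values [0, -1, 1] express: anchors whose best IoU
# with any ground truth is below 0.3 become background (0), between 0.3 and 0.7
# are ignored (-1), and above 0.7 become foreground (1). allow_low_quality_matches
# additionally forces every ground truth to claim at least one anchor.
import tensorflow as tf

def sketch_match_labels(ious):
    """ious: [num_anchors] best IoU of each anchor with any ground truth."""
    labels = tf.where(ious < 0.3, 0, -1)       # background below the low threshold
    labels = tf.where(ious >= 0.7, 1, labels)  # foreground above the high threshold
    return labels

# Example: anchors with IoUs 0.1, 0.5 and 0.8 -> [0, -1, 1]
print(sketch_match_labels(tf.constant([0.1, 0.5, 0.8])))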
def test_class_l1_loss():
    boxes1 = tf.constant([[[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                          [[4.0, 3.0, 7.0, 5.0], [0.0, 0.0, 0.0, 0.0]]])
    boxes2 = tf.constant([[[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]],
                          [[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]])
    assert 25. == L1Loss()(boxes1, boxes2)
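# Hand check of the expected value, assuming L1Loss sums the absolute coordinate
# differences per box and the default Keras reduction then averages over the
# 4 (batch x boxes) pairs:
import tensorflow as tf

b1 = tf.constant([[[4., 3., 7., 5.], [5., 6., 10., 7.]],
                  [[4., 3., 7., 5.], [0., 0., 0., 0.]]])
b2 = tf.constant([[[3., 4., 6., 8.], [14., 14., 15., 15.]],
                  [[3., 4., 6., 8.], [14., 14., 15., 15.]]])
per_box = tf.reduce_sum(tf.abs(b1 - b2), axis=-1)  # [[6., 30.], [6., 58.]]
print(tf.reduce_mean(per_box))  # (6 + 30 + 6 + 58) / 4 = 25.0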
def __init__(self, num_classes, backbone, num_queries=300, **kwargs):
    super().__init__(**kwargs)
    self.num_classes = num_classes
    self.num_queries = num_queries
    self.hidden_dim = 256
    self.backbone = backbone
    self.input_proj = tf.keras.layers.Conv2D(self.hidden_dim, 1)
    self.pos_embed = PositionEmbeddingSine(output_dim=self.hidden_dim)

    num_heads = 8
    self.transformer_num_layers = 6
    self.transformer = Transformer(num_layers=self.transformer_num_layers,
                                   d_model=self.hidden_dim,
                                   num_heads=num_heads,
                                   dim_feedforward=2048)

    # SMCA layers
    self.dyn_weight_map = DynamicalWeightMaps()
    self.ref_points = SMCAReferencePoints(self.hidden_dim, num_heads)

    self.bbox_embed = tf.keras.models.Sequential([
        tf.keras.layers.Dense(self.hidden_dim, activation='relu'),
        tf.keras.layers.Dense(self.hidden_dim, activation='relu'),
        tf.keras.layers.Dense(4, dtype=tf.float32)  # (x1, y1, x2, y2)
    ])
    self.class_embed = tf.keras.layers.Dense(num_classes + 1, dtype=tf.float32)

    # Learnable embedding matrix for all the queries:
    # a matrix of shape [num_queries, self.hidden_dim]
    self.query_embed = tf.keras.layers.Embedding(
        num_queries,
        self.hidden_dim,
        embeddings_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.))
    self.all_the_queries = tf.range(num_queries)

    # Loss computation
    self.weight_class, self.weight_l1, self.weight_giou = 2, 5, 2
    similarity_func = DetrSimilarity(self.weight_class, self.weight_l1, self.weight_giou)
    self.target_assigner = TargetAssigner(similarity_func,
                                          hungarian_matching,
                                          lambda gt, pred: gt,
                                          negative_class_weight=1.0)

    # Losses
    self.giou = tfa.losses.GIoULoss(reduction=tf.keras.losses.Reduction.NONE)
    self.l1 = L1Loss(reduction=tf.keras.losses.Reduction.NONE)
    self.focal_loss = tfa.losses.SigmoidFocalCrossEntropy(
        alpha=0.25,
        gamma=2,
        reduction=tf.keras.losses.Reduction.NONE,
        from_logits=True)

    # Metrics
    self.giou_metric = tf.keras.metrics.Mean(name="giou_last_layer")
    self.l1_metric = tf.keras.metrics.Mean(name="l1_last_layer")
    self.focal_loss_metric = tf.keras.metrics.Mean(name="focal_loss_last_layer")
    self.loss_metric = tf.keras.metrics.Mean(name="loss")
    self.precision_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    # Object recall = foreground
    self.recall_metric = tf.keras.metrics.Mean(name="object_recall")
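# A minimal standalone sketch of how such a query embedding layer can be turned
# into the [num_queries, hidden_dim] decoder input: the layer is indexed with
# tf.range(num_queries) and tiled per batch element. The model's actual forward
# pass may differ; this only illustrates the shapes involved.
import tensorflow as tf

num_queries, hidden_dim, batch_size = 300, 256, 2
query_embed = tf.keras.layers.Embedding(
    num_queries, hidden_dim,
    embeddings_initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.))
all_the_queries = tf.range(num_queries)

queries = query_embed(all_the_queries)                # [num_queries, hidden_dim]
batched = tf.tile(queries[None], (batch_size, 1, 1))  # [batch, num_queries, hidden_dim]
print(batched.shape)  # (2, 300, 256)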
def __init__(self, num_classes: int, backbone, num_queries=100, **kwargs):
    super().__init__(**kwargs)
    self.num_classes = num_classes
    self.num_queries = num_queries
    self.hidden_dim = 256
    self.backbone = backbone
    self.input_proj = tf.keras.layers.Conv2D(self.hidden_dim, 1)
    self.pos_embed = PositionEmbeddingSine(output_dim=self.hidden_dim)

    self.transformer_num_layers = 6
    self.transformer = Transformer(num_layers=self.transformer_num_layers,
                                   d_model=self.hidden_dim,
                                   num_heads=8,
                                   dim_feedforward=2048)

    self.bbox_embed = tf.keras.models.Sequential([
        tf.keras.layers.Dense(self.hidden_dim, activation='relu'),
        tf.keras.layers.Dense(self.hidden_dim, activation='relu'),
        tf.keras.layers.Dense(4, activation='sigmoid', dtype=tf.float32)  # (x1, y1, x2, y2)
    ])
    self.class_embed = tf.keras.layers.Dense(num_classes + 1, dtype=tf.float32)

    # Learnable embedding matrix for all the queries:
    # a matrix of shape [num_queries, self.hidden_dim]
    self.query_embed = tf.keras.layers.Embedding(num_queries, self.hidden_dim)
    self.all_the_queries = tf.range(num_queries)

    # Loss computation
    self.weight_class, self.weight_l1, self.weight_giou = 1, 5, 2
    similarity_func = DetrSimilarity(self.weight_class, self.weight_l1, self.weight_giou)
    self.target_assigner = TargetAssigner(similarity_func,
                                          hungarian_matching,
                                          lambda gt, pred: gt,
                                          negative_class_weight=1.0)

    # Relative classification weight applied to the no-object category.
    # It down-weights the log-probability term of a no-object prediction
    # by a factor of 10 to account for class imbalance.
    self.non_object_weight = tf.constant(0.1, dtype=self.compute_dtype)

    # Losses
    self.giou = GIoULoss(reduction=tf.keras.losses.Reduction.NONE)
    self.l1 = L1Loss(reduction=tf.keras.losses.Reduction.NONE)
    self.scc = SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE,
                                             from_logits=True)

    # Metrics
    self.giou_metric = tf.keras.metrics.Mean(name="giou_last_layer")
    self.l1_metric = tf.keras.metrics.Mean(name="l1_last_layer")
    self.scc_metric = tf.keras.metrics.Mean(name="scc_last_layer")
    self.loss_metric = tf.keras.metrics.Mean(name="loss")
    self.precision_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    # Object recall = foreground
    self.recall_metric = tf.keras.metrics.Mean(name="object_recall")
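# A minimal sketch, under the assumption that non_object_weight is applied as a
# per-query sample weight: queries assigned to the no-object class (label ==
# num_classes) contribute 0.1x to the cross-entropy, real objects contribute 1x.
import tensorflow as tf

num_classes, non_object_weight = 91, 0.1
scc = tf.keras.losses.SparseCategoricalCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE, from_logits=True)

labels = tf.constant([3, num_classes, num_classes])  # one object, two "no object"
logits = tf.random.normal((3, num_classes + 1))
weights = tf.where(labels == num_classes, non_object_weight, 1.0)
weighted_loss = scc(labels, logits) * weights        # [3] per-query losses
print(weighted_loss.shape)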
def __init__(self, num_classes, **kwargs):
    super().__init__(
        num_classes,
        SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE,
                                      from_logits=True),
        L1Loss(reduction=tf.keras.losses.Reduction.NONE),  # like in tensorpack
        kernel_initializer_classification_head=initializers.RandomNormal(stddev=0.01),
        kernel_initializer_box_prediction_head=initializers.RandomNormal(stddev=0.001),
        **kwargs)

    matcher = Matcher([0.5], [0, 1])
    # The same scale_factors are used for decoding as well
    encode = functools.partial(encode_boxes_faster_rcnn,
                               scale_factors=(10.0, 10.0, 5.0, 5.0))
    self.target_assigner = TargetAssigner(IoUSimilarity(),
                                          matcher,
                                          encode,
                                          dtype=self._compute_dtype)
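# A standalone sketch of the standard Faster R-CNN box encoding that a function
# like encode_boxes_faster_rcnn typically computes, assuming (y_min, x_min,
# y_max, x_max) boxes and scale_factors ordered as (y, x, h, w): the targets are
# the scaled center/size offsets of the ground-truth box relative to its anchor.
import tensorflow as tf

def _center_size(boxes):
    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=-1)
    return (ymin + ymax) / 2, (xmin + xmax) / 2, ymax - ymin, xmax - xmin

def sketch_encode(gt, anchors, scale_factors=(10.0, 10.0, 5.0, 5.0)):
    ya, xa, ha, wa = _center_size(anchors)
    y, x, h, w = _center_size(gt)
    ty = (y - ya) / ha * scale_factors[0]
    tx = (x - xa) / wa * scale_factors[1]
    th = tf.math.log(h / ha) * scale_factors[2]
    tw = tf.math.log(w / wa) * scale_factors[3]
    return tf.stack([ty, tx, th, tw], axis=-1)

print(sketch_encode(tf.constant([[0.2, 0.2, 0.6, 0.8]]),
                    tf.constant([[0.1, 0.1, 0.5, 0.7]])))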