""" # compute usefull metrics #Even if the softmax has not been applyed the argmax can be usefull prediction = tf.argmax(y_pred, axis=-1, name='label_prediction', output_type=tf.int32) correct = tf.cast(prediction == y_true, tf.float32) # The accuracy allows to determine if the models perform well (background included) accuracy = tf.reduce_mean(correct, name='accuracy') # Compute accuracy and false negative on all the foreground boxes fg_inds = tf.where(y_true > 0) num_fg = tf.shape(fg_inds)[0] fg_label_pred = tf.argmax(tf.gather_nd(y_pred, fg_inds), axis=-1) num_zero = tf.reduce_sum(tf.cast(tf.equal(fg_label_pred, 0), tf.int32), name='num_zero') # Number of example predicted as background instead of one of our classes false_negative = tf.cast(tf.truediv(num_zero, num_fg), tf.float32, name='false_negative') fg_accuracy = tf.reduce_mean(tf.gather_nd(correct, fg_inds), name='fg_accuracy') return accuracy, fg_accuracy, false_negative remove_unwanted_doc(FastRCNN, __pdoc__)
**kwargs) def compute_detr_metrics(y_true: tf.Tensor, y_pred: tf.Tensor): """Useful metrics that allows to track how behave the training. Arguments: y_true: A one-hot encoded vector with shape [batch_size, num_object_queries, num_classes] y_pred: A tensor with shape [batch_size, num_object_queries, num_classes], representing the classification logits. Returns: tf.Tensor: Recall Among all the boxes that we had to find how much did we found. """ #Even if the softmax has not been applyed the argmax can be usefull prediction = tf.argmax(y_pred, axis=-1, name='label_prediction', output_type=tf.int32) correct = tf.cast(prediction == y_true, tf.float32) # Compute accuracy and false negative on all the foreground boxes fg_inds = tf.where(y_true > 0) recall = tf.reduce_mean(tf.gather_nd(correct, fg_inds), name='recall') return recall remove_unwanted_doc(DeTr, __pdoc__) remove_unwanted_doc(DeTrResnet50, __pdoc__) remove_unwanted_doc(DeTrResnet50Pytorch, __pdoc__)
not in inference. For the serving only the `images` and `images_information` are defined. It means the inputs link to the ground_truths won't be defined in serving. However, in tensorflow when the `training` arguments is defined int the method `call`, `tf.save_model.save` method performs a check on the graph for training=False and training=True. However, we don't want this check to be perform because our ground_truths inputs aren't defined. """ self._serving = True call_output = self.serving_step.get_concrete_function() tf.saved_model.save(self, filepath, signatures={'serving_default': call_output}) self._serving = False class FasterRcnnFPNResnet50Caffe(FasterRcnnFPN): def __init__(self, num_classes, **kwargs): resnet = ResNet50(input_shape=[None, None, 3], weights='imagenet') super().__init__(num_classes, resnet, **kwargs) class FasterRcnnFPNResnet50Pytorch(FasterRcnnFPN): def __init__(self, num_classes, **kwargs): resnet = ResNet50PytorchStyle(input_shape=[None, None, 3], weights='imagenet') super().__init__(num_classes, resnet, **kwargs) remove_unwanted_doc(FasterRcnnFPN, __pdoc__) remove_unwanted_doc(FasterRcnnFPNResnet50Caffe, __pdoc__) remove_unwanted_doc(FasterRcnnFPNResnet50Pytorch, __pdoc__)
self._ratios = ratios self._anchors = generate_anchors(stride, tf.constant([scales], self._compute_dtype), tf.constant(ratios, self._compute_dtype), max_size=MAX_IMAGE_DIMENSION) def call(self, inputs): """Return anchors based on the shape of the input tensors Arguments: inputs: A tensor of shape [batch_size, height, widht, channel] Returns: tf.Tensor: A tensor of shape [num_scales * num_ratios * height * width, 4]. The anchors have the format [y_min, x_min, y_max, x_max]. """ shape = tf.shape(inputs) height, width = shape[1], shape[2] anchors = self._anchors[:height, :width] return tf.reshape(anchors, (-1, 4)) def get_config(self): config = super().get_config() config['stride'] = self._stride config['scales'] = self._scales config['ratios'] = self._ratios return config remove_unwanted_doc(Anchors, __pdoc__)
1 = foreground. y_pred: A tensor of shape [batch_size, num_anchors, 2], representing the classification logits. weights: A tensor of shape [batch_size, num_anchors] where weights should Returns: tf.Tensor: Recall, among all the boxes that we had to find how much did we found. """ # Force the cast to avoid type issue when the mixed precision is activated y_true, y_pred, weights = tf.cast(y_true, tf.float32), tf.cast( y_pred, tf.float32), tf.cast(weights, tf.float32) # Sometimes the weights have decimal value we do not want that weights = tf.clip_by_value(tf.math.ceil(weights), 0, 1) masked_y_true = y_true * weights prediction = tf.cast(tf.argmax(y_pred, axis=-1, name='label_prediction'), tf.float32) * weights # 0 or 1 correct = tf.cast(tf.equal(prediction, masked_y_true), tf.float32) fg_inds = tf.where(masked_y_true == 1) num_valid_anchor = tf.math.count_nonzero(masked_y_true) num_pos_foreground_prediction = tf.math.count_nonzero( tf.gather_nd(correct, fg_inds)) recall = tf.truediv(num_pos_foreground_prediction, num_valid_anchor, name='recall') return recall remove_unwanted_doc(RegionProposalNetwork, __pdoc__)
# mean=0 and var= 1 * depth_k. QK^T/sqrt(depth_k) => mean=0 and var=1 scaled_attention_logits = matmul_qk / tf.math.sqrt( tf.cast(self.depth, self.compute_dtype)) if attn_mask is not None: scaled_attention_logits += attn_mask if key_padding_mask is not None: # Apply -inf if the pixels is a padding # False means padded so we take: not key_padding_mask scaled_attention_logits = tf.where( ~key_padding_mask[:, None, None], tf.zeros_like(scaled_attention_logits) + float('-inf'), scaled_attention_logits) # softmax is normalized on the last axis (seq_len_k) so that the scores # add up to 1. # (..., seq_len_q, seq_len_k) attention_weights = self.softmax(scaled_attention_logits) attention_weights = self.dropout(attention_weights, training=training) scaled_attention = tf.matmul(attention_weights, value) # (batch_size, seq_len_q, nh, depth) scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # (batch_size, seq_len_q, d_model) concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model)) return self.dense(concat_attention) remove_unwanted_doc(MultiHeadAttention, __pdoc__)
name=f'{self.name}_segmentation_loss', aggregation='mean') self.add_loss(segmentation_loss) return { BoxField.LABELS: classification_loss, BoxField.BOXES: localization_loss, BoxField.MASKS: segmentation_loss } return { BoxField.LABELS: classification_loss, BoxField.BOXES: localization_loss } def get_config(self): base_config = super().get_config() base_config['num_classes'] = self._num_classes base_config[ 'classification_loss_weight'] = self._classification_loss_weight base_config[ 'localization_loss_weight'] = self._localization_loss_weight base_config['multiples'] = self._multiples base_config['use_mask'] = self._use_mask if self._use_mask: base_config[ 'segmentation_loss_weight'] = self._segmentation_loss_weight return base_config remove_unwanted_doc(AbstractDetectionHead, __pdoc__)
# Normalize x_embed and y_embed by the maximum values of the cumsum eps = 1e-6 y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale pos_x = x_embed[..., None] / self.dim_t pos_y = y_embed[..., None] / self.dim_t pos_x = tf.stack([ tf.math.sin(pos_x[..., 0::2]), tf.math.cos(pos_x[..., 1::2]), ], axis=4) pos_y = tf.stack([ tf.math.sin(pos_y[..., 0::2]), tf.math.cos(pos_y[..., 1::2]), ], axis=4) batch_size, h, w = tf.shape(masks)[0], tf.shape(masks)[1], tf.shape( masks)[2] pos_x = tf.reshape(pos_x, (batch_size, h, w, -1)) pos_y = tf.reshape(pos_y, (batch_size, h, w, -1)) pos_emb = tf.concat([pos_y, pos_x], axis=-1) return pos_emb remove_unwanted_doc(PositionEmbeddingLearned, __pdoc__) remove_unwanted_doc(PositionEmbeddingSine, __pdoc__)
""" data = data_adapter.expand_1d(data) x, _, _ = data_adapter.unpack_x_y_sample_weight(data) y_pred = self(x, training=False) boxes_without_padding, scores, labels = detr_postprocessing( y_pred[BoxField.BOXES], y_pred[BoxField.SCORES], x[DatasetField.IMAGES_INFO], tf.shape(x[DatasetField.IMAGES])[1:3], ) return boxes_without_padding, scores, labels class SMCAR50(SMCA): def __init__(self, num_classes, num_queries=100, **kwargs): resnet = ResNet50(input_shape=[None, None, 3], weights='imagenet') super().__init__(num_classes, resnet, num_queries=num_queries, **kwargs) class SMCAR50Pytorch(SMCA): def __init__(self, num_classes, num_queries=100, **kwargs): resnet = ResNet50PytorchStyle(input_shape=[None, None, 3], weights='imagenet') super().__init__(num_classes, resnet, num_queries=num_queries, **kwargs) remove_unwanted_doc(SMCA, __pdoc__) remove_unwanted_doc(SMCAR50, __pdoc__) remove_unwanted_doc(SMCAR50Pytorch, __pdoc__)
conv(tensor) for tensor, conv in zip(inputs, self.lateral_connection_2345) ] lat_sum_5432 = [] for idx, block in enumerate(lateral_connection_2345[::-1]): if idx > 0: up_shape = tf.shape(block) block = block + tf.image.resize( lat_sum_5432[-1], [up_shape[1], up_shape[2]], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) lat_sum_5432.append(block) # 3×3convolution on each merged map to generate the final feature map, # which is to reduce the aliasing effect of upsampling. lateral_connection_2345 = [ conv(tensor) for conv, tensor in zip(self.anti_aliasing_conv, lat_sum_5432[::-1]) ] p6 = layers.MaxPool2D()(lateral_connection_2345[-1]) return lateral_connection_2345 + [p6] def get_config(self): base_config = super().get_config() base_config['dim'] = self._dim return base_config remove_unwanted_doc(FPN, __pdoc__)
pos_embed, key_padding_mask=key_padding_mask, training=training) # At the beginning we set target to 0 # In the first decoder layer Q and K will be equal # to dec_out + object_queries=object_queries dec_out = tf.zeros_like(object_queries) layers_output = [] for layer in self.dec_layers: dec_out = layer(dec_out, memory, pos_embed, object_queries, key_padding_mask=key_padding_mask, coattn_mask=coattn_mask, training=training) dec_out = self.layer_norm(dec_out) if training: layers_output.append(dec_out) if training: return tf.concat(layers_output, axis=1), memory return dec_out, memory remove_unwanted_doc(EncoderLayer, __pdoc__) remove_unwanted_doc(DecoderLayer, __pdoc__) remove_unwanted_doc(Transformer, __pdoc__)
yx = tf.nn.sigmoid(yx_pre_sigmoid) # Where y and x are offsets predictions for 'yx' (above) # h and w are the scales predictions yx_offset_hw = self.yx_offset_hw_embed( object_queries) # [bs, num_queries, head * 4] batch_size = tf.shape(object_queries)[0] num_queries = tf.shape(object_queries)[1] # Add offset coordinates to yx and concatenate with scale predictions # yx => [bs, num_queries, head, 2] yx = tf.tile(yx[:, :, None], (1, 1, self.num_heads, 1)) # yx_offset_hw => [bs, num_queries, head, 4] yx_offset_hw = tf.reshape(yx_offset_hw, (batch_size, -1, self.num_heads, 4)) yxhw = tf.concat( [yx, tf.zeros((batch_size, num_queries, self.num_heads, 2))], axis=-1) return yxhw + yx_offset_hw, yx_pre_sigmoid def get_config(self): config = super().get_config() config['num_heads'] = self.num_heads config['hidden_dim'] = self.hidden_dim return config remove_unwanted_doc(SMCAReferencePoints, __pdoc__)
ref_points = tf.transpose(ref_points, (0, 2, 1, 3)) y_cent, x_cent, h, w = tf.split(ref_points, 4, axis=-1) # [batch_size, heads, N, 1] => [batch_size,heads, N, 1, 1] y_cent, x_cent = y_cent[..., tf.newaxis], x_cent[..., tf.newaxis] h, w = h[..., tf.newaxis], w[..., tf.newaxis] # [height, width] => [1, 1, 1, height, width] y, x = y[tf.newaxis, tf.newaxis, tf.newaxis], x[tf.newaxis, tf.newaxis, tf.newaxis] # [batch_size, heads, N, height, width] beta = tf.cast(tf.convert_to_tensor(self._beta), self.dtype) x_term = -(x - x_cent)**2 / (beta * w**2) y_term = -(y - y_cent)**2 / (beta * h**2) weight_map = tf.math.exp(x_term + y_term) batch_size, num_heads = tf.shape(weight_map)[0], tf.shape( weight_map)[1] return tf.reshape(weight_map, (batch_size, num_heads, -1, height * width)) def get_config(self): config = super().get_config() config['beta'] = self._beta return config remove_unwanted_doc(DynamicalWeightMaps, __pdoc__)