def tf_ssd_bboxes_select_layer_all_classes(predictions_layer,
                                           localizations_layer,
                                           select_threshold=None):
    """Pick one class, score and box per candidate of a single SSD layer.

    Both inputs are flattened to three dimensions, keeping their first and
    last axes, so that each entry along the middle axis is one candidate box.

    Args:
      predictions_layer: Per-class scores of one SSD layer. The last axis
        indexes the classes; index 0 is treated as 'no class'.
      localizations_layer: Boxes of the same layer. They are returned as-is
        after reshaping (assumed to be decoded already).
      select_threshold: Score threshold. When None or 0, the best class over
        all classes is taken and candidates whose best class is 0 get a score
        of zero. Otherwise the best class among indices 1.. is taken, and
        both class and score are zeroed wherever the score is not strictly
        greater than the threshold.

    Returns:
      classes, scores, bboxes: `classes` and `scores` have one entry per
      candidate (first axis x N); `bboxes` is the reshaped localization
      layer (first axis x N x last axis).
    """
    # Collapse every axis between the first and the last one.
    pred_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(
        predictions_layer,
        tf.stack([pred_shape[0], -1, pred_shape[-1]]))
    loc_shape = tfe.get_shape(localizations_layer)
    bboxes = tf.reshape(
        localizations_layer,
        tf.stack([loc_shape[0], -1, loc_shape[-1]]))

    no_threshold = select_threshold is None or select_threshold == 0
    if no_threshold:
        # Best class over everything; a winning class 0 means "nothing
        # here", so its score is forced to zero.
        classes = tf.argmax(predictions_layer, axis=2)
        best_scores = tf.reduce_max(predictions_layer, axis=2)
        is_object = tf.cast(classes > 0, best_scores.dtype)
        scores = best_scores * is_object
    else:
        # Ignore class 0 entirely; the +1 restores the original class ids.
        foreground = predictions_layer[:, :, 1:]
        classes = tf.argmax(foreground, axis=2) + 1
        scores = tf.reduce_max(foreground, axis=2)
        # Zero out both outputs wherever the score fails the threshold.
        keep = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(keep, classes.dtype)
        scores = scores * tf.cast(keep, scores.dtype)

    return classes, scores, bboxes
def tf_ssd_bboxes_select_layer(predictions_layer,
                               localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Split one SSD layer into per-class thresholded scores and boxes.

    Both inputs are flattened to three dimensions, keeping their first and
    last axes. For every class except `ignore_class`, the class scores are
    compared with the threshold; scores and boxes of candidates below it are
    multiplied by zero rather than removed, so shapes stay fixed.

    Args:
      predictions_layer: Per-class scores of one SSD layer; the last axis
        indexes the classes.
      localizations_layer: Boxes of the same layer.
      select_threshold: Minimum score (inclusive) for a candidate to be
        kept. None is treated as 0.0.
      num_classes: Number of class indices to iterate over, starting at 0.
      ignore_class: Class index left out of the returned dictionaries.
      scope: Optional name scope; 'ssd_bboxes_select_layer' is the default
        name.

    Returns:
      d_scores, d_bboxes: Dictionaries keyed by class index. Each score
      Tensor has one entry per candidate (first axis x N); each box Tensor
      has the shape of the reshaped localization layer.
    """
    if select_threshold is None:
        select_threshold = 0.0

    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Collapse every axis between the first and the last one.
        pred_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(
            predictions_layer,
            tf.stack([pred_shape[0], -1, pred_shape[-1]]))
        loc_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(
            localizations_layer,
            tf.stack([loc_shape[0], -1, loc_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for class_id in range(num_classes):
            if class_id == ignore_class:
                continue
            class_scores = predictions_layer[:, :, class_id]
            # 1.0 where the score passes the threshold, 0.0 elsewhere.
            keep = tf.cast(
                tf.greater_equal(class_scores, select_threshold),
                class_scores.dtype)
            d_scores[class_id] = class_scores * keep
            # Broadcast the mask over the coordinate axis of the boxes.
            d_bboxes[class_id] = (
                localizations_layer * tf.expand_dims(keep, axis=-1))

        return d_scores, d_bboxes
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Build the SSD losses and register them with `tf.losses.add_loss`.

    The per-layer inputs are flattened and concatenated into one long list
    of anchors. Three losses are then added, each a masked sum divided by
    the batch size:

      * 'cross_entropy_pos': classification loss on positive anchors,
        using `gclasses` as labels.
      * 'cross_entropy_neg': classification loss on the selected hard
        negative anchors, using label 0.
      * 'localization': `custom_layers.abs_smooth` of the localisation
        error on positive anchors, scaled by `alpha`.

    Args:
      logits: Sequence of per-layer class logits; the first and last
        entries of the first element's shape are used as batch size and
        number of classes.
      localisations: Sequence of per-layer predicted localisations,
        reshaped to rows of 4 values.
      gclasses: Sequence of per-layer ground-truth class labels.
      glocalisations: Sequence of per-layer ground-truth localisations,
        reshaped to rows of 4 values.
      gscores: Sequence of per-layer matching scores of the anchors.
      match_threshold: Anchors whose score is strictly above this value are
        positives.
      negative_ratio: Number of negatives requested per positive (the batch
        size is added on top, and the total is capped by the number of
        available negatives).
      alpha: Weight of the localisation loss.
      label_smoothing: Accepted for interface compatibility; not used by
        this function.
      scope: Optional name scope; 'ssd_losses' is the default name.

    Returns:
      None. The losses are only registered through `tf.losses.add_loss`.
    """
    with tf.name_scope(scope, 'ssd_losses'):
        first_shape = tfe.get_shape(logits[0], 5)
        num_classes = first_shape[-1]
        batch_size = first_shape[0]

        def _register_batch_loss(weighted):
            # Sum the masked per-anchor loss, normalise by the batch size
            # and add the result to the losses collection.
            tf.losses.add_loss(
                tf.div(tf.reduce_sum(weighted), batch_size, name='value'))

        # Flatten every feature layer into rows of anchors.
        flat_logits = []
        flat_gclasses = []
        flat_gscores = []
        flat_locs = []
        flat_glocs = []
        for i, layer_logits in enumerate(logits):
            flat_logits.append(tf.reshape(layer_logits, [-1, num_classes]))
            flat_gclasses.append(tf.reshape(gclasses[i], [-1]))
            flat_gscores.append(tf.reshape(gscores[i], [-1]))
            flat_locs.append(tf.reshape(localisations[i], [-1, 4]))
            flat_glocs.append(tf.reshape(glocalisations[i], [-1, 4]))

        # Join all layers into single tensors.
        logits = tf.concat(flat_logits, axis=0)
        gclasses = tf.concat(flat_gclasses, axis=0)
        gscores = tf.concat(flat_gscores, axis=0)
        localisations = tf.concat(flat_locs, axis=0)
        glocalisations = tf.concat(flat_glocs, axis=0)
        dtype = logits.dtype

        # Positive matching mask (positive samples).
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining.
        # Labels for the negative loss: 0 on every non-positive anchor.
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        not_positive = tf.logical_not(pmask)
        # Open question from the original author: why -0.5 here? The paper
        # mentions 0.5.
        # NOTE(review): presumably this drops anchors flagged with a
        # negative score - confirm against the code producing gscores.
        above_floor = gscores > -0.5
        nmask = tf.logical_and(not_positive, above_floor)
        fnmask = tf.cast(nmask, dtype)
        # Background probability for negatives, 1.0 everywhere else so that
        # non-negatives never rank among the smallest values.
        background_prob = predictions[:, 0]
        filler = 1. - fnmask
        nvalues = tf.where(nmask, background_prob, filler)
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # top_k on the negated values yields the n_neg smallest background
        # probabilities; the last one is the selection cut-off.
        val, _ = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]

        # Final negative mask: strictly below the cut-off.
        below_cutoff = nvalues < max_hard_pred
        nmask = tf.logical_and(nmask, below_cutoff)
        fnmask = tf.cast(nmask, dtype)

        # Cross-entropy on positive anchors.
        with tf.name_scope('cross_entropy_pos'):
            pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            _register_batch_loss(pos_loss * fpmask)

        # Cross-entropy on the mined negatives.
        with tf.name_scope('cross_entropy_neg'):
            # Chosen among the boxes that are not positive samples; they
            # are asked to predict the background class.
            neg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            # The lower the predicted background confidence, the larger
            # the error; lowering it means background gets predicted as
            # background.
            _register_batch_loss(neg_loss * fnmask)

        # Localisation loss, counted on positive anchors only.
        with tf.name_scope('localization'):
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loc_loss = custom_layers.abs_smooth(
                localisations - glocalisations)
            _register_batch_loss(loc_loss * weights)