def depthwise_conv1d(input, k_h=1, k_w=3, channel_multiplier=1, strides=1,
                     padding='SAME', stddev=0.02, name='depthwise_conv1d',
                     bias=True, weight_decay=0.0001):
    # Lift the 1-D input (batch, length, channels) to 4-D so that
    # tf.nn.depthwise_conv2d can be applied with a 1 x k_w kernel.
    lshape = tfe.get_shape(input, 3)
    input = tf.reshape(input, [lshape[0], 1, lshape[1], lshape[2]])
    with tf.variable_scope(name):
        in_channel = input.get_shape().as_list()[-1]
        w = tf.get_variable(
            'w', [k_h, k_w, in_channel, channel_multiplier],
            regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.depthwise_conv2d(input, w, [1, strides, strides, 1],
                                      padding)
        if bias:
            biases = tf.get_variable(
                'bias', [in_channel * channel_multiplier],
                initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, biases)
        cshape = tfe.get_shape(conv, 4)
        # Squeeze the dummy height dimension to recover the original 1-D data.
        conv = tf.reshape(conv, [cshape[0], cshape[2], cshape[3]])
    return conv
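# A minimal smoke test for depthwise_conv1d above; a sketch only, assuming a
# TF 1.x graph and that tfe.get_shape resolves the static shape. With 'SAME'
# padding, stride 1 and channel_multiplier=1, a (batch, length, channels)
# input should keep its shape.
import tensorflow as tf

x = tf.placeholder(tf.float32, [2, 512, 64])   # batch x length x channels
y = depthwise_conv1d(x, k_w=3, name='dw_smoke_test')
print(y.get_shape().as_list())                 # expected: [2, 512, 64]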
def tf_aher_bboxes_select_layer(predictions_layer, localizations_layer,
                                select_threshold=None, num_classes=21,
                                ignore_class=0, scope=None, IoU_flag=False):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A prediction layer;
      localizations_layer: A localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 2. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'aher_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        if IoU_flag:
            # Prepend a zero 'background' column so IoU predictions share the
            # same two-class layout as classification scores.
            zeros_m = tf.zeros([predictions_layer.shape[1], 1])
            predictions_layer = tf.reshape(
                tf.stack([zeros_m,
                          tf.reshape(predictions_layer,
                                     [predictions_layer.shape[1], 1])],
                         axis=1),
                [predictions_layer.shape[0], predictions_layer.shape[1], 2])
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None, num_classes=21,
                               ignore_class=0, scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        # After the reshape, predictions_layer has shape
        # (batch, n*n*num_layer_anchors, num_classes).
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        # After the reshape, localizations_layer has shape
        # (batch, n*n*num_layer_anchors, 4).
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                # scores holds the per-anchor score of class c, with shape
                # (batch, n*n*num_layer_anchors).
                scores = predictions_layer[:, :, c]
                # Candidates scoring below select_threshold are discarded.
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                # Zero out scores below select_threshold...
                scores = scores * fmask
                # ...and zero out the corresponding bboxes as well.
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None, num_classes=21,
                               ignore_class=0, scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))
        # Only consider legal bboxes (disabled):
        # localizations_mask = (localizations_layer[:, :, 0] < localizations_layer[:, :, 2])
        # localizations_mask = tf.logical_and(localizations_mask, (localizations_layer[:, :, 1] < localizations_layer[:, :, 3]))
        # localizations_mask = tf.Print(localizations_mask, [localizations_mask], message='localizations_mask: ', summarize=30)

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold (strict '>' in this variant).
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater(scores, select_threshold),
                                scores.dtype)
                # fmask = tf.cast(tf.logical_and(tf.greater(scores, select_threshold), localizations_mask), scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        return d_scores, d_bboxes
def flaten_predict1(predictions, localisations):
    predictions_shape = tfe.get_shape(predictions[0], 5)
    batch_size = predictions_shape[0]
    num_classes = predictions_shape[-1]
    if batch_size > 1:
        raise ValueError('only batch_size 1 is supported.')

    flaten_pred = []
    flaten_labels = []
    flaten_locations = []
    flaten_scores = []
    for i in range(len(predictions)):
        flaten_pred.append(tf.reshape(predictions[i],
                                      [batch_size, -1, num_classes]))
        cls_pred = flaten_pred[i]
        flaten_scores.append(tf.reshape(cls_pred,
                                        [batch_size, -1, num_classes]))
        # Foreground label: argmax over classes 1..num_classes-1, shifted back
        # by one to recover the original class index.
        flaten_labels.append(tf.reshape(tf.argmax(cls_pred[:, :, 1:], -1) + 1,
                                        [batch_size, -1]))

    # batch_size is always 1, so squeeze the batch dimension away.
    total_scores = tf.squeeze(tf.concat(flaten_scores, 1), 0)
    total_locations = tf.squeeze(localisations, 0)
    total_labels = tf.squeeze(tf.concat(flaten_labels, 1), 0)

    # Remove bboxes that are not foreground.
    non_background_mask = tf.greater(total_labels, 0)
    bbox_mask = non_background_mask
    # return tf.boolean_mask(total_scores, bbox_mask), tf.boolean_mask(total_labels, bbox_mask), tf.boolean_mask(total_locations, bbox_mask)
    return total_scores, total_labels, total_locations
def bboxes_decode(self, cls, loc, match_thres, scope='bboxes_decode'):
    """Decode labels and bounding boxes."""
    with tf.name_scope(scope):
        assert len(tfe.get_shape(cls)) == 3
        keep = tf.minimum(tf.size(cls), 25)
        anchors = tf.reshape(self.yxhw, [-1, 4])

        def fn(args):
            cls, loc = args
            fcls = tf.reshape(cls, [-1])
            floc = tf.reshape(loc, [-1, 4])
            # Keep only the top-k scoring anchors.
            v, idx = tf.nn.top_k(fcls, keep, sorted=True)
            sel_anchors = tf.gather(anchors, idx)
            floc = tf.gather(floc, idx)
            # Decode predicted offsets relative to the selected anchors.
            bboxes = sel_anchors + floc * self.ruler
            bboxes = tfe.yxhw2yxyx(bboxes)
            # Zero out entries below the matching threshold.
            bboxes = tf.where(v > match_thres, bboxes, tf.zeros_like(bboxes))
            v = tf.where(v > match_thres, v, tf.zeros_like(v))
            return bboxes, v

        bboxes, scores = tf.map_fn(fn=fn, elems=[cls, loc],
                                   back_prop=False,
                                   dtype=(tf.float32, tf.float32))
        return bboxes, scores
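# The decode step above is plain anchor arithmetic. A NumPy sketch of the same
# idea, with a hypothetical `ruler` scaling vector and assuming yxhw2yxyx
# converts center/size boxes to corner form:
import numpy as np

anchors = np.array([[0.5, 0.5, 0.2, 0.2]])   # one (y, x, h, w) anchor
offsets = np.array([[0.1, -0.1, 0.0, 0.0]])  # predicted offsets
ruler = np.array([0.1, 0.1, 0.2, 0.2])       # hypothetical per-coordinate scale

yxhw = anchors + offsets * ruler             # decoded (y, x, h, w)
y, x, h, w = yxhw[0]
yxyx = [y - h / 2, x - w / 2, y + h / 2, x + w / 2]  # yxhw -> yxyx corners
print(yxyx)                                  # [0.41, 0.39, 0.61, 0.59]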
def lane_net_losses(logits, gt_maps, negative_ratio=3., alpha=1.,
                    label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'lane_net_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # logits = tf.layers.flatten(logits)
        # gt = tf.layers.flatten(gt_maps)
        print(gt_maps.get_shape().as_list())
        print(logits.get_shape().as_list())
        # Add cross-entropy loss (dense labels, so the non-sparse op is used).
        with tf.name_scope('softmax_cross_entropy'):
            loss = tf.nn.softmax_cross_entropy_with_logits(labels=gt_maps,
                                                           logits=logits)
            # loss = tf.nn.l2_loss(logits - gt_maps)
            # loss = tf.nn.weighted_cross_entropy_with_logits(
            #     targets=gt_maps, logits=logits, pos_weight=12)
            loss = tf.reduce_mean(loss)
            # loss = tf.div(loss, batch_size, name='value')
            tf.losses.add_loss(loss)
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None, num_classes=7,
                               ignore_class=0, scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # shape = batch_size x (64*64*4) x num_classes
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        # print("debug+++++++++++++++predictions_layer = {}".format(predictions_layer))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))
        # print("localizations_layer = {}".format(localizations_layer))

        d_scores = {}
        d_bboxes = {}
        # Only the first foreground class is selected in this variant:
        # for c in range(0, num_classes):
        for c in range(0, 2):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer(predictions_layer,    # predicted classes to filter
                               localizations_layer,  # predicted locations to filter
                               select_threshold=None, num_classes=21,
                               ignore_class=0, scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))
        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:  # Skip the background class.
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]  # Scores of class c.
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                # Keep only scores at or above the threshold.
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        # Return dictionaries keyed by class, each holding the per-class
        # prediction scores and box locations.
        return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None, num_classes=21,
                               ignore_class=0, scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes
        return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None, num_classes=21,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      scores, bboxes: Output tensors of size Batches x N_Classes-1 X N x 1 | 4.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4.
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        l_scores = []
        l_bboxes = []
        for c in range(1, num_classes):
            # Remove boxes under the threshold.
            scores = predictions_layer[:, :, c]
            mask = tf.greater_equal(scores, select_threshold)
            scores = scores * tf.cast(mask, scores.dtype)
            bboxes = localizations_layer * tf.expand_dims(
                tf.cast(mask, localizations_layer.dtype), axis=-1)
            l_scores.append(scores)
            l_bboxes.append(bboxes)
        # Stack the per-class results into single tensors.
        scores = tf.stack(l_scores, axis=1)
        bboxes = tf.stack(l_bboxes, axis=1)
        return scores, bboxes
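# The select variants above all share the same masking core; they differ only
# in packaging (per-class dict vs. one stacked tensor). A NumPy illustration
# of the thresholding, a sketch with made-up scores:
import numpy as np

preds = np.array([[[0.1, 0.7, 0.2],       # batch of 1, two anchors, 3 classes
                   [0.6, 0.3, 0.1]]])
locs = np.ones((1, 2, 4))
select_threshold = 0.5

c = 1                                     # a foreground class index
scores = preds[:, :, c]
fmask = (scores >= select_threshold).astype(scores.dtype)
print(scores * fmask)                     # [[0.7, 0.0]]: anchor 2 zeroed
print(locs * fmask[..., None])            # its box is zeroed out as well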
def tf_ssd_bboxes_select_layer_all_classes(predictions_layer,
                                           localizations_layer,
                                           select_threshold=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
    Return:
      classes, scores, bboxes: Input Tensors.
    """
    # Reshape features: Batches x N x N_labels | 4.
    p_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(predictions_layer,
                                   tf.stack([p_shape[0], -1, p_shape[-1]]))
    l_shape = tfe.get_shape(localizations_layer)
    localizations_layer = tf.reshape(localizations_layer,
                                     tf.stack([l_shape[0], -1, l_shape[-1]]))
    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class.
        classes = tf.argmax(predictions_layer, axis=2)
        scores = tf.reduce_max(predictions_layer, axis=2)
        scores = scores * tf.cast(classes > 0, scores.dtype)
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        classes = tf.argmax(sub_predictions, axis=2) + 1
        scores = tf.reduce_max(sub_predictions, axis=2)
        # Only keep predictions higher than the threshold.
        mask = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(mask, classes.dtype)
        scores = scores * tf.cast(mask, scores.dtype)
    # Assume the localization layer is already decoded.
    bboxes = localizations_layer
    return classes, scores, bboxes
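# A NumPy sketch of the two selection branches above (made-up probabilities):
# without a threshold, take the overall argmax and zero background rows; with
# a threshold, argmax over foreground classes only, then threshold.
import numpy as np

preds = np.array([[[0.5, 0.3, 0.2],      # anchor 1: background wins
                   [0.2, 0.1, 0.7]]])    # anchor 2: class 2 wins

classes = preds.argmax(axis=2)           # select_threshold is None/0
scores = preds.max(axis=2) * (classes > 0)
print(classes, scores)                   # [[0 2]] [[0.  0.7]]

sub = preds[:, :, 1:]                    # select_threshold > 0
classes = sub.argmax(axis=2) + 1
scores = sub.max(axis=2)
mask = scores > 0.4
print(classes * mask, scores * mask)     # [[0 2]] [[0.  0.7]]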
def AHER_Detection_Inference(aher_anet_model, aher_anet_anchor, feature,
                             vname, label, duration, clsweights, clsbias,
                             reuse, n_class, cls_suffix='_anet'):
    """Inference bbox of single-shot action localization.
        feature:  batch_size x 512 x 4069
        vname:    batch_size x 1
        label:    batch_size x 1
        duration: batch_size x 1
    """
    predictions, localisation, logits, proplogits, proppredictions, \
        iouprediction, end_points = aher_anet_model.net_prop_iou(
            feature, clsweights, clsbias, is_training=True, reuse=reuse,
            num_classes=n_class, cls_suffix=cls_suffix)
    # Decode bounding boxes and get scores.
    localisation = aher_anet_model.bboxes_decode_logits(
        localisation, duration, aher_anet_anchor, predictions)
    if FLAGS.cls_flag:
        rscores, rbboxes = aher_anet_model.detected_bboxes_classwise(
            proppredictions, localisation,
            select_threshold=FLAGS.select_threshold,
            nms_threshold=FLAGS.nms_threshold,
            clipping_bbox=None,
            top_k=FLAGS.select_top_k,
            keep_top_k=FLAGS.keep_top_k,
            iou_flag=False)
    else:
        rscores, rbboxes = aher_anet_model.detected_bboxes_classwise(
            iouprediction, localisation,
            select_threshold=FLAGS.select_threshold,
            nms_threshold=FLAGS.nms_threshold,
            clipping_bbox=None,
            top_k=FLAGS.select_top_k,
            keep_top_k=FLAGS.keep_top_k,
            iou_flag=True)
    # Compute the pooled classification score over all anchors.
    lshape = tfe.get_shape(predictions[0], 8)
    num_classes = lshape[-1]
    batch_size = lshape[0]
    fprediction = []
    for i in range(len(predictions)):
        fprediction.append(tf.reshape(predictions[i], [-1, num_classes]))
    predictions = tf.concat(fprediction, axis=0)
    avergeprediction = tf.reduce_mean(predictions, axis=0)
    labelid = tf.argmax(avergeprediction, 0)
    argmaxid = tf.argmax(predictions, 1)
    prebbox = {"rscores": rscores, "rbboxes": rbboxes, "label": labelid,
               "avescore": avergeprediction, "rawscore": predictions,
               "argmaxid": argmaxid}
    return prebbox
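# The pooled score at the end of AHER_Detection_Inference is mean-pooling over
# anchors followed by an argmax. A NumPy sketch with made-up scores:
import numpy as np

predictions = np.array([[0.1, 0.8, 0.1],   # (num_anchors, num_classes)
                        [0.2, 0.6, 0.2],
                        [0.7, 0.2, 0.1]])
avescore = predictions.mean(axis=0)        # average-pooled video-level score
print(avescore.argmax())                   # video-level label: 1
print(predictions.argmax(axis=1))          # per-anchor argmax: [1 1 0]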
def flaten_predict(predictions, objness_pred, localisations):
    predictions_shape = tfe.get_shape(predictions[0], 5)
    batch_size = predictions_shape[0]
    num_classes = predictions_shape[-1]
    if batch_size > 1:
        raise ValueError('only batch_size 1 is supported.')

    flaten_pred = []
    flaten_labels = []
    flaten_objness = []
    flaten_locations = []
    flaten_scores = []
    for i in range(len(predictions)):
        flaten_pred.append(tf.reshape(predictions[i],
                                      [batch_size, -1, num_classes]))
        flaten_objness.append(tf.reshape(objness_pred[i], [batch_size, -1]))
        # Gate the class predictions by the objectness score.
        cls_pred = tf.expand_dims(flaten_objness[i], axis=-1) * flaten_pred[i]
        flaten_scores.append(tf.reshape(cls_pred, [batch_size, -1, num_classes]))
        # flaten_scores.append(tf.reshape(tf.reduce_max(cls_pred, -1), [batch_size, -1]))
        flaten_labels.append(tf.reshape(tf.argmax(cls_pred, -1),
                                        [batch_size, -1]))
        flaten_locations.append(tf.reshape(localisations[i],
                                           [batch_size, -1, 4]))

    # Assume batch_size is always 1.
    total_scores = tf.squeeze(tf.concat(flaten_scores, 1), 0)
    total_objness = tf.squeeze(tf.concat(flaten_objness, 1), 0)
    total_locations = tf.squeeze(tf.concat(flaten_locations, 1), 0)
    total_labels = tf.squeeze(tf.concat(flaten_labels, 1), 0)

    # Remove bboxes that are not foreground...
    non_background_mask = tf.greater(total_labels, 0)
    # ...and bboxes whose objectness falls below the threshold.
    # bbox_mask = tf.logical_and(non_background_mask, tf.greater(total_scores, FLAGS.select_threshold))
    # total_objness = tf.Print(total_objness, [total_objness])
    bbox_mask = tf.logical_and(
        non_background_mask,
        tf.greater(total_objness, FLAGS.objectness_thres))
    return tf.boolean_mask(total_scores, bbox_mask), \
        tf.boolean_mask(total_labels, bbox_mask), \
        tf.boolean_mask(total_locations, bbox_mask)
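# The objectness gating in flaten_predict, sketched in NumPy with made-up
# values; `objectness_thres` stands in for FLAGS.objectness_thres.
import numpy as np

cls_pred = np.array([[0.1, 0.5, 0.4],    # per-anchor class probabilities
                     [0.2, 0.7, 0.1]])
objness = np.array([0.9, 0.02])          # per-anchor objectness
objectness_thres = 0.1

scores = objness[:, None] * cls_pred     # objectness-gated class scores
labels = scores.argmax(axis=1)
keep = (labels > 0) & (objness > objectness_thres)
print(scores[keep], labels[keep])        # only the first anchor survives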
def get_losses(self, logits3, localisations3, gclasses3, glocalisations3,
               gscores3, match_threshold=0.5, negative_ratio=2., alpha=1.,
               label_smoothing=0., scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        train_or_eval_test = len(logits3)
        all_pmask = []
        apmask = []
        for u in range(train_or_eval_test):
            gclasses = gclasses3[u]
            fgclasses = []
            for i in range(len(gclasses)):
                fgclasses.append(tf.reshape(gclasses[i], [-1]))
            gclasses = tf.concat(fgclasses, axis=0)
            pmask = gclasses > 0
            all_pmask.append(pmask)
        # Merge the positive masks of the three scales, which overlap on the
        # shared anchor ranges.
        part1 = all_pmask[0][0:25600]
        part2_temp = tf.logical_or(all_pmask[0][25600:], all_pmask[1][:],
                                   name='or1')
        part2 = part2_temp[0:6400]
        part3 = tf.logical_or(part2_temp[6400:], all_pmask[2][:], name='or2')
        apmask.append(tf.concat([part1, part2, part3], axis=0))
        apmask.append(tf.concat([part2, part3], axis=0))
        apmask.append(part3)

        for u in range(train_or_eval_test):
            logits = logits3[u]
            localisations = localisations3[u]
            gclasses = gclasses3[u]
            glocalisations = glocalisations3[u]
            gscores = gscores3[u]
            lshape = tfe.get_shape(logits[0], 4)
            num_classes = 2
            batch_size = lshape[0]
            # Flatten out all vectors!
            flogits = []
            fgclasses = []
            fgscores = []
            flocalisations = []
            fglocalisations = []
            for i in range(len(logits) - u):
                flogits.append(tf.reshape(logits[i + u], [-1, num_classes]))
                fgclasses.append(tf.reshape(gclasses[i], [-1]))
                fgscores.append(tf.reshape(gscores[i], [-1]))
                flocalisations.append(tf.reshape(localisations[i + u], [-1, 4]))
                fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
            # And concat the crap!
            logits = tf.concat(flogits, axis=0)
            gclasses = tf.concat(fgclasses, axis=0)
            gscores = tf.concat(fgscores, axis=0)
            localisations = tf.concat(flocalisations, axis=0)
            glocalisations = tf.concat(fglocalisations, axis=0)
            dtype = logits.dtype

            # Compute positive matching mask...
            pmask = gclasses > 0
            fpmask = tf.cast(pmask, dtype)
            n_positives = tf.reduce_sum(fpmask)

            # Hard negative mining...
            # For no_classes, we only care that a false positive's label is 0;
            # this is why the merged positive mask suffices for our needs.
            no_classes = tf.cast(apmask[u], tf.int32)
            nmask = tf.logical_not(apmask[u])
            fnmask = tf.cast(nmask, dtype)
            # Number of negative entries to select.
            max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
            n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
            n_neg = tf.minimum(n_neg, max_neg_entries)
            # Avoid n_neg being zero, which would cause an error in top_k
            # later on.
            n_neg = tf.maximum(n_neg, 1)

            # Down-weight the losses of the coarser scales.
            extend_weight = 1.0
            if u == 1:
                extend_weight = 0.5
            elif u == 2:
                extend_weight = 0.25

            # Add cross-entropy loss.
            with tf.name_scope('cross_entropy_pos%d' % u):
                total_cross_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=gclasses)
                total_cross_pos = tf.reduce_sum(total_cross_pos * fpmask,
                                                name="cross_entropy_pos")
                total_cross_pos = tf.cond(
                    n_positives > 0,
                    lambda: tf.div(total_cross_pos, n_positives),
                    lambda: 0.)
                tf.losses.add_loss(total_cross_pos)

            with tf.name_scope('cross_entropy_neg%d' % u):
                total_cross_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=no_classes)
                val, idxes = tf.nn.top_k(total_cross_neg * fnmask, k=n_neg)
                total_cross_neg = tf.reduce_sum(val, name="cross_entropy_neg")
                total_cross_neg = tf.cond(
                    n_positives > 0,
                    lambda: tf.div(total_cross_neg, n_positives),
                    lambda: 0.)
                tf.losses.add_loss(total_cross_neg)

            # Add localization loss: smooth L1, L2, ...
            with tf.name_scope('localization%d' % u):
                # Weights Tensor: positive mask + random negative.
                weights = tf.expand_dims(alpha * fpmask, axis=-1)
                total_loc = custom_layers.abs_smooth_2(localisations -
                                                       glocalisations)
                total_loc = tf.reduce_sum(total_loc * weights * extend_weight,
                                          name="localization")
                total_loc = tf.cond(n_positives > 0,
                                    lambda: tf.div(total_loc, n_positives),
                                    lambda: 0.)
                tf.losses.add_loss(total_loc)

            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy%d' % u)
            # Add to the EXTRA_LOSSES TF collection.
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
            tf.summary.scalar('positive_num%d' % u, n_positives)
            tf.summary.scalar('negative_num%d' % u, n_neg)

        model_loss = tf.get_collection(tf.GraphKeys.LOSSES)
        model_loss = tf.add_n(model_loss)
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        regularization_loss = tf.add_n(regularization_losses,
                                       name='regularization_loss')
        tf.summary.scalar('regularization_loss', regularization_loss)
        total_loss = tf.add(model_loss, regularization_loss)
        return total_loss
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # logits/localisations are lists of 6 tensors, one per prediction
        # layer. They cannot be processed directly, so flatten each entry
        # first and then concat everything into single tensors.
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            # After the reshape, the flogits entries have shapes (N*5776, 21),
            # (N*1444, 21), (N*600, 21), (N*150, 21), (N*36, 21), (N*4, 21);
            # 5776 = 38*38*4, i.e. logits[i] is reshaped to (prod(shape[:-1]), 21).
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            # Similarly, the flocalisations entries have shapes (N*5776, 4),
            # (N*1444, 4), (N*600, 4), (N*150, 4), (N*36, 4), (N*4, 4).
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap! After the concat, logits has shape (8732*N, 21).
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        # localisations has shape (8732*N, 4).
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            # Multiplying by fpmask restricts the loss to the positive samples.
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            # Multiplying by fnmask restricts the loss to the mined negatives.
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size,
                          name='value')
            tf.losses.add_loss(loss)
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Positive sample selection: an anchor whose ground-truth score
        # exceeds match_threshold (0.5) is treated as a positive.
        # pmask has the shape of gscores: True where gscores > 0.5, else False.
        pmask = gscores > match_threshold
        # Cast pmask from True/False to 1/0.
        fpmask = tf.cast(pmask, dtype)
        # Count of entries with pmask == 1, i.e. the number of positives.
        n_positives = tf.reduce_sum(fpmask)
        # no_classes is 1 at positives (an object is present) and 0 at
        # negatives (background).
        no_classes = tf.cast(pmask, tf.int32)

        # Negative sample selection: hard negative mining. To keep positives
        # and negatives roughly balanced, SSD subsamples the negatives: sort
        # them by confidence error (the lower the predicted background
        # confidence, the larger the error) in descending order and keep the
        # top-k hardest ones, so the negative:positive ratio stays close to 3:1.
        # nmask is True for -0.5 < gscores < 0.5, i.e. the negatives.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        # fnmask is 1 at negatives and 0 at positives.
        fnmask = tf.cast(nmask, dtype)
        # tf.where(cond, a, b) keeps a's value where cond is True and takes
        # b's value elsewhere (a and b have identical shapes).
        # predictions: per-class probabilities.
        predictions = slim.softmax(logits)
        # nvalues holds the predicted background score at negative positions
        # and 1. everywhere else.
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        # Flatten the background probabilities of all boxes into nvalues_flat
        # (positions of positives carry probability 1).
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        # Total number of available negatives.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # Number of negatives to keep: negative_ratio times the positives.
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
        # If fewer negatives are available than n_neg, keep them all.
        n_neg = tf.minimum(n_neg, max_neg_entries)
        # Sort by confidence error (ascending background score) and take the
        # top-k hardest negatives.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        # Maximum background confidence among the selected negatives (boxes
        # below this threshold count as negatives).
        max_hard_pred = -val[-1]
        # Final negative mask: ground truth is background
        # (-0.5 < gscores < 0.5) and the predicted background score falls
        # below the mining threshold.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        # Cast the final negative mask to 1/0.
        fnmask = tf.cast(nmask, dtype)
        # Counts of negatives, used for normalization below.
        n_negatives = tf.reduce_sum(fnmask)
        fn_neg = tf.cast(n_negatives, tf.float32)

        # Add cross-entropy loss. This variant normalizes by the number of
        # positives/negatives, so it assumes n_positives > 0 and fn_neg > 0.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), n_positives,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), fn_neg, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), n_positives,
                          name='value')
            tf.losses.add_loss(loss)
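# The hard-negative-mining core used by the ssd_losses variants above, in
# NumPy with made-up background probabilities (positives carry 1.0):
import numpy as np

nvalues = np.array([0.9, 0.1, 0.6, 1.0, 0.3])
n_neg = 2

order = np.argsort(nvalues)[:n_neg]       # hardest negatives: lowest bg score
max_hard_pred = nvalues[order].max()      # mining threshold
nmask = nvalues < max_hard_pred           # note: the strict '<' drops the
print(order, max_hard_pred, nmask)        # boundary element, mirroring the
                                          # TF code above.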
def ron_losses(logits, localisations, objness_logits, objness_pred,
               gclasses, glocalisations, gscores,
               match_threshold=0.5, neg_threshold=0.3, objness_threshold=0.03,
               negative_ratio=3., alpha=1. / 3, beta=1. / 3,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ron_losses'):
        # Rank 5: batch, height, width, num_anchors, num_classes.
        logits_shape = tfe.get_shape(logits[0], 5)
        num_classes = logits_shape[-1]
        batch_size = logits_shape[0]

        # Flatten out all vectors.
        flogits = []
        fobjness_logits = []
        fobjness_pred = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fobjness_logits.append(tf.reshape(objness_logits[i], [-1, 2]))
            fobjness_pred.append(tf.reshape(objness_pred[i], [-1]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # Concat along the different feature maps (from last to front:
        # layer7 -> layer4).
        logits = tf.concat(flogits, axis=0)
        objness_logits = tf.concat(fobjness_logits, axis=0)
        objness_pred = tf.concat(fobjness_pred, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Raw masks: positive for overlap > 0.5, negative for overlap < 0.3.
        # Each positive example has exactly one label.
        positive_mask = gclasses > 0
        fpositive_mask = tf.cast(positive_mask, dtype)
        n_positives = tf.reduce_sum(fpositive_mask)
        # Negatives are those whose max overlap stays below neg_threshold;
        # note that some positives may also have a low jaccard score.
        # negtive_mask = tf.cast(tf.logical_not(positive_mask), dtype) * gscores < neg_threshold
        # negtive_mask = tf.logical_and(gscores < neg_threshold, tf.logical_not(positive_mask))
        negtive_mask = tf.equal(gclasses, 0)
        fnegtive_mask = tf.cast(negtive_mask, dtype)
        n_negtives = tf.reduce_sum(fnegtive_mask)

        # Randomly select hard negatives for the objectness branch.
        n_neg_to_select = tf.cast(negative_ratio * n_positives, tf.int32)
        n_neg_to_select = tf.minimum(n_neg_to_select,
                                     tf.cast(n_negtives, tf.int32))
        # Each negative survives with probability n_neg_to_select / n_negtives.
        rand_neg_mask = tf.random_uniform(
            tfe.get_shape(gscores, 1), minval=0, maxval=1.) < tfe.safe_divide(
                tf.cast(n_neg_to_select, dtype), n_negtives,
                name='rand_select_objness')
        # Include both the randomly selected negatives and all positives.
        final_neg_mask_objness = tf.stop_gradient(
            tf.logical_or(tf.logical_and(negtive_mask, rand_neg_mask),
                          positive_mask))
        total_examples_for_objness = tf.reduce_sum(
            tf.cast(final_neg_mask_objness, dtype))
        # The objectness label is 1 at every positive.
        objness_pred_label = tf.stop_gradient(tf.cast(positive_mask, tf.int32))

        # Objectness score at all positive positions...
        objness_pred_in_positive = tf.cast(positive_mask, dtype) * objness_pred
        # ...its maximum over all positive positions...
        max_objness_in_positive = tf.reduce_max(objness_pred_in_positive)
        # ...and the position of that maximum.
        max_objness_mask = tf.equal(objness_pred_in_positive,
                                    max_objness_in_positive)

        # Objectness mask used to select real positives for detection.
        objectness_mask = objness_pred > objness_threshold
        # Positives for detection, filtered by objectness.
        # cls_positive_mask = tf.stop_gradient(tf.logical_or(tf.logical_and(positive_mask, objectness_mask), max_objness_mask))
        cls_positive_mask = tf.stop_gradient(
            tf.logical_and(positive_mask, objectness_mask))
        # cls_negtive_mask = tf.logical_and(objectness_mask, tf.logical_not(cls_positive_mask))
        cls_negtive_mask = tf.logical_and(objectness_mask, negtive_mask)
        n_cls_negtives = tf.reduce_sum(tf.cast(cls_negtive_mask, dtype))
        fcls_positive_mask = tf.cast(cls_positive_mask, dtype)
        n_cls_positives = tf.reduce_sum(fcls_positive_mask)
        n_cls_neg_to_select = tf.cast(negative_ratio * n_cls_positives,
                                      tf.int32)
        n_cls_neg_to_select = tf.minimum(n_cls_neg_to_select,
                                         tf.cast(n_cls_negtives, tf.int32))
        # Randomly selected negative mask for the classification branch.
        rand_cls_neg_mask = tf.random_uniform(
            tfe.get_shape(gscores, 1), minval=0, maxval=1.) < tfe.safe_divide(
                tf.cast(n_cls_neg_to_select, dtype), n_cls_negtives,
                name='rand_select_cls')
        # Include both the randomly selected negatives and all positives
        # (positives are filtered by objectness).
        final_cls_neg_mask_objness = tf.stop_gradient(
            tf.logical_or(tf.logical_and(cls_negtive_mask, rand_cls_neg_mask),
                          cls_positive_mask))
        total_examples_for_cls = tf.reduce_sum(
            tf.cast(final_cls_neg_mask_objness, dtype))

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.stop_gradient(
                    tf.clip_by_value(gclasses, 0, num_classes)))
            loss = tf.cond(
                n_positives > 0.,
                lambda: (1. - alpha - beta) * tf.reduce_mean(
                    tf.boolean_mask(loss, final_cls_neg_mask_objness)),
                lambda: 0.)
            # loss = tfe.safe_divide(tf.reduce_sum(loss * weights), total_examples_for_cls, name='cls_loss')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_objectness'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=objness_logits, labels=objness_pred_label)
            loss = tf.cond(
                n_positives > 0.,
                lambda: alpha * tf.reduce_mean(
                    tf.boolean_mask(loss, final_neg_mask_objness)),
                lambda: 0.)
            # loss = tfe.safe_divide(tf.reduce_sum(loss * weights), total_examples_for_objness, name='objness_loss')
            tf.losses.add_loss(loss)

        # Add localization loss.
        with tf.name_scope('localization'):
            loss = custom_layers.modified_smooth_l1(
                localisations, tf.stop_gradient(glocalisations), sigma=3.)
            # loss = custom_layers.abs_smooth(localisations - tf.stop_gradient(glocalisations))
            loss = tf.cond(
                n_cls_positives > 0.,
                lambda: beta * tf.reduce_mean(
                    tf.boolean_mask(tf.reduce_sum(loss, axis=-1),
                                    tf.stop_gradient(cls_positive_mask))),
                lambda: 0.)
            # loss = tfe.safe_divide(tf.reduce_sum(loss * weights), n_cls_positives, name='localization_loss')
            tf.losses.add_loss(loss)
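# The random negative sampling used twice in ron_losses, sketched in NumPy
# with made-up masks; tfe.safe_divide is replaced by a plain division here.
import numpy as np

rng = np.random.RandomState(0)
negtive_mask = np.array([True, True, True, True, False])
n_negtives = negtive_mask.sum()           # 4 available negatives
n_neg_to_select = 2

# Each negative survives with probability n_neg_to_select / n_negtives, so
# about n_neg_to_select negatives are kept in expectation.
rand_neg_mask = rng.uniform(size=negtive_mask.shape) < (n_neg_to_select /
                                                        n_negtives)
final_neg_mask = negtive_mask & rand_neg_mask
print(final_neg_mask.sum())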
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size,
                          name='value')
            tf.losses.add_loss(loss)
def get_losses(self, logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=2.5, alpha=1.,
               label_smoothing=0., scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    # Requires: from tensorflow.python.ops import array_ops
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        # batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gclasses > 0
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        # For no_classes, we only care that a false positive's label is 0;
        # this is why pmask suffices for our needs.
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_not(pmask)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
        n_neg = tf.minimum(n_neg, max_neg_entries)
        # Avoid n_neg being zero, which would cause an error in top_k later on.
        n_neg = tf.maximum(n_neg, 1)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask, hard negative mining.
        nmask = tf.logical_and(nmask, nvalues <= max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            total_cross_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            total_cross_pos = tf.reduce_sum(total_cross_pos * fpmask,
                                            name="cross_entropy_pos")
            tf.losses.add_loss(total_cross_pos)

        with tf.name_scope('cross_entropy_neg'):
            total_cross_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            total_cross_neg = tf.reduce_sum(total_cross_neg * fnmask,
                                            name="cross_entropy_neg")
            tf.losses.add_loss(total_cross_neg)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            total_loc = custom_layers.abs_smooth_2(localisations -
                                                   glocalisations)
            total_loc = tf.reduce_sum(total_loc * weights, name="localization")
            tf.losses.add_loss(total_loc)

        total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')

        # Add to the EXTRA_LOSSES TF collection.
        tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
        tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
        tf.add_to_collection('EXTRA_LOSSES', total_cross)
        tf.add_to_collection('EXTRA_LOSSES', total_loc)

        # Stick with the original paper in terms of defining the model loss.
        model_loss = tf.get_collection(tf.GraphKeys.LOSSES)
        model_loss = tf.add_n(model_loss)
        model_loss = array_ops.where(tf.equal(n_positives, 0),
                                     array_ops.zeros_like(model_loss),
                                     tf.div(1.0, n_positives) * model_loss)
        # Add regularization loss.
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        regularization_loss = tf.add_n(regularization_losses,
                                       name='regularization_loss')
        # If the model loss is zero, no need to do a gradient update on this
        # batch.
        total_loss = array_ops.where(tf.equal(n_positives, 0),
                                     array_ops.zeros_like(model_loss),
                                     tf.add(model_loss, regularization_loss))

        # Debugging info.
        tf.summary.scalar("positive_num", n_positives)
        tf.summary.scalar("negative_num", n_neg)
        tf.summary.scalar("regularization_loss", regularization_loss)
        # with tf.name_scope('variables_loc'):
        #     selected_p = tf.boolean_mask(glocalisations, pmask)
        #     p_mean, p_variance = tf.nn.moments(selected_p, [0])
        #     tf.summary.scalar("mean_cx", p_mean[0])
        #     tf.summary.scalar("mean_cy", p_mean[1])
        #     tf.summary.scalar("mean_w", p_mean[2])
        #     tf.summary.scalar("mean_h", p_mean[3])
        #     tf.summary.scalar("var_cx", p_variance[0])
        #     tf.summary.scalar("var_cy", p_variance[1])
        #     tf.summary.scalar("var_w", p_variance[2])
        #     tf.summary.scalar("var_h", p_variance[3])
        return total_loss
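# The batch-skipping guard at the end of get_losses, sketched in NumPy: when
# a batch contains no positive anchors the total loss collapses to zero, so
# no gradient update happens.
import numpy as np

def total_loss(model_loss, reg_loss, n_positives):
    # Mirrors the array_ops.where guard above.
    return np.where(n_positives == 0, 0.0, model_loss + reg_loss)

print(total_loss(1.5, 0.2, n_positives=8))   # 1.7
print(total_loss(1.5, 0.2, n_positives=0))   # 0.0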
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,            # Weight of the localization error.
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    # The loss is defined as a weighted sum of the localization error and
    # the confidence error.
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            # Reshape the class probabilities to (-1, 21).
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            # Ground-truth classes.
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            # Ground-truth matching scores.
            fgscores.append(tf.reshape(gscores[i], [-1]))
            # Predicted box coordinates (in encoded form).
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            # Encoded ground-truth box coordinates.
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask: an anchor whose IoU with a
        # ground-truth box exceeds 0.5 is treated as a positive.
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        # Number of positives N.
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining: to keep positives and negatives roughly
        # balanced, SSD subsamples the negatives by confidence error (the
        # lower the predicted background confidence, the larger the error),
        # keeping the top-k hardest so the ratio stays close to 3:1.
        no_classes = tf.cast(pmask, tf.int32)
        # Class predictions.
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select: three times the positives,
        # plus batch_size.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        # Sort by confidence error (ascending background score) and take the
        # top-k hardest negatives.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy losses.
        with tf.name_scope('cross_entropy_pos'):
            # Class confidence error on the positives.
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            # Sum of the positive confidence errors, divided by batch_size.
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            # Predicted minus ground-truth encoded locations, passed through
            # the smooth L1 loss.
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            # Weighted localization loss, summed and divided by batch_size.
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size,
                          name='value')
            tf.losses.add_loss(loss)
        # The loss collection now holds the weighted sum of the confidence
        # and localization errors.
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               end_points, match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None,
               feat_layers=SSDNet.default_params.feat_layers):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses_concat = tf.concat(fgclasses, axis=0)
        gscores_concat = tf.concat(fgscores, axis=0)
        localisations_concat = tf.concat(flocalisations, axis=0)
        glocalisations_concat = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores_concat > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores_concat > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add center loss, one term per feature layer.
        center_op_layers = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_loss'):
                layer_scores = tf.reshape(
                    gscores[i], [-1, tensor_shape(gscores[i], 4)[-1]])
                layer_classes = tf.reshape(
                    gclasses[i], [-1, tensor_shape(gclasses[i], 4)[-1]])
                layer_features = tf.reshape(
                    end_points[layer],
                    [-1, tensor_shape(end_points[layer], 4)[-1]])
                # For each position, pick the class of its best-scoring anchor.
                label_index = tf.argmax(layer_scores, axis=1)
                label_index = tf.expand_dims(label_index, 1)
                row = tf.range(label_index.shape[0])[:, None]
                row = tf.cast(row, tf.int64)
                label_index = tf.concat([row, label_index], axis=1)
                labels_one_layer = tf.gather_nd(layer_classes, label_index)
                loss, center_op = get_center_loss(layer_features, layer,
                                                  labels_one_layer)
                # Note: fpmask spans all layers concatenated while loss is
                # per-layer; this weighting assumes compatible shapes.
                loss = tf.div(tf.reduce_sum(0.001 * loss * fpmask),
                              batch_size, name='value')
                center_op_layers.append(center_op)
                tf.losses.add_loss(loss)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses_concat)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # with tf.name_scope('center_loss'):
        #     loss, centers_update_op = get_center_loss(logits, gclasses_concat)
        #     loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
        #     tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations_concat -
                                            glocalisations_concat)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size,
                          name='value')
            tf.losses.add_loss(loss)
        return center_op_layers
def my_ssd_losses(logits, localisations, gclasses, glocalisations, gscores, match_threshold=0.5, negative_ratio=3., alpha=1., label_smoothing=0., device='/cpu:0', scope=None): with tf.name_scope(scope, 'ssd_losses'): # _alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.]) # alphas = _alphas / _alphas.sum() alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.]) lshape = tfe.get_shape(logits[0], 5) num_classes = lshape[-1] batch_size = lshape[0] # Flatten out all vectors! flogits = [] fgclasses = [] fgscores = [] flocalisations = [] fglocalisations = [] for i in range(len(logits)): print("#### logits: ", logits[i]) flogits.append(tf.reshape(logits[i], [-1, num_classes])) fgclasses.append(tf.reshape(gclasses[i], [-1])) fgscores.append(tf.reshape(gscores[i], [-1])) flocalisations.append(tf.reshape(localisations[i], [-1, 4])) fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4])) print("###### flogits: ", flogits[i]) # And concat the crap! logits = tf.concat(flogits, axis=0) gclasses = tf.concat(fgclasses, axis=0) gscores = tf.concat(fgscores, axis=0) localisations = tf.concat(flocalisations, axis=0) glocalisations = tf.concat(fglocalisations, axis=0) dtype = logits.dtype # Compute positive matching mask... pmask = gscores > match_threshold fpmask = tf.cast(pmask, dtype) n_positives = tf.reduce_sum(fpmask) # Hard negative mining... no_classes = tf.cast(pmask, tf.int32) predictions = slim.softmax(logits) nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5) fnmask = tf.cast(nmask, dtype) nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask) nvalues_flat = tf.reshape(nvalues, [-1]) # Number of negative entries to select. max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32) n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size n_neg = tf.minimum(n_neg, max_neg_entries) val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg) max_hard_pred = -val[-1] # Final negative mask. nmask = tf.logical_and(nmask, nvalues < max_hard_pred) fnmask = tf.cast(nmask, dtype) # Add cross-entropy loss. 
        with tf.name_scope('cross_entropy_pos'):
            # The slice boundaries are cumulative flattened anchor counts for
            # SSD300 (e.g. 184832 = 38*38*4*32 for the first feature layer).
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832], labels=gclasses[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144], labels=gclasses[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344], labels=gclasses[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144], labels=gclasses[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296], labels=gclasses[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424], labels=gclasses[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832], labels=no_classes[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144], labels=no_classes[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344], labels=no_classes[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144], labels=no_classes[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296], labels=no_classes[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424], labels=no_classes[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:184832] - glocalisations[:184832])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[184832:254144] - glocalisations[184832:254144])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[254144:273344] - glocalisations[254144:273344])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[273344:278144] - glocalisations[273344:278144])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[278144:279296] - glocalisations[278144:279296])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[279296:279424] - glocalisations[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Hard negative mining... (this second pass duplicates the one above
        # with a different set of slice boundaries; it appears to be a stale
        # copy kept for another input configuration.)
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        with tf.name_scope('cross_entropy_pos'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664], labels=gclasses[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080], labels=gclasses[369664:462080])
            # NOTE: 462080:118016 is a reversed (hence empty) slice; the upper
            # bound is almost certainly mistyped in the original.
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016], labels=gclasses[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416], labels=gclasses[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720], labels=gclasses[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976], labels=gclasses[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664], labels=no_classes[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080], labels=no_classes[369664:462080])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016], labels=no_classes[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416], labels=no_classes[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720], labels=no_classes[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976], labels=no_classes[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:369664] - glocalisations[:369664])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[369664:462080] - glocalisations[369664:462080])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[462080:118016] - glocalisations[462080:118016])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[118016:124416] - glocalisations[118016:124416])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[124416:126720] - glocalisations[124416:126720])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[126720:126976] - glocalisations[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
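# Hedged sketch, an assumption rather than original code: the hard-coded slice
# boundaries above are cumulative flattened anchor counts (e.g. 184832 =
# 38*38*4*32 for the first SSD300 feature layer at batch size 32). Deriving
# them from the per-layer logits avoids both the baked-in constants and the
# reversed-slice typo flagged above. Assumes fully static tensor shapes.
def layer_slice_bounds(logits_list):
    # Returns [0, n1, n1+n2, ...]; slice layer i as bounds[i]:bounds[i+1],
    # matching the concatenation order used in my_ssd_losses.
    bounds = [0]
    for l in logits_list:
        shape = l.get_shape().as_list()
        # Every dimension except the trailing class axis contributes anchors.
        bounds.append(bounds[-1] + int(np.prod(shape[:-1])))
    return bounds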
def get_model(self, inputs, weight_decay=0.0005, is_training=False):
    # End_points collect relevant activations for external use.
    arg_scope = self.__arg_scope(weight_decay=weight_decay)
    self.img_shape = tfe.get_shape(inputs)[1:3]
    with slim.arg_scope(arg_scope):
        end_points = {}
        channels = {}
        with tf.variable_scope('vgg_16', [inputs]):
            # Original VGG-16 blocks.
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            end_points['block1'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            # Block 2.
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            end_points['block2'] = net
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            # Block 3.
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            end_points['block3'] = net
            channels['block3'] = 256
            self.layer_shape.append(tfe.get_shape(net)[1:3])
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            # Block 4.
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            end_points['block4'] = net
            channels['block4'] = 512
            self.layer_shape.append(tfe.get_shape(net)[1:3])
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            # Block 5.
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            end_points['block5'] = net
            channels['block5'] = 512
            self.layer_shape.append(tfe.get_shape(net)[1:3])
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
        # Additional SSD blocks.
        # with slim.arg_scope([slim.conv2d], activation_fn=None):
        # with slim.arg_scope([slim.batch_norm], activation_fn=tf.nn.relu,
        #                     is_training=is_training, updates_collections=None):
        # with slim.arg_scope([slim.dropout], is_training=is_training, keep_prob=0.8):
        with tf.variable_scope(self.model_name):
            return self.__additional_ssd_block(end_points, channels, net, is_training=is_training)
def Adversary_Back_Train(D, D_logits, D_, D_logits_, gscore_untrim, gscore_gene, scope=None):
    with tf.name_scope(scope, 'aher_adv_losses'):
        lshape = tfe.get_shape(D_logits[0], 8)
        batch_size = lshape[0]
        fgscore_untrim = []
        fgscore_gene = []
        f_D_logits = []
        f_D_logits_ = []
        f_D = []
        f_D_ = []
        for i in range(len(D_logits)):
            fgscore_untrim.append(tf.reshape(gscore_untrim[i], [-1]))
            fgscore_gene.append(tf.reshape(gscore_gene[i], [-1]))
            f_D_logits.append(tf.reshape(D_logits[i], [-1]))
            f_D_logits_.append(tf.reshape(D_logits_[i], [-1]))
            f_D.append(tf.reshape(D[i], [-1]))
            f_D_.append(tf.reshape(D_[i], [-1]))
        gscore_untrim = tf.concat(fgscore_untrim, axis=0)
        gscore_gene = tf.concat(fgscore_gene, axis=0)
        D_logits = tf.concat(f_D_logits, axis=0)
        D_logits_ = tf.concat(f_D_logits_, axis=0)
        D = tf.concat(f_D, axis=0)
        D_ = tf.concat(f_D_, axis=0)
        dtype = D_logits.dtype
        # Select the background positions and logits.
        pos_mask_untrim = gscore_untrim > 0.70
        nmask_untrim = tf.logical_and(tf.logical_not(pos_mask_untrim), gscore_untrim < 0.3)
        pos_mask_gene = gscore_gene > 0.70
        nmask_gene = tf.logical_and(tf.logical_not(pos_mask_gene), gscore_gene < 0.3)
        nmask = tf.logical_and(nmask_untrim, nmask_gene)
        fnmask = tf.cast(nmask, dtype)
        fnmask_num = tf.reduce_sum(fnmask)
        # Compute the sigmoid cross-entropy losses.
        d_loss_real = sigmoid_cross_entropy_with_logits(D_logits, tf.ones_like(D))
        d_loss_real = tf.div(tf.reduce_sum(d_loss_real * fnmask), fnmask_num / FLAGS.dis_weights, name='d_loss_real')
        d_loss_fake = sigmoid_cross_entropy_with_logits(D_logits_, tf.zeros_like(D_))
        d_loss_fake = tf.div(tf.reduce_sum(d_loss_fake * fnmask), fnmask_num / FLAGS.dis_weights, name='d_loss_fake')
        g_loss = sigmoid_cross_entropy_with_logits(D_logits_, tf.ones_like(D_))
        g_loss = tf.div(tf.reduce_sum(g_loss * fnmask), fnmask_num / FLAGS.gen_weights, name='g_loss')
        return d_loss_real, d_loss_fake, g_loss
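# Hedged sketch, not from the original repo: the three losses returned by
# Adversary_Back_Train are usually minimized by two optimizers over disjoint
# variable sets, as in a standard GAN setup. The scope names 'discriminator'
# and 'generator' are placeholder assumptions.
def adversarial_train_ops(d_loss_real, d_loss_fake, g_loss, lr=1e-4):
    d_loss = d_loss_real + d_loss_fake
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
    # Each optimizer only updates its own network's weights.
    d_op = tf.train.AdamOptimizer(lr).minimize(d_loss, var_list=d_vars)
    g_op = tf.train.AdamOptimizer(lr).minimize(g_loss, var_list=g_vars)
    return d_op, g_op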
def ssd_losses(logits, localisations,             # predicted classes and locations
               gclasses, glocalisations, gscores,  # ground-truth classes, locations, scores
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        # Extract the number of classes and the batch size.
        lshape = tfe.get_shape(logits[0], 5)  # tensor_shape could be used instead
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):  # loop over the SSD feature layers
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)       # all anchors: the 21-class outputs
        gclasses = tf.concat(fgclasses, axis=0)   # all anchors: ground-truth class indices
        gscores = tf.concat(fgscores, axis=0)     # all anchors: IoU with the ground-truth boxes
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        """[<tf.Tensor 'ssd_losses/concat:0' shape=(279424, 21) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_1:0' shape=(279424,) dtype=int64>,
            <tf.Tensor 'ssd_losses/concat_2:0' shape=(279424,) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_3:0' shape=(279424, 4) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_4:0' shape=(279424, 4) dtype=float32>]
        """
        dtype = logits.dtype
        # Anchors whose IoU with a ground-truth box exceeds the threshold.
        pmask = gscores > match_threshold
        # Float foreground mask (anchors with enough IoU, assumed to contain an object).
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # total number of foreground anchors
        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # each row of 21 scores becomes probabilities
        # Anchors whose IoU falls below the threshold.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],  # no object: take the predicted background probability
                           1. - fnmask)        # object present: mark with 1
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select: pick from nmask the n_neg
        # entries least likely to be background (lowest class-0 probability).
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # 3 x number of positives + batch_size.
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)  # the n_neg entries least likely to be background
        max_hard_pred = -val[-1]
        # Final negative mask: not foreground, yet least background-like.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)  # 0-20
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=no_classes)  # {0,1}
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
def __additional_ssd_block(self, end_points, channels, net, is_training=False):
    # Additional SSD blocks.
    # Block 6: let's dilate the hell out of it!
    net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
    # net = slim.batch_norm(net)
    # net = self.__dropout(net)
    end_points['block6'] = net
    # Block 7: 1x1 conv. Because the f**k.
    net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
    # net = slim.batch_norm(net)
    # net = self.__dropout(net)
    end_points['block7'] = net
    channels['block7'] = 1024
    self.layer_shape.append(tfe.get_shape(net)[1:3])
    # Blocks 8/9: 1x1 and 3x3 convolutions with stride 2.
    end_point = 'block8'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
        # net = slim.batch_norm(net)
        # net = self.__dropout(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        # net = slim.batch_norm(net)
        # net = self.__dropout(net)
    end_points[end_point] = net
    channels[end_point] = 512
    self.layer_shape.append(tfe.get_shape(net)[1:3])
    end_point = 'block9'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        # net = slim.batch_norm(net)
        # net = self.__dropout(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        # net = slim.batch_norm(net)
        # net = self.__dropout(net)
    end_points[end_point] = net
    channels[end_point] = 256
    self.layer_shape.append(tfe.get_shape(net)[1:3])
    # Prediction and localisation layers.
    logits, localisations = self.ssd_multibox_layer(
        end_points, channels, self.feat_layers, self.normalizations,
        is_training=is_training)
    if is_training:
        return localisations, logits, end_points
    else:
        predictions = []
        for l in range(len(logits[0])):
            predictions.append(slim.softmax(logits[0][l]))
        return predictions, localisations, logits, end_points
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype
        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)
        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
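# Illustrative standalone sketch (an assumption, not original code): the hard
# negative mining used by the ssd_losses variants above, reduced to its core.
# Given per-anchor background probabilities and a positive mask, keep only the
# n_neg negatives with the lowest background score, i.e. the hardest ones.
def hard_negative_mask(background_prob, pmask, n_positives, negative_ratio=3., batch_size=1):
    nmask = tf.logical_not(pmask)
    fnmask = tf.cast(nmask, background_prob.dtype)
    # Positives get a value of 1.0 so top_k below never selects them.
    nvalues = tf.where(nmask, background_prob, 1. - fnmask)
    max_neg = tf.cast(tf.reduce_sum(fnmask), tf.int32)
    n_neg = tf.minimum(tf.cast(negative_ratio * n_positives, tf.int32) + batch_size, max_neg)
    # Negate so top_k returns the lowest background probabilities.
    val, _ = tf.nn.top_k(-nvalues, k=n_neg)
    # Threshold at the largest background probability among the kept negatives.
    return tf.logical_and(nmask, nvalues < -val[-1])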
def ssd_losses(logits, localisations, glabels, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=0.2,
               label_smoothing=0., batch_size=16, scope=None):
    '''Loss functions for training the text box network.

    Arguments:
      logits: (list of) prediction logits Tensors;                  x
      localisations: (list of) localisation Tensors;                l
      glocalisations: (list of) groundtruth localisation Tensors;   g
      gscores: (list of) groundtruth score Tensors;                 c
    '''
    # from ssd loss
    with tf.name_scope(scope, 'txt_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        # Flatten out all vectors!
        flogits = logits
        fgscores = gscores
        flocalisations = localisations
        fglocalisations = glocalisations
        fglabels = glabels
        # for i in range(len(logits)):
        #     flogits.append(tf.reshape(logits[i], [-1, num_classes]))
        #     fgscores.append(tf.reshape(gscores[i], [-1]))
        #     fglabels.append(tf.reshape(glabels[i], [-1]))
        #     flocalisations.append(tf.reshape(localisations[i], [-1, 12]))
        #     fglocalisations.append(tf.reshape(glocalisations[i], [-1, 12]))
        # And concat the crap!
        glabels = tf.concat(fglabels, axis=1)
        logits = tf.concat(flogits, axis=1)                  # x
        gscores = tf.concat(fgscores, axis=1)                # c
        localisations = tf.concat(flocalisations, axis=1)    # l
        glocalisations = tf.concat(fglocalisations, axis=1)  # g
        dtype = logits.dtype
        # Compute positive matching mask...
        pmask = gscores > match_threshold  # positive mask
        # pmask = tf.concat(axis=0, values=[pmask[:tf.argmax(gscores, axis=0)],
        #                                   [True],
        #                                   pmask[tf.argmax(gscores, axis=0) + 1:]])
        ipmask = tf.cast(pmask, tf.int32)    # int positive mask
        fpmask = tf.cast(pmask, dtype)       # float positive mask
        n_positives = tf.reduce_sum(fpmask)  # total number of positives
        # Hard negative mining... (conf loss?)
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, :, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        inmask = tf.cast(nmask, tf.int32)
        # Add cross-entropy loss.
        # logits: [batch_size, num_classes]; labels: [batch_size], in 0..num_classes.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=glabels)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
            l_cross_pos.append(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
            l_cross_neg.append(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            # localisations = tf.Print(localisations, [localisations, tf.shape(localisations)], "pre is: ", summarize=20)
            # glocalisations = tf.Print(glocalisations, [glocalisations, tf.shape(glocalisations)], "gt is : ", summarize=20)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
            l_loc.append(loss)
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')
            # Add to the EXTRA_LOSSES TF collection.
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
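# Hedged sketch, not in the original file: the 'EXTRA_LOSSES' collection
# populated above is only useful if something reads it back, for instance
# to emit TensorBoard summaries during training.
def add_extra_loss_summaries():
    for loss in tf.get_collection('EXTRA_LOSSES'):
        # One scalar summary per partial loss (pos/neg cross-entropy, loc).
        tf.summary.scalar(loss.op.name, loss)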
def ssd_losses_old(logits, localisations, gclasses, glocalisations, gscores,
                   match_threshold=0.5, negative_ratio=3., alpha=1.,
                   label_smoothing=0., device='/cpu:0', scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight...
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]
                    # Positive mask.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)
                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)
                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    nmask = tf.logical_and(tf.logical_not(pmask), gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    nvalues = tf.where(nmask, predictions[:, :, :, :, 0], 1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)
                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)
                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)
                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)
                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)
            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')
                # Add to the EXTRA_LOSSES TF collection.
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3.,
               alpha=1.,  # weight of the localization loss
               label_smoothing=0., device='/gpu:0', scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))        # predicted class scores
            fgclasses.append(tf.reshape(gclasses[i], [-1]))                 # ground-truth classes
            fgscores.append(tf.reshape(gscores[i], [-1]))                   # anchor scores, i.e. IoU values
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))    # predicted box coordinates (encoded)
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))  # ground-truth box coordinates (encoded)
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype
        # Compute positive matching mask...
        pmask = gscores > match_threshold  # anchors with IoU above the match threshold are positives
        fpmask = tf.cast(pmask, dtype)     # cast pmask to dtype
        n_positives = tf.reduce_sum(fpmask)  # number of positives N
        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # predicted class probabilities
        # Logical AND: non-positive anchors with IoU < 0.5 become candidate negatives.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        # For negatives take the predicted background probability, otherwise 1.
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)  # number of negatives to keep
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)
            # Sum the confidence loss over the positives and divide by the batch size.
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            # Smooth L1, computed slightly differently from the usual form.
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
def ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
               match_threshold=0.5, negative_ratio=3., alpha=1.,
               label_smoothing=0., device='/cpu:0', scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        # (batch_size, height, width, anchors per cell, 21 class scores per anchor)
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]
        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap! Each feature map is first reshaped to one row,
        # then all feature maps are concatenated together.
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype
        # Compute positive matching mask: ground-truth boxes whose IoU falls
        # below the threshold are filtered out first.
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # count the positives
        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)  # no_classes marks pure background
        predictions = slim.softmax(logits)  # softmax over the 21 class logits
        # Select negatives: not positive, and gscores > -0.5.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        # For negatives take the softmax background probability; positives get 1.
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        # Take the minimum of the scheduled count and the available negatives.
        n_neg = tf.minimum(n_neg, max_neg_entries)
        # Negate so top_k picks the n_neg entries least likely to be background;
        # returns the values and their indices.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        # The largest background probability among the selected hard negatives.
        max_hard_pred = -val[-1]
        # Final negative mask: entries below that cutoff (i.e. the ones that
        # most look like they contain something) that were already negatives.
        # Most anchor boxes will not enclose an object of interest: their IoU
        # with every ground-truth box is below the threshold, which yields a
        # huge number of background (label-0) anchors. Since they can far
        # outnumber everything else, we keep only some of them: the ones the
        # model is currently least confident are background, i.e. the hard
        # negatives with the lowest class-0 scores.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)
        # Add cross-entropy loss for the (filtered) positives.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add cross-entropy loss for the (filtered) negatives.
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)
        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            # Background anchors need no box regression, so fpmask filters them out.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
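# Hedged sketch: custom_layers.abs_smooth is referenced throughout but never
# defined in this file. In the SSD-TensorFlow codebase it implements the Fast
# R-CNN smooth-L1 penalty, which the following reimplements as an assumption:
# 0.5*x^2 for |x| < 1, and |x| - 0.5 otherwise.
def abs_smooth_sketch(x):
    absx = tf.abs(x)
    minx = tf.minimum(absx, 1.0)
    # Equals 0.5*x^2 where |x| < 1 and |x| - 0.5 elsewhere.
    return 0.5 * ((absx - 1.0) * minx + absx)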