Example #1
def depthwise_conv1d(input,
                     k_h=1,
                     k_w=3,
                     channel_multiplier=1,
                     strides=1,
                     padding='SAME',
                     stddev=0.02,
                     name='depthwise_conv1d',
                     bias=True,
                     weight_decay=0.0001):
    lshape = tfe.get_shape(input, 3)
    input = tf.reshape(input, [lshape[0], 1, lshape[1], lshape[2]])
    with tf.variable_scope(name):
        in_channel = input.get_shape().as_list()[-1]
        w = tf.get_variable(
            'w', [k_h, k_w, in_channel, channel_multiplier],
            regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.depthwise_conv2d(input,
                                      w, [1, strides, strides, 1],
                                      padding,
                                      rate=None,
                                      name=None,
                                      data_format=None)
        if bias:
            biases = tf.get_variable('bias', [in_channel * channel_multiplier],
                                     initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, biases)
        cshape = tfe.get_shape(conv, 4)
        conv = tf.reshape(
            conv, [cshape[0], cshape[2], cshape[3]])  # convert back to the original 1-D layout
        return conv
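To make the reshape trick above concrete, here is a minimal NumPy sketch of what a depthwise 1-D convolution computes, assuming 'SAME' padding, stride 1, and TensorFlow's output-channel ordering c * channel_multiplier + m. It is an illustration only, not the TF kernel; the helper name and shapes are made up.

import numpy as np

def depthwise_conv1d_ref(x, w):
    # x: [batch, width, in_channel]; w: [k_w, in_channel, channel_multiplier]
    batch, width, in_channel = x.shape
    k_w, _, multiplier = w.shape
    pad = k_w // 2
    xp = np.pad(x, ((0, 0), (pad, pad), (0, 0)), mode='constant')
    out = np.zeros((batch, width, in_channel * multiplier))
    for c in range(in_channel):          # each input channel gets its own filters
        for m in range(multiplier):
            for t in range(width):
                out[:, t, c * multiplier + m] = np.sum(
                    xp[:, t:t + k_w, c] * w[:, c, m], axis=1)
    return out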
Example #2
def tf_aher_bboxes_select_layer(predictions_layer,
                                localizations_layer,
                                select_threshold=None,
                                num_classes=21,
                                ignore_class=0,
                                scope=None,
                                IoU_flag=False):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A prediction layer;
      localizations_layer: A localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 2. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'aher_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        if IoU_flag:
            zeros_m = tf.zeros([predictions_layer.shape[1], 1])
            predictions_layer = tf.reshape(
                tf.stack([
                    zeros_m,
                    tf.reshape(predictions_layer,
                               [predictions_layer.shape[1], 1])
                ],
                         axis=1),
                [predictions_layer.shape[0], predictions_layer.shape[1], 2])
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(
            localizations_layer, tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes
Example #3
def tf_ssd_bboxes_select_layer(predictions_layer,
                               localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        # After the reshape, predictions_layer has shape (batch, n*n*num_layer_anchors, num_classes)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        # After the reshape, localizations_layer has shape (batch, n*n*num_layer_anchors, 4)
        localizations_layer = tf.reshape(
            localizations_layer, tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                # Take the scores for class c: scores has shape (batch, n*n*num_layer_anchors), predictions_layer has shape (batch, n*n*num_layer_anchors, num_classes)
                scores = predictions_layer[:, :, c]
                # Build a mask from the scores: candidate boxes scoring below select_threshold will be dropped
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                # Scores below select_threshold are set to 0
                scores = scores * fmask
                # Bboxes whose score is below select_threshold are set to 0
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes
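The masking above never removes entries; it only zeroes the scores and the matching boxes, so later top-k / NMS stages can ignore all-zero rows. A small NumPy sketch with made-up values (an illustration, not the TF code):

import numpy as np

scores = np.array([[0.9, 0.3, 0.7]])        # batch x N scores for one class
bboxes = np.random.rand(1, 3, 4)            # batch x N x 4 decoded boxes
select_threshold = 0.5
fmask = (scores >= select_threshold).astype(scores.dtype)
scores = scores * fmask                     # -> [[0.9, 0.0, 0.7]]
bboxes = bboxes * fmask[..., np.newaxis]    # the second box becomes all zeros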
Example #4
def tf_ssd_bboxes_select_layer(predictions_layer,
                               localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(
            localizations_layer, tf.stack([l_shape[0], -1, l_shape[-1]]))
        # just consider those legal bboxes
        # localizations_mask = (localizations_layer[:, :, 0] < localizations_layer[:, :, 2])
        # localizations_mask = tf.logical_and(localizations_mask, (localizations_layer[:, :, 1] < localizations_layer[:, :, 3]))
        # localizations_mask = tf.Print(localizations_mask,[localizations_mask], message='localizations_mask: ', summarize=30)
        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater(scores, select_threshold),
                                scores.dtype)
                # fmask = tf.cast(tf.logical_and(tf.greater(scores, select_threshold), localizations_mask), scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes
Example #5
def flaten_predict1(predictions, localisations):
    predictions_shape = tfe.get_shape(predictions[0], 5)
    batch_size = predictions_shape[0]
    num_classes = predictions_shape[-1]

    if batch_size > 1:
        raise ValueError('only batch_size 1 is supported.')

    flaten_pred = []
    flaten_labels = []
    flaten_locations = []
    flaten_scores = []

    for i in range(len(predictions)):
        flaten_pred.append(
            tf.reshape(predictions[i], [batch_size, -1, num_classes]))
        cls_pred = flaten_pred[i]
        flaten_scores.append(
            tf.reshape(cls_pred, [batch_size, -1, num_classes]))

        ##
        ##
        flaten_labels.append(
            tf.reshape(
                tf.argmax(cls_pred[:, :, 1:], -1) + 1, [batch_size, -1]))

    total_scores = tf.squeeze(tf.concat(flaten_scores, 1), 0)
    total_locations = tf.squeeze(localisations, 0)
    total_labels = tf.squeeze(tf.concat(flaten_labels, 1), 0)
    # remove bboxes that are not foreground
    non_background_mask = tf.greater(total_labels, 0)

    bbox_mask = non_background_mask
    # return tf.boolean_mask(total_scores, bbox_mask), tf.boolean_mask(total_labels, bbox_mask), tf.boolean_mask(total_locations, bbox_mask)
    return total_scores, total_labels, total_locations
Example #6
    def bboxes_decode(self, cls, loc, match_thres, scope='bboxes_decode'):
        """Decode labels and bounding boxes."""
        with tf.name_scope(scope):
            assert len(tfe.get_shape(cls)) == 3
            keep = tf.minimum(tf.size(cls), 25)
            anchors = tf.reshape(self.yxhw, [-1, 4])

            def fn(args):
                cls, loc = args
                fcls = tf.reshape(cls, [-1])
                floc = tf.reshape(loc, [-1, 4])
                v, idx = tf.nn.top_k(fcls, keep, sorted=True)
                sel_anchors = tf.gather(anchors, idx)
                floc = tf.gather(floc, idx)
                bboxes = sel_anchors + floc * self.ruler
                bboxes = tfe.yxhw2yxyx(bboxes)
                bboxes = tf.where(v > match_thres, bboxes,
                                  tf.zeros_like(bboxes))
                v = tf.where(v > match_thres, v, tf.zeros_like(v))
                return bboxes, v

            bboxes, scores = tf.map_fn(fn=fn,
                                       elems=[cls, loc],
                                       back_prop=False,
                                       dtype=(tf.float32, tf.float32))
            return bboxes, scores
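bboxes_decode adds the scaled regression output to the anchors in (y, x, h, w) form and then converts to corner coordinates. The internals of tfe.yxhw2yxyx are not shown here, so the sketch below uses the conventional centre-size-to-corner conversion with made-up values; treat it as an assumption about that helper.

import numpy as np

def yxhw_to_yxyx(boxes):
    # (y, x, h, w) centre-size boxes -> (ymin, xmin, ymax, xmax) corner boxes
    y, x, h, w = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]
    return np.stack([y - h / 2., x - w / 2., y + h / 2., x + w / 2.], axis=-1)

anchors = np.array([[0.5, 0.5, 0.2, 0.2]])   # one (y, x, h, w) anchor
offsets = np.array([[0.1, 0.0, 0.0, 0.1]])   # predicted regression output
ruler = 1.0                                  # scaling factor, a scalar here for simplicity
decoded = yxhw_to_yxyx(anchors + offsets * ruler)   # -> [[0.5, 0.35, 0.7, 0.65]]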
def lane_net_losses(logits,
                    gt_maps,
                    negative_ratio=3.,
                    alpha=1.,
                    label_smoothing=0.,
                    device='/cpu:0',
                    scope=None):
    with tf.name_scope(scope, 'lane_net_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # logits = tf.layers.flatten(logits)
        # gt = tf.layers.flatten(gt_maps)
        print(gt_maps.get_shape().as_list())
        print(logits.get_shape().as_list())

        # Add cross-entropy loss.
        with tf.name_scope('spase_softmax_cross_entropy'):
            loss = tf.nn.softmax_cross_entropy_with_logits(labels=gt_maps,
                                                           logits=logits)
            # loss = tf.nn.l2_loss(logits - gt_maps)
            # loss = tf.nn.weighted_cross_entropy_with_logits(
            # targets=gt_maps,
            # logits=logits,
            # pos_weight=12
            # )
            loss = tf.reduce_mean(loss)
            # loss = tf.div(loss, batch_size, name='value')
            tf.losses.add_loss(loss)
def tf_ssd_bboxes_select_layer(predictions_layer,
                               localizations_layer,
                               select_threshold=None,
                               num_classes=7,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # shape = batch_size × (64*64*4) × number_classes
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        # print "debug+++++++++++++++predictions_layer = {} ".format(predictions_layer)
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(
            localizations_layer, tf.stack([l_shape[0], -1, l_shape[-1]]))
        # print "localizations_layer = {} ".format(localizations_layer)
        d_scores = {}
        d_bboxes = {}
        # for c in range(0, num_classes):
        for c in range(0, 2):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes
Example #9
def tf_ssd_bboxes_select_layer(
        predictions_layer,
        localizations_layer,  # predicted classes and locations to be filtered
        select_threshold=None,
        num_classes=21,
        ignore_class=0,
        scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.
    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(
            localizations_layer, tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:  # skip the background class
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]  # predicted scores for class c
                fmask = tf.cast(tf.greater_equal(scores, select_threshold),
                                scores.dtype)
                scores = scores * fmask  # keep only scores above the threshold
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes  # return dicts mapping each class to its predicted scores and box locations
Example #10
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold), scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes
Example #11
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      scores, bboxes: Output tensors of size Batches x N_Classes-1 X N x 1 | 4
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        l_scores = []
        l_bboxes = []
        for c in range(1, num_classes):
            # Remove boxes under the threshold.
            scores = predictions_layer[:, :, c]
            mask = tf.greater_equal(scores, select_threshold)
            scores = scores * tf.cast(mask, scores.dtype)
            bboxes = localizations_layer * tf.expand_dims(tf.cast(mask, localizations_layer.dtype),
                                                          axis=-1)
            l_scores.append(scores)
            l_bboxes.append(bboxes)

        scores = tf.stack(l_scores, axis=1)
        bboxes = tf.stack(l_bboxes, axis=1)
        return scores, bboxes
Example #12
def tf_ssd_bboxes_select_layer_all_classes(predictions_layer,
                                           localizations_layer,
                                           select_threshold=None):
    """Extract classes, scores and bounding boxes from features in one layer.
     Batch-compatible: inputs are supposed to have batch-type shapes.

     Args:
       predictions_layer: A SSD prediction layer;
       localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
     Return:
      classes, scores, bboxes: Input Tensors.
     """
    # Reshape features: Batches x N x N_labels | 4
    p_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(predictions_layer,
                                   tf.stack([p_shape[0], -1, p_shape[-1]]))
    l_shape = tfe.get_shape(localizations_layer)
    localizations_layer = tf.reshape(localizations_layer,
                                     tf.stack([l_shape[0], -1, l_shape[-1]]))
    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class
        classes = tf.argmax(predictions_layer, axis=2)
        scores = tf.reduce_max(predictions_layer, axis=2)
        scores = scores * tf.cast(classes > 0, scores.dtype)
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        classes = tf.argmax(sub_predictions, axis=2) + 1
        scores = tf.reduce_max(sub_predictions, axis=2)
        # Only keep predictions higher than threshold.
        mask = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(mask, classes.dtype)
        scores = scores * tf.cast(mask, scores.dtype)
    # Assume localization layer already decoded.
    bboxes = localizations_layer
    return classes, scores, bboxes
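When select_threshold is unset, this variant keeps the argmax class per box and suppresses boxes whose winner is the background class 0. A NumPy sketch with toy values:

import numpy as np

preds = np.array([[[0.6, 0.3, 0.1],     # background wins -> score forced to 0
                   [0.1, 0.7, 0.2]]])   # class 1 wins with score 0.7
classes = preds.argmax(axis=2)           # [[0, 1]]
scores = preds.max(axis=2)               # [[0.6, 0.7]]
scores = scores * (classes > 0)          # [[0.0, 0.7]]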
Example #13
def tf_ssd_bboxes_select_layer_all_classes(predictions_layer, localizations_layer,
                                           select_threshold=None):
    """Extract classes, scores and bounding boxes from features in one layer.
     Batch-compatible: inputs are supposed to have batch-type shapes.

     Args:
       predictions_layer: A SSD prediction layer;
       localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
     Return:
      classes, scores, bboxes: Input Tensors.
     """
    # Reshape features: Batches x N x N_labels | 4
    p_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(predictions_layer,
                                   tf.stack([p_shape[0], -1, p_shape[-1]]))
    l_shape = tfe.get_shape(localizations_layer)
    localizations_layer = tf.reshape(localizations_layer,
                                     tf.stack([l_shape[0], -1, l_shape[-1]]))
    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class
        classes = tf.argmax(predictions_layer, axis=2)
        scores = tf.reduce_max(predictions_layer, axis=2)
        scores = scores * tf.cast(classes > 0, scores.dtype)
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        classes = tf.argmax(sub_predictions, axis=2) + 1
        scores = tf.reduce_max(sub_predictions, axis=2)
        # Only keep predictions higher than threshold.
        mask = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(mask, classes.dtype)
        scores = scores * tf.cast(mask, scores.dtype)
    # Assume localization layer already decoded.
    bboxes = localizations_layer
    return classes, scores, bboxes
Example #14
def AHER_Detection_Inference(aher_anet_model, aher_anet_anchor,
                             feature, vname, label, duration, clsweights,
                             clsbias, reuse, n_class, cls_suffix='_anet'):
    """ Inference bbox of sigle shot action localization
        feature:      batch_size x 512 x 4069
        vname:        batch_size x 1
        label:        batch_size x 1
        duration:     batch_size x 1
    """   

    predictions, localisation, logits, proplogits, proppredictions, iouprediction, end_points \
       = aher_anet_model.net_prop_iou(feature,clsweights,clsbias,is_training=True,reuse=reuse,num_classes=n_class,cls_suffix=cls_suffix)

    # decode bounding box and get scores
    localisation = aher_anet_model.bboxes_decode_logits(localisation, duration, aher_anet_anchor, predictions)
    if FLAGS.cls_flag:
        rscores, rbboxes = aher_anet_model.detected_bboxes_classwise(
                                    proppredictions, localisation,                                        
                                    select_threshold=FLAGS.select_threshold,
                                    nms_threshold=FLAGS.nms_threshold,
                                    clipping_bbox=None,
                                    top_k=FLAGS.select_top_k,
                                    keep_top_k=FLAGS.keep_top_k,
                                    iou_flag=False)
    else:
        rscores, rbboxes = aher_anet_model.detected_bboxes_classwise(
                                    iouprediction, localisation,                                        
                                    select_threshold=FLAGS.select_threshold,
                                    nms_threshold=FLAGS.nms_threshold,
                                    clipping_bbox=None,
                                    top_k=FLAGS.select_top_k,
                                    keep_top_k=FLAGS.keep_top_k,
                                    iou_flag=True)
    # compute pooling score
    lshape = tfe.get_shape(predictions[0], 8)
    num_classes = lshape[-1]
    batch_size = lshape[0]
    fprediction = []
    for i in range(len(predictions)):
        fprediction.append(tf.reshape(predictions[i], [-1, num_classes]))
    predictions = tf.concat(fprediction, axis=0)
    avergeprediction = tf.reduce_mean(predictions,axis=0)
    labelid = tf.argmax(avergeprediction, 0)
    argmaxid = tf.argmax(predictions, 1)

    prebbox={"rscores":rscores,"rbboxes":rbboxes,"label":labelid,"avescore":avergeprediction, \
             "rawscore":predictions,"argmaxid":argmaxid}
    return prebbox
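The "pooling score" at the end of the routine averages the per-anchor class predictions and takes the argmax as the video-level label. A toy NumPy sketch of that reduction (values made up):

import numpy as np

predictions = np.array([[0.1, 0.7, 0.2],
                        [0.2, 0.5, 0.3]])       # anchors x num_classes
average_prediction = predictions.mean(axis=0)   # [0.15, 0.6, 0.25]
labelid = average_prediction.argmax()           # video-level label -> 1
argmaxid = predictions.argmax(axis=1)           # per-anchor argmax -> [1, 1]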
Example #15
def flaten_predict(predictions, objness_pred, localisations):
    predictions_shape = tfe.get_shape(predictions[0], 5)
    batch_size = predictions_shape[0]
    num_classes = predictions_shape[-1]

    if batch_size > 1:
        raise ValueError('only batch_size 1 is supported.')

    flaten_pred = []
    flaten_labels = []
    flaten_objness = []
    flaten_locations = []
    flaten_scores = []

    for i in range(len(predictions)):
        flaten_pred.append(
            tf.reshape(predictions[i], [batch_size, -1, num_classes]))
        flaten_objness.append(tf.reshape(objness_pred[i], [batch_size, -1]))
        cls_pred = tf.expand_dims(flaten_objness[i], axis=-1) * flaten_pred[i]
        flaten_scores.append(
            tf.reshape(cls_pred, [batch_size, -1, num_classes]))
        #flaten_scores.append(tf.reshape(tf.reduce_max(cls_pred, -1), [batch_size, -1]))
        flaten_labels.append(
            tf.reshape(tf.argmax(cls_pred, -1), [batch_size, -1]))
        flaten_locations.append(
            tf.reshape(localisations[i], [batch_size, -1, 4]))
    # assume batch_size is always 1
    total_scores = tf.squeeze(tf.concat(flaten_scores, 1), 0)
    total_objness = tf.squeeze(tf.concat(flaten_objness, 1), 0)
    total_locations = tf.squeeze(tf.concat(flaten_locations, 1), 0)
    total_labels = tf.squeeze(tf.concat(flaten_labels, 1), 0)
    # remove bboxes that are not foreground
    non_background_mask = tf.greater(total_labels, 0)
    # remove bboxes that have scores lower than select_threshold
    #bbox_mask = tf.logical_and(non_background_mask, tf.greater(total_scores, FLAGS.select_threshold))
    # total_objness = tf.Print(total_objness, [total_objness])
    bbox_mask = tf.logical_and(
        non_background_mask, tf.greater(total_objness, FLAGS.objectness_thres))
    return tf.boolean_mask(total_scores, bbox_mask), tf.boolean_mask(
        total_labels, bbox_mask), tf.boolean_mask(total_locations, bbox_mask)
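The final mask keeps a box only if its argmax label is foreground and its objectness score clears FLAGS.objectness_thres. A NumPy sketch with made-up numbers:

import numpy as np

labels = np.array([0, 3, 5, 2])                # argmax class per box (0 = background)
objness = np.array([0.90, 0.02, 0.40, 0.60])   # objectness score per box
objectness_thres = 0.03                        # stand-in for FLAGS.objectness_thres
bbox_mask = np.logical_and(labels > 0, objness > objectness_thres)
# bbox_mask -> [False, False, True, True]; tf.boolean_mask keeps only those rows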
Example #16
File: ssd.py    Project: ebriant/FaceTracking
    def get_losses(self,
                   logits3,
                   localisations3,
                   gclasses3,
                   glocalisations3,
                   gscores3,
                   match_threshold=0.5,
                   negative_ratio=2.,
                   alpha=1.,
                   label_smoothing=0.,
                   scope=None):
        """Loss functions for training the SSD 300 VGG network.
    
        This function defines the different loss components of the SSD, and
        adds them to the TF loss collection.
    
        Arguments:
          logits: (list of) predictions logits Tensors;
          localisations: (list of) localisations Tensors;
          gclasses: (list of) groundtruth labels Tensors;
          glocalisations: (list of) groundtruth localisations Tensors;
          gscores: (list of) groundtruth score Tensors;
        """
        with tf.name_scope(scope, 'ssd_losses'):
            train_or_eval_test = len(logits3)
            all_pmask = []
            apmask = []
            for u in range(train_or_eval_test):
                gclasses = gclasses3[u]
                fgclasses = []
                for i in range(len(gclasses)):
                    fgclasses.append(tf.reshape(gclasses[i], [-1]))
                gclasses = tf.concat(fgclasses, axis=0)
                pmask = gclasses > 0
                all_pmask.append(pmask)
            part1 = all_pmask[0][0:25600]
            part2_temp = tf.logical_or(all_pmask[0][25600:],
                                       all_pmask[1][:],
                                       name='or1')
            part2 = part2_temp[0:6400]
            part3 = tf.logical_or(part2_temp[6400:],
                                  all_pmask[2][:],
                                  name='or2')
            apmask.append(tf.concat([part1, part2, part3], axis=0))
            apmask.append(tf.concat([part2, part3], axis=0))
            apmask.append(part3)
            for u in range(train_or_eval_test):
                logits = logits3[u]
                localisations = localisations3[u]
                gclasses = gclasses3[u]
                glocalisations = glocalisations3[u]
                gscores = gscores3[u]
                lshape = tfe.get_shape(logits[0], 4)
                num_classes = 2
                batch_size = lshape[0]
                # Flatten out all vectors!
                flogits = []
                fgclasses = []
                fgscores = []
                flocalisations = []
                fglocalisations = []
                for i in range(len(logits) - u):
                    flogits.append(tf.reshape(logits[i + u],
                                              [-1, num_classes]))
                    fgclasses.append(tf.reshape(gclasses[i], [-1]))
                    fgscores.append(tf.reshape(gscores[i], [-1]))
                    flocalisations.append(
                        tf.reshape(localisations[i + u], [-1, 4]))
                    fglocalisations.append(
                        tf.reshape(glocalisations[i], [-1, 4]))
                # And concat the crap!
                logits = tf.concat(flogits, axis=0)
                gclasses = tf.concat(fgclasses, axis=0)
                gscores = tf.concat(fgscores, axis=0)
                localisations = tf.concat(flocalisations, axis=0)
                glocalisations = tf.concat(fglocalisations, axis=0)
                dtype = logits.dtype

                # Compute positive matching mask...
                pmask = gclasses > 0
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Hard negative mining...
                #for no_classes, we only care that a false positive's label is 0,
                #which is why pmask suffices for our needs
                no_classes = tf.cast(apmask[u], tf.int32)

                nmask = tf.logical_not(apmask[u])

                fnmask = tf.cast(nmask, dtype)

                # Number of negative entries to select.
                max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)

                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)
                #avoid n_neg being zero, which would cause an error in top_k later on
                n_neg = tf.maximum(n_neg, 1)

                extend_weight = 1.0
                if u == 1:
                    extend_weight = 0.5
                elif u == 2:
                    extend_weight = 0.25
                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos%d' % u):
                    total_cross_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits, labels=gclasses)
                    total_cross_pos = tf.reduce_sum(total_cross_pos * fpmask,
                                                    name="cross_entropy_pos")
                    total_cross_pos = tf.cond(
                        n_positives > 0,
                        lambda: tf.div(total_cross_pos, n_positives),
                        lambda: 0.)
                    tf.losses.add_loss(total_cross_pos)

                with tf.name_scope('cross_entropy_neg%d' % u):
                    total_cross_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits, labels=no_classes)
                    val, idxes = tf.nn.top_k(total_cross_neg * fnmask, k=n_neg)
                    total_cross_neg = tf.reduce_sum(val,
                                                    name="cross_entropy_neg")
                    total_cross_neg = tf.cond(
                        n_positives > 0,
                        lambda: tf.div(total_cross_neg, n_positives),
                        lambda: 0.)
                    tf.losses.add_loss(total_cross_neg)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization%d' % u):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    total_loc = custom_layers.abs_smooth_2(localisations -
                                                           glocalisations)
                    total_loc = tf.reduce_sum(total_loc * weights *
                                              extend_weight,
                                              name="localization")
                    total_loc = tf.cond(n_positives > 0,
                                        lambda: tf.div(total_loc, n_positives),
                                        lambda: 0.)
                    tf.losses.add_loss(total_loc)

                total_cross = tf.add(total_cross_pos, total_cross_neg,
                                     'cross_entropy%d' % u)

                # Add to EXTRA LOSSES TF.collection
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)

                tf.summary.scalar('postive_num%d' % u, n_positives)
                tf.summary.scalar('negative_num%d' % u, n_neg)

            model_loss = tf.get_collection(tf.GraphKeys.LOSSES)
            model_loss = tf.add_n(model_loss)
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            regularization_loss = tf.add_n(regularization_losses,
                                           name='regularization_loss')
            tf.summary.scalar('regularization_loss', regularization_loss)
            total_loss = tf.add(model_loss, regularization_loss)
            return total_loss
Example #17
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        # We have already seen the logits outputs above; now let's look at how the loss handles them.
        # logits/localisations are lists of 6 tensors, one per prediction layer, which is awkward
        # to process directly, so we first flatten each tensor and then concat them.
        for i in range(len(logits)):
            # After the reshape, the tensors in flogits have shapes (N*5776,21), (N*1444,21), (N*600,21), (N*150,21), (N*36,21), (N*4,21)
            # 5776 = 38*38*4, i.e. logits[i] is reshaped to (prod(shape[:-1]), 21)
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            # After the reshape, the tensors in flocalisations have shapes (N*5776,4), (N*1444,4), (N*600,4), (N*150,4), (N*36,4), (N*4,4)
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        # After the concat, logits has shape (8732*N, 21)
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        # localisations has shape (8732*N, 4)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            # With the positive and negative samples determined, the losses can be computed;
            # multiplying by fpmask keeps only the positive-sample losses.
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')

            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            # Multiplying by fnmask keeps only the negative-sample losses.
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Positive sample selection:
        # if an anchor's ground-truth score is above match_threshold, its predicted box is a positive sample.
        # pmask has the same shape as gscores: True where gscores > match_threshold, False otherwise
        pmask = gscores > match_threshold
        # Convert pmask from True/False to 1/0
        fpmask = tf.cast(pmask, dtype)
        # Count the entries where pmask == 1 (the number of positive samples)
        n_positives = tf.reduce_sum(fpmask)
        # no_classes is 1 where gscores > match_threshold and 0 otherwise;
        # no_classes == 1 marks a positive sample (an object), negatives are background
        no_classes = tf.cast(pmask, tf.int32)

        # Negative sample selection: hard negative mining.
        # To keep positives and negatives roughly balanced, SSD samples the negatives by sorting
        # them by confidence error (the lower the predicted background confidence, the larger the
        # error) in descending order and keeping the top-k as training negatives, so that the
        # positive/negative ratio stays close to 1:3.
        # nmask is True for negatives, i.e. where -0.5 < gscore <= match_threshold.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        # fnmask is 1 for negatives and 0 elsewhere
        fnmask = tf.cast(nmask, dtype)
        # tf.where(cond, a, b): a and b have the same shape; elements of a are kept where cond
        # is True and replaced by the corresponding elements of b elsewhere.

        # predictions: predicted probability of each class
        predictions = slim.softmax(logits)
        # nvalues holds the predicted background score at negative positions and 1. elsewhere
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        # Flatten the background probabilities of all boxes into nvalues_flat
        # (positions corresponding to positives hold the value 1)
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        # Total number of candidate negatives
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # Keep negative_ratio times as many negatives as positives
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
        # If there are fewer candidate negatives than n_neg, use all of them
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # Sort by confidence error (the lower the predicted background confidence, the larger
        # the error) and keep the top-k hardest negatives for training
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        # Largest background confidence among the selected negatives (boxes whose background
        # confidence falls below this threshold count as negatives)
        max_hard_pred = -val[-1]
        # Final negative mask.
        # A box is a negative if -0.5 < gscore <= match_threshold (ground truth is background)
        # and nvalues < max_hard_pred (its predicted background score is below the threshold)
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        # Convert the negative mask from True/False to 1/0
        fnmask = tf.cast(nmask, dtype)

        n_negatives = tf.reduce_sum(fnmask)
        # number of negative samples, as a float for the loss normalization below
        fn_neg = tf.cast(n_negatives, tf.float32)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          n_positives,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), fn_neg, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          n_positives,
                          name='value')
            tf.losses.add_loss(loss)
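The top_k(-nvalues_flat, ...) step above is the hard-negative-mining core: candidate negatives are ranked by their predicted background confidence (lower means harder) and only the n_neg hardest are kept. A NumPy sketch with made-up values:

import numpy as np

bg_scores = np.array([0.9, 0.2, 0.6, 0.1])       # predicted P(background) per anchor
neg_mask = np.array([True, True, True, False])   # candidate negatives (last one is a positive)
n_neg = 2
nvalues = np.where(neg_mask, bg_scores, 1.0)     # positives can never be selected
hard_idx = np.argsort(nvalues)[:n_neg]           # hardest negatives: indices 1 and 2
final_neg_mask = np.zeros_like(neg_mask)
final_neg_mask[hard_idx] = True                  # -> [False, True, True, False]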
Example #19
def ron_losses(logits,
               localisations,
               objness_logits,
               objness_pred,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               neg_threshold=0.3,
               objness_threshold=0.03,
               negative_ratio=3.,
               alpha=1. / 3,
               beta=1. / 3,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ron_losses'):
        # why rank 5, batch, height, width, num_anchors, num_classes
        logits_shape = tfe.get_shape(logits[0], 5)
        num_classes = logits_shape[-1]
        batch_size = logits_shape[0]

        # Flatten out all vectors
        flogits = []
        fobjness_logits = []
        fobjness_pred = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fobjness_logits.append(tf.reshape(objness_logits[i], [-1, 2]))
            fobjness_pred.append(tf.reshape(objness_pred[i], [-1]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # concat along different feature map (from last to front: layer7->layer4)
        logits = tf.concat(flogits, axis=0)
        objness_logits = tf.concat(fobjness_logits, axis=0)
        objness_pred = tf.concat(fobjness_pred, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        #num_nonzero = tf.count_nonzero(gclasses)
        #gclasses = tf.Print(gclasses, [num_nonzero], message='gscores non_zeros: ', summarize=20)
        # gscores = tf.Print(gscores, [gscores], message='gscores: ', summarize=50)

        # raw mask for positives > 0.5, and for negatives < 0.3
        # each positive example has one label
        #gclasses = tf.Print(gclasses, [gclasses, tf.reduce_sum(tf.cast(tf.logical_and(gclasses > 0, tf.logical_not(gscores > 0.5)), dtype)) ], message='gclasses: ', summarize=500)

        positive_mask = gclasses > 0
        #positive_mask = tf.Print(positive_mask, [positive_mask], message='positive_mask: ', summarize=500)

        fpositive_mask = tf.cast(positive_mask, dtype)
        n_positives = tf.reduce_sum(fpositive_mask)
        # negative examples are those whose max overlap is still lower than neg_threshold; note that some positives may also have a lower jaccard

        #negtive_mask = tf.cast(tf.logical_not(positive_mask), dtype) * gscores < neg_threshold
        negtive_mask = tf.equal(gclasses, 0)  #(gclasses == 0)
        #negtive_mask = tf.logical_and(gscores < neg_threshold, tf.logical_not(positive_mask))
        fnegtive_mask = tf.cast(negtive_mask, dtype)
        n_negtives = tf.reduce_sum(fnegtive_mask)

        # randomly select hard negatives for objectness
        n_neg_to_select = tf.cast(negative_ratio * n_positives, tf.int32)
        n_neg_to_select = tf.minimum(n_neg_to_select,
                                     tf.cast(n_negtives, tf.int32))

        rand_neg_mask = tf.random_uniform(
            tfe.get_shape(gscores, 1), minval=0, maxval=1.) < tfe.safe_divide(
                tf.cast(n_neg_to_select, dtype),
                n_negtives,
                name='rand_select_objness')
        # include both the randomly selected negatives and all positive examples
        final_neg_mask_objness = tf.stop_gradient(
            tf.logical_or(tf.logical_and(negtive_mask, rand_neg_mask),
                          positive_mask))
        total_examples_for_objness = tf.reduce_sum(
            tf.cast(final_neg_mask_objness, dtype))
        # the objectness label is 1 at all positive positions
        objness_pred_label = tf.stop_gradient(tf.cast(positive_mask, tf.int32))

        # objness_pred = tf.Print(objness_pred, [objness_pred], message='objness_pred: ', summarize=50)

        # objectness score in all positive positions
        objness_pred_in_positive = tf.cast(positive_mask, dtype) * objness_pred
        # max objectness score in all positive positions
        max_objness_in_positive = tf.reduce_max(objness_pred_in_positive)
        # the position of max objectness score in all positive positions
        max_objness_mask = tf.equal(objness_pred_in_positive,
                                    max_objness_in_positive)

        # objectness mask used to select real positives for detection
        objectness_mask = objness_pred > objness_threshold
        # positives for detection; ensure there is at least one positive to predict
        #cls_positive_mask = tf.stop_gradient(tf.logical_or(tf.logical_and(positive_mask, objectness_mask), max_objness_mask))
        cls_positive_mask = tf.stop_gradient(
            tf.logical_and(positive_mask, objectness_mask))
        cls_negtive_mask = tf.logical_and(objectness_mask, negtive_mask)
        #cls_negtive_mask = tf.logical_and(objectness_mask, tf.logical_not(cls_positive_mask))

        n_cls_negtives = tf.reduce_sum(tf.cast(cls_negtive_mask, dtype))

        fcls_positive_mask = tf.cast(cls_positive_mask, dtype)
        n_cls_positives = tf.reduce_sum(fcls_positive_mask)
        n_cls_neg_to_select = tf.cast(negative_ratio * n_cls_positives,
                                      tf.int32)
        n_cls_neg_to_select = tf.minimum(n_cls_neg_to_select,
                                         tf.cast(n_cls_negtives, tf.int32))
        # randomly selected negative mask
        rand_cls_neg_mask = tf.random_uniform(
            tfe.get_shape(gscores, 1), minval=0, maxval=1.) < tfe.safe_divide(
                tf.cast(n_cls_neg_to_select, dtype),
                n_cls_negtives,
                name='rand_select_cls')
        # include both the randomly selected negatives and all positives (positives are filtered by objectness)
        final_cls_neg_mask_objness = tf.stop_gradient(
            tf.logical_or(tf.logical_and(cls_negtive_mask, rand_cls_neg_mask),
                          cls_positive_mask))
        total_examples_for_cls = tf.reduce_sum(
            tf.cast(final_cls_neg_mask_objness, dtype))

        # n_cls_neg_to_select = tf.Print(n_cls_neg_to_select, [n_cls_neg_to_select], message='n_cls_neg_to_select: ', summarize=20)
        #logits = tf.Print(logits, [n_cls_positives, tf.reduce_sum(tf.cast(tf.logical_and(cls_negtive_mask, rand_cls_neg_mask), dtype))], message='n_cls_positives: ', summarize=20)
        # n_neg_to_select = tf.Print(n_neg_to_select, [n_neg_to_select], message='n_neg_to_select: ', summarize=20)
        #logits = tf.Print(logits, [n_positives, tf.reduce_sum(tf.cast(tf.logical_and(negtive_mask, rand_neg_mask), dtype))], message='n_positives: ', summarize=20)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            #weights = (1. - alpha - beta) * tf.cast(final_cls_neg_mask_objness, dtype)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.stop_gradient(
                    tf.clip_by_value(gclasses, 0, num_classes)))

            loss = tf.cond(
                n_positives > 0., lambda: (1. - alpha - beta) * tf.reduce_mean(
                    tf.boolean_mask(loss, final_cls_neg_mask_objness)),
                lambda: 0.)
            #loss = tf.reduce_mean(loss * weights)
            #loss = tf.reduce_sum(loss * weights)
            #loss = tfe.safe_divide(tf.reduce_sum(loss * weights), total_examples_for_cls, name='cls_loss')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_objectness'):
            #weights = alpha * tf.cast(final_neg_mask_objness, dtype)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=objness_logits, labels=objness_pred_label)
            loss = tf.cond(
                n_positives > 0., lambda: alpha * tf.reduce_mean(
                    tf.boolean_mask(loss, final_neg_mask_objness)), lambda: 0.)
            #loss = tf.reduce_mean(loss * weights)
            #loss = tf.reduce_sum(loss * weights)
            #loss = tfe.safe_divide(tf.reduce_sum(loss * weights), total_examples_for_objness, name='objness_loss')
            tf.losses.add_loss(loss)

        # Add localization loss
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            #weights = tf.expand_dims(beta * tf.cast(fcls_positive_mask, dtype), axis=-1)
            loss = custom_layers.modified_smooth_l1(
                localisations, tf.stop_gradient(glocalisations), sigma=3.)
            #loss = custom_layers.abs_smooth(localisations - tf.stop_gradient(glocalisations))

            loss = tf.cond(
                n_cls_positives > 0., lambda: beta * tf.reduce_mean(
                    tf.boolean_mask(tf.reduce_sum(loss, axis=-1),
                                    tf.stop_gradient(cls_positive_mask))),
                lambda: 0.)
            #loss = tf.cond(n_positives > 0., lambda: beta * n_positives / total_examples_for_objness * tf.reduce_mean(tf.boolean_mask(tf.reduce_sum(loss, axis=-1), tf.stop_gradient(positive_mask))), lambda: 0.)
            #loss = tf.reduce_mean(loss * weights)
            #loss = tf.reduce_sum(loss * weights)

            #loss = tfe.safe_divide(tf.reduce_sum(loss * weights), n_cls_positives, name='localization_loss')
            tf.losses.add_loss(loss)
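Unlike the SSD losses above, ron_losses does not rank its negatives; each candidate negative is kept with probability n_neg_to_select / n_negatives, so roughly negative_ratio negatives per positive survive in expectation. A NumPy sketch of that random sub-sampling (values made up):

import numpy as np

neg_mask = np.array([True, True, False, True, True])   # candidate negatives
n_neg_to_select, n_negatives = 2, neg_mask.sum()
keep_prob = n_neg_to_select / float(n_negatives)        # expected fraction of negatives kept
rand_keep = np.random.uniform(size=neg_mask.shape) < keep_prob
final_neg_mask = np.logical_and(neg_mask, rand_keep)    # ~n_neg_to_select negatives survive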
Example #20
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
Example #21
0
    def get_losses(self,
                   logits,
                   localisations,
                   gclasses,
                   glocalisations,
                   gscores,
                   match_threshold=0.5,
                   negative_ratio=2.5,
                   alpha=1.,
                   label_smoothing=0.,
                   scope=None):
        """Loss functions for training the SSD 300 VGG network.

        This function defines the different loss components of the SSD, and
        adds them to the TF loss collection.

        Arguments:
          logits: (list of) predictions logits Tensors;
          localisations: (list of) localisations Tensors;
          gclasses: (list of) groundtruth labels Tensors;
          glocalisations: (list of) groundtruth localisations Tensors;
          gscores: (list of) groundtruth score Tensors;
        """
        with tf.name_scope(scope, 'ssd_losses'):
            lshape = tfe.get_shape(logits[0], 5)
            num_classes = lshape[-1]
            #             batch_size = lshape[0]

            # Flatten out all vectors!
            flogits = []
            fgclasses = []
            fgscores = []
            flocalisations = []
            fglocalisations = []
            for i in range(len(logits)):
                flogits.append(tf.reshape(logits[i], [-1, num_classes]))
                fgclasses.append(tf.reshape(gclasses[i], [-1]))
                fgscores.append(tf.reshape(gscores[i], [-1]))
                flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
                fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
            # And concat the crap!
            logits = tf.concat(flogits, axis=0)
            gclasses = tf.concat(fgclasses, axis=0)
            gscores = tf.concat(fgscores, axis=0)
            localisations = tf.concat(flocalisations, axis=0)
            glocalisations = tf.concat(fglocalisations, axis=0)
            dtype = logits.dtype

            # Compute positive matching mask...
            pmask = gclasses > 0
            fpmask = tf.cast(pmask, dtype)
            n_positives = tf.reduce_sum(fpmask)

            # Hard negative mining...
            # for no_classes, we only care that false positive's label is 0
            # this is why pmask suffices for our needs
            no_classes = tf.cast(pmask, tf.int32)
            predictions = slim.softmax(logits)
            nmask = tf.logical_not(pmask)

            fnmask = tf.cast(nmask, dtype)
            nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
            nvalues_flat = tf.reshape(nvalues, [-1])
            # Number of negative entries to select.
            max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)

            n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
            n_neg = tf.minimum(n_neg, max_neg_entries)
            # avoid n_neg being zero, which would cause an error in top_k later on
            n_neg = tf.maximum(n_neg, 1)

            val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
            max_hard_pred = -val[-1]
            # Final negative mask, hard negative mining
            nmask = tf.logical_and(nmask, nvalues <= max_hard_pred)
            fnmask = tf.cast(nmask, dtype)

            # Add cross-entropy loss.
            with tf.name_scope('cross_entropy_pos'):
                total_cross_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=gclasses)
                total_cross_pos = tf.reduce_sum(total_cross_pos * fpmask,
                                                name="cross_entropy_pos")
                tf.losses.add_loss(total_cross_pos)

            with tf.name_scope('cross_entropy_neg'):
                total_cross_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=no_classes)
                total_cross_neg = tf.reduce_sum(total_cross_neg * fnmask,
                                                name="cross_entropy_neg")
                tf.losses.add_loss(total_cross_neg)

            # Add localization loss: smooth L1, L2, ...
            with tf.name_scope('localization'):
                # Weights Tensor: positive mask + random negative.
                weights = tf.expand_dims(alpha * fpmask, axis=-1)
                total_loc = custom_layers.abs_smooth_2(localisations -
                                                       glocalisations)
                total_loc = tf.reduce_sum(total_loc * weights,
                                          name="localization")
                tf.losses.add_loss(total_loc)

            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)

            # stick with the original paper in terms of defining the model loss
            model_loss = tf.get_collection(tf.GraphKeys.LOSSES)
            model_loss = tf.add_n(model_loss)
            model_loss = array_ops.where(tf.equal(n_positives, 0),
                                         array_ops.zeros_like(model_loss),
                                         tf.div(1.0, n_positives) * model_loss)
            # Add regularization loss
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            regularization_loss = tf.add_n(regularization_losses,
                                           name='regularization_loss')

            # if the model loss is zero, no need to do a gradient update on this batch
            total_loss = array_ops.where(
                tf.equal(n_positives, 0), array_ops.zeros_like(model_loss),
                tf.add(model_loss, regularization_loss))

            # debugging info
            tf.summary.scalar("postive_num", n_positives)
            tf.summary.scalar("negative_num", n_neg)
            tf.summary.scalar("regularization_loss", regularization_loss)
            #             with tf.name_scope('variables_loc'):
            #                 selected_p = tf.boolean_mask(glocalisations, pmask)
            #                 p_mean, p_variance = tf.nn.moments(selected_p, [0])
            #                 tf.summary.scalar("mean_cx", p_mean[0])
            #                 tf.summary.scalar("mean_cy", p_mean[1])
            #                 tf.summary.scalar("mean_w", p_mean[2])
            #                 tf.summary.scalar("mean_h", p_mean[3])
            #
            #                 tf.summary.scalar("var_cx", p_variance[0])
            #                 tf.summary.scalar("var_cy", p_variance[1])
            #                 tf.summary.scalar("var_w", p_variance[2])
            #                 tf.summary.scalar("var_h", p_variance[3])

            return total_loss
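get_losses sums the raw cross-entropy and localization terms, then normalizes the collected model loss by the number of positive anchors and returns zero for batches without positives, so such batches produce no gradient step. A small sketch of that normalization rule (function and argument names are assumptions, not the original API):

def normalized_model_loss(cross_pos, cross_neg, loc, n_positives, regularization=0.0):
    model_loss = cross_pos + cross_neg + loc
    if n_positives == 0:
        return 0.0                      # skip the batch entirely
    return model_loss / n_positives + regularization

print(normalized_model_loss(12.0, 30.0, 6.0, n_positives=8))  # 6.0
print(normalized_model_loss(12.0, 30.0, 6.0, n_positives=0))  # 0.0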
Example #22
0
def ssd_losses(
        logits,
        localisations,  # the loss is a weighted sum of the localization error and the confidence error
        gclasses,
        glocalisations,
        gscores,
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,  # weight of the localization error
        label_smoothing=0.,
        device='/cpu:0',
        scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(
                logits[i], [-1, num_classes]))  # reshape class logits to (-1, 21)
            fgclasses.append(tf.reshape(gclasses[i], [-1]))  # ground-truth classes
            fgscores.append(tf.reshape(gscores[i], [-1]))  # matching scores with the ground truth
            flocalisations.append(tf.reshape(localisations[i],
                                             [-1, 4]))  # predicted box coordinates (encoded)
            fglocalisations.append(tf.reshape(glocalisations[i],
                                              [-1, 4]))  # encoded ground-truth box coordinates
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold  # anchors whose IoU with a ground-truth box exceeds 0.5 are positives
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # number of positives N

        # Hard negative mining... To keep positives and negatives roughly balanced, SSD samples the
        # negatives: they are sorted by confidence error (the lower the predicted background
        # confidence, the larger the error) and the top-k hardest ones are kept as training
        # negatives, so the positive:negative ratio stays close to 1:3 (a standalone sketch of
        # this rule follows this example).
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # class predictions
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives,
                        tf.int32) + batch_size  # number of negatives, roughly 3x the positives
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(
            -nvalues_flat,
            k=n_neg)  # sort by confidence error and keep the top-k hardest negatives
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,  # class confidence error
                labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')  # confidence error summed over positives, divided by batch size
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(
                localisations -
                glocalisations)  # predicted offsets minus ground-truth offsets, then smooth L1
            loss = tf.div(
                tf.reduce_sum(loss * weights), batch_size,
                name='value')  # weighted loss (weight = alpha * positive mask) summed and divided by batch size
            tf.losses.add_loss(loss)  # accumulate the weighted confidence and localization errors
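The hard-negative-mining step that all of these loss functions repeat can be isolated in a few lines. Below is a self-contained NumPy sketch of the same selection rule (the function name and toy inputs are assumptions, not code from any of the repositories): keep the candidate negatives whose predicted background probability is lowest, up to roughly negative_ratio times the number of positives.

import numpy as np

def hard_negative_mask(gscores, background_prob, match_threshold=0.5, negative_ratio=3.0):
    pmask = gscores > match_threshold                # positive anchors
    nmask = np.logical_and(~pmask, gscores > -0.5)   # candidate negatives
    n_neg = min(int(negative_ratio * pmask.sum()) + 1, int(nmask.sum()))
    # A negative is "hard" when its predicted background probability is low;
    # non-candidates are pushed to 1.0 so they are never picked.
    nvalues = np.where(nmask, background_prob, 1.0)
    hard_idx = np.argsort(nvalues)[:n_neg]
    hard_mask = np.zeros_like(pmask)
    hard_mask[hard_idx] = True
    return pmask, np.logical_and(hard_mask, nmask)

gscores = np.array([0.9, 0.1, 0.0, 0.2, 0.6, 0.3, 0.0, 0.1, 0.05, 0.4])
background_prob = np.linspace(0.05, 0.95, gscores.size)
pos, neg = hard_negative_mask(gscores, background_prob)
print(pos.sum(), neg.sum())  # 2 positives, 7 hard negatives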
Example #23
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               end_points,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None,
               feat_layers=SSDNet.default_params.feat_layers):

    with tf.name_scope(scope, 'ssd_losses'):

        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses_concat = tf.concat(fgclasses, axis=0)
        gscores_concat = tf.concat(fgscores, axis=0)
        localisations_concat = tf.concat(flocalisations, axis=0)
        glocalisations_concat = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores_concat > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores_concat > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add center loss.
        center_op_layers = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_loss'):
                layer_scores = tf.reshape(gscores[i], [-1, tensor_shape(gscores[i], 4)[-1]])
                layer_classes = tf.reshape(gclasses[i], [-1,tensor_shape(gclasses[i], 4)[-1]])
                layer_features = tf.reshape(end_points[layer], [-1, tensor_shape(end_points[layer], 4)[-1]])
                label_index = tf.argmax(layer_scores, axis=1)
                label_index = tf.expand_dims(label_index, 1)

                row = tf.range(label_index.shape[0])[:, None]
                row = tf.cast(row, tf.int64)
                label_index = tf.concat([row, label_index], axis=1)

                labels_one_layer = tf.gather_nd(layer_classes, label_index)

                loss, center_op = get_center_loss(layer_features, layer, labels_one_layer)
                loss = tf.div(tf.reduce_sum(0.001*loss * fpmask), batch_size, name='value')
                center_op_layers.append(center_op)
                tf.losses.add_loss(loss)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses_concat)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # with tf.name_scope('center_loss'):
        #     loss, centers_update_op = get_center_loss(logits, gclasses_concat)
        #     loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
        #     tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations_concat - glocalisations_concat)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)

        return center_op_layers
Example #24
0
def my_ssd_losses(logits, localisations,
                  gclasses, glocalisations, gscores,
                  match_threshold=0.5,
                  negative_ratio=3.,
                  alpha=1.,
                  label_smoothing=0.,
                  device='/cpu:0',
                  scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        # _alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.])
        # alphas = _alphas / _alphas.sum()
        alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.])

        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            print("#### logits: ", logits[i])
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
            print("###### flogits: ", flogits[i])
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832],
                                                                               labels=gclasses[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144],
                                                                               labels=gclasses[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344],
                                                                               labels=gclasses[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144],
                                                                               labels=gclasses[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296],
                                                                               labels=gclasses[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424],
                                                                               labels=gclasses[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832],
                                                                               labels=no_classes[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144],
                                                                               labels=no_classes[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344],
                                                                               labels=no_classes[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144],
                                                                               labels=no_classes[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296],
                                                                               labels=no_classes[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424],
                                                                               labels=no_classes[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:184832] - glocalisations[:184832])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[184832:254144] - glocalisations[184832:254144])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[254144:273344] - glocalisations[254144:273344])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[273344:278144] - glocalisations[273344:278144])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[278144:279296] - glocalisations[278144:279296])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[279296:279424] - glocalisations[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)



        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        with tf.name_scope('cross_entropy_pos'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664],
                                                                               labels=gclasses[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080],
                                                                               labels=gclasses[369664:462080])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016],
                                                                               labels=gclasses[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416],
                                                                               labels=gclasses[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720],
                                                                               labels=gclasses[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976],
                                                                               labels=gclasses[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664],
                                                                               labels=no_classes[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080],
                                                                               labels=no_classes[369664:462080])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016],
                                                                               labels=no_classes[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416],
                                                                               labels=no_classes[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720],
                                                                               labels=no_classes[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976],
                                                                                labels=no_classes[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:369664] - glocalisations[:369664])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[369664:462080] - glocalisations[369664:462080])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[462080:118016] - glocalisations[462080:118016])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[118016:124416] - glocalisations[118016:124416])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[124416:126720] - glocalisations[124416:126720])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[126720:126976] - glocalisations[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
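The hard-coded slice boundaries in the first set of loss terms above appear to be running totals of the anchors contributed by each of the six SSD feature layers, one per entry of alphas. A quick check (the per-layer counts are derived from the boundaries themselves):

import numpy as np

anchors_per_layer = [184832, 69312, 19200, 4800, 1152, 128]
print(np.cumsum(anchors_per_layer))  # [184832 254144 273344 278144 279296 279424]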
Example #25
0
File: ssd.py  Project: ebriant/FaceTracking
    def get_model(self, inputs, weight_decay=0.0005, is_training=False):
        # End_points collect relevant activations for external use.
        arg_scope = self.__arg_scope(weight_decay=weight_decay)
        self.img_shape = tfe.get_shape(inputs)[1:3]
        with slim.arg_scope(arg_scope):
            end_points = {}
            channels = {}
            with tf.variable_scope('vgg_16', [inputs]):
                # Original VGG-16 blocks.
                net = slim.repeat(inputs,
                                  2,
                                  slim.conv2d,
                                  64, [3, 3],
                                  scope='conv1')
                end_points['block1'] = net
                net = slim.max_pool2d(net, [2, 2], scope='pool1')
                # Block 2.
                net = slim.repeat(net,
                                  2,
                                  slim.conv2d,
                                  128, [3, 3],
                                  scope='conv2')
                end_points['block2'] = net
                net = slim.max_pool2d(net, [2, 2], scope='pool2')
                # Block 3.
                net = slim.repeat(net,
                                  3,
                                  slim.conv2d,
                                  256, [3, 3],
                                  scope='conv3')
                end_points['block3'] = net
                channels['block3'] = 256
                self.layer_shape.append(tfe.get_shape(net)[1:3])
                net = slim.max_pool2d(net, [2, 2], scope='pool3')
                # Block 4.
                net = slim.repeat(net,
                                  3,
                                  slim.conv2d,
                                  512, [3, 3],
                                  scope='conv4')
                end_points['block4'] = net
                channels['block4'] = 512
                self.layer_shape.append(tfe.get_shape(net)[1:3])
                net = slim.max_pool2d(net, [2, 2], scope='pool4')
                # Block 5.
                net = slim.repeat(net,
                                  3,
                                  slim.conv2d,
                                  512, [3, 3],
                                  scope='conv5')
                end_points['block5'] = net
                channels['block5'] = 512
                self.layer_shape.append(tfe.get_shape(net)[1:3])
                net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # Additional SSD blocks.
            #with slim.arg_scope([slim.conv2d],
            #activation_fn=None):
            #with slim.arg_scope([slim.batch_norm],
            #activation_fn=tf.nn.relu, is_training=is_training,updates_collections=None):
            #with slim.arg_scope([slim.dropout],
            #is_training=is_training,keep_prob=0.8):
            with tf.variable_scope(self.model_name):
                return self.__additional_ssd_block(end_points,
                                                   channels,
                                                   net,
                                                   is_training=is_training)
Example #26
0
def Adversary_Back_Train(D,
                         D_logits,
                         D_,
                         D_logits_,
                         gscore_untrim,
                         gscore_gene,
                         scope=None):

    with tf.name_scope(scope, 'aher_adv_losses'):
        lshape = tfe.get_shape(D_logits[0], 8)
        batch_size = lshape[0]
        fgscore_untrim = []
        fgscore_gene = []
        f_D_logits = []
        f_D_logits_ = []
        f_D = []
        f_D_ = []
        for i in range(len(D_logits)):
            fgscore_untrim.append(tf.reshape(gscore_untrim[i], [-1]))
            fgscore_gene.append(tf.reshape(gscore_gene[i], [-1]))
            f_D_logits.append(tf.reshape(D_logits[i], [-1]))
            f_D_logits_.append(tf.reshape(D_logits_[i], [-1]))
            f_D.append(tf.reshape(D[i], [-1]))
            f_D_.append(tf.reshape(D_[i], [-1]))
        gscore_untrim = tf.concat(fgscore_untrim, axis=0)
        gscore_gene = tf.concat(fgscore_gene, axis=0)
        D_logits = tf.concat(f_D_logits, axis=0)
        D_logits_ = tf.concat(f_D_logits_, axis=0)
        D = tf.concat(f_D, axis=0)
        D_ = tf.concat(f_D_, axis=0)
        dtype = D_logits.dtype

        # select the background position and logits
        pos_mask_untrim = gscore_untrim > 0.70
        nmask_untrim = tf.logical_and(tf.logical_not(pos_mask_untrim),
                                      gscore_untrim < 0.3)

        pos_mask_gene = gscore_gene > 0.70
        nmask_gene = tf.logical_and(tf.logical_not(pos_mask_gene),
                                    gscore_gene < 0.3)

        nmask = tf.logical_and(nmask_untrim, nmask_gene)
        fnmask = tf.cast(nmask, dtype)
        fnmask_num = tf.reduce_sum(fnmask)

        # compute the sigmoid cross entropy loss
        d_loss_real = sigmoid_cross_entropy_with_logits(
            D_logits, tf.ones_like(D))
        d_loss_real = tf.div(tf.reduce_sum(d_loss_real * fnmask),
                             fnmask_num / FLAGS.dis_weights,
                             name='d_loss_real')

        d_loss_fake = sigmoid_cross_entropy_with_logits(
            D_logits_, tf.zeros_like(D_))
        d_loss_fake = tf.div(tf.reduce_sum(d_loss_fake * fnmask),
                             fnmask_num / FLAGS.dis_weights,
                             name='d_loss_fake')

        g_loss = sigmoid_cross_entropy_with_logits(D_logits_, tf.ones_like(D_))
        g_loss = tf.div(tf.reduce_sum(g_loss * fnmask),
                        fnmask_num / FLAGS.gen_weights,
                        name='g_loss')

    return d_loss_real, d_loss_fake, g_loss
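Adversary_Back_Train builds its real/fake discriminator and generator losses from a sigmoid_cross_entropy_with_logits helper restricted to background anchors. A NumPy sketch of the numerically stable form of that loss (an assumption about the helper, shown only to make the real-vs-fake targets explicit):

import numpy as np

def sigmoid_cross_entropy_with_logits(logits, labels):
    # max(x, 0) - x * z + log(1 + exp(-|x|)), stable for large |x|
    return np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits)))

logits = np.array([2.0, -1.0, 0.5])
print(sigmoid_cross_entropy_with_logits(logits, np.ones_like(logits)))   # "real" target, as in d_loss_real and g_loss
print(sigmoid_cross_entropy_with_logits(logits, np.zeros_like(logits)))  # "fake" target, as in d_loss_fake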
Example #27
0
def ssd_losses(
        logits,
        localisations,  # predicted classes and locations
        gclasses,
        glocalisations,
        gscores,  # ground-truth classes, locations and scores
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,
        label_smoothing=0.,
        device='/cpu:0',
        scope=None):

    with tf.name_scope(scope, 'ssd_losses'):

        # Extract the number of classes and the batch size
        lshape = tfe.get_shape(logits[0], 5)  # the tensor_shape helper could be used instead
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):  # loop over the SSD feature layers
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)  # all anchors, 21-class outputs
        gclasses = tf.concat(fgclasses, axis=0)  # all anchors, ground-truth class ids
        gscores = tf.concat(fgscores, axis=0)  # all anchors, IoU with the ground-truth boxes
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        """[<tf.Tensor 'ssd_losses/concat:0' shape=(279424, 21) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_1:0' shape=(279424,) dtype=int64>,
            <tf.Tensor 'ssd_losses/concat_2:0' shape=(279424,) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_3:0' shape=(279424, 4) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_4:0' shape=(279424, 4) dtype=float32>]
        """

        dtype = logits.dtype
        pmask = gscores > match_threshold  # anchors whose IoU with a ground-truth box exceeds the threshold
        fpmask = tf.cast(pmask, dtype)  # float foreground mask (anchors with enough IoU to be treated as containing an object)
        n_positives = tf.reduce_sum(fpmask)  # total number of foreground anchors

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # turn the 21 logits of each row into probabilities
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)  # anchors that fail the IoU threshold are marked 1
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(
            nmask,
            predictions[:, 0],  # candidate negatives keep their predicted background probability
            1. - fnmask)  # positions with an object are set to 1
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        # keep the n_neg entries of nmask that look least like background (lowest class-0 probability)
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # 3 x number of positives + batch_size
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)  # the n_neg entries least likely to be background
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask,
                               nvalues < max_hard_pred)  # non-foreground entries that look least like background
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)  # 0-20
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)  # {0,1}
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
Example #28
0
File: ssd.py  Project: ebriant/FaceTracking
    def __additional_ssd_block(self,
                               end_points,
                               channels,
                               net,
                               is_training=False):
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!

        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        #net = slim.batch_norm(net)
        #net = self.__dropout(net)
        end_points['block6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        #net = slim.batch_norm(net)
        #net = self.__dropout(net)
        end_points['block7'] = net
        channels['block7'] = 1024
        self.layer_shape.append(tfe.get_shape(net)[1:3])

        # Block 8/9: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            #net = slim.batch_norm(net)
            #net = self.__dropout(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              512, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            #net = slim.batch_norm(net)
            #net = self.__dropout(net)
        end_points[end_point] = net
        channels[end_point] = 512
        self.layer_shape.append(tfe.get_shape(net)[1:3])
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            #net = slim.batch_norm(net)
            #net = self.__dropout(net)
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net,
                              256, [3, 3],
                              stride=2,
                              scope='conv3x3',
                              padding='VALID')
            #net = slim.batch_norm(net)
            #net = self.__dropout(net)
        end_points[end_point] = net
        channels[end_point] = 256
        self.layer_shape.append(tfe.get_shape(net)[1:3])

        # Prediction and localisations layers.
        predictions = []

        logits, localisations = self.ssd_multibox_layer(
            end_points,
            channels,
            self.feat_layers,
            self.normalizations,
            is_training=is_training)

        if is_training:
            return localisations, logits, end_points
        else:
            predictions = []
            for l in range(len(logits[0])):
                predictions.append(slim.softmax(logits[0][l]))
            return predictions, localisations, logits, end_points
Example #29
0
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
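Every localization term in these examples is a smooth-L1 penalty on the encoded box offsets: quadratic for residuals below 1, linear beyond. A standalone NumPy sketch of that penalty (an approximation of what custom_layers.abs_smooth computes, not the library code itself):

import numpy as np

def smooth_l1(x):
    absx = np.abs(x)
    return np.where(absx < 1.0, 0.5 * x ** 2, absx - 0.5)

residual = np.array([0.1, -0.3, 2.0, -1.5])  # localisations - glocalisations for one box
print(smooth_l1(residual))        # [0.005 0.045 1.5   1.   ]
print(smooth_l1(residual).sum())  # 2.55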
Example #30
def ssd_losses(logits, localisations, glabels,
               glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=0.2,
               label_smoothing=0.,
               batch_size=16,
               scope=None):
	'''Loss functions for training the text box network.
	Arguments:
	  logits: (list of) predictions logits Tensors;                x
	  localisations: (list of) localisations Tensors;              l
	  glocalisations: (list of) groundtruth localisations Tensors; g
	  gscores: (list of) groundtruth score Tensors;                c
	'''
	# from ssd loss
	with tf.name_scope(scope, 'txt_losses'):
		lshape = tfe.get_shape(logits[0], 5)
		num_classes = lshape[-1]
		batch_size = batch_size

		l_cross_pos = []
		l_cross_neg = []
		l_loc = []

		# Flatten out all vectors!
		flogits = logits
		fgscores = gscores
		flocalisations = localisations
		fglocalisations = glocalisations
		fglabels = glabels
		# for i in range(len(logits)):
		# 	flogits.append(tf.reshape(logits[i], [-1, num_classes]))
		# 	fgscores.append(tf.reshape(gscores[i], [-1]))
		# 	fglabels.append(tf.reshape(glabels[i], [-1]))
		# 	flocalisations.append(tf.reshape(localisations[i], [-1, 12]))
		# 	fglocalisations.append(tf.reshape(glocalisations[i], [-1, 12]))
		# And concat the crap!
		glabels = tf.concat(fglabels, axis=1)
		logits = tf.concat(flogits, axis=1)  # x
		gscores = tf.concat(fgscores, axis=1)  # c
		localisations = tf.concat(flocalisations, axis=1)  # l
		glocalisations = tf.concat(fglocalisations, axis=1)  # g
		dtype = logits.dtype

		# Compute positive matching mask...
		pmask = gscores > match_threshold  # positive mask
		# pmask = tf.concat(axis=0, values=[pmask[:tf.argmax(gscores, axis=0)], [True], pmask[tf.argmax(gscores, axis=0) + 1:]])

		ipmask = tf.cast(pmask, tf.int32)  # int positive mask
		fpmask = tf.cast(pmask, dtype)  # float positive mask
		n_positives = tf.reduce_sum(fpmask)  # calculate all number

		# Hard negative mining...
		# conf loss ??
		no_classes = tf.cast(pmask, tf.int32)
		predictions = slim.softmax(logits)  #
		nmask = tf.logical_and(tf.logical_not(pmask),
		                       gscores > -0.5)  #
		fnmask = tf.cast(nmask, dtype)
		nvalues = tf.where(nmask,
		                   predictions[:, :, 0],
		                   1. - fnmask)
		nvalues_flat = tf.reshape(nvalues, [-1])
		# Number of negative entries to select.
		max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
		n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
		n_neg = tf.minimum(n_neg, max_neg_entries)

		val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
		max_hard_pred = -val[-1]
		# Final negative mask.
		nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
		fnmask = tf.cast(nmask, dtype)
		inmask = tf.cast(nmask, tf.int32)
		# Add cross-entropy loss.
		# logits [batch_size, num_classes] labels [batch_size] ~ 0,num_class
		with tf.name_scope('cross_entropy_pos'):
			loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=glabels)
			loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_cross_pos.append(loss)

		with tf.name_scope('cross_entropy_neg'):
			loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
			                                                      labels=no_classes)
			loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_cross_neg.append(loss)

		# Add localization loss: smooth L1, L2, ...
		with tf.name_scope('localization'):
			# Weights Tensor: positive mask + random negative.
			weights = tf.expand_dims(alpha * fpmask, axis=-1)
			# localisations = tf.Print(localisations, [localisations, tf.shape(localisations)], "pre is:         ", summarize=20)
			# glocalisations = tf.Print(glocalisations, [glocalisations,  tf.shape(glocalisations)], "gt is :         ",summarize=20)
			loss = custom_layers.abs_smooth(localisations - glocalisations)
			loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_loc.append(loss)

		with tf.name_scope('total'):
			total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
			total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
			total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
			total_loc = tf.add_n(l_loc, 'localization')

			# Add to EXTRA LOSSES TF.collection
			tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
			tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
			tf.add_to_collection('EXTRA_LOSSES', total_cross)
			tf.add_to_collection('EXTRA_LOSSES', total_loc)
Example #31
0
def ssd_losses_old(logits,
                   localisations,
                   gclasses,
                   glocalisations,
                   gscores,
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   device='/cpu:0',
                   scope=None):
    """Loss functions for training the SSD 300 VGG network.
    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.
    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight...
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]

                    # Positive mask.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)

                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    nmask = tf.logical_and(tf.logical_not(pmask),
                                           gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    nvalues = tf.where(nmask, predictions[:, :, :, :, 0],
                                       1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask),
                                                  tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)

                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)

                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)

                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)

                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] -
                                                        glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)

            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg,
                                     'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')

                # Add to EXTRA LOSSES TF.collection
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
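
The totals above live only in the custom 'EXTRA_LOSSES' collection; a minimal sketch (not from the original source) of how a training script could pull them back out, for example to attach TensorBoard summaries:

# Hypothetical monitoring step, assumed rather than taken from the original code:
# fetch every tensor registered in the custom collection and summarize it.
for extra_loss in tf.get_collection('EXTRA_LOSSES'):
    tf.summary.scalar(extra_loss.op.name, extra_loss)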
예제 #32
0
def ssd_losses(
        logits,
        localisations,
        gclasses,
        glocalisations,
        gscores,
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,  # weight on the localization error
        label_smoothing=0.,
        device='/gpu:0',
        scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))  # predicted class logits
            fgclasses.append(tf.reshape(gclasses[i], [-1]))  # ground-truth classes
            fgscores.append(tf.reshape(gscores[i], [-1]))  # anchor scores, i.e. IoU with the matched ground-truth box
            flocalisations.append(tf.reshape(localisations[i],
                                             [-1, 4]))  # predicted box coordinates, encoded form
            fglocalisations.append(tf.reshape(glocalisations[i],
                                              [-1, 4]))  # ground-truth box coordinates, encoded form
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold  # anchors whose IoU exceeds the match threshold are positives
        fpmask = tf.cast(pmask, dtype)  # cast pmask to dtype
        n_positives = tf.reduce_sum(fpmask)  # number of positives N

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # class probabilities
        nmask = tf.logical_and(
            tf.logical_not(pmask),  # non-positive anchors with a valid score are negative candidates
            gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0],
                           1. - fnmask)  # negatives keep their background score; everything else is set to 1
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)  # final number of negatives to keep

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')  # mask the confidence loss to positives, sum, and normalize by batch size
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(
                localisations - glocalisations)  # smooth L1, computed differently from a plain L1
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
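
Since every term is handed to tf.losses.add_loss, a caller only needs to invoke the function once and then collect the total. A hedged usage sketch follows; the per-layer input lists are assumed to come from the SSD network outputs and the ground-truth encoding step, which are not shown here:

# Illustrative call with assumed inputs: logits/localisations are the per-layer
# network outputs, the g* lists the encoded ground truth.
ssd_losses(logits, localisations, gclasses, glocalisations, gscores,
           match_threshold=0.5, negative_ratio=3., alpha=1.)
# tf.losses.get_total_loss() gathers everything added via tf.losses.add_loss
# (plus any regularization losses) into a single training objective.
total_loss = tf.losses.get_total_loss()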
예제 #33
0
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(
            logits[0], 5)  # (batch_size, height, width, anchors per cell, scores for the 21 classes per anchor)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap! Each feature map is first reshaped into a flat list, then all feature maps are concatenated together.
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask... anchors whose score against the ground-truth boxes falls below the threshold are filtered out.
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # count the positives

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)  # negatives get the pure-background label 0
        predictions = slim.softmax(logits)  # softmax over the 21 class logits
        nmask = tf.logical_and(
            tf.logical_not(pmask),  # negatives: not positive, and with gscore > -0.5
            gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(
            nmask,  # negatives take the softmax background score; everywhere else use 1
            predictions[:, 0],
            1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)  # take the minimum of the budgeted count and the negatives actually available
        # Negate so top_k picks the n negatives that look least like background.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)  # values and their indices
        max_hard_pred = -val[-1]  # threshold: the most background-like of the selected hard negatives

        # Final negative mask: anchors that were already negative candidates and whose
        # background score is below the threshold, i.e. the ones that look least like background.

        # Most anchor boxes never enclose an object of interest: their IoU with every
        # ground-truth box is below the threshold, so a huge number of anchors end up
        # with label 0. For these negative anchors:
        #
        # Because negatives can vastly outnumber everything else, we keep only some of them,
        # namely the ones the model is currently least confident are background: sort the
        # class-0 predictions and keep the anchors with the smallest values, the hard negatives.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss: classification term for the positives (after filtering).
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add cross-entropy loss: classification term for the mined negatives.
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ... (regression term for the positives).
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            # Background anchors have no box-regression target, so fpmask filters them out.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
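
The hard-negative selection described in the comments above can also be read in isolation. A small self-contained sketch follows; the function and argument names are illustrative, not taken from the original code:

import tensorflow as tf

def hard_negative_mask(nmask, background_prob, n_neg):
    # nmask: boolean mask of candidate negatives; background_prob: predicted
    # probability of the background class for every anchor.
    # Non-candidates get the value 1.0 so they can never be selected.
    nvalues = tf.where(nmask, background_prob, tf.ones_like(background_prob))
    # The hardest negatives are the least background-like, i.e. the smallest scores.
    val, _ = tf.nn.top_k(-tf.reshape(nvalues, [-1]), k=n_neg)
    threshold = -val[-1]
    # Keep only candidates whose background score falls strictly below the threshold.
    return tf.logical_and(nmask, nvalues < threshold)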
예제 #34
0
def ssd_losses_old(logits, localisations,
                   gclasses, glocalisations, gscores,
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   device='/cpu:0',
                   scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight...
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]

                    # Positive mask.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)

                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    nmask = tf.logical_and(tf.logical_not(pmask),
                                           gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    nvalues = tf.where(nmask,
                                       predictions[:, :, :, :, 0],
                                       1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)

                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)

                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                              labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)

                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                              labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)

                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)

            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')

                # Add to EXTRA LOSSES TF.collection
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
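
custom_layers.abs_smooth is referenced in every localization term above but is not shown in these snippets. A minimal smooth-L1 sketch consistent with those comments, offered as an assumption rather than the original implementation:

import tensorflow as tf

def abs_smooth(x):
    # Smooth L1 penalty: 0.5 * x^2 where |x| < 1, |x| - 0.5 elsewhere.
    absx = tf.abs(x)
    return tf.where(absx < 1., 0.5 * tf.square(x), absx - 0.5)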