def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Positive sample selection.
        # An anchor whose ground-truth score exceeds match_threshold is treated as a positive.
        # pmask has the shape of gscores: True where gscores > match_threshold, else False.
        pmask = gscores > match_threshold
        # Cast pmask from True/False to 1/0.
        fpmask = tf.cast(pmask, dtype)
        # Count the entries where pmask == 1 (the number of positives).
        n_positives = tf.reduce_sum(fpmask)
        # no_classes: 1 where gscores > match_threshold, else 0.
        # no_classes == 1 marks a positive (an object); 0 marks background.
        no_classes = tf.cast(pmask, tf.int32)

        # Negative sample selection: hard negative mining.
        # To keep positives and negatives roughly balanced, SSD subsamples the
        # negatives: candidates are sorted by confidence error in descending order
        # (the lower the predicted background confidence, the larger the error)
        # and the top-k hardest are kept, targeting a positive:negative ratio of
        # roughly 1:3.
        # nmask is True where -0.5 < gscores <= match_threshold, marking a negative.
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        # fnmask is 1 at negatives and 0 elsewhere.
        fnmask = tf.cast(nmask, dtype)
        # tf.where(cond, a, b), with a and b of the same shape: where cond is
        # True the output takes the element from a, otherwise from b.

        # predictions: per-class probabilities.
        predictions = slim.softmax(logits)
        # At negative positions take the predicted background score; everywhere else use 1.
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        # Flatten the background probabilities of all anchors into one vector
        # (positives contribute 1).
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        # Total number of candidate negatives.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # Negative budget: negative_ratio times the number of positives.
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
        # Cap n_neg at the number of negatives actually available.
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # Sort by confidence error (the lower the predicted background confidence,
        # the larger the error) and keep the top-k hardest as training negatives.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        # Largest background score among the selected hard negatives (anchors
        # with a background score below this threshold count as negatives).
        max_hard_pred = -val[-1]
        # Final negative mask.
        # Final negatives: truly background (-0.5 < gscores <= match_threshold)
        # AND predicted background score below the threshold.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        # Cast the negative mask from True/False to 1/0.
        fnmask = tf.cast(nmask, dtype)

        n_negatives = tf.reduce_sum(fnmask)
        # Number of selected negatives, as a float for the division below.
        fn_neg = tf.cast(n_negatives, tf.float32)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          n_positives,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), fn_neg, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          n_positives,
                          name='value')
            tf.losses.add_loss(loss)
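# A usage sketch, not part of the original source: every tensor value below is
# invented for illustration. Each argument is a list with one rank-5 tensor per
# feature layer, shaped (batch, H, W, anchors, num_classes) as required by
# tfe.get_shape(logits[0], 5); gscores mixes values above and below the 0.5
# match threshold so both the positive and the negative branches are exercised.
#
#   logits = [tf.random_normal([1, 2, 2, 1, 3])]
#   localisations = [tf.random_normal([1, 2, 2, 1, 4])]
#   gclasses = [tf.ones([1, 2, 2, 1], tf.int64)]
#   glocalisations = [tf.zeros([1, 2, 2, 1, 4])]
#   gscores = [tf.reshape(tf.constant([0.9, 0.7, 0.1, 0.2]), [1, 2, 2, 1])]
#   ssd_losses(logits, localisations, gclasses, glocalisations, gscores)
#   total_loss = tf.losses.get_total_loss()  # sums the three tf.losses.add_loss entries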
def ssd_losses(logits, localisations, glabels,
               glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=0.2,
               label_smoothing=0.,
               batch_size=16,
               scope=None):
	'''Loss functions for training the text box network.
	Arguments:
	  logits: (list of) prediction logits Tensors;                  x
	  localisations: (list of) localisation Tensors;                l
	  glabels: (list of) groundtruth label Tensors;
	  glocalisations: (list of) groundtruth localisation Tensors;   g
	  gscores: (list of) groundtruth score Tensors;                 c
	'''
	# from ssd loss
	with tf.name_scope(scope, 'txt_losses'):
		lshape = tfe.get_shape(logits[0], 5)
		num_classes = lshape[-1]
		batch_size = batch_size  # use the batch_size argument rather than lshape[0]

		l_cross_pos = []
		l_cross_neg = []
		l_loc = []

		# Flatten out all vectors!
		flogits = logits
		fgscores = gscores
		flocalisations = localisations
		fglocalisations = glocalisations
		fglabels = glabels
		# for i in range(len(logits)):
		# 	flogits.append(tf.reshape(logits[i], [-1, num_classes]))
		# 	fgscores.append(tf.reshape(gscores[i], [-1]))
		# 	fglabels.append(tf.reshape(glabels[i], [-1]))
		# 	flocalisations.append(tf.reshape(localisations[i], [-1, 12]))
		# 	fglocalisations.append(tf.reshape(glocalisations[i], [-1, 12]))
		# And concat the crap!
		glabels = tf.concat(fglabels, axis=1)
		logits = tf.concat(flogits, axis=1)  # x
		gscores = tf.concat(fgscores, axis=1)  # c
		localisations = tf.concat(flocalisations, axis=1)  # l
		glocalisations = tf.concat(fglocalisations, axis=1)  # g
		dtype = logits.dtype

		# Compute positive matching mask...
		pmask = gscores > match_threshold  # positive mask
		# pmask = tf.concat(axis=0, values=[pmask[:tf.argmax(gscores, axis=0)], [True], pmask[tf.argmax(gscores, axis=0) + 1:]])

		ipmask = tf.cast(pmask, tf.int32)  # int positive mask
		fpmask = tf.cast(pmask, dtype)  # float positive mask
		n_positives = tf.reduce_sum(fpmask)  # calculate all number

		# Hard negative mining...
		# Confidence loss.
		no_classes = tf.cast(pmask, tf.int32)
		predictions = slim.softmax(logits)
		nmask = tf.logical_and(tf.logical_not(pmask),
		                       gscores > -0.5)
		fnmask = tf.cast(nmask, dtype)
		nvalues = tf.where(nmask,
		                   predictions[:, :, 0],
		                   1. - fnmask)
		nvalues_flat = tf.reshape(nvalues, [-1])
		# Number of negative entries to select.
		max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
		n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
		n_neg = tf.minimum(n_neg, max_neg_entries)

		val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
		max_hard_pred = -val[-1]
		# Final negative mask.
		nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
		fnmask = tf.cast(nmask, dtype)
		inmask = tf.cast(nmask, tf.int32)
		# Add cross-entropy loss.
		# logits: [batch, anchors, num_classes]; labels: [batch, anchors], values in [0, num_classes).
		with tf.name_scope('cross_entropy_pos'):
			loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=glabels)
			loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_cross_pos.append(loss)

		with tf.name_scope('cross_entropy_neg'):
			loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
			                                                      labels=no_classes)
			loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_cross_neg.append(loss)

		# Add localization loss: smooth L1, L2, ...
		with tf.name_scope('localization'):
			# Weights Tensor: positive mask + random negative.
			weights = tf.expand_dims(alpha * fpmask, axis=-1)
			# localisations = tf.Print(localisations, [localisations, tf.shape(localisations)], "pre is:         ", summarize=20)
			# glocalisations = tf.Print(glocalisations, [glocalisations,  tf.shape(glocalisations)], "gt is :         ",summarize=20)
			loss = custom_layers.abs_smooth(localisations - glocalisations)
			loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
			tf.losses.add_loss(loss)
			l_loc.append(loss)

		with tf.name_scope('total'):
			total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
			total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
			total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
			total_loc = tf.add_n(l_loc, 'localization')

			# Add to EXTRA LOSSES TF.collection
			tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
			tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
			tf.add_to_collection('EXTRA_LOSSES', total_cross)
			tf.add_to_collection('EXTRA_LOSSES', total_loc)
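# The totals above go into a custom 'EXTRA_LOSSES' collection rather than the
# standard tf.GraphKeys.LOSSES, so they act as monitoring values; a training
# script would typically surface them roughly like this (a sketch, not from
# the original source):
#
#   for extra in tf.get_collection('EXTRA_LOSSES'):
#       tf.summary.scalar(extra.op.name, extra)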
def ssd_losses(
        logits,
        localisations,
        gclasses,
        glocalisations,
        gscores,
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,  # weight of the localization error
        label_smoothing=0.,
        device='/gpu:0',
        scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))  # predicted class scores
            fgclasses.append(tf.reshape(gclasses[i], [-1]))  # ground-truth classes
            fgscores.append(tf.reshape(gscores[i], [-1]))  # anchor scores, i.e. IOU with the ground truth
            flocalisations.append(tf.reshape(localisations[i],
                                             [-1, 4]))  # predicted box coordinates (encoded)
            fglocalisations.append(tf.reshape(glocalisations[i],
                                              [-1, 4]))  # ground-truth box coordinates (encoded)
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold  # a prior whose IOU exceeds the match threshold is a positive
        fpmask = tf.cast(pmask, dtype)  # cast pmask to dtype
        n_positives = tf.reduce_sum(fpmask)  # number of positives N

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # class predictions
        nmask = tf.logical_and(
            tf.logical_not(pmask),  # logical AND: non-positives whose IOU misses the threshold become prior negatives
            gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0],
                           1. - fnmask)  # background score at negative positions; 1 everywhere else
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)  # final number of negatives

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')  # sum the positive-masked confidence error, divide by batch_size
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(
                localisations - glocalisations)  # this smooth-L1 is written differently from the textbook two-branch form; see the sketch after this function
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
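# custom_layers.abs_smooth is not shown in this section; it is assumed to be
# the usual SSD smooth-L1, written branch-free. A minimal sketch under that
# assumption (the name _abs_smooth_sketch is made up here):
def _abs_smooth_sketch(x):
    """0.5 * x^2 where |x| < 1, and |x| - 0.5 elsewhere."""
    absx = tf.abs(x)
    minx = tf.minimum(absx, 1)
    # (absx - 1) * minx + absx equals absx^2 for |x| < 1 and 2*absx - 1 otherwise.
    return 0.5 * ((absx - 1) * minx + absx)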
def text_losses(logits,
                localisations,
                glocalisations,
                gscores,
                match_threshold=0.5,
                negative_ratio=3.,
                alpha=1.,
                label_smoothing=0.,
                scope=None):
    """Loss functions for training the text box network.

	Arguments:
	  logits: (list of) predictions logits Tensors;
	  localisations: (list of) localisations Tensors;
	  glocalisations: (list of) groundtruth localisations Tensors;
	  gscores: (list of) groundtruth score Tensors;

	return: loss
	"""
    with tf.name_scope(scope, 'text_loss'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        n_poses = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):

                # Determine weights Tensor.
                pmask = gscores[i] > match_threshold
                ipmask = tf.cast(pmask, tf.int32)
                n_pos = tf.reduce_sum(ipmask)
                fpmask = tf.cast(pmask, tf.float32)
                nmask = gscores[i] < match_threshold
                inmask = tf.cast(nmask, tf.int32)
                fnmask = tf.cast(nmask, tf.float32)
                num = tf.ones_like(gscores[i])
                n = tf.reduce_sum(num) + 1e-5
                n_poses.append(n_pos)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=ipmask)
                    loss = tf.losses.compute_weighted_loss(loss, fpmask)
                    #loss = tf.square(fpmask*(logits[i][:,:,:,:,:,1] - fpmask))
                    #loss = alpha*tf.reduce_mean(loss)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=inmask)
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    #loss = tf.square(fnmask*(logits[i][:,:,:,:,:,0] - fnmask))
                    #loss = alpha*tf.reduce_mean(loss)
                    l_cross_neg.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] -
                                                    glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')
            numofpositive = tf.add_n(n_poses, 'numofpositive')
            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', numofpositive)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)

            total_loss = tf.add(total_loc, total_cross, 'total_loss')
            tf.add_to_collection('EXTRA_LOSSES', total_loss)

        return total_loss
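# Unlike the variants above that tf.div the masked sum by batch_size, this
# function normalises through tf.losses.compute_weighted_loss, whose default
# reduction divides the weighted sum by the number of nonzero weights (and adds
# the result to tf.GraphKeys.LOSSES as a side effect). A toy check with
# invented values:
#
#   losses = tf.constant([2.0, 4.0, 6.0])
#   weights = tf.constant([1.0, 0.0, 1.0])
#   tf.losses.compute_weighted_loss(losses, weights)  # (2 + 6) / 2 = 4.0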
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(
            logits[0], 5)  # (batch_size, height, width, anchors per cell, per-anchor scores for the 21 classes)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!  Reshape each feature map into flat rows first, then concat all feature maps together.
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask... filter out anchors whose ground-truth score misses the threshold.
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # count the positives

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)  # no_classes: pure background indicator (0/1)
        predictions = slim.softmax(logits)  # softmax over the 21 class logits
        nmask = tf.logical_and(
            tf.logical_not(pmask),  # negatives: not a positive, and gscores > -0.5
            gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(
            nmask,  # at negative positions copy the softmax background score; positives get 1
            predictions[:, 0],
            1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)  # min of the budget and the negatives actually available
        # Negate so top_k returns the n_neg negatives that look least like background.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)  # values and their indices
        max_hard_pred = -val[-1]  # background score of the k-th hardest negative

        # Final negative mask: anchors that were already negatives AND whose
        # predicted background score falls below that of the k-th hardest one.

        # Most anchors never cover an object of interest: their IoU with every
        # ground-truth box stays below the threshold, producing a flood of
        # anchors labelled 0 (background). Because these can vastly outnumber
        # everything else, only some are kept: the ones the model is currently
        # least confident are background, i.e. sort the class-0 predictions and
        # keep the hard negatives with the smallest background scores.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss: classification correctness for the positives (after filtering).
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add cross-entropy loss: classification correctness for the mined negatives.
        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            # Background anchors have no box-regression target, so fpmask masks them out.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
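# A small numeric illustration of the hard-negative selection above (toy values,
# not from the source). nvalues holds the predicted background probability at
# negative positions and 1.0 everywhere else, so top_k over -nvalues_flat picks
# the negatives the model is least sure are background:
#
#   nvalues_flat = tf.constant([1.0, 0.9, 0.05, 0.4, 1.0, 0.2])
#   val, idxes = tf.nn.top_k(-nvalues_flat, k=3)
#   # val  -> [-0.05, -0.2, -0.4], idxes -> [2, 5, 3]
#   # max_hard_pred = -val[-1] = 0.4
#   # nmask keeps nvalues < 0.4, i.e. indices 2 and 5; the strict "<" leaves
#   # the k-th hardest negative itself on the boundary, excluded.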
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
    """Loss functions for training the SSD 512 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    # Some debugging...
    # for i in range(len(gclasses)):
    #     print(localisations[i].get_shape())
    #     print(logits[i].get_shape())
    #     print(gclasses[i].get_shape())
    #     print(glocalisations[i].get_shape())
    #     print()
    with tf.name_scope(scope):
        l_cross = []
        l_loc = []
        for i in range(len(logits)):
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = tf.cast(gclasses[i] > 0, logits[i].dtype)
                n_positives = tf.reduce_sum(pmask)
                n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # Select some random negative entries.
                r_negative = negative_ratio * n_positives / (n_entries -
                                                             n_positives)
                nmask = tf.random_uniform(gclasses[i].get_shape(),
                                          dtype=logits[i].dtype)
                nmask = nmask * (1. - pmask)
                nmask = tf.cast(nmask > 1. - r_negative, logits[i].dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy'):
                    # Weights Tensor: positive mask + random negative.
                    weights = pmask + nmask
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits[i], gclasses[i])
                    loss = tf.contrib.losses.compute_weighted_loss(
                        loss, weights)
                    l_cross.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = alpha * pmask
                    loss = custom_layers.abs_smooth(localisations[i] -
                                                    glocalisations[i])
                    loss = tf.contrib.losses.compute_weighted_loss(
                        loss, weights)
                    l_loc.append(loss)

        # Total losses in summaries...
        with tf.name_scope('total'):
            tf.summary.scalar('cross_entropy', tf.add_n(l_cross))
            tf.summary.scalar('localization', tf.add_n(l_loc))
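# This older variant skips hard mining and samples negatives at random: nmask
# starts as uniform noise in [0, 1), positives are zeroed out, and thresholding
# at 1 - r_negative keeps each remaining entry with probability r_negative.
# A toy sketch of the trick (values invented):
#
#   r_negative = 0.25
#   noise = tf.random_uniform([8])                        # e.g. [0.91, 0.12, ...]
#   keep = tf.cast(noise > 1. - r_negative, tf.float32)   # ~25% ones in expectation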
def ssd_losses(
        logits,
        localisations,  # the loss is a weighted sum of localization error and confidence error
        gclasses,
        glocalisations,
        gscores,
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,  # weight of the localization error
        label_smoothing=0.,
        device='/cpu:0',
        scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(
                logits[i], [-1, num_classes]))  # reshape the class scores to (-1, 21)
            fgclasses.append(tf.reshape(gclasses[i], [-1]))  # ground-truth classes
            fgscores.append(tf.reshape(gscores[i], [-1]))  # predicted scores for real objects
            flocalisations.append(tf.reshape(localisations[i],
                                             [-1, 4]))  # predicted box coordinates (encoded values)
            fglocalisations.append(tf.reshape(glocalisations[i],
                                              [-1, 4]))  # encoded ground-truth box coordinates
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold  # a prior whose IOU with the ground truth exceeds 0.5 is a positive
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)  # number of positives N

        # Hard negative mining: to keep positives and negatives balanced, SSD subsamples the negatives, sorting them by confidence error (the lower the predicted background confidence, the larger the error) in descending order and keeping the top-k, so the positive:negative ratio stays close to 1:3.
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # class predictions
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives,
                        tf.int32) + batch_size  # negative budget: three times the positives, plus batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(
            -nvalues_flat,
            k=n_neg)  # sort by confidence error (lower background confidence = larger error) and keep the top-k hardest negatives
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,  # class confidence error
                labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')  # sum the positive-masked confidence error, divide by batch_size
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(
                localisations -
                glocalisations)  # predicted offsets minus ground-truth offsets, then smooth-L1
            loss = tf.div(
                tf.reduce_sum(loss * weights), batch_size,
                name='value')  # weight by alpha * fpmask, sum, then divide by batch_size
            tf.losses.add_loss(loss)  # the collection now holds the weighted confidence and localization errors
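# Quick arithmetic check of the negative budget above (numbers invented): with
# batch_size = 32 and n_positives = 100, n_neg = int(3. * 100) + 32 = 332,
# then capped at max_neg_entries. The "+ batch_size" term keeps k > 0 for
# tf.nn.top_k even when a batch happens to contain no positives at all.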
def ssd_losses(
        logits,
        localisations,  # predicted classes and locations
        gclasses,
        glocalisations,
        gscores,  # ground-truth classes, locations, scores
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,
        label_smoothing=0.,
        device='/cpu:0',
        scope=None):

    with tf.name_scope(scope, 'ssd_losses'):

        # Extract the number of classes and the batch size.
        lshape = tfe.get_shape(logits[0], 5)  # a tensor_shape helper could be used instead
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):  # loop over the SSD feature layers
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)  # all anchors, 21-class outputs
        gclasses = tf.concat(fgclasses, axis=0)  # all anchors, ground-truth class ids
        gscores = tf.concat(fgscores, axis=0)  # all anchors, IOU with the ground truth
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        """[<tf.Tensor 'ssd_losses/concat:0' shape=(279424, 21) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_1:0' shape=(279424,) dtype=int64>,
            <tf.Tensor 'ssd_losses/concat_2:0' shape=(279424,) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_3:0' shape=(279424, 4) dtype=float32>,
            <tf.Tensor 'ssd_losses/concat_4:0' shape=(279424, 4) dtype=float32>]
        """

        dtype = logits.dtype
        pmask = gscores > match_threshold  # True where an anchor's IOU with the ground truth exceeds the threshold
        fpmask = tf.cast(pmask, dtype)  # float foreground mask (foreground = anchors with sufficient IOU, assumed to contain an object)
        n_positives = tf.reduce_sum(fpmask)  # total number of foreground anchors

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)  # each row of 21 numbers becomes probabilities
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)  # mark anchors whose IOU misses the threshold
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(
            nmask,
            predictions[:, 0],  # boxes without an object take the predicted background probability
            1. - fnmask)  # positions holding an object are set to 1
        nvalues_flat = tf.reshape(nvalues, [-1])

        # Number of negative entries to select.
        # Keep in nmask only the n_neg points least likely to be background
        # (those with the lowest class-0 probability).
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        # 3 x the number of foreground anchors, plus batch_size.
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)  # the n_neg points least likely to be background
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask,
                               nvalues < max_hard_pred)  # not foreground, yet least background-like: the n_neg hard negatives
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)  # 0-20
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)  # {0,1}
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
def ssd_losses(logits_pest,
               gclasses_pest,
               logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               b_gscores_pest,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    # print "=================ssd_loses start======================="
    # print("====match_threshold={}".format(match_threshold))

    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos_pest = []
        l_cross_pos = []
        l_cross_neg = []
        l_cross_neg_pest = []
        l_loc = []
        # print "logits = {}".format(logits)
        # i indexes the prediction layer.
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):

                # Determine weights Tensor.
                label_mask = gscores[i] > match_threshold
                flabel_mask = tf.cast(label_mask, dtype)

                pmask_pest = b_gscores_pest[i] > match_threshold
                fpmask_pest = tf.cast(pmask_pest, dtype)
                n_positives = tf.reduce_sum(fpmask_pest)  # with no axis given, reduce_sum adds up every element

                # Negative mask.
                no_classes = tf.cast(pmask_pest, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(
                    tf.logical_not(pmask_pest),  # select the negative samples
                    b_gscores_pest[i] > -0.5)

                fnmask = tf.cast(nmask, dtype)
                nvalues = tf.where(nmask, predictions[:, :, :, :, 0],
                                   1. - fnmask)  # background prediction at negative positions; everywhere else 1
                nvalues_flat = tf.reshape(nvalues, [-1])

                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # def add_summary(name,tensor):
                #     op = tf.summary.tensor_summary(name, tensor, collections=[])
                #     op = tf.Print(op, [tensor], name,summarize=500)
                #     tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                #
                # if i==1:
                #     with tf.name_scope("cross_tropy_debug_summary"):
                #         # summary add for debug
                #         # add_summary("cross_entropy_logits_"+str(i), logits[i])
                #         # add_summary("cross_entropy_logits_pest_" + str(i), logits_pest[i])
                #
                #         add_summary("gscores_" + str(i), gscores[i])
                #         add_summary("gclasses_pest" + str(i), gclasses_pest[i])
                #         add_summary("gclasses_" + str(i), gclasses[i])
                #         add_summary("no_classes_" + str(i), no_classes)
                #         add_summary("flabel_mask_" + str(i), flabel_mask)
                #         add_summary("fnmask_" + str(i), fnmask)
                #         add_summary("fpmask_" + str(i), fpmask_pest)

                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=gclasses[i])  # gclasses carries the full class labels
                    loss = tf.losses.compute_weighted_loss(loss, flabel_mask)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i],
                        labels=no_classes)  # no_classes only marks foreground vs background
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg.append(loss)

                with tf.name_scope('cross_entropy_pos_pest'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits_pest[i],
                        labels=gclasses_pest[i])  # gclasses_pest carries the full class labels
                    loss = tf.losses.compute_weighted_loss(loss, fpmask_pest)
                    l_cross_pos_pest.append(loss)
                    # summary add for debug

                with tf.name_scope('cross_entropy_neg_pest'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits_pest[i],
                        labels=no_classes)  # no_classes only marks foreground vs background
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg_pest.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask_pest, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] -
                                                    glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos_pest = tf.add_n(l_cross_pos_pest,
                                            'cross_entropy_pos_pest')
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross_neg_pest = tf.add_n(l_cross_neg_pest,
                                            'cross_entropy_neg_pest')
            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')

            # Add to EXTRA LOSSES TF.collection total_cross_pos_pest
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg_pest)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos_pest)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
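# Note on the mining in this variant: minval is val[-1] and is left negated, so
# "-nvalues > minval" is the same strict comparison as "nvalues < -val[-1]"
# (max_hard_pred) in the other variants; either way the k-th hardest negative
# sits exactly on the boundary and is excluded. The "1 +" in max_neg_entries
# keeps the tf.minimum from driving k to zero when no negatives are available.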
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               end_points,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None,
               feat_layers=SSDNet.default_params.feat_layers):

    with tf.name_scope(scope, 'ssd_losses'):

        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses_concat = tf.concat(fgclasses, axis=0)
        gscores_concat = tf.concat(fgscores, axis=0)
        localisations_concat = tf.concat(flocalisations, axis=0)
        glocalisations_concat = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores_concat > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores_concat > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add center loss.
        center_op_layers = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_loss'):
                layer_scores = tf.reshape(gscores[i], [-1, tensor_shape(gscores[i], 4)[-1]])
                layer_classes = tf.reshape(gclasses[i], [-1,tensor_shape(gclasses[i], 4)[-1]])
                layer_features = tf.reshape(end_points[layer], [-1, tensor_shape(end_points[layer], 4)[-1]])
                label_index = tf.argmax(layer_scores, axis=1)
                label_index = tf.expand_dims(label_index, 1)

                row = tf.range(label_index.shape[0])[:, None]
                row = tf.cast(row, tf.int64)
                label_index = tf.concat([row, label_index], axis=1)

                labels_one_layer = tf.gather_nd(layer_classes, label_index)

                loss, center_op = get_center_loss(layer_features, layer, labels_one_layer)
                loss = tf.div(tf.reduce_sum(0.001*loss * fpmask), batch_size, name='value')
                center_op_layers.append(center_op)
                tf.losses.add_loss(loss)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses_concat)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # with tf.name_scope('center_loss'):
        #     loss, centers_update_op = get_center_loss(logits, gclasses_concat)
        #     loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
        #     tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations_concat - glocalisations_concat)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)

        return center_op_layers
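# This variant returns center_op_layers (the centre-update ops produced by
# get_center_loss) instead of wiring them into the graph; the training script
# is expected to run them alongside the optimiser step. A hedged sketch
# (optimizer and the loss inputs are assumed to exist in the caller):
#
#   center_ops = ssd_losses(logits, localisations, gclasses, glocalisations,
#                           gscores, end_points)
#   with tf.control_dependencies(center_ops):
#       train_op = optimizer.minimize(tf.losses.get_total_loss())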
def ssd_losses(logits,
               localisations,
               glocalisations,
               gscores,
               match_threshold=0.1,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the text box network.


	Arguments:
	  logits: (list of) predictions logits Tensors;
	  localisations: (list of) localisations Tensors;
	  glocalisations: (list of) groundtruth localisations Tensors;
	  gscores: (list of) groundtruth score Tensors;
	"""
    with tf.name_scope(scope, 'text_loss'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = gscores[i] > match_threshold
                ipmask = tf.cast(pmask, tf.int32)
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Negative mask
                # Number of negative entries to select.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)

                nvalues = tf.where(tf.cast(1 - ipmask, tf.bool), gscores[i],
                                   np.zeros(gscores[i].shape))
                nvalues_flat = tf.reshape(nvalues, [-1])
                val, idxes = tf.nn.top_k(nvalues_flat, k=1)
                minval = val
                # Final negative mask.
                nmask = nvalues > minval
                fnmask = tf.cast(nmask, dtype)
                inmask = tf.cast(nmask, tf.int32)
                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=ipmask)
                    loss = tf.losses.compute_weighted_loss(loss, fpmask)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=inmask)
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] -
                                                    glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
def text_losses(logits,
                localisations,
                linkslogits,
                glocalisations,
                gscores,
                glinks,
                negative_ratio=3.,
                alpha1=1.,
                alpha2=1.,
                label_smoothing=0.,
                scope=None):
    with tf.name_scope(scope, 'text_loss'):
        alllogits = []
        alllocalization = []
        alllinkslogits = []
        allglocalization = []
        allgscores = []
        allglinks = []
        for i in range(len(logits)):
            alllogits.append(tf.reshape(logits[i], [-1, 2]))
            allgscores.append(tf.reshape(gscores[i], [-1]))
            allglinks.append(tf.reshape(glinks[i], [-1, 12]))
            alllinkslogits.append(tf.reshape(linkslogits[i], [-1, 12, 2]))
            allglocalization.append(tf.reshape(glocalisations[i], [-1, 5]))
            alllocalization.append(tf.reshape(localisations[i], [-1, 5]))

        alllogits = tf.concat(alllogits, 0)
        allgscores = tf.concat(allgscores, 0)
        allglinks = tf.concat(allglinks, 0)
        alllinkslogits = tf.concat(alllinkslogits, 0)
        alllocalization = tf.concat(alllocalization, 0)
        allglocalization = tf.concat(allglocalization, 0)

        pmask = tf.cast(allgscores, tf.bool)
        ipmask = tf.cast(pmask, tf.int32)
        n_pos = tf.reduce_sum(ipmask) + 1
        num = tf.ones_like(allgscores)
        n = tf.reduce_sum(num)
        fpmask = tf.cast(pmask, tf.float32)
        nmask = tf.cast(1 - allgscores, tf.bool)

        ## segment score loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=alllogits,
                                                              labels=ipmask)
        cross_pos = tf.losses.compute_weighted_loss(loss, fpmask)
        loss_neg = tf.where(pmask, tf.cast(tf.zeros_like(ipmask), tf.float32),
                            loss)
        loss_neg_flat = tf.reshape(loss_neg, [-1])
        n_neg = tf.minimum(3 * n_pos, tf.cast(n, tf.int32))
        val, idxes = tf.nn.top_k(loss_neg_flat, k=n_neg)
        minval = val[-1]
        nmask = tf.logical_and(nmask, loss_neg >= minval)

        fnmask = tf.cast(nmask, tf.float32)
        cross_neg = tf.losses.compute_weighted_loss(loss, fnmask)

        ## localization loss
        weights = tf.expand_dims(fpmask, axis=-1)
        l_loc = custom_layers.abs_smooth(alllocalization - allglocalization)
        l_loc = tf.losses.compute_weighted_loss(l_loc, weights)

        ## links score loss
        pmask_l = tf.cast(allglinks, tf.bool)
        ipmask_l = tf.cast(pmask_l, tf.int32)
        n_pos_l = tf.reduce_sum(ipmask_l) + 1
        num_l = tf.ones_like(ipmask_l)
        n_l = tf.reduce_sum(num_l)
        fpmask_l = tf.cast(pmask_l, tf.float32)
        nmask_l = tf.cast(1 - allglinks, tf.bool)

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=alllinkslogits, labels=ipmask_l)
        l_cross_pos = tf.losses.compute_weighted_loss(loss, fpmask_l)
        loss_neg = tf.where(pmask_l,
                            tf.cast(tf.zeros_like(ipmask_l), tf.float32), loss)
        loss_neg_flat = tf.reshape(loss_neg, [-1])
        n_neg = tf.minimum(3 * n_pos_l, tf.cast(n_l, tf.int32))
        val, idxes = tf.nn.top_k(loss_neg_flat, k=n_neg)
        minval = val[-1]
        nmask_l = tf.logical_and(nmask_l, loss_neg >= minval)

        fnmask_l = tf.cast(nmask_l, tf.float32)
        l_cross_neg = tf.losses.compute_weighted_loss(loss, fnmask_l)

        with tf.name_scope('total'):
            # Add to EXTRA LOSSES TF.collection
            total_cross = tf.add(cross_pos, cross_neg, 'cross_entropy')
            total_cross_l = tf.add(l_cross_pos, l_cross_neg,
                                   'cross_entropy_links')
            #total_cross = tf.identity(total_cross, name = 'total_cross')
            n_pos = tf.identity(n_pos, name='num_of_positive')
            n_pos_l = tf.identity(n_pos_l, name='num_of_positive_links')
            cross_pos = tf.identity(cross_pos, name='cross_pos')
            cross_neg = tf.identity(cross_neg, name='cross_neg')
            l_cross_neg = tf.identity(l_cross_neg, name='l_cross_neg')
            l_cross_pos = tf.identity(l_cross_pos, name='l_cross_pos')
            l_loc = tf.identity(l_loc, name='l_loc')

            tf.add_to_collection('EXTRA_LOSSES', n_pos)
            tf.add_to_collection('EXTRA_LOSSES', n_pos_l)
            tf.add_to_collection('EXTRA_LOSSES', l_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', l_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', l_loc)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_l)

            total_loss = tf.add_n(
                [alpha1 * l_loc, total_cross, alpha2 * total_cross_l],
                'total_loss')
            tf.add_to_collection('EXTRA_LOSSES', total_loss)

    return total_loss
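# Unlike the score-based mining in the earlier examples, this variant mines
# negatives by their actual cross-entropy loss: loss_neg zeroes out the
# positives, top_k picks the k largest remaining losses, and minval = val[-1]
# keeps everything at or above the k-th largest (note the ">=" here keeps the
# boundary element, where the score-based variants' strict "<" drops it).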
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
    """Loss functions for training the SSD 512 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    # Some debugging...
    # for i in range(len(gclasses)):
    #     print(localisations[i].get_shape())
    #     print(logits[i].get_shape())
    #     print(gclasses[i].get_shape())
    #     print(glocalisations[i].get_shape())
    #     print()
    with tf.name_scope(scope):
        l_cross = []
        l_loc = []
        for i in range(len(logits)):
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = tf.cast(gclasses[i] > 0, logits[i].dtype)
                n_positives = tf.reduce_sum(pmask)
                n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # Select some random negative entries.
                r_negative = negative_ratio * n_positives / (n_entries - n_positives)
                nmask = tf.random_uniform(gclasses[i].get_shape(),
                                          dtype=logits[i].dtype)
                nmask = nmask * (1. - pmask)
                nmask = tf.cast(nmask > 1. - r_negative, logits[i].dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy'):
                    # Weights Tensor: positive mask + random negative.
                    weights = pmask + nmask
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits[i],
                                                                          gclasses[i])
                    loss = tf.contrib.losses.compute_weighted_loss(loss, weights)
                    l_cross.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = alpha * pmask
                    loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                    loss = tf.contrib.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Total losses in summaries...
        with tf.name_scope('total'):
            tf.summary.scalar('cross_entropy', tf.add_n(l_cross))
            tf.summary.scalar('localization', tf.add_n(l_loc))
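This variant samples negatives at random instead of mining hard ones: uniform noise over the non-positive entries is thresholded at 1 - r_negative. A quick NumPy illustration (toy numbers, not from the source) of the expected sample size:

import numpy as np

n_entries, n_positives, negative_ratio = 8732, 40, 3.0
r_negative = negative_ratio * n_positives / (n_entries - n_positives)
noise = np.random.uniform(size=n_entries - n_positives)  # the negative entries
print((noise > 1.0 - r_negative).sum())  # on average negative_ratio * n_positives = 120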
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        # We have already seen the logits outputs above; now let's look at how the loss processes them.
        # The logits/localisations lists each hold 6 tensors, one per feature layer's classification/localization output,
        # which cannot be handled directly, so we first flatten and then concat them for easier processing.
        for i in range(len(logits)):
            # After reshaping, the flogits entries have shapes (N*5776,21), (N*2166,21), (N*600,21), (N*150,21), (N*36,21), (N*4,21)
            # (5776 = 38*38*4), i.e. logits[i] is reshaped to [prod(shape[:-1]), 21]
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            # After reshaping, the flocalisations entries have shapes (N*5776,4), (N*2166,4), (N*600,4), (N*150,4), (N*36,4), (N*4,4)
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        # Then we concat, which gives logits the shape (8732*N, 21)
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        # localisations has shape (8732*N, 4)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            # With the positive/negative samples determined, we can compute the loss; multiplying by fpmask keeps only the positive-sample terms.
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')

            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            # Multiplying by fnmask keeps only the negative-sample terms.
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
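A small sanity check of the shape comments above, assuming the standard SSD 300 anchor layout: the six flattened feature maps sum to 8732 anchors per image.

# Per-layer anchor counts: grid size x grid size x anchors per cell.
anchors_per_layer = [38*38*4, 19*19*6, 10*10*6, 5*5*6, 3*3*4, 1*1*4]
print(anchors_per_layer)       # [5776, 2166, 600, 150, 36, 4]
print(sum(anchors_per_layer))  # 8732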
Example #15
def ssd_losses(
        logits,
        locations,  #  Predicted
        gt_classes,
        gt_locations,
        gt_scores,  #  Ground truth
        match_threshold=0.5,
        negative_ratio=3.,
        alpha=1.,
        label_smoothing=0,
        device='/cpu:0',
        scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tensor_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        #Flatten all
        flogits = []
        fgt_classes = []
        fgt_scores = []
        flocations = []
        fgt_locations = []
        for i in range(len(logits)):  # From feature layer to feature layer
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgt_classes.append(tf.reshape(gt_classes[i], [-1]))
            fgt_scores.append(tf.reshape(gt_scores[i], [-1]))
            flocations.append(tf.reshape(locations[i], [-1, 4]))
            fgt_locations.append(tf.reshape(gt_locations[i], [-1, 4]))

        logits = tf.concat(flogits, axis=0)  # (N, 21)
        gt_classes = tf.concat(fgt_classes, axis=0)  # (N, )
        gt_scores = tf.concat(fgt_scores, axis=0)  # (N, )
        locations = tf.concat(flocations, axis=0)  # (N, 4)
        gt_locations = tf.concat(fgt_locations, axis=0)  # (N, 4)

        dtype = logits.dtype
        pos_mask = gt_scores > match_threshold
        fpos_mask = tf.cast(pos_mask, dtype=dtype)
        num_positives = tf.reduce_sum(fpos_mask)  # number of foreground anchors

        no_classes = tf.cast(pos_mask, tf.int32)
        predictions = slim.softmax(logits)
        neg_mask = tf.logical_and(tf.logical_not(pos_mask), gt_scores > -0.5)
        fneg_mask = tf.cast(neg_mask, dtype)
        neg_value = tf.where(
            neg_mask,  # boxes with no object take the predicted background probability
            predictions[:, 0],  # positions holding an object are marked as 1
            1. - fneg_mask)
        neg_value_flat = tf.reshape(neg_value, [-1])

        max_neg_entries = tf.cast(tf.reduce_sum(fneg_mask),
                                  tf.int32)  # the most background boxes there can possibly be

        num_negatives = tf.cast(negative_ratio * num_positives,
                                tf.int32) + batch_size
        num_negatives = tf.minimum(num_negatives,
                                   max_neg_entries)  # ensure the negative count does not exceed what is available
        valu, index = tf.nn.top_k(-neg_value_flat,
                                  k=num_negatives)  # the entries least likely to be background
        max_hard_pred = -valu[-1]  # the highest background probability among those least-likely-background entries
        # any candidate whose background probability falls below this threshold is one of the
        # num_negatives hardest negatives: not foreground, yet least background-like
        neg_mask = tf.logical_and(neg_mask, neg_value < max_hard_pred)
        fneg_mask = tf.cast(neg_mask, dtype)

        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,  # (N, num_classes): N anchors across the batch, 21 classes
                labels=gt_classes)  # 1D (N,): class ids 0-20
            loss = tf.div(tf.reduce_sum(loss * fpos_mask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,  # (N, num_classes): N anchors across the batch, 21 classes
                labels=no_classes)  # 1D (N,): 1 for positives, 0 for background
            loss = tf.div(tf.reduce_sum(loss * fneg_mask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('localization'):
            weight = tf.expand_dims(alpha * fpos_mask, axis=-1)
            loss = custom_layers.abs_smooth(locations - gt_locations)
            loss = tf.div(tf.reduce_sum(loss * weight),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
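The top_k-on-negated-values trick used above is easy to misread. A NumPy sketch with illustrative values of what max_hard_pred ends up being; note that the strict '<' keeps the k-1 hardest negatives, not k:

import numpy as np

bg_prob = np.array([0.9, 0.2, 0.7, 0.05, 0.6])  # predictions[:, 0] at negative anchors
k = 2
max_hard_pred = np.sort(bg_prob)[k - 1]   # same value as -tf.nn.top_k(-bg_prob, k)[0][-1]
hard_negatives = bg_prob < max_hard_pred  # strict comparison keeps k-1 entries
print(max_hard_pred, hard_negatives)      # 0.2 [False False False  True False]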
Example #16
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
Example #17
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):

    lshape = tfe.get_shape(logits[0], 5)
    num_classes = lshape[-1]
    batch_size = lshape[0]

    # Flatten out all vectors!
    flogits = []
    fgclasses = []
    fgscores = []
    flocalisations = []
    fglocalisations = []
    for i in range(len(logits)):
        flogits.append(tf.reshape(logits[i], [-1, num_classes]))
        fgclasses.append(tf.reshape(gclasses[i], [-1]))
        fgscores.append(tf.reshape(gscores[i], [-1]))
        flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
        fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
    # And concat the crap!
    logits = tf.concat(flogits, axis=0)
    gclasses = tf.concat(fgclasses, axis=0)
    gscores = tf.concat(fgscores, axis=0)
    localisations = tf.concat(flocalisations, axis=0)
    glocalisations = tf.concat(fglocalisations, axis=0)
    dtype = logits.dtype

    # Compute positive matching mask...
    pmask = gscores > match_threshold
    fpmask = tf.cast(pmask, dtype)
    n_positives = tf.reduce_sum(fpmask)

    # Hard negative mining...
    no_classes = tf.cast(pmask, tf.int32)
    predictions = slim.softmax(logits)
    nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
    fnmask = tf.cast(nmask, dtype)
    nvalues = tf.where(
        nmask,
        # VOC 2012 has 20 object classes (positives) plus background (negatives),
        # 21 classes in [0, 20]; class 0 is the background (negative) score
        predictions[:, 0],
        1. - fnmask)
    nvalues_flat = tf.reshape(nvalues, [-1])
    # Number of negative entries to select.
    max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
    n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
    n_neg = tf.minimum(n_neg, max_neg_entries)

    val, idxes = tf.nn.top_k(
        -nvalues_flat,
        k=n_neg)  # take the n_neg entries with the lowest background score: the "hard negatives", the ones hardest to classify
    max_hard_pred = -val[-1]
    # Final negative mask.
    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
    fnmask = tf.cast(nmask, dtype)

    # Add cross-entropy loss.
    # The two cross-entropy functions differ in the expected label shape:
    # loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)
    #   logits[b,w,h,num_classes], gclasses[b,w,h]: labels are integer ids in [0, num_classes), no one-hot needed
    # loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=gclasses)
    #   logits[b,w,h,num_classes], gclasses[b,w,h,num_classes]: labels are one-hot vectors over num_classes
    with tf.name_scope('ssd_cls_loss'):
        loss_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=gclasses)
        loss_pos = tf.div(tf.reduce_sum(loss_pos * fpmask),
                          batch_size,
                          name='value')
        loss_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=no_classes)
        loss_neg = tf.div(tf.reduce_sum(loss_neg * fnmask),
                          batch_size,
                          name='value')
        loss_cls = loss_pos + loss_neg

    # Add localization loss: smooth L1, L2, ...
    with tf.name_scope('ssd_loc_loss'):
        # Weights Tensor: positive mask + random negative.
        weights = tf.expand_dims(alpha * fpmask, axis=-1)
        loss = custom_layers.abs_smooth(localisations - glocalisations)
        loss_loc = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')

    return loss_cls, loss_loc
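The comment block above contrasts the two cross-entropy APIs. A runnable sketch (TF 1.x assumed, toy values) showing they agree when the dense labels are the one-hot encoding of the sparse ids:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, 0.1], [0.2, 1.7, 0.3]])
ids = tf.constant([0, 1])  # sparse labels: integer class ids
sparse = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ids)
dense = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=tf.one_hot(ids, depth=3))  # dense labels: one-hot rows
with tf.Session() as sess:
    print(sess.run([sparse, dense]))  # the two pairs of values match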
Example #18
def ssd_losses(logits,
               localisations,
               glocalisations,
               gscores,
               match_threshold,
               use_hard_neg=False,
               negative_ratio=3,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    with tf.name_scope(scope, 'txt_losses'):
        num_classes = 2
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []

        # Flatten out all vectors!
        flogits = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype
        num = tf.ones_like(gscores)
        n = tf.reduce_sum(num)
        # Compute positive matching mask...
        pmask = gscores > match_threshold  #positive mask
        nmask = gscores <= match_threshold  #negative mask
        ipmask = tf.cast(pmask, tf.int32)  #int positive mask
        fpmask = tf.cast(pmask, dtype)  #float positive mask
        n_pos = tf.reduce_sum(fpmask)  #calculate all number

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=ipmask)

        l_cross_pos = tf.losses.compute_weighted_loss(loss, fpmask)
        l_cross_pos = tf.identity(l_cross_pos, name='l_cross_pos')

        # Hard negative mining
        fnmask = tf.cast(nmask, dtype)
        loss_neg = tf.where(pmask, tf.zeros_like(fnmask), loss)  # zero out positives, keep the loss at negative entries
        loss_neg_flat = tf.reshape(loss_neg, [-1])
        n_neg = tf.minimum(tf.cast(negative_ratio * n_pos, tf.int32),
                           tf.cast(n, tf.int32)) + 1
        val, _ = tf.nn.top_k(loss_neg_flat, k=n_neg)
        minval = val[-1]
        nmask = tf.logical_and(nmask, loss_neg_flat >= minval)
        fnmask = tf.cast(nmask, tf.float32)

        l_cross_neg = tf.losses.compute_weighted_loss(loss, fnmask)
        l_cross_neg = tf.identity(l_cross_neg, name='l_cross_neg')

        weights = tf.expand_dims(fpmask, axis=-1)
        l_loc = custom_layers.abs_smooth(localisations - glocalisations)

        l_loc = tf.losses.compute_weighted_loss(l_loc, weights)
        l_loc = tf.identity(l_loc, name='l_loc')
        total_loss = tf.add_n([l_loc, l_cross_pos, l_cross_neg], 'total_loss')

        with tf.name_scope('total'):
            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', l_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', l_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', l_loc)
            tf.add_to_collection('EXTRA_LOSSES', total_loss)
    return total_loss
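Because loss_neg above hinges on the branch order of tf.where, here is a tiny check (TF 1.x, toy values) that tf.where(pmask, zeros, loss) zeroes the positive positions and keeps the negative ones:

import tensorflow as tf

pmask = tf.constant([True, False, True])
loss = tf.constant([1.0, 2.0, 3.0])
loss_neg = tf.where(pmask, tf.zeros_like(loss), loss)  # loss survives only at negatives
with tf.Session() as sess:
    print(sess.run(loss_neg))  # [0. 2. 0.]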
Example #19
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
Example #20
def my_ssd_losses(logits, localisations,
                  gclasses, glocalisations, gscores,
                  match_threshold=0.5,
                  negative_ratio=3.,
                  alpha=1.,
                  label_smoothing=0.,
                  device='/cpu:0',
                  scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        # _alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.])
        # alphas = _alphas / _alphas.sum()
        alphas = np.array([0.01, 0.1, 0.3, 0.6, 0.5, 1.])

        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            print("#### logits: ", logits[i])
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
            print("###### flogits: ", flogits[i])
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832],
                                                                               labels=gclasses[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144],
                                                                               labels=gclasses[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344],
                                                                               labels=gclasses[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144],
                                                                               labels=gclasses[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296],
                                                                               labels=gclasses[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424],
                                                                               labels=gclasses[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:184832],
                                                                               labels=no_classes[:184832])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[184832:254144],
                                                                               labels=no_classes[184832:254144])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[254144:273344],
                                                                               labels=no_classes[254144:273344])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[273344:278144],
                                                                               labels=no_classes[273344:278144])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[278144:279296],
                                                                               labels=no_classes[278144:279296])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[279296:279424],
                                                                               labels=no_classes[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:184832] - glocalisations[:184832])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[184832:254144] - glocalisations[184832:254144])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[254144:273344] - glocalisations[254144:273344])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[273344:278144] - glocalisations[273344:278144])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[278144:279296] - glocalisations[278144:279296])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[279296:279424] - glocalisations[279296:279424])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)



        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        with tf.name_scope('cross_entropy_pos'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664],
                                                                               labels=gclasses[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080],
                                                                               labels=gclasses[369664:462080])
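            # NOTE: 462080:118016 in the slices below is an empty range (start > stop), so
            # those terms contribute nothing; the boundaries look like a copy/paste error.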
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016],
                                                                               labels=gclasses[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416],
                                                                               labels=gclasses[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720],
                                                                               labels=gclasses[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976],
                                                                               labels=gclasses[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss0 = alphas[0] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[:369664],
                                                                               labels=no_classes[:369664])
            loss1 = alphas[1] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[369664:462080],
                                                                               labels=no_classes[369664:462080])
            loss2 = alphas[2] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[462080:118016],
                                                                               labels=no_classes[462080:118016])
            loss3 = alphas[3] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[118016:124416],
                                                                               labels=no_classes[118016:124416])
            loss4 = alphas[4] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[124416:126720],
                                                                               labels=no_classes[124416:126720])
            loss5 = alphas[5] * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[126720:126976],
                                                                               labels=no_classes[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss0 = alphas[0] * custom_layers.abs_smooth(localisations[:369664] - glocalisations[:369664])
            loss1 = alphas[1] * custom_layers.abs_smooth(localisations[369664:462080] - glocalisations[369664:462080])
            loss2 = alphas[2] * custom_layers.abs_smooth(localisations[462080:118016] - glocalisations[462080:118016])
            loss3 = alphas[3] * custom_layers.abs_smooth(localisations[118016:124416] - glocalisations[118016:124416])
            loss4 = alphas[4] * custom_layers.abs_smooth(localisations[124416:126720] - glocalisations[124416:126720])
            loss5 = alphas[5] * custom_layers.abs_smooth(localisations[126720:126976] - glocalisations[126720:126976])
            loss = tf.concat([loss0, loss1, loss2, loss3, loss4, loss5], axis=0)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
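The hard-coded slice boundaries in my_ssd_losses are fragile, as the empty 462080:118016 range above shows. A hedged sketch of deriving them from per-layer sizes instead; the layer_sizes below are back-computed from the first block's boundaries and are an assumption about the intended layout:

import numpy as np

# Flattened entries per layer (anchor count x batch), recovered from the boundaries
# 184832, 254144, 273344, 278144, 279296, 279424 used in the first block above.
layer_sizes = [184832, 69312, 19200, 4800, 1152, 128]
bounds = np.cumsum([0] + layer_sizes)
layer_slices = [slice(int(a), int(b)) for a, b in zip(bounds[:-1], bounds[1:])]
print(layer_slices[0], layer_slices[-1])  # slice(0, 184832) slice(279296, 279424)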
Example #21
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = gscores[i] > match_threshold
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Select some random negative entries.
                # n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                # Negative mask.
                no_classes = tf.cast(pmask, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(tf.logical_not(pmask),
                                       gscores[i] > -0.5)
                fnmask = tf.cast(nmask, dtype)
                nvalues = tf.where(nmask,
                                   predictions[:, :, :, :, 0],
                                   1. - fnmask)
                nvalues_flat = tf.reshape(nvalues, [-1])
                # Number of negative entries to select.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                          labels=gclasses[i])
                    loss = tf.losses.compute_weighted_loss(loss, fpmask)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i],
                                                                          labels=no_classes)
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
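This variant floors the negative count at an eighth of all entries and at four per row of the first dimension before capping it at what is available. A numeric illustration with toy values (not from the source):

n_positives, negative_ratio = 25, 3.0
n_entries = 8732         # size of nvalues_flat
first_dim = 4            # tf.shape(nvalues)[0]
n_available_neg = 8000   # sum of the negative mask

n_neg = int(negative_ratio * n_positives)
n_neg = max(n_neg, n_entries // 8, first_dim * 4)  # the two floors used above
n_neg = min(n_neg, 1 + n_available_neg)            # cap at available negatives
print(n_neg)  # 1091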
Example #22
def text_losses(logits,
                localisations,
                glocalisations,
                gscores,
                match_threshold,
                use_hard_neg=False,
                negative_ratio=3,
                alpha=1.,
                label_smoothing=0.,
                scope=None):
    with tf.name_scope(scope, 'text_loss'):
        alllogits = []
        alllocalization = []
        for i in range(len(logits)):
            alllogits.append(tf.reshape(logits[i], [-1, 2]))
            alllocalization.append(tf.reshape(localisations[i], [-1, 4]))

        alllogits = tf.concat(alllogits, 0)
        alllocalization = tf.concat(alllocalization, 0)

        pmask = gscores > match_threshold
        ipmask = tf.cast(pmask, tf.int32)
        fpmask = tf.cast(pmask, tf.float32)
        n_pos = tf.reduce_sum(fpmask, name='num_of_positive')
        num = tf.ones_like(gscores, dtype=tf.float32)
        n = tf.reduce_sum(num)
        nmask = gscores <= match_threshold

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=alllogits,
                                                              labels=ipmask)
        #l_cross_pos = tf.losses.compute_weighted_loss(loss, fpmask)
        l_cross_pos = tf.div(tf.reduce_sum(loss * fpmask),
                             n_pos,
                             name='l_cross_pos')
        #tf.losses.add_loss(l_cross_pos)

        loss_neg = tf.where(pmask, tf.cast(tf.zeros_like(ipmask), tf.float32),
                            loss)
        loss_neg_flat = tf.reshape(loss_neg, [-1])
        n_neg = tf.minimum(negative_ratio * n_pos, n)
        n_neg_i = tf.cast(n_neg, tf.int32, name='num_of_negative')
        val, idxes = tf.nn.top_k(loss_neg_flat, k=n_neg_i)
        minval = val[-1]
        nmask = tf.logical_and(nmask, loss_neg >= minval)

        fnmask = tf.cast(nmask, tf.float32)
        #l_cross_neg = tf.losses.compute_weighted_loss(loss, fnmask)
        l_cross_neg = tf.div(tf.reduce_sum(loss * fnmask),
                             n_neg,
                             name='l_cross_neg')
        #tf.losses.add_loss(l_cross_neg)
        all_mask = tf.cast(tf.logical_or(nmask, pmask), tf.float32)
        l_cross = tf.div(tf.reduce_sum(loss * all_mask), n_pos, name='l_cross')
        tf.losses.add_loss(l_cross)

        weights = tf.expand_dims(fpmask, axis=-1)
        l_loc = custom_layers.abs_smooth(alllocalization - glocalisations)
        #l_loc = tf.losses.compute_weighted_loss(l_loc, weights)
        l_loc = tf.div(tf.reduce_sum(l_loc * weights), n_pos, name='l_loc')
        tf.losses.add_loss(l_loc)

        with tf.name_scope('total'):
            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', n_pos)
            tf.add_to_collection('EXTRA_LOSSES', n_neg)
            tf.add_to_collection('EXTRA_LOSSES', l_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', l_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', l_loc)

            total_loss = tf.add_n([l_loc, l_cross_pos, l_cross_neg],
                                  'total_loss')
            tf.add_to_collection('EXTRA_LOSSES', total_loss)

    return total_loss
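text_losses normalises by n_pos rather than batch_size, so a batch without any positive anchor would divide by zero. A common guard (an assumption here, not in the source) clamps the denominator:

import tensorflow as tf

n_pos = tf.reduce_sum(tf.cast(tf.constant([False, False]), tf.float32))  # no positives
safe_n_pos = tf.maximum(n_pos, 1.0)  # avoids 0/0 when normalising the losses
with tf.Session() as sess:
    print(sess.run(safe_n_pos))  # 1.0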
Example #23
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = gscores[i] > match_threshold
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Select some random negative entries.
                # n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                # Negative mask.
                no_classes = tf.cast(pmask, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(tf.logical_not(pmask),
                                       gscores[i] > -0.5)
                fnmask = tf.cast(nmask, dtype)
                nvalues = tf.where(nmask, predictions[:, :, :, :, 0],
                                   1. - fnmask)
                nvalues_flat = tf.reshape(nvalues, [-1])
                # Number of negative entries to select.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=gclasses[i])
                    loss = tf.losses.compute_weighted_loss(loss, fpmask)
                    l_cross_pos.append(loss)

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=no_classes)
                    loss = tf.losses.compute_weighted_loss(loss, fnmask)
                    l_cross_neg.append(loss)

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(alpha * fpmask, axis=-1)
                    loss = custom_layers.abs_smooth(localisations[i] -
                                                    glocalisations[i])
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')
            total_loc = tf.add_n(l_loc, 'localization')

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
Example #24
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]  # number of classes
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(
                logits[i], [-1, num_classes]))  # flogits entries have shape (***, 21)
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))  # gscores: per-anchor matching score (the IoU)
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(
            flogits,
            axis=0)  # concat the flattened logits of every layer into one 2-D tensor; tf.concat accepts a list directly
        gclasses = tf.concat(fgclasses, axis=0)  # 1-D, shape (27****,)
        gscores = tf.concat(fgscores, axis=0)  # same shape as gclasses
        localisations = tf.concat(flocalisations, axis=0)  # 2-D, shape (27***, 4)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype  # float32

        # Compute positive matching mask...
        pmask = gscores > match_threshold  # match_threshold=0.5: the initial positive/negative split is driven by gscores, the IoU with ground truth
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)  # shape (27***,): 1 where the anchor is a positive box
        predictions = slim.softmax(logits)  # shape (2794**, 21)
        nmask = tf.logical_and(
            tf.logical_not(pmask),
            gscores > -0.5)  # the mask of entries with gscores <= 0.5, ANDed with gscores > -0.5
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(
            nmask,  # for negative candidates nvalues takes predictions[:, 0], the background probability; otherwise 1
            predictions[:, 0],  # class 0 is the background class
            1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)  # total number of candidate negatives
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(
            -nvalues_flat,
            k=n_neg)  # given the negative count k, pick the negatives with the smallest predicted background probability (hence the minus sign)
        max_hard_pred = -val[-1]  # e.g. 0.3
        # Final negative mask.
        nmask = tf.logical_and(
            nmask, nvalues < max_hard_pred)  # keep negative candidates whose background prediction is below the threshold; these are the final negatives
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            # labels: each entry labels[i] must be an index in [0, num_classes) or -1; for -1 the loss
            # is 0 and logits[i] is ignored. Filtering with fpmask below has the same effect as a -1 index.
            # All positive losses are filtered through fpmask, which marks the anchors selected as positives
            # among those overlapping a ground truth; gclasses assigns each overlapping anchor its class.
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits,  # logits.shape = (27***, 21)
                labels=gclasses)  # the class each overlapping anchor was actually assigned
            loss = tf.div(tf.reduce_sum(loss * fpmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes
            )  # no_classes is 0 for every anchor not selected as positive, i.e. the softmax treats those entries as class 0 (background)
            # fnmask then keeps only the negatives that were actually selected, and the loss is computed over them.
            loss = tf.div(tf.reduce_sum(loss * fnmask),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights),
                          batch_size,
                          name='value')
            tf.losses.add_loss(loss)
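All of the variants above that call tf.losses.add_loss leave aggregation to the caller. A minimal end-to-end sketch (TF 1.x; the tf.square(w) term is a stand-in for the three loss terms registered above):

import tensorflow as tf

w = tf.Variable(2.0)
tf.losses.add_loss(tf.square(w))  # stand-in for the cross-entropy / localization losses
total_loss = tf.losses.get_total_loss(add_regularization_losses=False)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(total_loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run(total_loss))  # drops below the initial 4.0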