Example #1
    def test_smooth_l1(self):
        program = Program()
        with program_guard(program):
            x = layers.data(name='x', shape=[4], dtype='float32')
            y = layers.data(name='label', shape=[4], dtype='float32')
            loss = layers.smooth_l1(x, y)
            self.assertIsNotNone(loss)
        print(str(program))
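
For reference, layers.smooth_l1 computes a smooth L1 (Huber-style) loss between x and y and sums it over the trailing dimension, yielding one scalar per row. Below is a minimal NumPy sketch of the default behavior (assuming sigma=1.0 and no inside/outside weights); smooth_l1_ref is our name for the sketch, not a Paddle API:

    import numpy as np

    def smooth_l1_ref(x, y):
        # Elementwise smooth L1: 0.5*d^2 if |d| < 1, else |d| - 0.5,
        # summed over the last axis (one scalar loss per row).
        d = x - y
        elem = np.where(np.abs(d) < 1.0, 0.5 * d * d, np.abs(d) - 0.5)
        return elem.sum(axis=-1, keepdims=True)

    x = np.array([[0.5, 2.0, -0.2, 0.0]], dtype=np.float32)
    y = np.zeros((1, 4), dtype=np.float32)
    print(smooth_l1_ref(x, y))  # [[1.645]] = 0.125 + 1.5 + 0.02 + 0.0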
Example #2
    def __call__(
            self,
            predictions,
            labels_pos_mask,  # Shape: [batch_size, 19248, 1]
            labels_neg_mask,  # Shape: [batch_size, 19248, 1]
            labels_allboxes_vector,  # Shape: [batch_size, 19248, 8]
            segment_t,  # list  Shape: [batch_size, 19248, 1]
            label_masks,
            labels_best_truth_idx,
            labels_pos_index,
            labels_pos_cid,  #  Shape: [batch_size, 19248]
            labels_pos_cid2,  #  Shape: [batch_size, 19248]
            priors,
            class_vectors,
            batch_size,
            use_maskiou=True,
            use_ce_loss=True,
            use_ghm_c_loss=False,
            use_focal_loss=False,
            use_ohem_loss=False):

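        # Loss terms assembled below (see the Loss Key near the return):
        #   B: box localization (CIoU), M: mask, C: class confidence,
        #   I: mask IoU, S: semantic segmentation.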
        pred_allboxes_encode_x0y0x1y1 = predictions[
            'loc']  # Shape: [batch_size, 19248, 4]
        pred_allboxes_conf = predictions[
            'conf']  # Shape: [batch_size, 19248, 1+80]
        pred_allboxes_mask_coef = predictions[
            'mask']  # Shape: [batch_size, 19248, num_prototypes=32]
        pred_proto = predictions[
            'proto']  # Shape: [batch_size, s4=138, s4=138, num_prototypes=32]
        pred_segm = predictions[
            'segm']  # Shape: [batch_size, num_classes=80, s8=69, s8=69]

        labels_allboxes_x0y0x1y1 = labels_allboxes_vector[:, :, 0:4]  # Shape: [batch_size, 19248, 4]
        labels_allboxes_decode_x0y0x1y1 = labels_allboxes_vector[:, :, 4:8]  # Shape: [batch_size, 19248, 4]

        losses = {}

        # 1. bbox_loss: computed only for positive examples.
        # bbox_alpha = 1.5
        # bbox_loss = P.smooth_l1(P.reshape(pred_allboxes_encode_x0y0x1y1, (-1, 4)), P.reshape(labels_allboxes_x0y0x1y1, (-1, 4)))
        # bbox_loss = P.reshape(labels_pos_mask, (-1, 1)) * bbox_loss
        # bbox_loss = P.reduce_sum(bbox_loss) * bbox_alpha
        # losses['B'] = bbox_loss

        # 1. bbox_loss: CIoU loss
        pred_x0y0x1y1 = []
        for idx in range(batch_size):
            temp = decode(pred_allboxes_encode_x0y0x1y1[idx], priors)
            pred_x0y0x1y1.append(temp)
        pred_x0y0x1y1 = P.concat(pred_x0y0x1y1,
                                 axis=0)  # Shape: [batch_size*num_priors, 4]
        pred_x0y0x1y1 = P.reshape(
            pred_x0y0x1y1,
            (batch_size, -1, 4))  # Shape: [batch_size, num_priors, 4]

        ciou = P.reshape(
            self.bbox_ciou(pred_x0y0x1y1, labels_allboxes_decode_x0y0x1y1),
            (batch_size, -1, 1))  # (batch_size, num_priors, 1)

        # Weight of each predicted box's ciou_loss = 2 - (ground-truth area / image area)
        gt_area = (labels_allboxes_decode_x0y0x1y1[:, :, 2:3] - labels_allboxes_decode_x0y0x1y1[:, :, 0:1]) * \
                  (labels_allboxes_decode_x0y0x1y1[:, :, 3:4] - labels_allboxes_decode_x0y0x1y1[:, :, 1:2])
        bbox_loss_scale = 2.0 - gt_area
        ciou_loss = labels_pos_mask * bbox_loss_scale * (1 - ciou)
        bbox_alpha = 1.5
        ciou_loss = P.reduce_sum(ciou_loss) * bbox_alpha
        losses['B'] = ciou_loss

        # 2. mask_loss: computed only for positive examples
        mask_h = P.shape(pred_proto)[1]
        mask_w = P.shape(pred_proto)[2]
        loss_m = 0
        maskiou_t_list = []
        maskiou_net_input_list = []
        label_t_list = []
        for idx in range(batch_size):
            # [[0], [0], [0], [0], [0], [0], [0], [0]]. Select each positive
            # sample's best-matching gt index (an index into label_x0y0x1y1cid[idx]).
            # With only one gt, all the indices are 0.
            labels_pos_index[idx].stop_gradient = True
            cur_gt = P.gather(labels_best_truth_idx[idx],
                              labels_pos_index[idx])  # (?, 1)
            cur_gt.stop_gradient = True
            cur_x0y0x1y1 = P.gather(labels_allboxes_decode_x0y0x1y1[idx],
                                    labels_pos_index[idx])  # (?, 4)

            proto_masks = pred_proto[idx]  # (138, 138, 32)
            # pred_mask_coef (batch_size, 19248, 32). Select the mask coefficients predicted by the 8 positive samples.
            proto_coef = P.gather(pred_allboxes_mask_coef[idx],
                                  labels_pos_index[idx])  # (?, 32)

            # (?, 138, 138): extract the gt masks matched by the 8 positive samples. They all match the same gt, so the same mask is repeated 8 times.
            mask_t = P.gather(label_masks[idx], cur_gt)  # (?, 138, 138)
            # (?, ): extract the gt cids matched by the 8 positive samples. They all match the same gt, so the same cid is repeated 8 times.
            label_t = P.gather(labels_pos_cid[idx],
                               labels_pos_index[idx])  # (?, )

            # Size: (138, 138, ?) = prototypes * coefficients^T
            pred_masks = P.matmul(proto_masks, proto_coef, transpose_y=True)
            pred_masks = P.sigmoid(pred_masks)  # sigmoid activation

            pred_masks = crop(pred_masks, cur_x0y0x1y1)
            pred_masks = P.transpose(pred_masks, perm=[2, 0, 1])

            # Binary cross-entropy; the tiny constant guards against NaN.
            masks_pos_loss = mask_t * (0 - P.log(pred_masks + 1e-9))
            masks_neg_loss = (1 - mask_t) * (0 - P.log(1 - pred_masks + 1e-9))
            pre_loss = (masks_pos_loss + masks_neg_loss)
            pre_loss = P.reduce_sum(pre_loss, dim=[1, 2])

            # The smaller the gt area, the larger the weight of its mask loss.
            cur_cxcywh = center_size(cur_x0y0x1y1)
            gt_box_width = cur_cxcywh[:, 2]
            gt_box_height = cur_cxcywh[:, 3]
            pre_loss = pre_loss / (gt_box_width * gt_box_height)
            loss_m += P.reduce_sum(pre_loss)

            if use_maskiou:
                # In mask_t, masks with area <= 5*5 would normally be discarded:
                # discard_mask_area = 5*5
                '''
                The GPU build of paddlepaddle 1.6.2 has a bug: if select is [None]
                and that select is then used in gather(), it fails with
                "cudaGetLastError  invalid configuration argument errno: 9".
                The CPU build runs fine. To avoid this, select must never be [None],
                so no area filtering is done here and all of mask_t is kept.
                '''
                discard_mask_area = -1
                gt_mask_area = P.reduce_sum(mask_t, dim=[1, 2])
                gt_mask_area.stop_gradient = True
                select = P.where(gt_mask_area > discard_mask_area)
                select.stop_gradient = True
                pred_masks = P.gather(pred_masks, select)
                mask_t = P.gather(mask_t, select)
                label_t = P.gather(label_t, select)
                label_t.stop_gradient = True

                maskiou_net_input = P.reshape(
                    pred_masks, (P.shape(pred_masks)[0], 1, mask_h, mask_w))
                pred_masks = P.cast(pred_masks > 0.5, 'float32')  # binarize (round)
                maskiou_t = self._mask_iou(pred_masks, mask_t)  # (8, )
                maskiou_net_input_list.append(
                    maskiou_net_input)  # (8, 1, 138, 138)
                maskiou_t_list.append(maskiou_t)  # (8, )
                label_t_list.append(label_t)  # (8, )
        mask_alpha = 6.125
        losses['M'] = loss_m * mask_alpha / mask_h / mask_w

        # Remainder: assemble the maskiou targets
        if use_maskiou:
            maskiou_net_input = P.concat(
                maskiou_net_input_list,
                axis=0)  # (21, 1, 138, 138)  masks predicted by the 21 positives
            maskiou_t = P.concat(maskiou_t_list,
                                 axis=0)  # (21, )  IoU between predicted and gt masks
            label_t = P.concat(label_t_list, axis=0)  # (21, )  cids of the 21 positives
            label_t.stop_gradient = True  # because it is an integer type?
            maskiou_targets = [maskiou_net_input, maskiou_t, label_t]

        # 3. conf_loss.
        conf_alpha = 1.0
        if use_ce_loss:
            conf_loss = self.ce_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                          labels_neg_mask, class_vectors,
                                          labels_pos_cid2, gt_area)
        elif use_ghm_c_loss:
            conf_loss = self.ghm_c_loss(pred_allboxes_conf, labels_pos_mask,
                                        labels_neg_mask, class_vectors,
                                        labels_pos_cid2)
        elif use_focal_loss:
            conf_loss = self.focal_conf_loss(pred_allboxes_conf,
                                             labels_pos_mask, labels_neg_mask,
                                             class_vectors, labels_pos_cid2)
        elif use_ohem_loss:
            conf_loss = self.ohem_conf_loss(pred_allboxes_conf, batch_size,
                                            labels_neg_mask, labels_pos_mask,
                                            labels_pos_index, class_vectors,
                                            labels_pos_cid)
        losses['C'] = conf_loss * conf_alpha

        # 4. mask_iou_loss: computed only for positive examples.
        if use_maskiou:
            # maskiou_net_input  (21, 1, 138, 138)  masks predicted by the 21 positives
            # maskiou_t          (21, )             IoU between predicted and gt masks
            # label_t            (21, )             cids of the 21 positives
            maskiou_net_input, maskiou_t, label_t = maskiou_targets
            maskiou_p = maskiou_net(maskiou_net_input, self.num_classes - 1)
            maskiou_p = P.reduce_max(maskiou_p, dim=[2, 3])  # global max pool  (21, 80)
            temp_mask = P.gather(class_vectors, label_t)  # one-hot class mask  (21, 81)
            temp_mask = temp_mask[:, 1:]  # drop the background column  (21, 80)
            maskiou_p = temp_mask * maskiou_p  # keep only the true class's channel  (21, 80)
            maskiou_p = P.reduce_sum(maskiou_p, dim=1,
                                     keep_dim=True)  # (21, 1)
            loss_i = P.smooth_l1(
                maskiou_p, P.reshape(maskiou_t, (P.shape(maskiou_t)[0], 1)))
            maskiou_alpha = 25.0
            losses['I'] = maskiou_alpha * P.reduce_sum(loss_i)

        # 5. semantic_segmentation_loss: computed only for positive examples
        mask_h = P.shape(pred_segm)[2]
        mask_w = P.shape(pred_segm)[3]
        loss_s = 0.0
        for idx in range(batch_size):
            cur_segment = pred_segm[idx]  # (80, 69, 69)
            l = P.sigmoid_cross_entropy_with_logits(cur_segment,
                                                    segment_t[idx])
            loss_s += P.reduce_sum(l)

        semantic_segmentation_alpha = 1.0
        losses['S'] = loss_s / mask_h / mask_w * semantic_segmentation_alpha

        total_num_pos = P.cast(P.reduce_sum(labels_pos_mask), 'float32')
        for k in losses:
            if k not in ('S', ):
                losses[k] /= total_num_pos
            else:
                losses[k] /= batch_size
        total_loss = 0.0
        for k in losses:
            total_loss += losses[k]

        # Loss Key:
        #  - B: Box Localization Loss
        #  - M: Mask Loss
        #  - C: Class Confidence Loss
        #  - I: MaskIou Loss
        #  - S: Semantic Segmentation Loss
        # return losses['M'], losses['C']
        return losses, total_loss
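
The decode and center_size helpers used above are not shown in this example. As a rough sketch under SSD/YOLACT-style conventions (the variance constants 0.1 and 0.2 are assumptions; the original code may use different values), decode turns predicted offsets plus priors in center-size form into corner-form boxes, and center_size is the inverse conversion:

    import numpy as np

    def decode_sketch(loc, priors, variances=(0.1, 0.2)):
        # loc:    (num_priors, 4) predicted offsets (dcx, dcy, dw, dh)
        # priors: (num_priors, 4) anchor boxes in (cx, cy, w, h) form
        cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
        wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
        # (cx, cy, w, h) -> (x0, y0, x1, y1)
        return np.concatenate([cxcy - wh / 2, cxcy + wh / 2], axis=1)

    def center_size_sketch(boxes):
        # (x0, y0, x1, y1) -> (cx, cy, w, h)
        return np.concatenate([(boxes[:, :2] + boxes[:, 2:]) / 2,
                               boxes[:, 2:] - boxes[:, :2]], axis=1)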
Example #3
    def __call__(self,
                 location,
                 confidence,
                 gt_box,
                 gt_label,
                 landmark_predict,
                 lmk_label,
                 lmk_ignore_flag,
                 prior_box,
                 prior_box_var=None):
        def _reshape_to_2d(var):
            return layers.flatten(x=var, axis=2)

        helper = LayerHelper('ssd_loss')
        # Only support mining_type == 'max_negative' now.
        mining_type = 'max_negative'
        # The max `sample_size` of negative box, used only
        # when mining_type is `hard_example`.
        sample_size = None
        num, num_prior, num_class = confidence.shape
        conf_shape = layers.shape(confidence)

        # 1. Find matched bounding boxes by prior box.
        # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
        iou = iou_similarity(x=gt_box, y=prior_box)
        # 1.2 Compute matched bounding boxes with a bipartite matching algorithm.
        matched_indices, matched_dist = bipartite_match(
            iou, self.match_type, self.overlap_threshold)

        # 2. Compute confidence for mining hard examples
        # 2.1. Get the target label based on matched indices
        gt_label = layers.reshape(x=gt_label,
                                  shape=(len(gt_label.shape) - 1) * (0, ) +
                                  (-1, 1))
        gt_label.stop_gradient = True
        target_label, _ = target_assign(gt_label,
                                        matched_indices,
                                        mismatch_value=self.background_label)
        # 2.2. Compute confidence loss.
        # Reshape confidence to 2D tensor.
        confidence = _reshape_to_2d(confidence)
        target_label = tensor.cast(x=target_label, dtype='int64')
        target_label = _reshape_to_2d(target_label)
        target_label.stop_gradient = True
        conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)
        # 3. Mining hard examples
        actual_shape = layers.slice(conf_shape, axes=[0], starts=[0], ends=[2])
        actual_shape.stop_gradient = True
        conf_loss = layers.reshape(x=conf_loss,
                                   shape=(-1, 0),
                                   actual_shape=actual_shape)
        conf_loss.stop_gradient = True
        neg_indices = helper.create_variable_for_type_inference(dtype='int32')
        updated_matched_indices = helper.create_variable_for_type_inference(
            dtype=matched_indices.dtype)
        helper.append_op(type='mine_hard_examples',
                         inputs={
                             'ClsLoss': conf_loss,
                             'LocLoss': None,
                             'MatchIndices': matched_indices,
                             'MatchDist': matched_dist,
                         },
                         outputs={
                             'NegIndices': neg_indices,
                             'UpdatedMatchIndices': updated_matched_indices
                         },
                         attrs={
                             'neg_pos_ratio': self.neg_pos_ratio,
                             'neg_dist_threshold': self.neg_overlap,
                             'mining_type': mining_type,
                             'sample_size': sample_size,
                         })

        # 4. Assign classification and regression targets
        # 4.1. Encoded bbox according to the prior boxes.
        encoded_bbox = box_coder(prior_box=prior_box,
                                 prior_box_var=prior_box_var,
                                 target_box=gt_box,
                                 code_type='encode_center_size')
        # 4.2. Assign regression targets
        target_bbox, target_loc_weight = target_assign(
            encoded_bbox,
            updated_matched_indices,
            mismatch_value=self.background_label)
        # 4.3. Assign classification targets
        target_label, target_conf_weight = target_assign(
            gt_label,
            updated_matched_indices,
            negative_indices=neg_indices,
            mismatch_value=self.background_label)

        target_loc_weight = target_loc_weight * target_label
        encoded_lmk_label = self.decode_lmk(lmk_label, prior_box,
                                            prior_box_var)

        target_lmk, target_lmk_weight = target_assign(
            encoded_lmk_label,
            updated_matched_indices,
            mismatch_value=self.background_label)
        lmk_ignore_flag = layers.reshape(
            x=lmk_ignore_flag,
            shape=(len(lmk_ignore_flag.shape) - 1) * (0, ) + (-1, 1))
        target_ignore, nouse = target_assign(
            lmk_ignore_flag,
            updated_matched_indices,
            mismatch_value=self.background_label)

        target_lmk_weight = target_lmk_weight * target_ignore
        landmark_predict = _reshape_to_2d(landmark_predict)
        target_lmk = _reshape_to_2d(target_lmk)
        target_lmk_weight = _reshape_to_2d(target_lmk_weight)
        lmk_loss = layers.smooth_l1(landmark_predict, target_lmk)
        lmk_loss = lmk_loss * target_lmk_weight
        target_lmk.stop_gradient = True
        target_lmk_weight.stop_gradient = True
        target_ignore.stop_gradient = True
        nouse.stop_gradient = True

        # 5. Compute loss.
        # 5.1 Compute confidence loss.
        target_label = _reshape_to_2d(target_label)
        target_label = tensor.cast(x=target_label, dtype='int64')

        conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)
        target_conf_weight = _reshape_to_2d(target_conf_weight)
        conf_loss = conf_loss * target_conf_weight

        # the target_label and target_conf_weight do not have gradient.
        target_label.stop_gradient = True
        target_conf_weight.stop_gradient = True

        # 5.2 Compute regression loss.
        location = _reshape_to_2d(location)
        target_bbox = _reshape_to_2d(target_bbox)

        loc_loss = layers.smooth_l1(location, target_bbox)
        target_loc_weight = _reshape_to_2d(target_loc_weight)
        loc_loss = loc_loss * target_loc_weight

        # the target_bbox and target_loc_weight do not have gradient.
        target_bbox.stop_gradient = True
        target_loc_weight.stop_gradient = True

        # 5.3 Compute overall weighted loss.
        loss = self.conf_loss_weight * conf_loss + self.loc_loss_weight * loc_loss + 0.4 * lmk_loss
        # reshape to [N, Np], N is the batch size and Np is the prior box number.
        loss = layers.reshape(x=loss, shape=(-1, 0), actual_shape=actual_shape)
        loss = layers.reduce_sum(loss, dim=1, keep_dim=True)
        if self.normalize:
            normalizer = layers.reduce_sum(target_loc_weight) + 1
            loss = loss / normalizer

        return loss
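
Step 3 above delegates hard example mining to the mine_hard_examples op with mining_type='max_negative'. Conceptually, that keeps every positive prior and only the neg_pos_ratio hardest negatives per image. A hedged NumPy sketch of the selection rule (not the op's actual implementation):

    import numpy as np

    def mine_max_negative(conf_loss, is_pos, neg_pos_ratio=3.0):
        # conf_loss: (num_priors,) per-prior classification loss
        # is_pos:    (num_priors,) boolean positive-match mask
        num_neg = int(neg_pos_ratio * is_pos.sum())
        neg_loss = np.where(is_pos, -np.inf, conf_loss)  # exclude positives
        return np.argsort(-neg_loss)[:num_neg]           # hardest negatives first

    loss = np.array([0.1, 2.3, 0.7, 1.5, 0.2])
    pos = np.array([True, False, False, False, False])
    print(mine_max_negative(loss, pos))  # [1 3 2]: the 3 hardest negatives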