Code Example #1
    def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data):
        """ Perform nms for only the max scoring class that isn't background (class 0) """
        # Note that all box coordinates are decoded before score filtering here; filtering first and then decoding could be considered instead.
        cur_scores = conf_preds[batch_idx, 1:, :]
        conf_scores = P.reduce_max(cur_scores, dim=0)
        '''
        The GPU build of paddlepaddle 1.6.2 has a problem: if keep is [None] (empty) and keep is then used in gather(),
        the error "cudaGetLastError  invalid configuration argument errno: 9" is raised. The CPU build runs fine.
        To avoid this, keep must never be [None], so an extra element keep_extra is appended to keep here.
        '''
        keep = P.where(conf_scores > self.conf_thresh)
        keep_extra = P.where(conf_scores < self.conf_thresh)
        keep_extra = keep_extra[:1]
        keep = P.concat([keep, keep_extra], axis=0)
        scores = P.gather(P.transpose(cur_scores, perm=[1, 0]), keep)
        scores = P.transpose(scores, perm=[1, 0])
        boxes = P.gather(decoded_boxes, keep)
        masks = P.gather(mask_data[batch_idx], keep)
        '''
        Because keep_extra was added above, keep is guaranteed to contain at least one predicted box.
        Once the upstream issue is fixed, delete the keep_extra code above and uncomment the code below.
        This is done because checking whether keep is empty is too hard.
        '''
        # There may be no boxes kept at all, so append a bottom-scoring box so that fast_nms() can still proceed
        # extra_box = P.fill_constant((1, 4), 'float32', value=-1.0)
        # extra_score = P.fill_constant((P.shape(cur_scores)[0], 1), 'float32', value=-1.0)
        # extra_mask = P.fill_constant((1, P.shape(mask_data)[2]), 'float32', value=-1.0)
        # boxes = P.concat([boxes, extra_box], axis=0)
        # scores = P.concat([scores, extra_score], axis=1)
        # masks = P.concat([masks, extra_mask], axis=0)

        return self.fast_nms(boxes, scores, masks)
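
The workaround above relies on the fact that P.where returns an empty index tensor when no score clears the threshold, which P.gather in that GPU build cannot handle. A minimal NumPy sketch of the same pattern (NumPy standing in for the Paddle ops, values made up) shows why appending one guaranteed index keeps the gather well defined:

import numpy as np

conf_thresh = 0.3
conf_scores = np.array([0.10, 0.05, 0.20], dtype='float32')   # no score clears the threshold

keep = np.where(conf_scores > conf_thresh)[0]                  # empty -> gather would fail on the GPU build
keep_extra = np.where(conf_scores < conf_thresh)[0][:1]        # yields one index whenever some score is below the threshold
keep = np.concatenate([keep, keep_extra], axis=0)              # now guaranteed to hold at least one index

boxes = np.random.rand(3, 4).astype('float32')
print(boxes[keep])                                             # a (1, 4) slice instead of a crash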
Code Example #2
File: decode.py  Project: ColeFang/MOCO_ERNIE
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # the first step only considers beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  #[gather new beam state according to new beam id]
    #log.debug(gather_idx.numpy())
    #log.debug(state.finished.numpy())
    #log.debug(next_finished.numpy())

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    #log.debug(next_word_id.numpy())
    #log.debug(next_beam_id.numpy())
    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
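
After topk over the flattened [B, W*V] scores, each selected index encodes both the parent beam and the word: integer division by vocab_size recovers the beam, the remainder recovers the word, and prepending the batch row (which is what the L.where(idx != -1)[:, :1] term produces) gives the 2-D indices that gather_nd expects. A small NumPy sketch of that bookkeeping, with illustrative values only:

import numpy as np

beam_width, vocab_size = 2, 5
allprobs = np.arange(2 * beam_width * vocab_size, dtype='float32').reshape(2, -1)  # [B, W*V]

idx = np.argsort(-allprobs, axis=1)[:, :beam_width]         # stand-in for the topk indices, [B, W]
next_beam_id = idx // vocab_size                             # which parent beam each hypothesis extends
next_word_id = idx % vocab_size                              # which token it appends

rows = np.repeat(np.arange(allprobs.shape[0]), beam_width)   # batch row of every selected entry
gather_idx = np.stack([rows, idx.reshape(-1)], axis=1)       # 2-D indices, same role as L.concat([...], 1)
next_probs = allprobs[gather_idx[:, 0], gather_idx[:, 1]].reshape(idx.shape)
print(next_beam_id, next_word_id, next_probs)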
Code Example #3
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    beam_size, vocab_size = logits.shape  # batch size is 1 in this hub module, so the first dim (bsz * beam_size) equals beam_size
    logits_np = logits.numpy()
    for i in range(beam_size):
        logits_np[i][17963] = 0  # make [UNK] prob = 0
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # the first step only considers beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  #[gather new beam state according to new beam id]

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
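
A side note on the [UNK] handling above: writing 0 into a logit only lowers that token's softmax probability, it does not zero it out, whereas a large negative logit effectively does. A tiny NumPy check with made-up logits:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

logits = np.array([3.0, 1.0, 2.0], dtype='float32')   # pretend index 1 is [UNK]

zeroed = logits.copy()
zeroed[1] = 0.0          # what the snippet above does to the [UNK] logit
masked = logits.copy()
masked[1] = -1e9         # a large negative logit actually suppresses the token

print(softmax(zeroed)[1])   # small but clearly non-zero
print(softmax(masked)[1])   # effectively zero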
Code Example #4
def matrix_nms(bboxes,
               scores,
               score_threshold,
               post_threshold,
               nms_top_k,
               keep_top_k,
               use_gaussian=False,
               gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores, kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
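
The helper _matrix_nms is not shown in this snippet. For reference, below is a NumPy sketch of the linear-kernel score decay described in the SOLOv2 paper, which is what such a helper typically computes; the function name, signature and numbers are illustrative, not the actual helper:

import numpy as np

def matrix_nms_decay(ious, cate_labels, cate_scores):
    """ious: [n, n] pairwise IoU of detections already sorted by descending score."""
    n = len(cate_scores)
    same_class = (cate_labels[:, None] == cate_labels[None, :]).astype('float32')
    upper = np.triu(np.ones((n, n), dtype='float32'), k=1)   # pair (i, j) with i ranked above j
    iou = ious * same_class * upper
    compensate = iou.max(axis=0)                             # how much each box was itself suppressed
    decay = (1.0 - iou) / (1.0 - compensate[:, None])        # linear kernel
    return cate_scores * decay.min(axis=0)

scores = np.array([0.9, 0.8], dtype='float32')
labels = np.array([0, 0])
ious = np.array([[1.0, 0.6], [0.6, 1.0]], dtype='float32')
print(matrix_nms_decay(ious, labels, scores))   # [0.9, 0.32]: the overlapping box is decayed, not removed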
Code Example #5
def no_nms(bboxes,
           scores,
           score_threshold,
           keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
Code Example #6
def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    dtype = input.dtype
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))
    rank = len(input.shape)
    axes = axis if axis is not None and axis != [] else range(rank)
    axes = [e if e >= 0 else e + rank for e in axes]
    inp_shape = input.shape if fluid.in_dygraph_mode() else layers.shape(input)
    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean((input - mean)**2,
                             dim=axis,
                             keep_dim=keepdim,
                             name=name)

    if unbiased:
        n = 1
        for i in axes:
            n *= inp_shape[i]
        if not fluid.in_dygraph_mode():
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1],
                                              dtype=dtype,
                                              value=0.0)
            factor = layers.where(n > 1.0, n / (n - 1.0), zero_const)
        else:
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    if out:
        layers.assign(input=tmp, output=out)
        return out
    else:
        return tmp
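
The unbiased branch above scales the biased variance by the Bessel correction n/(n-1), where n is the number of elements being reduced. A quick NumPy cross-check of that factor (NumPy is used here purely for comparison):

import numpy as np

x = np.array([1.0, 2.0, 4.0, 7.0], dtype='float32')
n = x.size

biased = ((x - x.mean()) ** 2).mean()     # what reduce_mean((input - mean)**2) computes
unbiased = biased * n / (n - 1.0)         # the factor applied when unbiased=True

assert np.isclose(unbiased, x.var(ddof=1))   # matches NumPy's sample variance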
Code Example #7
def iou_single(a, b, mask, n_class):
    valid = mask == 1

    valid_flatten = paddle.reshape(valid, (-1, ))
    valid_flatten = paddle.cast(valid_flatten, dtype="int32")
    index = where(valid_flatten == 1)
    if index.shape[0] == 0:
        return paddle.zeros((1, ))

    index = paddle.reshape(index, (1, -1))
    a_flatten = paddle.reshape(a, (1, -1))
    a = paddle.index_sample(a_flatten, index)
    a = paddle.reshape(a, (-1, ))

    b_flatten = paddle.reshape(b, (1, -1))
    b = paddle.index_sample(b_flatten, index)
    b = paddle.reshape(b, (-1, ))

    miou = []
    for i in range(n_class):
        inter = paddle.logical_and(a == i, b == i)
        inter = paddle.cast(inter, dtype='float32')
        union = paddle.logical_or(a == i, b == i)
        union = paddle.cast(union, dtype='float32')

        miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
    miou = sum(miou) / len(miou)
    return miou
Code Example #8
def masked_select(input, mask):
    """Select the input value according to the mask
    
    Args:
        input: input matrix
        mask: mask matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> mask
    [
        [True, True, False],
        [True, False, False]
    ]
    >>> masked_select(input, mask)
    [1, 2, 4]
    """
    select = layers.where(mask)
    output = layers.gather_nd(input, select)
    return output
Code Example #9
 def forward(self, src_ids, *args, **kwargs):
     pooled, encoded = ErnieModel.forward(self, src_ids, *args, **kwargs)
     encoded_2d = L.gather_nd(encoded, L.where(src_ids == mask_id))
     encoded_2d = self.mlm(encoded_2d)
     encoded_2d = self.mlm_ln(encoded_2d)
     logits_2d = L.matmul(
         encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
     return logits_2d
Code Example #10
 def get_subgraph_by_masked(self, graph, mask):
     index = L.where(mask)
     if index.shape[0] > 0:
         edges = graph.edges
         sub_edges = paddle.gather(edges, index, axis=0)
         sg = pgl.Graph(sub_edges, num_nodes=graph.num_nodes)
         return sg
     else:
         return None
Code Example #11
    def fast_nms(self, boxes, scores, masks, max_num_detections=100):
        iou_threshold = self.nms_thresh
        top_k = self.top_k

        # Sort boxes within each class by score in descending order
        scores, idx = P.argsort(scores, axis=1, descending=True)

        idx = idx[:, :top_k]
        scores = scores[:, :top_k]

        num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]

        idx = P.reshape(idx, (-1, ))
        boxes = P.gather(boxes, idx)
        boxes = P.reshape(boxes, (num_classes, num_dets, 4))
        masks = P.gather(masks, idx)
        masks = P.reshape(masks, (num_classes, num_dets, -1))

        # Compute a c×n×n IoU matrix, where each n×n slice holds the pairwise IoU between that class's n candidate boxes
        iou = jaccard(boxes, boxes)
        # Since IoU(A,A)=1 and IoU(A,B)=IoU(B,A), post-process the IoU matrix from the previous step:
        # zero out the diagonal and the lower-triangular part of every channel
        rows = P.range(0, num_dets, 1, 'int32')
        cols = P.range(0, num_dets, 1, 'int32')
        rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
        cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
        tri_mask = P.cast(rows > cols, 'float32')
        tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)),
                            [num_classes, 1, 1])
        iou = tri_mask * iou
        iou_max = P.reduce_max(iou, dim=1)

        # Now just filter out the ones higher than the threshold
        keep = P.where(iou_max <= iou_threshold)

        # Assign each kept detection to its corresponding class
        classes = P.range(0, num_classes, 1, 'int32')
        classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
        classes = P.gather_nd(classes, keep)

        boxes = P.gather_nd(boxes, keep)
        masks = P.gather_nd(masks, keep)
        scores = P.gather_nd(scores, keep)

        # Only keep the top cfg.max_num_detections highest scores across all classes
        scores, idx = P.argsort(scores, axis=0, descending=True)
        idx = idx[:max_num_detections]
        scores = scores[:max_num_detections]

        classes = P.gather(classes, idx)
        boxes = P.gather(boxes, idx)
        masks = P.gather(masks, idx)

        return boxes, masks, classes, scores
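
The rows > cols mask above keeps, for every box, only its IoU with boxes ranked higher in the same class, so reduce_max over that dimension yields each box's worst overlap with a better-scoring box and a single comparison against the threshold performs the suppression. A NumPy sketch of that per-class rule on made-up IoUs:

import numpy as np

nms_thresh = 0.5
# pairwise IoU of 3 same-class boxes, already sorted by descending score (made-up values)
iou = np.array([[1.0, 0.7, 0.2],
                [0.7, 1.0, 0.3],
                [0.2, 0.3, 1.0]], dtype='float32')

upper = np.triu(np.ones_like(iou), k=1)   # same role as the rows > cols mask
iou_max = (iou * upper).max(axis=0)       # each box's max IoU with a higher-scoring box
keep = iou_max <= nms_thresh
print(keep)                               # [ True False  True]: box 1 overlaps box 0 too much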
Code Example #12
File: fastnms.py  Project: XrosLiang/Paddle-YOLOv4
def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k):
    '''
    :param boxes:    [?, 4]
    :param scores:   [80, ?]
    '''

    # Sort boxes within each class by score in descending order
    scores, idx = P.argsort(scores, axis=1, descending=True)

    idx = idx[:, :keep_top_k]
    scores = scores[:, :keep_top_k]

    num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]

    idx = P.reshape(idx, (-1, ))
    boxes = P.gather(boxes, idx)
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))

    # Compute a c×n×n IoU matrix, where each n×n slice holds the pairwise IoU between that class's n candidate boxes
    iou = _iou(boxes, boxes)

    # Since IoU(A,A)=1 and IoU(A,B)=IoU(B,A), post-process the IoU matrix from the previous step:
    # zero out the diagonal and the lower-triangular part of every channel
    rows = P.range(0, num_dets, 1, 'int32')
    cols = P.range(0, num_dets, 1, 'int32')
    rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
    cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
    tri_mask = P.cast(rows > cols, 'float32')
    tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)),
                        [num_classes, 1, 1])
    iou = tri_mask * iou
    iou_max = P.reduce_max(iou, dim=1)

    # Within the same class, a box is discarded if its maximum IoU with any higher-scoring box exceeds nms_thresh. The box at index 0 is always kept.
    keep = P.where(iou_max <= nms_thresh)

    # Assign each kept detection to its corresponding class
    classes = P.range(0, num_classes, 1, 'int32')
    classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(classes, keep)

    boxes = P.gather_nd(boxes, keep)
    scores = P.gather_nd(scores, keep)

    # Only keep the top cfg.max_num_detections highest scores across all classes
    scores, idx = P.argsort(scores, axis=0, descending=True)
    idx = idx[:nms_top_k]
    scores = scores[:nms_top_k]

    classes = P.gather(classes, idx)
    boxes = P.gather(boxes, idx)

    return boxes, scores, classes
Code Example #13
File: paddle_helper.py  Project: Yelrose/PGL
def masked_select(input, mask):
    """masked_select
    
    Slice values from the input according to the given mask.
   
    Args:
        input: Input tensor to be selected
         
        mask: A bool tensor used for slicing.
  
    Return:
        The part of input where mask is True.
    """
    index = L.where(mask)
    return L.gather(input, index)
Code Example #14
def ohem_single(score, gt_text, training_mask):
    gt_part = paddle.cast(gt_text > 0.5, dtype='float32')
    gt_tr_part = paddle.cast(paddle.logical_and(gt_text > 0.5,
                                                training_mask <= 0.5),
                             dtype='float32')
    pos_num = int(paddle.sum(gt_part)) - int(paddle.sum(gt_tr_part))
    #pos_num = int(np.sum(gt_text.numpy() > 0.5)) - int(np.sum((gt_text.numpy() > 0.5) & (training_mask.numpy() <= 0.5)))
    #pos_num = int(paddle.sum(gt_text > 0.5)) - int(paddle.sum((gt_text > 0.5) & (training_mask <= 0.5)))
    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0 # may be not good
        selected_mask = training_mask
        selected_mask = paddle.reshape(
            selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
        selected_mask = paddle.cast(selected_mask, dtype='float32')
        return selected_mask

    neg_num = int(np.sum(gt_text.numpy() <= 0.5))
    neg_num = int(min(pos_num * 3, neg_num))

    if neg_num == 0:
        selected_mask = training_mask
        # selected_mask = selected_mask.view(1, selected_mask.shape[0], selected_mask.shape[1]).float()
        selected_mask = paddle.reshape(
            selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
        selected_mask = paddle.cast(selected_mask, dtype='float32')
        return selected_mask

    gt_text_flatten = paddle.reshape(gt_text, (-1, ))
    index = where(gt_text_flatten <= 0.5)
    index = paddle.reshape(index, (1, -1))
    score_flatten = paddle.reshape(score, (1, -1))
    neg_score = paddle.index_sample(score_flatten, index)
    neg_score = paddle.reshape(neg_score, (-1, ))

    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]

    item1 = paddle.logical_or(score >= threshold, gt_text > 0.5)
    selected_mask = paddle.logical_and(item1, training_mask > 0.5)
    # selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).float()
    selected_mask = paddle.reshape(
        selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
    #selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1])
    selected_mask = paddle.cast(selected_mask, dtype='float32')
    return selected_mask
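
ohem_single keeps every positive pixel and only the hardest negatives, capped at three times the positive count: negatives are ranked by score and the cut-off is the score of the neg_num-th hardest one. A compact NumPy sketch of that selection rule with illustrative scores (the training_mask factor is omitted for brevity):

import numpy as np

score = np.array([0.9, 0.8, 0.7, 0.3, 0.2, 0.1], dtype='float32')   # network confidence per pixel
gt_text = np.array([1, 0, 0, 0, 0, 0], dtype='float32')             # 1 positive, 5 negatives

pos_num = int((gt_text > 0.5).sum())
neg_num = min(3 * pos_num, int((gt_text <= 0.5).sum()))             # keep at most 3x as many negatives

neg_score = score[gt_text <= 0.5]
threshold = -np.sort(-neg_score)[neg_num - 1]                       # score of the neg_num-th hardest negative

selected = (score >= threshold) | (gt_text > 0.5)                   # hard negatives plus all positives
print(selected)   # [ True  True  True  True False False]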
Code Example #15
    def get_loss(self, kernel_preds, cate_preds, mask_feats, ins_labels, cate_labels, grid_orders, fg_nums):
        '''
        The LoD information is lost, so this had to be rewritten.
        :param kernel_preds:  each element has shape [N, 256, seg_num_grid, seg_num_grid]: the convolution kernel predicted for each grid cell, ordered from small receptive field to large receptive field.
        :param cate_preds:    each element has shape [N,  80, seg_num_grid, seg_num_grid]: the class probabilities predicted for each grid cell, before sigmoid() activation, from small to large receptive field.
        :param mask_feats:    [bs, 256, s4, s4]   mask prototypes
        :param ins_labels:    5 elements, one per output level: the positive-sample masks. Each element has shape [M, s4, s4], where M is the positive-sample masks of all images in that level concatenated together, from small to large receptive field.
        :param cate_labels:   5 elements, one per output level: the class ids of the positive samples. Each element has shape [N*seg_num_grid*seg_num_grid, ], from small to large receptive field.
        :param grid_orders:   5 elements, one per output level: the indices of the positive samples within [N*seg_num_grid*seg_num_grid, ]. Each element has shape [M, ] and is aligned with the masks in ins_labels.
        :param fg_nums:       [N, ]    the number of positive samples in each image.
        :return:
        '''
        new_kernel_preds = []
        gathered_img_id_list = []
        for kernel_preds_level, grid_orders_level in zip(kernel_preds, grid_orders):
            # First, gather the convolution kernels predicted for the positive samples.
            # [N, 256, seg_num_grid, seg_num_grid] -> [N, seg_num_grid, seg_num_grid, 256]
            kernel_preds_level = L.transpose(kernel_preds_level, perm=[0, 2, 3, 1])
            reshape_pred = L.reshape(kernel_preds_level, shape=(-1, L.shape(kernel_preds_level)[-1]))   # [N*seg_num_grid*seg_num_grid, 256]
            gathered_pred = L.gather(reshape_pred, index=grid_orders_level)   # [M=5, 256]   e.g. the first image has 2 positive samples and the second has 3 (assuming batch_size=2)

            # Then, determine which image each positive sample belongs to.
            batch_size = L.shape(kernel_preds_level)[0]
            seg_num_grid = L.shape(kernel_preds_level)[1]
            img_ids = L.range(0, batch_size, 1, dtype='int32')   # [N, ]
            img_ids = L.unsqueeze(img_ids, axes=[1, 2])   # [N, 1, 1]
            img_ids = L.expand(img_ids, [1, seg_num_grid, seg_num_grid])   # [N, seg_num_grid, seg_num_grid]
            img_ids = L.reshape(img_ids, [-1, ])   # [N*seg_num_grid*seg_num_grid, ]
            gathered_img_id = L.gather(img_ids, index=grid_orders_level)   # [M=5, ]   which image each positive sample belongs to

            new_kernel_preds.append(gathered_pred)
            gathered_img_id_list.append(gathered_img_id)

        # Generate the masks
        ins_pred_list = []
        for kernel_pred, gathered_img_id in zip(new_kernel_preds, gathered_img_id_list):
            # kernel_pred     shape=[5, 256]   the first image is padded with one kernel so that every image in the batch predicts the same number of kernels.
            # gathered_img_id      data=[5, ]   which image each positive sample belongs to

            # The first image's positive-sample kernels convolve the first image's mask prototypes, the second image's kernels convolve the second image's prototypes, ...
            batch_size = L.shape(mask_feats)[0]
            cur_ins_pred = []
            for i in range(batch_size):
                mask_feat = mask_feats[i:i + 1]  # [1, 256, s4, s4]   mask prototypes
                interest = L.where(gathered_img_id == i)
                kr = L.gather(kernel_pred, interest)  # [m, 256]
                kr = L.unsqueeze(kr, [2, 3])  # [m, 256, 1, 1]
                pred_mask = F.conv2d(mask_feat, kr)  # [1, m, s4, s4]
                cur_ins_pred.append(L.squeeze(pred_mask, [0]))
            cur_ins_pred = L.concat(cur_ins_pred, 0)  # [M, s4, s4]
            ins_pred_list.append(cur_ins_pred)

        num_ins = fluid.layers.reduce_sum(fg_nums)   # total number of positive samples over all images and all output levels
        cate_preds = [   # each element of cate_preds becomes   [N*seg_num_grid*seg_num_grid, 80]
            fluid.layers.reshape(
                fluid.layers.transpose(cate_pred, [0, 2, 3, 1]),
                shape=(-1, self.cate_out_channels)) for cate_pred in cate_preds
        ]
        flatten_cate_preds = fluid.layers.concat(cate_preds)   # [N*seg_num_grid_1*seg_num_grid_1 + N*seg_num_grid_2*seg_num_grid_2 + ..., 80]
        new_cate_labels = []
        cate_labels = fluid.layers.concat(cate_labels)
        cate_labels = fluid.layers.unsqueeze(cate_labels, 1)
        loss_ins, loss_cate = self.solov2_loss(
            ins_pred_list, ins_labels, flatten_cate_preds, cate_labels, num_ins)

        return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
Code Example #16
File: fastnms.py  Project: XrosLiang/Paddle-YOLOv4
def fastnms(all_pred_boxes, all_pred_scores, resize_shape, origin_shape,
            conf_thresh, nms_thresh, keep_top_k, nms_top_k, use_yolo_box):
    '''
    :param all_pred_boxes:      [batch_size, -1, 4]
    :param all_pred_scores:     [batch_size, -1, 80]
    :param resize_shape:        [batch_size, 2]
    :param origin_shape:        [batch_size, 2]
    '''
    conf_preds = P.transpose(all_pred_scores, perm=[0, 2, 1])  # [1, 80, -1]
    cur_scores = conf_preds[0]  # [80, -1]
    conf_scores = P.reduce_max(cur_scores, dim=0)  # [-1, ]
    # If keep is [None] (empty) and keep is then used in gather(), the error
    # "cudaGetLastError  invalid configuration argument errno: 9" is raised.
    # To avoid this, keep must never be [None], so when keep is [None] it is assigned the single index [[0]].
    keep = P.where(conf_scores > conf_thresh)

    def exist_objs_1(keep):
        return keep

    def no_objs_1():
        keep_extra = P.zeros((1, 1), 'int64')
        return keep_extra

    keep = P.cond(P.shape(keep)[0] == 0, no_objs_1, lambda: exist_objs_1(keep))
    scores = P.gather(all_pred_scores[0], keep)
    scores = P.transpose(scores, perm=[1, 0])
    boxes = P.gather(all_pred_boxes[0], keep)
    boxes, scores, classes = fast_nms(boxes, scores, conf_thresh, nms_thresh,
                                      keep_top_k, nms_top_k)

    # Filter by score one more time. As mentioned above, a box is kept as long as its highest class score > threshold;
    # however, when the matrix above was computed, that box was effectively duplicated 80 times, one copy per class.
    # The scores of the non-top classes may fall below the threshold and have to be filtered out.
    # fastnms therefore has this quirk: if a box's top score > threshold and the score of some non-top class also
    # exceeds the threshold, both copies may end up being kept, and they share the same xywh.
    keep = P.where(scores > conf_thresh)

    def exist_objs_2(keep, boxes, classes, scores):
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        return boxes, classes, scores

    def no_objs_2(boxes, classes, scores):
        keep = P.zeros((1, 1), 'int64')
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        scores -= 2.0  # deliberately make the scores negative so that the Python side can filter them out
        return boxes, classes, scores

    boxes, classes, scores = P.cond(
        P.shape(keep)[0] == 0, lambda: no_objs_2(boxes, classes, scores),
        lambda: exist_objs_2(keep, boxes, classes, scores))
    # Convert to top-left and bottom-right corner coordinates
    boxes = P.concat(
        [boxes[:, :2] - boxes[:, 2:] * 0.5, boxes[:, :2] + boxes[:, 2:] * 0.5],
        axis=-1)

    # Scale back to the original image size
    resize_shape_f = P.cast(resize_shape, 'float32')
    origin_shape_f = P.cast(origin_shape, 'float32')
    if use_yolo_box:
        scale = origin_shape_f
    else:
        scale = origin_shape_f / resize_shape_f
    scale = P.expand(scale, [1, 2])
    boxes *= scale  # only valid when the batch size is 1, because dim 0 of scale is the batch size while dim 0 of boxes is the number of objects predicted in this image

    # Put the batch dimension first
    boxes = P.reshape(boxes, (1, -1, 4), name='boxes')
    scores = P.reshape(scores, (1, -1), name='scores')
    classes = P.reshape(classes, (1, -1), name='classes')
    return [boxes, scores, classes]
Code Example #17
def seq2seq(model, tokenizer, args):
    log.info('Training starts with args: %r' % args)
    attn_id = tokenizer.vocab[args.attn_token]

    def gen_mask(batch_ids, mask_type='bidi', query_len=None, pad_value=0):
        if query_len is None:
            query_len = batch_ids.shape[1]
        if mask_type != 'empty':
            mask = (batch_ids != pad_value).astype(np.float32)
            mask = np.tile(np.expand_dims(mask, 1), [1, query_len, 1])
            if mask_type == 'causal':
                assert query_len == batch_ids.shape[1]
                mask = np.tril(mask)
            elif mask_type == 'causal_without_diag':
                assert query_len == batch_ids.shape[1]
                mask = np.tril(mask, -1)
            elif mask_type == 'diag':
                assert query_len == batch_ids.shape[1]
                mask = np.stack([np.diag(np.diag(m)) for m in mask], 0)
        else:
            # mask_type is 'empty' here
            mask = np.zeros_like(batch_ids).astype(np.float32)
            mask = np.tile(np.expand_dims(mask, 1), [1, query_len, 1])
        return mask

    def make_some_noice(ids):
        if args.use_random_noice:
            noice_ids = np.random.randint(1,
                                          len(tokenizer.vocab),
                                          size=ids.shape)
        else:
            noice_ids = np.ones_like(ids) * tokenizer.vocab['[NOISE]']
        pos, = np.where(np.ones_like(ids))
        np.random.shuffle(pos)
        pos = pos[:int(args.noise_prob * len(pos))]
        ids[pos, ] = noice_ids[pos, ]
        return ids

    def map_fn(example_id, src_ids, tgt_ids):
        src_ids = src_ids[:args.max_encode_len]
        tgt_ids = tgt_ids[:args.max_decode_len]
        src_ids, src_sids = tokenizer.build_for_ernie(src_ids)
        src_pids = np.arange(len(src_ids))

        tgt_ids, tgt_sids = tokenizer.build_for_ernie(tgt_ids)
        tgt_pids = np.arange(len(tgt_ids)) + len(src_ids)  # positions continue after the source
        tgt_sids = np.ones_like(tgt_sids) * args.tgt_type_id

        attn_ids = np.ones_like(tgt_ids) * attn_id
        if args.noise_prob > 0.:
            tgt_labels = deepcopy(tgt_ids)
            tgt_ids = make_some_noice(tgt_ids)  #corrupted
        else:
            tgt_labels = tgt_ids

        return (example_id, src_ids, src_pids, src_sids, tgt_ids, tgt_pids,
                tgt_sids, attn_ids, tgt_labels)

    def after_padding(example_id, src_ids, src_pids, src_sids, tgt_ids,
                      tgt_pids, tgt_sids, attn_ids, tgt_labels):
        '''
        attention mask:
        ***  src,  tgt, attn
        src  00,   01,   11
        tgt  10,   11,   12
        attn 20,   21,   22

        ***   s1, s2 | t1 t2 t3| attn1 attn2 attn3
        s1    1,  1  | 0, 0, 0,| 0,    0,    0,
        s2    1,  1  | 0, 0, 0,| 0,    0,    0,
        -
        t1    1,  1, | 1, 0, 0,| 0,    0,    0,
        t2    1,  1, | 1, 1, 0,| 0,    0,    0,
        t3    1,  1, | 1, 1, 1,| 0,    0,    0,
        -
        attn1 1,  1, | 0, 0, 0,| 1,    0,    0,
        attn2 1,  1, | 1, 0, 0,| 0,    1,    0,
        attn3 1,  1, | 1, 1, 0,| 0,    0,    1,

        for details, see Fig3. https://arxiv.org/abs/2001.11314
        '''

        src_len = src_ids.shape[1]
        tgt_len = tgt_ids.shape[1]
        mask_00 = gen_mask(src_ids, 'bidi', query_len=src_len)
        mask_01 = gen_mask(tgt_ids, 'empty', query_len=src_len)
        mask_02 = gen_mask(attn_ids, 'empty', query_len=src_len)

        mask_10 = gen_mask(src_ids, 'bidi', query_len=tgt_len)
        mask_11 = gen_mask(tgt_ids, 'causal', query_len=tgt_len)
        mask_12 = gen_mask(attn_ids, 'empty', query_len=tgt_len)

        mask_20 = gen_mask(src_ids, 'bidi', query_len=tgt_len)
        mask_21 = gen_mask(tgt_ids, 'causal_without_diag', query_len=tgt_len)
        mask_22 = gen_mask(attn_ids, 'diag', query_len=tgt_len)
        '''
        mask = np.concatenate([
            np.concatenate([mask_00, mask_01, mask_02], 2),
            np.concatenate([mask_10, mask_11, mask_12], 2),
            np.concatenate([mask_20, mask_21, mask_22], 2),
        ], 1)

        ids = np.concatenate([src_ids, tgt_ids, attn_ids], 1)
        pids = np.concatenate([src_pids, tgt_pids, tgt_pids], 1)
        sids = np.concatenate([src_sids, tgt_sids, tgt_sids], 1)

        '''

        mask_src_2_src = mask_00
        mask_tgt_2_srctgt = np.concatenate([mask_10, mask_11], 2)
        mask_attn_2_srctgtattn = np.concatenate([mask_20, mask_21, mask_22], 2)

        tgt_labels = tgt_labels[np.where(tgt_labels != 0)]
        return (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids,
                tgt_pids, attn_ids, mask_src_2_src, mask_tgt_2_srctgt,
                mask_attn_2_srctgtattn, tgt_labels)

    bytes_vocab = {k.encode('utf8'): v for k, v in tokenizer.vocab.items()}
    feature_column = propeller.data.FeatureColumns([
        propeller.data.LabelColumn('id'),
        propeller.data.TextColumn('src',
                                  unk_id=tokenizer.unk_id,
                                  vocab_dict=bytes_vocab),
        propeller.data.TextColumn('tgt',
                                  unk_id=tokenizer.unk_id,
                                  vocab_dict=bytes_vocab),
    ])

    train_ds = feature_column.build_dataset('train', data_dir=os.path.join(args.data_dir, 'train'), shuffle=False, repeat=True, use_gz=False) \
                                   .map(map_fn)

    dev_ds = feature_column.build_dataset('dev', data_dir=os.path.join(args.data_dir, 'dev'), shuffle=False, repeat=False, use_gz=False) \
                                   .map(map_fn) \
                                   .padded_batch(args.eval_bsz) \
                                   .map(after_padding)

    log.debug('shard %d of %d' %
              (D.parallel.Env().dev_id, D.parallel.Env().nranks))
    train_ds = train_ds.shard(
        D.parallel.Env().nranks,
        D.parallel.Env().dev_id).shuffle(10000).padded_batch(
            args.bsz).map(after_padding)
    dev_ds = dev_ds.shard(D.parallel.Env().nranks, D.parallel.Env().dev_id)

    shapes = [[None, None]] * 7 + [[None, None, None]] * 3 + [[None]]
    types = ['int64'] * 11

    train_ds.data_shapes = shapes
    train_ds.data_types = types
    dev_ds.data_shapes = shapes
    dev_ds.data_types = types

    vocab_size, _ = model.word_emb.weight.shape
    ctx = D.parallel.prepare_context()
    model = D.parallel.DataParallel(model, ctx)
    g_clip = F.clip.GradientClipByGlobalNorm(1.0)
    opt = AdamW(learning_rate=LinearDecay(
        args.lr, int(args.warmup_proportion * args.max_steps), args.max_steps),
                parameter_list=model.parameters(),
                weight_decay=args.wd,
                grad_clip=g_clip)
    attn_id = tokenizer.vocab[args.attn_token]
    for step, data in enumerate(train_ds.start(place)):
        (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids, tgt_pids,
         attn_ids, mask_src_2_src, mask_tgt_2_srctgt, mask_attn_2_srctgtattn,
         tgt_labels) = data

        _, __, info = model(src_ids,
                            sent_ids=src_sids,
                            pos_ids=src_pids,
                            attn_bias=mask_src_2_src,
                            encode_only=True)
        cached_k, cached_v = info['caches']
        _, __, info = model(tgt_ids,
                            sent_ids=tgt_sids,
                            pos_ids=tgt_pids,
                            attn_bias=mask_tgt_2_srctgt,
                            past_cache=(cached_k, cached_v),
                            encode_only=True)
        cached_k2, cached_v2 = info['caches']
        past_cache_k = [
            L.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)
        ]
        past_cache_v = [
            L.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)
        ]
        if args.label_smooth > 0.:
            tgt_labels = L.label_smooth(F.one_hot(tgt_labels, vocab_size),
                                        epsilon=args.label_smooth)
        loss, _, __ = model(attn_ids,
                            sent_ids=tgt_sids,
                            pos_ids=tgt_pids,
                            attn_bias=mask_attn_2_srctgtattn,
                            past_cache=(past_cache_k, past_cache_v),
                            tgt_labels=tgt_labels,
                            tgt_pos=L.where(attn_ids == attn_id))

        scaled_loss = model.scale_loss(loss)
        scaled_loss.backward()
        model.apply_collective_grads()
        opt.minimize(scaled_loss)
        model.clear_gradients()
        if step % 10 == 0:
            loss = loss.numpy()
            ppl = np.exp(loss)
            log.debug('[step %d]train loss %.5f, ppl %.5f, lr %.3e' %
                      (step, loss, ppl, opt.current_step_lr()))
        if args.save_dir is not None and step % 1000 == 0 and D.parallel.Env(
        ).dev_id == 0:
            F.save_dygraph(model.state_dict(), args.save_dir)
        if args.predict_output_dir is not None and step > args.skip_eval_steps and step % args.eval_steps == 0:
            assert os.path.exists(
                args.predict_output_dir
            ), 'predict_output_dir not found: %s' % args.predict_output_dir
            log.debug('doing predict on gpu %d...' % D.parallel.Env().dev_id)
            evaluate(model, dev_ds, step, args)
        if step > args.max_steps:
            break
    evaluate(model, dev_ds, step, args)

    if args.save_dir is not None:
        F.save_dygraph(model.state_dict(), args.save_dir)
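
The attention-mask flavors used in after_padding differ only in how gen_mask post-processes the padding mask: 'bidi' keeps it as is, 'causal' applies np.tril, 'causal_without_diag' drops the diagonal as well, 'diag' keeps only the diagonal, and 'empty' zeroes everything. A small NumPy illustration on a length-3 query with no padding:

import numpy as np

mask = np.ones((3, 3), dtype='float32')   # what 'bidi' produces for 3 non-pad tokens

causal = np.tril(mask)                    # token t may attend to tokens <= t
causal_wo_diag = np.tril(mask, -1)        # token t may attend to tokens < t only
diag = np.diag(np.diag(mask))             # token t may attend to itself only

print(causal, causal_wo_diag, diag, sep='\n')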
Code Example #18
    def __call__(
            self,
            predictions,
            labels_pos_mask,  # Shape: [batch_size, 19248, 1]
            labels_neg_mask,  # Shape: [batch_size, 19248, 1]
            labels_allboxes_vector,  # Shape: [batch_size, 19248, 8]
            segment_t,  # list  Shape: [batch_size, 19248, 1]
            label_masks,
            labels_best_truth_idx,
            labels_pos_index,
            labels_pos_cid,  #  Shape: [batch_size, 19248]
            labels_pos_cid2,  #  Shape: [batch_size, 19248]
            priors,
            class_vectors,
            batch_size,
            use_maskiou=True,
            use_ce_loss=True,
            use_ghm_c_loss=False,
            use_focal_loss=False,
            use_ohem_loss=False):

        pred_allboxes_encode_x0y0x1y1 = predictions[
            'loc']  # Shape: [batch_size, 19248, 4]
        pred_allboxes_conf = predictions[
            'conf']  # Shape: [batch_size, 19248, 1+80]
        pred_allboxes_mask_coef = predictions[
            'mask']  # Shape: [batch_size, 19248, num_prototypes=32]
        pred_proto = predictions[
            'proto']  # Shape: [batch_size, s4=138, s4=138, num_prototypes=32]
        pred_segm = predictions[
            'segm']  # Shape: [batch_size, num_classes=80, s8=69, s8=69]

        labels_allboxes_x0y0x1y1 = labels_allboxes_vector[:, :, 0:
                                                          4]  # Shape: [batch_size, 19248, 4]
        labels_allboxes_decode_x0y0x1y1 = labels_allboxes_vector[:, :, 4:
                                                                 8]  # Shape: [batch_size, 19248, 4]

        losses = {}

        # 1. bbox_loss, computed for positive samples only.
        # bbox_alpha = 1.5
        # bbox_loss = P.smooth_l1(P.reshape(pred_allboxes_encode_x0y0x1y1, (-1, 4)), P.reshape(labels_allboxes_x0y0x1y1, (-1, 4)))
        # bbox_loss = P.reshape(labels_pos_mask, (-1, 1)) * bbox_loss
        # bbox_loss = P.reduce_sum(bbox_loss) * bbox_alpha
        # losses['B'] = bbox_loss

        # 1. bbox_loss: CIoU loss
        pred_x0y0x1y1 = []
        for idx in range(batch_size):
            temp = decode(pred_allboxes_encode_x0y0x1y1[idx], priors)
            pred_x0y0x1y1.append(temp)
        pred_x0y0x1y1 = P.concat(pred_x0y0x1y1,
                                 axis=0)  # Shape: [batch_size*num_priors, 4]
        pred_x0y0x1y1 = P.reshape(
            pred_x0y0x1y1,
            (batch_size, -1, 4))  # Shape: [batch_size, num_priors, 4]

        ciou = P.reshape(
            self.bbox_ciou(pred_x0y0x1y1, labels_allboxes_decode_x0y0x1y1),
            (batch_size, -1, 1))  # (batch_size, num_priors, 1)

        # weight of each predicted box's ciou_loss = 2 - (ground-truth area / image area)
        gt_area = (labels_allboxes_decode_x0y0x1y1[:, :, 2:3] - labels_allboxes_decode_x0y0x1y1[:, :, 0:1]) * \
                  (labels_allboxes_decode_x0y0x1y1[:, :, 3:4] - labels_allboxes_decode_x0y0x1y1[:, :, 1:2])
        bbox_loss_scale = 2.0 - gt_area
        ciou_loss = labels_pos_mask * bbox_loss_scale * (1 - ciou)
        bbox_alpha = 1.5
        ciou_loss = P.reduce_sum(ciou_loss) * bbox_alpha
        losses['B'] = ciou_loss

        # 2. mask_loss, positive samples only
        mask_h = P.shape(pred_proto)[1]
        mask_w = P.shape(pred_proto)[2]
        loss_m = 0
        maskiou_t_list = []
        maskiou_net_input_list = []
        label_t_list = []
        for idx in range(batch_size):
            # [[0], [0], [0], [0], [0], [0], [0], [0]]. Gather, for the 8 positive samples, the index (into label_x0y0x1y1cid[idx]) of their best-matching gt.
            # Since there is only one gt, all indices are 0.
            labels_pos_index[idx].stop_gradient = True
            cur_gt = P.gather(labels_best_truth_idx[idx],
                              labels_pos_index[idx])  # (?, 1)
            cur_gt.stop_gradient = True
            cur_x0y0x1y1 = P.gather(labels_allboxes_decode_x0y0x1y1[idx],
                                    labels_pos_index[idx])  # (?, 4)

            proto_masks = pred_proto[idx]  # (138, 138, 32)
            # pred_mask_coef (batch_size, 19248, 32). Gather the mask coefficients predicted by the 8 positive samples.
            proto_coef = P.gather(pred_allboxes_mask_coef[idx],
                                  labels_pos_index[idx])  # (?, 32)

            # (?, 138, 138). Gather the ground-truth masks matched by the 8 positive samples. Since they all match the same gt, it is the same mask repeated 8 times.
            mask_t = P.gather(label_masks[idx], cur_gt)  # (?, 138, 138)
            # (?, ). Gather the ground-truth class ids matched by the 8 positive samples. Since they all match the same gt, it is the same cid repeated 8 times.
            label_t = P.gather(labels_pos_cid[idx],
                               labels_pos_index[idx])  # (?, )

            # Size: (138, 138, ?)  =  prototypes x transposed coefficients
            pred_masks = P.matmul(proto_masks, proto_coef, transpose_y=True)
            pred_masks = P.sigmoid(pred_masks)  # sigmoid activation

            pred_masks = crop(pred_masks, cur_x0y0x1y1)
            pred_masks = P.transpose(pred_masks, perm=[2, 0, 1])

            # binary cross-entropy, with a tiny constant added to avoid NaN
            masks_pos_loss = mask_t * (0 - P.log(pred_masks + 1e-9))
            masks_neg_loss = (1 - mask_t) * (0 - P.log(1 - pred_masks + 1e-9))
            pre_loss = (masks_pos_loss + masks_neg_loss)
            pre_loss = P.reduce_sum(pre_loss, dim=[1, 2])

            # the smaller the gt area, the larger the weight of the corresponding mask loss
            cur_cxcywh = center_size(cur_x0y0x1y1)
            gt_box_width = cur_cxcywh[:, 2]
            gt_box_height = cur_cxcywh[:, 3]
            pre_loss = pre_loss / (gt_box_width * gt_box_height)
            loss_m += P.reduce_sum(pre_loss)

            if use_maskiou:
                # entries of mask_t with area <= 5*5 would normally be discarded
                # discard_mask_area = 5*5
                '''
                The GPU build of paddlepaddle 1.6.2 has a problem: if select is [None] (empty) and select is then used in gather(),
                the error "cudaGetLastError  invalid configuration argument errno: 9" is raised. The CPU build runs fine.
                To avoid this, select must never be [None], so no area filtering is done here and all of mask_t is kept.
                '''
                discard_mask_area = -1
                gt_mask_area = P.reduce_sum(mask_t, dim=[1, 2])
                gt_mask_area.stop_gradient = True
                select = P.where(gt_mask_area > discard_mask_area)
                select.stop_gradient = True
                pred_masks = P.gather(pred_masks, select)
                mask_t = P.gather(mask_t, select)
                label_t = P.gather(label_t, select)
                label_t.stop_gradient = True

                maskiou_net_input = P.reshape(
                    pred_masks, (P.shape(pred_masks)[0], 1, mask_h, mask_w))
                pred_masks = P.cast(pred_masks > 0.5, 'float32')  # round to 0/1
                maskiou_t = self._mask_iou(pred_masks, mask_t)  # (8, )
                maskiou_net_input_list.append(
                    maskiou_net_input)  # (8, 1, 138, 138)
                maskiou_t_list.append(maskiou_t)  # (8, )
                label_t_list.append(label_t)  # (8, )
        mask_alpha = 6.125
        losses['M'] = loss_m * mask_alpha / mask_h / mask_w

        # The remaining part
        if use_maskiou:
            maskiou_net_input = P.concat(
                maskiou_net_input_list,
                axis=0)  # (21, 1, 138, 138)  masks predicted by the 21 positive samples
            maskiou_t = P.concat(maskiou_t_list, axis=0)  # (21, )  IoU between the predicted and the ground-truth masks of the 21 positive samples
            label_t = P.concat(label_t_list, axis=0)  # (21, )  cid of the 21 positive samples
            label_t.stop_gradient = True  # because it is an integer tensor?
            maskiou_targets = [maskiou_net_input, maskiou_t, label_t]

        # 3. conf_loss
        conf_alpha = 1.0
        if use_ce_loss:
            conf_loss = self.ce_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                          labels_neg_mask, class_vectors,
                                          labels_pos_cid2, gt_area)
        elif use_ghm_c_loss:
            conf_loss = self.ghm_c_loss(pred_allboxes_conf, labels_pos_mask,
                                        labels_neg_mask, class_vectors,
                                        labels_pos_cid2)
        elif use_focal_loss:
            conf_loss = self.focal_conf_loss(pred_allboxes_conf,
                                             labels_pos_mask, labels_neg_mask,
                                             class_vectors, labels_pos_cid2)
        elif use_ohem_loss:
            conf_loss = self.ohem_conf_loss(pred_allboxes_conf, batch_size,
                                            labels_neg_mask, labels_pos_mask,
                                            labels_pos_index, class_vectors,
                                            labels_pos_cid)
        losses['C'] = conf_loss * conf_alpha

        # 4. mask_iou_loss, positive samples only.
        if use_maskiou:
            # maskiou_net_input  (21, 1, 138, 138)  masks predicted by the 21 positive samples
            # maskiou_t          (21, )             IoU between the predicted and ground-truth masks of the 21 positive samples
            # label_t            (21, )             cid of the 21 positive samples
            maskiou_net_input, maskiou_t, label_t = maskiou_targets
            maskiou_p = maskiou_net(maskiou_net_input, self.num_classes - 1)
            maskiou_p = P.reduce_max(maskiou_p, dim=[2, 3])  # global max pooling  (21, 80)
            temp_mask = P.gather(class_vectors, label_t)  # one-hot mask  (21, 81)
            temp_mask = temp_mask[:, 1:]  # one-hot mask  (21, 80)
            maskiou_p = temp_mask * maskiou_p  # keep only the channel of the true class  (21, 80)
            maskiou_p = P.reduce_sum(maskiou_p, dim=1,
                                     keep_dim=True)  # (21, 1)
            loss_i = P.smooth_l1(
                maskiou_p, P.reshape(maskiou_t, (P.shape(maskiou_t)[0], 1)))
            maskiou_alpha = 25.0
            losses['I'] = maskiou_alpha * P.reduce_sum(loss_i)

        # 5. semantic_segmentation_loss, positive samples only
        mask_h = P.shape(pred_segm)[2]
        mask_w = P.shape(pred_segm)[3]
        loss_s = 0.0
        for idx in range(batch_size):
            cur_segment = pred_segm[idx]  # (80, 69, 69)
            l = P.sigmoid_cross_entropy_with_logits(cur_segment,
                                                    segment_t[idx])
            loss_s += P.reduce_sum(l)

        semantic_segmentation_alpha = 1.0
        losses['S'] = loss_s / mask_h / mask_w * semantic_segmentation_alpha

        total_num_pos = P.cast(P.reduce_sum(labels_pos_mask), 'float32')
        for k in losses:
            if k not in ('S', ):
                losses[k] /= total_num_pos
            else:
                losses[k] /= batch_size
        total_loss = 0.0
        for k in losses:
            total_loss += losses[k]

        # Loss Key:
        #  - B: Box Localization Loss
        #  - M: Mask Loss
        #  - C: Class Confidence Loss
        #  - I: MaskIou Loss
        #  - S: Semantic Segmentation Loss
        # return losses['M'], losses['C']
        return losses, total_loss
Code Example #19
    def ohem_conf_loss(self, pred_allboxes_conf, batch_size, labels_neg_mask,
                       labels_pos_mask, labels_pos_index, class_vectors,
                       labels_pos_cid):
        batch_conf = P.reshape(pred_allboxes_conf, (-1, self.num_classes))
        loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0]
        loss_c = P.reshape(loss_c, (batch_size, -1))  # (batch_size, 19248)
        labels_neg_mask = P.concat(labels_neg_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_neg_mask = P.reshape(labels_neg_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        loss_c = labels_neg_mask * loss_c  # keep only the negative-sample losses, (batch_size, 19248)
        sorted_loss_c, loss_idx = P.argsort(loss_c, axis=-1, descending=True)

        labels_pos_mask = P.concat(labels_pos_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_pos_mask = P.reshape(labels_pos_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        num_pos = P.cast(P.reduce_sum(labels_pos_mask, dim=1),
                         'int32')  # (batch_size, )
        num_neg = self.negpos_ratio * num_pos  # (batch_size, )
        neg_topk_mask = []
        for idx in range(batch_size):
            desc = P.range(num_neg[idx],
                           num_neg[idx] - P.shape(labels_pos_mask)[1], -1,
                           'int32')
            neg_topk_mask.append(desc)
        neg_topk_mask = P.concat(neg_topk_mask, axis=0)  # (batch_size*19248, )
        neg_topk_mask = P.reshape(neg_topk_mask,
                                  (batch_size, -1))  # (batch_size, 19248)
        neg_topk_mask = P.cast(neg_topk_mask > 0,
                               'float32')  # (batch_size, 19248)
        sorted_loss_c = neg_topk_mask * sorted_loss_c
        selected_poss = []
        selected_negs = []
        selected_pos_class_vectors = []
        selected_neg_class_vectors = []
        for idx in range(batch_size):
            selected_neg_idx_idx = P.where(sorted_loss_c[idx] > 0)
            selected_neg_idx_idx.stop_gradient = True
            selected_neg_idx = P.gather(loss_idx[idx], selected_neg_idx_idx)
            selected_neg_idx.stop_gradient = True
            selected_neg = P.gather(pred_allboxes_conf[idx], selected_neg_idx)
            selected_neg.stop_gradient = True
            selected_negs.append(selected_neg)
            selected_pos = P.gather(pred_allboxes_conf[idx],
                                    labels_pos_index[idx])
            selected_pos.stop_gradient = True
            selected_poss.append(selected_pos)

            zeros = P.fill_constant(shape=[
                P.shape(selected_neg)[0],
            ],
                                    value=0,
                                    dtype='int32')
            zeros.stop_gradient = True
            selected_neg_class_vector = P.gather(class_vectors, zeros)
            selected_neg_class_vector.stop_gradient = True
            selected_neg_class_vectors.append(selected_neg_class_vector)

            labels_pos_cid.stop_gradient = True
            labels_pos_index[idx].stop_gradient = True
            selected_pos_cid = P.gather(labels_pos_cid[idx],
                                        labels_pos_index[idx])
            selected_pos_cid.stop_gradient = True
            selected_pos_class_vector = P.gather(class_vectors,
                                                 selected_pos_cid)
            selected_pos_class_vector.stop_gradient = True
            selected_pos_class_vectors.append(selected_pos_class_vector)
        selected_negs = P.concat(selected_negs, axis=0)  # (?, 1+80)
        selected_poss = P.concat(selected_poss, axis=0)  # (?, 1+80)
        pred_ = P.concat([selected_negs, selected_poss], axis=0)  # (?, 1+80)
        selected_neg_class_vectors = P.concat(selected_neg_class_vectors,
                                              axis=0)  # (?, 1+80)
        selected_pos_class_vectors = P.concat(selected_pos_class_vectors,
                                              axis=0)  # (?, 1+80)
        labels_ = P.concat(
            [selected_neg_class_vectors, selected_pos_class_vectors],
            axis=0)  # (?, 1+80)

        # softmax cross-entropy
        fenzi = P.exp(pred_)
        fenmu = P.reduce_sum(fenzi, dim=1, keep_dim=True)
        pred_prob = fenzi / P.expand_as(fenmu, target_tensor=fenzi)
        conf_loss = labels_ * (0 - P.log(pred_prob + 1e-9))  # cross-entropy, with a tiny constant added to avoid NaN
        conf_loss = P.reduce_sum(conf_loss)
        return conf_loss
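
The hand-rolled softmax cross-entropy at the end (exp / sum, then -label * log(p)) is numerically fragile for large logits; subtracting the per-row maximum first yields the same probabilities without overflow, since softmax is shift-invariant. A NumPy sketch of the equivalent, stabilized computation with illustrative logits and one-hot labels:

import numpy as np

pred = np.array([[2.0, 1.0, 0.1],
                 [0.5, 2.5, 0.2]], dtype='float32')   # stands in for the (?, 1+80) logits above
labels = np.array([[1, 0, 0],
                   [0, 1, 0]], dtype='float32')       # one-hot class vectors

shifted = pred - pred.max(axis=1, keepdims=True)       # stabilization; the softmax output is unchanged
prob = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
conf_loss = (labels * -np.log(prob + 1e-9)).sum()
print(conf_loss)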
Code Example #20
    def __call__(self, kernel_preds, cls_preds, mask_protos,
                 batch_gt_objs_tensors, batch_gt_clss_tensors,
                 batch_gt_masks_tensors, batch_gt_pos_idx_tensors):
        '''
        :param kernel_preds:  each element has shape [N, 256, seg_num_grid, seg_num_grid]: the convolution kernel predicted for each grid cell, from small receptive field to large receptive field.
        :param cls_preds:     each element has shape [N,  80, seg_num_grid, seg_num_grid]: the class probabilities predicted for each grid cell, before sigmoid() activation, from small to large receptive field.
        :param mask_protos:   [bs, 256, s4, s4]   mask prototypes
        :param batch_gt_objs_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 1]:  objectness of each grid cell, from small to large receptive field.
        :param batch_gt_clss_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 80]: ground-truth class one-hot of each grid cell, from small to large receptive field.
        :param batch_gt_masks_tensors:  each element has shape [N, -1, s4, s4]:  ground-truth masks, from small to large receptive field.
        :param batch_gt_pos_idx_tensors: each element has shape [N, -1, 3]:  indices of the positive samples, from small to large receptive field.
        :return:
        '''

        batch_size = self.batch_size
        num_layers = len(kernel_preds)

        # ================= Compute the losses =================
        num_ins = 0.  # number of positive samples in this batch of images
        loss_clss, loss_masks = [], []
        for bid in range(batch_size):
            for lid in range(num_layers):
                # ================ Mask loss ======================
                mask_proto = mask_protos[bid]  # [256, s4, s4]   mask prototypes produced for this image.
                kernel_pred = kernel_preds[lid][
                    bid]  # [256, seg_num_grid, seg_num_grid]   kernels predicted per grid cell (the "mask coefficients" in yolact)
                kernel_pred = L.transpose(
                    kernel_pred, perm=[1, 2, 0]
                )  # [seg_num_grid, seg_num_grid, 256]   kernels predicted per grid cell (the "mask coefficients" in yolact)

                gt_objs = batch_gt_objs_tensors[lid][
                    bid]  # [seg_num_grid, seg_num_grid, 1]
                gt_masks = batch_gt_masks_tensors[lid][bid]  # [-1, s4, s4]
                pmidx = batch_gt_pos_idx_tensors[lid][bid]  # [-1, 3]
                gt_objs.stop_gradient = True
                gt_masks.stop_gradient = True
                pmidx.stop_gradient = True

                idx_sum = L.reduce_sum(pmidx, dim=1)
                keep = L.where(idx_sum > -1)
                keep = L.reshape(keep, (-1, ))
                keep.stop_gradient = True
                pmidx = L.gather(pmidx, keep)  # [M, 3]

                yx_idx = pmidx[:, :2]  # [M, 2]
                m_idx = pmidx[:, 2]  # [M, ]
                yx_idx.stop_gradient = True
                m_idx.stop_gradient = True

                # gather them out
                gt_obj = L.gather_nd(gt_objs, yx_idx)  # [M, 1]        whether this is a true positive sample.
                pos_krn = L.gather_nd(kernel_pred, yx_idx)  # [M, 256]      kernels (mask coefficients) of the positive samples.
                gt_mask = L.gather(gt_masks, m_idx)  # [M, s4, s4]   ground-truth masks.

                # number of positive samples
                num_ins += L.reduce_sum(gt_obj)

                # generate the predicted masks
                mask_proto = L.transpose(mask_proto, perm=[1, 2,
                                                           0])  # [s4, s4, 256]
                masks = L.matmul(mask_proto, pos_krn,
                                 transpose_y=True)  # [s4, s4, M]
                masks = L.sigmoid(masks)  # [s4, s4, M]
                masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]
                loss_mask = self.dice_loss(masks, gt_mask, gt_obj)
                loss_masks.append(loss_mask)

                # ================ Classification loss: sigmoid_focal_loss() ======================
                gamma = self.loss_gamma
                alpha = self.loss_alpha
                pred_conf = cls_preds[lid][
                    bid]  # [80, seg_num_grid, seg_num_grid]    before sigmoid() activation.
                pred_conf = L.transpose(pred_conf, perm=[
                    1, 2, 0
                ])  # [seg_num_grid, seg_num_grid, 80]    before sigmoid() activation.
                pred_conf = L.sigmoid(
                    pred_conf
                )  # [seg_num_grid, seg_num_grid, 80]    after sigmoid() activation.
                gt_clss = batch_gt_clss_tensors[lid][
                    bid]  # [seg_num_grid, seg_num_grid, 80]    ground-truth class one-hot
                gt_clss.stop_gradient = True
                pos_loss = gt_clss * (0 - L.log(pred_conf + 1e-9)) * L.pow(
                    1 - pred_conf, gamma) * alpha
                neg_loss = (
                    1.0 - gt_clss) * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(
                        pred_conf, gamma) * (1 - alpha)
                focal_loss = pos_loss + neg_loss
                focal_loss = L.reduce_sum(focal_loss, dim=[0, 1])
                loss_clss.append(focal_loss)
        loss_masks = L.concat(loss_masks, axis=0)
        loss_masks = L.reduce_sum(loss_masks) * self.ins_loss_weight
        loss_masks = loss_masks / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_clss = L.concat(loss_clss, axis=0)
        loss_clss = L.reduce_sum(loss_clss) * self.clss_loss_weight
        loss_clss = loss_clss / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_all = {"loss_masks": loss_masks, "loss_clss": loss_clss}
        return loss_all
Code Example #21
File: decode.py  Project: xiaoyangyang2/PaddleHub
 def reorder_(t, parent_id):
     """reorder cache according to parent beam id"""
     gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(
         parent_id, [-1])
     t = L.gather(t, gather_idx)
     return t
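A small numeric sketch of the index arithmetic reorder_ relies on, assuming parent_id has shape [batch, beam_width] and the cache tensor t is laid out as [batch * beam_width, ...] (all numbers below are made up):

import numpy as np

beam_width = 3
parent_id = np.array([[2, 0, 1],
                      [1, 1, 0]])  # parent beam of each new hypothesis
# L.where(parent_id != -1)[:, 0] yields the batch index of every entry in row-major
# order, so the flattened row to copy is batch_idx * beam_width + parent_id.
batch_idx = np.arange(parent_id.size) // beam_width
gather_idx = batch_idx * beam_width + parent_id.reshape(-1)
print(gather_idx)  # [2 0 1 4 4 3] -> rows of t gathered for the reordered cache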
コード例 #22
0
def build_pad_idx(input_mask):
    pad_idx = L.where(L.cast(L.squeeze(input_mask, [2]), 'bool'))
    return pad_idx
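For reference, a sketch of what build_pad_idx returns, assuming input_mask has shape [batch, seq_len, 1] with 1 marking real tokens and 0 marking padding (numpy stands in for the Paddle ops):

import numpy as np

input_mask = np.array([[[1], [1], [1], [0]],
                       [[1], [1], [0], [0]]])  # [batch=2, seq_len=4, 1]
mask = input_mask.squeeze(-1).astype(bool)
pad_idx = np.argwhere(mask)  # same role as L.where(...) on the boolean mask
print(pad_idx)
# [[0 0] [0 1] [0 2] [1 0] [1 1]] -> [row, col] of every non-padding token,
# usable with gather_nd to drop padded positions.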
コード例 #23
0
ファイル: module.py プロジェクト: zkming9/PaddleHub
    def finetune(
            self,
            train_path,
            dev_path=None,
            save_dir="ernie_gen_result",
            init_ckpt_path=None,
            use_gpu=True,
            max_steps=500,
            batch_size=8,
            max_encode_len=50,
            max_decode_len=50,
            learning_rate=5e-5,
            warmup_proportion=0.1,
            weight_decay=0.1,
            noise_prob=0,
            label_smooth=0,
            beam_width=5,
            length_penalty=1.0,
            log_interval=100,
            save_interval=200,
    ):
        """
        finetune with the specified dataset.

        Args:
            train_path(str): the train dataset path.
            dev_path(str): the dev dataset path.
            save_dir(str): the directory where model params and dev-set prediction results are saved.
            init_ckpt_path(str): incremental training load path.
            use_gpu(bool): use gpu or not.
            max_steps(int): max training steps.
            batch_size(int): the batch size.
            max_encode_len(int): the max encode length.
            max_decode_len(int): the max decode length.
            learning_rate(float): the learning rate.
            warmup_proportion(float): the warmup proportion.
            weight_decay(float): the weight decay magnitude.
            noise_prob(float): the noise probability. See the ERNIE-GEN paper for details.
            label_smooth(float): the label smooth magnitude.
            beam_width(int): the beam size during evaluating the dev dataset.
            length_penalty(float): the length penalty during evaluating the dev dataset.
            log_interval(int): the log interval.
            save_interval(int): the save interval. The dev set will be evaluated after saving.

        Return:
            result(dict): A Dictionary of shape::
                {
                    last_save_path(str): last model save path.
                    last_ppl(float): last model ppl.
                }
        """
        self.max_encode_len = max_encode_len
        self.max_decode_len = max_decode_len
        self.noise_prob = noise_prob

        place = F.CUDAPlace(0) if use_gpu else F.CPUPlace()

        with F.dygraph.guard(place):
            if init_ckpt_path is not None:
                logger.info('loading checkpoint from %s' % init_ckpt_path)
                sd, _ = D.load_dygraph(init_ckpt_path)
                self.model.set_dict(sd)

            feature_column = propeller.data.FeatureColumns([
                propeller.data.LabelColumn('id'),
                propeller.data.TextColumn(
                    'src',
                    unk_id=self.tokenizer.unk_id,
                    vocab_dict=self.tokenizer.vocab,
                    tokenizer=self.tokenizer.tokenize),
                propeller.data.TextColumn(
                    'tgt',
                    unk_id=self.tokenizer.unk_id,
                    vocab_dict=self.tokenizer.vocab,
                    tokenizer=self.tokenizer.tokenize),
            ])

            train_ds = feature_column.build_dataset('train', data_file=train_path, shuffle=False,
                                                    repeat=True, use_gz=False)\
                .map(self._map_fn).shuffle(10000).padded_batch(batch_size).map(self._after_padding)
            train_ds.data_shapes = [[None, None]] * 7 + [[None, None, None]
                                                         ] * 3 + [[None]]
            train_ds.data_types = ['int64'] * 11

            if dev_path:
                dev_ds = feature_column.build_dataset('dev', data_file=dev_path, shuffle=False,
                                                    repeat=False, use_gz=False) \
                    .map(self._map_fn) \
                    .padded_batch(1) \
                    .map(self._after_padding)
                dev_ds.data_shapes = [[None, None]] * 7 + [[None, None, None]
                                                           ] * 3 + [[None]]
                dev_ds.data_types = ['int64'] * 11

            vocab_size, _ = self.model.word_emb.weight.shape
            g_clip = F.clip.GradientClipByGlobalNorm(1.0)
            opt = AdamW(
                learning_rate=LinearDecay(learning_rate,
                                          int(warmup_proportion * max_steps),
                                          max_steps),
                parameter_list=self.model.parameters(),
                weight_decay=weight_decay,
                grad_clip=g_clip)

            loss = None

            save_path = None
            ppl = None

            if save_dir and not os.path.exists(save_dir):
                os.makedirs(save_dir)
            for step, data in enumerate(train_ds.start(place)):
                (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids,
                 tgt_pids, attn_ids, mask_src_2_src, mask_tgt_2_srctgt,
                 mask_attn_2_srctgtattn, tgt_labels) = data

                _, __, info = self.model(
                    src_ids,
                    sent_ids=src_sids,
                    pos_ids=src_pids,
                    attn_bias=mask_src_2_src,
                    encode_only=True)
                cached_k, cached_v = info['caches']
                _, __, info = self.model(
                    tgt_ids,
                    sent_ids=tgt_sids,
                    pos_ids=tgt_pids,
                    attn_bias=mask_tgt_2_srctgt,
                    past_cache=(cached_k, cached_v),
                    encode_only=True)
                cached_k2, cached_v2 = info['caches']
                past_cache_k = [
                    L.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)
                ]
                past_cache_v = [
                    L.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)
                ]
                if label_smooth > 0.:
                    tgt_labels = L.label_smooth(
                        F.one_hot(tgt_labels, vocab_size), epsilon=label_smooth)
                loss, _, __ = self.model(
                    attn_ids,
                    sent_ids=tgt_sids,
                    pos_ids=tgt_pids,
                    attn_bias=mask_attn_2_srctgtattn,
                    past_cache=(past_cache_k, past_cache_v),
                    tgt_labels=tgt_labels,
                    tgt_pos=L.where(attn_ids == self.tokenizer.vocab['[MASK]']))

                loss.backward()
                opt.minimize(loss)
                self.model.clear_gradients()

                if step % log_interval == 0:
                    loss_np = loss.numpy()
                    ppl = np.exp(loss_np)
                    logger.info(
                        '[step %d / %d]train loss %.5f, ppl %.5f, elr %.3e' %
                        (step, max_steps, loss_np, ppl, opt.current_step_lr()))
                if save_dir and step % save_interval == 0 and step > 0:
                    loss_np = loss.numpy()
                    ppl = np.exp(loss_np)
                    save_name = "step_%s_ppl_%.5f" % (step, ppl)
                    save_path = os.path.join(save_dir, save_name)
                    logger.info("save the model in %s" % save_path)
                    F.save_dygraph(self.model.state_dict(), save_path)

                    if dev_path:
                        logger.info('evaluating...')
                        res = self._evaluate(dev_ds, place, beam_width,
                                             length_penalty)
                        output_path = os.path.join(
                            save_dir, "step_%s_ppl_%.5f.txt" % (step, ppl))
                        logger.info(
                            'save the predict result in %s' % output_path)
                        with open(output_path, 'w') as fout:
                            fout.write(('\n'.join(res)))

                if step > max_steps:
                    break

            if loss:
                loss_np = loss.numpy()
                ppl = np.exp(loss_np)
                logger.info('[final step %d]train loss %.5f, ppl %.5f, elr %.3e'
                            % (step, loss_np, ppl, opt.current_step_lr()))
                if save_dir:
                    save_name = "step_%s_ppl_%.5f" % (step, ppl)
                    save_path = os.path.join(save_dir, save_name)
                    logger.info("save the model in %s" % save_path)
                    F.save_dygraph(self.model.state_dict(), save_path)

                    if dev_path:
                        logger.info('evaluating...')
                        res = self._evaluate(dev_ds, place, beam_width,
                                             length_penalty)
                        output_path = os.path.join(
                            save_dir, "step_%s_ppl_%.5f.txt" % (step, ppl))
                        logger.info(
                            'save the predict result in %s' % output_path)
                        with open(output_path, 'w') as fout:
                            fout.write(('\n'.join(res)))

            result = {
                "last_save_path": "%s.pdparams" % save_path,
                "last_ppl": ppl[0],
            }

            return result
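A hedged usage sketch for this finetune entry point, assuming the module is loaded through PaddleHub; the module name and data paths below are placeholders, and the train/dev files are expected to provide the id, src and tgt columns declared by the FeatureColumns above:

import paddlehub as hub

module = hub.Module(name="ernie_gen")  # hypothetical module name -- substitute your own
result = module.finetune(
    train_path="data/train.txt",
    dev_path="data/dev.txt",
    save_dir="ernie_gen_result",
    use_gpu=True,
    max_steps=500,
    batch_size=8,
    learning_rate=5e-5,
)
print(result["last_save_path"], result["last_ppl"])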
コード例 #24
0
    def get_seg_single(self, cate_preds, mask_proto, kernel_preds,
                       featmap_size, resize_shape, ori_shape):
        '''
        :param cate_preds:   [num_grids_total, 80]
        :param mask_proto:   [1, 256, s4, s4]   mask prototypes
        :param kernel_preds:   [num_grids_total, 256]   the 1x1 convolution kernel predicted by each grid cell; its input channel count is 256, the number of prototype channels.
        :param featmap_size:   (s4, s4)
        :param resize_shape:   shape=[3, ]
        :param ori_shape:      shape=[3, ]
        :return:
        '''
        # overall info.
        upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4
                              )  # size of the image fed to the network
        cfg = self.nms_cfg

        # First filter: score threshold
        inds = L.where(cate_preds > cfg['score_thr'])  # [M, 2]

        # if len(inds) == 0:
        #     return None
        # Writing conditionals in static-graph mode is painful, so L.cond() is used instead.
        def exist_objs_1(inds, cate_preds):
            inds.stop_gradient = True
            scores = L.gather_nd(cate_preds, inds)  # [M, ]   scores of the M objects
            return inds, scores

        def no_objs_1(cate_preds):
            inds = L.zeros((1, 2), np.int64)
            inds.stop_gradient = True
            scores = L.gather_nd(cate_preds,
                                 inds) - 99.0  # [M, ]   scores of the M objects; forced negative so they are filtered out later.
            return inds, scores

        # Are there any objects?
        inds, scores = L.cond(
            L.shape(inds)[0] == 0, lambda: no_objs_1(cate_preds),
            lambda: exist_objs_1(inds, cate_preds))

        classes = inds[:, 1]  # [M, ]   class ids of the M objects
        kernel_preds = L.gather(kernel_preds, inds[:,
                                                   0])  # [M, 256]   kernels of the M objects

        n_stage = len(self.seg_num_grids)  # 5 output levels
        strides = []
        for ind_ in range(n_stage):
            st = L.zeros((1, ), dtype=np.float32) + self.strides[ind_]
            st = L.expand(st, [
                self.seg_num_grids[ind_]**2,
            ])  # [40*40, ]
            strides.append(st)
        strides = L.concat(strides, axis=0)
        strides.stop_gradient = True
        strides = L.gather(strides, inds[:, 0])  # [M, ]   downsample stride of each of the M objects

        # Mask encoding. The original SOLO code convolves the mask prototypes with the
        # predicted 1x1 kernels:
        # M, C = kernel_preds.shape
        # kernel_preds = kernel_preds.view(M, C, 1, 1)    # used as convolution kernels
        # seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1).squeeze(0).sigmoid()
        # A 1x1 convolution over the prototypes is equivalent to a matrix multiplication
        # (this does NOT hold for 3x3 kernels). No equivalent conv api was found here, so
        # matmul is used instead; SOLOv2 and YOLACT are identical at this step.
        mask_proto = L.squeeze(mask_proto, axes=[0])  # [256, s4, s4]
        mask_proto = L.transpose(mask_proto, perm=[1, 2, 0])  # [s4, s4, 256]
        masks = L.matmul(mask_proto, kernel_preds,
                         transpose_y=True)  # [s4, s4, M]
        masks = L.sigmoid(masks)  # [s4, s4, M]
        masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]

        # mask.
        seg_masks = L.cast(masks > cfg['mask_thr'],
                           'float32')  # [M, s4, s4]   1 where foreground
        sum_masks = L.reduce_sum(seg_masks, dim=[1, 2])  # [M, ]   mask area of each of the M objects

        # Second filter: by stride. A mask is kept only if its area exceeds its downsample stride.
        keep = L.where(sum_masks > strides)

        # if keep.sum() == 0:
        #     return None

        # Writing conditionals in static-graph mode is painful, so L.cond() is used instead.
        def exist_objs_2(keep, seg_masks, masks, sum_masks, scores, classes):
            keep = L.reshape(keep, (-1, ))  # [M2, ]
            keep.stop_gradient = True
            seg_masks = L.gather(seg_masks, keep)  # [M2, s4, s4]   binary masks of the M2 objects
            masks = L.gather(masks, keep)  # [M2, s4, s4]   mask probabilities of the M2 objects
            sum_masks = L.gather(sum_masks, keep)  # [M2, ]   mask areas of the M2 objects
            scores = L.gather(scores, keep)  # [M2, ]   scores of the M2 objects
            classes = L.gather(classes, keep)  # [M2, ]   class ids of the M2 objects
            return seg_masks, masks, sum_masks, scores, classes

        def no_objs_2(seg_masks, masks, sum_masks, scores, classes):
            keep = L.zeros((1, ), np.int64)
            keep.stop_gradient = True
            seg_masks = L.gather(seg_masks, keep)  # [M2, s4, s4]   binary masks of the M2 objects
            masks = L.gather(masks, keep)  # [M2, s4, s4]   mask probabilities of the M2 objects
            sum_masks = L.gather(sum_masks, keep)  # [M2, ]   mask areas of the M2 objects
            scores = L.gather(scores,
                              keep) - 99.0  # [M2, ]   scores of the M2 objects; forced negative so they are filtered out later.
            classes = L.gather(classes, keep)  # [M2, ]   class ids of the M2 objects
            return seg_masks, masks, sum_masks, scores, classes

        # Are there any objects?
        seg_masks, masks, sum_masks, scores, classes = L.cond(
            L.shape(keep)[0] == 0,
            lambda: no_objs_2(seg_masks, masks, sum_masks, scores, classes),
            lambda: exist_objs_2(keep, seg_masks, masks, sum_masks, scores,
                                 classes))

        # mask scoring.
        # [M2, ]   sum the foreground mask probabilities and divide by the mask area,
        # i.e. the average foreground mask probability of each of the M2 objects
        avg_prob = L.reduce_sum(masks * seg_masks, dim=[1, 2]) / sum_masks
        scores *= avg_prob  # [M2, ]   final score = classification prob * average mask prob

        # Third filter: keep only the top cfg['nms_pre'] objects by score
        _, sort_inds = L.argsort(scores, axis=-1,
                                 descending=True)  # indices of the scores in descending order
        sort_inds = sort_inds[:cfg['nms_pre']]  # at most cfg['nms_pre'] objects.

        seg_masks = L.gather(seg_masks, sort_inds)  # [M3, s4, s4]   binary masks of the M3 objects
        masks = L.gather(masks, sort_inds)  # [M3, s4, s4]   mask probabilities of the M3 objects
        sum_masks = L.gather(sum_masks, sort_inds)  # [M3, ]   mask areas of the M3 objects
        scores = L.gather(scores, sort_inds)  # [M3, ]   scores of the M3 objects
        classes = L.gather(classes, sort_inds)  # [M3, ]   class ids of the M3 objects

        # Matrix NMS
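        # Matrix NMS rescales each score by a decay factor computed from the mask's IoU
        # with higher-scoring masks of the same class (gaussian or linear kernel), instead
        # of hard-suppressing overlaps; the rescaled low scores are removed by the next filter.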
        scores = matrix_nms(seg_masks,
                            classes,
                            scores,
                            kernel=cfg['kernel'],
                            sigma=cfg['sigma'],
                            sum_masks=sum_masks)

        # Fourth filter: score threshold
        keep = L.where(scores >= cfg['update_thr'])

        # if keep.sum() == 0:
        #     return None

        def exist_objs_3(keep, masks, classes, scores, upsampled_size_out,
                         resize_shape, ori_shape):
            keep = L.reshape(keep, (-1, ))
            keep.stop_gradient = True
            masks = L.gather(masks, keep)  # [M4, s4, s4]   mask probabilities of the M4 objects
            scores = L.gather(scores, keep)  # [M4, ]   scores of the M4 objects
            classes = L.gather(classes, keep)  # [M4, ]   class ids of the M4 objects

            # Fifth filter: keep only the top cfg['max_per_img'] objects by score
            _, sort_inds = L.argsort(scores, axis=-1, descending=True)
            sort_inds = sort_inds[:cfg['max_per_img']]
            sort_inds.stop_gradient = True

            masks = L.gather(masks, sort_inds)  # [M5, s4, s4]   mask probabilities of the M5 objects
            scores = L.gather(scores, sort_inds)  # [M5, ]   scores of the M5 objects
            classes = L.gather(classes, sort_inds)  # [M5, ]   class ids of the M5 objects

            masks = L.resize_bilinear(
                L.unsqueeze(masks, axes=[0]),
                out_shape=upsampled_size_out,
                align_corners=False,
                align_mode=0)[:, :, :resize_shape[0], :resize_shape[1]]  # crop away the padded borders
            masks = L.resize_bilinear(masks,
                                      out_shape=ori_shape[:2],
                                      align_corners=False,
                                      align_mode=0)  # resize to the original image size
            masks = L.cast(masks > cfg['mask_thr'], 'float32')[0]
            return masks, classes, scores

        def no_objs_3():
            masks = L.zeros([1, 1, 1], 'float32') - 1.0
            classes = L.zeros([
                1,
            ], 'int64') - 1
            scores = L.zeros([
                1,
            ], 'float32') - 2.0
            return masks, classes, scores

        # Are there any objects?
        masks, classes, scores = L.cond(
            L.shape(keep)[0] == 0, no_objs_3,
            lambda: exist_objs_3(keep, masks, classes, scores,
                                 upsampled_size_out, resize_shape, ori_shape))
        return masks, classes, scores
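The comment before the matmul notes that applying the predicted 1x1 kernels to the mask prototypes is equivalent to a matrix multiplication. A small numpy sketch (all sizes made up) that checks this equivalence:

import numpy as np

C, H, W, M = 256, 6, 6, 3                               # channels, prototype size, num kernels
mask_proto = np.random.rand(C, H, W).astype('float32')  # [256, s4, s4]
kernel_preds = np.random.rand(M, C).astype('float32')   # [M, 256]

# 1x1 convolution: every output pixel is a dot product over the channel axis.
conv_out = np.einsum('mc,chw->mhw', kernel_preds, mask_proto)

# The matmul form used in get_seg_single: [s4, s4, 256] x [256, M] -> [s4, s4, M], then transpose.
matmul_out = (mask_proto.transpose(1, 2, 0) @ kernel_preds.T).transpose(2, 0, 1)

assert np.allclose(conv_out, matmul_out, atol=1e-5)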