Esempio n. 1
0
 def get_feature_by_coordinate(self, x, coord, offset_h, offset_w,
                               padded_x_w):
     """Gather values of ``x`` at the flat positions encoded in ``coord``.

     The method builds a flat index from ``coord``, then emulates a gather
     along the last axis of the flattened feature map using a single 1-D
     ``paddle.gather`` on fully flattened tensors.

     Args:
         x: Feature tensor; its first two dimensions are kept and all
             remaining dimensions are flattened.
             (presumably [batch, channel, H, W] -- TODO confirm)
         coord: 4-D tensor whose last axis holds ``2 * self.N`` values; the
             first ``self.N`` are scaled by ``padded_x_w`` and the last
             ``self.N`` are added to them.
         offset_h: Size used for the third dimension of the result.
         offset_w: Size used for the fourth dimension of the result.
         padded_x_w: Multiplier applied to the first half of ``coord``
             (presumably the width of the padded input -- TODO confirm).

     Returns:
         Tensor reshaped to ``(-1, self.in_channel, offset_h, offset_w, self.N)``.
     """
     # Keep the first two dims, flatten everything else into one axis.
     x = paddle.reshape(x, [0, 0, -1])
     index = paddle.cast(
         coord[:, :, :, :self.N] * padded_x_w,
         dtype='int64') + coord[:, :, :, self.N:]  # offset_x*w + offset_y
     # Insert an axis at position 1 and repeat the index self.in_channel
     # times along it, then flatten the trailing dims to get a 3-D index.
     index = paddle.unsqueeze(index, 1)
     index = paddle.tile(index, [1, self.in_channel, 1, 1, 1])
     index = paddle.reshape(index, (0, 0, -1))
     # Build the permutation [2, 1, 0]: swap axis 0 with axis ``dim``.
     x_range = list(range(3))
     dim = 2
     x_range[0] = dim
     x_range[dim] = 0
     x_swaped = paddle.transpose(x, perm=x_range)
     # Same permutation for the index tensor.
     index_range = list(range(3))
     index_range[0] = dim
     index_range[dim] = 0
     index_swaped = paddle.transpose(index, perm=index_range)
     x_shape = layers.shape(x_swaped)
     index_shape = layers.shape(index_swaped)
     # Number of elements in one slice along the (swapped) leading axis.
     prod = paddle.prod(x_shape[1:], keepdim=True)
     x_swaped_flattend = paddle.reshape(x_swaped, [-1])
     index_swaped_flattend = paddle.reshape(index_swaped, [-1])
     # Turn "index along leading axis" into an offset in the flat tensor:
     # flat = index * prod + position_within_slice.
     index_swaped_flattend *= prod
     # NOTE(review): bias is created as float32 while the index was cast to
     # int64 above -- confirm the resulting dtype is accepted by gather.
     bias = paddle.arange(start=0, end=prod, step=1, dtype='float32')
     bias = paddle.tile(bias, index_shape[0])
     index_swaped_flattend += bias
     gathered = paddle.gather(x_swaped_flattend, index_swaped_flattend)
     gathered = paddle.reshape(gathered, layers.shape(index_swaped))
     # [2, 1, 0] is its own inverse, so this undoes the earlier swap.
     x_offset = paddle.transpose(gathered, perm=x_range)
     x_offset = paddle.reshape(
         x_offset, (-1, self.in_channel, offset_h, offset_w, self.N))
     return x_offset
Esempio n. 2
0
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two batched box sets.

    Both inputs are reshaped and tiled to a common ``[n, A, B, 4]`` layout:
        box_a: [n, A, 4] -> [n, A, 1, 4] -> [n, A, B, 4]
        box_b: [n, B, 4] -> [n, 1, B, 4] -> [n, A, B, 4]
    The first two coordinates of a box are treated as its minimum corner
    and the last two as its maximum corner.

    Args:
      box_a: (tensor) bounding boxes, Shape: [n,A,4].
      box_b: (tensor) bounding boxes, Shape: [n,B,4].
    Return:
      (tensor) intersection area, Shape: [n,A,B].
    """
    shape_a = P.shape(box_a)
    batch, num_a = shape_a[0], shape_a[1]
    num_b = P.shape(box_b)[1]

    tiled_a = P.expand(P.reshape(box_a, (batch, num_a, 1, 4)),
                       [1, 1, num_b, 1])
    tiled_b = P.expand(P.reshape(box_b, (batch, 1, num_b, 4)),
                       [1, num_a, 1, 1])

    # Corners of the overlap rectangle for every (a, b) pair.
    min_corner = P.elementwise_max(tiled_a[:, :, :, :2], tiled_b[:, :, :, :2])
    max_corner = P.elementwise_min(tiled_a[:, :, :, 2:], tiled_b[:, :, :, 2:])

    # Negative extents (no overlap) are clamped to zero by the relu.
    extent = P.relu(max_corner - min_corner)
    side_0 = extent[:, :, :, 0]
    side_1 = extent[:, :, :, 1]
    return side_0 * side_1
Esempio n. 3
0
def jaccard(box_a, box_b, iscrowd: bool = False):
    """Return the pairwise jaccard overlap (IoU) of two sets of boxes.

    The overlap is intersection over union:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    When ``iscrowd`` is True the intersection is divided by area(A) only;
    the crowd boxes are expected in ``box_b``.

    Args:
        box_a: (tensor) boxes, Shape: [num_a, 4] or [n, num_a, 4].
        box_b: (tensor) boxes, Shape: [num_b, 4] or [n, num_b, 4].
        iscrowd: Use area(A) instead of the union as the denominator.
    Return:
        (tensor) overlap, Shape: [num_a, num_b] for 2-D inputs, otherwise
        [n, num_a, num_b].
    """
    batched = len(box_a.shape) != 2
    if not batched:
        # Promote unbatched inputs to a batch of one.
        box_a = P.reshape(box_a, (1, P.shape(box_a)[0], P.shape(box_a)[1]))
        box_b = P.reshape(box_b, (1, P.shape(box_b)[0], P.shape(box_b)[1]))

    inter = intersect(box_a, box_b)

    # area(A), tiled along the B axis so it matches ``inter``.
    extent_a0 = box_a[:, :, 2] - box_a[:, :, 0]
    extent_a1 = box_a[:, :, 3] - box_a[:, :, 1]
    area_a = extent_a0 * extent_a1
    area_a = P.reshape(area_a, (P.shape(area_a)[0], P.shape(area_a)[1], 1))
    area_a = P.expand(area_a, [1, 1, P.shape(inter)[2]])

    # area(B), tiled along the A axis so it matches ``inter``.
    extent_b0 = box_b[:, :, 2] - box_b[:, :, 0]
    extent_b1 = box_b[:, :, 3] - box_b[:, :, 1]
    area_b = extent_b0 * extent_b1
    area_b = P.reshape(area_b, (P.shape(area_b)[0], 1, P.shape(area_b)[1]))
    area_b = P.expand(area_b, [1, P.shape(inter)[1], 1])

    union = area_a + area_b - inter

    if iscrowd:
        out = inter / area_a
    else:
        out = inter / union

    if batched:
        return out
    return out[0]
Esempio n. 4
0
def concat_coord(x):
    """Append two coordinate channels to a 4-D feature map.

    An x-coordinate channel and a y-coordinate channel, each linearly spaced
    over [-1, 1] across the width / height, are concatenated to ``x`` along
    axis 1.

    Args:
        x: Feature tensor of shape [N, c, h, w].

    Returns:
        Tensor of shape [N, c+2, h, w].
    """
    shape = L.shape(x)
    batch_size, h, w = shape[0], shape[2], shape[3]
    float_h = L.cast(h, 'float32')
    float_w = L.cast(w, 'float32')

    # 1-D coordinates: index i maps to 2*i/(size-1) - 1.
    ys = 2.0 * L.range(0., float_h, 1., dtype='float32') / (float_h - 1.0) - 1.0  # [h, ]
    xs = 2.0 * L.range(0., float_w, 1., dtype='float32') / (float_w - 1.0) - 1.0  # [w, ]

    # Broadcast both to a full [h, w] grid.
    xs = L.expand(L.reshape(xs, (1, -1)), [h, 1])
    ys = L.expand(L.reshape(ys, (-1, 1)), [1, w])

    # Add channel and batch axes, then repeat per sample: [N, 1, h, w].
    xs = L.expand(L.reshape(xs, (1, 1, h, w)), [batch_size, 1, 1, 1])
    ys = L.expand(L.reshape(ys, (1, 1, h, w)), [batch_size, 1, 1, 1])

    return L.concat([x, xs, ys], axis=1)  # [N, c+2, h, w]
Esempio n. 5
0
    def fast_nms(self, boxes, scores, masks, max_num_detections=100):
        """Run matrix-based NMS per class and return the best detections.

        Args:
            boxes: Box tensor indexed by detection (rows of 4 values).
            scores: Per-class score tensor, one row per class.
            masks: Per-detection mask data, gathered alongside ``boxes``.
            max_num_detections: Upper bound on detections returned across
                all classes.

        Returns:
            Tuple ``(boxes, masks, classes, scores)`` of the kept detections,
            ordered by descending score.
        """
        nms_thresh = self.nms_thresh
        per_class_k = self.top_k

        # Sort every class row by descending score and keep the first top_k.
        scores, order = P.argsort(scores, axis=1, descending=True)
        order = order[:, :per_class_k]
        scores = scores[:, :per_class_k]

        order_shape = P.shape(order)
        num_classes, num_dets = order_shape[0], order_shape[1]

        flat_order = P.reshape(order, (-1, ))
        boxes = P.reshape(P.gather(boxes, flat_order),
                          (num_classes, num_dets, 4))
        masks = P.reshape(P.gather(masks, flat_order),
                          (num_classes, num_dets, -1))

        # Pairwise IoU between the candidates of each class
        # (expected shape [num_classes, num_dets, num_dets]).
        iou = jaccard(boxes, boxes)

        # IoU(i, i) = 1 and IoU(i, j) = IoU(j, i), so only the strict upper
        # triangle (column index > row index) is kept; the diagonal and the
        # lower triangle are zeroed.
        det_ids = P.range(0, num_dets, 1, 'int32')
        col_idx = P.expand(P.reshape(det_ids, (1, -1)), [num_dets, 1])
        row_idx = P.expand(P.reshape(det_ids, (-1, 1)), [1, num_dets])
        upper = P.cast(col_idx > row_idx, 'float32')
        upper = P.expand(P.reshape(upper, (1, num_dets, num_dets)),
                         [num_classes, 1, 1])
        iou = upper * iou

        # For each candidate: its largest IoU with any higher-scored
        # candidate of the same class.
        iou_max = P.reduce_max(iou, dim=1)

        # Keep candidates whose overlap does not exceed the threshold.
        keep = P.where(iou_max <= nms_thresh)

        # Class id of every kept candidate.
        class_grid = P.range(0, num_classes, 1, 'int32')
        class_grid = P.expand(P.reshape(class_grid, (-1, 1)), [1, num_dets])
        classes = P.gather_nd(class_grid, keep)

        boxes = P.gather_nd(boxes, keep)
        masks = P.gather_nd(masks, keep)
        scores = P.gather_nd(scores, keep)

        # Keep only the max_num_detections best scores across all classes.
        scores, order = P.argsort(scores, axis=0, descending=True)
        order = order[:max_num_detections]
        scores = scores[:max_num_detections]

        classes = P.gather(classes, order)
        boxes = P.gather(boxes, order)
        masks = P.gather(masks, order)

        return boxes, masks, classes, scores
Esempio n. 6
0
def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k):
    '''Run matrix-based NMS per class and return the best detections.

    :param boxes:    [?, 4]
    :param scores:   [80, ?]
    :param conf_thresh: accepted but not used by this function
    :param nms_thresh:  IoU threshold above which a lower-scored box is dropped
    :param keep_top_k:  candidates kept per class before NMS
    :param nms_top_k:   detections kept across all classes after NMS
    :return: (boxes, scores, classes) ordered by descending score
    '''
    # Sort every class row by descending score and keep the first keep_top_k.
    scores, order = P.argsort(scores, axis=1, descending=True)
    order = order[:, :keep_top_k]
    scores = scores[:, :keep_top_k]

    order_shape = P.shape(order)
    num_classes, num_dets = order_shape[0], order_shape[1]

    boxes = P.gather(boxes, P.reshape(order, (-1, )))
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))

    # Pairwise IoU between the candidates of each class
    # (expected shape [num_classes, num_dets, num_dets]).
    iou = _iou(boxes, boxes)

    # IoU(i, i) = 1 and IoU(i, j) = IoU(j, i), so only the strict upper
    # triangle (column index > row index) is kept; the diagonal and the
    # lower triangle are zeroed.
    det_ids = P.range(0, num_dets, 1, 'int32')
    col_idx = P.expand(P.reshape(det_ids, (1, -1)), [num_dets, 1])
    row_idx = P.expand(P.reshape(det_ids, (-1, 1)), [1, num_dets])
    upper = P.cast(col_idx > row_idx, 'float32')
    upper = P.expand(P.reshape(upper, (1, num_dets, num_dets)),
                     [num_classes, 1, 1])
    iou = upper * iou
    iou_max = P.reduce_max(iou, dim=1)

    # Within a class, a box is dropped when its highest IoU with any
    # higher-scored box exceeds nms_thresh. The box at index 0 is always kept.
    keep = P.where(iou_max <= nms_thresh)

    # Class id of every kept candidate.
    class_grid = P.range(0, num_classes, 1, 'int32')
    class_grid = P.expand(P.reshape(class_grid, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(class_grid, keep)

    boxes = P.gather_nd(boxes, keep)
    scores = P.gather_nd(scores, keep)

    # Keep only the nms_top_k best scores across all classes.
    scores, order = P.argsort(scores, axis=0, descending=True)
    order = order[:nms_top_k]
    scores = scores[:nms_top_k]

    classes = P.gather(classes, order)
    boxes = P.gather(boxes, order)

    return boxes, scores, classes
def _relative_attention_inner(q, k, v, transpose):
    """Sum a plain ``q x k`` product with a per-position ``q x v`` product.

    Args:
        q: 4-D tensor; its first three dims are read as
            (batch_size, heads, length).
        k: Second operand of the direct matmul with ``q``.
        v: Second operand of the per-position matmul
            (presumably relative-position embeddings -- TODO confirm).
        transpose: Passed as ``transpose_y`` to both matmuls.

    Returns:
        ``matmul(q, k)`` plus the per-position term rearranged back to a
        [batch_size, heads, length, -1] layout.
    """
    q_shape = layers.shape(q)
    batch_size, heads, length = q_shape[0], q_shape[1], q_shape[2]

    direct_term = layers.matmul(q, k, transpose_y=transpose)

    # Move the length axis to the front and merge batch and heads so that
    # the matmul with ``v`` is done independently for each position.
    q_by_pos = layers.transpose(q, [2, 0, 1, 3])
    q_by_pos = layers.reshape(q_by_pos, [length, batch_size * heads, -1])
    pos_term = layers.matmul(q_by_pos, v, transpose_y=transpose)

    # Undo the merge and the axis move.
    pos_term = layers.reshape(pos_term, [length, batch_size, heads, -1])
    pos_term = layers.transpose(pos_term, [1, 2, 0, 3])

    return direct_term + pos_term
Esempio n. 8
0
    def get_prediction(self, feats, eval=True):
        """Run the kernel and category branches on every feature level.

        Args:
            feats: Mapping from name to feature tensor; five entries are
                indexed (0..4) in key order.
            eval: When true, category predictions are passed through
                sigmoid, ``points_nms`` and moved to channels-last.

        Returns:
            ``[kernel_preds, cls_preds]``, two lists with one tensor per
            entry of ``self.seg_num_grids``.
        """
        levels = [feats[name] for name in list(feats.keys())]

        # Five tensors with strides [8, 8, 16, 32, 32]: the first and last
        # are resized to the spatial size of their neighbours.
        # align_corners=False, align_mode=0 is required to match the output
        # of the original SOLO implementation.
        first = L.resize_bilinear(levels[0],
                                  out_shape=L.shape(levels[1])[2:],
                                  align_corners=False,
                                  align_mode=0)
        last = L.resize_bilinear(levels[4],
                                 out_shape=L.shape(levels[3])[2:],
                                 align_corners=False,
                                 align_mode=0)
        new_feats = [first, levels[1], levels[2], levels[3], last]

        kernel_preds = []
        cls_preds = []
        for idx, seg_num_grid in enumerate(self.seg_num_grids):
            # ============ kernel branch ============
            # Attach coordinate channels ([N, c+2, h, w]) and resize to the
            # grid: [N, c+2, seg_num_grid, seg_num_grid].
            kernel_feat = L.resize_bilinear(
                concat_coord(new_feats[idx]),
                out_shape=[seg_num_grid, seg_num_grid],
                align_corners=False,
                align_mode=0)

            # The category branch uses the same grid features without the
            # two coordinate channels: [N, c, seg_num_grid, seg_num_grid].
            cls_feat = kernel_feat[:, :-2, :, :]

            for conv in self.krn_convs:
                kernel_feat = conv(kernel_feat)
            for conv in self.cls_convs:
                cls_feat = conv(cls_feat)

            if eval:
                # Activated scores, channels-last:
                # [N, seg_num_grid, seg_num_grid, num_classes].
                cls_feat = L.transpose(points_nms(L.sigmoid(cls_feat),
                                                  kernel=2),
                                       perm=[0, 2, 3, 1])

            kernel_preds.append(kernel_feat)  # predicted kernel per grid cell
            cls_preds.append(cls_feat)  # raw logits unless ``eval`` is set
        return [kernel_preds, cls_preds]
Esempio n. 9
0
 def __call__(self, input):
     """Append x / y coordinate channels to ``input`` when enabled.

     Returns ``input`` unchanged if ``self.coord_conv`` is falsy; otherwise
     returns ``input`` concatenated along axis 1 with two channels holding
     coordinates linearly spaced over [-1, 1] across the width and height.
     """
     if not self.coord_conv:
         return input

     shape = L.shape(input)
     b, h, w = shape[0], shape[2], shape[3]

     # Index i maps to i / (size - 1) * 2 - 1.
     xs = L.range(0, w, 1., dtype='float32') / (w - 1) * 2.0 - 1
     ys = L.range(0, h, 1., dtype='float32') / (h - 1) * 2.0 - 1

     # [1, 1, 1, w] -> [b, 1, h, w]
     xs = L.expand(L.reshape(xs, (1, 1, 1, -1)), [b, 1, h, 1])
     # [1, 1, h, 1] -> [b, 1, h, w]
     ys = L.expand(L.reshape(ys, (1, 1, -1, 1)), [b, 1, 1, w])

     return L.concat([input, xs, ys], axis=1)
Esempio n. 10
0
def bbox_iou(boxes1, boxes2):
    '''Compute the IoU between every predicted box and every ground truth.

    Predicted boxes  boxes1 (?, grid_h, grid_w, 3,   1, 4): network outputs
                     (tx, ty, tw, th) already decoded to (bx, by, bw, bh).
    All image gts    boxes2 (?,      1,      1, 1, 150, 4)

    Ellipsis indexing (``boxes1[..., 2]``) is not supported here, so every
    slice spells out all the colons.

    Returns the IoU tensor of shape (?, grid_h, grid_w, 3, 150).
    '''
    # Areas from (w, h), taken before the boxes are converted to corners.
    area1 = boxes1[:, :, :, :, :, 2] * boxes1[:, :, :, :, :, 3]
    area2 = boxes2[:, :, :, :, :, 2] * boxes2[:, :, :, :, :, 3]

    def _to_corners(boxes):
        # (x, y, w, h) -> (x0, y0, x1, y1)
        center = boxes[:, :, :, :, :, :2]
        half_size = boxes[:, :, :, :, :, 2:] * 0.5
        return P.concat([center - half_size, center + half_size], axis=-1)

    boxes1 = _to_corners(boxes1)
    boxes2 = _to_corners(boxes2)

    # Unlike pytorch / tf, both operands must be expanded explicitly to the
    # common shape (?, grid_h, grid_w, 3, 150, 4).
    num_gt = P.shape(boxes2)[4]
    shape1 = P.shape(boxes1)
    tiled1 = P.expand(boxes1, [1, 1, 1, 1, num_gt, 1])
    tiled2 = P.expand(boxes2, [1, shape1[1], shape1[2], shape1[3], 1, 1])

    # Corners of the overlap rectangle.
    left_up = P.elementwise_max(tiled1[:, :, :, :, :, :2],
                                tiled2[:, :, :, :, :, :2])
    right_down = P.elementwise_min(tiled1[:, :, :, :, :, 2:],
                                   tiled2[:, :, :, :, :, 2:])

    # Overlap width / height, clamped to 0 when negative.
    overlap = P.relu(right_down - left_up)
    inter_area = overlap[:, :, :, :, :, 0] * overlap[:, :, :, :, :, 1]

    tiled_area1 = P.expand(area1, [1, 1, 1, 1, P.shape(boxes2)[4]])
    area1_shape = P.shape(tiled_area1)
    tiled_area2 = P.expand(
        area2, [1, area1_shape[1], area1_shape[2], area1_shape[3], 1])

    union_area = tiled_area1 + tiled_area2 - inter_area
    return 1.0 * inter_area / union_area
Esempio n. 11
0
def PredictionModule(x,
                     num_priors,
                     num_classes,
                     mask_dim,
                     shared_conv_w,
                     shared_conv_b,
                     shared_bbox_w,
                     shared_bbox_b,
                     shared_conf_w,
                     shared_conf_b,
                     shared_mask_w,
                     shared_mask_b):
    r'''Shared prediction head with bbox, conf and mask-coefficient branches.

    Adapted from the PredictionModule of DSSD, using 3x3 convolutions.
               x
             / | \
        bbox conf mask

    Every branch output is moved to channels-last and reshaped to
    [batch, -1, values_per_prior]; the mask branch is additionally passed
    through tanh.

    Returns a dict with keys 'loc', 'conf' and 'mask'.
    '''
    x = P.relu(
        P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=shared_conv_w,
                 bias_attr=shared_conv_b))

    def _branch(weight_attr, bias_attr, values_per_prior):
        # 3x3 conv -> NHWC -> [batch, -1, values_per_prior]
        out = P.conv2d(x, num_priors * values_per_prior,
                       filter_size=(3, 3), stride=1, padding=1,
                       param_attr=weight_attr,
                       bias_attr=bias_attr)
        out = P.transpose(out, perm=[0, 2, 3, 1])
        return P.reshape(out, (P.shape(out)[0], -1, values_per_prior))

    bbox = _branch(shared_bbox_w, shared_bbox_b, 4)
    conf = _branch(shared_conf_w, shared_conf_b, num_classes)
    mask = P.tanh(_branch(shared_mask_w, shared_mask_b, mask_dim))

    return {'loc': bbox, 'conf': conf, 'mask': mask}
Esempio n. 12
0
        def __call__(self, msg):
            """Reduce incoming messages into one feature per destination.

            Reads ``msg["alpha"]`` (attention logits) and ``msg["v"]``
            (values), normalises the logits with a sequence softmax, weights
            the values and sum-pools them. ``attn_drop``, ``num_heads``,
            ``concat`` and ``hidden_size`` come from the enclosing scope,
            which is not visible here.

            Side effect: stores the normalised attention on ``self.alpha``.
            """
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            if attn_drop:
                # Attention dropout implemented as an additive mask on the
                # logits before the softmax.
                old_h = alpha
                # Threshold is fed at run time through an input named
                # 'attn_drop'.
                # NOTE(review): it is declared int64 but compared against a
                # float uniform sample below -- confirm the intended dtype.
                dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
                # One uniform sample in [0, 1) per row of alpha.
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")
                # With bias_after_scale=False the bias is presumably added
                # before scaling, i.e. 10000 * (keeped - 1): 0 for kept rows,
                # -10000 for dropped rows -- confirm against the
                # fluid.layers.scale documentation.
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                # Repeat the per-row mask for every head.
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                alpha = n_head_self_attn_mask + alpha
                # Re-attach the LoD of the original alpha to the masked one.
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            # Re-attach the LoD of the original values after the multiply.
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                # Concatenate the heads into one feature vector.
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                # Average over axis 1 (presumably the head axis -- TODO confirm).
                h = L.reduce_mean(h, dim=1)
            return h
Esempio n. 13
0
 def attention(self, hidden, encoder_output, encoder_output_proj,
               encoder_padding_mask):
     """Compute the attention context vector c_i for one decoder state.

     The decoder state is projected, added to the projected encoder outputs,
     scored with a size-1 fully connected layer, normalised with softmax and
     used to take a weighted sum of ``encoder_output`` over axis 1.

     Args:
         hidden: Current decoder state.
         encoder_output: Encoder outputs that are averaged into the context.
         encoder_output_proj: Projected encoder outputs used for scoring.
         encoder_padding_mask: Optional additive mask applied to the scores
             before the softmax; skipped when ``None``.

     Returns:
         The context tensor.
     """
     # Project the decoder state and give it a length-1 axis at position 1.
     projected_state = layers.fc(hidden, size=self.hidden_size, bias_attr=False)
     decoder_state_proj = layers.unsqueeze(projected_state, [1])

     # NOTE(review): the repeat count is read from decoder_state_proj's own
     # axis 1 (just created by the unsqueeze), not from encoder_output_proj
     # -- confirm this is intended.
     expand_times = [1, layers.shape(decoder_state_proj)[1], 1]
     tiled_state = layers.expand(decoder_state_proj, expand_times)

     # Combine the decoder state with every encoder output.
     mixed_state = fluid.layers.elementwise_add(encoder_output_proj,
                                                tiled_state)

     # Reduce every combined vector to a single score.
     raw_scores = layers.fc(input=mixed_state,
                            size=1,
                            num_flatten_dims=2,
                            bias_attr=False)
     attn_scores = layers.squeeze(raw_scores, [2])
     if encoder_padding_mask is not None:
         attn_scores = layers.elementwise_add(attn_scores,
                                              encoder_padding_mask)

     # Scores -> weights.
     attn_weights = layers.softmax(attn_scores)

     # Weighted sum of the encoder outputs gives the context vector.
     weighted_output = layers.elementwise_mul(encoder_output,
                                              attn_weights,
                                              axis=0)
     return layers.reduce_sum(weighted_output, dim=1)
Esempio n. 14
0
def model_func(inputs, is_train=True):
    """Build the encoder-decoder graph and return the decoder output.

    Args:
        inputs: Sequence where ``inputs[0]`` is the source token tensor,
            ``inputs[1]`` the source sequence lengths and, when training,
            ``inputs[2]`` the target tensor.
        is_train: When false, no target is passed to the decoder.

    Returns:
        Whatever ``decoder`` returns.
    """
    src, src_sequence_length = inputs[0], inputs[1]

    # source embedding
    src_embedding = fluid.embedding(
        input=src,
        size=[source_dict_size, hidden_dim],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="src_emb_table"))

    # encoder
    encoder_output, encoder_state = encoder(src_embedding, src_sequence_length)
    encoder_output_proj = layers.fc(input=encoder_output,
                                    size=decoder_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)

    # Additive mask: 0 where the sequence mask is 1, -1e9 where it is 0.
    max_src_len = layers.shape(src)[1]
    src_mask = layers.sequence_mask(src_sequence_length,
                                    maxlen=max_src_len,
                                    dtype="float32")
    encoder_padding_mask = (src_mask - 1.0) * 1e9

    if is_train:
        trg = inputs[2]
    else:
        trg = None

    # decoder
    return decoder(encoder_output=encoder_output,
                   encoder_output_proj=encoder_output_proj,
                   encoder_state=encoder_state,
                   encoder_padding_mask=encoder_padding_mask,
                   trg=trg,
                   is_train=is_train)
Esempio n. 15
0
    def get_seg(self, kernel_preds, cls_preds, mask_protos, ori_shapes,
                resize_shapes):
        """Assemble per-level predictions of image 0 and decode its masks.

        Only the first image of the batch is processed. Its category and
        kernel predictions from all levels are flattened and concatenated,
        then handed to ``self.get_seg_single``.

        Returns:
            Dict with keys 'masks', 'classes' and 'scores'.
        """
        level_ids = range(len(cls_preds))  # one entry per output level
        featmap_size = L.shape(mask_protos)[-2:]  # spatial size of the protos

        img_id = 0

        flat_cate = [
            L.reshape(cls_preds[i][img_id], (-1, self.cate_out_channels))
            for i in level_ids
        ]
        mask_proto = mask_protos[img_id:img_id + 1, :, :, :]
        flat_kernel = [
            L.reshape(L.transpose(kernel_preds[i][img_id], perm=[1, 2, 0]),
                      (-1, self.kernel_out_channels))
            for i in level_ids
        ]
        resize_shape = resize_shapes[img_id]
        ori_shape = ori_shapes[img_id]

        cate_pred = L.concat(flat_cate, axis=0)
        kernel_pred = L.concat(flat_kernel, axis=0)

        masks, classes, scores = self.get_seg_single(
            cate_pred, mask_proto, kernel_pred, featmap_size,
            resize_shape, ori_shape)

        return {
            'masks': masks,
            'classes': classes,
            'scores': scores,
        }
Esempio n. 16
0
def decode(conv_output, anchors, stride, num_class, conf_thresh):
    """Decode a raw detection head output into boxes, confidence and probs.

    Args:
        conv_output: Head output; its first two dims are read as
            (batch_size, n_grid) and it is reshaped to
            (batch_size, n_grid, n_grid, len(anchors), 5 + num_class).
        anchors: Anchor sizes for this scale, one entry per anchor.
        stride: Multiplier applied to the decoded xy.
        num_class: Number of classes.
        conf_thresh: Accepted but not used by this function.

    Returns:
        Tuple ``(pred_xywh, pred_conf, pred_prob)`` with shapes
        [batch, -1, 4], [batch, -1, 1] and [batch, -1, num_class].
    """
    out_shape = P.shape(conv_output)
    batch_size, n_grid = out_shape[0], out_shape[1]
    anchor_per_scale = len(anchors)

    conv_output = P.reshape(
        conv_output,
        (batch_size, n_grid, n_grid, anchor_per_scale, 5 + num_class))
    raw_dxdy = conv_output[:, :, :, :, 0:2]
    raw_dwdh = conv_output[:, :, :, :, 2:4]
    raw_conf = conv_output[:, :, :, :, 4:5]
    raw_prob = conv_output[:, :, :, :, 5:]

    # Per-cell offsets: channel 0 is the index along axis 1 of the grid,
    # channel 1 the index along axis 0.
    cell_ids = P.range(0, n_grid, 1, 'float32')
    along_axis1 = P.expand(P.reshape(cell_ids, (1, -1, 1)), [n_grid, 1, 1])
    along_axis0 = P.expand(P.reshape(cell_ids, (-1, 1, 1)), [1, n_grid, 1])
    offset = P.concat([along_axis1, along_axis0], axis=-1)
    offset = P.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = P.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    pred_xy = (P.sigmoid(raw_dxdy) + offset) * stride
    pred_wh = P.exp(raw_dwdh) * P.assign(anchors)
    pred_xywh = P.concat([pred_xy, pred_wh], axis=-1)

    pred_xywh = P.reshape(pred_xywh, (batch_size, -1, 4))  # [-1, -1, 4]
    pred_conf = P.reshape(P.sigmoid(raw_conf),
                          (batch_size, -1, 1))  # [-1, -1, 1]
    pred_prob = P.reshape(P.sigmoid(raw_prob),
                          (batch_size, -1, num_class))  # [-1, -1, num_class]
    return pred_xywh, pred_conf, pred_prob
Esempio n. 17
0
def graph_gather(gw, feature, index):
    """Implementation of graph gather

    Gather the rows of ``feature`` selected by per-graph indices. Each index
    is shifted by ``gw.graph_lod[:-1]`` before the gather.

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)

        feature: A tensor indexed by node along its first axis; its last
                 dimension is used as ``hidden_size``.

        index (int32): A tensor with K-rank where the first dim denotes the graph.
                        Shape (num_graph, ) or (num_graph, k1, k2, k3, ..., kn).
                       WARNING: We dont support negative index.

    Return:
        A tensor with shape (num_graph, k1, k2, k3, ..., kn, hidden_size)
    """
    index_shape = L.shape(index)
    hidden_size = int(feature.shape[-1])

    # Shift per-graph indices, then gather on a flat index.
    flat_index = L.reshape(index + gw.graph_lod[:-1], [-1])
    gathered = L.gather(feature, flat_index, overwrite=False)

    # Restore the original index layout with hidden_size appended.
    out_shape = [index_shape[i] for i in range(index_shape.shape[0])]
    out_shape.append(hidden_size)
    return L.reshape(gathered, out_shape)
Esempio n. 18
0
def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    """Compute the variance of ``input`` along ``axis``.

    Args:
        input: Tensor of dtype float32 or float64.
        axis: Axis or list of axes to reduce. ``None`` or ``[]`` reduces
            over all axes. Negative values count from the last axis.
        keepdim: Keep the reduced axes with size 1 in the result.
        unbiased: Multiply the mean squared deviation by ``n / (n - 1)``,
            where ``n`` is the number of reduced elements; the factor is 0
            when ``n <= 1``.
        out: Optional tensor that receives the result.
        name: Name forwarded to the underlying reduce ops.

    Returns:
        ``out`` when it is given, otherwise the newly computed tensor.

    Raises:
        ValueError: If ``input`` is not float32 / float64.
    """
    dtype = convert_dtype(input.dtype)
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))
    rank = len(input.shape)
    if isinstance(axis, int):
        # A single integer axis previously failed when iterated below.
        axes = [axis]
    elif axis is not None and axis != []:
        axes = axis
    else:
        axes = range(rank)
    axes = [e if e >= 0 else e + rank for e in axes]
    inp_shape = input.shape if in_dygraph_mode() else layers.shape(input)
    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean(
        (input - mean)**2, dim=axis, keep_dim=keepdim, name=name)

    if unbiased:
        # n = number of elements that were reduced.
        n = 1
        for i in axes:
            n *= inp_shape[i]
        if not in_dygraph_mode():
            # In static mode n is a tensor, so the guard against n <= 1 has
            # to be expressed as a graph op.
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0)
            factor = where(n > 1.0, n / (n - 1.0), zero_const)
        else:
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    # Identity check: a tensor must not be judged by its truth value, which
    # may depend on its contents or may not be defined at all.
    if out is not None:
        layers.assign(input=tmp, output=out)
        return out
    return tmp
Esempio n. 19
0
    def _build_position_ids(self, src_ids):
        """Build position ids for a sequence made of equally sized slots.

        The first slot gets positions ``0 .. slot_seqlen - 1``. Every
        following slot gets positions ``slot_seqlen .. 2 * slot_seqlen - 1``
        shifted down by the number of padding tokens (id 0) in the first
        slot of the same sample.

        Args:
            src_ids: Token id tensor, assumed to be
                [B, num_slots * slot_seqlen, 1] with pad id 0
                -- TODO confirm against caller.

        Returns:
            int64 position id tensor with ``stop_gradient`` set.
        """
        src_shape = L.shape(src_ids)
        src_seqlen = src_shape[1]
        src_batch = src_shape[0]

        slot_seqlen = self.slot_seqlen

        # Number of slots after the first one.
        num_b = (src_seqlen / slot_seqlen) - 1
        a_position_ids = L.reshape(L.range(0, slot_seqlen, 1, dtype='int32'),
                                   [1, slot_seqlen, 1],
                                   inplace=True)  # [1, slot_seqlen, 1]
        a_position_ids = L.expand(
            a_position_ids, [src_batch, 1, 1])  # [B, slot_seqlen, 1]

        zero = L.fill_constant([1], dtype='int64', value=0)
        input_mask = L.cast(L.equal(src_ids[:, :slot_seqlen], zero),
                            "int32")  # assume pad id == 0 [B, slot_seqlen, 1]
        # keep_dim=True keeps the reduced axis so the result is [B, 1, 1]
        # and broadcasts against b_position_ids below; without it the rank
        # drops and the subtraction no longer lines up per sample.
        a_pad_len = L.reduce_sum(input_mask, 1, keep_dim=True)  # [B, 1, 1]

        b_position_ids = L.reshape(L.range(slot_seqlen,
                                           2 * slot_seqlen,
                                           1,
                                           dtype='int32'), [1, slot_seqlen, 1],
                                   inplace=True)  # [1, slot_seqlen, 1]
        b_position_ids = L.expand(
            b_position_ids,
            [src_batch, num_b, 1])  # [B, slot_seqlen * num_b, 1]
        b_position_ids = b_position_ids - a_pad_len  # [B, slot_seqlen * num_b, 1]

        position_ids = L.concat([a_position_ids, b_position_ids], 1)
        position_ids = L.cast(position_ids, 'int64')
        position_ids.stop_gradient = True
        return position_ids
Esempio n. 20
0
    def forward(self, src, src_length):
        """Encode ``src`` and decode it with beam search.

        Args:
            src: Source token tensor; axis 1 is used as the maximum length.
            src_length: Source sequence lengths.

        Returns:
            The first element of the beam search decoder's result.
        """
        # encoding
        encoder_output, encoder_final_state = self.encoder(src, src_length)

        # decoder initial states: the encoder final state plus fresh states
        # from the attention cell
        attention_cell = self.decoder.lstm_attention.cell
        cell_initial_states = attention_cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
        decoder_initial_states = [encoder_final_state, cell_initial_states]

        # additive attention mask to avoid attending to paddings:
        # 0 where the sequence mask is 1, -1e9 where it is 0
        src_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=encoder_output.dtype)
        encoder_padding_mask = layers.unsqueeze((src_mask - 1.0) * 1e9, [1])

        # tile the batch dimension with beam_size
        tile_beam = BeamSearchDecoder.tile_beam_merge_with_batch
        encoder_output = tile_beam(encoder_output, self.beam_size)
        encoder_padding_mask = tile_beam(encoder_padding_mask, self.beam_size)

        # dynamic decoding with beam search
        rs, _ = self.beam_search_decoder(
            inits=decoder_initial_states,
            encoder_output=encoder_output,
            encoder_padding_mask=encoder_padding_mask)
        return rs
Esempio n. 21
0
    def ce_conf_loss(self, pred_allboxes_conf, labels_pos_mask,
                     labels_neg_mask, class_vectors, labels_pos_cid2, gt_area):
        """Cross-entropy confidence loss summed over positive and negative priors.

        Args:
            pred_allboxes_conf: Class logits; axis 2 is the class axis.
            labels_pos_mask: Mask selecting positive priors.
            labels_neg_mask: Mask selecting negative priors.
            class_vectors: Lookup table whose rows are the target
                distributions, indexed by class id.
            labels_pos_cid2: Class id per prior, used to index
                ``class_vectors``.
            gt_area: Area of the matched ground truth; positives are
                weighted by ``2.0 - gt_area``.

        Returns:
            Scalar loss tensor.
        """
        class_ids = P.reshape(labels_pos_cid2,
                              (-1, ))  # [batch_size*num_priors]
        num_classes = P.shape(pred_allboxes_conf)[2]
        logits = P.reshape(pred_allboxes_conf,
                           (-1, num_classes))  # [batch_size*num_priors, num_classes]
        # Target distribution (one-hot mask) for every prior.
        label_prob = P.gather(class_vectors, class_ids)

        pred_prob = P.cast(P.softmax(logits), 'float32')
        # The tiny constant keeps log() away from zero and so prevents nan.
        neg_log_prob = 0 - P.log(pred_prob + 1e-9)
        per_prior_loss = P.reduce_sum(label_prob * neg_log_prob, dim=1)

        # Only positive and negative priors contribute to the loss.
        pos_mask = P.reshape(labels_pos_mask, (-1, ))  # [batch_size*num_priors]
        neg_mask = P.reshape(labels_neg_mask, (-1, ))  # [batch_size*num_priors]
        # The smaller the ground truth area, the larger the weight.
        pos_weight = P.reshape(2.0 - gt_area,
                               (-1, ))  # [batch_size*num_priors]

        pos_loss = per_prior_loss * pos_mask * pos_weight
        neg_loss = per_prior_loss * neg_mask
        return P.reduce_sum(pos_loss + neg_loss)
Esempio n. 22
0
def build_graph_attn_bias(input_mask, n_head, dtype, slot_seqlen):
    """Build the additive attention bias for a slot-structured sequence.

    The sequence is treated as ``num_slot`` slots of ``slot_seqlen`` tokens.
    The visibility pattern allows the first slot's rows to see every
    position, and every later slot to see the first slot and itself. It is
    multiplied by the padding pattern ``input_mask x input_mask^T`` and
    mapped to 0 (visible) / -10000 (hidden).

    Args:
        input_mask: Padding mask; axis 1 is the sequence length
            (assumed [batch, seq, 1] -- TODO confirm).
        n_head: Number of attention heads the bias is repeated for.
        dtype: Desired dtype of the result.
        slot_seqlen: Number of tokens per slot.

    Returns:
        Bias tensor of shape [batch, n_head, seq, seq].
    """
    input_seqlen = L.shape(input_mask)[1]
    num_slot = input_seqlen / slot_seqlen
    num_b = num_slot - 1  # slots after the first one
    b_len = num_b * slot_seqlen  # tokens covered by those slots

    # Block-diagonal pattern: each later slot sees only itself.
    eye = L.diag(L.ones([num_b], dtype="float32"))  # [num_b, num_b]
    eye = L.unsqueeze(eye, [1, -1])  # [num_b, 1, num_b, 1]
    block_diag = L.expand(
        eye, [1, slot_seqlen, 1, slot_seqlen])  # [num_b, seqlen, num_b, seqlen]
    block_diag = L.reshape(block_diag,
                           [1, b_len, b_len])  # [1, num_b*seqlen, num_b*seqlen]

    # Prepend the columns of the first slot (visible to every later slot).
    later_rows = L.concat(
        [L.ones([1, b_len, slot_seqlen], dtype="float32"), block_diag], 2)
    # Prepend the rows of the first slot (which sees everything).
    first_rows = L.ones([1, slot_seqlen, num_slot * slot_seqlen],
                        dtype="float32")
    graph_attn_bias = L.concat([first_rows, later_rows], 1)  # [1, seq, seq]

    pad_attn_bias = L.matmul(input_mask, input_mask,
                             transpose_y=True)  # [batch, seq, seq]
    visible = graph_attn_bias * pad_attn_bias

    attn_bias = (1. - visible) * -10000.
    attn_bias = L.stack([attn_bias] * n_head, 1)  # [batch, n_head, seq, seq]
    if attn_bias.dtype != dtype:
        attn_bias = L.cast(attn_bias, dtype)
    return attn_bias
Esempio n. 23
0
        def build_position_ids(src_ids, dst_ids):
            """Build position ids for a source sequence followed by a destination one.

            Source positions are ``0 .. src_seqlen - 1``. Destination
            positions continue from ``src_seqlen`` for ``src_seqlen - 1``
            steps (no cls token) and are shifted down by the number of
            padding tokens (id 0) in the source of the same sample.
            ``dst_ids`` is accepted but not read.

            Returns:
                int64 position id tensor with ``stop_gradient`` set.
            """
            shape = L.shape(src_ids)
            src_batch, src_seqlen = shape[0], shape[1]
            dst_seqlen = src_seqlen - 1  # without cls

            src_pos = L.range(0, src_seqlen, 1, dtype='int32')
            src_pos = L.reshape(src_pos, [1, src_seqlen, 1],
                                inplace=True)  # [1, src_seqlen, 1]
            src_pos = L.expand(src_pos, [src_batch, 1, 1])  # [B, src_seqlen, 1]

            # Count padding tokens per sample, assuming pad id == 0.
            zero = L.fill_constant([1], dtype='int64', value=0)
            is_pad = L.cast(L.equal(src_ids, zero), "int32")
            src_pad_len = L.reduce_sum(is_pad, 1, keep_dim=True)  # [B, 1, 1]

            dst_pos = L.range(src_seqlen, src_seqlen+dst_seqlen, 1,
                              dtype='int32')
            dst_pos = L.reshape(dst_pos, [1, dst_seqlen, 1],
                                inplace=True)  # [1, dst_seqlen, 1]
            dst_pos = L.expand(dst_pos, [src_batch, 1, 1])  # [B, dst_seqlen, 1]
            dst_pos = dst_pos - src_pad_len  # [B, dst_seqlen, 1]

            position_ids = L.cast(L.concat([src_pos, dst_pos], 1), 'int64')
            position_ids.stop_gradient = True
            return position_ids
def model_func(inputs, is_train=True):
    """Assemble the encoder/decoder graph and return the decoder output.

    Args:
        inputs: indexable collection. Item 0 is the source token ids and
            item 1 the source sequence lengths; item 2 (target input) is
            read only when ``is_train`` is true. The original note lists the
            layout as [src, src_sequence_length, trg, trg_sequence_length,
            label].
        is_train: when false, no target input is passed to the decoder
            (``trg=None``).

    Returns:
        Whatever ``decoder(...)`` returns.
    """
    # Source-language input.
    src, src_sequence_length = inputs[0], inputs[1]
    emb_attr = fluid.ParamAttr(name="src_emb_table")
    src_embedding = fluid.embedding(input=src,
                                    size=[source_dict_size, hidden_dim],
                                    dtype="float32",
                                    param_attr=emb_attr)

    # Encoder.
    encoder_output, encoder_state = encoder(src_embedding, src_sequence_length)

    encoder_output_proj = layers.fc(input=encoder_output,
                                    size=decoder_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)

    # (mask - 1) * 1e9: 0 where the mask is 1 and -1e9 where it is 0.
    max_src_len = layers.shape(src)[1]
    src_mask = layers.sequence_mask(src_sequence_length,
                                    maxlen=max_src_len,
                                    dtype="float32")
    encoder_padding_mask = (src_mask - 1.0) * 1e9

    # Target-language input: present for training, absent for generation.
    if is_train:
        trg = inputs[2]
    else:
        trg = None

    # Decoder.
    return decoder(encoder_output=encoder_output,
                   encoder_output_proj=encoder_output_proj,
                   encoder_state=encoder_state,
                   encoder_padding_mask=encoder_padding_mask,
                   trg=trg,
                   is_train=is_train)
Esempio n. 25
0
    def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
        """Build the attention decoder graph for training or beam search.

        Args:
            enc_final_state: final encoder state; used as the first element
                of the decoder's initial states.
            mode: 'train' runs the decoder cell over ``self.tar_emb`` with
                ``rnn``; 'beam_search' decodes with ``BeamSearchDecoder`` and
                ``dynamic_decode``.
            beam_size: beam width; only used when ``mode == 'beam_search'``.

        Returns:
            In 'train' mode, the rnn outputs projected by an fc layer of
            size ``self.tar_vocab_size``. In 'beam_search' mode, the first
            value returned by ``dynamic_decode``.

        Raises:
            ValueError: if ``mode`` is neither 'train' nor 'beam_search'.
        """
        # Reject unknown modes before any layer is created. Without this
        # check an unsupported mode would build part of the graph and then
        # fail on the never-assigned ``dec_output`` local.
        if mode not in ('train', 'beam_search'):
            raise ValueError(
                "unsupported mode %r, expected 'train' or 'beam_search'" %
                (mode, ))

        # Projection to vocabulary size; flattens all but the last dim.
        output_layer = lambda x: layers.fc(
            x,
            size=self.tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(
                name="output_w",
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=False)

        dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                        self.dropout, self.init_scale)
        dec_initial_states = [
            enc_final_state,
            dec_cell.get_initial_states(batch_ref=self.enc_output,
                                        shape=[self.hidden_size])
        ]

        # sequence_mask - 1.0 is 0 where the mask is 1 and -1 where it is 0.
        # NOTE(review): the mask is not scaled to a large negative value
        # here; presumably the attention cell applies the scale -- confirm.
        max_src_seq_len = layers.shape(self.src)[1]
        src_mask = layers.sequence_mask(self.src_sequence_length,
                                        maxlen=max_src_seq_len,
                                        dtype='float32')
        enc_padding_mask = (src_mask - 1.0)

        if mode == 'train':
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=self.tar_emb,
                                initial_states=dec_initial_states,
                                sequence_length=None,
                                enc_output=self.enc_output,
                                enc_padding_mask=enc_padding_mask)
            return output_layer(dec_output)

        # mode == 'beam_search': same "output_w" parameter name, but no
        # initializer is attached to the ParamAttr.
        output_layer = lambda x: layers.fc(
            x,
            size=self.tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(name="output_w"),
            bias_attr=False)
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            self.beam_start_token,
            self.beam_end_token,
            beam_size,
            embedding_fn=self.tar_embeder,
            output_fn=output_layer)
        # Tile encoder tensors so each beam hypothesis has its own copy.
        enc_output = beam_search_decoder.tile_beam_merge_with_batch(
            self.enc_output, beam_size)
        enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
            enc_padding_mask, beam_size)
        outputs, _ = dynamic_decode(beam_search_decoder,
                                    inits=dec_initial_states,
                                    max_step_num=self.beam_max_step_num,
                                    enc_output=enc_output,
                                    enc_padding_mask=enc_padding_mask)
        return outputs
def batch_scatter(ref, indices, updates, in_place=False, overwrite=False):
    """Scatter one update per batch row into ``ref``.

    For each row ``b`` the value ``updates[b]`` is written at column
    ``indices[b][0]`` of ``ref``. By default the update is ADDED to the
    existing element (``scatter_nd_add``); with ``overwrite=True`` the
    existing element is cancelled first so the update replaces it.
    The original documentation states that only 2-D tensors are supported.

    Args:
        ref (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1]
        updates (Variable): with shape [batch_size]
        in_place (bool): if True, the result is assigned to ``ref`` and
            ``ref`` is returned; otherwise a new tensor is returned.
            Default is False.
        overwrite (bool): if True, replace the addressed elements instead of
            adding to them. Default is False.

    Returns:
        Variable: ``ref`` (after ``layers.assign``) when ``in_place`` is
        True, else the newly computed tensor, in ``ref``'s dtype.

    Examples:
        ref
            [[1, 1, 1],
             [1, 1, 1]]
        indices
            [[2], [1]]
        updates
            [2, 3]

        return (overwrite=True)
            [[1, 1, 2],
             [1, 3, 1]]

        return (overwrite=False, updates are added)
            [[1, 1, 3],
             [1, 4, 1]]

    """
    original_dtype = ref.dtype
    # Non-float inputs go through float32 and are cast back at the end.
    needs_cast = original_dtype not in PaddleVarType.floats
    work = layers.cast(ref, dtype='float32') if needs_cast else ref

    if updates.dtype != work.dtype:
        updates = layers.cast(updates, dtype=work.dtype)

    # Pair every row number with its column index: coord[b] = [b, indices[b]].
    index_dtype = indices.dtype
    num_rows = layers.cast(layers.shape(work)[0], dtype=index_dtype)
    start = layers.fill_constant(shape=[1], dtype=index_dtype, value=0)
    step = layers.fill_constant(shape=[1], dtype=index_dtype, value=1)
    row_ids = layers.range(start, num_rows, step, dtype=index_dtype)
    row_ids = layers.unsqueeze(row_ids, [1])
    coord = layers.concat([row_ids, indices], axis=1)

    if overwrite:
        # Add the negated current values so the addressed elements become 0
        # before the updates are added.
        current = layers.gather_nd(work, coord)
        negated = layers.elementwise_sub(layers.zeros_like(current), current)
        work = layers.scatter_nd_add(work, coord, negated)

    result = layers.scatter_nd_add(work, coord, updates)
    if needs_cast:
        result = layers.cast(result, dtype=original_dtype)

    if not in_place:
        return result
    layers.assign(result, ref)
    return ref
Esempio n. 27
0
def _iou(box_a, box_b):
    '''
    Pairwise IoU between two batched sets of boxes.

    The last axis of each input is read as (center, size): the first two
    entries are the box center and the last two its full extent.

    :param box_a:    [c, A, 4]
    :param box_b:    [c, B, 4]
    :return:   [c, A, B]  IoU of every box in box_a with every box in box_b
    '''

    def _center_to_corners(boxes):
        # (center, size) -> (top-left corner, bottom-right corner)
        return P.concat([
            boxes[:, :, :2] - boxes[:, :, 2:] * 0.5,
            boxes[:, :, :2] + boxes[:, :, 2:] * 0.5
        ],
                        axis=-1)

    def _area(corners):
        # extent along coordinate 0 times extent along coordinate 1
        return (corners[:, :, :, 2] - corners[:, :, :, 0]) * \
               (corners[:, :, :, 3] - corners[:, :, :, 1])

    corners_a = _center_to_corners(box_a)
    corners_b = _center_to_corners(box_b)

    c = P.shape(corners_a)[0]
    A = P.shape(corners_a)[1]
    B = P.shape(corners_b)[1]

    # Insert a singleton axis in each set and tile it so both become
    # [c, A, B, 4] and can be compared element-wise.
    tiled_a = P.expand(P.reshape(corners_a, (c, A, 1, 4)), [1, 1, B, 1])
    tiled_b = P.expand(P.reshape(corners_b, (c, 1, B, 4)), [1, A, 1, 1])

    # Areas of the two rectangles.
    area_a = _area(tiled_a)
    area_b = _area(tiled_b)

    # Corners of the overlap rectangle.
    overlap_min = P.elementwise_max(tiled_a[:, :, :, :2],
                                    tiled_b[:, :, :, :2])
    overlap_max = P.elementwise_min(tiled_a[:, :, :, 2:],
                                    tiled_b[:, :, :, 2:])

    # relu clamps negative extents (no overlap) to zero.
    overlap_size = P.relu(overlap_max - overlap_min)
    inter_area = overlap_size[:, :, :, 0] * overlap_size[:, :, :, 1]
    union_area = area_a + area_b - inter_area
    # Small epsilon guards against division by zero.
    return inter_area / (union_area + 1e-9)
Esempio n. 28
0
 def test_shape(self):
     """Build a program with a data layer, apply ``layers.shape`` to it,
     check the result is not None, and print the program."""
     prog = Program()
     with program_guard(prog):
         data_var = layers.data(name="input",
                                shape=[3, 100, 100],
                                dtype="float32")
         shape_var = layers.shape(data_var)
         self.assertIsNotNone(shape_var)
     print(str(prog))
Esempio n. 29
0
def loss_func(logits, label, trg_sequence_length):
    """Masked average cross-entropy over the target sequence.

    Softmax is applied to ``logits`` and cross-entropy is taken against
    ``label``. The loss is weighted by a float mask built from
    ``trg_sequence_length`` (max length read from dim 1 of ``logits``), and
    the masked sum is divided by the sum of the mask.

    Args:
        logits: unnormalised scores; dim 1 is used as the maximum length.
        label: target labels passed to ``layers.cross_entropy``.
        trg_sequence_length: per-example target lengths.

    Returns:
        The masked loss sum divided by the mask sum.
    """
    token_loss = layers.cross_entropy(input=layers.softmax(logits),
                                      label=label)
    max_len = layers.shape(logits)[1]
    valid = layers.sequence_mask(trg_sequence_length,
                                 maxlen=max_len,
                                 dtype="float32")
    masked_total = layers.reduce_sum(token_loss * valid)
    valid_count = layers.reduce_sum(valid)
    return masked_total / valid_count
Esempio n. 30
0
def encoder(enc_input,
            input_mask,
            n_layer,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            prepostprocess_dropout,
            attention_dropout,
            relu_dropout,
            hidden_act,
            preprocess_cmd="n",
            postprocess_cmd="da",
            param_initializer=None,
            name=''):
    """
    Stack ``n_layer`` calls to ``encoder_layer``, feeding each layer's output
    into the next, then apply ``pre_process_layer`` to the last output.

    Returns a 4-tuple: the final output after ``to_3d``, and three lists
    holding, per layer, the three values returned by ``encoder_layer``
    (layer output, attention context, feed-forward output).
    """

    # The two results below are not used in this function; the calls are kept
    # as-is because they may add ops to the graph.
    # NOTE(review): confirm whether they can be dropped.
    d_shape = L.shape(input_mask)
    pad_idx = build_pad_idx(input_mask)
    attn_bias = build_attn_bias(input_mask, n_head, enc_input.dtype)

    layer_input = to_2d(enc_input)
    all_hidden, all_attn, all_ffn = [], [], []
    for layer_idx in range(n_layer):
        layer_name = name + '_layer_' + str(layer_idx)
        layer_out, layer_attn, layer_ffn = encoder_layer(
            layer_input,
            input_mask,
            attn_bias,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            prepostprocess_dropout,
            attention_dropout,
            relu_dropout,
            hidden_act,
            preprocess_cmd,
            postprocess_cmd,
            param_initializer=param_initializer,
            name=layer_name)
        all_hidden.append(layer_out)
        all_attn.append(layer_attn)
        all_ffn.append(layer_ffn)
        layer_input = layer_out

    # NOTE(review): with n_layer == 0 ``layer_out`` is never bound and the
    # next statement raises.
    final = pre_process_layer(layer_out,
                              preprocess_cmd,
                              prepostprocess_dropout,
                              name="post_encoder")
    return to_3d(final), all_hidden, all_attn, all_ffn