Example 1
    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
            paddle.sum(
                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
                .astype('float32')))

        if pos_num == 0:
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                    'float32')
            return selected_mask

        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))

        if neg_num == 0:
            selected_mask = training_mask
            # Paddle tensors have no `.view()`; use `.reshape()` as elsewhere.
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                    'float32')
            return selected_mask

        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
            (training_mask > 0.5))
        selected_mask = selected_mask.reshape(
            [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                'float32')
        return selected_mask
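
The method above produces a per-image OHEM mask. As a minimal sketch (not from the original source), a batch-level wrapper along the following lines is how such a routine is commonly driven; the `ohem_batch` name and the [N, H, W] input shapes are assumptions for illustration, and `paddle` is assumed imported as in the excerpts.

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        # scores / gt_texts / training_masks: assumed [N, H, W] float32 tensors.
        selected_masks = []
        for i in range(scores.shape[0]):
            # ohem_single returns a [1, H, W] mask for a single image.
            selected_masks.append(
                self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
                                 training_masks[i, :, :], ohem_ratio))
        # Stack the per-image masks back into an [N, H, W] batch mask.
        return paddle.concat(selected_masks, axis=0).astype('float32')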
Example 2
def iou_single(a, b, mask, n_class):
    # `where` (returns the indices of non-zero elements) and `EPS` (a small
    # constant guarding the division below) are assumed to be defined at
    # module level in the original file.
    valid = mask == 1

    valid_flatten = paddle.reshape(valid, (-1, ))
    valid_flatten = paddle.cast(valid_flatten, dtype="int32")
    index = where(valid_flatten == 1)
    if index.shape[0] == 0:
        return paddle.zeros((1, ))

    index = paddle.reshape(index, (1, -1))
    a_flatten = paddle.reshape(a, (1, -1))
    a = paddle.index_sample(a_flatten, index)
    a = paddle.reshape(a, (-1, ))

    b_flatten = paddle.reshape(b, (1, -1))
    b = paddle.index_sample(b_flatten, index)
    b = paddle.reshape(b, (-1, ))

    miou = []
    for i in range(n_class):
        inter = paddle.logical_and(a == i, b == i)
        inter = paddle.cast(inter, dtype='float32')
        union = paddle.logical_or(a == i, b == i)
        union = paddle.cast(union, dtype='float32')

        miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
    miou = sum(miou) / len(miou)
    return miou
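
As a hedged, illustrative sketch (random data, not from the original source): `iou_single` can score a binarized prediction against a ground-truth map over the valid region, assuming `paddle` is imported and `where`/`EPS` are the module-level helpers noted above.

pred_map = paddle.rand([32, 32])
gt_map = paddle.rand([32, 32])
valid_map = paddle.ones([32, 32])
pred = paddle.cast(pred_map > 0.5, 'int64').reshape([-1])
gt = paddle.cast(gt_map > 0.5, 'int64').reshape([-1])
valid = paddle.cast(valid_map > 0.5, 'int64').reshape([-1])
miou = iou_single(pred, gt, valid, n_class=2)  # mean IoU over the two classes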
Example 3
    def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
                                     gt_bboxes):
        num_gt = gt_bboxes.shape[0]

        flatten_x = flatten_center_and_stride[:,
                                              0].unsqueeze(1).tile([1, num_gt])
        flatten_y = flatten_center_and_stride[:,
                                              1].unsqueeze(1).tile([1, num_gt])
        flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
            [1, num_gt])
        flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
            [1, num_gt])

        # is prior centers in gt bboxes, shape: [n_center, n_gt]
        l_ = flatten_x - gt_bboxes[:, 0]
        t_ = flatten_y - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - flatten_x
        b_ = gt_bboxes[:, 3] - flatten_y

        deltas = paddle.stack([l_, t_, r_, b_], axis=1)
        is_in_gts = deltas.min(axis=1) > 0
        is_in_gts_all = is_in_gts.sum(axis=1) > 0

        # is prior centers in gt centers
        gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
        ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
        ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
        ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y

        cl_ = flatten_x - ct_bound_l
        ct_ = flatten_y - ct_bound_t
        cr_ = ct_bound_r - flatten_x
        cb_ = ct_bound_b - flatten_y

        ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
        is_in_cts = ct_deltas.min(axis=1) > 0
        is_in_cts_all = is_in_cts.sum(axis=1) > 0

        # in any of gts or gt centers, shape: [n_center]
        is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
                                                     is_in_cts_all)

        is_in_gts_or_centers_all_inds = paddle.nonzero(
            is_in_gts_or_centers_all).squeeze(1)

        # both in gts and gt centers, shape: [num_fg, num_gt]
        is_in_gts_and_centers = paddle.logical_and(
            paddle.gather(is_in_gts.cast('int'),
                          is_in_gts_or_centers_all_inds,
                          axis=0).cast('bool'),
            paddle.gather(is_in_cts.cast('int'),
                          is_in_gts_or_centers_all_inds,
                          axis=0).cast('bool'))
        return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
Example 4
def ohem_single(score, gt_text, training_mask):
    gt_part = paddle.cast(gt_text > 0.5, dtype='float32')
    gt_tr_part = paddle.cast(paddle.logical_and(gt_text > 0.5,
                                                training_mask <= 0.5),
                             dtype='float32')
    pos_num = int(paddle.sum(gt_part)) - int(paddle.sum(gt_tr_part))
    #pos_num = int(np.sum(gt_text.numpy() > 0.5)) - int(np.sum((gt_text.numpy() > 0.5) & (training_mask.numpy() <= 0.5)))
    #pos_num = int(paddle.sum(gt_text > 0.5)) - int(paddle.sum((gt_text > 0.5) & (training_mask <= 0.5)))
    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0 # may be not good
        selected_mask = training_mask
        selected_mask = paddle.reshape(
            selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
        selected_mask = paddle.cast(selected_mask, dtype='float32')
        return selected_mask

    neg_num = int(np.sum(gt_text.numpy() <= 0.5))
    neg_num = int(min(pos_num * 3, neg_num))

    if neg_num == 0:
        selected_mask = training_mask
        # selected_mask = selected_mask.view(1, selected_mask.shape[0], selected_mask.shape[1]).float()
        selected_mask = paddle.reshape(
            selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
        selected_mask = paddle.cast(selected_mask, dtype='float32')
        return selected_mask

    gt_text_flatten = paddle.reshape(gt_text, (-1, ))
    index = where(gt_text_flatten <= 0.5)
    index = paddle.reshape(index, (1, -1))
    score_flatten = paddle.reshape(score, (1, -1))
    neg_score = paddle.index_sample(score_flatten, index)
    neg_score = paddle.reshape(neg_score, (-1, ))

    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]

    item1 = paddle.logical_or(score >= threshold, gt_text > 0.5)
    selected_mask = paddle.logical_and(item1, training_mask > 0.5)
    # selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).float()
    selected_mask = paddle.reshape(
        selected_mask, (1, selected_mask.shape[0], selected_mask.shape[1]))
    #selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1])
    selected_mask = paddle.cast(selected_mask, dtype='float32')
    return selected_mask
Example 5
def hard_negative_mining(loss, labels, neg_pos_ratio):
    """
    It is used to suppress the presence of a large number of negative predictions.
    It works at the image level, not the batch level.
    For any example/image, it keeps all the positive predictions and
    cuts the number of negative predictions to make sure the ratio
    between the negative examples and positive examples is no more
    than the given ratio for an image.

    Args:
        loss (N, num_priors): the loss for each example.
        labels (N, num_priors): the labels.
        neg_pos_ratio:  the ratio between the negative examples and positive examples.
    """
    pos_mask = labels > 0
    num_pos = pos_mask.cast('int').sum(axis=1, keepdim=True)
    num_neg = num_pos * neg_pos_ratio
    loss = pos_mask.cast('int') * (-1e8) + loss * (1 - pos_mask.cast('int'))
    indexes = paddle.argsort(loss, axis=1, descending=True)
    orders = paddle.argsort(indexes, axis=1)
    neg_mask = orders < num_neg
    return paddle.logical_or(pos_mask, neg_mask)
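
A self-contained sketch with made-up shapes and random data (purely illustrative): keep every positive prior plus the hardest negatives at a 3:1 ratio, then reduce the loss over the selected priors only.

loss = paddle.rand([8, 100])                    # per-prior loss, [N, num_priors]
labels = paddle.randint(0, 21, shape=[8, 100])  # class ids, 0 = background
mask = hard_negative_mining(loss, labels, neg_pos_ratio=3)
selected_loss = paddle.masked_select(loss, mask).sum()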
Example 6
def check_points_inside_bboxes(points,
                               bboxes,
                               center_radius_tensor=None,
                               eps=1e-9):
    r"""
    Args:
        points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
        bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
        center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
        eps (float): Default: 1e-9
    Returns:
        is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    points = points.unsqueeze([0, 1])
    x, y = points.chunk(2, axis=-1)
    xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
    # check whether `points` is in `bboxes`
    l = x - xmin
    t = y - ymin
    r = xmax - x
    b = ymax - y
    delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
    is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
    if center_radius_tensor is not None:
        # check whether `points` is in `center_radius`
        center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
        cx = (xmin + xmax) * 0.5
        cy = (ymin + ymax) * 0.5
        l = x - (cx - center_radius_tensor)
        t = y - (cy - center_radius_tensor)
        r = (cx + center_radius_tensor) - x
        b = (cy + center_radius_tensor) - y
        delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
        is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
        return (paddle.logical_and(is_in_bboxes, is_in_center),
                paddle.logical_or(is_in_bboxes, is_in_center))

    return is_in_bboxes.astype(bboxes.dtype)
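
A small self-contained sketch of the default path (no center radius); the coordinate values are made up for illustration, and `paddle` is assumed imported as in the excerpts.

points = paddle.to_tensor([[5., 5.], [50., 50.]])           # [L=2, 2] anchor centers
bboxes = paddle.to_tensor([[[0., 0., 10., 10.],
                            [20., 20., 40., 40.]]])         # [B=1, n=2, 4]
inside = check_points_inside_bboxes(points, bboxes)         # float32, shape [1, 2, 2]
# inside[0, 0, 0] == 1. because point (5, 5) lies inside the first box.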
Example 7
# `_tensor_or_tensors` (a typing alias) and `inf` (math.inf) are assumed to be
# defined at module level; this utility mirrors torch.nn.utils.clip_grad_norm_.
def clip_grad_norm_(parameters: _tensor_or_tensors,
                    max_norm: float,
                    norm_type: float = 2.0,
                    error_if_nonfinite: bool = False) -> paddle.Tensor:
    r"""Clips gradient norm of an iterable of parameters.

    The norm is computed over all gradients together, as if they were
    concatenated into a single vector. Gradients are modified in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of the gradients from :attr:``parameters`` is ``nan``,
            ``inf``, or ``-inf``. Default: False (will switch to True in the future)

    Returns:
        Total norm of the parameters (viewed as a single vector).
    """
    if isinstance(parameters, paddle.Tensor):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]
    detached_grads = [p.grad.detach() for p in parameters]

    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(parameters) == 0:
        return paddle.to_tensor(0.)
    # device = paddle.get_device()  # parameters[0].grad.device
    if norm_type == inf:
        # Infinity norm: the largest absolute gradient entry across all grads.
        norms = [g.abs().max() for g in detached_grads]
        total_norm = norms[0] if len(norms) == 1 else paddle.max(
            paddle.stack(norms))
    else:
        total_norm = paddle.norm(
            paddle.stack([paddle.norm(g, norm_type) for g in detached_grads]),
            norm_type)
    if error_if_nonfinite and paddle.logical_or(total_norm.isnan(),
                                                total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of order {norm_type} for gradients from '
            '`parameters` is non-finite, so it cannot be clipped. To disable '
            'this error and scale the gradients by the non-finite norm anyway, '
            'set `error_if_nonfinite=False`')
    clip_coef = max_norm / (total_norm + 1e-6)
    # Note: multiplying by the clamped coef is redundant when the coef is clamped to 1, but doing so
    # avoids a `if clip_coef < 1:` conditional which can require a CPU <=> device synchronization
    # when the gradients do not reside in CPU memory.
    clip_coef_clamped = paddle.clip(clip_coef, max=1.0)
    for i, p in enumerate(parameters):
        # Scale each gradient in place by the clamped coefficient.
        p.grad.set_value(detached_grads[i] * clip_coef_clamped)
    return total_norm
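
A hedged usage sketch (toy layer, arbitrary max_norm) of the typical call site between `backward()` and the optimizer step, assuming `paddle` is imported as in the excerpts above.

linear = paddle.nn.Linear(4, 4)
loss = linear(paddle.rand([2, 4])).sum()
loss.backward()
# Clip all gradients so that their combined 2-norm does not exceed 1.0.
total_norm = clip_grad_norm_(linear.parameters(), max_norm=1.0)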
Example 8
    def test_tensor_patch_method(self):
        paddle.disable_static()
        x_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
        y_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
        z_np = np.random.uniform(-1, 1, [6, 9]).astype(self.dtype)

        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        z = paddle.to_tensor(z_np)

        a = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])
        b = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])

        # 1. Unary operation for Tensor
        self.assertEqual(x.dim(), 2)
        self.assertEqual(x.ndimension(), 2)
        self.assertEqual(x.ndim, 2)
        self.assertEqual(x.size, 6)
        self.assertEqual(x.numel(), 6)
        self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy()))
        self.assertTrue(
            np.array_equal(x.tanh().numpy(),
                           paddle.tanh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.atan().numpy(),
                           paddle.atan(x).numpy()))
        self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy()))
        m = x.abs()
        self.assertTrue(
            np.array_equal(m.sqrt().numpy(),
                           paddle.sqrt(m).numpy()))
        self.assertTrue(
            np.array_equal(m.rsqrt().numpy(),
                           paddle.rsqrt(m).numpy()))
        self.assertTrue(
            np.array_equal(x.ceil().numpy(),
                           paddle.ceil(x).numpy()))
        self.assertTrue(
            np.array_equal(x.floor().numpy(),
                           paddle.floor(x).numpy()))
        self.assertTrue(np.array_equal(x.cos().numpy(), paddle.cos(x).numpy()))
        self.assertTrue(
            np.array_equal(x.acos().numpy(),
                           paddle.acos(x).numpy()))
        self.assertTrue(
            np.array_equal(x.asin().numpy(),
                           paddle.asin(x).numpy()))
        self.assertTrue(np.array_equal(x.sin().numpy(), paddle.sin(x).numpy()))
        self.assertTrue(
            np.array_equal(x.sinh().numpy(),
                           paddle.sinh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.cosh().numpy(),
                           paddle.cosh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.round().numpy(),
                           paddle.round(x).numpy()))
        self.assertTrue(
            np.array_equal(x.reciprocal().numpy(),
                           paddle.reciprocal(x).numpy()))
        self.assertTrue(
            np.array_equal(x.square().numpy(),
                           paddle.square(x).numpy()))
        self.assertTrue(
            np.array_equal(x.rank().numpy(),
                           paddle.rank(x).numpy()))
        self.assertTrue(
            np.array_equal(x[0].t().numpy(),
                           paddle.t(x[0]).numpy()))
        self.assertTrue(
            np.array_equal(x.asinh().numpy(),
                           paddle.asinh(x).numpy()))
        ### acosh(x) = nan, need to change input
        t_np = np.random.uniform(1, 2, [2, 3]).astype(self.dtype)
        t = paddle.to_tensor(t_np)
        self.assertTrue(
            np.array_equal(t.acosh().numpy(),
                           paddle.acosh(t).numpy()))
        self.assertTrue(
            np.array_equal(x.atanh().numpy(),
                           paddle.atanh(x).numpy()))
        d = paddle.to_tensor([[1.2285208, 1.3491015, 1.4899898],
                              [1.30058, 1.0688717, 1.4928783],
                              [1.0958099, 1.3724753, 1.8926544]])
        d = d.matmul(d.t())
        # ROCM not support cholesky
        if not fluid.core.is_compiled_with_rocm():
            self.assertTrue(
                np.array_equal(d.cholesky().numpy(),
                               paddle.cholesky(d).numpy()))

        self.assertTrue(
            np.array_equal(x.is_empty().numpy(),
                           paddle.is_empty(x).numpy()))
        self.assertTrue(
            np.array_equal(x.isfinite().numpy(),
                           paddle.isfinite(x).numpy()))
        self.assertTrue(
            np.array_equal(
                x.cast('int32').numpy(),
                paddle.cast(x, 'int32').numpy()))
        self.assertTrue(
            np.array_equal(
                x.expand([3, 2, 3]).numpy(),
                paddle.expand(x, [3, 2, 3]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.tile([2, 2]).numpy(),
                paddle.tile(x, [2, 2]).numpy()))
        self.assertTrue(
            np.array_equal(x.flatten().numpy(),
                           paddle.flatten(x).numpy()))
        index = paddle.to_tensor([0, 1])
        self.assertTrue(
            np.array_equal(
                x.gather(index).numpy(),
                paddle.gather(x, index).numpy()))
        index = paddle.to_tensor([[0, 1], [1, 2]])
        self.assertTrue(
            np.array_equal(
                x.gather_nd(index).numpy(),
                paddle.gather_nd(x, index).numpy()))
        self.assertTrue(
            np.array_equal(
                x.reverse([0, 1]).numpy(),
                paddle.reverse(x, [0, 1]).numpy()))
        self.assertTrue(
            np.array_equal(
                a.reshape([3, 2]).numpy(),
                paddle.reshape(a, [3, 2]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.slice([0, 1], [0, 0], [1, 2]).numpy(),
                paddle.slice(x, [0, 1], [0, 0], [1, 2]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.split(2)[0].numpy(),
                paddle.split(x, 2)[0].numpy()))
        m = paddle.to_tensor(
            np.random.uniform(-1, 1, [1, 6, 1, 1]).astype(self.dtype))
        self.assertTrue(
            np.array_equal(
                m.squeeze([]).numpy(),
                paddle.squeeze(m, []).numpy()))
        self.assertTrue(
            np.array_equal(
                m.squeeze([1, 2]).numpy(),
                paddle.squeeze(m, [1, 2]).numpy()))
        m = paddle.to_tensor([2, 3, 3, 1, 5, 3], 'float32')
        self.assertTrue(
            np.array_equal(m.unique()[0].numpy(),
                           paddle.unique(m)[0].numpy()))
        self.assertTrue(
            np.array_equal(
                m.unique(return_counts=True)[1],
                paddle.unique(m, return_counts=True)[1]))
        self.assertTrue(np.array_equal(x.flip([0]), paddle.flip(x, [0])))
        self.assertTrue(np.array_equal(x.unbind(0), paddle.unbind(x, 0)))
        self.assertTrue(np.array_equal(x.roll(1), paddle.roll(x, 1)))
        self.assertTrue(np.array_equal(x.cumsum(1), paddle.cumsum(x, 1)))
        m = paddle.to_tensor(1)
        self.assertTrue(np.array_equal(m.increment(), paddle.increment(m)))
        m = x.abs()
        self.assertTrue(np.array_equal(m.log(), paddle.log(m)))
        self.assertTrue(np.array_equal(x.pow(2), paddle.pow(x, 2)))
        self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x)))

        # 2. Binary operation
        self.assertTrue(
            np.array_equal(x.divide(y).numpy(),
                           paddle.divide(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.matmul(y, True, False).numpy(),
                paddle.matmul(x, y, True, False).numpy()))
        self.assertTrue(
            np.array_equal(
                x.norm(p='fro', axis=[0, 1]).numpy(),
                paddle.norm(x, p='fro', axis=[0, 1]).numpy()))
        self.assertTrue(
            np.array_equal(x.dist(y).numpy(),
                           paddle.dist(x, y).numpy()))
        self.assertTrue(
            np.array_equal(x.cross(y).numpy(),
                           paddle.cross(x, y).numpy()))
        m = x.expand([2, 2, 3])
        n = y.expand([2, 2, 3]).transpose([0, 2, 1])
        self.assertTrue(
            np.array_equal(m.bmm(n).numpy(),
                           paddle.bmm(m, n).numpy()))
        self.assertTrue(
            np.array_equal(
                x.histogram(5, -1, 1).numpy(),
                paddle.histogram(x, 5, -1, 1).numpy()))
        self.assertTrue(
            np.array_equal(x.equal(y).numpy(),
                           paddle.equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.greater_equal(y).numpy(),
                paddle.greater_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.greater_than(y).numpy(),
                paddle.greater_than(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.less_equal(y).numpy(),
                paddle.less_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.less_than(y).numpy(),
                paddle.less_than(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.not_equal(y).numpy(),
                paddle.not_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.equal_all(y).numpy(),
                paddle.equal_all(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.allclose(y).numpy(),
                paddle.allclose(x, y).numpy()))
        m = x.expand([2, 2, 3])
        self.assertTrue(
            np.array_equal(
                x.expand_as(m).numpy(),
                paddle.expand_as(x, m).numpy()))
        index = paddle.to_tensor([2, 1, 0])
        self.assertTrue(
            np.array_equal(
                a.scatter(index, b).numpy(),
                paddle.scatter(a, index, b).numpy()))

        # 3. Bool tensor operation
        x = paddle.to_tensor([[True, False], [True, False]])
        y = paddle.to_tensor([[False, False], [False, True]])
        self.assertTrue(
            np.array_equal(
                x.logical_and(y).numpy(),
                paddle.logical_and(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_not(y).numpy(),
                paddle.logical_not(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_or(y).numpy(),
                paddle.logical_or(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_xor(y).numpy(),
                paddle.logical_xor(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_and(y).numpy(),
                paddle.logical_and(x, y).numpy()))
        a = paddle.to_tensor([[1, 2], [3, 4]])
        b = paddle.to_tensor([[4, 3], [2, 1]])
        self.assertTrue(
            np.array_equal(
                x.where(a, b).numpy(),
                paddle.where(x, a, b).numpy()))

        x_np = np.random.randn(3, 6, 9, 7)
        x = paddle.to_tensor(x_np)
        x_T = x.T
        self.assertEqual(x_T.shape, [7, 9, 6, 3])
        self.assertTrue(np.array_equal(x_T.numpy(), x_np.T))

        self.assertTrue(inspect.ismethod(a.dot))
        self.assertTrue(inspect.ismethod(a.logsumexp))
        self.assertTrue(inspect.ismethod(a.multiplex))
        self.assertTrue(inspect.ismethod(a.prod))
        self.assertTrue(inspect.ismethod(a.scale))
        self.assertTrue(inspect.ismethod(a.stanh))
        self.assertTrue(inspect.ismethod(a.add_n))
        self.assertTrue(inspect.ismethod(a.max))
        self.assertTrue(inspect.ismethod(a.maximum))
        self.assertTrue(inspect.ismethod(a.min))
        self.assertTrue(inspect.ismethod(a.minimum))
        self.assertTrue(inspect.ismethod(a.floor_divide))
        self.assertTrue(inspect.ismethod(a.remainder))
        self.assertTrue(inspect.ismethod(a.floor_mod))
        self.assertTrue(inspect.ismethod(a.multiply))
        self.assertTrue(inspect.ismethod(a.logsumexp))
        self.assertTrue(inspect.ismethod(a.inverse))
        self.assertTrue(inspect.ismethod(a.log1p))
        self.assertTrue(inspect.ismethod(a.erf))
        self.assertTrue(inspect.ismethod(a.addmm))
        self.assertTrue(inspect.ismethod(a.clip))
        self.assertTrue(inspect.ismethod(a.trace))
        self.assertTrue(inspect.ismethod(a.kron))
        self.assertTrue(inspect.ismethod(a.isinf))
        self.assertTrue(inspect.ismethod(a.isnan))
        self.assertTrue(inspect.ismethod(a.concat))
        self.assertTrue(inspect.ismethod(a.broadcast_to))
        self.assertTrue(inspect.ismethod(a.scatter_nd_add))
        self.assertTrue(inspect.ismethod(a.scatter_nd))
        self.assertTrue(inspect.ismethod(a.shard_index))
        self.assertTrue(inspect.ismethod(a.chunk))
        self.assertTrue(inspect.ismethod(a.stack))
        self.assertTrue(inspect.ismethod(a.strided_slice))
        self.assertTrue(inspect.ismethod(a.unsqueeze))
        self.assertTrue(inspect.ismethod(a.unstack))
        self.assertTrue(inspect.ismethod(a.argmax))
        self.assertTrue(inspect.ismethod(a.argmin))
        self.assertTrue(inspect.ismethod(a.argsort))
        self.assertTrue(inspect.ismethod(a.masked_select))
        self.assertTrue(inspect.ismethod(a.topk))
        self.assertTrue(inspect.ismethod(a.index_select))
        self.assertTrue(inspect.ismethod(a.nonzero))
        self.assertTrue(inspect.ismethod(a.sort))
        self.assertTrue(inspect.ismethod(a.index_sample))
        self.assertTrue(inspect.ismethod(a.mean))
        self.assertTrue(inspect.ismethod(a.std))
        self.assertTrue(inspect.ismethod(a.numel))
Example 9
def retrieval_eval(engine, epoch_id=0):
    engine.model.eval()
    # step1. build gallery
    if engine.gallery_query_dataloader is not None:
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery_query')
        query_feas, query_img_id, query_query_id = gallery_feas, gallery_img_id, gallery_unique_id
    else:
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery')
        query_feas, query_img_id, query_query_id = cal_feature(engine,
                                                               name='query')

    # step2. do evaluation
    sim_block_size = engine.config["Global"].get("sim_block_size", 64)
    sections = [sim_block_size] * (len(query_feas) // sim_block_size)
    if len(query_feas) % sim_block_size:
        sections.append(len(query_feas) % sim_block_size)
    fea_blocks = paddle.split(query_feas, num_or_sections=sections)
    if query_query_id is not None:
        query_id_blocks = paddle.split(query_query_id,
                                       num_or_sections=sections)
    image_id_blocks = paddle.split(query_img_id, num_or_sections=sections)
    metric_key = None

    if engine.eval_loss_func is None:
        metric_dict = {metric_key: 0.}
    else:
        metric_dict = dict()
        for block_idx, block_fea in enumerate(fea_blocks):
            similarity_matrix = paddle.matmul(block_fea,
                                              gallery_feas,
                                              transpose_y=True)
            if query_query_id is not None:
                query_id_block = query_id_blocks[block_idx]
                query_id_mask = (query_id_block != gallery_unique_id.t())

                image_id_block = image_id_blocks[block_idx]
                image_id_mask = (image_id_block != gallery_img_id.t())

                keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
                similarity_matrix = similarity_matrix * keep_mask.astype(
                    "float32")
            else:
                keep_mask = None

            metric_tmp = engine.eval_metric_func(similarity_matrix,
                                                 image_id_blocks[block_idx],
                                                 gallery_img_id, keep_mask)

            for key in metric_tmp:
                if key not in metric_dict:
                    metric_dict[key] = metric_tmp[key] * block_fea.shape[
                        0] / len(query_feas)
                else:
                    metric_dict[key] += metric_tmp[key] * block_fea.shape[
                        0] / len(query_feas)

    metric_info_list = []
    for key in metric_dict:
        if metric_key is None:
            metric_key = key
        metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key]))
    metric_msg = ", ".join(metric_info_list)
    logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))

    return metric_dict[metric_key]
Example 10
    def test_tensor_patch_method(self):
        paddle.disable_static()
        x_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
        y_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
        z_np = np.random.uniform(-1, 1, [6, 9]).astype(self.dtype)

        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        z = paddle.to_tensor(z_np)

        a = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])
        b = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])

        # 1. Unary operation for Tensor
        self.assertEqual(x.dim(), 2)
        self.assertEqual(x.ndimension(), 2)
        self.assertEqual(x.ndim, 2)
        self.assertEqual(x.size(), [2, 3])
        self.assertTrue(
            np.array_equal(x.sigmoid().numpy(),
                           fluid.layers.sigmoid(x).numpy()))
        self.assertTrue(
            np.array_equal(x.logsigmoid().numpy(),
                           fluid.layers.logsigmoid(x).numpy()))
        self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy()))
        self.assertTrue(
            np.array_equal(x.tanh().numpy(),
                           paddle.tanh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.atan().numpy(),
                           paddle.atan(x).numpy()))
        self.assertTrue(
            np.array_equal(x.tanh_shrink().numpy(),
                           fluid.layers.tanh_shrink(x).numpy()))
        self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy()))
        m = x.abs()
        self.assertTrue(
            np.array_equal(m.sqrt().numpy(),
                           paddle.sqrt(m).numpy()))
        self.assertTrue(
            np.array_equal(m.rsqrt().numpy(),
                           paddle.rsqrt(m).numpy()))
        self.assertTrue(
            np.array_equal(x.ceil().numpy(),
                           paddle.ceil(x).numpy()))
        self.assertTrue(
            np.array_equal(x.floor().numpy(),
                           paddle.floor(x).numpy()))
        self.assertTrue(np.array_equal(x.cos().numpy(), paddle.cos(x).numpy()))
        self.assertTrue(
            np.array_equal(x.acos().numpy(),
                           paddle.acos(x).numpy()))
        self.assertTrue(
            np.array_equal(x.asin().numpy(),
                           paddle.asin(x).numpy()))
        self.assertTrue(np.array_equal(x.sin().numpy(), paddle.sin(x).numpy()))
        self.assertTrue(
            np.array_equal(x.sinh().numpy(),
                           paddle.sinh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.cosh().numpy(),
                           paddle.cosh(x).numpy()))
        self.assertTrue(
            np.array_equal(x.round().numpy(),
                           paddle.round(x).numpy()))
        self.assertTrue(
            np.array_equal(x.reciprocal().numpy(),
                           paddle.reciprocal(x).numpy()))
        self.assertTrue(
            np.array_equal(x.square().numpy(),
                           paddle.square(x).numpy()))
        self.assertTrue(
            np.array_equal(x.softplus().numpy(),
                           fluid.layers.softplus(x).numpy()))
        self.assertTrue(
            np.array_equal(x.softsign().numpy(),
                           fluid.layers.softsign(x).numpy()))
        self.assertTrue(
            np.array_equal(x.rank().numpy(),
                           paddle.rank(x).numpy()))
        self.assertTrue(
            np.array_equal(x[0].t().numpy(),
                           paddle.t(x[0]).numpy()))
        m = paddle.to_tensor(np.random.uniform(1, 2, [3, 3]), 'float32')
        m = m.matmul(m.t())
        self.assertTrue(
            np.array_equal(m.cholesky().numpy(),
                           paddle.cholesky(m).numpy()))

        self.assertTrue(
            np.array_equal(x.is_empty().numpy(),
                           paddle.is_empty(x).numpy()))
        self.assertTrue(
            np.array_equal(x.isfinite().numpy(),
                           paddle.isfinite(x).numpy()))
        self.assertTrue(
            np.array_equal(
                x.cast('int32').numpy(),
                paddle.cast(x, 'int32').numpy()))
        self.assertTrue(
            np.array_equal(
                x.expand([3, 2, 3]).numpy(),
                paddle.expand(x, [3, 2, 3]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.tile([2, 2]).numpy(),
                paddle.tile(x, [2, 2]).numpy()))
        self.assertTrue(
            np.array_equal(x.flatten().numpy(),
                           paddle.flatten(x).numpy()))
        index = paddle.to_tensor([0, 1])
        self.assertTrue(
            np.array_equal(
                x.gather(index).numpy(),
                paddle.gather(x, index).numpy()))
        index = paddle.to_tensor([[0, 1], [1, 2]])
        self.assertTrue(
            np.array_equal(
                x.gather_nd(index).numpy(),
                paddle.gather_nd(x, index).numpy()))
        self.assertTrue(
            np.array_equal(
                x.reverse([0, 1]).numpy(),
                paddle.reverse(x, [0, 1]).numpy()))
        self.assertTrue(
            np.array_equal(
                a.reshape([3, 2]).numpy(),
                paddle.reshape(a, [3, 2]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.slice([0, 1], [0, 0], [1, 2]).numpy(),
                paddle.slice(x, [0, 1], [0, 0], [1, 2]).numpy()))
        self.assertTrue(
            np.array_equal(
                x.split(2)[0].numpy(),
                paddle.split(x, 2)[0].numpy()))
        m = paddle.to_tensor(
            np.random.uniform(-1, 1, [1, 6, 1, 1]).astype(self.dtype))
        self.assertTrue(
            np.array_equal(
                m.squeeze([]).numpy(),
                paddle.squeeze(m, []).numpy()))
        self.assertTrue(
            np.array_equal(
                m.squeeze([1, 2]).numpy(),
                paddle.squeeze(m, [1, 2]).numpy()))
        m = paddle.to_tensor([2, 3, 3, 1, 5, 3], 'float32')
        self.assertTrue(
            np.array_equal(m.unique()[0].numpy(),
                           paddle.unique(m)[0].numpy()))
        self.assertTrue(
            np.array_equal(m.unique_with_counts()[2],
                           paddle.unique_with_counts(m)[2]))
        self.assertTrue(np.array_equal(x.flip([0]), paddle.flip(x, [0])))
        self.assertTrue(np.array_equal(x.unbind(0), paddle.unbind(x, 0)))
        self.assertTrue(np.array_equal(x.roll(1), paddle.roll(x, 1)))
        self.assertTrue(np.array_equal(x.cumsum(1), paddle.cumsum(x, 1)))
        m = paddle.to_tensor(1)
        self.assertTrue(np.array_equal(m.increment(), paddle.increment(m)))
        m = x.abs()
        self.assertTrue(np.array_equal(m.log(), paddle.log(m)))
        self.assertTrue(np.array_equal(x.pow(2), paddle.pow(x, 2)))
        self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x)))

        # 2. Binary operation
        self.assertTrue(
            np.array_equal(
                x.matmul(y, True, False).numpy(),
                paddle.matmul(x, y, True, False).numpy()))
        self.assertTrue(
            np.array_equal(
                x.norm(p='fro', axis=[0, 1]).numpy(),
                paddle.norm(x, p='fro', axis=[0, 1]).numpy()))
        self.assertTrue(
            np.array_equal(x.dist(y).numpy(),
                           paddle.dist(x, y).numpy()))
        self.assertTrue(
            np.array_equal(x.cross(y).numpy(),
                           paddle.cross(x, y).numpy()))
        m = x.expand([2, 2, 3])
        n = y.expand([2, 2, 3]).transpose([0, 2, 1])
        self.assertTrue(
            np.array_equal(m.bmm(n).numpy(),
                           paddle.bmm(m, n).numpy()))
        self.assertTrue(
            np.array_equal(
                x.histogram(5, -1, 1).numpy(),
                paddle.histogram(x, 5, -1, 1).numpy()))
        self.assertTrue(
            np.array_equal(x.equal(y).numpy(),
                           paddle.equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.greater_equal(y).numpy(),
                paddle.greater_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.greater_than(y).numpy(),
                paddle.greater_than(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.less_equal(y).numpy(),
                paddle.less_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.less_than(y).numpy(),
                paddle.less_than(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.not_equal(y).numpy(),
                paddle.not_equal(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.equal_all(y).numpy(),
                paddle.equal_all(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.allclose(y).numpy(),
                paddle.allclose(x, y).numpy()))
        m = x.expand([2, 2, 3])
        self.assertTrue(
            np.array_equal(
                x.expand_as(m).numpy(),
                paddle.expand_as(x, m).numpy()))
        index = paddle.to_tensor([2, 1, 0])
        self.assertTrue(
            np.array_equal(
                a.scatter(index, b).numpy(),
                paddle.scatter(a, index, b).numpy()))

        # 3. Bool tensor operation
        x = paddle.to_tensor([[True, False], [True, False]])
        y = paddle.to_tensor([[False, False], [False, True]])
        self.assertTrue(
            np.array_equal(x.reduce_all().numpy(),
                           paddle.reduce_all(x).numpy()))
        self.assertTrue(
            np.array_equal(x.reduce_any().numpy(),
                           paddle.reduce_any(x).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_and(y).numpy(),
                paddle.logical_and(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_not(y).numpy(),
                paddle.logical_not(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_or(y).numpy(),
                paddle.logical_or(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_xor(y).numpy(),
                paddle.logical_xor(x, y).numpy()))
        self.assertTrue(
            np.array_equal(
                x.logical_and(y).numpy(),
                paddle.logical_and(x, y).numpy()))
Example 11
    def forward(self, inputs, inputs_):
        """
        forward
        """
        x = paddle.logical_or(inputs, inputs_)
        return x
Example 12
def logical_or(a, b):
    # `convertTensor` is assumed to be a wrapper helper defined by the
    # surrounding test harness.
    return convertTensor(paddle.logical_or(a, b))
Example 13
    def get_loss(self, head_outputs, targets):
        """Here we calculate loss for a batch of images.
        We assign anchors to gts in each image and gather all the assigned
        positive and negative samples. Then loss is calculated on the gathered
        samples.
        """
        cls_logits_list, bboxes_reg_list = head_outputs
        anchors = self.anchor_generator(cls_logits_list)
        anchors = paddle.concat(anchors)

        # matches: contain gt_inds
        # match_labels: -1(ignore), 0(neg) or 1(pos)
        matches_list, match_labels_list = [], []
        # assign anchors to gts, no sampling is involved
        for gt_bbox in targets['gt_bbox']:
            matches, match_labels = self.bbox_assigner(anchors, gt_bbox)
            matches_list.append(matches)
            match_labels_list.append(match_labels)

        # reshape network outputs
        cls_logits = [
            _.transpose([0, 2, 3, 1]).reshape([0, -1, self.num_classes])
            for _ in cls_logits_list
        ]
        bboxes_reg = [
            _.transpose([0, 2, 3, 1]).reshape([0, -1, 4])
            for _ in bboxes_reg_list
        ]
        cls_logits = paddle.concat(cls_logits, axis=1)
        bboxes_reg = paddle.concat(bboxes_reg, axis=1)

        cls_pred_list, cls_tar_list = [], []
        reg_pred_list, reg_tar_list = [], []
        # find and gather preds and targets in each image
        for matches, match_labels, cls_logit, bbox_reg, gt_bbox, gt_class in \
            zip(matches_list, match_labels_list, cls_logits, bboxes_reg,
                targets['gt_bbox'], targets['gt_class']):
            pos_mask = (match_labels == 1)
            neg_mask = (match_labels == 0)
            chosen_mask = paddle.logical_or(pos_mask, neg_mask)

            gt_class = gt_class.reshape([-1])
            bg_class = paddle.to_tensor([self.num_classes],
                                        dtype=gt_class.dtype)
            # a trick to assign num_classes to negative targets
            gt_class = paddle.concat([gt_class, bg_class], axis=-1)
            matches = paddle.where(
                neg_mask, paddle.full_like(matches, gt_class.size - 1),
                matches)

            cls_pred = cls_logit[chosen_mask]
            cls_tar = gt_class[matches[chosen_mask]]
            reg_pred = bbox_reg[pos_mask].reshape([-1, 4])
            reg_tar = gt_bbox[matches[pos_mask]].reshape([-1, 4])
            reg_tar = bbox2delta(anchors[pos_mask], reg_tar, self.weights)
            cls_pred_list.append(cls_pred)
            cls_tar_list.append(cls_tar)
            reg_pred_list.append(reg_pred)
            reg_tar_list.append(reg_tar)
        cls_pred = paddle.concat(cls_pred_list)
        cls_tar = paddle.concat(cls_tar_list)
        reg_pred = paddle.concat(reg_pred_list)
        reg_tar = paddle.concat(reg_tar_list)

        avg_factor = max(1.0, reg_pred.shape[0])
        cls_loss = self.loss_class(cls_pred, cls_tar,
                                   reduction='sum') / avg_factor

        if reg_pred.shape[0] == 0:
            reg_loss = paddle.zeros([1])
            reg_loss.stop_gradient = False
        else:
            reg_loss = self.loss_bbox(reg_pred, reg_tar,
                                      reduction='sum') / avg_factor

        loss = cls_loss + reg_loss
        out_dict = {
            'loss_cls': cls_loss,
            'loss_reg': reg_loss,
            'loss': loss,
        }
        return out_dict
Example 14
    def forward(self, indices, segments, positions, input_mask):
        r'''
        The BertModel forward method, overrides the `__call__()` special method.

        Args:
            indices (Tensor):
                Indices of input sequence tokens in the vocabulary. They are
                numerical representations of tokens that build the input sequence.
                Its data type should be `int32` and it has a shape of [batch_size * sequence_length].
            segments (Tensor):
                Segment token indices to indicate different portions of the inputs.
                Selected in the range ``[0, type_vocab_size - 1]``.
                Its data type should be `int32` and it has a shape of [batch_size * sequence_length].
            positions(Tensor):
                Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
                max_position_embeddings - 1]``.
                Shape as `[batch_size * sequence_length]` and dtype as int32.
            input_mask (Tensor, optional):
                Mask used in multi-head attention to avoid performing attention on some unwanted positions,
                usually the paddings or the subsequent positions.
                If the task is PRETRAINING:
                    input_mask[0] is the index that masking starts in the mask_tokens
                    input_mask[1] is the index that masking starts in the rest of the sequence
                Otherwise
                    input_mask is the mask tensor that has -1000 in positions to be masked and 0 otherwise.

        Returns:
            tuple: Returns tuple (`sequence_output`, `word_embeddings_weights`).

            With the fields:

            - `sequence_output` (Tensor):
                Sequence of hidden-states at the last layer of the model.
                Its data type should be float32 and its shape is [batch_size, sequence_length, hidden_size].
        '''

        with self.config.embeddings_scope:
            sequence_output, word_embeddings_weights = self.embedding(
                indices, segments, positions)

        if self.config.task == "PRETRAINING":
            with paddle.static.ipu_shard_guard(index=0, stage=0):
                input_mask[0] = self.custom_ops.detach(input_mask[0])
                input_mask[1] = self.custom_ops.detach(input_mask[1])

        for i in range(self.config.num_hidden_layers):
            # Attention
            attn_scope = self.config.attn_scopes[i]
            with attn_scope:
                with paddle.static.name_scope(f"Layer{i}/Attention"):
                    layer_input = sequence_output
                    q = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    k = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    v = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    qkv = paddle.concat([q, k, v], axis=1)
                    qkv = paddle.matmul(sequence_output, qkv)
                    qkv.block.ops[-1]._set_attr(
                        '__available_memory',
                        self.config.available_mem_proportion)
                    q, k, v = paddle.split(qkv,
                                           num_or_sections=[
                                               self.config.hidden_size,
                                               self.config.hidden_size,
                                               self.config.hidden_size
                                           ],
                                           axis=1)
                    q = paddle.reshape(q, self.qkv_shape)
                    q = paddle.transpose(q, [0, 2, 1, 3])
                    k = paddle.reshape(k, self.qkv_shape)
                    k = paddle.transpose(k, [0, 2, 3, 1])
                    v = paddle.reshape(v, self.qkv_shape)
                    v = paddle.transpose(v, [0, 2, 1, 3])

                    # Attention calculation
                    with paddle.static.name_scope(f"Z"):
                        if self.config.task == "PRETRAINING":
                            if attn_scope.index in self.masks:
                                final_mask = self.masks[attn_scope.index]
                            else:
                                with paddle.static.name_scope("Mask"):
                                    base_value = np.arange(
                                        self.config.seq_len).astype('int32')
                                    base = paddle.fluid.layers.assign(
                                        base_value)
                                    mmask = paddle.less_than(
                                        base, input_mask[0])
                                    mask_value = np.greater_equal(
                                        base_value,
                                        self.config.max_predictions_per_seq)
                                    mask = paddle.fluid.layers.assign(
                                        mask_value)
                                    mmask = paddle.logical_or(mmask, mask)
                                    smask = paddle.less_than(
                                        base, input_mask[1])
                                    final_mask = paddle.logical_and(
                                        mmask, smask)
                                    final_mask = paddle.cast(
                                        final_mask, "float16")
                                    sub_attrs = {
                                        'name': 'constant_sub',
                                        'shape': [1],
                                        'dtype': 'float32',
                                        'value': 1,
                                    }
                                    mul_attrs = {
                                        'name': 'constant_mul',
                                        'shape': [1],
                                        'dtype': 'float32',
                                        'value': 1000,
                                    }
                                    final_mask = paddle.fluid.layers.elementwise_sub(
                                        final_mask,
                                        paddle.fluid.layers.fill_constant(
                                            **sub_attrs))
                                    final_mask = paddle.fluid.layers.elementwise_mul(
                                        final_mask,
                                        paddle.fluid.layers.fill_constant(
                                            **mul_attrs))
                                    final_mask = paddle.reshape(
                                        final_mask,
                                        [-1, 1, 1, self.config.seq_len])
                                    final_mask = self.custom_ops.detach(
                                        final_mask)
                                    self.masks[attn_scope.index] = final_mask

                        qk = paddle.matmul(q, k)
                        qk.block.ops[-1]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                        qk_scale = paddle.fluid.layers.fill_constant(
                            **self.qk_scale_attrs)
                        qk = paddle.fluid.layers.elementwise_mul(qk, qk_scale)

                        if self.config.task == "PRETRAINING":
                            qk = paddle.fluid.layers.elementwise_add(
                                qk, final_mask)
                        else:
                            # for SQUAD task, input_mask is calculated in data preprocessing
                            qk = paddle.fluid.layers.elementwise_add(
                                qk, input_mask)

                        qk = paddle.fluid.layers.softmax(qk)
                        if self.config.task == "SQUAD":
                            qk = paddle.fluid.layers.dropout(
                                qk,
                                self.config.attention_probs_dropout_prob,
                                dropout_implementation='upscale_in_train')
                        qkv = paddle.matmul(qk, v)
                        qkv.block.ops[-1]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                        qkv = paddle.transpose(qkv, [0, 2, 1, 3])
                        qkv = paddle.reshape(qkv,
                                             [-1, self.config.hidden_size])

                    qkv_linear = nn.Linear(self.config.hidden_size,
                                           self.config.hidden_size,
                                           bias_attr=False)
                    qkv = qkv_linear(qkv)
                    qkv.block.ops[-1]._set_attr(
                        '__available_memory',
                        self.config.available_mem_proportion)
                    qkv = paddle.fluid.layers.dropout(
                        qkv,
                        self.config.attention_probs_dropout_prob,
                        dropout_implementation='upscale_in_train')
                    attention = paddle.add(layer_input, qkv)
                    layer_norm1 = nn.LayerNorm(self.config.hidden_size,
                                               epsilon=0.001)
                    attention = layer_norm1(attention)

            # FF
            with self.config.ff_scopes[i]:
                with paddle.static.name_scope(f"Layer{i}/FF"):
                    ff_linear1 = nn.Linear(self.config.hidden_size,
                                           4 * self.config.hidden_size)
                    ff_linear2 = nn.Linear(4 * self.config.hidden_size,
                                           self.config.hidden_size)
                    with paddle.static.name_scope(f"1"):
                        ff = ff_linear1(attention)
                        ff.block.ops[-2]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                    ff = paddle.fluid.layers.gelu(ff, approximate=True)
                    with paddle.static.name_scope(f"2"):
                        ff = ff_linear2(ff)
                        ff.block.ops[-2]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                    ff = paddle.fluid.layers.dropout(
                        ff,
                        self.config.attention_probs_dropout_prob,
                        dropout_implementation='upscale_in_train')
                    ff = paddle.add(attention, ff)
                    layer_norm2 = nn.LayerNorm(self.config.hidden_size,
                                               epsilon=0.001)
                    sequence_output = layer_norm2(ff)

                if self.should_checkpoint(i):
                    with paddle.static.name_scope(f"Layer{i}"):
                        logging.info(f'add checkpointoutput for ff_{i}')
                        sequence_output = self.custom_ops.checkpointoutput(
                            sequence_output)
        return sequence_output, word_embeddings_weights
Example 15
    def __or__(self, other):
        return convertTensor(paddle.logical_or(self, other))
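
For reference, a tiny sketch (made-up values) of the plain call that the operator overload above delegates to, assuming `paddle` is imported:

a = paddle.to_tensor([True, False, True])
b = paddle.to_tensor([False, False, True])
c = paddle.logical_or(a, b)  # Tensor([True, False, True])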