Example #1
    def forward(self, pred, label):

        label = label.reshape(pred.shape)
        sample_weight = label != self._ignore_label
        label = paddle.where(sample_weight, label, paddle.zeros_like(label))

        if not self._from_sigmoid:
            loss = F.relu(pred) - pred * label + F.softplus(-paddle.abs(pred))
        else:
            eps = 1e-12
            loss = -(paddle.log(pred + eps) * label +
                     paddle.log(1. - pred + eps) * (1. - label))
        loss = self._weight * (loss * sample_weight)
        return paddle.mean(loss,
                           axis=misc.get_dims_with_exclusion(
                               len(loss.shape), self._batch_axis))
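Every snippet on this page comes down to the same elementwise select. As a minimal standalone sketch (assuming a Paddle 2.x install; the tensors below are made up for illustration), paddle.where(cond, x, y) picks from x where cond is True and from y everywhere else:

import paddle

cond = paddle.to_tensor([True, False, True])
x = paddle.to_tensor([1.0, 2.0, 3.0])
y = paddle.to_tensor([10.0, 20.0, 30.0])
# Elementwise select: take x[i] where cond[i] is True, else y[i].
print(paddle.where(cond, x, y).numpy())  # [ 1. 20.  3.]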
Example #2
def bbox_overlaps(boxes1, boxes2):
    area1 = bbox_area(boxes1)
    area2 = bbox_area(boxes2)

    xy_max = paddle.minimum(
        paddle.unsqueeze(boxes1, 1)[:, :, 2:], boxes2[:, 2:])
    xy_min = paddle.maximum(
        paddle.unsqueeze(boxes1, 1)[:, :, :2], boxes2[:, :2])
    width_height = xy_max - xy_min
    width_height = width_height.clip(min=0)
    inter = width_height.prod(axis=2)

    overlaps = paddle.where(
        inter > 0, inter / (paddle.unsqueeze(area1, 1) + area2 - inter),
        paddle.zeros_like(inter))
    return overlaps
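A quick hand-checked usage of bbox_overlaps above. The bbox_area helper is not part of the snippet, so a plausible [x1, y1, x2, y2] definition is assumed here:

import paddle

def bbox_area(boxes):
    # Assumed helper: area of [x1, y1, x2, y2] boxes, matching how the corners are sliced above.
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

boxes1 = paddle.to_tensor([[0., 0., 2., 2.]])
boxes2 = paddle.to_tensor([[1., 1., 3., 3.]])
# Intersection is 1, union is 4 + 4 - 1 = 7, so the IoU should be about 0.143.
print(bbox_overlaps(boxes1, boxes2).numpy())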
Example #3
 def forward(self, feature, label):
     cos_theta = paddle.mm(F.normalize(feature, axis=1),
                           F.normalize(self.weight, axis=0))
     sin_theta = paddle.sqrt(
         paddle.clip(1.0 - paddle.pow(cos_theta, 2), min=0, max=1))
     cos_theta_m = cos_theta * self.cos_m - sin_theta * self.sin_m
     cos_theta_m = paddle.where(cos_theta > self.threshold, cos_theta_m,
                                cos_theta - self.mm)
     one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
     output = (one_hot * cos_theta_m) + (paddle.abs(
         (1.0 - one_hot)) * cos_theta)
     output *= self.s
     # A simple classification approach; the learning rate needs to be set to 0.1
     # cosine = self.cosine_sim(feature, self.weight)
     # one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
     # output = self.s * (cosine - one_hot * self.m)
     return output
Example #4
 def forward(self, input, target):
     if self.log_target:
         out = paddle.exp(target) * (target - input)
     else:
         out_pos = target * (paddle.log(target) - input)
         zeros = paddle.zeros_like(out_pos)
         out = paddle.where(target > 0, out_pos, zeros)
     out_sum = paddle.sum(out)
     if self.reduction == "sum":
         return out_sum
     elif self.reduction == "batchmean":
         n = input.shape[0]
         return out_sum / n
     elif self.reduction == "mean":
         return paddle.mean(out)
     else:
         return out
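In the non-log-target branch above, paddle.where is what keeps the 0 * log(0) terms from propagating NaN. A tiny standalone check, independent of the class (values made up):

import paddle

target = paddle.to_tensor([0.0, 0.5, 0.5])
inp = paddle.to_tensor([-1.0, -0.7, -0.7])            # log-probabilities
out_pos = target * (paddle.log(target) - inp)         # first entry is NaN (0 * -inf)
out = paddle.where(target > 0, out_pos, paddle.zeros_like(out_pos))
print(out.numpy())                                    # ~[0., 0.0034, 0.0034]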
Example #5
 def test_api(self, use_cuda=False):
     for x_stop_gradient in [False, True]:
         for y_stop_gradient in [False, True]:
             with fluid.program_guard(Program(), Program()):
                 cond = fluid.layers.data(name='cond',
                                          shape=self.shape,
                                          dtype='bool')
                 x = fluid.layers.data(name='x',
                                       shape=self.shape,
                                       dtype='float32')
                 y = fluid.layers.data(name='y',
                                       shape=self.shape,
                                       dtype='float32')
                 x.stop_gradient = x_stop_gradient
                 y.stop_gradient = y_stop_gradient
                 result = paddle.where(cond, x, y)
                 append_backward(layers.mean(result))
                 for use_cuda in [False, True]:
                     if (use_cuda
                             and (not fluid.core.is_compiled_with_cuda())):
                         break
                     place = (fluid.CUDAPlace(0)
                              if use_cuda else fluid.CPUPlace())
                     exe = fluid.Executor(place)
                     fetch_list = [result, result.grad_name]
                     if (x_stop_gradient is False):
                         fetch_list.append(x.grad_name)
                     if (y_stop_gradient is False):
                         fetch_list.append(y.grad_name)
                     out = exe.run(fluid.default_main_program(),
                                   feed={
                                       'cond': self.cond,
                                       'x': self.x,
                                       'y': self.y
                                   },
                                   fetch_list=fetch_list)
                     assert np.array_equal(out[0], self.out)
                     if (x_stop_gradient is False):
                         assert np.array_equal(out[2],
                                               self.ref_x_backward(out[1]))
                         if (y.stop_gradient is False):
                             assert np.array_equal(
                                 out[3], self.ref_y_backward(out[1]))
                     elif (y.stop_gradient is False):
                         assert np.array_equal(out[2],
                                               self.ref_y_backward(out[1]))
Example #6
    def forward(self, bond_types_batch, type_count_batch, bond_feat):
        """
        Input example:
            bond_types_batch: [0,0,2,0,1,2] + [0,0,2,0,1,2] + [2]
            type_count_batch: [[3, 3, 0], [1, 1, 0], [2, 2, 1]] # [num_type, batch_size]
        """
        bond_feat = self.fc_1(
            paddle.reshape(bond_feat, [-1, self.num_angle * self.bond_dim]))
        inter_mat_list = []
        for type_i in range(self.num_type):
            type_i_index = paddle.masked_select(paddle.arange(len(bond_feat)),
                                                bond_types_batch == type_i)
            if paddle.sum(type_count_batch[type_i]) == 0:
                inter_mat_list.append(
                    paddle.to_tensor(np.array([0.] *
                                              len(type_count_batch[type_i])),
                                     dtype='float32'))
                continue
            bond_feat_type_i = paddle.gather(bond_feat, type_i_index)
            graph_bond_index = op.get_index_from_counts(
                type_count_batch[type_i])
            # graph_bond_id = generate_segment_id_from_index(graph_bond_index)
            graph_bond_id = generate_segment_id(graph_bond_index)
            graph_feat_type_i = math.segment_pool(bond_feat_type_i,
                                                  graph_bond_id,
                                                  pool_type='sum')
            mat_flat_type_i = self.fc_2(graph_feat_type_i).squeeze(1)

            # print(graph_bond_id)
            # print(graph_bond_id.shape, graph_feat_type_i.shape, mat_flat_type_i.shape)
            my_pad = nn.Pad1D(padding=[
                0, len(type_count_batch[type_i]) - len(mat_flat_type_i)
            ],
                              value=-1e9)
            mat_flat_type_i = my_pad(mat_flat_type_i)
            inter_mat_list.append(mat_flat_type_i)

        inter_mat_batch = paddle.stack(inter_mat_list,
                                       axis=1)  # [batch_size, num_type]
        inter_mat_mask = paddle.ones_like(inter_mat_batch) * -1e9
        inter_mat_batch = paddle.where(
            type_count_batch.transpose([1, 0]) > 0, inter_mat_batch,
            inter_mat_mask)
        inter_mat_batch = self.softmax(inter_mat_batch)
        return inter_mat_batch
Example #7
def box_overlap_opr(box, gt):
    assert box.ndim == 2
    assert gt.ndim == 2
    area_box = (box[:, 2] - box[:, 0] + 1) * (box[:, 3] - box[:, 1] + 1)
    area_gt = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1)
    width_height = torch.minimum(
        box[:, 2:].unsqueeze(axis=-2), gt[:, 2:]) - torch.maximum(
            box[:, :2].unsqueeze(axis=-2), gt[:, :2]) + 1  # [N,M,2]
    width_height = clamp(width_height, min=0, max=float('inf'))  # [N,M,2]
    inter = width_height.prod(axis=2)  # [N,M]
    del width_height
    # handle empty boxes
    iou = torch.where(
        inter > 0,
        inter / (area_box.unsqueeze(axis=-1) + area_gt - inter),
        torch.zeros(torch.to_tensor([1]), dtype=inter.dtype),
    )
    return iou
Example #8
    def forward(
            self,
            hidden_states,
            attention_mask=None,
            head_mask=None,
            encoder_hidden_states=None,
            encoder_attention_mask=None,
            past_key_value=None,
            output_attentions=False,
            rel_pos=None,
            rel_2d_pos=None, ):
        q, k, v = self.compute_qkv(hidden_states)

        # (B, L, H*D) -> (B, H, L, D)
        query_layer = self.transpose_for_scores(q)
        key_layer = self.transpose_for_scores(k)
        value_layer = self.transpose_for_scores(v)

        query_layer = query_layer / math.sqrt(self.attention_head_size)
        # [BSZ, NAT, L, L]
        attention_scores = paddle.matmul(query_layer,
                                         key_layer.transpose([0, 1, 3, 2]))
        if self.has_relative_attention_bias:
            attention_scores += rel_pos
        if self.has_spatial_attention_bias:
            attention_scores += rel_2d_pos
        attention_scores = paddle.where(
            attention_mask.astype(paddle.bool).expand_as(attention_scores),
            paddle.ones_like(attention_scores) * float("-inf"),
            attention_scores)
        attention_probs = F.softmax(attention_scores, axis=-1)
        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)
        context_layer = paddle.matmul(attention_probs, value_layer)
        context_layer = context_layer.transpose([0, 2, 1, 3])
        new_context_layer_shape = context_layer.shape[:-2] + [
            self.all_head_size
        ]
        context_layer = context_layer.reshape(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (
            context_layer, )
        return outputs
Example #9
    def test_api(self):
        for x_stop_gradient in [False, True]:
            for y_stop_gradient in [False, True]:
                train_prog = fluid.Program()
                startup = fluid.Program()
                with fluid.program_guard(train_prog, startup):
                    cond = fluid.data(name='cond',
                                      shape=self.shape,
                                      dtype='bool')
                    x = fluid.data(name='x', shape=self.shape, dtype='float32')
                    y = fluid.data(name='y', shape=self.shape, dtype='float32')

                    x.stop_gradient = x_stop_gradient
                    y.stop_gradient = y_stop_gradient

                    result = paddle.where(cond, x, y)
                    append_backward(fluid.layers.mean(result))

                    exe = fluid.Executor(self.place)
                    exe.run(startup)

                    fetch_list = [result, result.grad_name]
                    if x_stop_gradient is False:
                        fetch_list.append(x.grad_name)
                    if y_stop_gradient is False:
                        fetch_list.append(y.grad_name)
                    out = exe.run(train_prog,
                                  feed={
                                      'cond': self.cond,
                                      'x': self.x,
                                      'y': self.y
                                  },
                                  fetch_list=fetch_list)
                    assert np.array_equal(out[0], self.out)

                    if x_stop_gradient is False:
                        assert np.array_equal(out[2],
                                              self.ref_x_backward(out[1]))
                        if y.stop_gradient is False:
                            assert np.array_equal(out[3],
                                                  self.ref_y_backward(out[1]))
                    elif y.stop_gradient is False:
                        assert np.array_equal(out[2],
                                              self.ref_y_backward(out[1]))
Example #10
    def test_api_broadcast(self, use_cuda=False):
        train_prog = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(train_prog, startup):
            x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32')
            y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32')
            x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32")
            y_i = np.array([[1.0, 1.0, 1.0, 1.0],
                            [1.0, 1.0, 1.0, 1.0]]).astype("float32")
            result = paddle.where(x > 1, x=x, y=y)

            exe = fluid.Executor(self.place)
            exe.run(startup)

            out = exe.run(train_prog,
                          feed={'x': x_i,
                                'y': y_i},
                          fetch_list=[result])
            assert np.array_equal(out[0], np.where(x_i > 1, x_i, y_i))
Example #11
    def forward(self, x, lengths=None):
        C, L = x.shape[1], x.shape[2]  # KP: (N, C, L)

        def _compute_statistics(x, m, axis=2, eps=self.eps):
            mean = (m * x).sum(axis)
            std = paddle.sqrt(
                (m * (x - mean.unsqueeze(axis)).pow(2)).sum(axis).clip(eps))
            return mean, std

        if lengths is None:
            lengths = paddle.ones([x.shape[0]])

        # Make binary mask of shape [N, 1, L]
        mask = length_to_mask(lengths * L, max_len=L)
        mask = mask.unsqueeze(1)

        # Expand the temporal context of the pooling layer by allowing the
        # self-attention to look at global properties of the utterance.
        if self.global_context:
            total = mask.sum(axis=2, keepdim=True).astype('float32')
            mean, std = _compute_statistics(x, mask / total)
            mean = mean.unsqueeze(2).tile((1, 1, L))
            std = std.unsqueeze(2).tile((1, 1, L))
            attn = paddle.concat([x, mean, std], axis=1)
        else:
            attn = x

        # Apply layers
        attn = self.conv(self.tanh(self.tdnn(attn)))

        # Filter out zero-paddings
        attn = paddle.where(
            mask.tile((1, C, 1)) == 0,
            paddle.ones_like(attn) * float("-inf"), attn)

        attn = F.softmax(attn, axis=2)
        mean, std = _compute_statistics(x, attn)

        # Append mean and std of the batch
        pooled_stats = paddle.concat((mean, std), axis=1)
        pooled_stats = pooled_stats.unsqueeze(2)

        return pooled_stats
Example #12
  def _compute_loss(self,
                    prediction_tensor,
                    target_tensor,
                    weights,
                    class_indices=None):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]
      class_indices: (Optional) A 1-D integer tensor of class indices.
        If provided, computes loss only for the specified class indices.

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
        representing the value of the loss function.
    """
    weights = weights.unsqueeze(2)
    if class_indices is not None:
      weights *= indices_to_dense_vector(class_indices,
            prediction_tensor.shape[2]).reshape((1, 1, -1)).astype(prediction_tensor.dtype)
    per_entry_cross_ent = (_softmax_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor))
    # convert [N, num_anchors] to [N, num_anchors, num_classes]
    per_entry_cross_ent = per_entry_cross_ent.unsqueeze(-1) * target_tensor
    prediction_probabilities = F.softmax(prediction_tensor, axis=-1)
    p_t = ((target_tensor * prediction_probabilities) +
           ((1 - target_tensor) * (1 - prediction_probabilities)))
    modulating_factor = 1.0
    if self._gamma:
      modulating_factor = paddle.pow(1.0 - p_t, self._gamma)
    alpha_weight_factor = 1.0
    if self._alpha is not None:
      alpha_weight_factor = paddle.where(
          target_tensor[..., 0] == 1,
          paddle.to_tensor(1 - self._alpha).astype(per_entry_cross_ent.dtype),
          paddle.to_tensor(self._alpha).astype(per_entry_cross_ent.dtype))
    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
                                per_entry_cross_ent)
    return focal_cross_entropy_loss * weights
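For intuition about the modulating factor computed above, a toy calculation outside the class (gamma = 2 is the usual choice; the probabilities are made up):

import paddle

gamma = 2.0
p_t = paddle.to_tensor([0.9, 0.6, 0.1])      # probability assigned to the true class
# Easy examples (high p_t) are down-weighted by (1 - p_t) ** gamma.
print(paddle.pow(1.0 - p_t, gamma).numpy())  # ~[0.01, 0.16, 0.81]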
Example #13
 def get_discriminator_inputs(self, inputs, raw_inputs, gen_logits,
                              gen_labels, use_softmax_sample):
     """Sample from the generator to create discriminator input."""
     # get generator token result
     sampled_tokens = (self.sample_from_softmax(
         gen_logits, use_softmax_sample)).detach()
     sampled_tokids = paddle.argmax(sampled_tokens, axis=-1)
     # update token only at mask position
     # gen_labels : [B, L], L contains -100(unmasked) or token value(masked)
     # mask_positions : [B, L], L contains 0(unmasked) or 1(masked)
     umask_positions = paddle.zeros_like(gen_labels)
     mask_positions = paddle.ones_like(gen_labels)
     mask_positions = paddle.where(gen_labels == -100, umask_positions,
                                   mask_positions)
     updated_inputs = self.update_inputs(inputs, sampled_tokids,
                                         mask_positions)
     # use inputs and updated_input to get discriminator labels
     labels = mask_positions * (paddle.ones_like(inputs) - paddle.equal(
         updated_inputs, raw_inputs).astype("int32"))
     return updated_inputs, labels, sampled_tokids
Example #14
def rough_ROI(ref_scribble_labels):
    #### b*1*h*w
    dist = 20
    b, _, h, w = ref_scribble_labels.shape
    filter_ = paddle.zeros_like(ref_scribble_labels)
    to_fill = paddle.zeros_like(ref_scribble_labels)
    for i in range(b):
        no_background = (ref_scribble_labels[i] != -1)
        no_background = no_background.squeeze(0)

        no_b = no_background.nonzero()
        (h_min, w_min) = paddle.min(no_b, 0)
        (h_max, w_max) = paddle.max(no_b, 0)
        filter_[i, 0,
                max(h_min - dist, 0):min(h_max + dist, h - 1),
                max(w_min - dist, 0):min(w_max + dist, w - 1)] = 1

    final_scribble_labels = paddle.where(byte_(filter_), ref_scribble_labels,
                                         to_fill)
    return final_scribble_labels
Example #15
 def forward(self, pred, target, reduction='none'):
     """forward function, based on fvcore.
     Args:
         pred (Tensor): prediction tensor
         target (Tensor): target tensor, pred.shape must be the same as target.shape
         reduction (str): the way to reduce loss, one of (none, sum, mean)
     """
     assert reduction in ('none', 'sum', 'mean')
     target = target.detach()
     if self.beta < 1e-5:
         loss = paddle.abs(pred - target)
     else:
         n = paddle.abs(pred - target)
         cond = n < self.beta
         loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta)
     if reduction == 'mean':
         loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
     elif reduction == 'sum':
         loss = loss.sum()
     return loss * self.loss_weight
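The same piecewise rule as a quick standalone check (beta = 1.0 assumed, loss_weight and reduction omitted): quadratic inside the beta band, linear outside it.

import paddle

beta = 1.0
pred = paddle.to_tensor([0.2, 3.0])
target = paddle.to_tensor([0.0, 0.0])
n = paddle.abs(pred - target)
# 0.5 * 0.2**2 / 1.0 = 0.02 in the quadratic branch; 3.0 - 0.5 = 2.5 in the linear one.
loss = paddle.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
print(loss.numpy())  # [0.02 2.5 ]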
Example #16
def _compute_iou(pred_mask, gt_mask, ignore_mask=None, keep_ignore=False):
    if ignore_mask is not None:

        pred_mask = paddle.where(ignore_mask, paddle.zeros_like(pred_mask.astype('float32')), pred_mask.astype('float32'))

    reduction_dims = misc.get_dims_with_exclusion(len(gt_mask.shape), 0)
    pred_mask = pred_mask.astype('bool')
    m = pred_mask.numpy() | gt_mask.numpy()
    n = pred_mask.numpy() & gt_mask.numpy()
    union = np.mean(m.astype('float'), axis=tuple(reduction_dims))
    intersection = np.mean(n.astype('float'), axis=tuple(reduction_dims))

    nonzero = union > 0

    iou = intersection[nonzero] / union[nonzero]
    if not keep_ignore:
        return iou
    else:
        result = np.full_like(intersection, -1)
        result[nonzero] = iou
        return result
Example #17
    def forward(self, generator_prediction_scores,
                discriminator_prediction_scores, generator_labels,
                discriminator_labels):
        # generator loss
        gen_loss = self.gen_loss_fct(
            paddle.reshape(generator_prediction_scores, [-1, self.vocab_size]),
            paddle.reshape(generator_labels, [-1]))
        # TODO: the following 4 lines can be removed once CrossEntropyLoss(reduction='mean') is improved
        umask_positions = paddle.zeros_like(generator_labels).astype("float32")
        mask_positions = paddle.ones_like(generator_labels).astype("float32")
        mask_positions = paddle.where(generator_labels == -100,
                                      umask_positions, mask_positions)
        gen_loss = gen_loss.sum() / mask_positions.sum()

        # discriminator loss
        seq_length = discriminator_labels.shape[1]
        disc_loss = self.disc_loss_fct(
            paddle.reshape(discriminator_prediction_scores, [-1, seq_length]),
            discriminator_labels.astype("float32"))

        return self.gen_weight * gen_loss + self.disc_weight * disc_loss
Example #18
 def test_api_broadcast(self, use_cuda=False):
     main_program = Program()
     with fluid.program_guard(main_program):
         x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32')
         y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32')
         x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype('float32')
         y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0,
                                                1.0]]).astype('float32')
         result = paddle.where((x > 1), x=x, y=y)
         for use_cuda in [False, True]:
             if (use_cuda and (not fluid.core.is_compiled_with_cuda())):
                 return
             place = (fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace())
             exe = fluid.Executor(place)
             out = exe.run(fluid.default_main_program(),
                           feed={
                               'x': x_i,
                               'y': y_i
                           },
                           fetch_list=[result])
             assert np.array_equal(out[0], np.where((x_i > 1), x_i, y_i))
Example #19
    def forward(self, logits, label):
        """
        Forward computation.

        Args:
            logits (tuple|list): (seg_logit, edge_logit) Tensors, the data type is float32 or float64. Shape is
                (N, C), where C is the number of classes, and if the shape is more than 2D, this
                is (N, C, D1, D2,..., Dk), k >= 1. For edge_logit, C is 1.
            label (Tensor): Label tensor, the data type is int64. Shape is (N, C), where each
                value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
                (N, C, D1, D2,..., Dk), k >= 1.
        """
        seg_logit, edge_logit = logits[0], logits[1]
        if len(label.shape) != len(seg_logit.shape):
            label = paddle.unsqueeze(label, 1)
        if edge_logit.shape != label.shape:
            raise ValueError(
                'The shape of edge_logit should be equal to that of label, but they are {} != {}'
                .format(edge_logit.shape, label.shape))

        filler = paddle.ones_like(label) * self.ignore_index
        label = paddle.where(edge_logit > self.edge_threshold, label, filler)

        seg_logit = paddle.transpose(seg_logit, [0, 2, 3, 1])
        label = paddle.transpose(label, [0, 2, 3, 1])
        loss = F.softmax_with_cross_entropy(seg_logit,
                                            label,
                                            ignore_index=self.ignore_index,
                                            axis=-1)

        mask = label != self.ignore_index
        mask = paddle.cast(mask, 'float32')
        loss = loss * mask
        avg_loss = paddle.mean(loss) / (paddle.mean(mask) + self.EPS)
        if paddle.mean(mask) < self.mean_mask:
            self.mean_mask = paddle.mean(mask)

        label.stop_gradient = True
        mask.stop_gradient = True
        return avg_loss
Example #20
 def test_scalar(self):
     paddle.enable_static()
     main_program = Program()
     with fluid.program_guard(main_program):
         cond_shape = [2, 4]
         cond = fluid.layers.data(name='cond',
                                  shape=cond_shape,
                                  dtype='bool')
         x_data = 1.0
         y_data = 2.0
         cond_data = np.array([False, False, True, True]).astype('bool')
         result = paddle.where(condition=cond, x=x_data, y=y_data)
         for use_cuda in [False, True]:
             if (use_cuda and (not fluid.core.is_compiled_with_cuda())):
                 return
             place = (fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace())
             exe = fluid.Executor(place)
             out = exe.run(fluid.default_main_program(),
                           feed={'cond': cond_data},
                           fetch_list=[result])
             expect = np.where(cond_data, x_data, y_data)
             assert np.array_equal(out[0], expect)
Example #21
    def mask_tokens(self, examples):
        if self.tokenizer.mask_token is None:
            raise ValueError(
                "the tokenizer does not have mask_token, please check!")
        mask_token_id = self.tokenizer.convert_tokens_to_ids(
            self.tokenizer.mask_token)

        raw_inputs, probability_matrix = self.add_special_tokens_and_set_maskprob(
            examples, True, self.max_seq_length)
        raw_inputs = self.tensorize_batch(raw_inputs, "int64")
        probability_matrix = self.tensorize_batch(probability_matrix, "float32")
        inputs = raw_inputs.clone()
        labels = raw_inputs.clone()

        total_indices = paddle.bernoulli(probability_matrix).astype(
            "bool").numpy()
        labels[~total_indices] = -100

        # 80% MASK
        indices_mask = paddle.bernoulli(paddle.full(labels.shape, 0.8)).astype(
            "bool").numpy() & total_indices
        inputs[indices_mask] = mask_token_id

        # 10% Random
        indices_random = paddle.bernoulli(paddle.full(
            labels.shape, 0.5)).astype("bool").numpy(
            ) & total_indices & ~indices_mask
        random_words = paddle.randint(
            low=0,
            high=self.tokenizer.vocab_size,
            shape=labels.shape,
            dtype="int64")
        inputs = paddle.where(
            paddle.to_tensor(indices_random), random_words, inputs)

        # 10% Original
        return inputs, raw_inputs, labels
Example #22
def _nn_features_per_object_for_chunk(reference_embeddings, query_embeddings,
                                      wrong_label_mask, k_nearest_neighbors,
                                      ys):
    """Extracts features for each object using nearest neighbor attention.
  Args:
    reference_embeddings: Tensor of shape [n_chunk, embedding_dim],
      the embedding vectors for the reference frame.
    query_embeddings: Tensor of shape [m_chunk, embedding_dim], the embedding
      vectors for the query frames.
    wrong_label_mask:
    k_nearest_neighbors: Integer, the number of nearest neighbors to use.
  Returns:
    nn_features: A float32 tensor of nearest neighbor features of shape
      [m_chunk, n_objects, feature_dim].
    """
    #    reference_embeddings_key = reference_embeddings
    #    query_embeddings_key = query_embeddings
    dists, ys = _flattened_pairwise_distances(reference_embeddings,
                                              query_embeddings, ys)

    dists = (paddle.unsqueeze(dists, 1) +
             paddle.unsqueeze(float_(wrong_label_mask), 0) *
             WRONG_LABEL_PADDING_DISTANCE)
    if k_nearest_neighbors == 1:
        features = paddle.min(dists, 2, keepdim=True)
    else:
        dists, _ = paddle.topk(-dists, k=k_nearest_neighbors, axis=2)
        dists = -dists
        valid_mask = (dists < WRONG_LABEL_PADDING_DISTANCE)
        masked_dists = dists * valid_mask.float()
        pad_dist = paddle.max(masked_dists, axis=2, keepdim=True)[0].tile(
            (1, 1, masked_dists.shape[-1]))
        dists = paddle.where(valid_mask, dists, pad_dist)
        # take mean of distances
        features = paddle.mean(dists, axis=2, keepdim=True)

    return features, ys
Example #23
    def _compute_expert_weights(self):
        """Computes the weight vector for the experts.
        Args: None.
        Returns:
          A tuple: (expert_weights, selector_outputs).
            expert_weights is the final weight vector of the experts.
            selector_outputs is a (num_nonzero, num_experts)-matrix whose i-th row
            represents the outputs of the i-th single-expert selector.
        """
        # Shape = (num_nonzero, 1, num_binary)
        smooth_step_activations = self._smooth_step(self._z_logits)

        # Shape = (num_nonzero, num_experts)
        selector_outputs = paddle.prod(paddle.where(
            self._binary_codes, smooth_step_activations,
            1 - smooth_step_activations),
                                       axis=2)

        # Weights for the single-expert selectors: shape = (num_nonzero, 1)
        selector_weights = F.softmax(self._w_logits, axis=0)
        expert_weights = paddle.sum(selector_weights * selector_outputs,
                                    axis=0)

        return expert_weights, selector_outputs
Example #24
 def forward(self, inputs):
     querys, keys, sess_length = inputs
     #assert(type(sess_length) == paddle.Tensor), f"At Attention SequencePoolingLayer expected inputs[2]'s type is paddle.Tensor, but got {type(sess_length)}"
     keys_length = keys.shape[1]
     key_masks = nn.functional.sequence_mask(sess_length, keys_length)
     querys = paddle.tile(querys.unsqueeze(1), [1, keys_length, 1])
     att_input = paddle.concat([querys, keys, querys - keys, querys * keys],
                               axis=-1)
     for i, layer in enumerate(self.layers):
         att_input = layer(att_input)
          #att_input = self.bn_layer[i](att_input)  # BatchNormalization
         att_input = self.activation(att_input)  # activation
     att_score = self.dnn(att_input)  # (N, 50, 1)
     att_score = paddle.transpose(att_score, [0, 2, 1])  # (N, 1, 50)
     if self.weight_normalization:
         paddings = paddle.ones_like(att_score) * (-2**32 + 1)
     else:
         paddings = paddle.zeros_like(att_score)
     att_score = paddle.where(
         key_masks.unsqueeze(1) == 1, att_score, paddings
     )  # key_masks.unsqueeze in order to keep shape same as att_score
     att_score = self.soft(att_score)
     out = paddle.matmul(att_score, keys)
     return out
Example #25
    def forward(self, x):
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        f3_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        f4_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        f5_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        ffc7 = h
        h = F.relu(self.conv6_1(h))
        h = F.relu(self.conv6_2(h))
        f6_2 = h
        h = F.relu(self.conv7_1(h))
        h = F.relu(self.conv7_2(h))
        f7_2 = h

        f3_3 = self.conv3_3_norm(f3_3)
        f4_3 = self.conv4_3_norm(f4_3)
        f5_3 = self.conv5_3_norm(f5_3)

        cls1 = self.conv3_3_norm_mbox_conf(f3_3)
        reg1 = self.conv3_3_norm_mbox_loc(f3_3)
        cls2 = self.conv4_3_norm_mbox_conf(f4_3)
        reg2 = self.conv4_3_norm_mbox_loc(f4_3)
        cls3 = self.conv5_3_norm_mbox_conf(f5_3)
        reg3 = self.conv5_3_norm_mbox_loc(f5_3)
        cls4 = self.fc7_mbox_conf(ffc7)
        reg4 = self.fc7_mbox_loc(ffc7)
        cls5 = self.conv6_2_mbox_conf(f6_2)
        reg5 = self.conv6_2_mbox_loc(f6_2)
        cls6 = self.conv7_2_mbox_conf(f7_2)
        reg6 = self.conv7_2_mbox_loc(f7_2)

        # max-out background label
        chunk = paddle.chunk(cls1, 4, 1)
        tmp_max = paddle.where(chunk[0] > chunk[1], chunk[0], chunk[1])
        bmax = paddle.where(tmp_max > chunk[2], tmp_max, chunk[2])
        cls1 = paddle.concat([bmax, chunk[3]], axis=1)

        return [
            cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6,
            reg6
        ]
Example #26
    def forward(self,
                query,
                key,
                value,
                key_padding_mask=None,
                incremental_state=None,
                attn_mask=None):
        """
        Inputs of forward function
            query: [target length, batch size, embed dim]
            key: [sequence length, batch size, embed dim]
            value: [sequence length, batch size, embed dim]
            key_padding_mask: if True, mask padding based on batch size
            incremental_state: if provided, previous time steps are cached
            need_weights: output attn_output_weights
            static_kv: key and value are static

        Outputs of forward function
            attn_output: [target length, batch size, embed dim]
            attn_output_weights: [batch size, target length, sequence length]
        """
        q_shape = paddle.shape(query)
        src_shape = paddle.shape(key)
        q = self._in_proj_q(query)
        k = self._in_proj_k(key)
        v = self._in_proj_v(value)
        q *= self.scaling
        q = paddle.transpose(
            paddle.reshape(
                q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        k = paddle.transpose(
            paddle.reshape(
                k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        v = paddle.transpose(
            paddle.reshape(
                v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        if key_padding_mask is not None:
            assert key_padding_mask.shape[0] == q_shape[1]
            assert key_padding_mask.shape[1] == src_shape[0]
        attn_output_weights = paddle.matmul(q,
                                            paddle.transpose(k, [0, 1, 3, 2]))
        if attn_mask is not None:
            attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0)
            attn_output_weights += attn_mask
        if key_padding_mask is not None:
            attn_output_weights = paddle.reshape(
                attn_output_weights,
                [q_shape[1], self.num_heads, q_shape[0], src_shape[0]])
            key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 1), 2)
            key = paddle.cast(key, 'float32')
            y = paddle.full(shape=paddle.shape(key),
                            dtype='float32',
                            fill_value='-inf')
            y = paddle.where(key == 0., key, y)
            attn_output_weights += y
        attn_output_weights = F.softmax(
            attn_output_weights.astype('float32'),
            axis=-1,
            dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
            else attn_output_weights.dtype)
        attn_output_weights = F.dropout(attn_output_weights,
                                        p=self.dropout,
                                        training=self.training)

        attn_output = paddle.matmul(attn_output_weights, v)
        attn_output = paddle.reshape(
            paddle.transpose(attn_output, [2, 0, 1, 3]),
            [q_shape[0], q_shape[1], self.embed_dim])
        attn_output = self.out_proj(attn_output)

        return attn_output
Example #27
    def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
        """
        Rescale, clip and filter the bbox from the output of NMS to
        get the final prediction.

        Notes:
        Currently only supports bs = 1.

        Args:
            bboxes (Tensor): The output bboxes with shape [N, 6] after decode
                and NMS, including labels, scores and bboxes.
            bbox_num (Tensor): The number of prediction boxes of each batch with
                shape [1], and is N.
            im_shape (Tensor): The shape of the input image.
            scale_factor (Tensor): The scale factor of the input image.
        Returns:
            pred_result (Tensor): The final prediction results with shape [N, 6]
                including labels, scores and bboxes.
        """

        bboxes_list = []
        bbox_num_list = []
        id_start = 0
        # add fake bbox when output is empty for each batch
        for i in range(bbox_num.shape[0]):
            if bbox_num[i] == 0:
                bboxes_i = self.fake_bboxes
                bbox_num_i = self.fake_bbox_num
                id_start += 1
            else:
                bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
                bbox_num_i = bbox_num[i]
                id_start += bbox_num[i]
            bboxes_list.append(bboxes_i)
            bbox_num_list.append(bbox_num_i)
        bboxes = paddle.concat(bboxes_list)
        bbox_num = paddle.concat(bbox_num_list)

        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

        origin_shape_list = []
        scale_factor_list = []
        # scale_factor: scale_y, scale_x
        for i in range(bbox_num.shape[0]):
            expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                         [bbox_num[i], 2])
            scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
            scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
            expand_scale = paddle.expand(scale, [bbox_num[i], 4])
            origin_shape_list.append(expand_shape)
            scale_factor_list.append(expand_scale)

        self.origin_shape_list = paddle.concat(origin_shape_list)
        scale_factor_list = paddle.concat(scale_factor_list)

        # bboxes: [N, 6], label, score, bbox
        pred_label = bboxes[:, 0:1]
        pred_score = bboxes[:, 1:2]
        pred_bbox = bboxes[:, 2:]
        # rescale bbox to original image
        scaled_bbox = pred_bbox / scale_factor_list
        origin_h = self.origin_shape_list[:, 0]
        origin_w = self.origin_shape_list[:, 1]
        zeros = paddle.zeros_like(origin_h)
        # clip bbox to [0, original_size]
        x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros)
        y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros)
        x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros)
        y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros)
        pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
        # filter empty bbox
        keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
        keep_mask = paddle.unsqueeze(keep_mask, [1])
        pred_label = paddle.where(keep_mask, pred_label,
                                  paddle.ones_like(pred_label) * -1)
        pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
        return pred_result
Example #28
    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        seg_masks = paddle.gather(seg_masks, index=sort_inds)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        sum_masks = paddle.gather(sum_masks, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)

        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        # inter.
        inter_matrix = paddle.mm(seg_masks,
                                 paddle.transpose(seg_masks, [1, 0]))
        n_samples = paddle.shape(cate_labels)
        # union.
        sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
        # iou.
        iou_matrix = (inter_matrix /
                      (sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) -
                       inter_matrix))
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = paddle.expand(cate_labels,
                                      shape=[n_samples, n_samples])
        label_matrix = paddle.cast(
            (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
        compensate_iou = paddle.expand(compensate_iou,
                                       shape=[n_samples, n_samples])
        compensate_iou = paddle.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = iou_matrix * label_matrix

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = paddle.exp(-1 * self.sigma *
                                           (compensate_iou**2))
            decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                           axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = cate_scores * decay_coefficient
        y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
        keep = paddle.where(cate_scores >= self.update_threshold, cate_scores,
                            y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Avoid an empty result by appending one fake index
        keep = paddle.concat(
            [keep,
             paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

        seg_preds = paddle.gather(seg_preds, index=keep)
        cate_scores = paddle.gather(cate_scores, index=keep)
        cate_labels = paddle.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)
        return seg_preds, cate_scores, cate_labels
Example #29
    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, fluid.framework.Block)
        block.program._use_lamb = True

        m = moment1 = self._get_accumulator(self._moment1_acc_str,
                                            param_and_grad[0])
        v = self._get_accumulator(self._moment2_acc_str, param_and_grad[0])
        beta_1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
                                               param_and_grad[0])
        beta_2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
                                               param_and_grad[0])

        beta_1 = layers.fill_constant(dtype='float32',
                                      shape=[1],
                                      value=self._beta1,
                                      name='lamb_beta_1')
        beta_2 = layers.fill_constant(dtype='float32',
                                      shape=[1],
                                      value=self._beta2,
                                      name='lamb_beta_2')
        epsilon = layers.fill_constant(dtype='float32',
                                       shape=[1],
                                       value=self._epsilon,
                                       name='epsilon')

        one = paddle.ones(shape=[1]).astype('float32')
        zero = paddle.zeros(shape=[1]).astype('float32')

        next_m = paddle.multiply(m, beta_1) + paddle.multiply(
            param_and_grad[1], one - beta_1)
        next_v = paddle.multiply(v, beta_2) + paddle.multiply(
            paddle.pow(param_and_grad[1], 2), one - beta_2)

        beta1_correction = one - beta_1_pow_acc
        beta2_correction = one - beta_2_pow_acc

        next_m_unbiased = next_m / beta1_correction
        next_v_unbiased = next_v / beta2_correction

        update = next_m_unbiased / (paddle.sqrt(next_v_unbiased) + epsilon)

        if self._exclude_from_weight_decay_fn is not None and self._exclude_from_weight_decay_fn(
                param_and_grad[0]):
            self._lamb_weight_decay = 0.0
        update += self._lamb_weight_decay * param_and_grad[0]

        w_norm = paddle.norm(param_and_grad[0], p=2)
        g_norm = paddle.norm(update, p=2)

        learning_rate = self._create_param_lr(param_and_grad)

        ratio = paddle.where(
            paddle.greater_than(w_norm, zero),
            paddle.where(paddle.greater_than(g_norm, zero), (w_norm / g_norm),
                         one), one)
        update_with_lr = ratio * learning_rate * update
        next_param = param_and_grad[0] - update_with_lr

        beta_1_pow_acc *= beta_1
        beta_2_pow_acc *= beta_2

        paddle.assign(next_m, m)
        paddle.assign(next_v, v)
        paddle.assign(next_param, param_and_grad[0])

        return None
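The nested paddle.where above implements the LAMB trust ratio with a fallback of 1 when either norm is zero. A vectorized toy check of that guard, outside the optimizer (norm values made up):

import paddle

w_norm = paddle.to_tensor([0.0, 4.0])
g_norm = paddle.to_tensor([2.0, 2.0])
one = paddle.ones_like(w_norm)
# ratio = ||w|| / ||g|| when both norms are positive, otherwise 1.
ratio = paddle.where(w_norm > 0.0,
                     paddle.where(g_norm > 0.0, w_norm / g_norm, one), one)
print(ratio.numpy())  # [1. 2.]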
Example #30
def do_eval(args):
    paddle.set_device(args.device)
    model_class, tokenizer_class = MODEL_CLASSES["gpt"]
    tokenizer = tokenizer_class.from_pretrained(args.model_name)

    if args.init_checkpoint_path is not None:
        model = GPTForPretraining(
            GPTModel(
                **model_class.pretrained_init_configuration[args.model_name]))

        logger.info("Load model checkpoint from %s" %
                    args.init_checkpoint_path)
        model_dict = paddle.load(os.path.join(args.init_checkpoint_path))
        model.set_dict(model_dict)
    else:
        model = model_class.from_pretrained(args.model_name)

    tic_eval = time.time()
    eval_data_loader = create_eval_dataset(args)
    model.eval()
    total_score = 0
    score_name = "loss" if not args.cloze_eval else "number correct"
    with paddle.no_grad():
        for step, batch in enumerate(eval_data_loader):
            tokens, loss_mask, attention_mask, position_ids, labels = batch
            preds = model(tokens, position_ids, attention_mask)
            if not args.cloze_eval:
                masked_lm_loss = paddle.nn.functional.cross_entropy(
                    preds, labels, reduction="none")
                loss = paddle.sum(masked_lm_loss * loss_mask)
                total_score += loss.numpy() / (args.num_tokenized_tokens - 1)
            else:
                outputs = paddle.argmax(preds, -1)
                acc = paddle.cast(outputs == labels, 'float32')
                acc = paddle.where(paddle.cast(loss_mask, 'bool'), acc,
                                   paddle.ones_like(acc))
                acc = paddle.sum(paddle.prod(acc, -1))
                total_score += acc.numpy()
            if step % args.logging_steps == 0:
                logger.info(
                    "step %d, batch: %d, %s: %f, speed: %.2f step/s" %
                    (step, step, score_name, total_score, args.logging_steps /
                     (time.time() - tic_eval)))
                tic_eval = time.time()

    if not args.cloze_eval:
        total_loss = float(total_score)
        ppl = math.exp(min(20, total_loss))
        token_ratio = (args.num_tokenized_tokens -
                       1) / (args.num_original_tokens - 1)
        adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
        string = ' validation results on {} | '.format(args.eval_path)
        string += 'avg loss: {:.4E} | '.format(total_loss)
        string += 'ppl: {:.4E} | '.format(ppl)
        string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
        string += 'token ratio: {} |'.format(token_ratio)
    else:
        num_correct = float(total_score)
        acc = float(num_correct / args.num_examples)
        string = ' validation results on {} | '.format(args.eval_path)
        string += 'number correct: {:.4E} | '.format(num_correct)
        string += 'total examples: {:.4E} | '.format(args.num_examples)
        string += 'avg accuracy: {:.4E}'.format(acc)
    logger.info(string)