Example no. 1
 def _generate_anchors(self, feats=None):
     # only used at eval time
     anchor_points = []
     stride_tensor = []
     for i, stride in enumerate(self.fpn_stride):
         if feats is not None:
             _, _, h, w = feats[i].shape
         else:
             h = math.ceil(self.eval_size[0] / stride)
             w = math.ceil(self.eval_size[1] / stride)
         shift_x = paddle.arange(end=w) + self.cell_offset
         shift_y = paddle.arange(end=h) + self.cell_offset
         shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
         anchor_point = paddle.cast(
             paddle.stack(
                 [shift_x, shift_y], axis=-1), dtype='float32')
         anchor_points.append(anchor_point.reshape([-1, 2]))
         stride_tensor.append(
             paddle.full(
                 [h * w, 1], stride, dtype='float32'))
     anchor_points = paddle.concat(anchor_points)
     stride_tensor = paddle.concat(stride_tensor)
     return anchor_points, stride_tensor
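A minimal standalone sketch (with assumed values, not part of the head class above) of the same grid construction for a single stride of 8 on a 640x640 eval size: the integer meshgrid is cast to float32 before the cell offset is added, and the matching per-point stride tensor is built.

import math
import paddle

stride, cell_offset, eval_size = 8, 0.5, (640, 640)    # illustrative values
h = math.ceil(eval_size[0] / stride)
w = math.ceil(eval_size[1] / stride)
shift_x = paddle.arange(end=w)                          # int64 grid coordinates
shift_y = paddle.arange(end=h)
shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
# cast the integer grid to float32, then add the (assumed) 0.5 cell offset
anchor_points = paddle.cast(paddle.stack([shift_x, shift_y], axis=-1), dtype='float32')
anchor_points = paddle.reshape(anchor_points, [-1, 2]) + cell_offset
stride_tensor = paddle.full([h * w, 1], stride, dtype='float32')
print(anchor_points.shape, stride_tensor.shape)         # [6400, 2] [6400, 1]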
Example no. 2
def pdpd_range(name : str, x, start, end, step, out_type):
    import paddle as pdpd
    pdpd.enable_static()
    
    with pdpd.static.program_guard(pdpd.static.Program(), pdpd.static.Program()):
        node_x = pdpd.static.data(name='x', shape=x.shape, dtype='float32')
        # Range op only supports fill_constant inputs, since dynamic ops are not supported in OV
        out = pdpd.fluid.layers.range(start, end, step, out_type)
        out = pdpd.cast(out, np.float32)
        out = pdpd.add(node_x, out)
        #out = pdpd.cast(out, np.float32)
        cpu = pdpd.static.cpu_places(1)
        exe = pdpd.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(pdpd.static.default_startup_program())

        outs = exe.run(
                feed={'x': x},
                fetch_list=[out])

        saveModel(name, exe, feedkeys=['x'], fetchlist=[out], inputs=[x], outputs=[outs[0]], target_dir=sys.argv[1])

    return outs[0]
Example no. 3
def pdpd_scale_tensor(name : str, x, scale, bias, attrs : dict, data_type):
    import paddle as pdpd
    pdpd.enable_static()

    with pdpd.static.program_guard(pdpd.static.Program(), pdpd.static.Program()):
        node_x = pdpd.static.data(name='x', shape=x.shape, dtype=data_type)
        node_scale = pdpd.static.data(name='scale', shape=[1], dtype='float32')
        out = pdpd.scale(x=node_x, scale=node_scale, bias=bias,
                         bias_after_scale=attrs['bias_after_scale'])
        # FuzzyTest only supports FP32 for now, so cast the result to fp32
        out = pdpd.cast(out, "float32")
        cpu = pdpd.static.cpu_places(1)
        exe = pdpd.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(pdpd.static.default_startup_program())

        outs = exe.run(
            feed={'x': x, 'scale': scale},
            fetch_list=[out])

        saveModel(name, exe, feedkeys=['x', 'scale'], fetchlist=[out], inputs=[x, np.array([scale]).astype('float32')], outputs=[outs[0]], target_dir=sys.argv[1])

    return outs[0]
Example no. 4
def degree_norm(graph, mode="indegree", p=-1):
    """Calculate the degree normalization of a graph
    Args:
        graph: the graph object from (:code:`Graph`)
        mode: which degree to be normalized ("indegree" or "outdegree")
    return:
        A tensor with shape (num_nodes, 1).
    """

    assert mode in [
        'indegree', 'outdegree'
    ], "The degree_norm mode should be in ['indegree', 'outdegree']. But received mode=%s" % mode

    if mode == "indegree":
        degree = graph.indegree() + 1
    elif mode == "outdegree":
        degree = graph.outdegree() + 1

    norm = paddle.cast(degree, dtype=paddle.get_default_dtype())
    norm = paddle.clip(norm, min=1.0)
    norm = paddle.pow(norm, p)
    norm = paddle.reshape(norm, [-1, 1])
    return norm
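A minimal sketch of the same cast-and-normalize steps with p = -1, applied to a hand-made integer degree vector so that no graph object is needed; values are illustrative.

import paddle

degree = paddle.to_tensor([3, 1, 0, 5], dtype='int64') + 1      # +1 as in degree_norm
norm = paddle.cast(degree, dtype=paddle.get_default_dtype())    # int64 -> float32
norm = paddle.clip(norm, min=1.0)
norm = paddle.pow(norm, -1)                                     # p = -1, i.e. 1 / degree
norm = paddle.reshape(norm, [-1, 1])
print(norm.numpy())                                             # [[0.25], [0.5], [1.0], [0.1667]]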
Example no. 5
    def rect2rbox(self, bboxes):
        """
        :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
        :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
        """
        bboxes = paddle.reshape(bboxes, [-1, 4])
        num_boxes = paddle.shape(bboxes)[0]
        x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
        y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
        edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
        edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])

        rbox_w = paddle.maximum(edges1, edges2)
        rbox_h = paddle.minimum(edges1, edges2)

        # set angle
        inds = edges1 < edges2
        inds = paddle.cast(inds, 'int32')
        rboxes_angle = inds * np.pi / 2.0

        rboxes = paddle.stack((x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle),
                              axis=1)
        return rboxes
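A standalone sketch of the angle trick above on toy edge lengths: the boolean comparison is cast to a numeric tensor and scaled by pi/2 (float32 is used here instead of int32 so the multiplication stays in floating point).

import math
import paddle

edges1 = paddle.to_tensor([10.0, 3.0])                  # |xmax - xmin|
edges2 = paddle.to_tensor([4.0, 8.0])                   # |ymax - ymin|
inds = paddle.cast(edges1 < edges2, 'float32')          # [0., 1.]
rboxes_angle = inds * (math.pi / 2.0)                   # [0.0, 1.5708]
print(rboxes_angle.numpy())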
Example no. 6
    def net(self, inputs, is_infer=False):
        self.hist_item_seq = inputs[0]
        self.hist_cat_seq = inputs[1]
        self.target_item = inputs[2]
        self.target_cat = inputs[3]
        self.label = inputs[4].reshape([-1, 1])
        self.mask = inputs[5]
        self.target_item_seq = inputs[6]
        self.target_cat_seq = inputs[7]
        din_model = DINLayer(self.item_emb_size, self.cat_emb_size, self.act,
                             self.is_sparse, self.use_DataLoader,
                             self.item_count, self.cat_count)

        raw_predict = din_model.forward(self.hist_item_seq, self.hist_cat_seq,
                                        self.target_item, self.target_cat,
                                        self.label, self.mask,
                                        self.target_item_seq,
                                        self.target_cat_seq)

        avg_loss = paddle.nn.functional.binary_cross_entropy_with_logits(
            raw_predict, self.label, reduction='mean')
        self._cost = avg_loss

        self.predict = paddle.nn.functional.sigmoid(raw_predict)
        predict_2d = paddle.concat([1 - self.predict, self.predict], 1)
        label_int = paddle.cast(self.label, 'int64')
        auc, batch_auc, _ = paddle.static.auc(input=predict_2d,
                                              label=label_int,
                                              slide_steps=0)

        self.inference_target_var = auc
        if is_infer:
            fetch_dict = {'auc': auc}
            return fetch_dict

        fetch_dict = {'cost': avg_loss, 'auc': auc}
        return fetch_dict
Example no. 7
    def net(self, input, is_infer=False):
        self.sparse_inputs = self._sparse_data_var[1:]
        self.dense_input = self._dense_data_var[0]
        self.label_input = self._sparse_data_var[0]
        sparse_number = self.sparse_inputs_slot - 1
        assert sparse_number == len(self.sparse_inputs)

        dcn_model = DeepCroLayer(self.sparse_feature_number,
                                 self.sparse_feature_dim, self.dense_input_dim,
                                 sparse_number, self.fc_sizes, self.cross_num,
                                 self.clip_by_norm, self.l2_reg_cross,
                                 self.is_sparse)
        print("----self.dense_input-----", self.dense_input)
        print("----self.sparse_inputs----", self.sparse_inputs)
        pred, l2_loss = dcn_model.forward(self.sparse_inputs, self.dense_input)

        #pred = F.sigmoid(prediction)

        predict_2d = paddle.concat(x=[1 - pred, pred], axis=1)

        auc, batch_auc_var, _ = paddle.fluid.layers.auc(input=predict_2d,
                                                        label=self.label_input,
                                                        slide_steps=0)

        self.inference_target_var = auc
        if is_infer:
            fetch_dict = {'auc': auc}
            return fetch_dict

        cost = paddle.nn.functional.log_loss(input=pred,
                                             label=paddle.cast(
                                                 self.label_input,
                                                 dtype="float32"))
        avg_cost = paddle.mean(x=cost)
        self._cost = avg_cost + l2_loss
        fetch_dict = {'cost': avg_cost, 'auc': auc}
        return fetch_dict
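Example no. 8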
def greater_equal(name: str, x, y, data_type, cast_to_fp32=False):
    paddle.enable_static()

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        node_x = paddle.static.data(name='input_x',
                                    shape=x.shape,
                                    dtype=data_type)
        node_y = paddle.static.data(name='input_y',
                                    shape=y.shape,
                                    dtype=data_type)
        out = paddle.fluid.layers.greater_equal(x=node_x,
                                                y=node_y,
                                                name='greater_equal')
        # FuzzyTest framework doesn't support boolean so cast to fp32/int32

        if cast_to_fp32:
            data_type = "float32"

        out = paddle.cast(out, data_type)
        cpu = paddle.static.cpu_places(1)
        exe = paddle.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(paddle.static.default_startup_program())

        outs = exe.run(feed={'input_x': x, 'input_y': y}, fetch_list=[out])

        saveModel(name,
                  exe,
                  feedkeys=['input_x', 'input_y'],
                  fetchlist=[out],
                  inputs=[x, y],
                  outputs=[outs[0]],
                  target_dir=sys.argv[1])

    return outs[0]
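For reference, a minimal dynamic-graph sketch of the same comparison-plus-cast pattern via paddle.greater_equal, without the static Program/Executor/saveModel scaffolding used above.

import numpy as np
import paddle

x = paddle.to_tensor(np.array([1, 5, 3], dtype='int32'))
y = paddle.to_tensor(np.array([2, 5, 1], dtype='int32'))
flags = paddle.greater_equal(x, y)            # bool tensor: [False, True, True]
flags_f32 = paddle.cast(flags, 'float32')     # [0., 1., 1.]
print(flags_f32.numpy())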
Example no. 9
    def forward(self, predicts, batch):
        predict = predicts['predict']
        word_predict = predicts['word_out']
        gsrm_predict = predicts['gsrm_out']
        label = batch[1]

        casted_label = paddle.cast(x=label, dtype='int64')
        casted_label = paddle.reshape(x=casted_label, shape=[-1, 1])

        cost_word = self.loss_func(word_predict, label=casted_label)
        cost_gsrm = self.loss_func(gsrm_predict, label=casted_label)
        cost_vsfd = self.loss_func(predict, label=casted_label)

        cost_word = paddle.reshape(x=paddle.sum(cost_word), shape=[1])
        cost_gsrm = paddle.reshape(x=paddle.sum(cost_gsrm), shape=[1])
        cost_vsfd = paddle.reshape(x=paddle.sum(cost_vsfd), shape=[1])

        sum_cost = cost_word * 3.0 + cost_vsfd + cost_gsrm * 0.15

        return {
            'loss': sum_cost,
            'word_loss': cost_word,
            'img_loss': cost_vsfd
        }
Example no. 10
    def net(self, input, is_infer=False):
        self.sparse_inputs = input[1:self.sparse_inputs_slot]
        self.dense_input = input[-1]
        self.label_input = input[0]
        sparse_number = self.sparse_inputs_slot - 1
        assert sparse_number == len(self.sparse_inputs)

        xdeepfm_model = xDeepFMLayer(self.sparse_feature_number,
                                     self.sparse_feature_dim,
                                     self.dense_input_dim, sparse_number,
                                     self.layer_sizes_cin,
                                     self.layer_sizes_dnn)

        pred = xdeepfm_model.forward(self.sparse_inputs, self.dense_input)

        #pred = F.sigmoid(prediction)

        predict_2d = paddle.concat(x=[1 - pred, pred], axis=1)

        auc, batch_auc_var, _ = paddle.static.auc(input=predict_2d,
                                                  label=self.label_input,
                                                  slide_steps=0)

        self.inference_target_var = auc
        if is_infer:
            fetch_dict = {'auc': auc}
            return fetch_dict

        cost = paddle.nn.functional.log_loss(input=pred,
                                             label=paddle.cast(
                                                 self.label_input,
                                                 dtype="float32"))
        avg_cost = paddle.mean(x=cost)
        self._cost = avg_cost
        fetch_dict = {'cost': avg_cost, 'auc': auc}
        return fetch_dict
Example no. 11
    def forward(self, input, label, conf):
        x_emb = self.embedding(input)
        fc = self.lin_a(x_emb)
        mask = conf > 0
        mask = paddle.cast(mask, dtype="int64")
        mask.stop_gradient = True
        emb_mask = mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        emb_mask_inds.stop_gradient = True

        if emb_mask_inds.numel() == 0:
            loss_box = self.phony * 0
        else:
            projection = self.lin_b(fc)
            projection = paddle.reshape(projection, shape=[-1, 1])
            output = paddle.gather(projection, emb_mask_inds)
            target = paddle.gather(label, emb_mask_inds)
            loss_box = F.smooth_l1_loss(output,
                                        target,
                                        reduction='sum',
                                        delta=1.0)
            loss_box = loss_box / len(conf)

        return loss_box
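A standalone sketch of the masking pattern in this forward, with made-up tensors: the boolean confidence mask is cast to int64, rows with any positive confidence are selected via nonzero, and gather pulls the matching predictions and labels.

import paddle

conf = paddle.to_tensor([[0.9, 0.0], [0.0, 0.0], [0.2, 0.7]])
projection = paddle.to_tensor([[0.5], [1.5], [2.5]])
label = paddle.to_tensor([[0.4], [1.0], [2.0]])

mask = paddle.cast(conf > 0, dtype='int64')                 # bool -> int64
emb_mask = mask.max(1).flatten()                            # 1 where the row has any conf > 0
emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()      # -> [0, 2]
output = paddle.gather(projection, emb_mask_inds)
target = paddle.gather(label, emb_mask_inds)
print(output.numpy(), target.numpy())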
Example no. 12
    def forward(self, input, mask=None):
        """
        Args:
            input (paddle.Tensor) of shape (batch, seq_len, hidden_size): Tensor containing the features of the input sequence.
            mask (paddle.Tensor) of shape (batch, seq_len):
                A bool tensor in which each element indicates whether the corresponding input word id is the pad token.
                Defaults to `None`.
        """
        weight = self.input_weight.tile(
            repeat_times=(paddle.shape(input)[0], 1,
                          1))  # tensor[batch, hidden_size, hidden_size]
        bias = self.bias.tile(
            repeat_times=(paddle.shape(input)[0], 1,
                          1))  # tensor[batch, 1, hidden_size]
        word_squish = paddle.bmm(
            input, weight) + bias  # Shape: (batch_size, seq_len, hidden_size)
        att_context_vector = self.att_context_vector.tile(
            repeat_times=(paddle.shape(input)[0], 1,
                          1))  # Shape: (batch_size, hidden_size, 1)
        att_score = paddle.bmm(
            word_squish, att_context_vector)  # tensor[batch_size, seq_len, 1]
        if mask is not None:
            # mask, remove the effect of 'PAD'
            mask = paddle.cast(mask, dtype='float32')
            mask = mask.unsqueeze(axis=-1)
            inf_tensor = paddle.full(shape=paddle.shape(mask),
                                     dtype='float32',
                                     fill_value=-INF)
            att_score = paddle.multiply(att_score, mask) + paddle.multiply(
                inf_tensor, (1 - mask))
        att_weight = F.softmax(att_score,
                               axis=1)  # tensor[batch_size, seq_len, 1]

        reps = paddle.bmm(input.transpose(perm=(0, 2, 1)), att_weight).squeeze(
            -1)  # Shape: (batch_size, hidden_size)
        return reps, att_weight
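A minimal sketch of the padding-mask arithmetic above on toy tensors: the boolean mask is cast to float32 and PAD positions receive -INF before the softmax, so they end up with (near) zero attention weight; INF is assumed here to be a large positive constant.

import paddle
import paddle.nn.functional as F

INF = 1e9                                                   # assumed value of the module-level INF
att_score = paddle.to_tensor([[[0.3], [1.2], [0.8]]])       # [batch=1, seq_len=3, 1]
mask = paddle.to_tensor([[True, True, False]])              # last token is PAD

mask = paddle.cast(mask, dtype='float32').unsqueeze(axis=-1)
inf_tensor = paddle.full(shape=paddle.shape(mask), dtype='float32', fill_value=-INF)
att_score = paddle.multiply(att_score, mask) + paddle.multiply(inf_tensor, (1 - mask))
att_weight = F.softmax(att_score, axis=1)
print(att_weight.numpy())                                   # weight on the PAD position is ~0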
Example no. 13
def sample_logits(embedding, bias, labels, inputs, sampler):
    true_log_probs, samp_log_probs, neg_samples = sampler.sample(labels)
    n_sample = neg_samples.shape[0]
    b1, b2 = labels.shape[0], labels.shape[1]
    all_ids = paddle.concat([paddle.reshape(labels, shape=[-1]), neg_samples])
    all_w = embedding(all_ids)
    true_w = paddle.reshape(all_w[:-n_sample], shape=[b1, b2, -1])
    sample_w = paddle.reshape(all_w[-n_sample:], shape=[n_sample, -1])

    all_b = paddle.gather(bias, all_ids)
    true_b = paddle.reshape(all_b[:-n_sample], shape=[b1, b2])
    sample_b = all_b[-n_sample:]

    hit = paddle.cast((labels.unsqueeze([2]) == neg_samples),
                      dtype=global_dtype).detach()
    true_logits = paddle.sum(true_w * inputs,
                             axis=-1) + true_b - true_log_probs
    sample_logits = paddle.transpose(
        paddle.matmul(sample_w, paddle.transpose(inputs, [0, 2, 1])),
        [0, 2, 1]) + sample_b - samp_log_probs
    sample_logits = sample_logits - 1e30 * hit
    logits = paddle.concat([true_logits.unsqueeze([2]), sample_logits], -1)

    return logits
Example no. 14
    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """
        The MBartEncoder forward method, overrides the `__call__()` special method.

        Args:
            input_ids (Tensor, optional):
                See :class:`MBartModel`.
            attention_mask (Tensor, optional):
                See :class:`MBartModel`.

        Returns:
            Tensor: Returns tensor `encoder_output`, which is the output at the last layer of the model.
            Its data type should be float32 and has a shape of [batch_size, sequence_length, hidden_size].

        """
        if input_ids is None:
            raise ValueError("Input_ids cannot be None.")
        inputs_embeds = self.d_model**0.5 * self.embed_tokens(input_ids)
        inputs_embed_pos = self.encoder_embed_positions(input_ids.shape)
        hidden_states = inputs_embeds + inputs_embed_pos
        hidden_states = self.encoder_layernorm_embedding(hidden_states)
        encoder_input = self.encoder_dropout(hidden_states)

        if attention_mask is None:
            attention_mask = paddle.cast(
                input_ids == self.pad_token_id,
                dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e4
        # For 2D attention_mask from tokenizer
        elif attention_mask.ndim == 2:
            attention_mask = paddle.unsqueeze(
                attention_mask, axis=[1, 2]).astype(paddle.get_default_dtype())
            attention_mask = (1.0 - attention_mask) * -1e4
        attention_mask.stop_gradient = True

        encoder_output = self.encoder(encoder_input, src_mask=attention_mask)
        return encoder_output
Example no. 15
def run_evaluate(args,
                 data_loader,
                 model,
                 criterion,
                 iter_steps,
                 log_writer,
                 global_step,
                 epoch,
                 task_name="valid"):
    model.eval()
    all_loss = []
    local_time = time.time()
    for eval_step, batch in enumerate(data_loader):
        tokens, loss_mask, labels = batch
        with paddle.amp.auto_cast(args.use_pure_fp16,
                                  custom_black_list=[
                                      "reduce_sum",
                                      "c_softmax_with_cross_entropy",
                                      "elementwise_div",
                                  ],
                                  level='O2'):
            preds = model(tokens)
        preds = paddle.cast(preds, dtype="float32")
        loss = criterion(preds, labels, loss_mask)

        all_loss.append(float(loss))
        if eval_step >= iter_steps - 1:
            break

    average_loss = sum(all_loss) / len(all_loss)
    logger.info(
        "%s step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s" %
        (task_name, global_step, epoch, eval_step, average_loss, iter_steps /
         (time.time() - local_time)))
    log_writer.add_scalar(task_name + "_loss", average_loss, global_step)
    model.train()
Example no. 16
    def forward(self, similarities_matrix, query_img_id, gallery_img_id,
                keep_mask):
        metric_dict = dict()

        #get cmc
        choosen_indices = paddle.argsort(similarities_matrix,
                                         axis=1,
                                         descending=True)
        gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
        gallery_labels_transpose = paddle.broadcast_to(
            gallery_labels_transpose,
            shape=[
                choosen_indices.shape[0], gallery_labels_transpose.shape[1]
            ])
        choosen_label = paddle.index_sample(gallery_labels_transpose,
                                            choosen_indices)
        equal_flag = paddle.equal(choosen_label, query_img_id)
        if keep_mask is not None:
            keep_mask = paddle.index_sample(keep_mask.astype('float32'),
                                            choosen_indices)
            equal_flag = paddle.logical_and(equal_flag,
                                            keep_mask.astype('bool'))
        equal_flag = paddle.cast(equal_flag, 'float32')
        real_query_num = paddle.sum(equal_flag, axis=1)
        real_query_num = paddle.sum(
            paddle.greater_than(real_query_num,
                                paddle.to_tensor(0.)).astype("float32"))

        acc_sum = paddle.cumsum(equal_flag, axis=1)
        mask = paddle.greater_than(acc_sum,
                                   paddle.to_tensor(0.)).astype("float32")
        all_cmc = (paddle.sum(mask, axis=0) / real_query_num).numpy()

        for k in self.topk:
            metric_dict["recall{}".format(k)] = all_cmc[k - 1]
        return metric_dict
Example no. 17
    def forward(
        self,
        input_ids=None,
        token_type_ids=None,
        attention_mask=None,
        mems=None,
        perm_mask=None,
        target_mapping=None,
        input_mask=None,
        head_mask=None,
        inputs_embeds=None,
        use_mems_train=False,
        use_mems_eval=False,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):

        if self.training:
            use_mems = use_mems_train
        else:
            use_mems = use_mems_eval

        # The original code for XLNet uses shapes [len, bsz] with the batch dimension at the end
        # but we want a unified interface in the library with the batch size on the first dimension
        # so we move here the first dimension (batch) to the end
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_ids = paddle.transpose(input_ids, perm=[1, 0])
            qlen, bsz = input_ids.shape[0], input_ids.shape[1]
        elif inputs_embeds is not None:
            inputs_embeds = paddle.transpose(inputs_embeds, perm=[1, 0])
            qlen, bsz = inputs_embeds.shape[0], inputs_embeds.shape[1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        token_type_ids = token_type_ids.transpose(
            [1, 0]) if token_type_ids is not None else None
        input_mask = input_mask.transpose(
            [1, 0]) if input_mask is not None else None
        attention_mask = attention_mask.transpose(
            [1, 0]) if attention_mask is not None else None
        perm_mask = perm_mask.transpose([1, 2, 0
                                         ]) if perm_mask is not None else None
        target_mapping = target_mapping.transpose(
            [1, 2, 0]) if target_mapping is not None else None

        mlen = mems[0].shape[
            0] if mems is not None and mems[0] is not None else 0
        klen = mlen + qlen

        # Attention mask
        # Causal attention mask
        if self.attn_type == "uni":
            attn_mask = self.create_mask(qlen, mlen)
            attn_mask = paddle.unsqueeze(attn_mask, axis=[2, 3])
        elif self.attn_type == "bi":
            attn_mask = None
        else:
            raise ValueError("Unsupported attention type: {}".format(
                self.attn_type))

        # Data mask: input mask & perm mask
        assert input_mask is None or attention_mask is None, (
            "You can only use one of input_mask (uses 1 for padding) "
            "or attention_mask (uses 0 for padding, added for compatibility with BERT). Please choose one.")
        if input_mask is None and attention_mask is not None:
            input_mask = 1.0 - attention_mask
        if input_mask is not None and perm_mask is not None:
            data_mask = paddle.unsqueeze(input_mask, axis=0) + perm_mask
        elif input_mask is not None and perm_mask is None:
            data_mask = paddle.unsqueeze(input_mask, axis=0)
        elif input_mask is None and perm_mask is not None:
            data_mask = perm_mask
        else:
            data_mask = None

        if data_mask is not None:
            # All mems can be attended to
            if mlen > 0:
                mems_mask = paddle.cast(paddle.zeros(
                    [data_mask.shape[0], mlen, bsz]),
                                        dtype=dtype_float)
                data_mask = paddle.concat([mems_mask, data_mask], axis=1)
            if attn_mask is None:
                attn_mask = paddle.unsqueeze(data_mask, axis=-1)
            else:
                attn_mask += paddle.unsqueeze(data_mask, axis=-1)

        if attn_mask is not None:
            attn_mask = paddle.cast((attn_mask > 0), dtype=dtype_float)

        if attn_mask is not None:
            non_tgt_mask = paddle.cast(-paddle.eye(qlen), dtype=dtype_float)

            if mlen > 0:
                non_tgt_mask = paddle.concat([
                    paddle.cast(paddle.zeros([qlen, mlen]), dtype=dtype_float),
                    non_tgt_mask
                ],
                                             axis=-1)
            non_tgt_mask = paddle.cast((
                (attn_mask + paddle.unsqueeze(non_tgt_mask, axis=[2, 3])) > 0),
                                       dtype=dtype_float)
        else:
            non_tgt_mask = None

        # Word embeddings and prepare h & g hidden states
        if inputs_embeds is not None:
            word_emb_k = inputs_embeds
        else:
            word_emb_k = self.word_embedding(input_ids)

        output_h = self.dropout(word_emb_k)
        if target_mapping is not None:
            word_emb_q = self.mask_emb.expand(
                [target_mapping.shape[0], bsz, -1])
            output_g = self.dropout(word_emb_q)
        else:
            output_g = None

        # Segment embedding
        if token_type_ids is not None:
            # Convert `token_type_ids` to one-hot `seg_mat`
            if mlen > 0:
                mem_pad = paddle.zeros(shape=[mlen, bsz], dtype='int64')
                cat_ids = paddle.concat(x=[mem_pad, token_type_ids], axis=0)
            else:
                cat_ids = token_type_ids

            # `1` indicates not in the same segment [qlen x klen x bsz]
            seg_mat = paddle.cast(paddle.unsqueeze(token_type_ids, axis=1) !=
                                  paddle.unsqueeze(cat_ids, axis=0),
                                  dtype='int64')
            seg_mat = paddle.cast(F.one_hot(seg_mat, num_classes=2),
                                  dtype=dtype_float)
        else:
            seg_mat = None

        # Positional encoding
        pos_emb = self.relative_positional_encoding(qlen, klen, bsz=bsz)
        pos_emb = self.dropout(pos_emb)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # Attention_probs has shape bsz x n_heads x N x N
        # Input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
        # And head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(
                    0).unsqueeze(0)
                head_mask = head_mask.expand([self.n_layer, -1, -1, -1, -1])
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(1).unsqueeze(1)
        else:
            head_mask = [None] * self.n_layer

        new_mems = ()
        if mems is None:
            mems = [None] * len(self.layer)

        attentions = [] if output_attentions else None
        hidden_states = [] if output_hidden_states else None
        for i, layer_module in enumerate(self.layer):
            if use_mems:
                # Cache new mems
                new_mems = new_mems + (self.cache_mem(output_h, mems[i]), )
            if output_hidden_states:
                hidden_states.append((
                    output_h, output_g) if output_g is not None else output_h)

            outputs = layer_module(
                output_h,
                output_g,
                attn_mask_h=non_tgt_mask,
                attn_mask_g=attn_mask,
                r=pos_emb,
                seg_mat=seg_mat,
                mems=mems[i],
                target_mapping=target_mapping,
                head_mask=head_mask[i],
                output_attentions=output_attentions,
            )
            output_h, output_g = outputs[:2]

            if output_attentions:
                attentions.append(outputs[2])

        # Add last hidden state
        if output_hidden_states:
            hidden_states.append((
                output_h, output_g) if output_g is not None else output_h)

        output = self.dropout(output_g if output_g is not None else output_h)

        # Prepare outputs, we transpose back here to shape [bsz, len, hidden_dim] (cf. beginning of forward() method)
        output = paddle.transpose(output, perm=[1, 0, 2])

        if not use_mems:
            new_mems = None

        if output_hidden_states:
            if output_g is not None:
                hidden_states = tuple(
                    paddle.transpose(h, perm=[1, 0, 2]) for hs in hidden_states
                    for h in hs)
            else:
                hidden_states = tuple(
                    paddle.transpose(hs, perm=[1, 0, 2])
                    for hs in hidden_states)

        if output_attentions:
            if target_mapping is not None:
                # When target_mapping is provided, there are 2-tuple of attentions
                attentions = tuple(
                    tuple(
                        paddle.transpose(att_stream, perm=[2, 3, 0, 1])
                        for att_stream in t) for t in attentions)
            else:
                attentions = tuple(
                    paddle.transpose(t, perm=[2, 3, 0, 1]) for t in attentions)

        if not return_dict:
            return tuple(
                v for v in [output, new_mems, hidden_states, attentions]
                if v is not None)
        return {
            "last_hidden_state": output,
            "mems": new_mems,
            "hidden_states": hidden_states,
            "attentions": attentions,
        }
Example no. 18
    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)
        if isinstance(param_and_grad, dict):
            param_and_grad = self._update_param_group(param_and_grad)
        param, grad = param_and_grad

        # Whether we should do weight decay for the parameter.
        with_decay = True
        if self._apply_decay_param_fun is not None \
                and not self._apply_decay_param_fun(param.name):
            with_decay = False

        moment1 = self._get_accumulator(self._moment1_acc_str,
                                        param_and_grad[0])
        moment2 = self._get_accumulator(self._moment2_acc_str,
                                        param_and_grad[0])
        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
                                              param_and_grad[0])
        beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
                                              param_and_grad[0])
        find_master = self._multi_precision and param_and_grad[
            0].dtype == core.VarDesc.VarType.FP16
        master_weight = (self._master_weights[param_and_grad[0].name]
                         if find_master else None)
        lr = self._create_param_lr(param_and_grad)

        # create the adamw optimize op
        if framework._non_static_mode():
            lr_ratio_ = 1. if self._lr_ratio is None else self._lr_ratio(
                param_and_grad[0])

            _beta1 = self._beta1 if not isinstance(
                self._beta1, Variable) else self._beta1.numpy().item(0)
            _beta2 = self._beta2 if not isinstance(
                self._beta2, Variable) else self._beta2.numpy().item(0)

            lr = paddle.cast(lr, dtype="float32")
            _, _, _, _, _, _ = _C_ops.adamw(
                param_and_grad[0], param_and_grad[1], lr, moment1, moment2,
                beta1_pow_acc, beta2_pow_acc, master_weight, param_and_grad[0],
                moment1, moment2, beta1_pow_acc, beta2_pow_acc, master_weight,
                'epsilon', self._epsilon, 'lazy_mode', self._lazy_mode,
                'min_row_size_to_use_multithread', 1000, 'beta1', _beta1,
                'beta2', _beta2, "with_decay", with_decay, 'coeff', self._coeff,
                'multi_precision', find_master, 'lr_ratio', lr_ratio_)
            return None

        inputs = {
            "Param": [param_and_grad[0]],
            "Grad": [param_and_grad[1]],
            "LearningRate": [lr],
            "Moment1": [moment1],
            "Moment2": [moment2],
            "Beta1Pow": [beta1_pow_acc],
            "Beta2Pow": [beta2_pow_acc],
        }

        # Pass found_inf to adamw, to skip update for not only param, but also momentum and beta_pow
        found_inf = self._get_auxiliary_var('found_inf')

        if found_inf:
            inputs['SkipUpdate'] = found_inf

        outputs = {
            "ParamOut": [param_and_grad[0]],
            "Moment1Out": [moment1],
            "Moment2Out": [moment2],
            "Beta1PowOut": [beta1_pow_acc],
            "Beta2PowOut": [beta2_pow_acc],
        }
        attrs = {
            "lazy_mode": self._lazy_mode,
            "min_row_size_to_use_multithread": 1000,
            "multi_precision": find_master,
            "with_decay": with_decay,
            "coeff": self._coeff,
            "lr_ratio": 1.
            if self._lr_ratio is None else self._lr_ratio(param_and_grad[0])
        }

        if isinstance(self._beta1, Variable):
            inputs['Beta1Tensor'] = self._beta1
        else:
            attrs['beta1'] = self._beta1
        if isinstance(self._beta2, Variable):
            inputs['Beta2Tensor'] = self._beta2
        else:
            attrs['beta2'] = self._beta2
        if isinstance(self._epsilon, Variable):
            inputs['EpsilonTensor'] = self._epsilon
        else:
            attrs['epsilon'] = self._epsilon

        if find_master:
            inputs["MasterParam"] = master_weight
            outputs["MasterParamOut"] = master_weight

        adamw_op = block.append_op(
            type=self.type,
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
            stop_gradient=True)

        return adamw_op
Example no. 19
    def forward(self,
                query,
                key,
                value,
                key_padding_mask=None,
                incremental_state=None,
                attn_mask=None):
        """
        Inputs of forward function
            query: [target length, batch size, embed dim]
            key: [sequence length, batch size, embed dim]
            value: [sequence length, batch size, embed dim]
            key_padding_mask: if True, mask padding based on batch size
            incremental_state: if provided, previous time steps are cached
            need_weights: output attn_output_weights
            static_kv: key and value are static

        Outputs of forward function
            attn_output: [target length, batch size, embed dim]
            attn_output_weights: [batch size, target length, sequence length]
        """
        q_shape = paddle.shape(query)
        src_shape = paddle.shape(key)
        q = self._in_proj_q(query)
        k = self._in_proj_k(key)
        v = self._in_proj_v(value)
        q *= self.scaling
        q = paddle.transpose(
            paddle.reshape(
                q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        k = paddle.transpose(
            paddle.reshape(
                k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        v = paddle.transpose(
            paddle.reshape(
                v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
            [1, 2, 0, 3])
        if key_padding_mask is not None:
            assert key_padding_mask.shape[0] == q_shape[1]
            assert key_padding_mask.shape[1] == src_shape[0]
        attn_output_weights = paddle.matmul(q,
                                            paddle.transpose(k, [0, 1, 3, 2]))
        if attn_mask is not None:
            attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0)
            attn_output_weights += attn_mask
        if key_padding_mask is not None:
            attn_output_weights = paddle.reshape(
                attn_output_weights,
                [q_shape[1], self.num_heads, q_shape[0], src_shape[0]])
            key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 1), 2)
            key = paddle.cast(key, 'float32')
            y = paddle.full(shape=paddle.shape(key),
                            dtype='float32',
                            fill_value='-inf')
            y = paddle.where(key == 0., key, y)
            attn_output_weights += y
        attn_output_weights = F.softmax(
            attn_output_weights.astype('float32'),
            axis=-1,
            dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
            else attn_output_weights.dtype)
        attn_output_weights = F.dropout(attn_output_weights,
                                        p=self.dropout,
                                        training=self.training)

        attn_output = paddle.matmul(attn_output_weights, v)
        attn_output = paddle.reshape(
            paddle.transpose(attn_output, [2, 0, 1, 3]),
            [q_shape[0], q_shape[1], self.embed_dim])
        attn_output = self.out_proj(attn_output)

        return attn_output
Example no. 20
def randint_like(x, low=0, high=None, dtype=None, name=None):
    """
    This OP returns a Tensor filled with random integers from a discrete uniform
    distribution in the range [``low``, ``high``), with the same shape as ``x``
    (and the same dtype, unless ``dtype`` is given).
    If ``high`` is None (the default), the range is [0, ``low``).

    Args:
        x (Tensor): The input tensor which specifies shape. The dtype of ``x`` 
            can be bool, int32, int64, float16, float32, float64.
        low (int): The lower bound on the range of random values to generate.
            The ``low`` is included in the range. If ``high`` is None, the
            range is [0, ``low``). Default is 0.
        high (int, optional): The upper bound on the range of random values to
            generate; ``high`` is excluded from the range. Default is None
            (see above for the behavior when ``high`` is None).
        dtype (str|np.dtype, optional): The data type of the
            output tensor. Supported data types: bool, int32, int64, float16, 
            float32, float64. If ``dtype`` is None, the data type is the
            same as x's data type. Default is None.
        name (str, optional): The default value is None.  Normally there is no
            need for user to set this property.  For more information, please
            refer to :ref:`api_guide_Name`.

    Returns: 
        Tensor: A Tensor filled with random integers from a discrete uniform
        distribution in the range [``low``, ``high``), with ``shape`` and ``dtype``.

    Examples:
        .. code-block:: python

            import paddle

            # example 1:
            # dtype is None and the dtype of x is float16
            x = paddle.zeros((1,2)).astype("float16")
            out1 = paddle.randint_like(x, low=-5, high=5)
            print(out1)
            print(out1.dtype)
            # [[0, -3]]  # random
            # paddle.float16

            # example 2:
            # dtype is None and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out2 = paddle.randint_like(x, low=-5, high=5)
            print(out2)
            print(out2.dtype)
            # [[0, -3]]  # random
            # paddle.float32

            # example 3:
            # dtype is None and the dtype of x is float64
            x = paddle.zeros((1,2)).astype("float64")
            out3 = paddle.randint_like(x, low=-5, high=5)
            print(out3)
            print(out3.dtype)
            # [[0, -3]]  # random
            # paddle.float64

            # example 4:
            # dtype is None and the dtype of x is int32
            x = paddle.zeros((1,2)).astype("int32")
            out4 = paddle.randint_like(x, low=-5, high=5)
            print(out4)
            print(out4.dtype)
            # [[0, -3]]  # random
            # paddle.int32

            # example 5:
            # dtype is None and the dtype of x is int64
            x = paddle.zeros((1,2)).astype("int64")
            out5 = paddle.randint_like(x, low=-5, high=5)
            print(out5)
            print(out5.dtype)
            # [[0, -3]]  # random
            # paddle.int64

            # example 6:
            # dtype is float64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out6 = paddle.randint_like(x, low=-5, high=5, dtype="float64")
            print(out6)
            print(out6.dtype)
            # [[0, -1]]  # random
            # paddle.float64

            # example 7:
            # dtype is bool and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out7 = paddle.randint_like(x, low=-5, high=5, dtype="bool")
            print(out7)
            print(out7.dtype)
            # [[0, -1]]  # random
            # paddle.bool

            # example 8:
            # dtype is int32 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out8 = paddle.randint_like(x, low=-5, high=5, dtype="int32")
            print(out8)
            print(out8.dtype)
            # [[0, -1]]  # random
            # paddle.int32

            # example 9:
            # dtype is int64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out9 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out9)
            print(out9.dtype)
            # [[0, -1]]  # random
            # paddle.int64

            # example 10:
            # dtype is int64 and the dtype of x is bool
            x = paddle.zeros((1,2)).astype("bool")
            out10 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out10)
            print(out10.dtype)
            # [[0, -1]]  # random
            # paddle.int64

    """
    if high is None:
        if low <= 0:
            raise ValueError(
                "If high is None, low must be greater than 0, but received low = {0}."
                .format(low))
        high = low
        low = 0
    if dtype is None:
        dtype = x.dtype
    if not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)
    shape = x.shape

    if low >= high:
        raise ValueError(
            "randint_like's low must be less than high, but received low = {0}, "
            "high = {1}".format(low, high))

    if in_dygraph_mode():
        shape = utils.convert_shape_to_list(shape)
        out = _C_ops.randint('shape', shape, 'low', low, 'high', high, 'seed',
                             0, 'dtype', core.VarDesc.VarType.INT64)
        out = paddle.cast(out, dtype)
        return out

    check_shape(shape, 'randint_like')
    check_dtype(dtype, 'dtype',
                ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
                'randint_like')

    inputs = dict()
    attrs = {
        'low': low,
        'high': high,
        'seed': 0,
        'dtype': core.VarDesc.VarType.INT64
    }
    utils.get_shape_tensor_inputs(inputs=inputs,
                                  attrs=attrs,
                                  shape=shape,
                                  op_type='randint_like')

    helper = LayerHelper("randint", **locals())
    out = helper.create_variable_for_type_inference(
        dtype=core.VarDesc.VarType.INT64)
    helper.append_op(type='randint',
                     inputs=inputs,
                     outputs={'Out': out},
                     attrs=attrs)
    out.stop_gradient = True
    out = paddle.cast(out, dtype)
    return out
Example no. 21
def normal(mean=0.0, std=1.0, shape=None, name=None):
    """
    This OP returns a Tensor filled with random values sampled from a normal
    distribution with ``mean`` and ``std`` (standard deviation).

    If ``mean`` is a Tensor, the output Tensor has the same shape and data type as ``mean``.
    If ``mean`` is not a Tensor and ``std`` is a Tensor, the output Tensor has the same shape and data type as ``std``.
    If neither ``mean`` nor ``std`` is a Tensor, the output Tensor has the shape given by ``shape``, with data type float32.

    If both ``mean`` and ``std`` are Tensors, they must contain the same number of elements.

    Args:
        mean (float|Tensor, optional): The mean of the output Tensor's normal distribution.
            If ``mean`` is float, all elements of the output Tensor share the same mean.
            If ``mean`` is a Tensor(data type supports float32, float64), it has per-element means.
            Default is 0.0.
        std (float|Tensor, optional): The standard deviation of the output Tensor's normal distribution.
            If ``std`` is float, all elements of the output Tensor share the same standard deviation.
            If ``std`` is a Tensor(data type supports float32, float64), it has per-element standard deviations.
            Default is 1.0.
        shape (list|tuple|Tensor, optional): The shape of the output Tensor. If ``shape``
            is a list or tuple, the elements of it should be integers or Tensors
            (with the shape [1], and the data type int32 or int64). If ``shape``
            is a Tensor, it should be a 1-D Tensor(with the data type int32 or
            int64). If ``mean`` or ``std`` is a Tensor, the shape of the output
            Tensor is the same as ``mean`` or ``std`` , attr ``shape`` is ignored.
            Default is None
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor filled with random values sampled from a normal distribution with ``mean`` and ``std`` .

    Examples:
        .. code-block:: python

            import paddle

            out1 = paddle.normal(shape=[2, 3])
            # [[ 0.17501129  0.32364586  1.561118  ]  # random
            #  [-1.7232178   1.1545963  -0.76156676]]  # random

            mean_tensor = paddle.to_tensor([1.0, 2.0, 3.0])
            out2 = paddle.normal(mean=mean_tensor)
            # [ 0.18644847 -1.19434458  3.93694787]  # random

            std_tensor = paddle.to_tensor([1.0, 2.0, 3.0])
            out3 = paddle.normal(mean=mean_tensor, std=std_tensor)
            # [1.00780561 3.78457445 5.81058198]  # random

    """
    if not in_dygraph_mode():
        check_type(mean, 'mean', (int, float, Variable), 'normal')
        check_type(std, 'std', (int, float, Variable), 'normal')
        if isinstance(mean, Variable):
            check_dtype(
                mean.dtype, 'mean', ['float32', 'float64'], 'normal',
                "If mean is Tensor, its data type only supports float32, float64."
            )
        if isinstance(std, Variable):
            check_dtype(
                std.dtype, 'std', ['float32', 'float64'], 'normal',
                "If std is Tensor, its data type only supports float32, float64."
            )
        if shape is not None:
            check_shape(shape, 'normal')

    if isinstance(mean, Variable):
        if isinstance(std, Variable):
            if std.dtype != mean.dtype:
                std = paddle.cast(std, mean.dtype)
            mean_shape = paddle.shape(mean)
            std = paddle.reshape(std, mean_shape)
        else:
            std = float(std)
        out = standard_normal(paddle.shape(mean), mean.dtype, name)
    elif isinstance(std, Variable):
        mean = float(mean)
        out = standard_normal(paddle.shape(std), std.dtype, name)
    else:
        return gaussian(shape=shape, mean=mean, std=std, name=name)

    out = out * std + mean
    if not in_dygraph_mode():
        out.stop_gradient = True
    return out
Example no. 22
 def create_loss(self, raw_pred, label):
     loss = paddle.nn.functional.log_loss(input=raw_pred,
                                          label=paddle.cast(
                                              label, "float32"))
     loss = paddle.mean(loss)
     return loss
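A standalone sketch of the same loss on toy values: integer 0/1 labels are cast to float32 before log_loss, which expects probabilities and float labels of shape [N, 1].

import paddle

raw_pred = paddle.to_tensor([[0.9], [0.2], [0.6]])          # assumed sigmoid outputs
label = paddle.to_tensor([[1], [0], [1]], dtype='int64')

loss = paddle.nn.functional.log_loss(input=raw_pred,
                                      label=paddle.cast(label, 'float32'))
loss = paddle.mean(loss)
print(float(loss))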
Example no. 23
    def forward(self, bn, observed, initial_position):

        if initial_position:
            observed_ = {**initial_position, **observed}
        else:
            observed_ = observed
        bn.forward(observed_)

        q0 = [[k, v.tensor] for k, v in bn.nodes.items()
              if k not in observed.keys()]
        normals = [[k, Normal(mean=fluid.layers.zeros(shape=v.shape, dtype='float32'), std=1)]\
                    for k,v in q0]

        for e in range(self.iters):
            q1 = [[k, paddle.assign(v)] for k, v in q0]
            p0 = [[k, v.sample()] for k, v in normals]
            p1 = [[k, paddle.assign(v)] for k, v in p0]

            ###### leapfrog integrator
            for s in range(self.n_leapfrogs):
                observed_ = {**dict(q1), **observed}
                bn.forward(observed_)
                log_joint_ = bn.log_joint()
                q_v = [v for _, v in q1]
                q_grad = paddle.grad(log_joint_, q_v)

                for i, _ in enumerate(q_grad):
                    p1[i][1] = p1[i][1] + self.step_size * q_grad[i] / 2.0
                    q1[i][1] = q1[i][1] + self.step_size * p1[i][1]
                    p1[i][1] = p1[i][1].detach()
                    p1[i][1].stop_gradient = False
                    q1[i][1] = q1[i][1].detach()
                    q1[i][1].stop_gradient = False

                observed_ = {**dict(q1), **observed}
                q_v = [v for _, v in q1]
                bn.forward(observed_)
                #print(dir(bn))
                log_joint_ = bn.log_joint()
                q_grad = paddle.grad(log_joint_, q_v)

                for i, _ in enumerate(q_grad):
                    p1[i][1] = p1[i][1] + self.step_size * q_grad[i] / 2.0
                    p1[i][1] = p1[i][1].detach()
                    p1[i][1].stop_gradient = False

            ###### reverse p1
            for i, _ in enumerate(p1):
                p1[i][1] = -1 * p1[i][1]

            ###### M-H step
            observed_ = {**dict(q0), **observed}
            bn.forward(observed_)
            log_prob_q0 = bn.log_joint()
            log_prob_p0 = None
            for i, _ in enumerate(p0):
                len_q = len(log_prob_q0.shape)
                len_p = len(p0[i][1].shape)
                assert (len_p >= len_q)
                if len_p > len_q:
                    dims = [i for i in range(len_q - len_p, 0)]
                    try:
                        log_prob_p0 = log_prob_p0 + fluid.layers.reduce_sum(
                            p0[i][1], dims)
                    except:
                        log_prob_p0 = fluid.layers.reduce_sum(p0[i][1], dims)
                else:
                    try:
                        log_prob_p0 = log_prob_p0 + p0[i][1]
                    except:
                        log_prob_p0 = p0[i][1]

            observed_ = {**dict(q1), **observed}
            bn.forward(observed_)
            log_prob_q1 = bn.log_joint()
            log_prob_p1 = None
            for i, _ in enumerate(p1):
                len_q = len(log_prob_q0.shape)
                len_p = len(p1[i][1].shape)
                assert (len_p >= len_q)
                if len_p > len_q:
                    dims = [i for i in range(len_q - len_p, 0)]
                    try:
                        log_prob_p1 = log_prob_p1 + fluid.layers.reduce_sum(
                            p1[i][1], dims)
                    except:
                        log_prob_p1 = fluid.layers.reduce_sum(p1[i][1], dims)
                else:
                    try:
                        log_prob_p1 = log_prob_p1 + p1[i][1]
                    except:
                        log_prob_p1 = p1[i][1]

            assert (log_prob_q0.shape == log_prob_p1.shape)

            acceptance = log_prob_q1 + log_prob_p1 - log_prob_q0 - log_prob_p0
            #acceptance = log_prob_q0 + log_prob_p0 - log_prob_q1 - log_prob_p1

            for i, _ in enumerate(q1):
                event = paddle.to_tensor(np.log(
                    np.random.rand(*q1[i][1].shape)),
                                         dtype='float32')
                #q0[i][1] = paddle.where(acceptance>=event, q1[i][1], q0[i][1])
                a = paddle.cast(acceptance > event, dtype='float32')
                q0[i][1] = paddle.assign(a * q1[i][1] + (1.0 - a) * q0[i][1])

            #print(q0[0][1])
            #print(dir(bn))
            #print(bn.clear_gradients())

        sample_ = dict(q0)
        return sample_
Example no. 24
def where(condition, x=None, y=None, name=None):
    r"""
    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    **Note**:
        ``paddle.where(condition)`` is identical to ``paddle.nonzero(condition, as_tuple=True)``.

    .. math::

      out_i =
      \begin{cases}
      x_i, \quad  \text{if}  \ condition_i \  is \ True \\
      y_i, \quad  \text{if}  \ condition_i \  is \ False \\
      \end{cases}


    Args:
        condition(Tensor): The condition to choose x or y. When True(nonzero), yield x, otherwise yield y.
        x(Tensor or Scalar, optional): x is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.
        y(Tensor or Scalar, optional): y is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.

        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor with the same data type as x.

    Examples:
        .. code-block:: python

          import paddle

          x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
          y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
          out = paddle.where(x>1, x, y)

          print(out)
          #out: [1.0, 1.0, 3.2, 1.2]

          out = paddle.where(x>1)
          print(out)
          #out: (Tensor(shape=[2, 1], dtype=int64, place=CPUPlace, stop_gradient=True,
          #            [[2],
          #             [3]]),)
    """
    if np.isscalar(x):
        x = paddle.full([1], x, np.array([x]).dtype.name)

    if np.isscalar(y):
        y = paddle.full([1], y, np.array([y]).dtype.name)

    if x is None and y is None:
        return nonzero(condition, as_tuple=True)

    if x is None or y is None:
        raise ValueError("either both or neither of x and y should be given")

    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(x, 'x',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')
        check_variable_and_dtype(y, 'y',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')

    condition_shape = list(condition.shape)
    x_shape = list(x.shape)
    y_shape = list(y.shape)

    if x_shape == y_shape and condition_shape == x_shape:
        broadcast_condition = condition
        broadcast_x = x
        broadcast_y = y
    else:
        if core.is_compiled_with_xpu():
            cond_int = paddle.cast(condition, x.dtype)
            cond_not_int = paddle.cast(logical_not(condition), x.dtype)
            out1 = paddle.multiply(x, cond_int)
            out2 = paddle.multiply(y, cond_not_int)
            out = paddle.add(out1, out2)
            return out

        zeros_like_x = paddle.zeros_like(x)
        zeros_like_y = paddle.zeros_like(y)
        zeros_like_condition = paddle.zeros_like(condition)
        zeros_like_condition = paddle.cast(zeros_like_condition, x.dtype)
        cast_cond = paddle.cast(condition, x.dtype)

        broadcast_zeros = paddle.add(zeros_like_x, zeros_like_y)
        broadcast_zeros = paddle.add(broadcast_zeros, zeros_like_condition)
        broadcast_x = paddle.add(x, broadcast_zeros)
        broadcast_y = paddle.add(y, broadcast_zeros)
        broadcast_condition = paddle.add(cast_cond, broadcast_zeros)
        broadcast_condition = paddle.cast(broadcast_condition, 'bool')

    if in_dygraph_mode():
        return _C_ops.final_state_where(broadcast_condition, broadcast_x,
                                        broadcast_y)
    else:
        if _in_legacy_dygraph():
            return _C_ops.where(broadcast_condition, broadcast_x, broadcast_y)
        else:
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(type='where',
                             inputs={
                                 'Condition': broadcast_condition,
                                 'X': broadcast_x,
                                 'Y': broadcast_y
                             },
                             outputs={'Out': [out]})

            return out
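
The fallback branch above broadcasts condition, x and y to a common shape by adding zero tensors before dispatching to the where op. A minimal usage sketch of that behaviour, assuming a Paddle build where paddle.where accepts broadcastable inputs (which is exactly what the code above arranges):

import paddle

condition = paddle.to_tensor([[True], [False]])   # shape [2, 1]
x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])    # shape [2, 2]
y = paddle.full([2, 2], -1.0)                     # shape [2, 2]

# condition is broadcast to [2, 2]: row 0 keeps x, row 1 takes y
out = paddle.where(condition, x, y)
print(out)
# [[ 1.,  2.],
#  [-1., -1.]]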
Example no. 25
    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        seg_masks = paddle.gather(seg_masks, index=sort_inds)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        sum_masks = paddle.gather(sum_masks, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)

        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        # inter.
        inter_matrix = paddle.mm(seg_masks,
                                 paddle.transpose(seg_masks, [1, 0]))
        n_samples = paddle.shape(cate_labels)
        # union.
        sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
        # iou.
        iou_matrix = (inter_matrix /
                      (sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) -
                       inter_matrix))
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = paddle.expand(cate_labels,
                                      shape=[n_samples, n_samples])
        label_matrix = paddle.cast(
            (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
        compensate_iou = paddle.expand(compensate_iou,
                                       shape=[n_samples, n_samples])
        compensate_iou = paddle.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = iou_matrix * label_matrix

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = paddle.exp(-1 * self.sigma *
                                           (compensate_iou**2))
            decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                           axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = cate_scores * decay_coefficient
        y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
        keep = paddle.where(cate_scores >= self.update_threshold, cate_scores,
                            y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Prevent an empty result by appending the last index as fake data
        keep = paddle.concat(
            [keep,
             paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

        seg_preds = paddle.gather(seg_preds, index=keep)
        cate_scores = paddle.gather(cate_scores, index=keep)
        cate_labels = paddle.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)
        return seg_preds, cate_scores, cate_labels
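
To make the matrix NMS decay step above concrete, here is a toy, standalone recomputation of the gaussian kernel on a hypothetical 3x3 upper-triangular IoU matrix (the values and sigma are made up; the ops mirror the method above):

import paddle

sigma = 2.0
# decay_iou[i][j]: IoU between higher-scored candidate i and candidate j of the same class
decay_iou = paddle.to_tensor([[0.0, 0.8, 0.1],
                              [0.0, 0.0, 0.3],
                              [0.0, 0.0, 0.0]])
# best IoU that each candidate's own suppressors have already absorbed
compensate_iou = paddle.max(decay_iou, axis=0)
compensate_iou = paddle.transpose(
    paddle.expand(compensate_iou, shape=[3, 3]), [1, 0])
decay_matrix = paddle.exp(-1 * sigma * (decay_iou**2))
compensate_matrix = paddle.exp(-1 * sigma * (compensate_iou**2))
decay_coefficient = paddle.min(decay_matrix / compensate_matrix, axis=0)
print(decay_coefficient)  # per-candidate factor multiplied into cate_scores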
Example no. 26
 def dec2bin(self, x, bits):
     mask = paddle.arange(bits - 1, -1, -1, dtype=paddle.float32)
     mask = paddle.cast(2**mask, dtype=paddle.int64)
     return paddle.not_equal(
         x.unsqueeze(-1).bitwise_and(mask),
         paddle.full(shape=[1], fill_value=0, dtype=paddle.int64))
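
A brief usage sketch of the helper above, inlined so it runs standalone (the input values are made up): each integer is expanded into its binary digits, most significant bit first.

import paddle

x = paddle.to_tensor([5, 3], dtype=paddle.int64)
bits = 4
mask = paddle.arange(bits - 1, -1, -1, dtype=paddle.float32)
mask = paddle.cast(2**mask, dtype=paddle.int64)    # [8, 4, 2, 1]
out = paddle.not_equal(
    x.unsqueeze(-1).bitwise_and(mask),
    paddle.full(shape=[1], fill_value=0, dtype=paddle.int64))
print(paddle.cast(out, 'int64'))
# [[0, 1, 0, 1],   5 -> 0101
#  [0, 0, 1, 1]]   3 -> 0011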
Example no. 27
def median(x, axis=None, keepdim=False, name=None):
    """
    Compute the median along the specified axis.

    Args:
        x (Tensor): The input Tensor, its data type can be bool, float16, float32, float64, int32, int64.
        axis (int, optional): The axis along which to perform median calculations. ``axis`` should be an int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of median along ``axis`` of ``x``. If data type of ``x`` is float64, data type of results will be float64, otherwise data type will be float32.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.arange(12).reshape([3, 4])
            # x is [[0 , 1 , 2 , 3 ],
            #       [4 , 5 , 6 , 7 ],
            #       [8 , 9 , 10, 11]]

            y1 = paddle.median(x)
            # y1 is [5.5]

            y2 = paddle.median(x, axis=0)
            # y2 is [4., 5., 6., 7.]

            y3 = paddle.median(x, axis=1)
            # y3 is [1.5, 5.5, 9.5]

            y4 = paddle.median(x, axis=0, keepdim=True)
            # y4 is [[4., 5., 6., 7.]]

    """
    if not isinstance(x, Variable):
        raise TypeError("In median, the input x should be a Tensor.")
    is_flatten = axis is None
    dims = len(x.shape)
    if is_flatten:
        x = paddle.flatten(x)
        axis = 0
    else:
        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
            raise ValueError(
                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
            )
        if axis < 0:
            axis += dims
    sz = x.shape[axis]
    kth = sz >> 1
    tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
    dtype = 'float64' if x.dtype == core.VarDesc.VarType.FP64 else 'float32'
    if sz & 1 == 0:
        out_tensor = paddle.slice(
            tensor_topk, axes=[axis], starts=[kth - 1],
            ends=[kth]) + paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1])
        out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
    else:
        out_tensor = paddle.cast(paddle.slice(tensor_topk,
                                              axes=[axis],
                                              starts=[kth],
                                              ends=[kth + 1]),
                                 dtype=dtype)
    if not keepdim or is_flatten:
        if not is_flatten:
            newshape = x.shape[:axis] + x.shape[axis + 1:]
        elif not keepdim:
            newshape = [1]
        else:
            newshape = [1] * dims
    else:
        newshape = out_tensor.shape
    out_tensor = out_tensor.reshape(newshape, name=name)
    return out_tensor
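
The even-length branch above averages the two middle order statistics returned by paddle.topk with largest=False; a standalone sketch with made-up values:

import paddle

x = paddle.to_tensor([7.0, 1.0, 5.0, 3.0])
kth = x.shape[0] >> 1                                  # 2
smallest, _ = paddle.topk(x, kth + 1, largest=False)   # [1., 3., 5.]
median = (smallest[kth - 1] + smallest[kth]) / 2       # mean of the two middle values
print(median)                                          # 4.0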
Example no. 28
 def create_loss(self, pred, label):
     cost = paddle.nn.functional.log_loss(input=pred,
                                          label=paddle.cast(
                                              label, dtype="float32"))
     avg_cost = paddle.mean(x=cost)
     return avg_cost
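
A hypothetical call to the method above: log_loss expects probabilities of shape [N, 1] in (0, 1), and the integer labels are cast to float32 before the loss is averaged.

import paddle

pred = paddle.to_tensor([[0.9], [0.2], [0.7]])
label = paddle.to_tensor([[1], [0], [1]])
cost = paddle.nn.functional.log_loss(input=pred,
                                     label=paddle.cast(label, dtype="float32"))
print(paddle.mean(x=cost))  # average binary cross-entropy over the batch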
Example no. 29
    def beam_search(self, x, beam_width, eos, embed):
        def _inflate(tensor, times, dim):
            repeat_dims = [1] * tensor.dim()
            repeat_dims[dim] = times
            output = paddle.tile(tensor, repeat_dims)
            return output

        # https://github.com/IBM/pytorch-seq2seq/blob/fede87655ddce6c94b38886089e05321dc9802af/seq2seq/models/TopKDecoder.py
        batch_size, l, d = x.shape
        x = paddle.tile(paddle.transpose(x.unsqueeze(1), perm=[1, 0, 2, 3]),
                        [beam_width, 1, 1, 1])
        inflated_encoder_feats = paddle.reshape(
            paddle.transpose(x, perm=[1, 0, 2, 3]), [-1, l, d])

        # Initialize the decoder
        state = self.decoder.get_initial_state(embed, tile_times=beam_width)

        pos_index = paddle.reshape(paddle.arange(batch_size) * beam_width,
                                   shape=[-1, 1])

        # Initialize the scores
        sequence_scores = paddle.full(shape=[batch_size * beam_width, 1],
                                      fill_value=-float('Inf'))
        index = [i * beam_width for i in range(0, batch_size)]
        sequence_scores[index] = 0.0

        # Initialize the input vector
        y_prev = paddle.full(shape=[batch_size * beam_width],
                             fill_value=self.num_classes)

        # Store decisions for backtracking
        stored_scores = list()
        stored_predecessors = list()
        stored_emitted_symbols = list()

        for i in range(self.max_len_labels):
            output, state = self.decoder(inflated_encoder_feats, state, y_prev)
            state = paddle.unsqueeze(state, axis=0)
            log_softmax_output = paddle.nn.functional.log_softmax(output,
                                                                  axis=1)

            sequence_scores = _inflate(sequence_scores, self.num_classes, 1)
            sequence_scores += log_softmax_output
            scores, candidates = paddle.topk(paddle.reshape(
                sequence_scores, [batch_size, -1]),
                                             beam_width,
                                             axis=1)

            # Reshape input = (bk, 1) and sequence_scores = (bk, 1)
            y_prev = paddle.reshape(candidates % self.num_classes,
                                    shape=[batch_size * beam_width])
            sequence_scores = paddle.reshape(
                scores, shape=[batch_size * beam_width, 1])

            # Update fields for next timestep
            pos_index = paddle.expand_as(pos_index, candidates)
            predecessors = paddle.cast(candidates / self.num_classes +
                                       pos_index,
                                       dtype='int64')
            predecessors = paddle.reshape(predecessors,
                                          shape=[batch_size * beam_width, 1])
            state = paddle.index_select(state,
                                        index=predecessors.squeeze(),
                                        axis=1)

            # Update sequence scores and erase scores for <eos> symbol so that they aren't expanded
            stored_scores.append(sequence_scores.clone())
            y_prev = paddle.reshape(y_prev, shape=[-1, 1])
            eos_prev = paddle.full_like(y_prev, fill_value=eos)
            mask = eos_prev == y_prev
            mask = paddle.nonzero(mask)
            if mask.dim() > 0:
                sequence_scores = sequence_scores.numpy()
                mask = mask.numpy()
                sequence_scores[mask] = -float('inf')
                sequence_scores = paddle.to_tensor(sequence_scores)

            # Cache results for backtracking
            stored_predecessors.append(predecessors)
            y_prev = paddle.squeeze(y_prev)
            stored_emitted_symbols.append(y_prev)

        # Do backtracking to return the optimal values
        #====== backtrack ======#
        # Initialize return variables given different types
        p = list()
        l = [[self.max_len_labels] * beam_width for _ in range(batch_size)
             ]  # Placeholder for lengths of top-k sequences

        # the last step outputs of the beams are not sorted,
        # so they are sorted here
        sorted_score, sorted_idx = paddle.topk(
            paddle.reshape(stored_scores[-1], shape=[batch_size, beam_width]),
            beam_width)

        # initialize the sequence scores with the sorted last step beam scores
        s = sorted_score.clone()

        batch_eos_found = [0] * batch_size  # the number of EOS found
        # in the backward loop below for each batch
        t = self.max_len_labels - 1
        # initialize the back pointer with the sorted order of the last step beams.
        # add pos_index for indexing variable with b*k as the first dimension.
        t_predecessors = paddle.reshape(sorted_idx +
                                        pos_index.expand_as(sorted_idx),
                                        shape=[batch_size * beam_width])
        while t >= 0:
            # Re-order the variables with the back pointer
            current_symbol = paddle.index_select(stored_emitted_symbols[t],
                                                 index=t_predecessors,
                                                 axis=0)
            t_predecessors = paddle.index_select(
                stored_predecessors[t].squeeze(), index=t_predecessors, axis=0)
            eos_indices = stored_emitted_symbols[t] == eos
            eos_indices = paddle.nonzero(eos_indices)

            if eos_indices.dim() > 0:
                for i in range(eos_indices.shape[0] - 1, -1, -1):
                    # Indices of the EOS symbol for both variables
                    # with b*k as the first dimension, and b, k for
                    # the first two dimensions
                    idx = eos_indices[i]
                    b_idx = int(idx[0] / beam_width)
                    # The indices of the replacing position
                    # according to the replacement strategy noted above
                    res_k_idx = beam_width - (batch_eos_found[b_idx] %
                                              beam_width) - 1
                    batch_eos_found[b_idx] += 1
                    res_idx = b_idx * beam_width + res_k_idx

                    # Replace the old information in return variables
                    # with the new ended sequence information
                    t_predecessors[res_idx] = stored_predecessors[t][idx[0]]
                    current_symbol[res_idx] = stored_emitted_symbols[t][idx[0]]
                    s[b_idx, res_k_idx] = stored_scores[t][idx[0], 0]
                    l[b_idx][res_k_idx] = t + 1

            # record the back tracked results
            p.append(current_symbol)
            t -= 1

        # Sort and re-order again as the added ended sequences may change
        # the order (very unlikely)
        s, re_sorted_idx = s.topk(beam_width)
        for b_idx in range(batch_size):
            l[b_idx] = [
                l[b_idx][k_idx.item()] for k_idx in re_sorted_idx[b_idx, :]
            ]

        re_sorted_idx = paddle.reshape(
            re_sorted_idx + pos_index.expand_as(re_sorted_idx),
            [batch_size * beam_width])

        # Reverse the sequences and re-order at the same time
        # It is reversed because the backtracking happens in reverse time order
        p = [
            paddle.reshape(paddle.index_select(step, re_sorted_idx, 0),
                           shape=[batch_size, beam_width, -1])
            for step in reversed(p)
        ]
        p = paddle.concat(p, -1)[:, 0, :]
        return p, paddle.ones_like(p)
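
Inside the decoding loop above, the top-k search runs over the beam_width * num_classes flattened scores of each batch item, so every selected index decodes into an emitted symbol (index % num_classes) and a predecessor beam (index // num_classes). A small sketch with made-up numbers, assuming batch_size=1, beam_width=2 and num_classes=5:

import paddle

num_classes = 5
candidates = paddle.to_tensor([[7, 3]])                         # flattened top-2 indices
symbols = candidates % num_classes                              # [[2, 3]]  next y_prev tokens
predecessors = paddle.cast(candidates / num_classes, 'int64')   # [[1, 0]]  beam each token came from
print(symbols.numpy(), predecessors.numpy())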
Example no. 30
def do_eval(args):
    paddle.set_device(args.device)
    model_class, tokenizer_class = MODEL_CLASSES["gpt"]
    tokenizer = tokenizer_class.from_pretrained(args.model_name)

    if args.init_checkpoint_path is not None:
        model = GPTForPretraining(
            GPTModel(
                **model_class.pretrained_init_configuration[args.model_name]))

        logger.info("Load model checkpoint from %s" %
                    args.init_checkpoint_path)
        model_dict = paddle.load(os.path.join(args.init_checkpoint_path))
        model.set_dict(model_dict)
    else:
        model = model_class.from_pretrained(args.model_name)

    tic_eval = time.time()
    eval_data_loader = create_eval_dataset(args)
    model.eval()
    total_score = 0
    score_name = "loss" if not args.cloze_eval else "number correct"
    with paddle.no_grad():
        for step, batch in enumerate(eval_data_loader):
            tokens, loss_mask, attention_mask, position_ids, labels = batch
            preds = model(tokens, position_ids, attention_mask)
            if not args.cloze_eval:
                masked_lm_loss = paddle.nn.functional.cross_entropy(
                    preds, labels, reduction="none")
                loss = paddle.sum(masked_lm_loss * loss_mask)
                total_score += loss.numpy() / (args.num_tokenized_tokens - 1)
            else:
                outputs = paddle.argmax(preds, -1)
                acc = paddle.cast(outputs == labels, 'float32')
                acc = paddle.where(paddle.cast(loss_mask, 'bool'), acc,
                                   paddle.ones_like(acc))
                acc = paddle.sum(paddle.prod(acc, -1))
                total_score += acc.numpy()
            if step % args.logging_steps == 0:
                logger.info(
                    "step %d, batch: %d, %s: %f, speed: %.2f step/s" %
                    (step, step, score_name, total_score, args.logging_steps /
                     (time.time() - tic_eval)))
                tic_eval = time.time()

    if not args.cloze_eval:
        total_loss = float(total_score)
        ppl = math.exp(min(20, total_loss))
        token_ratio = (args.num_tokenized_tokens -
                       1) / (args.num_original_tokens - 1)
        adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
        string = ' validation results on {} | '.format(args.eval_path)
        string += 'avg loss: {:.4E} | '.format(total_loss)
        string += 'ppl: {:.4E} | '.format(ppl)
        string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
        string += 'token ratio: {} |'.format(token_ratio)
    else:
        num_correct = float(total_score)
        acc = float(num_correct / args.num_examples)
        string = ' validation results on {} | '.format(args.eval_path)
        string += 'number correct: {:.4E} | '.format(num_correct)
        string += 'total examples: {:.4E} | '.format(args.num_examples)
        string += 'avg accuracy: {:.4E}'.format(acc)
    logger.info(string)
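
The perplexity bookkeeping at the end follows the usual formulas; a minimal sketch with made-up numbers showing how the adjusted ppl rescales the average loss by the tokenized-to-original token ratio before exponentiating:

import math

total_loss = 3.2                                    # average loss per tokenized token
num_tokenized_tokens, num_original_tokens = 120000, 100000
token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
ppl = math.exp(min(20, total_loss))                 # clipped to avoid overflow
adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
print("ppl: %.2f, adjusted ppl: %.2f" % (ppl, adjusted_ppl))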