Example 1
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]):
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]):
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Mean cross-entropy loss over batch and time; positions where label == -100 are ignored.
                Returns None if `start_pos`/`end_pos` are not provided.
            start_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the start position; use argmax(start_logits) to get the start index
            end_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the end position; use argmax(end_logits) to get the end index
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logit, end_logits = P.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) != 1:
                start_pos = start_pos.squeeze()
            if len(end_pos.shape) != 1:
                end_pos = end_pos.squeeze()
            start_loss = F.cross_entropy(start_logit, start_pos)
            end_loss = F.cross_entropy(end_logits, end_pos)
            loss = (start_loss.mean() + end_loss.mean()) / 2.
        else:
            loss = None
        return loss, start_logit, end_logits
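
For context, a minimal self-contained sketch of the span-loss pattern above, using random tensors instead of an ERNIE backbone (shapes and variable names here are illustrative assumptions):

import paddle
import paddle.nn.functional as F

batch_size, seq_len = 4, 16
# two scores per token: one column for the start logit, one for the end logit
span_logits = paddle.randn([batch_size, seq_len, 2])
start_logits, end_logits = paddle.unstack(span_logits, axis=-1)

# ground-truth start/end token indices of the answer span
start_pos = paddle.randint(0, seq_len, shape=[batch_size], dtype='int64')
end_pos = paddle.randint(0, seq_len, shape=[batch_size], dtype='int64')

start_loss = F.cross_entropy(start_logits, start_pos)  # mean over the batch
end_loss = F.cross_entropy(end_logits, end_pos)
loss = (start_loss + end_loss) / 2.0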
Example 2
    def forward(self, prediction_scores, seq_relationship_score,
                masked_lm_labels, next_sentence_labels):
        """
        Args:
            prediction_scores(Tensor):
                The scores of masked token prediction. Its data type should be float32.
                If `masked_positions` is None, its shape is [batch_size, sequence_length, vocab_size].
                Otherwise, its shape is [batch_size, mask_token_num, vocab_size]
            seq_relationship_score(Tensor):
                The scores of next sentence prediction. Its data type should be float32 and
                its shape is [batch_size, 2]
            masked_lm_labels(Tensor):
                The labels of the masked language modeling; its dimensionality matches that of `prediction_scores`.
                Its data type should be int64. If `masked_positions` is None, its shape is [batch_size, sequence_length, 1].
                Otherwise, its shape is [batch_size, mask_token_num, 1]
            next_sentence_labels(Tensor):
                The labels of the next sentence prediction task; the dimensionality of `next_sentence_labels`
                is equal to that of `seq_relationship_score`. Its data type should be int64 and
                its shape is [batch_size, 1]

        Returns:
            tuple: A 2-tuple of the mean masked language modeling loss and the mean
            next sentence prediction loss. Both are float32 tensors of shape [1].

        """

        with paddle.static.amp.fp16_guard():
            masked_lm_loss = F.cross_entropy(prediction_scores,
                                             masked_lm_labels,
                                             ignore_index=-1,
                                             reduction='none')
            next_sentence_loss = F.cross_entropy(seq_relationship_score,
                                                 next_sentence_labels,
                                                 reduction='none')
            return paddle.mean(masked_lm_loss), paddle.mean(next_sentence_loss)
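
For reference, a standalone sketch of the same two pretraining losses, with made-up shapes and random tensors in place of a real BERT/ERNIE backbone; `ignore_index=-1` makes non-masked positions drop out of the token-level loss:

import paddle
import paddle.nn.functional as F

batch_size, seq_len, vocab_size = 2, 8, 100
prediction_scores = paddle.randn([batch_size, seq_len, vocab_size])
# -1 marks positions that were not masked and must not contribute to the loss
masked_lm_labels = paddle.randint(-1, vocab_size, shape=[batch_size, seq_len, 1], dtype='int64')
seq_relationship_score = paddle.randn([batch_size, 2])
next_sentence_labels = paddle.randint(0, 2, shape=[batch_size, 1], dtype='int64')

masked_lm_loss = F.cross_entropy(prediction_scores, masked_lm_labels,
                                 ignore_index=-1, reduction='none')
next_sentence_loss = F.cross_entropy(seq_relationship_score, next_sentence_labels,
                                     reduction='none')
mlm_loss, nsp_loss = paddle.mean(masked_lm_loss), paddle.mean(next_sentence_loss)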
Example 3
    def forward(self, logits, labels):
        start_logits, end_logits = logits
        start_position, end_position = labels
        start_position = paddle.unsqueeze(start_position, axis=-1)
        end_position = paddle.unsqueeze(end_position, axis=-1)
        start_loss = F.cross_entropy(input=start_logits, label=start_position)
        end_loss = F.cross_entropy(input=end_logits, label=end_position)
        loss = (start_loss + end_loss) / 2

        return loss
Example 4
 def forward(self, prediction_scores, seq_relationship_score,
             masked_lm_labels, next_sentence_labels):
     with paddle.static.amp.fp16_guard():
         masked_lm_loss = F.cross_entropy(
             prediction_scores,
             masked_lm_labels,
             ignore_index=-1,
             reduction='none')
         next_sentence_loss = F.cross_entropy(
             seq_relationship_score, next_sentence_labels, reduction='none')
         return paddle.mean(masked_lm_loss), paddle.mean(next_sentence_loss)
Example 5
    def train_forward(self, dy_model, metrics_list, batch_data, config):
        np.random.seed(12345)
        x_spt, y_spt, x_qry, y_qry = self.create_feeds(batch_data, config)
        update_step = config.get("hyper_parameters.update_step", 5)
        task_num = x_spt.shape[0]
        query_size = x_qry.shape[1]  # 75 = 15 * 5, x_qry.shape = [32, 75, 1, 28, 28]
        loss_list = []
        correct_list = []
        task_grad = [[] for _ in range(task_num)]

        for i in range(task_num):
            # outer loop (per task)
            task_net = copy.deepcopy(dy_model)
            base_lr = config.get(
                "hyper_parameters.base_optimizer.learning_rate", 0.1)
            task_optimizer = paddle.optimizer.SGD(
                learning_rate=base_lr, parameters=task_net.parameters())
            for j in range(update_step):
                # inner loop
                task_optimizer.clear_grad()  # zero the gradients
                y_hat = task_net.forward(x_spt[i])  # (setsz, ways) [5,5]
                loss_spt = F.cross_entropy(y_hat, y_spt[i])
                loss_spt.backward()
                task_optimizer.step()

            y_hat = task_net.forward(x_qry[i])
            loss_qry = F.cross_entropy(y_hat, y_qry[i])
            loss_qry.backward()
            for k in task_net.parameters():
                task_grad[i].append(k.grad)
            loss_list.append(loss_qry)
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
            correct = paddle.equal(pred_qry, y_qry[i]).numpy().sum().item()
            correct_list.append(correct)

        loss_average = paddle.add_n(loss_list) / task_num
        acc = sum(correct_list) / (query_size * task_num)

        for num, k in enumerate(dy_model.parameters()):
            tmp_list = [task_grad[i][num] for i in range(task_num)]
            if tmp_list[0] is not None:
                k._set_grad_ivar(paddle.add_n(tmp_list) / task_num)

        acc = paddle.to_tensor(acc)
        print_dict = {'loss': loss_average, "acc": acc}
        _ = paddle.ones(shape=[5, 5], dtype="float32")
        return _, metrics_list, print_dict
Example 6
def train(model):
    print('start training ... ')
    # turn into training mode
    model.train()

    opt = paddle.optimizer.Adam(learning_rate=learning_rate,
                                parameters=model.parameters())

    train_loader = paddle.io.DataLoader(cifar10_train,
                                        shuffle=True,
                                        batch_size=batch_size)

    valid_loader = paddle.io.DataLoader(cifar10_test, batch_size=batch_size)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)

            if batch_id % 1000 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, loss.numpy()))
            loss.backward()
            opt.step()
            opt.clear_grad()

        # evaluate model after one epoch
        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(valid_loader()):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc = paddle.metric.accuracy(logits, y_data)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())

        avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
        print("[validation] accuracy/loss: {}/{}".format(avg_acc, avg_loss))
        val_acc_history.append(avg_acc)
        val_loss_history.append(avg_loss)
        model.train()
Example 7
    def finetunning(self, x_spt, y_spt, x_qry, y_qry):
        # assert len(x_spt.shape) == 4

        query_size = x_qry.shape[0]
        correct_list = [0 for _ in range(self.update_step_test + 1)]

        new_net = deepcopy(self.net)
        y_hat = new_net(x_spt)
        loss = F.cross_entropy(y_hat, y_spt)
        grad = paddle.grad(loss, new_net.parameters())
        fast_weights = list(
            map(lambda p: p[1] - self.base_lr * p[0],
                zip(grad, new_net.parameters())))

        # Evaluate on the query set and compute the accuracy.
        # This step uses the parameters from before the update.
        with paddle.no_grad():
            y_hat = new_net(x_qry,
                            params=new_net.parameters(),
                            bn_training=True)
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
            correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
            correct_list[0] += correct

        # Evaluate on the query set with the updated parameters.
        with paddle.no_grad():
            y_hat = new_net(x_qry, params=fast_weights, bn_training=True)
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
            correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
            correct_list[1] += correct

        for k in range(1, self.update_step_test):
            y_hat = new_net(x_spt, params=fast_weights, bn_training=True)
            loss = F.cross_entropy(y_hat, y_spt)
            grad = paddle.grad(loss, fast_weights)
            fast_weights = list(
                map(lambda p: p[1] - self.base_lr * p[0],
                    zip(grad, fast_weights)))

            y_hat = new_net(x_qry, fast_weights, bn_training=True)

            with paddle.no_grad():
                pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
                correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
                correct_list[k + 1] += correct

        del new_net
        accs = np.array(correct_list) / query_size
        return accs
Example 8
    def forward(self, x, y):
        # [batch_size, seq_len, vocab_size]
        fw_logits, bw_logits = x
        # [batch_size, seq_len]
        fw_label, bw_label = y
        # [batch_size, seq_len, 1]
        fw_label = paddle.unsqueeze(fw_label, axis=2)
        bw_label = paddle.unsqueeze(bw_label, axis=2)

        # [batch_size, seq_len, 1]
        fw_loss = F.cross_entropy(input=fw_logits, label=fw_label)
        bw_loss = F.cross_entropy(input=bw_logits, label=bw_label)

        avg_loss = 0.5 * (fw_loss + bw_loss)
        return avg_loss
Example 9
    def forward(self, inputs, label):
        input0, input1, input2 = inputs
        if isinstance(input0, dict):
            input0 = input0["logits"]
        if isinstance(input1, dict):
            input1 = input1["logits"]
        if isinstance(input2, dict):
            input2 = input2["logits"]

        loss0 = F.cross_entropy(input0, label=label, soft_label=False)
        loss1 = F.cross_entropy(input1, label=label, soft_label=False)
        loss2 = F.cross_entropy(input2, label=label, soft_label=False)
        loss = loss0 + 0.3 * loss1 + 0.3 * loss2
        loss = loss.mean()
        return {"GooleNetLoss": loss}
Example 10
    def validation_step(self, batch: list, batch_idx: int) -> dict:
        '''
        One validation step, executed as the forward computation.

        Args:
            batch(list[paddle.Tensor]): The one batch data, which contains images and labels.
            batch_idx(int): The index of batch.

        Returns:
            results(dict): The model outputs, such as metrics.
        '''
        if Version(paddle.__version__) >= Version('2.1') or Version(
                paddle.__version__) == Version('0.0.0'):
            img = self.preprocess(batch)
        else:
            img = self.preprocess(batch[0])

        out_class, out_reg = self(img['A'], img['hint_B'], img['mask_B'])

        # loss
        loss_ce = F.cross_entropy(out_class,
                                  img['real_B_enc'][:, :1, :, :],
                                  axis=1)
        loss_ce = paddle.mean(loss_ce)
        loss_G_L1_reg = paddle.sum(paddle.abs(img['B'] - out_reg),
                                   axis=1,
                                   keepdim=True)
        loss_G_L1_reg = paddle.mean(loss_G_L1_reg)
        loss = loss_ce + loss_G_L1_reg
        return {'loss': loss}
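
The call above relies on the `axis` argument of `F.cross_entropy` to treat the channel dimension of an [N, C, H, W] score map as the class axis. A minimal sketch with assumed shapes:

import paddle
import paddle.nn.functional as F

n, num_classes, h, w = 2, 10, 32, 32
out_class = paddle.randn([n, num_classes, h, w])  # per-pixel class scores
target = paddle.randint(0, num_classes, shape=[n, 1, h, w], dtype='int64')

# axis=1 tells cross_entropy that the classes live on the channel dimension
loss_ce = F.cross_entropy(out_class, target, axis=1)  # mean over batch and pixels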
Example 11
 def runTest(self):
     with fluid.unique_name.guard():
         net = paddle.vision.models.LeNet()
         optimizer = paddle.optimizer.Adam(learning_rate=0.001,
                                           parameters=net.parameters())
         inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
         labels = [Input([None, 1], 'int64', name='label')]
     pruner = UnstructuredPruner(net, mode='ratio', ratio=0.55)
     net.train()
     self._update_masks(pruner, 0.0)
     pruner.update_params()
     self._update_masks(pruner, 1.0)
     pruner.set_static_masks()
     sparsity_0 = UnstructuredPruner.total_sparse(net)
     for i, data in enumerate(self.train_loader):
         x_data = data[0]
         y_data = paddle.to_tensor(data[1])
         logits = net(x_data)
         loss = F.cross_entropy(logits, y_data)
         loss.backward()
         optimizer.step()
         optimizer.clear_grad()
         if i == 10: break
     sparsity_1 = UnstructuredPruner.total_sparse(net)
     pruner.update_params()
     sparsity_2 = UnstructuredPruner.total_sparse(net)
     print(sparsity_0, sparsity_1, sparsity_2)
     self.assertEqual(sparsity_0, 1.0)
     self.assertEqual(sparsity_2, 1.0)
     self.assertLess(sparsity_1, 1.0)
Example 12
    def infer_forward(self, dy_model, metrics_list, batch_data, config):
        dy_model.train()
        x_spt, y_spt, x_qry, y_qry = self.create_feeds(batch_data, config)
        x_spt = x_spt[0]
        y_spt = y_spt[0]
        x_qry = x_qry[0]
        y_qry = y_qry[0]
        update_step = config.get("hyper_parameters.update_step_test", 5)
        query_size = x_qry.shape[0]
        correct_list = []

        task_net = copy.deepcopy(dy_model)
        base_lr = config.get("hyper_parameters.base_optimizer.learning_rate",
                             0.1)
        task_optimizer = paddle.optimizer.SGD(learning_rate=base_lr,
                                              parameters=task_net.parameters())
        for j in range(update_step):
            task_optimizer.clear_grad()
            y_hat = task_net.forward(x_spt)
            loss_spt = F.cross_entropy(y_hat, y_spt)
            loss_spt.backward()
            task_optimizer.step()

        y_hat = task_net.forward(x_qry)
        pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
        correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
        correct_list.append(correct)
        acc = sum(correct_list) / query_size
        acc = paddle.to_tensor(acc)
        print_dict = {"acc": acc}

        return metrics_list, print_dict
Example 13
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size]):
                ground truth label id for each sentence
        Returns:
            loss (`Variable` of shape []):
                Mean cross-entropy loss over the batch.
                Returns None if `labels` is not provided.
            logits (`Variable` of shape [batch_size, num_labels]):
                output logits of the classifier
        """
        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForSequenceClassification,
                                self).forward(*args, **kwargs)
        hidden = self.dropout(pooled)
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) != 1:
                labels = labels.squeeze()
            loss = F.cross_entropy(logits, labels)
        else:
            loss = None
        return loss, logits
Example 14
    def forward_decoder(self, x, z):
        """
        decoder
        """
        data = x[0]
        data_length = x[1]

        embedding_data = self.x_emb(data)

        z_0 = z.unsqueeze(1)
        z_0 = paddle.expand(z_0, shape=[z_0.shape[0], embedding_data.shape[1], z_0.shape[2]])

        x_input = paddle.concat([embedding_data, z_0], axis=-1)

        h_0 = self.decoder_lat(z)
        h_0 = h_0.unsqueeze(0)
        h_0 = paddle.expand(h_0, shape=[self.decoder_rnn.num_layers, h_0.shape[1], h_0.shape[2]])

        ####
        output, _ = self.decoder_rnn(x_input, h_0, sequence_length=data_length)
        y = self.decoder_fc(output)

        recon_loss = F.cross_entropy(paddle.reshape(y[:, :-1], shape=[-1, y.shape[-1]]), \
            paddle.reshape(data[:, 1:], shape=[-1]), \
            ignore_index=self.pad
        )

        return recon_loss
Example 15
    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))
Example 16
    def evaluation(self, args):
        self.model.eval()
        valid_dataset = DialogueDataset(args.valid_data_path,
                                        args.batch_size,
                                        self.tokenizer.pad_token_id,
                                        self.tokenizer.cls_token_id,
                                        args.sort_pool_size,
                                        args.seed,
                                        mode='valid')
        valid_data_loader = DataLoader(valid_dataset,
                                       return_list=True,
                                       batch_size=None)
        total_tokens = 0
        total_loss = 0.0
        start_time = time.time()
        step = 0
        for inputs in valid_data_loader:
            step += 1
            token_ids, type_ids, pos_ids, generation_mask, tgt_label, tgt_pos = inputs

            logits = self.model(token_ids, type_ids, pos_ids, generation_mask,
                                tgt_pos)
            loss = F.cross_entropy(logits, tgt_label, reduction='sum')

            total_loss += loss.numpy()[0]
            total_tokens += tgt_label.shape[0]

        avg_loss = total_loss / total_tokens
        ppl = math.exp(avg_loss)
        avg_speed = (time.time() - start_time) / step
        logging.info('loss: %.4f - ppl: %.4f - %.3fs/step\n' %
                     (avg_loss, ppl, avg_speed))
        self.model.train()
Example 17
def val(epoch, model, val_loader, cfg, args):
    total_loss = 0.0
    total_acc1 = 0.0
    total_acc5 = 0.0
    total_sample = 0

    for batch_id, data in enumerate(val_loader):
        imgs = paddle.to_tensor(data[0])
        labels = paddle.to_tensor(data[1])
        labels.stop_gradient = True

        outputs = model(imgs)

        loss = F.cross_entropy(input=outputs, label=labels, ignore_index=-1)
        avg_loss = paddle.mean(loss)
        acc_top1 = paddle.metric.accuracy(input=outputs, label=labels, k=1)
        acc_top5 = paddle.metric.accuracy(input=outputs, label=labels, k=5)

        dy_out = avg_loss.numpy()[0]
        total_loss += dy_out
        total_acc1 += acc_top1.numpy()[0]
        total_acc5 += acc_top5.numpy()[0]
        total_sample += 1

        if batch_id % 5 == 0:
            print(
                "TEST Epoch {}, iter {}, loss={:.5f}, acc1 {:.5f}, acc5 {:.5f}"
                .format(epoch, batch_id, total_loss / total_sample,
                        total_acc1 / total_sample, total_acc5 / total_sample))

    print('Finish loss {} , acc1 {} , acc5 {}'.format(
        total_loss / total_sample, total_acc1 / total_sample,
        total_acc5 / total_sample))
    return total_acc1 / total_sample
Example 18
 def train(step_num_samples=None):
     dataset = RandomDataset(20 * 4)
     simple_net = SimpleNet()
     opt = paddle.optimizer.SGD(learning_rate=1e-3,
                                parameters=simple_net.parameters())
     loader = DataLoader(
         dataset,
         batch_size=4,
         shuffle=True,
         drop_last=True,
         num_workers=2)
     step_info = ''
     p = profiler.Profiler(timer_only=True)
     p.start()
     for i, (image, label) in enumerate(loader()):
         out = simple_net(image)
         loss = F.cross_entropy(out, label)
         avg_loss = paddle.mean(loss)
         avg_loss.backward()
         opt.minimize(avg_loss)
         simple_net.clear_gradients()
         p.step(num_samples=step_num_samples)
         if i % 10 == 0:
             step_info = p.step_info()
             print("Iter {}: {}".format(i, step_info))
     p.stop()
     return step_info
Example 19
    def forward(self,
                query_input_ids,
                pos_title_input_ids,
                neg_title_input_ids,
                is_prediction=False,
                query_token_type_ids=None,
                query_position_ids=None,
                query_attention_mask=None,
                pos_title_token_type_ids=None,
                pos_title_position_ids=None,
                pos_title_attention_mask=None,
                neg_title_token_type_ids=None,
                neg_title_position_ids=None,
                neg_title_attention_mask=None):
        query_cls_embedding = self.get_pooled_embedding(
            query_input_ids, query_token_type_ids, query_position_ids,
            query_attention_mask)

        pos_title_cls_embedding = self.get_pooled_embedding(
            pos_title_input_ids, pos_title_token_type_ids,
            pos_title_position_ids, pos_title_attention_mask)

        neg_title_cls_embedding = self.get_pooled_embedding(
            neg_title_input_ids, neg_title_token_type_ids,
            neg_title_position_ids, neg_title_attention_mask)

        all_title_cls_embedding = paddle.concat(
            x=[pos_title_cls_embedding, neg_title_cls_embedding], axis=0)

        if is_prediction:
            logits = paddle.dot(query_cls_embedding, pos_title_cls_embedding)
            outputs = {
                "probs": logits,
                "q_rep": query_cls_embedding,
                "p_rep": pos_title_cls_embedding
            }
            return outputs

        if self.use_cross_batch:
            tensor_list = []
            paddle.distributed.all_gather(tensor_list, all_title_cls_embedding)
            all_title_cls_embedding = paddle.concat(x=tensor_list, axis=0)

        # multiply
        logits = paddle.matmul(query_cls_embedding,
                               all_title_cls_embedding,
                               transpose_y=True)

        batch_size = query_cls_embedding.shape[0]

        labels = paddle.arange(batch_size * self.rank * 2,
                               batch_size * (self.rank * 2 + 1),
                               dtype='int64')
        labels = paddle.reshape(labels, shape=[-1, 1])

        accuracy = paddle.metric.accuracy(input=logits, label=labels)
        loss = F.cross_entropy(input=logits, label=labels)
        outputs = {"loss": loss, "accuracy": accuracy}

        return outputs
Example 20
    def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
        emb_dim = p_ide.shape[1]
        p_ide = p_ide.transpose((0, 2, 3, 1))
        p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])
        mask = t_conf > 0
        mask = paddle.cast(mask, dtype="int64")
        mask.stop_gradient = True
        emb_mask = mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        emb_mask_inds.stop_gradient = True
        # use max(1) to decide the id, TODO: more reasonable strategy
        t_ide_flatten = t_ide.max(1).flatten()
        t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
        valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

        if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
            # loss_ide = paddle.to_tensor([0]) # will be error in gradient backward
            loss_ide = self.phony * 0  # todo
        else:
            embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
            embedding = emb_scale * F.normalize(embedding)
            logits = classifier(embedding)

            ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)

            loss_ide = F.cross_entropy(logits,
                                       ide_target,
                                       ignore_index=-1,
                                       reduction='mean')
        loss_ide.stop_gradient = False

        return loss_ide
Example 21
    def compute(self, pred, label, seq_mask=None):
        """
        Computes cross entropy loss.

        Args:
            pred (Tensor):
                Predictor tensor, and its dtype is float32 or float64, and has
                a shape of [batch_size, sequence_length, vocab_size].
            label(Tensor):
                Label tensor, and its dtype is int64, and has a shape of
                [batch_size, sequence_length, 1] or [batch_size, sequence_length].
            seq_mask(Tensor, optional):
                Sequence mask tensor, and its type could be float32, float64,
                int32 or int64, and has a shape of [batch_size, sequence_length].
                It's used to calculate loss. Defaults to None.

        """
        if label.dim() == 2:
            label = paddle.unsqueeze(label, axis=2)
        ce = F.cross_entropy(input=pred,
                             label=label,
                             reduction='none',
                             soft_label=False)
        ce = paddle.squeeze(ce, axis=[2])
        if seq_mask is not None:
            ce = ce * seq_mask
            word_num = paddle.sum(seq_mask)
            return ce, word_num
        return ce
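
A short sketch of how the `seq_mask` branch above is typically consumed (shapes are illustrative): the per-token losses are zeroed at padded positions and then normalised by the number of real tokens:

import paddle
import paddle.nn.functional as F

batch_size, seq_len, vocab_size = 2, 6, 50
pred = paddle.randn([batch_size, seq_len, vocab_size])
label = paddle.randint(0, vocab_size, shape=[batch_size, seq_len, 1], dtype='int64')
seq_mask = paddle.ones([batch_size, seq_len], dtype='float32')  # 1.0 = real token, 0.0 = padding

ce = F.cross_entropy(input=pred, label=label, reduction='none', soft_label=False)
ce = paddle.squeeze(ce, axis=[2]) * seq_mask
avg_loss = paddle.sum(ce) / paddle.sum(seq_mask)  # mean loss over unmasked tokens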
Example 22
    def forward(self, logit, label):

        n, c, h, w = logit.shape
        total_loss = 0.0
        if len(label.shape) != len(logit.shape):
            label = paddle.unsqueeze(label, 1)

        for i in range(n):
            x = paddle.unsqueeze(logit[i], 0)
            y = paddle.unsqueeze(label[i], 0)
            x = paddle.transpose(x, (0, 2, 3, 1))
            y = paddle.transpose(y, (0, 2, 3, 1))
            x = paddle.reshape(x, shape=(-1, c))
            y = paddle.reshape(y, shape=(-1, ))
            loss = F.cross_entropy(x,
                                   y,
                                   weight=self.weight,
                                   ignore_index=self.ignore_index,
                                   reduction="none")
            sorted_loss = paddle.sort(loss, descending=True)
            if sorted_loss[self.K] > self.threshold:
                new_indices = paddle.nonzero(sorted_loss > self.threshold)
                loss = paddle.gather(sorted_loss, new_indices)
            else:
                loss = sorted_loss[:self.K]

            total_loss += paddle.mean(loss)
        return total_loss / float(n)
Example 23
    def __call__(self, s_arc, s_rel, arcs, rels, mask):

        arcs = paddle.masked_select(arcs, mask)
        rels = paddle.masked_select(rels, mask)

        select = paddle.nonzero(mask)
        s_arc = paddle.gather_nd(s_arc, select)
        s_rel = paddle.gather_nd(s_rel, select)

        s_rel = index_sample(s_rel, paddle.unsqueeze(arcs, axis=1))

        arc_cost = F.cross_entropy(s_arc, arcs)
        rel_cost = F.cross_entropy(s_rel, rels)

        avg_cost = paddle.mean(arc_cost + rel_cost)
        return avg_cost
Example 24
    def get_loss(self, scores, deltas, targets, rois, bbox_weight):
        """
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        """
        # TODO: better pass args
        tgt_labels, tgt_bboxes, tgt_gt_inds = targets
        tgt_labels = paddle.concat(
            tgt_labels) if len(tgt_labels) > 1 else tgt_labels[0]
        tgt_labels = tgt_labels.cast('int64')
        tgt_labels.stop_gradient = True
        loss_bbox_cls = F.cross_entropy(input=scores,
                                        label=tgt_labels,
                                        reduction='mean')
        # bbox reg

        cls_agnostic_bbox_reg = deltas.shape[1] == 4

        fg_inds = paddle.nonzero(
            paddle.logical_and(tgt_labels >= 0,
                               tgt_labels < self.num_classes)).flatten()

        cls_name = 'loss_bbox_cls'
        reg_name = 'loss_bbox_reg'
        loss_bbox = {}

        if cls_agnostic_bbox_reg:
            reg_delta = paddle.gather(deltas, fg_inds)
        else:
            fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

            reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
            reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])

            reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)

            reg_col_inds = reg_col_inds.reshape([-1, 1])
            reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)

            reg_delta = paddle.gather(deltas, fg_inds)
            reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])
        rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
        tgt_bboxes = paddle.concat(
            tgt_bboxes) if len(tgt_bboxes) > 1 else tgt_bboxes[0]

        reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
        reg_target = paddle.gather(reg_target, fg_inds)
        reg_target.stop_gradient = True

        loss_bbox_reg = paddle.abs(reg_delta -
                                   reg_target).sum() / tgt_labels.shape[0]

        loss_bbox[cls_name] = loss_bbox_cls
        loss_bbox[reg_name] = loss_bbox_reg

        return loss_bbox
Example 25
 def calc_loss(self, x, target):
     if self._label_smoothing:
         target = self._labelsmoothing(target)
         x = -F.log_softmax(x, axis=-1)
         cost = paddle.sum(x * target, axis=-1)
     else:
         cost = F.cross_entropy(x, label=target)
     avg_cost = self.reduce_loss(cost)
     return avg_cost
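
When the target has already been smoothed into a full distribution, the manual `log_softmax` route above can also be written with `soft_label=True`; a sketch under assumed shapes and smoothing factor:

import paddle
import paddle.nn.functional as F

batch_size, num_classes, epsilon = 4, 10, 0.1
x = paddle.randn([batch_size, num_classes])
hard = paddle.randint(0, num_classes, shape=[batch_size], dtype='int64')

# label smoothing: mix the one-hot target with a uniform distribution
target = F.label_smooth(F.one_hot(hard, num_classes), epsilon=epsilon)

cost = F.cross_entropy(x, target, soft_label=True, reduction='none')
avg_cost = paddle.mean(cost)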
Example 26
def mlp(input_x, input_y, hid_dim=128, label_dim=2):
    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim, activation='tanh')
    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim, activation='tanh')
    prediction = paddle.static.nn.fc(x=[fc_2],
                                     size=label_dim,
                                     activation='softmax')
    cost = F.cross_entropy(input=prediction, label=input_y)
    avg_cost = paddle.mean(x=cost)
    return avg_cost
Example 27
 def _crossentropy(self, input, target):
     if self._label_smoothing:
         target = self._labelsmoothing(target)
         input = -F.log_softmax(input, axis=-1)
         cost = paddle.sum(target * input, axis=-1)
     else:
         cost = F.cross_entropy(input=input, label=target)
     avg_cost = paddle.mean(cost)
     return avg_cost
Example 28
 def forward(
     self,
     prediction_scores,
     seq_relationship_score,
     masked_lm_labels,
     next_sentence_labels,
     masked_lm_scale,
 ):
     with paddle.static.amp.fp16_guard():
         masked_lm_loss = F.cross_entropy(prediction_scores,
                                          masked_lm_labels,
                                          reduction="none",
                                          ignore_index=-1)
         masked_lm_loss = masked_lm_loss / masked_lm_scale
         next_sentence_loss = F.cross_entropy(seq_relationship_score,
                                              next_sentence_labels,
                                              reduction="none")
     return paddle.sum(masked_lm_loss) + paddle.mean(next_sentence_loss)
Example 29
def network():
    img = static.data(name='image', shape=[None, 784])
    hidden = static.nn.fc(x=img, size=200, activation='relu')
    hidden = F.dropout(hidden, p=0.5)
    loss = F.cross_entropy(input=static.nn.fc(hidden, size=10, activation='softmax'),
                           label=static.data(name='label',
                                             shape=[1],
                                             dtype='int64'))
    avg_loss = paddle.mean(loss)
    return avg_loss
Example 30
    def forward(self, predict, label, trg_mask):
        cost = F.cross_entropy(input=predict,
                               label=label,
                               reduction='none',
                               soft_label=False)
        cost = paddle.squeeze(cost, axis=[2])
        masked_cost = cost * trg_mask
        batch_mean_cost = paddle.mean(masked_cost, axis=[0])
        seq_cost = paddle.sum(batch_mean_cost)

        return seq_cost