def forward(self, *args, **kwargs):
    """
    Args:
        start_pos (optional, `Variable` of shape [batch_size]):
            token index of the start of the answer span in `context`
        end_pos (optional, `Variable` of shape [batch_size]):
            token index of the end of the answer span in `context`
    Returns:
        loss (`Variable` of shape []):
            Cross entropy loss averaged over batch and time; positions where label == -100
            are ignored. If the labels are not set, returns None.
        start_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the start position; use argmax(start_logits) to get the start index
        end_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the end position; use argmax(end_logits) to get the end index
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs)
    encoded = self.dropout(encoded)
    encoded = self.classifier(encoded)
    start_logit, end_logits = P.unstack(encoded, axis=-1)
    if start_pos is not None and end_pos is not None:
        if len(start_pos.shape) != 1:
            start_pos = start_pos.squeeze()
        if len(end_pos.shape) != 1:
            end_pos = end_pos.squeeze()
        start_loss = F.cross_entropy(start_logit, start_pos)
        end_loss = F.cross_entropy(end_logits, end_pos)
        loss = (start_loss.mean() + end_loss.mean()) / 2.
    else:
        loss = None
    return loss, start_logit, end_logits
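The sketch below is a minimal, self-contained illustration (an assumption, not part of the original model code) of the span loss computed above and of how the returned start/end logits are typically consumed; the tensor shapes are illustrative only.

import paddle
import paddle.nn.functional as F

# illustrative shapes (assumption): [batch_size, seq_len] logits, [batch_size] gold positions
batch_size, seq_len = 4, 128
start_logits = paddle.randn([batch_size, seq_len])
end_logits = paddle.randn([batch_size, seq_len])
start_pos = paddle.randint(0, seq_len, shape=[batch_size])
end_pos = paddle.randint(0, seq_len, shape=[batch_size])

# mirrors the loss above: cross entropy on start and end positions, averaged
loss = (F.cross_entropy(start_logits, start_pos).mean() +
        F.cross_entropy(end_logits, end_pos).mean()) / 2.
start_idx = start_logits.argmax(axis=-1)  # predicted span start per example
end_idx = end_logits.argmax(axis=-1)      # predicted span end per example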
def forward(self, prediction_scores, seq_relationship_score, masked_lm_labels,
            next_sentence_labels):
    """
    Args:
        prediction_scores(Tensor): The scores of masked token prediction. Its data type
            should be float32. If `masked_positions` is None, its shape is
            [batch_size, sequence_length, vocab_size]; otherwise, its shape is
            [batch_size, mask_token_num, vocab_size].
        seq_relationship_score(Tensor): The scores of next sentence prediction. Its data
            type should be float32 and its shape is [batch_size, 2].
        masked_lm_labels(Tensor): The labels of the masked language modeling; its
            dimensionality matches `prediction_scores`. Its data type should be int64.
            If `masked_positions` is None, its shape is [batch_size, sequence_length, 1];
            otherwise, its shape is [batch_size, mask_token_num, 1].
        next_sentence_labels(Tensor): The labels of the next sentence prediction task; its
            dimensionality matches `seq_relationship_score`. Its data type should be int64
            and its shape is [batch_size, 1].
    Returns:
        tuple: The mean masked language modeling loss and the mean next sentence prediction
            loss. Both are float32 tensors of shape [1].
    """
    with paddle.static.amp.fp16_guard():
        masked_lm_loss = F.cross_entropy(prediction_scores,
                                         masked_lm_labels,
                                         ignore_index=-1,
                                         reduction='none')
        next_sentence_loss = F.cross_entropy(seq_relationship_score,
                                             next_sentence_labels,
                                             reduction='none')
    return paddle.mean(masked_lm_loss), paddle.mean(next_sentence_loss)
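A self-contained sketch (the shapes and the final sum are assumptions) of what the criterion above computes, written without the fp16 guard so it runs eagerly on dummy tensors.

import paddle
import paddle.nn.functional as F

batch_size, seq_len, vocab_size = 8, 64, 30522  # illustrative sizes (assumption)
prediction_scores = paddle.randn([batch_size, seq_len, vocab_size])
seq_relationship_score = paddle.randn([batch_size, 2])
masked_lm_labels = paddle.randint(0, vocab_size, shape=[batch_size, seq_len, 1])
next_sentence_labels = paddle.randint(0, 2, shape=[batch_size, 1])

masked_lm_loss = F.cross_entropy(prediction_scores, masked_lm_labels,
                                 ignore_index=-1, reduction='none')
next_sentence_loss = F.cross_entropy(seq_relationship_score, next_sentence_labels,
                                     reduction='none')
# the two means returned by forward() are typically summed into one pretraining loss
loss = paddle.mean(masked_lm_loss) + paddle.mean(next_sentence_loss)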
def forward(self, logits, labels):
    start_logits, end_logits = logits
    start_position, end_position = labels
    start_position = paddle.unsqueeze(start_position, axis=-1)
    end_position = paddle.unsqueeze(end_position, axis=-1)
    start_loss = F.cross_entropy(input=start_logits, label=start_position)
    end_loss = F.cross_entropy(input=end_logits, label=end_position)
    loss = (start_loss + end_loss) / 2
    return loss
def forward(self, prediction_scores, seq_relationship_score, masked_lm_labels,
            next_sentence_labels):
    with paddle.static.amp.fp16_guard():
        masked_lm_loss = F.cross_entropy(prediction_scores,
                                         masked_lm_labels,
                                         ignore_index=-1,
                                         reduction='none')
        next_sentence_loss = F.cross_entropy(seq_relationship_score,
                                             next_sentence_labels,
                                             reduction='none')
    return paddle.mean(masked_lm_loss), paddle.mean(next_sentence_loss)
def train_forward(self, dy_model, metrics_list, batch_data, config):
    np.random.seed(12345)
    x_spt, y_spt, x_qry, y_qry = self.create_feeds(batch_data, config)
    update_step = config.get("hyper_parameters.update_step", 5)
    task_num = x_spt.shape[0]
    query_size = x_qry.shape[1]  # 75 = 15 * 5, x_qry.shape = [32, 75, 1, 28, 28]
    loss_list = []
    correct_list = []
    task_grad = [[] for _ in range(task_num)]

    for i in range(task_num):  # outer loop over tasks
        task_net = copy.deepcopy(dy_model)
        base_lr = config.get("hyper_parameters.base_optimizer.learning_rate", 0.1)
        task_optimizer = paddle.optimizer.SGD(learning_rate=base_lr,
                                              parameters=task_net.parameters())
        for j in range(update_step):  # inner loop of adaptation steps
            task_optimizer.clear_grad()  # zero the gradients
            y_hat = task_net.forward(x_spt[i])  # (setsz, ways) = [5, 5]
            loss_spt = F.cross_entropy(y_hat, y_spt[i])
            loss_spt.backward()
            task_optimizer.step()

        y_hat = task_net.forward(x_qry[i])
        loss_qry = F.cross_entropy(y_hat, y_qry[i])
        loss_qry.backward()
        for k in task_net.parameters():
            task_grad[i].append(k.grad)
        loss_list.append(loss_qry)
        pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
        correct = paddle.equal(pred_qry, y_qry[i]).numpy().sum().item()
        correct_list.append(correct)

    loss_average = paddle.add_n(loss_list) / task_num
    acc = sum(correct_list) / (query_size * task_num)
    for num, k in enumerate(dy_model.parameters()):
        tmp_list = [task_grad[i][num] for i in range(task_num)]
        if tmp_list[0] is not None:
            k._set_grad_ivar(paddle.add_n(tmp_list) / task_num)
    acc = paddle.to_tensor(acc)
    print_dict = {'loss': loss_average, "acc": acc}
    _ = paddle.ones(shape=[5, 5], dtype="float32")
    return _, metrics_list, print_dict
def train(model):
    print('start training ... ')
    # turn into training mode
    model.train()
    opt = paddle.optimizer.Adam(learning_rate=learning_rate,
                                parameters=model.parameters())
    train_loader = paddle.io.DataLoader(cifar10_train,
                                        shuffle=True,
                                        batch_size=batch_size)
    valid_loader = paddle.io.DataLoader(cifar10_test, batch_size=batch_size)
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            y_data = paddle.unsqueeze(y_data, 1)
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            if batch_id % 1000 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, loss.numpy()))
            loss.backward()
            opt.step()
            opt.clear_grad()

        # evaluate model after one epoch
        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(valid_loader()):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            y_data = paddle.unsqueeze(y_data, 1)
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc = paddle.metric.accuracy(logits, y_data)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())
        avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
        print("[validation] accuracy/loss: {}/{}".format(avg_acc, avg_loss))
        val_acc_history.append(avg_acc)
        val_loss_history.append(avg_loss)
        model.train()
def finetunning(self, x_spt, y_spt, x_qry, y_qry):
    # assert len(x_spt.shape) == 4
    query_size = x_qry.shape[0]
    correct_list = [0 for _ in range(self.update_step_test + 1)]

    new_net = deepcopy(self.net)
    y_hat = new_net(x_spt)
    loss = F.cross_entropy(y_hat, y_spt)
    grad = paddle.grad(loss, new_net.parameters())
    fast_weights = list(
        map(lambda p: p[1] - self.base_lr * p[0],
            zip(grad, new_net.parameters())))

    # evaluate on the query set and accumulate accuracy,
    # using the weights before the update
    with paddle.no_grad():
        y_hat = new_net(x_qry, params=new_net.parameters(), bn_training=True)
        pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
        correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
        correct_list[0] += correct

    # evaluate on the query set with the updated weights
    with paddle.no_grad():
        y_hat = new_net(x_qry, params=fast_weights, bn_training=True)
        pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)  # size = (75)
        correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
        correct_list[1] += correct

    for k in range(1, self.update_step_test):
        y_hat = new_net(x_spt, params=fast_weights, bn_training=True)
        loss = F.cross_entropy(y_hat, y_spt)
        grad = paddle.grad(loss, fast_weights)
        fast_weights = list(
            map(lambda p: p[1] - self.base_lr * p[0], zip(grad, fast_weights)))

        y_hat = new_net(x_qry, fast_weights, bn_training=True)

        with paddle.no_grad():
            pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
            correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
            correct_list[k + 1] += correct

    del new_net
    accs = np.array(correct_list) / query_size
    return accs
def forward(self, x, y):
    # [batch_size, seq_len, vocab_size]
    fw_logits, bw_logits = x
    # [batch_size, seq_len]
    fw_label, bw_label = y
    # [batch_size, seq_len, 1]
    fw_label = paddle.unsqueeze(fw_label, axis=2)
    bw_label = paddle.unsqueeze(bw_label, axis=2)
    # [batch_size, seq_len, 1]
    fw_loss = F.cross_entropy(input=fw_logits, label=fw_label)
    bw_loss = F.cross_entropy(input=bw_logits, label=bw_label)
    avg_loss = 0.5 * (fw_loss + bw_loss)
    return avg_loss
def forward(self, inputs, label):
    input0, input1, input2 = inputs
    if isinstance(input0, dict):
        input0 = input0["logits"]
    if isinstance(input1, dict):
        input1 = input1["logits"]
    if isinstance(input2, dict):
        input2 = input2["logits"]

    loss0 = F.cross_entropy(input0, label=label, soft_label=False)
    loss1 = F.cross_entropy(input1, label=label, soft_label=False)
    loss2 = F.cross_entropy(input2, label=label, soft_label=False)
    loss = loss0 + 0.3 * loss1 + 0.3 * loss2
    loss = loss.mean()
    return {"GooleNetLoss": loss}
def validation_step(self, batch: int, batch_idx: int) -> dict:
    '''
    One step for validation, which should be called as forward computation.

    Args:
        batch(list[paddle.Tensor]): One batch of data, containing images and labels.
        batch_idx(int): The index of the batch.

    Returns:
        results(dict): The model outputs, such as metrics.
    '''
    if Version(paddle.__version__) >= '2.1' or Version(
            paddle.__version__) == '0.0.0':
        img = self.preprocess(batch)
    else:
        img = self.preprocess(batch[0])
    out_class, out_reg = self(img['A'], img['hint_B'], img['mask_B'])

    # loss
    loss_ce = F.cross_entropy(out_class,
                              img['real_B_enc'][:, :1, :, :],
                              axis=1)
    loss_ce = paddle.mean(loss_ce)
    loss_G_L1_reg = paddle.sum(paddle.abs(img['B'] - out_reg),
                               axis=1,
                               keepdim=True)
    loss_G_L1_reg = paddle.mean(loss_G_L1_reg)
    loss = loss_ce + loss_G_L1_reg
    return {'loss': loss}
def runTest(self):
    with fluid.unique_name.guard():
        net = paddle.vision.models.LeNet()
        optimizer = paddle.optimizer.Adam(learning_rate=0.001,
                                          parameters=net.parameters())
        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
        labels = [Input([None, 1], 'int64', name='label')]
        pruner = UnstructuredPruner(net, mode='ratio', ratio=0.55)
        net.train()
        self._update_masks(pruner, 0.0)
        pruner.update_params()
        self._update_masks(pruner, 1.0)
        pruner.set_static_masks()
        sparsity_0 = UnstructuredPruner.total_sparse(net)

        for i, data in enumerate(self.train_loader):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            logits = net(x_data)
            loss = F.cross_entropy(logits, y_data)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            if i == 10:
                break

        sparsity_1 = UnstructuredPruner.total_sparse(net)
        pruner.update_params()
        sparsity_2 = UnstructuredPruner.total_sparse(net)
        print(sparsity_0, sparsity_1, sparsity_2)
        self.assertEqual(sparsity_0, 1.0)
        self.assertEqual(sparsity_2, 1.0)
        self.assertLess(sparsity_1, 1.0)
def infer_forward(self, dy_model, metrics_list, batch_data, config):
    dy_model.train()
    x_spt, y_spt, x_qry, y_qry = self.create_feeds(batch_data, config)
    x_spt = x_spt[0]
    y_spt = y_spt[0]
    x_qry = x_qry[0]
    y_qry = y_qry[0]
    update_step = config.get("hyper_parameters.update_step_test", 5)
    query_size = x_qry.shape[0]
    correct_list = []

    task_net = copy.deepcopy(dy_model)
    base_lr = config.get("hyper_parameters.base_optimizer.learning_rate", 0.1)
    task_optimizer = paddle.optimizer.SGD(learning_rate=base_lr,
                                          parameters=task_net.parameters())
    for j in range(update_step):
        task_optimizer.clear_grad()
        y_hat = task_net.forward(x_spt)
        loss_spt = F.cross_entropy(y_hat, y_spt)
        loss_spt.backward()
        task_optimizer.step()

    y_hat = task_net.forward(x_qry)
    pred_qry = F.softmax(y_hat, axis=1).argmax(axis=1)
    correct = paddle.equal(pred_qry, y_qry).numpy().sum().item()
    correct_list.append(correct)
    acc = sum(correct_list) / query_size
    acc = paddle.to_tensor(acc)
    print_dict = {"acc": acc}
    return metrics_list, print_dict
def forward(self, *args, **kwargs):
    """
    Args:
        labels (optional, `Variable` of shape [batch_size]):
            ground truth label id for each sentence
    Returns:
        loss (`Variable` of shape []):
            Cross entropy loss averaged over the batch; if labels are not set, returns None.
        logits (`Variable` of shape [batch_size, num_labels]):
            output logits of the classifier
    """
    labels = kwargs.pop('labels', None)
    pooled, encoded = super(ErnieModelForSequenceClassification, self).forward(*args, **kwargs)
    hidden = self.dropout(pooled)
    logits = self.classifier(hidden)
    if labels is not None:
        if len(labels.shape) != 1:
            labels = labels.squeeze()
        loss = F.cross_entropy(logits, labels)
    else:
        loss = None
    return loss, logits
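A minimal sketch (an assumption, not from the original source) of how the classification logits and loss above behave, using dummy tensors in place of the ERNIE encoder output.

import paddle
import paddle.nn.functional as F

batch_size, num_labels = 4, 3                    # illustrative sizes (assumption)
logits = paddle.randn([batch_size, num_labels])  # stands in for self.classifier(hidden)
labels = paddle.randint(0, num_labels, shape=[batch_size])

loss = F.cross_entropy(logits, labels)           # mean cross entropy over the batch, as in forward()
pred = paddle.argmax(logits, axis=-1)            # predicted label id per sentence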
def forward_decoder(self, x, z):
    """decoder"""
    data = x[0]
    data_length = x[1]

    embedding_data = self.x_emb(data)
    z_0 = paddle.expand(z.unsqueeze(1),
                        shape=[z.unsqueeze(1).shape[0],
                               embedding_data.shape[1],
                               z.unsqueeze(1).shape[2]])
    x_input = paddle.concat([embedding_data, z_0], axis=-1)

    h_0 = self.decoder_lat(z)
    h_0 = paddle.expand(h_0.unsqueeze(0),
                        shape=[self.decoder_rnn.num_layers,
                               h_0.unsqueeze(0).shape[1],
                               h_0.unsqueeze(0).shape[2]])

    output, _ = self.decoder_rnn(x_input, h_0, sequence_length=data_length)
    y = self.decoder_fc(output)

    recon_loss = F.cross_entropy(
        paddle.reshape(y[:, :-1], shape=[-1, y.shape[-1]]),
        paddle.reshape(data[:, 1:], shape=[-1]),
        ignore_index=self.pad)
    return recon_loss
def test(epoch):
    model.eval()
    acc_top1_ns = []
    acc_top5_ns = []
    for batch_id, data in enumerate(valid_loader):
        start_time = time.time()
        x_data = data[0]
        y_data = paddle.to_tensor(data[1])
        if args.data == 'cifar10':
            y_data = paddle.unsqueeze(y_data, 1)
        logits = model(x_data)
        loss = F.cross_entropy(logits, y_data)
        acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
        acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
        end_time = time.time()
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                        np.mean(acc_top5.numpy()), end_time - start_time))
        acc_top1_ns.append(np.mean(acc_top1.numpy()))
        acc_top5_ns.append(np.mean(acc_top5.numpy()))

    _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
        epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
        np.mean(np.array(acc_top5_ns, dtype="object"))))
def evaluation(self, args):
    self.model.eval()
    valid_dataset = DialogueDataset(args.valid_data_path,
                                    args.batch_size,
                                    self.tokenizer.pad_token_id,
                                    self.tokenizer.cls_token_id,
                                    args.sort_pool_size,
                                    args.seed,
                                    mode='valid')
    valid_data_loader = DataLoader(valid_dataset,
                                   return_list=True,
                                   batch_size=None)
    total_tokens = 0
    total_loss = 0.0
    start_time = time.time()
    step = 0
    for inputs in valid_data_loader:
        step += 1
        token_ids, type_ids, pos_ids, generation_mask, tgt_label, tgt_pos = inputs

        logits = self.model(token_ids, type_ids, pos_ids, generation_mask,
                            tgt_pos)
        loss = F.cross_entropy(logits, tgt_label, reduction='sum')

        total_loss += loss.numpy()[0]
        total_tokens += tgt_label.shape[0]

    avg_loss = total_loss / total_tokens
    ppl = math.exp(avg_loss)
    avg_speed = (time.time() - start_time) / step
    logging.info('loss: %.4f - ppl: %.4f - %.3fs/step\n' %
                 (avg_loss, ppl, avg_speed))
    self.model.train()
def val(epoch, model, val_loader, cfg, args):
    total_loss = 0.0
    total_acc1 = 0.0
    total_acc5 = 0.0
    total_sample = 0

    for batch_id, data in enumerate(val_loader):
        imgs = paddle.to_tensor(data[0])
        labels = paddle.to_tensor(data[1])
        labels.stop_gradient = True
        outputs = model(imgs)

        loss = F.cross_entropy(input=outputs, label=labels, ignore_index=-1)
        avg_loss = paddle.mean(loss)
        acc_top1 = paddle.metric.accuracy(input=outputs, label=labels, k=1)
        acc_top5 = paddle.metric.accuracy(input=outputs, label=labels, k=5)

        dy_out = avg_loss.numpy()[0]
        total_loss += dy_out
        total_acc1 += acc_top1.numpy()[0]
        total_acc5 += acc_top5.numpy()[0]
        total_sample += 1
        if batch_id % 5 == 0:
            print("TEST Epoch {}, iter {}, loss={:.5f}, acc1 {:.5f}, acc5 {:.5f}"
                  .format(epoch, batch_id, total_loss / total_sample,
                          total_acc1 / total_sample, total_acc5 / total_sample))

    print('Finish loss {} , acc1 {} , acc5 {}'.format(
        total_loss / total_sample, total_acc1 / total_sample,
        total_acc5 / total_sample))
    return total_acc1 / total_sample
def train(step_num_samples=None):
    dataset = RandomDataset(20 * 4)
    simple_net = SimpleNet()
    opt = paddle.optimizer.SGD(learning_rate=1e-3,
                               parameters=simple_net.parameters())
    loader = DataLoader(dataset,
                        batch_size=4,
                        shuffle=True,
                        drop_last=True,
                        num_workers=2)
    step_info = ''
    p = profiler.Profiler(timer_only=True)
    p.start()
    for i, (image, label) in enumerate(loader()):
        out = simple_net(image)
        loss = F.cross_entropy(out, label)
        avg_loss = paddle.mean(loss)
        avg_loss.backward()
        opt.minimize(avg_loss)
        simple_net.clear_gradients()
        p.step(num_samples=step_num_samples)
        if i % 10 == 0:
            step_info = p.step_info()
            print("Iter {}: {}".format(i, step_info))
    p.stop()
    return step_info
def forward(self,
            query_input_ids,
            pos_title_input_ids,
            neg_title_input_ids,
            is_prediction=False,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            pos_title_token_type_ids=None,
            pos_title_position_ids=None,
            pos_title_attention_mask=None,
            neg_title_token_type_ids=None,
            neg_title_position_ids=None,
            neg_title_attention_mask=None):

    query_cls_embedding = self.get_pooled_embedding(query_input_ids,
                                                    query_token_type_ids,
                                                    query_position_ids,
                                                    query_attention_mask)

    pos_title_cls_embedding = self.get_pooled_embedding(
        pos_title_input_ids, pos_title_token_type_ids, pos_title_position_ids,
        pos_title_attention_mask)

    neg_title_cls_embedding = self.get_pooled_embedding(
        neg_title_input_ids, neg_title_token_type_ids, neg_title_position_ids,
        neg_title_attention_mask)

    all_title_cls_embedding = paddle.concat(
        x=[pos_title_cls_embedding, neg_title_cls_embedding], axis=0)

    if is_prediction:
        logits = paddle.dot(query_cls_embedding, pos_title_cls_embedding)
        outputs = {
            "probs": logits,
            "q_rep": query_cls_embedding,
            "p_rep": pos_title_cls_embedding
        }
        return outputs

    if self.use_cross_batch:
        tensor_list = []
        paddle.distributed.all_gather(tensor_list, all_title_cls_embedding)
        all_title_cls_embedding = paddle.concat(x=tensor_list, axis=0)

    # multiply
    logits = paddle.matmul(query_cls_embedding,
                           all_title_cls_embedding,
                           transpose_y=True)

    batch_size = query_cls_embedding.shape[0]

    labels = paddle.arange(batch_size * self.rank * 2,
                           batch_size * (self.rank * 2 + 1),
                           dtype='int64')
    labels = paddle.reshape(labels, shape=[-1, 1])

    accuracy = paddle.metric.accuracy(input=logits, label=labels)
    loss = F.cross_entropy(input=logits, label=labels)
    outputs = {"loss": loss, "accuracy": accuracy}

    return outputs
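A hedged, single-GPU sketch (assuming rank == 0 and use_cross_batch == False) of the in-batch-negatives objective above: row i of the similarity matrix is trained to score highest against its own positive title, which is what the arange-based labels encode.

import paddle
import paddle.nn.functional as F

batch_size, emb_dim = 4, 16                             # illustrative sizes (assumption)
query_cls_embedding = paddle.randn([batch_size, emb_dim])
all_title_cls_embedding = paddle.randn([2 * batch_size, emb_dim])  # positives stacked over negatives

logits = paddle.matmul(query_cls_embedding, all_title_cls_embedding, transpose_y=True)
# with rank == 0 the labels reduce to arange(0, batch_size): query i matches title column i
labels = paddle.arange(0, batch_size, dtype='int64').reshape([-1, 1])
loss = F.cross_entropy(input=logits, label=labels)
accuracy = paddle.metric.accuracy(input=logits, label=labels)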
def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
    emb_dim = p_ide.shape[1]
    p_ide = p_ide.transpose((0, 2, 3, 1))
    p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])

    mask = t_conf > 0
    mask = paddle.cast(mask, dtype="int64")
    mask.stop_gradient = True
    emb_mask = mask.max(1).flatten()
    emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
    emb_mask_inds.stop_gradient = True
    # use max(1) to decide the id, TODO: find a more reasonable strategy
    t_ide_flatten = t_ide.max(1).flatten()
    t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
    valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

    if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
        # loss_ide = paddle.to_tensor([0])  # a plain zero tensor would break gradient backward
        loss_ide = self.phony * 0  # TODO
    else:
        embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
        embedding = emb_scale * F.normalize(embedding)
        logits = classifier(embedding)

        ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)

        loss_ide = F.cross_entropy(logits,
                                   ide_target,
                                   ignore_index=-1,
                                   reduction='mean')
    loss_ide.stop_gradient = False
    return loss_ide
def compute(self, pred, label, seq_mask=None):
    """
    Computes cross entropy loss.

    Args:
        pred (Tensor):
            Predictor tensor, and its dtype is float32 or float64, and has a shape of
            [batch_size, sequence_length, vocab_size].
        label (Tensor):
            Label tensor, and its dtype is int64, and has a shape of
            [batch_size, sequence_length, 1] or [batch_size, sequence_length].
        seq_mask (Tensor, optional):
            Sequence mask tensor, and its type could be float32, float64, int32 or int64,
            and has a shape of [batch_size, sequence_length]. It's used to calculate loss.
            Defaults to None.

    Returns:
        If `seq_mask` is provided, returns a tuple of the masked per-token loss `ce` and
        the number of unmasked tokens `word_num`; otherwise returns the per-token loss `ce`
        of shape [batch_size, sequence_length].
    """
    if label.dim() == 2:
        label = paddle.unsqueeze(label, axis=2)
    ce = F.cross_entropy(input=pred,
                         label=label,
                         reduction='none',
                         soft_label=False)
    ce = paddle.squeeze(ce, axis=[2])
    if seq_mask is not None:
        ce = ce * seq_mask
        word_num = paddle.sum(seq_mask)
        return ce, word_num
    return ce
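A self-contained sketch (shapes and the final averaging step are assumptions) of the masked per-token cross entropy computed above; padded positions contribute neither to the loss sum nor to word_num.

import paddle
import paddle.nn.functional as F

batch_size, seq_len, vocab_size = 2, 5, 100          # illustrative sizes (assumption)
pred = paddle.randn([batch_size, seq_len, vocab_size])
label = paddle.randint(0, vocab_size, shape=[batch_size, seq_len, 1])
seq_mask = paddle.to_tensor([[1., 1., 1., 0., 0.],
                             [1., 1., 1., 1., 0.]])  # 1 for real tokens, 0 for padding

ce = F.cross_entropy(input=pred, label=label, reduction='none', soft_label=False)
ce = paddle.squeeze(ce, axis=[2]) * seq_mask          # zero out padded positions
word_num = paddle.sum(seq_mask)                       # number of real tokens
avg_loss = paddle.sum(ce) / word_num                  # per-token loss over real tokens only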
def forward(self, logit, label):
    n, c, h, w = logit.shape
    total_loss = 0.0
    if len(label.shape) != len(logit.shape):
        label = paddle.unsqueeze(label, 1)

    for i in range(n):
        x = paddle.unsqueeze(logit[i], 0)
        y = paddle.unsqueeze(label[i], 0)
        x = paddle.transpose(x, (0, 2, 3, 1))
        y = paddle.transpose(y, (0, 2, 3, 1))
        x = paddle.reshape(x, shape=(-1, c))
        y = paddle.reshape(y, shape=(-1, ))
        loss = F.cross_entropy(x,
                               y,
                               weight=self.weight,
                               ignore_index=self.ignore_index,
                               reduction="none")
        sorted_loss = paddle.sort(loss, descending=True)
        if sorted_loss[self.K] > self.threshold:
            new_indices = paddle.nonzero(sorted_loss > self.threshold)
            loss = paddle.gather(sorted_loss, new_indices)
        else:
            loss = sorted_loss[:self.K]
        total_loss += paddle.mean(loss)
    return total_loss / float(n)
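A small numeric sketch (the values of K and threshold are assumptions) of the hard-example selection used above: keep the per-pixel losses above the threshold when enough of them exceed it, otherwise fall back to the K largest losses.

import paddle

K, threshold = 4, 0.7                                 # assumed values for illustration
loss = paddle.to_tensor([2.1, 1.5, 0.9, 0.6, 0.3, 0.1])
sorted_loss = paddle.sort(loss, descending=True)
if sorted_loss[K] > threshold:
    # enough hard pixels: keep everything above the threshold
    kept = paddle.gather(sorted_loss,
                         paddle.nonzero(sorted_loss > threshold).flatten())
else:
    # otherwise keep the K hardest pixels
    kept = sorted_loss[:K]
mean_hard_loss = paddle.mean(kept)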
def __call__(self, s_arc, s_rel, arcs, rels, mask):
    arcs = paddle.masked_select(arcs, mask)
    rels = paddle.masked_select(rels, mask)

    select = paddle.nonzero(mask)
    s_arc = paddle.gather_nd(s_arc, select)
    s_rel = paddle.gather_nd(s_rel, select)

    s_rel = index_sample(s_rel, paddle.unsqueeze(arcs, axis=1))

    arc_cost = F.cross_entropy(s_arc, arcs)
    rel_cost = F.cross_entropy(s_rel, rels)

    avg_cost = paddle.mean(arc_cost + rel_cost)
    return avg_cost
def get_loss(self, scores, deltas, targets, rois, bbox_weight):
    """
    Args:
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes
            and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        bbox_weight: weights passed to bbox2delta when computing the regression targets
    """
    # TODO: better pass args
    tgt_labels, tgt_bboxes, tgt_gt_inds = targets
    tgt_labels = paddle.concat(tgt_labels) if len(
        tgt_labels) > 1 else tgt_labels[0]
    tgt_labels = tgt_labels.cast('int64')
    tgt_labels.stop_gradient = True
    loss_bbox_cls = F.cross_entropy(input=scores,
                                    label=tgt_labels,
                                    reduction='mean')

    # bbox reg
    cls_agnostic_bbox_reg = deltas.shape[1] == 4

    fg_inds = paddle.nonzero(
        paddle.logical_and(tgt_labels >= 0,
                           tgt_labels < self.num_classes)).flatten()

    cls_name = 'loss_bbox_cls'
    reg_name = 'loss_bbox_reg'
    loss_bbox = {}
    if cls_agnostic_bbox_reg:
        reg_delta = paddle.gather(deltas, fg_inds)
    else:
        fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

        reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
        reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])

        reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
        reg_col_inds = reg_col_inds.reshape([-1, 1])
        reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)

        reg_delta = paddle.gather(deltas, fg_inds)
        reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])

    rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
    tgt_bboxes = paddle.concat(tgt_bboxes) if len(
        tgt_bboxes) > 1 else tgt_bboxes[0]

    reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
    reg_target = paddle.gather(reg_target, fg_inds)
    reg_target.stop_gradient = True

    loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum() / tgt_labels.shape[0]

    loss_bbox[cls_name] = loss_bbox_cls
    loss_bbox[reg_name] = loss_bbox_reg

    return loss_bbox
def calc_loss(self, x, target):
    if self._label_smoothing:
        target = self._labelsmoothing(target)
        x = -F.log_softmax(x, axis=-1)
        cost = paddle.sum(x * target, axis=-1)
    else:
        cost = F.cross_entropy(x, label=target)
    avg_cost = self.reduce_loss(cost)
    return avg_cost
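A hedged sketch of the soft-label branch above, assuming `_labelsmoothing()` (not shown in the source) one-hot encodes the target and mixes it with a uniform distribution; `epsilon` is an assumed smoothing factor.

import paddle
import paddle.nn.functional as F

num_classes, epsilon = 10, 0.1                        # assumed values for illustration
target = paddle.to_tensor([3, 7])                     # hard class labels
one_hot = F.one_hot(target, num_classes)
soft_target = one_hot * (1 - epsilon) + epsilon / num_classes   # smoothed distribution

x = paddle.randn([2, num_classes])                    # dummy logits
cost = paddle.sum(-F.log_softmax(x, axis=-1) * soft_target, axis=-1)
avg_cost = paddle.mean(cost)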
def mlp(input_x, input_y, hid_dim=128, label_dim=2):
    fc_1 = paddle.static.nn.fc(x=input_x, size=hid_dim, activation='tanh')
    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim, activation='tanh')
    prediction = paddle.static.nn.fc(x=[fc_2],
                                     size=label_dim,
                                     activation='softmax')
    cost = F.cross_entropy(input=prediction, label=input_y)
    avg_cost = paddle.mean(x=cost)
    return avg_cost
def _crossentropy(self, input, target):
    if self._label_smoothing:
        target = self._labelsmoothing(target)
        input = -F.log_softmax(input, axis=-1)
        cost = paddle.sum(target * input, axis=-1)
    else:
        cost = F.cross_entropy(input=input, label=target)
    avg_cost = paddle.mean(cost)
    return avg_cost
def forward(self, prediction_scores, seq_relationship_score, masked_lm_labels,
            next_sentence_labels, masked_lm_scale):
    with paddle.static.amp.fp16_guard():
        masked_lm_loss = F.cross_entropy(prediction_scores,
                                         masked_lm_labels,
                                         reduction="none",
                                         ignore_index=-1)
        masked_lm_loss = masked_lm_loss / masked_lm_scale
        next_sentence_loss = F.cross_entropy(seq_relationship_score,
                                             next_sentence_labels,
                                             reduction="none")
    return paddle.sum(masked_lm_loss) + paddle.mean(next_sentence_loss)
def network():
    img = static.data(name='image', shape=[None, 784])
    hidden = static.nn.fc(x=img, size=200, activation='relu')
    hidden = F.dropout(hidden, p=0.5)
    prediction = static.nn.fc(x=hidden, size=10, activation='softmax')
    label = static.data(name='label', shape=[None, 1], dtype='int64')
    loss = F.cross_entropy(input=prediction, label=label)
    avg_loss = paddle.mean(loss)
    return avg_loss
def forward(self, predict, label, trg_mask):
    cost = F.cross_entropy(input=predict,
                           label=label,
                           reduction='none',
                           soft_label=False)
    cost = paddle.squeeze(cost, axis=[2])
    masked_cost = cost * trg_mask
    batch_mean_cost = paddle.mean(masked_cost, axis=[0])
    seq_cost = paddle.sum(batch_mean_cost)
    return seq_cost