def build_program(main_program, startup_program, image_shape, archs, args,
                  is_train):
    with static.program_guard(main_program, startup_program):
        data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
            image_shape, is_train, args)
        logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
                                   is_train, 10)
        top1 = paddle.metric.accuracy(input=logits, label=label, k=1)
        top5 = paddle.metric.accuracy(input=logits, label=label, k=5)
        loss = paddle.mean(F.softmax_with_cross_entropy(logits, label))

        if is_train:
            if auxiliary:
                loss_aux = paddle.mean(
                    F.softmax_with_cross_entropy(logits_aux, label))
                loss = loss + auxiliary_weight * loss_aux
            step_per_epoch = int(trainset_num / args.batch_size)
            learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
                lr, T_max=step_per_epoch * args.retain_epoch)
            optimizer = paddle.optimizer.Momentum(
                learning_rate,
                momentum,
                weight_decay=paddle.regularizer.L2Decay(weight_decay),
                grad_clip=nn.ClipGradByGlobalNorm(clip_norm=5.0))
            optimizer.minimize(loss)
        outs = [loss, top1, top5]
    return outs, (data, label), data_loader
def forward(self, x, y):
    # fw_logits, bw_logits: [batch_size, seq_len, vocab_size]
    fw_logits, bw_logits = x
    # fw_label, bw_label: [batch_size, seq_len]
    fw_label, bw_label = y
    # expand labels to [batch_size, seq_len, 1]
    fw_label = paddle.unsqueeze(fw_label, axis=2)
    bw_label = paddle.unsqueeze(bw_label, axis=2)
    # per-token loss: [batch_size, seq_len, 1]
    fw_loss = F.softmax_with_cross_entropy(logits=fw_logits, label=fw_label)
    bw_loss = F.softmax_with_cross_entropy(logits=bw_logits, label=bw_label)
    avg_loss = 0.5 * (paddle.mean(fw_loss) + paddle.mean(bw_loss))
    return avg_loss
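# Usage sketch for the bidirectional LM criterion above (all shapes and the
# random inputs below are illustrative assumptions, not from the source):
import paddle
import paddle.nn.functional as F

batch_size, seq_len, vocab_size = 4, 16, 100
fw_logits = paddle.randn([batch_size, seq_len, vocab_size])
bw_logits = paddle.randn([batch_size, seq_len, vocab_size])
fw_label = paddle.randint(0, vocab_size, [batch_size, seq_len], dtype='int64')
bw_label = paddle.randint(0, vocab_size, [batch_size, seq_len], dtype='int64')

# inline equivalent of forward((fw_logits, bw_logits), (fw_label, bw_label)):
fw_loss = F.softmax_with_cross_entropy(fw_logits, fw_label.unsqueeze(2))
bw_loss = F.softmax_with_cross_entropy(bw_logits, bw_label.unsqueeze(2))
avg_loss = 0.5 * (paddle.mean(fw_loss) + paddle.mean(bw_loss))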
def compute_softmax_loss(self, y, t):
    """Compute the loss when output distributions are categorical distributions.

    Parameters
    ----------
    y : Tensor [shape=(B, T, C_output)]
        The logits of the output distributions.
    t : Tensor [shape=(B, T)]
        The target audio. The audio is first quantized then used as the
        target.

    Notes
    -----
    Output distributions whose input contains padding are neglected in
    loss computation, so the first ``context_size`` steps do not
    contribute to the loss.

    Returns
    -------
    Tensor [shape=(1,)]
        The loss.
    """
    # The first ``context_size`` steps are not taken into account.
    y = y[:, self.context_size:, :]
    t = t[:, self.context_size:]

    t = paddle.clip(t, min=-1.0, max=0.99999)
    quantized = quantize(t, n_bands=self.output_dim)
    label = paddle.unsqueeze(quantized, -1)

    loss = F.softmax_with_cross_entropy(y, label)
    reduced_loss = paddle.mean(loss)
    return reduced_loss
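# The `quantize` helper called above is not defined in this snippet; a
# minimal sketch consistent with its call site (an assumption, not the
# original implementation): map waveform samples in [-1, 1) linearly to
# integer bins in [0, n_bands). The clip to max=0.99999 above keeps the
# top bin strictly below n_bands.
import paddle

def quantize(x, n_bands):
    # [-1, 1) -> integer bin ids in [0, n_bands)
    return paddle.cast((x + 1.0) / 2.0 * n_bands, 'int64')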
def masked_softmax_with_cross_entropy(logits, label, mask, axis=-1):
    """Compute masked softmax with cross entropy loss.

    Parameters
    ----------
    logits : Tensor
        The logits. The ``axis``-th axis is the class dimension.
    label : Tensor [dtype: int]
        The label. The size of the ``axis``-th axis should be 1.
    mask : Tensor
        The mask. The shape should be broadcastable to ``label``.
    axis : int, optional
        The index of the class dimension in the shape of ``logits``,
        by default -1.

    Returns
    -------
    Tensor [shape=(1,)]
        The masked softmax with cross entropy loss.
    """
    ce = F.softmax_with_cross_entropy(logits, label, axis=axis)
    loss = weighted_mean(ce, mask)
    return loss
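# `weighted_mean` is not defined in this snippet; a sketch consistent with
# its use here (an assumption, not the original implementation): a weighted
# average in which masked-out positions contribute nothing.
import paddle
import paddle.nn.functional as F

def weighted_mean(x, weight):
    weight = paddle.cast(weight, x.dtype)
    return paddle.sum(x * weight) / (paddle.sum(weight) + 1e-9)

# Usage with made-up shapes:
logits = paddle.randn([2, 5, 8])                          # (B, T, C)
label = paddle.randint(0, 8, [2, 5, 1], dtype='int64')    # class axis of size 1
mask = paddle.ones([2, 5, 1])                             # 1 = keep, 0 = pad
loss = masked_softmax_with_cross_entropy(logits, label, mask)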
def forward(self, logit, label):
    """
    Forward computation.

    Args:
        logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
            (N, C), where C is number of classes, and if shape is more than 2D, this
            is (N, C, D1, D2,..., Dk), k >= 1.
        label (Tensor): Label tensor, the data type is int64. Shape is (N), where each
            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
            (N, D1, D2,..., Dk), k >= 1.
    """
    if len(label.shape) != len(logit.shape):
        label = paddle.unsqueeze(label, 1)

    logit = paddle.transpose(logit, [0, 2, 3, 1])
    label = paddle.transpose(label, [0, 2, 3, 1])
    loss = F.softmax_with_cross_entropy(
        logit, label, ignore_index=self.ignore_index, axis=-1)

    mask = label != self.ignore_index
    mask = paddle.cast(mask, 'float32')
    loss = loss * mask
    avg_loss = paddle.mean(loss) / (paddle.mean(mask) + self.EPS)

    label.stop_gradient = True
    mask.stop_gradient = True
    return avg_loss
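# Why the average divides by mean(mask): with N total positions and V valid
# ones, mean(loss) = sum(valid losses) / N and mean(mask) = V / N, so their
# ratio is the mean over valid positions only. A standalone check with
# made-up shapes (illustrative, not source code):
import numpy as np
import paddle
import paddle.nn.functional as F

logit = paddle.randn([1, 3, 4, 4])                         # (N, C, H, W)
label_np = np.random.randint(0, 3, size=(1, 1, 4, 4)).astype('int64')
label_np[0, 0, 0, 0] = 255                                 # one ignored pixel
label = paddle.to_tensor(label_np)                         # (N, 1, H, W)

logit_t = paddle.transpose(logit, [0, 2, 3, 1])
label_t = paddle.transpose(label, [0, 2, 3, 1])
loss = F.softmax_with_cross_entropy(logit_t, label_t,
                                    ignore_index=255, axis=-1)
mask = paddle.cast(label_t != 255, 'float32')
avg_loss = paddle.mean(loss * mask) / (paddle.mean(mask) + 1e-8)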
def simple_net(image, label):
    fc_tmp = static.nn.fc(image, size=CLASS_NUM)
    cross_entropy = F.softmax_with_cross_entropy(fc_tmp, label)
    loss = paddle.mean(cross_entropy)
    sgd = paddle.optimizer.SGD(learning_rate=1e-3)
    sgd.minimize(loss)
    return loss
def forward(self, predict, label, trg_mask):
    cost = F.softmax_with_cross_entropy(
        logits=predict, label=label, soft_label=False)
    cost = paddle.squeeze(cost, axis=[2])
    masked_cost = cost * trg_mask
    batch_mean_cost = paddle.mean(masked_cost, axis=[0])
    seq_cost = paddle.sum(batch_mean_cost)
    return seq_cost
def compute(self, pred, label, seq_mask=None):
    label = paddle.unsqueeze(label, axis=2)
    ce = F.softmax_with_cross_entropy(
        logits=pred, label=label, soft_label=False)
    ce = paddle.squeeze(ce, axis=[2])
    if seq_mask is not None:
        ce = ce * seq_mask
        word_num = paddle.sum(seq_mask)
        return ce, word_num
    return ce
def forward(self, inputs, labels, weights, bias):
    """forward"""
    # weights.stop_gradient = False
    embedding_dim = paddle.shape(weights)[-1]
    true_log_probs, samp_log_probs, neg_samples = self.sample(labels)
    n_sample = neg_samples.shape[0]

    b1 = paddle.shape(labels)[0]
    b2 = paddle.shape(labels)[1]

    all_ids = paddle.concat([labels.reshape((-1, )), neg_samples])
    all_w = paddle.gather(weights, all_ids)

    true_w = all_w[:-n_sample].reshape((-1, b2, embedding_dim))
    sample_w = all_w[-n_sample:].reshape((n_sample, embedding_dim))

    all_b = paddle.gather(bias, all_ids)
    true_b = all_b[:-n_sample].reshape((-1, 1))
    sample_b = all_b[-n_sample:]

    # [B, D] * [B, 1, D]
    true_logits = paddle.matmul(
        true_w, inputs.unsqueeze(1), transpose_y=True).squeeze(1) + true_b
    sample_logits = paddle.matmul(
        inputs.unsqueeze(1), sample_w, transpose_y=True) + sample_b

    if self.subtract_log_q:
        true_logits = true_logits - true_log_probs.unsqueeze(1)
        sample_logits = sample_logits - samp_log_probs

    if self.remove_accidental_hits:
        hit = (paddle.equal(labels[:, :], neg_samples)).unsqueeze(1)
        padding = paddle.ones_like(sample_logits) * -1e30
        sample_logits = paddle.where(hit, padding, sample_logits)

    sample_logits = sample_logits.squeeze(1)
    out_logits = paddle.concat([true_logits, sample_logits], axis=1)
    out_label = paddle.concat([
        paddle.ones_like(true_logits) / self.num_true,
        paddle.zeros_like(sample_logits)
    ], axis=1)

    sampled_loss = F.softmax_with_cross_entropy(
        logits=out_logits, label=out_label, soft_label=True)
    return sampled_loss, out_logits, out_label
def build_model(self, on_ipu):
    x = paddle.static.data(
        name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32')
    if on_ipu:
        label = paddle.static.data(
            name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32')
    else:
        label = paddle.static.data(
            name=self.feed_list[1], shape=self.feed_shape[1], dtype='int64')
    out = F.softmax_with_cross_entropy(x, label, **self.attrs)
    self.fetch_list = [out.name]
def forward(self, inputs, label=None):
    # First define the LSTM's initial hidden and cell states; here the
    # sequence memory is zero-initialized.
    batch_size = inputs.shape[0]
    init_hidden_data = np.zeros(
        (self.num_layers, batch_size, self.hidden_size), dtype='float32')
    init_cell_data = np.zeros(
        (self.num_layers, batch_size, self.hidden_size), dtype='float32')

    # Convert these initial states into tensors Paddle can compute with.
    # Set stop_gradient=True so they are not updated during training.
    init_hidden = paddle.to_tensor(init_hidden_data)
    init_hidden.stop_gradient = True
    init_cell = paddle.to_tensor(init_cell_data)
    init_cell.stop_gradient = True

    init_h = paddle.reshape(
        init_hidden, shape=[self.num_layers, -1, self.hidden_size])
    init_c = paddle.reshape(
        init_cell, shape=[self.num_layers, -1, self.hidden_size])

    # Convert the mini-batch of input sentences into word embeddings.
    x_emb = self.embedding(inputs)
    x_emb = paddle.reshape(
        x_emb, shape=[-1, self.num_steps, self.hidden_size])
    if self.dropout is not None and self.dropout > 0.0:
        x_emb = self.dropout_layer(x_emb)

    # Run the LSTM to encode each sentence into a vector representation.
    rnn_out, (last_hidden, last_cell) = self.simple_lstm_rnn(
        x_emb, (init_h, init_c))
    last_hidden = paddle.reshape(
        last_hidden[-1], shape=[-1, self.hidden_size])

    # Map each sentence's vector representation to a sentiment class.
    projection = self.cls_fc(last_hidden)
    pred = F.softmax(projection, axis=-1)

    if label is not None:
        # Given the labels, compute the network loss; the cross entropy
        # commonly used for classification works directly here.
        loss = F.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = paddle.mean(loss)
        # Return both the prediction pred and the loss.
        return pred, loss
    else:
        return pred
def forward(self, kl_loss, dec_output, trg_mask, label):
    self.update_kl_weight()
    self.kl_loss = kl_loss

    rec_loss = F.softmax_with_cross_entropy(
        logits=dec_output, label=label, soft_label=False)
    rec_loss = paddle.squeeze(rec_loss, axis=[2])
    rec_loss = rec_loss * trg_mask
    rec_loss = paddle.mean(rec_loss, axis=[0])
    rec_loss = paddle.sum(rec_loss)

    self.rec_loss = rec_loss
    self.loss = self.kl_loss * self.kl_weight + self.rec_loss
    return self.loss
def _get_head_loss(self, score, delta, target):
    # bbox cls
    labels_int64 = paddle.cast(x=target['labels_int32'], dtype='int64')
    labels_int64.stop_gradient = True
    loss_bbox_cls = F.softmax_with_cross_entropy(
        logits=score, label=labels_int64)
    loss_bbox_cls = paddle.mean(loss_bbox_cls)
    # bbox reg
    loss_bbox_reg = ops.smooth_l1(
        input=delta,
        label=target['bbox_targets'],
        inside_weight=target['bbox_inside_weights'],
        outside_weight=target['bbox_outside_weights'],
        sigma=1.0)
    loss_bbox_reg = paddle.mean(loss_bbox_reg)
    return loss_bbox_cls, loss_bbox_reg
def forward(self, predict, label):
    weights = paddle.cast(
        label != self.pad_idx, dtype=paddle.get_default_dtype())
    if self.label_smooth_eps:
        label = F.label_smooth(
            label=F.one_hot(x=label, num_classes=predict.shape[-1]),
            epsilon=self.label_smooth_eps)

    cost = F.softmax_with_cross_entropy(
        logits=predict,
        label=label,
        soft_label=True if self.label_smooth_eps else False).squeeze()
    weighted_cost = cost * weights
    sum_cost = paddle.sum(weighted_cost)
    token_num = paddle.sum(weights)
    token_num.stop_gradient = True
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, token_num
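# The label-smoothing path above, sketched standalone with made-up sizes
# (an illustration, not the original class): hard labels are one-hot
# encoded, smoothed, and then scored with soft_label=True.
import paddle
import paddle.nn.functional as F

vocab_size, eps = 10, 0.1
predict = paddle.randn([2, 5, vocab_size])
hard = paddle.randint(0, vocab_size, [2, 5], dtype='int64')
soft = F.label_smooth(F.one_hot(hard, num_classes=vocab_size), epsilon=eps)
cost = F.softmax_with_cross_entropy(predict, soft, soft_label=True)  # (2, 5, 1)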
def validation_step(self, batch: list, batch_idx: int) -> dict:
    '''
    One step for validation, which should be called as forward computation.

    Args:
        batch(list[paddle.Tensor]): The one batch data, which contains images and labels.
        batch_idx(int): The index of batch.

    Returns:
        results(dict): The model outputs, such as metrics.
    '''
    images = batch[0]
    labels = paddle.unsqueeze(batch[1], axis=-1)

    preds, feature = self(images)
    loss, _ = F.softmax_with_cross_entropy(
        preds, labels, return_softmax=True, axis=1)
    loss = paddle.mean(loss)
    acc = paddle.metric.accuracy(preds, labels)
    return {'loss': loss, 'metrics': {'acc': acc}}
def forward(self, logits, label):
    """
    Forward computation.

    Args:
        logits (tuple|list): (seg_logit, edge_logit) Tensor, the data type is float32,
            float64. Shape is (N, C), where C is number of classes, and if shape is
            more than 2D, this is (N, C, D1, D2,..., Dk), k >= 1. For edge_logit,
            C is 1.
        label (Tensor): Label tensor, the data type is int64. Shape is (N), where each
            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
            (N, D1, D2,..., Dk), k >= 1.
    """
    seg_logit, edge_logit = logits[0], logits[1]
    if len(label.shape) != len(seg_logit.shape):
        label = paddle.unsqueeze(label, 1)
    if edge_logit.shape != label.shape:
        raise ValueError(
            'The shape of edge_logit should equal that of label, but they are {} != {}'
            .format(edge_logit.shape, label.shape))

    # Keep only pixels on detected edges; everything else is ignored.
    filler = paddle.ones_like(label) * self.ignore_index
    label = paddle.where(edge_logit > self.edge_threshold, label, filler)

    seg_logit = paddle.transpose(seg_logit, [0, 2, 3, 1])
    label = paddle.transpose(label, [0, 2, 3, 1])
    loss = F.softmax_with_cross_entropy(
        seg_logit, label, ignore_index=self.ignore_index, axis=-1)

    mask = label != self.ignore_index
    mask = paddle.cast(mask, 'float32')
    loss = loss * mask
    avg_loss = paddle.mean(loss) / (paddle.mean(mask) + self.EPS)
    if paddle.mean(mask) < self.mean_mask:
        self.mean_mask = paddle.mean(mask)

    label.stop_gradient = True
    mask.stop_gradient = True
    return avg_loss
def forward(self,
            query_input_ids,
            title_input_ids,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            title_token_type_ids=None,
            title_position_ids=None,
            title_attention_mask=None):
    query_cls_embedding = self.get_pooled_embedding(
        query_input_ids, query_token_type_ids, query_position_ids,
        query_attention_mask)

    title_cls_embedding = self.get_pooled_embedding(
        title_input_ids, title_token_type_ids, title_position_ids,
        title_attention_mask)

    cosine_sim = paddle.matmul(
        query_cls_embedding, title_cls_embedding, transpose_y=True)

    # Subtract margin from all positive samples (the diagonal of cosine_sim).
    margin_diag = paddle.full(
        shape=[query_cls_embedding.shape[0]],
        fill_value=self.margin,
        dtype=paddle.get_default_dtype())
    cosine_sim = cosine_sim - paddle.diag(margin_diag)

    # Scale the cosine similarity to ease training convergence.
    cosine_sim *= self.sacle

    labels = paddle.arange(0, query_cls_embedding.shape[0], dtype='int64')
    labels = paddle.reshape(labels, shape=[-1, 1])

    loss = F.softmax_with_cross_entropy(logits=cosine_sim, label=labels)
    return paddle.mean(loss)
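# The in-batch-negatives trick above, sketched with toy sizes (illustrative
# assumptions, not the original model): row i of cosine_sim scores query i
# against every title in the batch, so the correct "class" for row i is
# column i, which is why the labels are simply arange(batch_size).
import paddle
import paddle.nn.functional as F

batch_size, dim = 4, 8
q = F.normalize(paddle.randn([batch_size, dim]))
t = F.normalize(paddle.randn([batch_size, dim]))
cosine_sim = paddle.matmul(q, t, transpose_y=True)      # (B, B)
labels = paddle.arange(0, batch_size, dtype='int64').reshape([-1, 1])
loss = paddle.mean(F.softmax_with_cross_entropy(cosine_sim, labels))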
def net(self, inputs, is_infer=False):
    mind_model = net.MindLayer(
        self.item_count, self.embedding_dim, self.hidden_size,
        self.neg_samples, self.maxlen, self.pow_p, self.capsual_iters,
        self.capsual_max_k, self.capsual_init_std)
    # self.model = mind_model

    if is_infer:
        mind_model.eval()
        user_cap, cap_weights = mind_model.forward(*inputs)
        # self.inference_target_var = user_cap
        fetch_dict = {"user_cap": user_cap}
        return fetch_dict

    hist_item, labels, seqlen = inputs
    [_, sampled_logist, sampled_labels], weight, user_cap, cap_weights, cap_mask = mind_model(
        hist_item, seqlen, labels)

    loss = F.softmax_with_cross_entropy(
        sampled_logist, sampled_labels, soft_label=True)
    self._cost = paddle.mean(loss)
    fetch_dict = {"loss": self._cost}
    return fetch_dict
def forward(self, boxes, scores, gt_bbox, gt_label, prior_boxes):
    boxes = paddle.concat(boxes, axis=1)
    scores = paddle.concat(scores, axis=1)
    gt_label = gt_label.unsqueeze(-1).astype('int64')
    prior_boxes = paddle.concat(prior_boxes, axis=0)
    bg_index = scores.shape[-1] - 1

    # Match bbox and get targets.
    targets_bbox, targets_label = \
        self._bipartite_match_for_batch(gt_bbox, gt_label, prior_boxes,
                                        bg_index)
    targets_bbox.stop_gradient = True
    targets_label.stop_gradient = True

    # Compute regression loss.
    # Select positive samples.
    bbox_mask = (targets_label != bg_index).astype(boxes.dtype)
    loc_loss = bbox_mask * F.smooth_l1_loss(
        boxes, targets_bbox, reduction='none')
    loc_loss = loc_loss.sum() * self.loc_loss_weight

    # Compute confidence loss.
    conf_loss = F.softmax_with_cross_entropy(scores, targets_label)
    # Mining hard examples.
    label_mask = self._mine_hard_example(
        conf_loss.squeeze(-1), targets_label.squeeze(-1), bg_index)
    conf_loss = conf_loss * label_mask.unsqueeze(-1).astype(conf_loss.dtype)
    conf_loss = conf_loss.sum() * self.conf_loss_weight

    # Compute overall weighted loss.
    normalizer = (targets_label != bg_index).astype('float32').sum().clip(
        min=1)
    loss = (conf_loss + loc_loss) / (normalizer + 1e-9)
    return loss
def forward(self, boxes, scores, gt_box, gt_class, anchors):
    boxes = paddle.concat(boxes, axis=1)
    scores = paddle.concat(scores, axis=1)
    prior_boxes = paddle.concat(anchors, axis=0)
    gt_label = gt_class.unsqueeze(-1)
    batch_size, num_priors, num_classes = scores.shape

    def _reshape_to_2d(x):
        return paddle.flatten(x, start_axis=2)

    # 1. Find matched bounding box by prior box.
    # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
    # 1.2 Compute matched bounding box by bipartite matching algorithm.
    matched_indices = []
    matched_dist = []
    for i in range(gt_box.shape[0]):
        iou = iou_similarity(gt_box[i], prior_boxes)
        matched_indice, matched_d = bipartite_match(
            iou, self.match_type, self.overlap_threshold)
        matched_indices.append(matched_indice)
        matched_dist.append(matched_d)
    matched_indices = paddle.concat(matched_indices, axis=0)
    matched_indices.stop_gradient = True
    matched_dist = paddle.concat(matched_dist, axis=0)
    matched_dist.stop_gradient = True

    # 2. Compute confidence for mining hard examples.
    # 2.1. Get the target label based on matched indices.
    target_label, _ = self._label_target_assign(gt_label, matched_indices)
    # Reshape confidence to a 2D tensor.
    confidence = _reshape_to_2d(scores)
    # 2.2. Compute confidence loss.
    target_label = _reshape_to_2d(target_label).astype('int64')
    conf_loss = F.softmax_with_cross_entropy(confidence, target_label)
    conf_loss = paddle.reshape(conf_loss, [batch_size, num_priors])

    # 3. Mining hard examples.
    neg_mask = self._mine_hard_example(
        conf_loss,
        matched_indices,
        matched_dist,
        neg_pos_ratio=self.neg_pos_ratio,
        neg_overlap=self.neg_overlap)

    # 4. Assign classification and regression targets.
    # 4.1. Encode bbox according to the prior boxes.
    prior_box_var = paddle.to_tensor(
        np.array([0.1, 0.1, 0.2, 0.2],
                 dtype='float32')).reshape([1, 4]).expand_as(prior_boxes)
    encoded_bbox = []
    for i in range(gt_box.shape[0]):
        encoded_bbox.append(
            box_coder(
                prior_box=prior_boxes,
                prior_box_var=prior_box_var,
                target_box=gt_box[i],
                code_type='encode_center_size'))
    encoded_bbox = paddle.stack(encoded_bbox, axis=0)
    # 4.2. Assign regression targets.
    target_bbox, target_loc_weight = self._bbox_target_assign(
        encoded_bbox, matched_indices)
    # 4.3. Assign classification targets.
    target_label, target_conf_weight = self._label_target_assign(
        gt_label, matched_indices, neg_mask=neg_mask)

    # 5. Compute loss.
    # 5.1 Compute confidence loss.
    target_label = _reshape_to_2d(target_label).astype('int64')
    conf_loss = F.softmax_with_cross_entropy(confidence, target_label)
    target_conf_weight = _reshape_to_2d(target_conf_weight)
    conf_loss = conf_loss * target_conf_weight * self.conf_loss_weight

    # 5.2 Compute regression loss.
    location = _reshape_to_2d(boxes)
    target_bbox = _reshape_to_2d(target_bbox)
    loc_loss = F.smooth_l1_loss(location, target_bbox, reduction='none')
    loc_loss = paddle.sum(loc_loss, axis=-1, keepdim=True)
    target_loc_weight = _reshape_to_2d(target_loc_weight)
    loc_loss = loc_loss * target_loc_weight * self.loc_loss_weight

    # 5.3 Compute overall weighted loss.
    loss = conf_loss + loc_loss
    loss = paddle.reshape(loss, [batch_size, num_priors])
    loss = paddle.sum(loss, axis=1, keepdim=True)
    normalizer = paddle.sum(target_loc_weight)
    loss = paddle.sum(loss / normalizer)
    return loss
def train(model):
    # Train on GPU 0.
    use_gpu = True
    paddle.set_device('gpu:0' if use_gpu else 'cpu')

    print('start training ... ')
    model.train()
    epoch_num = 5
    opt = paddle.optimizer.Momentum(
        learning_rate=0.001, momentum=0.9, parameters=model.parameters())

    # Use Paddle's built-in data readers.
    train_loader = paddle.batch(paddle.dataset.mnist.train(), batch_size=10)
    valid_loader = paddle.batch(paddle.dataset.mnist.test(), batch_size=10)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            # Adjust the shape and dtype of the input data.
            x_data = np.array([item[0] for item in data],
                              dtype='float32').reshape(-1, 1, 28, 28)
            y_data = np.array([item[1] for item in data],
                              dtype='int64').reshape(-1, 1)
            # Convert numpy.ndarray to Tensor.
            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)
            # Compute the model output.
            logits = model(img)
            # Compute the loss.
            loss = F.softmax_with_cross_entropy(logits, label)
            avg_loss = paddle.mean(loss)

            if batch_id % 1000 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, avg_loss.numpy()))
            avg_loss.backward()
            opt.step()
            opt.clear_grad()

        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(valid_loader()):
            # Adjust the shape and dtype of the input data.
            x_data = np.array([item[0] for item in data],
                              dtype='float32').reshape(-1, 1, 28, 28)
            y_data = np.array([item[1] for item in data],
                              dtype='int64').reshape(-1, 1)
            # Convert numpy.ndarray to Tensor.
            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)
            # Compute the model output.
            logits = model(img)
            pred = F.softmax(logits)
            # Compute the loss and accuracy.
            loss = F.softmax_with_cross_entropy(logits, label)
            acc = paddle.metric.accuracy(pred, label)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())
        print("[validation] accuracy/loss: {}/{}".format(
            np.mean(accuracies), np.mean(losses)))
        model.train()

    # Save the model parameters.
    paddle.save(model.state_dict(), 'mnist.pdparams')
def forward(self, logits, label):
    return paddle.mean(F.softmax_with_cross_entropy(logits, label))
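# The one-liner above is numerically equivalent to gathering the negative
# log-softmax at the label index. A quick standalone check (illustrative;
# paddle.take_along_axis assumes a reasonably recent Paddle release):
import paddle
import paddle.nn.functional as F

logits = paddle.randn([4, 10])
label = paddle.randint(0, 10, [4, 1], dtype='int64')
a = paddle.mean(F.softmax_with_cross_entropy(logits, label))
b = paddle.mean(-paddle.take_along_axis(
    F.log_softmax(logits, axis=-1), label, axis=-1))
# a and b agree up to floating-point error.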
def forward(self, logits, label):
    """
    Forward computation.

    Args:
        logits (tuple|list): (seg_logit, edge_logit) Tensor, the data type is float32,
            float64. Shape is (N, C), where C is number of classes, and if shape is
            more than 2D, this is (N, C, D1, D2,..., Dk), k >= 1. For edge_logit,
            C is 1.
        label (Tensor): Label tensor, the data type is int64. Shape is (N), where each
            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
            (N, D1, D2,..., Dk), k >= 1.
    """
    seg_logit, edge_logit = logits[0], logits[1]
    if len(label.shape) != len(seg_logit.shape):
        label = paddle.unsqueeze(label, 1)
    if edge_logit.shape != label.shape:
        raise ValueError(
            'The shape of edge_logit should equal that of label, but they are {} != {}'
            .format(edge_logit.shape, label.shape))

    # Filter out non-edge pixels.
    filler = paddle.ones_like(label) * self.ignore_index
    label = paddle.where(edge_logit > self.edge_threshold, label, filler)

    # OHEM: keep only the hardest examples.
    n, c, h, w = seg_logit.shape
    label = label.reshape((-1, ))
    valid_mask = (label != self.ignore_index).astype('int64')
    num_valid = valid_mask.sum()
    label = label * valid_mask

    prob = F.softmax(seg_logit, axis=1)
    prob = prob.transpose((1, 0, 2, 3)).reshape((c, -1))

    if self.min_kept < num_valid and num_valid > 0:
        # Push ignored positions above 1 so they are never selected.
        prob = prob + (1 - valid_mask)

        # Get the probability of the ground-truth label at each position.
        label_onehot = F.one_hot(label, c)
        label_onehot = label_onehot.transpose((1, 0))
        prob = prob * label_onehot
        prob = paddle.sum(prob, axis=0)

        threshold = self.thresh
        if self.min_kept > 0:
            index = prob.argsort()
            threshold_index = index[min(len(index), self.min_kept) - 1]
            threshold_index = int(threshold_index.numpy()[0])
            if prob[threshold_index] > self.thresh:
                threshold = prob[threshold_index]
            kept_mask = (prob < threshold).astype('int64')
            label = label * kept_mask
            valid_mask = valid_mask * kept_mask

    # Mark the invalid region as ignore_index.
    label = label + (1 - valid_mask) * self.ignore_index
    label = label.reshape((n, 1, h, w))
    valid_mask = valid_mask.reshape((n, 1, h, w)).astype('float32')

    loss = F.softmax_with_cross_entropy(
        seg_logit, label, ignore_index=self.ignore_index, axis=1)
    loss = loss * valid_mask
    avg_loss = paddle.mean(loss) / (paddle.mean(valid_mask) + self.EPS)

    label.stop_gradient = True
    valid_mask.stop_gradient = True
    return avg_loss
def forward(self, input, label):
    loss = F.softmax_with_cross_entropy(
        input, label, return_softmax=False, axis=1)
    return paddle.mean(loss)
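# With return_softmax=True, the same call also returns the softmax output,
# which avoids recomputing probabilities for metrics. A sketch with made-up
# shapes (an illustration, not the original class):
import paddle
import paddle.nn.functional as F

input = paddle.randn([4, 5])
label = paddle.randint(0, 5, [4, 1], dtype='int64')
loss, softmax_out = F.softmax_with_cross_entropy(
    input, label, return_softmax=True, axis=1)
avg_loss = paddle.mean(loss)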
def forward(self, logit, label):
    """
    Forward computation.

    Args:
        logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
            (N, C), where C is number of classes, and if shape is more than 2D, this
            is (N, C, D1, D2,..., Dk), k >= 1.
        label (Tensor): Label tensor, the data type is int64. Shape is (N), where each
            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
            (N, D1, D2,..., Dk), k >= 1.
    """
    if len(label.shape) != len(logit.shape):
        label = paddle.unsqueeze(label, 1)

    # OHEM: get the label after keeping only the hardest examples.
    n, c, h, w = logit.shape
    label = label.reshape((-1, ))
    valid_mask = (label != self.ignore_index).astype('int64')
    num_valid = valid_mask.sum()
    label = label * valid_mask

    prob = F.softmax(logit, axis=1)
    prob = prob.transpose((1, 0, 2, 3)).reshape((c, -1))

    if self.min_kept < num_valid and num_valid > 0:
        # Push ignored positions above 1 so they are never selected.
        prob = prob + (1 - valid_mask)

        # Get the probability of the ground-truth label at each position.
        label_onehot = F.one_hot(label, c)
        label_onehot = label_onehot.transpose((1, 0))
        prob = prob * label_onehot
        prob = paddle.sum(prob, axis=0)

        threshold = self.thresh
        if self.min_kept > 0:
            index = prob.argsort()
            threshold_index = index[min(len(index), self.min_kept) - 1]
            threshold_index = int(threshold_index.numpy()[0])
            if prob[threshold_index] > self.thresh:
                threshold = prob[threshold_index]
            kept_mask = (prob < threshold).astype('int64')
            label = label * kept_mask
            valid_mask = valid_mask * kept_mask

    # Mark the invalid region as ignore_index.
    label = label + (1 - valid_mask) * self.ignore_index
    label = label.reshape((n, 1, h, w))
    valid_mask = valid_mask.reshape((n, 1, h, w)).astype('float32')

    loss = F.softmax_with_cross_entropy(
        logit, label, ignore_index=self.ignore_index, axis=1)
    loss = loss * valid_mask
    avg_loss = paddle.mean(loss) / (paddle.mean(valid_mask) + self.EPS)

    label.stop_gradient = True
    valid_mask.stop_gradient = True
    return avg_loss
def forward(self, predict, label):
    r"""
    Computes cross entropy loss with or without label smoothing.

    Args:
        predict (Tensor):
            The predict results of `TransformerModel` with shape
            `[batch_size, sequence_length, vocab_size]` whose data type can
            be float32 or float64.
        label (Tensor):
            The label for corresponding results with shape
            `[batch_size, sequence_length, 1]`.

    Returns:
        tuple: A tuple with items: (`sum_cost`, `avg_cost`, `token_num`).

        With the corresponding fields:

        - `sum_cost` (Tensor):
            The sum of loss of current batch whose data type can be float32,
            float64.
        - `avg_cost` (Tensor):
            The average loss of current batch whose data type can be float32,
            float64. The relation between `sum_cost` and `avg_cost` can be
            described as:

            .. math::

                avg\_cost = sum\_cost / token\_num

        - `token_num` (Tensor):
            The number of tokens of current batch.

    Example:
        .. code-block::

            import paddle
            from paddlenlp.transformers import CrossEntropyCriterion

            criterion = CrossEntropyCriterion(label_smooth_eps=0.1, pad_idx=0)

            batch_size = 1
            seq_len = 2
            vocab_size = 30000
            predict = paddle.rand(shape=[batch_size, seq_len, vocab_size])
            label = paddle.randint(
                low=3,
                high=vocab_size,
                shape=[batch_size, seq_len, 1])

            criterion(predict, label)
    """
    weights = paddle.cast(
        label != self.pad_idx, dtype=paddle.get_default_dtype())
    if self.label_smooth_eps:
        label = paddle.squeeze(label, axis=[2])
        label = F.label_smooth(
            label=F.one_hot(x=label, num_classes=predict.shape[-1]),
            epsilon=self.label_smooth_eps)

    cost = F.softmax_with_cross_entropy(
        logits=predict,
        label=label,
        soft_label=True if self.label_smooth_eps else False)
    weighted_cost = cost * weights
    sum_cost = paddle.sum(weighted_cost)
    token_num = paddle.sum(weights)
    token_num.stop_gradient = True
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, token_num