def l1_loss(input, label, reduction='mean', name=None):
    r"""
    This operator computes the L1 Loss of Tensor ``input`` and ``label`` as follows.

    If `reduction` set to ``'none'``, the loss is:

    .. math::
        Out = \lvert input - label \rvert

    If `reduction` set to ``'mean'``, the loss is:

    .. math::
        Out = MEAN(\lvert input - label \rvert)

    If `reduction` set to ``'sum'``, the loss is:

    .. math::
        Out = SUM(\lvert input - label \rvert)

    Parameters:
        input (Tensor): The input tensor. The shape is [N, *], where N is batch size
            and `*` means any number of additional dimensions. Its data type should
            be float32, float64, int32 or int64.
        label (Tensor): The label tensor. The shape is [N, *], the same shape as
            ``input``. Its data type should be float32, float64, int32 or int64.
        reduction (str, optional): Indicate the reduction to apply to the loss. The
            candidates are ``'none'`` | ``'mean'`` | ``'sum'``. If `reduction` is
            ``'none'``, the unreduced loss is returned; if `reduction` is ``'mean'``,
            the reduced mean loss is returned; if `reduction` is ``'sum'``, the
            reduced sum loss is returned. Default is ``'mean'``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the L1 Loss of Tensor ``input`` and ``label``. If `reduction` is
        ``'none'``, the shape of the output loss is [N, *], the same as ``input``.
        If `reduction` is ``'mean'`` or ``'sum'``, the shape of the output loss is [1].

    Examples:
        .. code-block:: python

            import paddle

            paddle.disable_static()
            input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
            label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])

            l1_loss = paddle.nn.functional.l1_loss(input, label)
            print(l1_loss.numpy())
            # [0.35]

            l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='none')
            print(l1_loss.numpy())
            # [[0.20000005 0.19999999]
            #  [0.2        0.79999995]]

            l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='sum')
            print(l1_loss.numpy())
            # [1.4]
    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
            "received %s, which is not allowed." % reduction)

    if in_dygraph_mode():
        unreduced = _elementwise_op_in_dygraph(
            input, label, axis=-1, act='abs', op_name='elementwise_sub')
        if reduction == 'mean':
            return core.ops.mean(unreduced)
        elif reduction == 'sum':
            return core.ops.reduce_sum(unreduced, 'dim', [0], 'keep_dim', False,
                                       'reduce_all', True)
        else:
            return unreduced

    fluid.data_feeder.check_variable_and_dtype(
        input, 'input', ['float32', 'float64', 'int32', 'int64'], 'l1_loss')
    fluid.data_feeder.check_variable_and_dtype(
        label, 'label', ['float32', 'float64', 'int32', 'int64'], 'l1_loss')

    if reduction == 'sum':
        unreduced = paddle.elementwise_sub(input, label, act='abs')
        return paddle.sum(unreduced, name=name)
    elif reduction == 'mean':
        unreduced = paddle.elementwise_sub(input, label, act='abs')
        return paddle.mean(unreduced, name=name)
    else:
        return paddle.elementwise_sub(input, label, act='abs', name=name)
def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels, cate_labels,
             grid_order_list, fg_num):
    """
    Get loss of network of SOLOv2.

    Args:
        cate_preds (list): Tensor list of category branch output.
        kernel_preds (list): Tensor list of kernel branch output.
        ins_pred (list): Tensor list of instance branch output.
        ins_labels (list): List of instance labels per batch.
        cate_labels (list): List of category labels per batch.
        grid_order_list (list): List of indices per grid.
        fg_num (int): Number of positive samples in a mini-batch.
    Returns:
        loss_ins (Tensor): The instance loss Tensor of SOLOv2 network.
        loss_cate (Tensor): The category loss Tensor of SOLOv2 network.
    """
    batch_size = paddle.shape(grid_order_list[0])[0]
    ins_pred_list = []
    for kernel_preds_level, grid_orders_level in zip(kernel_preds,
                                                     grid_order_list):
        if grid_orders_level.shape[1] == 0:
            ins_pred_list.append(None)
            continue
        grid_orders_level = paddle.reshape(grid_orders_level, [-1])
        reshape_pred = paddle.reshape(
            kernel_preds_level,
            shape=(paddle.shape(kernel_preds_level)[0],
                   paddle.shape(kernel_preds_level)[1], -1))
        reshape_pred = paddle.transpose(reshape_pred, [0, 2, 1])
        reshape_pred = paddle.reshape(
            reshape_pred, shape=(-1, paddle.shape(reshape_pred)[2]))
        gathered_pred = paddle.gather(reshape_pred, index=grid_orders_level)
        gathered_pred = paddle.reshape(
            gathered_pred,
            shape=[batch_size, -1, paddle.shape(gathered_pred)[1]])
        cur_ins_pred = ins_pred
        cur_ins_pred = paddle.reshape(
            cur_ins_pred,
            shape=(paddle.shape(cur_ins_pred)[0],
                   paddle.shape(cur_ins_pred)[1], -1))
        ins_pred_conv = paddle.matmul(gathered_pred, cur_ins_pred)
        cur_ins_pred = paddle.reshape(
            ins_pred_conv,
            shape=(-1, paddle.shape(ins_pred)[-2], paddle.shape(ins_pred)[-1]))
        ins_pred_list.append(cur_ins_pred)

    num_ins = paddle.sum(fg_num)
    cate_preds = [
        paddle.reshape(
            paddle.transpose(cate_pred, [0, 2, 3, 1]),
            shape=(-1, self.cate_out_channels)) for cate_pred in cate_preds
    ]
    flatten_cate_preds = paddle.concat(cate_preds)
    new_cate_labels = []
    for cate_label in cate_labels:
        new_cate_labels.append(paddle.reshape(cate_label, shape=[-1]))
    cate_labels = paddle.concat(new_cate_labels)

    loss_ins, loss_cate = self.solov2_loss(ins_pred_list, ins_labels,
                                           flatten_cate_preds, cate_labels,
                                           num_ins)

    return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
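# A minimal sketch (not from the original source) of the dynamic-convolution step
# in get_loss above: per-instance kernels gathered from the kernel branch act as
# 1x1 convolutions on the shared mask features via a batched matmul. All shapes
# below are illustrative placeholders.
import paddle

batch, num_ins, emb, h, w = 2, 6, 32, 40, 40
gathered_pred = paddle.randn([batch, num_ins, emb])   # per-instance kernels
ins_feat = paddle.randn([batch, emb, h * w])          # flattened mask features
masks = paddle.matmul(gathered_pred, ins_feat)        # [batch, num_ins, h * w]
masks = paddle.reshape(masks, [-1, h, w])             # one mask logit map per instance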
def get_test_score(self, entity_embedding, head, rel, tail):
    # Score every candidate entity as the head: || e + r - t ||_1
    head_score = paddle.sum(paddle.abs(entity_embedding + rel - tail), axis=1)
    # Score every candidate entity as the tail: || e - r - h ||_1 (== || h + r - e ||_1)
    tail_score = paddle.sum(paddle.abs(entity_embedding - rel - head), axis=1)
    return head_score, tail_score
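# A hedged usage sketch: rank all candidate entities for a test triple with the
# TransE-style scores above. The embeddings and the triple indices here are
# made-up placeholders; lower L1 distance means a better candidate.
import paddle

num_entities, dim = 1000, 200
entity_embedding = paddle.randn([num_entities, dim])
rel = paddle.randn([dim])                      # relation embedding
tail = entity_embedding[42]                    # true tail entity

head_score = paddle.sum(paddle.abs(entity_embedding + rel - tail), axis=1)
# rank of the true head (hypothetically entity 17) among all candidates
head_rank = paddle.argsort(head_score).numpy().tolist().index(17)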
def loss(self, z_2d, z_3d):
    loss = self.gcl_weight * self.gcl_loss(z_2d, z_3d).sum()
    for layer in self.mpnn_3d.edge2edge_layers:
        w_g = paddle.stack([conv.G.weight for conv in layer.conv_layer])
        # spatial smoothness: penalize squared differences between adjacent conv weights
        loss += self.spa_weight * paddle.sum((w_g[1:, :, :] - w_g[:-1, :, :])**2)
    return loss
def func(x):
    minimum_ = paddle.assign(minimum)
    scale_ = paddle.assign(scale)
    return paddle.sum(
        paddle.multiply(scale_, (F.square_error_cost(x, minimum_))))
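# A minimal sketch, assuming `minimum` and `scale` are tensors captured from the
# enclosing scope as in `func` above: the objective is a scale-weighted squared
# error, so its gradient at x is 2 * scale * (x - minimum).
import paddle
import paddle.nn.functional as F

minimum = paddle.to_tensor([1.0, 2.0])
scale = paddle.to_tensor([0.5, 2.0])
x = paddle.to_tensor([0.0, 0.0], stop_gradient=False)

loss = paddle.sum(paddle.multiply(scale, F.square_error_cost(x, minimum)))
loss.backward()
print(x.grad.numpy())  # [-1., -8.] == 2 * scale * (x - minimum)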
# Define the new input
newimg = paddle.tanh(modifier + timg) * boxmul + boxplus
#print("newimg shape: ", newimg.shape)  #[1, 3, 224, 224]
#print("new img type: ", type(newimg))
#newimg = paddle.to_tensor(newimg)
#print("new img type: ", type(newimg))

output = model(newimg)
pred_label = np.argmax(output)
print("in iter pred_label={}".format(pred_label))  #345

# Define the CW loss
# L2 norm
# print(newimg)
# print(paddle.tanh(timg) * boxmul + boxplus)
# loss2 = paddle.dist(newimg, (paddle.tanh(timg) * boxmul + boxplus), p=2)
loss2 = paddle.sum((newimg - (paddle.tanh(timg) * boxmul + boxplus))**2)
"""
# compute the probability of the label class versus the maximum other
real = tf.reduce_sum((tlab)*output,1)
# from the paper's open-source implementation
#other = tf.reduce_max((1-tlab)*output - (tlab*10000),1)
other = tf.reduce_max((1-tlab)*output)
loss1 = tf.maximum(0.0, other-real+k)
loss1 = tf.reduce_sum(const*loss1)
"""
#paddle.max((1 - tlab) * output) - paddle.max(output * tlab) + k
real = paddle.max(output * tlab)
other = paddle.max((1 - tlab) * output)
loss1 = other - real + k
loss1 = paddle.clip(loss1, min=0)
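# A small numeric sketch (illustrative values only) of the CW margin term built
# above: `real` is the target-class score, `other` is the best non-target score,
# and the hinge pushes `real` above `other` by at least `k`.
import paddle

output = paddle.to_tensor([2.0, 0.5, 1.0, 0.1])   # model scores for 4 classes
tlab = paddle.to_tensor([0.0, 1.0, 0.0, 0.0])     # one-hot target label
k = 0.0
real = paddle.max(output * tlab)                  # 0.5
other = paddle.max((1 - tlab) * output)           # 2.0
loss1 = paddle.clip(other - real + k, min=0)      # 1.5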
def forward(self,
            input_ids,
            attention_mask=None,
            decoder_input_ids=None,
            decoder_attention_mask=None,
            encoder_output=None,
            use_cache=False,
            cache=None):
    r"""
    The MBartForSequenceClassification forward method, overrides the __call__() special method.

    Args:
        input_ids (Tensor):
            See :class:`MBartModel`.
        attention_mask (Tensor, optional):
            See :class:`MBartModel`.
        decoder_input_ids (Tensor, optional):
            See :class:`MBartModel`.
        decoder_attention_mask (Tensor, optional):
            See :class:`MBartModel`.
        encoder_output (Tensor, optional):
            See :class:`MBartModel`.
        use_cache (bool, optional):
            See :class:`MBartModel`.
        cache (Tensor, optional):
            See :class:`MBartModel`.

    Returns:
        Tensor: Returns tensor `logits`, a tensor of the input text classification logits.
        Shape as `[batch_size, num_labels]` and dtype as float32.

    Example:
        .. code-block::

            import paddle
            from paddlenlp.transformers import MBartForSequenceClassification, MBartTokenizer

            tokenizer = MBartTokenizer.from_pretrained('mbart-large-cc25')
            model = MBartForSequenceClassification.from_pretrained('mbart-large-cc25')

            inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
            inputs = {k: paddle.to_tensor([v]) for (k, v) in inputs.items()}
            logits = model(**inputs)
    """
    output = self.mbart(input_ids, attention_mask, decoder_input_ids,
                        decoder_attention_mask, encoder_output, use_cache,
                        cache)
    if use_cache:
        output = output[0]
    eos_mask = paddle.cast(input_ids == self.mbart.config['eos_token_id'],
                           dtype='int64')
    if len(paddle.unique(paddle.sum(eos_mask, axis=1))) > 1:
        raise ValueError(
            'All examples must have the same number of <eos> tokens.')
    output_shape = paddle.shape(output)
    # TODO(gongenlei): support bool tensor index
    output = output.masked_select(
        eos_mask.unsqueeze(-1).astype('bool').tile([1, 1, output_shape[-1]]))
    sentence_representation = output.reshape(
        [output_shape[0], -1, output_shape[-1]])[:, -1, :]
    logits = self.classifier(sentence_representation)
    return logits
def create_loss(self, pred, label):
    cost = paddle.nn.functional.log_loss(
        input=pred, label=paddle.cast(label, dtype="float32"))
    avg_cost = paddle.sum(x=cost)
    return avg_cost
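# A hedged usage sketch of the loss above. Note that despite the name,
# `avg_cost` is a sum over the batch, not a mean. The inputs are made-up
# probabilities in (0, 1), as paddle.nn.functional.log_loss expects.
import paddle

pred = paddle.to_tensor([[0.9], [0.2]])
label = paddle.to_tensor([[1], [0]])
cost = paddle.nn.functional.log_loss(
    input=pred, label=paddle.cast(label, dtype="float32"))
avg_cost = paddle.sum(x=cost)   # scalar: -log(0.9) - log(0.8), up to epsilon terms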
def forward(self, cls_logits, bboxes_reg, centerness, tag_labels, tag_bboxes,
            tag_center):
    """
    Calculate the loss for classification, location and centerness

    Args:
        cls_logits (list): list of Tensor, which is predicted score for all
            anchor points with shape [N, M, C]
        bboxes_reg (list): list of Tensor, which is predicted offsets for all
            anchor points with shape [N, M, 4]
        centerness (list): list of Tensor, which is predicted centerness for
            all anchor points with shape [N, M, 1]
        tag_labels (list): list of Tensor, which is category targets for each
            anchor point
        tag_bboxes (list): list of Tensor, which is bounding box targets for
            positive samples
        tag_center (list): list of Tensor, which is centerness targets for
            positive samples
    Return:
        loss (dict): loss composed of classification loss, bounding box
            regression loss and centerness loss
    """
    cls_logits_flatten_list = []
    bboxes_reg_flatten_list = []
    centerness_flatten_list = []
    tag_labels_flatten_list = []
    tag_bboxes_flatten_list = []
    tag_center_flatten_list = []
    num_lvl = len(cls_logits)
    for lvl in range(num_lvl):
        cls_logits_flatten_list.append(flatten_tensor(cls_logits[lvl], True))
        bboxes_reg_flatten_list.append(flatten_tensor(bboxes_reg[lvl], True))
        centerness_flatten_list.append(flatten_tensor(centerness[lvl], True))
        tag_labels_flatten_list.append(flatten_tensor(tag_labels[lvl], False))
        tag_bboxes_flatten_list.append(flatten_tensor(tag_bboxes[lvl], False))
        tag_center_flatten_list.append(flatten_tensor(tag_center[lvl], False))

    cls_logits_flatten = paddle.concat(cls_logits_flatten_list, axis=0)
    bboxes_reg_flatten = paddle.concat(bboxes_reg_flatten_list, axis=0)
    centerness_flatten = paddle.concat(centerness_flatten_list, axis=0)
    tag_labels_flatten = paddle.concat(tag_labels_flatten_list, axis=0)
    tag_bboxes_flatten = paddle.concat(tag_bboxes_flatten_list, axis=0)
    tag_center_flatten = paddle.concat(tag_center_flatten_list, axis=0)
    tag_labels_flatten.stop_gradient = True
    tag_bboxes_flatten.stop_gradient = True
    tag_center_flatten.stop_gradient = True

    mask_positive_bool = tag_labels_flatten > 0
    mask_positive_bool.stop_gradient = True
    mask_positive_float = paddle.cast(mask_positive_bool, dtype="float32")
    mask_positive_float.stop_gradient = True

    num_positive_fp32 = paddle.sum(mask_positive_float)
    num_positive_fp32.stop_gradient = True
    num_positive_int32 = paddle.cast(num_positive_fp32, dtype="int32")
    num_positive_int32 = num_positive_int32 * 0 + 1
    num_positive_int32.stop_gradient = True
    normalize_sum = paddle.sum(tag_center_flatten * mask_positive_float)
    normalize_sum.stop_gradient = True

    # 1. cls_logits: sigmoid_focal_loss
    # expand onehot labels
    num_classes = cls_logits_flatten.shape[-1]
    tag_labels_flatten = paddle.squeeze(tag_labels_flatten, axis=-1)
    tag_labels_flatten_bin = F.one_hot(
        tag_labels_flatten, num_classes=1 + num_classes)
    tag_labels_flatten_bin = tag_labels_flatten_bin[:, 1:]
    # sigmoid_focal_loss
    cls_loss = F.sigmoid_focal_loss(
        cls_logits_flatten, tag_labels_flatten_bin) / num_positive_fp32

    # 2. bboxes_reg: giou_loss
    mask_positive_float = paddle.squeeze(mask_positive_float, axis=-1)
    tag_center_flatten = paddle.squeeze(tag_center_flatten, axis=-1)
    reg_loss = self.__iou_loss(
        bboxes_reg_flatten,
        tag_bboxes_flatten,
        mask_positive_float,
        weights=tag_center_flatten)
    reg_loss = reg_loss * mask_positive_float / normalize_sum

    # 3. centerness: sigmoid_cross_entropy_with_logits_loss
    centerness_flatten = paddle.squeeze(centerness_flatten, axis=-1)
    ctn_loss = ops.sigmoid_cross_entropy_with_logits(centerness_flatten,
                                                     tag_center_flatten)
    ctn_loss = ctn_loss * mask_positive_float / num_positive_fp32

    loss_all = {
        "loss_centerness": paddle.sum(ctn_loss),
        "loss_cls": paddle.sum(cls_loss),
        "loss_box": paddle.sum(reg_loss)
    }
    return loss_all
def forward(self, x):
    x = self.bn_s1(x)
    out = paddle.sum(paddle.abs(self.bn_s2(x)))
    return out
def cal_feature(engine, name='gallery'):
    all_feas = None
    all_image_id = None
    all_unique_id = None
    has_unique_id = False

    if name == 'gallery':
        dataloader = engine.gallery_dataloader
    elif name == 'query':
        dataloader = engine.query_dataloader
    elif name == 'gallery_query':
        dataloader = engine.gallery_query_dataloader
    else:
        raise RuntimeError(
            "Only support gallery, query or gallery_query dataset")

    max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len(
        dataloader)
    for idx, batch in enumerate(dataloader):  # load is very time-consuming
        if idx >= max_iter:
            break
        if idx % engine.config["Global"]["print_batch_step"] == 0:
            logger.info(
                f"{name} feature calculation process: [{idx}/{len(dataloader)}]"
            )
        if engine.use_dali:
            batch = [
                paddle.to_tensor(batch[0]['data']),
                paddle.to_tensor(batch[0]['label'])
            ]
        batch = [paddle.to_tensor(x) for x in batch]
        batch[1] = batch[1].reshape([-1, 1]).astype("int64")
        if len(batch) == 3:
            has_unique_id = True
            batch[2] = batch[2].reshape([-1, 1]).astype("int64")
        out = engine.model(batch[0], batch[1])
        batch_feas = out["features"]

        # do norm
        if engine.config["Global"].get("feature_normalize", True):
            feas_norm = paddle.sqrt(
                paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))
            batch_feas = paddle.divide(batch_feas, feas_norm)

        # do binarize
        if engine.config["Global"].get("feature_binarize") == "round":
            batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0

        if engine.config["Global"].get("feature_binarize") == "sign":
            batch_feas = paddle.sign(batch_feas).astype("float32")

        if all_feas is None:
            all_feas = batch_feas
            if has_unique_id:
                all_unique_id = batch[2]
            all_image_id = batch[1]
        else:
            all_feas = paddle.concat([all_feas, batch_feas])
            all_image_id = paddle.concat([all_image_id, batch[1]])
            if has_unique_id:
                all_unique_id = paddle.concat([all_unique_id, batch[2]])

    if engine.use_dali:
        dataloader.reset()

    if paddle.distributed.get_world_size() > 1:
        feat_list = []
        img_id_list = []
        unique_id_list = []
        paddle.distributed.all_gather(feat_list, all_feas)
        paddle.distributed.all_gather(img_id_list, all_image_id)
        all_feas = paddle.concat(feat_list, axis=0)
        all_image_id = paddle.concat(img_id_list, axis=0)
        if has_unique_id:
            paddle.distributed.all_gather(unique_id_list, all_unique_id)
            all_unique_id = paddle.concat(unique_id_list, axis=0)

    logger.info("Build {} done, all feat shape: {}, begin to eval..".format(
        name, all_feas.shape))
    return all_feas, all_image_id, all_unique_id
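# A minimal sketch of the "do norm" step above: each row of the feature matrix
# is scaled to unit L2 norm, so retrieval can use the inner product as cosine
# similarity. Shapes are illustrative.
import paddle

batch_feas = paddle.randn([4, 128])
feas_norm = paddle.sqrt(
    paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))
batch_feas = paddle.divide(batch_feas, feas_norm)
print(paddle.sum(paddle.square(batch_feas), axis=1))  # ~[1., 1., 1., 1.]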
def ctc_loss(log_probs,
             labels,
             input_lengths,
             label_lengths,
             blank=0,
             reduction='mean'):
    """
    An operator integrating the open source Warp-CTC library
    (https://github.com/baidu-research/warp-ctc) to compute Connectionist
    Temporal Classification (CTC) loss. It can be aliased as softmax with CTC,
    since a native softmax activation is integrated to the Warp-CTC library to
    normalize values for each row of the input tensor.

    Parameters:
        log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor.
            The tensor shape is [max_logit_length, batch_size, num_classes + 1],
            where max_logit_length is the longest length of input logit sequence.
            The data type must be float32.
        labels (Tensor): The ground truth sequence with padding, which must be a 2-D Tensor.
            The tensor shape is [batch_size, max_label_length],
            where max_label_length is the longest length of label sequence.
            The data type must be int32.
        input_lengths (Tensor): The length for each input sequence, it should have shape [batch_size] and dtype int64.
        label_lengths (Tensor): The length for each label sequence, it should have shape [batch_size] and dtype int64.
        blank (int, optional): The blank label index of Connectionist Temporal Classification (CTC) loss,
            which is in the half-open interval [0, num_classes + 1). The data type must be int32. Default is 0.
        reduction (string, optional): Indicate how to average the loss, the candidates are
            ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the output loss
            will be divided by the label_lengths, and then return the mean of quotient;
            If :attr:`reduction` is ``'sum'``, return the sum of loss;
            If :attr:`reduction` is ``'none'``, no reduction will be applied. Default is ``'mean'``.

    Returns:
        Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``.
        If :attr:`reduction` is ``'none'``, the shape of loss is [batch_size],
        otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``.

    Examples:

        .. code-block:: python

            # declarative mode
            import paddle.nn.functional as F
            import numpy as np
            import paddle

            # length of the longest logit sequence
            max_seq_length = 4
            # length of the longest label sequence
            max_label_length = 3
            # number of logit sequences
            batch_size = 2
            # class num
            class_num = 3

            np.random.seed(1)
            log_probs = np.array([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04],
                                   [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]],

                                  [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01],
                                   [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]],

                                  [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02],
                                   [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]],

                                  [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01],
                                   [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]],

                                  [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02],
                                   [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]]).astype("float32")
            labels = np.array([[1, 2, 2],
                               [1, 2, 2]]).astype("int32")
            input_lengths = np.array([5, 5]).astype("int64")
            label_lengths = np.array([3, 3]).astype("int64")

            paddle.disable_static()
            log_probs = paddle.to_tensor(log_probs)
            labels = paddle.to_tensor(labels)
            input_lengths = paddle.to_tensor(input_lengths)
            label_lengths = paddle.to_tensor(label_lengths)

            loss = F.ctc_loss(log_probs, labels,
                              input_lengths,
                              label_lengths,
                              blank=0,
                              reduction='none')
            print(loss.numpy())  #[3.9179852 2.9076521]

            loss = F.ctc_loss(log_probs, labels,
                              input_lengths,
                              label_lengths,
                              blank=0,
                              reduction='mean')
            print(loss.numpy())  #[1.1376063]
    """
    loss_out = fluid.layers.warpctc(log_probs, labels, blank, False,
                                    input_lengths, label_lengths)

    loss_out = fluid.layers.squeeze(loss_out, [-1])
    assert reduction in ['mean', 'sum', 'none']
    if reduction == 'mean':
        loss_out = paddle.mean(loss_out / paddle.cast(label_lengths,
                                                      loss_out.dtype))
    elif reduction == 'sum':
        loss_out = paddle.sum(loss_out)
    return loss_out
def mse_loss(input, label, reduction='mean', name=None):
    r"""
    This op accepts input predictions and label and returns the mean square error.

    If :attr:`reduction` is set to ``'none'``, loss is calculated as:

    .. math::
        Out = (input - label)^2

    If :attr:`reduction` is set to ``'mean'``, loss is calculated as:

    .. math::
        Out = \operatorname{mean}((input - label)^2)

    If :attr:`reduction` is set to ``'sum'``, loss is calculated as:

    .. math::
        Out = \operatorname{sum}((input - label)^2)

    Parameters:
        input (Tensor): Input tensor, the data type should be float32 or float64.
        label (Tensor): Label tensor, the data type should be float32 or float64.
        reduction (string, optional): The reduction method for the output,
            could be 'none' | 'mean' | 'sum'.
            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned.
            If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
            Default is ``'mean'``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: The tensor storing the mean square error difference of input and label.

    Return type: Tensor.

    Examples:
        .. code-block:: python

            import numpy as np
            import paddle

            # static graph mode
            paddle.enable_static()
            mse_loss = paddle.nn.loss.MSELoss()
            input = paddle.data(name="input", shape=[1])
            label = paddle.data(name="label", shape=[1])
            place = paddle.CPUPlace()
            input_data = np.array([1.5]).astype("float32")
            label_data = np.array([1.7]).astype("float32")

            output = mse_loss(input, label)
            exe = paddle.static.Executor(place)
            exe.run(paddle.static.default_startup_program())
            output_data = exe.run(
                paddle.static.default_main_program(),
                feed={"input": input_data, "label": label_data},
                fetch_list=[output],
                return_numpy=True)
            print(output_data)
            # [array([0.04000002], dtype=float32)]

            # dynamic graph mode
            paddle.disable_static()
            input = paddle.to_tensor(1.5)
            label = paddle.to_tensor(1.7)
            output = mse_loss(input, label)
            print(output.numpy())
            # [0.04000002]
    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "'reduction' in 'mse_loss' should be 'sum', 'mean' or 'none', "
            "but received {}.".format(reduction))

    if not paddle.fluid.framework.in_dygraph_mode():
        paddle.fluid.data_feeder.check_variable_and_dtype(
            input, 'input', ['float32', 'float64'], 'mse_loss')
        paddle.fluid.data_feeder.check_variable_and_dtype(
            label, 'label', ['float32', 'float64'], 'mse_loss')

    if reduction == 'none':
        return paddle.fluid.layers.square(
            paddle.fluid.layers.elementwise_sub(input, label), name=name)
    elif reduction == 'mean':
        return paddle.mean(
            paddle.fluid.layers.square(
                paddle.fluid.layers.elementwise_sub(input, label)),
            name=name)
    else:
        return paddle.sum(
            paddle.fluid.layers.square(
                paddle.fluid.layers.elementwise_sub(input, label)),
            name=name)
def binary_cross_entropy(input, label, weight=None, reduction='mean',
                         name=None):
    """
    This op measures the binary_cross_entropy loss between input predictions
    ``input`` and target labels ``label``. The binary_cross_entropy loss can be
    described as:

    If :attr:`weight` is set, the loss is:

    .. math::
        Out = -1 * weight * (label * log(input) + (1 - label) * log(1 - input))

    If :attr:`weight` is None, the loss is:

    .. math::
        Out = -1 * (label * log(input) + (1 - label) * log(1 - input))

    If :attr:`reduction` set to ``'none'``, the interface will return the original loss `Out`.

    If :attr:`reduction` set to ``'mean'``, the reduced mean loss is:

    .. math::
        Out = MEAN(Out)

    If :attr:`reduction` set to ``'sum'``, the reduced sum loss is:

    .. math::
        Out = SUM(Out)

    Note that the input predictions ``input`` should always be the output of a
    sigmoid, and the target labels ``label`` should be numbers between 0 and 1.

    Parameters:
        input (Tensor): The input predictions tensor. 2-D tensor with shape: [N, *],
            N is batch_size, `*` means number of additional dimensions. The ``input``
            should always be the output of sigmoid. Available dtype is float32, float64.
        label (Tensor): The target labels tensor. 2-D tensor with the same shape as
            ``input``. The target labels which values should be numbers between 0 and 1.
            Available dtype is float32, float64.
        weight (Tensor, optional): A manual rescaling weight given to the loss of each
            batch element. If given, has to be a Tensor of size nbatch and the data type
            is float32, float64. Default is ``'None'``.
        reduction (str, optional): Indicate how to average the loss by batch_size,
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            If :attr:`reduction` is ``'sum'``, the summed loss is returned.
            Default is ``'mean'``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        output (Tensor): If ``reduction`` is ``'none'``, the shape of output is
            same as ``input``, else the shape of output is scalar.

    Examples:
        .. code-block:: python

            import paddle
            paddle.disable_static()
            input = paddle.to_tensor([0.5, 0.6, 0.7], 'float32')
            label = paddle.to_tensor([1.0, 0.0, 1.0], 'float32')
            output = paddle.nn.functional.binary_cross_entropy(input, label)
            print(output.numpy())  # [0.65537095]
    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "The value of 'reduction' in binary_cross_entropy should be 'sum', "
            "'mean' or 'none', but received %s, which is not allowed." %
            reduction)

    if in_dygraph_mode():
        out = core.ops.bce_loss(input, label)
        if weight is not None:
            out = core.ops.elementwise_mul(out, weight, 'axis', -1)

        if reduction == 'sum':
            return core.ops.reduce_sum(out, 'dim', [0], 'keep_dim', False,
                                       "reduce_all", True)
        elif reduction == 'mean':
            return core.ops.mean(out)
        else:
            return out

    fluid.data_feeder.check_variable_and_dtype(input, 'input',
                                               ['float32', 'float64'],
                                               'binary_cross_entropy')
    fluid.data_feeder.check_variable_and_dtype(label, 'label',
                                               ['float32', 'float64'],
                                               'binary_cross_entropy')

    sub_name = name if weight is None and reduction == 'none' else None
    helper = LayerHelper("binary_cross_entropy", name=sub_name)
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(
        type='bce_loss',
        inputs={
            'X': [input],
            'Label': [label],
        },
        outputs={'Out': [out]})

    if weight is not None:
        if isinstance(weight, paddle.framework.Variable):
            weight_name = name if reduction == 'none' else None
            out = paddle.multiply(out, weight, axis=-1, name=weight_name)
        else:
            raise ValueError(
                "The weight is not a Tensor, please convert to Tensor.")

    if reduction == 'sum':
        return paddle.sum(out, name=name)
    elif reduction == 'mean':
        return paddle.mean(out, name=name)
    else:
        return out
def greedy_search_infilling(model,
                            token_ids,
                            token_type_ids,
                            sos_id,
                            eos_id,
                            attn_id,
                            pad_id,
                            unk_id,
                            vocab_size,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    _, logits, info = model(token_ids, token_type_ids)
    d_batch, d_seqlen = token_ids.shape
    seqlen = paddle.sum(paddle.cast(token_ids != 0, 'int64'), 1, keepdim=True)
    has_stopped = np.zeros([d_batch], dtype=bool)
    gen_seq_len = np.zeros([d_batch], dtype=np.int64)
    output_ids = []

    past_cache = info['caches']

    cls_ids = paddle.ones([d_batch], dtype='int64') * sos_id
    attn_ids = paddle.ones([d_batch], dtype='int64') * attn_id
    ids = paddle.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(token_ids, ids, step)
        pos_ids = paddle.to_tensor(
            np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                paddle.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        if logits.shape[-1] > vocab_size:
            logits[:, :, vocab_size:] = 0
        logits[:, :, pad_id] = 0
        logits[:, :, unk_id] = 0
        logits[:, :, attn_id] = 0

        gen_ids = paddle.argmax(logits, -1)

        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        cached_k = [
            paddle.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]  # concat cached
        cached_v = [
            paddle.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        gen_ids = gen_ids[:, 1]
        ids = paddle.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id).astype(bool)
        gen_seq_len += (1 - has_stopped.astype(np.int64))
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
def deep_match(self, item_his_eb, context_his_eb, mask, match_mask,
               mid_his_batch, item_vectors, item_biases, n_mid):
    query = context_his_eb
    query = self.query_layer(
        query)  # [-1, self.history_length, self.main_embedding_size*2]
    query = self.query_prelu(query)

    inputs = paddle.concat(
        [query, item_his_eb, query - item_his_eb, query * item_his_eb],
        axis=-1)  # B,T,E
    att_layer1 = self.att_layer1_layer(inputs)
    att_layer1 = F.sigmoid(att_layer1)
    att_layer2 = self.att_layer2_layer(att_layer1)
    att_layer2 = F.sigmoid(att_layer2)
    att_layer3 = self.att_layer3_layer(att_layer2)  # B,T,1
    scores = paddle.transpose(att_layer3, [0, 2, 1])  # B,1,T

    # mask
    bool_mask = paddle.equal(mask, paddle.ones_like(mask))  # B,T
    key_masks = paddle.unsqueeze(bool_mask, axis=1)  # B,1,T
    paddings = paddle.ones_like(scores) * (-2**32 + 1)
    scores = paddle.where(key_masks, scores, paddings)

    # tril
    scores_tile = paddle.tile(
        paddle.sum(scores, axis=1), [1, paddle.shape(scores)[-1]])  # B, T*T
    scores_tile = paddle.reshape(scores_tile, [
        -1, paddle.shape(scores)[-1], paddle.shape(scores)[-1]
    ])  # B, T, T
    diag_vals = paddle.ones_like(scores_tile)  # B, T, T
    tril = paddle.tril(diag_vals)
    paddings = paddle.ones_like(tril) * (-2**32 + 1)
    scores_tile = paddle.where(
        paddle.equal(tril, paddle.full([1], 0.0, "float32")), paddings,
        scores_tile)  # B, T, T
    scores_tile = F.softmax(scores_tile)  # B, T, T
    att_dm_item_his_eb = paddle.matmul(scores_tile, item_his_eb)  # B, T, E

    dnn_layer1 = self.dnn_layer1_layer(att_dm_item_his_eb)
    dnn_layer1 = dnn_layer1.reshape(
        [-1, self.history_length, self.main_embedding_size])
    ## dnn_layer1 = self.dnn_layer1_prelu(dnn_layer1)

    # target mask
    user_vector = dnn_layer1[:, -1, :]  # B, E
    user_vector2 = dnn_layer1[:, -2, :] * paddle.reshape(
        match_mask, [-1, paddle.shape(match_mask)[1], 1])[:, -2, :]  # B, E

    num_sampled = 2000
    labels = paddle.reshape(mid_his_batch[:, -1], [-1, 1])  # B, 1
    # no sampling, slow
    # [B, E] * [E_size, cate_size]
    logits = paddle.matmul(user_vector2, item_vectors, transpose_y=True)
    logits = paddle.add(logits, item_biases)
    loss = F.cross_entropy(input=logits, label=labels)

    return loss, user_vector, scores
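# A minimal sketch (shapes made up) of the lower-triangular masking used above:
# entries above the diagonal are filled with a large negative number so that,
# after softmax, each position attends only to itself and earlier positions.
import paddle
import paddle.nn.functional as F

scores_tile = paddle.randn([2, 4, 4])  # [B, T, T]
tril = paddle.tril(paddle.ones_like(scores_tile))
neg_inf = paddle.ones_like(scores_tile) * (-2**32 + 1)
scores_tile = paddle.where(
    paddle.equal(tril, paddle.full([1], 0.0, "float32")), neg_inf, scores_tile)
weights = F.softmax(scores_tile)  # rows sum to 1; upper triangle is ~0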
def compute_mle_loss(self, enc_input, example, desc_enc, debug=False):
    """compute mle loss"""
    traversal = TrainTreeTraversal(self, desc_enc, debug)
    traversal.step(None)
    #### for debug
    #class List(list):
    #    def __init__(self, *args, **kwargs):
    #        """ """
    #        super(List, self).__init__(*args, **kwargs)
    #    def append(self, *args, **kwargs):
    #        """ """
    #        super().append(*args, **kwargs)
    #        print('append:', list(reversed(self)))
    #    def pop(self):
    #        """ """
    #        print('pop:', list(reversed(self)))
    #        item = super().pop()
    #        return item
    #
    #queue = List()
    #queue.append(
    #    TreeState(
    #        node=example.tree,
    #        parent_field_type=self.preproc.grammar.root_type,
    #    ))
    queue = [
        TreeState(
            node=example.tree,
            parent_field_type=self.preproc.grammar.root_type,
        )
    ]
    while queue:
        item = queue.pop()
        node = item.node
        parent_field_type = item.parent_field_type

        if isinstance(node, (list, tuple)):
            node_type = parent_field_type + '*'
            rule = (node_type, len(node))
            rule_idx = self.rules_index[rule]
            assert traversal.cur_item.state == TreeTraversal.State.LIST_LENGTH_APPLY
            traversal.step(rule_idx)

            if self.preproc.use_seq_elem_rules and parent_field_type in self.ast_wrapper.sum_types:
                parent_field_type += '_seq_elem'

            for i, elem in reversed(list(enumerate(node))):
                queue.append(
                    TreeState(
                        node=elem,
                        parent_field_type=parent_field_type,
                    ))
            continue

        if parent_field_type in self.preproc.grammar.pointers:
            assert isinstance(node, int)
            assert traversal.cur_item.state == TreeTraversal.State.POINTER_APPLY
            pointer_map = desc_enc.pointer_maps.get(parent_field_type)
            # TODO: fix -1
            if node == -1:
                node = 0
            if pointer_map:
                values = pointer_map[node]
                traversal.step(values[0])
            else:
                traversal.step(node)
            continue

        if parent_field_type in self.ast_wrapper.primitive_types:
            # identifier, int, string, bytes, object, singleton
            # - could be bytes, str, int, float, bool, NoneType
            # - terminal tokens vocabulary is created by turning everything into a string (with `str`)
            # - at decoding time, cast back to str/int/float/bool
            field_type = type(node).__name__
            field_value_split = self.preproc.grammar.tokenize_field_value(
                node) + [vocab.EOS]

            for token in field_value_split:
                assert traversal.cur_item.state == TreeTraversal.State.GEN_TOKEN
                traversal.step(token)
            continue

        type_info = self.ast_wrapper.singular_types[node['_type']]

        if parent_field_type in self.preproc.sum_type_constructors:
            # ApplyRule, like expr -> Call
            rule = (parent_field_type, type_info.name)
            rule_idx = self.rules_index[rule]
            assert traversal.cur_item.state == TreeTraversal.State.SUM_TYPE_APPLY
            extra_rules = [
                self.rules_index[parent_field_type, extra_type]
                for extra_type in node.get('_extra_types', [])
            ]
            traversal.step(rule_idx, extra_rules)

        if type_info.fields:
            # ApplyRule, like Call -> expr[func] expr*[args] keyword*[keywords]
            # Figure out which rule needs to be applied
            present = sql_preproc_v2.get_field_presence_info(
                self.ast_wrapper, node, type_info.fields)
            rule = (node['_type'], tuple(present))
            rule_idx = self.rules_index[rule]
            assert traversal.cur_item.state == TreeTraversal.State.CHILDREN_APPLY
            traversal.step(rule_idx)

        # reversed so that we perform a DFS in left-to-right order
        for field_info in reversed(type_info.fields):
            if field_info.name not in node:
                continue
            queue.append(
                TreeState(
                    node=node[field_info.name],
                    parent_field_type=field_info.type,
                ))

    loss = paddle.sum(paddle.stack(tuple(traversal.loss), axis=0), axis=0)
    if debug:
        return loss, [attr.asdict(entry) for entry in traversal.history]
    else:
        return loss
def forward(self, inputs_tensor, is_infer=0):
    # input
    inputs = inputs_tensor[0]  # sparse_tensor
    dense_tensor = inputs_tensor[1]
    self.btag_his = inputs[:, 0:self.history_length]
    self.cate_his = inputs[:, self.history_length:self.history_length * 2]
    self.brand_his = inputs[:, self.history_length * 2:self.history_length * 3]
    self.mask = inputs[:, self.history_length * 3:self.history_length * 4]
    self.match_mask = inputs[:, self.history_length * 4:self.history_length * 5]

    self.uid = inputs[:, self.history_length * 5]
    self.cms_segid = inputs[:, self.history_length * 5 + 1]
    self.cms_group_id = inputs[:, self.history_length * 5 + 2]
    self.final_gender_code = inputs[:, self.history_length * 5 + 3]
    self.age_level = inputs[:, self.history_length * 5 + 4]
    self.pvalue_level = inputs[:, self.history_length * 5 + 5]
    self.shopping_level = inputs[:, self.history_length * 5 + 6]
    self.occupation = inputs[:, self.history_length * 5 + 7]
    self.new_user_class_level = inputs[:, self.history_length * 5 + 8]
    self.mid = inputs[:, self.history_length * 5 + 9]
    self.cate_id = inputs[:, self.history_length * 5 + 10]
    self.campaign_id = inputs[:, self.history_length * 5 + 11]
    self.customer = inputs[:, self.history_length * 5 + 12]
    self.brand = inputs[:, self.history_length * 5 + 13]
    self.price = dense_tensor.astype('float32')
    self.pid = inputs[:, self.history_length * 5 + 15]
    if is_infer == 0:
        self.labels = inputs[:, self.history_length * 5 + 16]

    # embedding layer
    self.uid_batch_embedded = self.uid_embeddings_var(self.uid)
    self.mid_batch_embedded = self.mid_embeddings_var(self.mid)
    self.cat_batch_embedded = self.cat_embeddings_var(self.cate_id)
    self.cat_his_batch_embedded = self.cat_embeddings_var(self.cate_his)
    self.brand_batch_embedded = self.brand_embeddings_var(self.brand)
    self.brand_his_batch_embedded = self.brand_embeddings_var(self.brand_his)
    self.btag_his_batch_embedded = self.btag_embeddings_var(self.btag_his)
    self.dm_btag_his_batch_embedded = self.dm_btag_embeddings_var(
        self.btag_his)
    self.campaign_id_batch_embedded = self.campaign_id_embeddings_var(
        self.campaign_id)
    self.customer_batch_embedded = self.customer_embeddings_var(self.customer)
    self.cms_segid_batch_embedded = self.cms_segid_embeddings_var(
        self.cms_segid)
    self.cms_group_id_batch_embedded = self.cms_group_id_embeddings_var(
        self.cms_group_id)
    self.final_gender_code_batch_embedded = self.final_gender_code_embeddings_var(
        self.final_gender_code)
    self.age_level_batch_embedded = self.age_level_embeddings_var(
        self.age_level)
    self.pvalue_level_batch_embedded = self.pvalue_level_embeddings_var(
        self.pvalue_level)
    self.shopping_level_batch_embedded = self.shopping_level_embeddings_var(
        self.shopping_level)
    self.occupation_batch_embedded = self.occupation_embeddings_var(
        self.occupation)
    self.new_user_class_level_batch_embedded = self.new_user_class_level_embeddings_var(
        self.new_user_class_level)
    self.pid_batch_embedded = self.pid_embeddings_var(self.pid)

    self.user_feat = paddle.concat([
        self.uid_batch_embedded, self.cms_segid_batch_embedded,
        self.cms_group_id_batch_embedded,
        self.final_gender_code_batch_embedded, self.age_level_batch_embedded,
        self.pvalue_level_batch_embedded, self.shopping_level_batch_embedded,
        self.occupation_batch_embedded,
        self.new_user_class_level_batch_embedded
    ], -1)
    self.item_his_eb = paddle.concat(
        [self.cat_his_batch_embedded, self.brand_his_batch_embedded], -1)
    self.item_his_eb_sum = paddle.sum(self.item_his_eb, 1)
    self.item_feat = paddle.concat([
        self.mid_batch_embedded, self.cat_batch_embedded,
        self.brand_batch_embedded, self.campaign_id_batch_embedded,
        self.customer_batch_embedded, self.price
    ], -1)
    self.item_eb = paddle.concat(
        [self.cat_batch_embedded, self.brand_batch_embedded], -1)
    self.context_feat = self.pid_batch_embedded

    self.position_his_eb = self.position_embeddings_var(
        self.position_his)  # T, E
    self.position_his_eb = paddle.tile(
        self.position_his_eb, [paddle.shape(self.mid)[0], 1])  # B*T, E
    self.position_his_eb = paddle.reshape(self.position_his_eb, [
        paddle.shape(self.mid)[0], -1,
        paddle.shape(self.position_his_eb)[1]
    ])  # B, T, E

    self.dm_position_his_eb = self.dm_position_embeddings_var(
        self.dm_position_his)  # T, E
    self.dm_position_his_eb = paddle.tile(
        self.dm_position_his_eb, [paddle.shape(self.mid)[0], 1])  # B*T, E
    self.dm_position_his_eb = paddle.reshape(self.dm_position_his_eb, [
        paddle.shape(self.mid)[0], -1,
        paddle.shape(self.dm_position_his_eb)[1]
    ])  # B, T, E

    self.position_his_eb = paddle.concat(
        [self.position_his_eb, self.btag_his_batch_embedded], -1)
    self.dm_position_his_eb = paddle.concat(
        [self.dm_position_his_eb, self.dm_btag_his_batch_embedded], -1)

    # User-to-Item Network
    # Auxiliary Match Network
    self.match_mask = paddle.cast(self.match_mask, 'float32')
    self.aux_loss, self.dm_user_vector, scores = self._deep_match(
        self.item_his_eb, self.dm_position_his_eb, self.mask,
        self.match_mask, self.cate_his, self.dm_item_vectors_var.weight,
        self.dm_item_biases, self.cate_size)
    self.aux_loss *= 0.1
    self.dm_item_vec = self.dm_item_vectors_var(self.cate_id)
    rel_u2i = paddle.sum(
        self.dm_user_vector * self.dm_item_vec, -1, keepdim=True)  # B,1
    self.rel_u2i = rel_u2i

    # Item-to-Item Network
    att_outputs, alphas, scores_unnorm = self._dmr_fcn_attention(
        self.item_eb, self.item_his_eb, self.position_his_eb, self.mask)
    rel_i2i = paddle.unsqueeze(paddle.sum(scores_unnorm, [1, 2]), -1)
    self.rel_i2i = rel_i2i
    self.scores = paddle.sum(alphas, 1)

    inp = paddle.concat([
        self.user_feat, self.item_feat, self.context_feat,
        self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, rel_u2i,
        rel_i2i, att_outputs
    ], -1)

    # build fcn net
    inp = self.inp_layer(inp)
    dnn0 = self.dnn0_layer(inp)
    dnn0 = self.dnn0_prelu(dnn0)
    dnn1 = self.dnn1_layer(dnn0)
    dnn1 = self.dnn1_prelu(dnn1)
    dnn2 = self.dnn2_layer(dnn1)
    dnn2 = self.dnn2_prelu(dnn2)
    dnn3 = self.dnn3_layer(dnn2)
    dnn3 = self.dnn3_prelu(dnn3)

    # prediction
    self.y_hat = F.sigmoid(dnn3)

    if is_infer == 0:
        # Cross-entropy loss
        x = paddle.sum(dnn3, 1)
        BCE = paddle.nn.BCEWithLogitsLoss()
        ctr_loss = paddle.mean(BCE(x, label=self.labels.astype('float32')))
        self.ctr_loss = ctr_loss
        self.loss = self.ctr_loss + self.aux_loss
        return self.y_hat, self.loss
    else:
        return self.y_hat, paddle.ones(shape=[1])
def test_net(cfg,
             epoch_idx=-1,
             output_dir=None,
             test_data_loader=None,
             test_writer=None,
             res_gru_net=None):
    # Load taxonomies of dataset
    taxonomies = []
    with open(
            cfg.DATASETS[cfg.DATASET.TEST_DATASET.upper()].TAXONOMY_FILE_PATH,
            encoding='utf-8') as file:
        taxonomies = json.loads(file.read())
    taxonomies = {t['taxonomy_id']: t for t in taxonomies}

    # Set up data loader
    if test_data_loader is None:
        # Set up data augmentation
        IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
        CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W
        test_transforms = utils.data_transforms.Compose([
            utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
            utils.data_transforms.RandomBackground(
                cfg.TEST.RANDOM_BG_COLOR_RANGE),
            utils.data_transforms.Normalize(
                mean=cfg.DATASET.MEAN, std=cfg.DATASET.STD),
            utils.data_transforms.ToTensor(),
        ])

        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
            cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = paddle.io.DataLoader(
            dataset=dataset_loader.get_dataset(
                utils.data_loaders.DatasetType.TEST,
                cfg.CONST.N_VIEWS_RENDERING, test_transforms),
            batch_size=1,
            # num_workers=1,
            shuffle=False)
        mode = 'test'
    else:
        mode = 'val'

    # paddle.io.Dataset does not support 'str' input
    dataset_taxonomy = None
    rendering_image_path_template = cfg.DATASETS.SHAPENET.RENDERING_PATH
    volume_path_template = cfg.DATASETS.SHAPENET.VOXEL_PATH

    # Load all taxonomies of the dataset
    with open('./datasets/ShapeNet.json', encoding='utf-8') as file:
        dataset_taxonomy = json.loads(file.read())
        # print("[INFO]TEST-- open TAXONOMY_FILE_PATH success")

    all_test_taxonomy_id_and_sample_name = []
    # Load data for each category
    for taxonomy in dataset_taxonomy:
        taxonomy_folder_name = taxonomy['taxonomy_id']
        # print('[INFO] %set -- Collecting files of Taxonomy[ID=%s, Name=%s]' %
        #       (mode, taxonomy['taxonomy_id'], taxonomy['taxonomy_name']))
        samples = taxonomy[mode]
        for sample in samples:
            all_test_taxonomy_id_and_sample_name.append(
                [taxonomy_folder_name, sample])
    # print(len(all_test_taxonomy_id_and_sample_name))
    # print(all_test_taxonomy_id_and_sample_name)
    print('[INFO] Collected files of %set' % (mode))

    # Set up networks
    if res_gru_net is None:
        res_gru_net = Res_Gru_Net(cfg)

        print('[INFO] %s Loading weights from %s ...' %
              (dt.now(), cfg.CONST.WEIGHTS))
        res_gru_net_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "res_gru_net.pdparams"))
        res_gru_net.set_state_dict(res_gru_net_state_dict)

    # Set up loss functions
    bce_loss = paddle.nn.BCELoss()

    # Testing loop
    n_samples = len(test_data_loader)
    test_iou = dict()
    res_gru_net_losses = utils.network_utils.AverageMeter()

    # Switch models to evaluation mode
    res_gru_net.eval()

    for sample_idx, (rendering_images,
                     ground_truth_volume) in enumerate(test_data_loader):
        taxonomy_id = all_test_taxonomy_id_and_sample_name[sample_idx][0]
        sample_name = all_test_taxonomy_id_and_sample_name[sample_idx][1]
        # print("all_test_taxonomy_id_and_sample_name")
        # print(taxonomy_id)
        # print(sample_name)
        with paddle.no_grad():
            # Get data from data loader
            rendering_images = utils.network_utils.var_or_cuda(
                rendering_images)
            ground_truth_volume = utils.network_utils.var_or_cuda(
                ground_truth_volume)

            # Test the res_gru_net, decoder and merger
            generated_volume = res_gru_net(rendering_images)

            res_gru_net_loss = bce_loss(generated_volume,
                                        ground_truth_volume) * 10

            # Append loss and accuracy to average metrics
            res_gru_net_losses.update(res_gru_net_loss)

            # IoU per sample
            sample_iou = []
            for th in cfg.TEST.VOXEL_THRESH:
                _volume = paddle.greater_equal(
                    generated_volume, paddle.to_tensor(th)).astype("float32")
                intersection = paddle.sum(
                    paddle.multiply(_volume, ground_truth_volume))
                union = paddle.sum(
                    paddle.greater_equal(
                        paddle.add(_volume,
                                   ground_truth_volume).astype("float32"),
                        paddle.to_tensor(
                            1., dtype='float32')).astype("float32"))
                sample_iou.append((intersection / union))

            # IoU per taxonomy
            if taxonomy_id not in test_iou:
                test_iou[taxonomy_id] = {'n_samples': 0, 'iou': []}
            test_iou[taxonomy_id]['n_samples'] += 1
            test_iou[taxonomy_id]['iou'].append(sample_iou)

            # Append generated volumes to TensorBoard
            if output_dir and sample_idx < 1:
                img_dir = output_dir % 'images'
                # Volume Visualization
                gv = generated_volume.cpu().numpy()
                rendering_views = utils.binvox_visualization.get_volume_views(
                    gv, os.path.join(img_dir, 'Reconstructed'), epoch_idx)
                test_writer.add_image(
                    tag='Reconstructed', img=rendering_views, step=epoch_idx)
                gtv = ground_truth_volume.cpu().numpy()
                rendering_views = utils.binvox_visualization.get_volume_views(
                    gtv, os.path.join(img_dir, 'GroundTruth'), epoch_idx)
                test_writer.add_image(
                    tag='GroundTruth', img=rendering_views, step=epoch_idx)

            # Print sample loss and IoU
            print(
                '[INFO] %s Test[%d/%d] Taxonomy = %s Sample = %s EDLoss = %.4f IoU = %s'
                % (dt.now(), sample_idx + 1, n_samples, taxonomy_id,
                   sample_name, res_gru_net_loss,
                   ['%.4f' % si for si in sample_iou]))

    # Output testing results
    mean_iou = []
    for taxonomy_id in test_iou:
        test_iou[taxonomy_id]['iou'] = np.mean(
            test_iou[taxonomy_id]['iou'], axis=0)
        mean_iou.append(test_iou[taxonomy_id]['iou'] *
                        test_iou[taxonomy_id]['n_samples'])
    mean_iou = np.sum(mean_iou, axis=0) / n_samples

    # Print header
    print('============================ TEST RESULTS ============================')
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    print('Baseline', end='\t')
    for th in cfg.TEST.VOXEL_THRESH:
        print('t=%.2f' % th, end='\t')
    print()
    # Print body
    for taxonomy_id in test_iou:
        print('%s' % taxonomies[taxonomy_id]['taxonomy_name'].ljust(8),
              end='\t')
        print('%d' % test_iou[taxonomy_id]['n_samples'], end='\t')
        if 'baseline' in taxonomies[taxonomy_id]:
            print('%.4f' % taxonomies[taxonomy_id]['baseline'][
                '%d-view' % cfg.CONST.N_VIEWS_RENDERING],
                  end='\t\t')
        else:
            print('N/a', end='\t\t')
        for ti in test_iou[taxonomy_id]['iou']:
            print('%.4f' % ti, end='\t')
        print()
    # Print mean IoU for each threshold
    print('Overall ', end='\t\t\t\t')
    for mi in mean_iou:
        print('%.4f' % mi, end='\t')
    print('\n')

    # Add testing results to TensorBoard
    max_iou = np.max(mean_iou)
    if test_writer is not None:
        test_writer.add_scalar(
            tag='Res_Gru_Net/EpochLoss',
            value=res_gru_net_losses.avg,
            step=epoch_idx)
        test_writer.add_scalar(
            tag='Res_Gru_Net/IoU', value=max_iou, step=epoch_idx)

    return max_iou
def median(x, axis=None, keepdim=False, name=None):
    """
    Compute the median along the specified axis.

    Args:
        x (Tensor): The input Tensor, its data type can be bool, float16,
            float32, float64, int32, int64.
        axis (int, optional): The axis along which to perform median calculations.
            ``axis`` should be int. ``axis`` should be in range [-D, D), where D
            is the dimensions of ``x``. If ``axis`` is less than 0, it works the
            same way as :math:`axis + D`. If ``axis`` is None, median is
            calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s) in
            the output Tensor. If ``keepdim`` is True, the dimensions of the
            output Tensor is the same as ``x`` except in the reduced dimensions
            (it is of size 1 in this case). Otherwise, the shape of the output
            Tensor is squeezed in ``axis``. Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of median along ``axis`` of ``x``. If data type of ``x``
        is float64, data type of results will be float64, otherwise data type
        will be float32.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.arange(12).reshape([3, 4])
            # Tensor(shape=[3, 4], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[0 , 1 , 2 , 3 ],
            #         [4 , 5 , 6 , 7 ],
            #         [8 , 9 , 10, 11]])

            y1 = paddle.median(x)
            # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [5.50000000])

            y2 = paddle.median(x, axis=0)
            # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [4., 5., 6., 7.])

            y3 = paddle.median(x, axis=1)
            # Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [1.50000000, 5.50000000, 9.50000000])

            y4 = paddle.median(x, axis=0, keepdim=True)
            # Tensor(shape=[1, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [[4., 5., 6., 7.]])
    """
    if not isinstance(x, Variable):
        raise TypeError("In median, the input x should be a Tensor.")
    is_flatten = axis is None
    dims = len(x.shape)
    if is_flatten:
        x = paddle.flatten(x)
        axis = 0
    else:
        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
            raise ValueError(
                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
            )
        if axis < 0:
            axis += dims
    sz = x.shape[axis]
    kth = sz >> 1
    tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
    dtype = 'float64' if x.dtype == core.VarDesc.VarType.FP64 else 'float32'
    if sz & 1 == 0:
        out_tensor = paddle.slice(
            tensor_topk, axes=[axis], starts=[kth - 1],
            ends=[kth]) + paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1])
        out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
    else:
        out_tensor = paddle.cast(
            paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]),
            dtype=dtype)
    # propagate NaN: any NaN in the reduced axis makes the result NaN
    out_tensor = out_tensor + paddle.sum(
        paddle.cast(paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True)
    if not keepdim or is_flatten:
        if not is_flatten:
            newshape = x.shape[:axis] + x.shape[axis + 1:]
        elif not keepdim:
            newshape = [1]
        else:
            newshape = [1] * dims
    else:
        newshape = out_tensor.shape
    out_tensor = out_tensor.reshape(newshape, name=name)
    return out_tensor
def loss_reg(self, y_hat, y):
    loss = F.l1_loss(y_hat, y, reduction='sum')
    for layer in self.mpnn_3d.edge2edge_layers:
        w_g = paddle.stack([conv.G.weight for conv in layer.conv_layer])
        loss += self.spa_weight * paddle.sum((w_g[1:, :, :] - w_g[:-1, :, :])**2)
    return loss
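# A small sketch of the spatial-smoothness regularizer shared by `loss` and
# `loss_reg` above: the per-layer conv weights are stacked and the squared
# differences between neighboring layers are penalized. The weight tensor
# below is a made-up stand-in for the real stacked weights.
import paddle

w_g = paddle.randn([4, 16, 16])   # [num_convs, in_dim, out_dim]
smoothness = paddle.sum((w_g[1:, :, :] - w_g[:-1, :, :])**2)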
def soft_cross_entropy(inp, target):
    inp_likelihood = F.log_softmax(inp, axis=-1)
    target_prob = F.softmax(target, axis=-1)
    return -1. * paddle.mean(paddle.sum(inp_likelihood * target_prob, axis=-1))
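# A hedged usage sketch: `soft_cross_entropy` is the usual knowledge-distillation
# objective, matching the student's log-probabilities to the teacher's softened
# distribution. The random logits below are placeholders for real model outputs.
import paddle

student_logits = paddle.randn([8, 10])
teacher_logits = paddle.randn([8, 10])
loss = soft_cross_entropy(student_logits, teacher_logits)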
def forward(self,
            input_ids=None,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None,
            query_input_ids=None,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            title_input_ids=None,
            title_token_type_ids=None,
            title_position_ids=None,
            title_attention_mask=None,
            seq_lengths=None,
            labels=None):

    if self.task != 'text-matching':
        result = self.model(input_ids, token_type_ids, position_ids,
                            attention_mask)
    else:
        query_result = self.model(query_input_ids, query_token_type_ids,
                                  query_position_ids, query_attention_mask)
        title_result = self.model(title_input_ids, title_token_type_ids,
                                  title_position_ids, title_attention_mask)

    if self.task == 'seq-cls':
        logits = result
        probs = F.softmax(logits, axis=1)
        if labels is not None:
            loss = self.criterion(logits, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}
        return probs
    elif self.task == 'token-cls':
        logits = result
        token_level_probs = F.softmax(logits, axis=-1)
        preds = token_level_probs.argmax(axis=-1)
        if labels is not None:
            loss = self.criterion(logits, labels.unsqueeze(-1))
            num_infer_chunks, num_label_chunks, num_correct_chunks = \
                self.metric.compute(None, seq_lengths, preds, labels)
            self.metric.update(num_infer_chunks.numpy(),
                               num_label_chunks.numpy(),
                               num_correct_chunks.numpy())
            _, _, f1_score = map(float, self.metric.accumulate())
            return token_level_probs, loss, {'f1_score': f1_score}
        return token_level_probs
    elif self.task == 'text-matching':
        query_token_embedding, _ = query_result
        query_token_embedding = self.dropout(query_token_embedding)
        query_attention_mask = paddle.unsqueeze(
            (query_input_ids != self.model.pad_token_id).astype(
                self.model.pooler.dense.weight.dtype),
            axis=2)
        query_token_embedding = query_token_embedding * query_attention_mask
        query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
        query_sum_mask = paddle.sum(query_attention_mask, axis=1)
        query_mean = query_sum_embedding / query_sum_mask

        title_token_embedding, _ = title_result
        title_token_embedding = self.dropout(title_token_embedding)
        title_attention_mask = paddle.unsqueeze(
            (title_input_ids != self.model.pad_token_id).astype(
                self.model.pooler.dense.weight.dtype),
            axis=2)
        title_token_embedding = title_token_embedding * title_attention_mask
        title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
        title_sum_mask = paddle.sum(title_attention_mask, axis=1)
        title_mean = title_sum_embedding / title_sum_mask

        sub = paddle.abs(paddle.subtract(query_mean, title_mean))
        projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
        logits = self.classifier(projection)
        probs = F.softmax(logits)
        if labels is not None:
            loss = self.criterion(logits, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}
        return probs
    else:
        sequence_output, pooled_output = result
        return sequence_output, pooled_output
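# A minimal sketch of the masked mean pooling used in the text-matching branch
# above: padded positions are zeroed before the sum, and the sum is divided by
# the true sequence lengths. The shapes and mask are illustrative.
import paddle

token_emb = paddle.randn([2, 6, 16])                  # [batch, seq_len, hidden]
pad_mask = paddle.to_tensor(
    [[1, 1, 1, 1, 0, 0], [1, 1, 1, 0, 0, 0]], dtype="float32")
mask = paddle.unsqueeze(pad_mask, axis=2)             # [batch, seq_len, 1]
sum_embedding = paddle.sum(token_emb * mask, axis=1)  # [batch, hidden]
mean_embedding = sum_embedding / paddle.sum(mask, axis=1)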
def log_sum_exp(vec, dim=0):
    # Avoid underflow and overflow
    max_num = paddle.max(vec, dim)
    max_exp = max_num.unsqueeze(-1)
    return max_num + paddle.log(paddle.sum(paddle.exp(vec - max_exp), dim))
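# A quick numerical check of `log_sum_exp`: subtracting the row max keeps
# paddle.exp from overflowing, and the result matches the built-in
# paddle.logsumexp on the same input.
import paddle

vec = paddle.to_tensor([[1000.0, 1000.0], [0.0, -1000.0]])
print(log_sum_exp(vec, dim=1))         # ~[1000.6931, 0.]
print(paddle.logsumexp(vec, axis=1))   # same values, built-in reference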
def train(args):
    if args.use_gpu:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()
    paddle.disable_static(place)
    deepfm = DeepFM(args)

    train_filelist = [
        os.path.join(args.train_data_dir, x)
        for x in os.listdir(args.train_data_dir)
    ]
    test_filelist = [
        os.path.join(args.test_data_dir, x)
        for x in os.listdir(args.test_data_dir)
    ]
    train_reader = data_reader.data_reader(
        args.batch_size, train_filelist, args.feat_dict, data_type="train")
    test_reader = data_reader.data_reader(
        args.batch_size, test_filelist, args.feat_dict, data_type="test")

    def eval(epoch):
        deepfm.eval()
        logger.info("start eval model.")
        total_step = 0.0
        auc_metric_test = paddle.metric.Auc("ROC")
        for data in test_reader():
            total_step += 1
            raw_feat_idx, raw_feat_value, label = zip(*data)
            raw_feat_idx = np.array(raw_feat_idx, dtype=np.int64)
            raw_feat_value = np.array(raw_feat_value, dtype=np.float32)
            label = np.array(label, dtype=np.int64)
            raw_feat_idx, raw_feat_value, label = [
                paddle.to_tensor(
                    data=i, dtype=None, place=None, stop_gradient=True)
                for i in [raw_feat_idx, raw_feat_value, label]
            ]

            predict = deepfm(raw_feat_idx, raw_feat_value, label)

            # for auc
            predict_2d = paddle.concat(x=[1 - predict, predict], axis=1)
            auc_metric_test.update(
                preds=predict_2d.numpy(), labels=label.numpy())

        logger.info("test auc of epoch %d is %.6f" %
                    (epoch, auc_metric_test.accumulate()))

    optimizer = paddle.optimizer.Adam(
        parameters=deepfm.parameters(),
        weight_decay=paddle.regularizer.L2Decay(args.reg))

    # load model if exists
    start_epoch = 0
    if args.checkpoint:
        model_dict = paddle.load(os.path.join(args.checkpoint, ".pdparams"))
        optimizer_dict = paddle.load(os.path.join(args.checkpoint, ".pdopt"))
        deepfm.set_dict(model_dict)
        optimizer.set_state_dict(optimizer_dict)
        start_epoch = int(
            os.path.basename(args.checkpoint).split("_")[-1]) + 1  # get next train epoch
        logger.info("load model {} finished.".format(args.checkpoint))

    for epoch in range(start_epoch, args.num_epoch):
        begin = time.time()
        batch_begin = time.time()
        batch_id = 0
        total_loss = 0.0
        auc_metric = paddle.metric.Auc("ROC")
        logger.info("training epoch {} start.".format(epoch))

        for data in train_reader():
            raw_feat_idx, raw_feat_value, label = zip(*data)
            raw_feat_idx = np.array(raw_feat_idx, dtype=np.int64)
            raw_feat_value = np.array(raw_feat_value, dtype=np.float32)
            label = np.array(label, dtype=np.int64)
            raw_feat_idx, raw_feat_value, label = [
                paddle.to_tensor(
                    data=i, dtype=None, place=None, stop_gradient=True)
                for i in [raw_feat_idx, raw_feat_value, label]
            ]

            predict = deepfm(raw_feat_idx, raw_feat_value, label)

            loss = paddle.nn.functional.log_loss(
                input=predict, label=paddle.cast(label, dtype="float32"))
            batch_loss = paddle.sum(loss)

            total_loss += batch_loss.numpy().item()

            batch_loss.backward()
            optimizer.minimize(batch_loss)
            deepfm.clear_gradients()

            # for auc
            predict_2d = paddle.concat(x=[1 - predict, predict], axis=1)
            auc_metric.update(preds=predict_2d.numpy(), labels=label.numpy())

            if batch_id > 0 and batch_id % 100 == 0:
                logger.info(
                    "epoch: {}, batch_id: {}, loss: {:.6f}, auc: {:.6f}, speed: {:.2f} ins/s"
                    .format(epoch, batch_id, total_loss / args.batch_size / 100,
                            auc_metric.accumulate(),
                            100 * args.batch_size / (time.time() - batch_begin)))
                batch_begin = time.time()
                total_loss = 0.0

            batch_id += 1
        logger.info("epoch %d is finished and takes %f s" %
                    (epoch, time.time() - begin))
        # save model and optimizer
        logger.info("going to save epoch {} model and optimizer.".format(epoch))
        paddle.save(
            deepfm.state_dict(),
            path=os.path.join(args.model_output_dir, "epoch_" + str(epoch),
                              ".pdparams"))
        paddle.save(
            optimizer.state_dict(),
            path=os.path.join(args.model_output_dir, "epoch_" + str(epoch),
                              ".pdopt"))
        logger.info("save epoch {} finished.".format(epoch))
        # eval model
        deepfm.eval()
        eval(epoch)
        deepfm.train()
    paddle.enable_static()
def avg_pool(all_vecs, scope, dim):
    """Average pooling"""
    size = paddle.to_tensor([le for _, le in scope])
    return paddle.sum(all_vecs, axis=dim) / paddle.unsqueeze(size, axis=-1)
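# A hedged usage sketch, assuming `scope` holds per-graph (start, length) pairs
# (only the lengths are used) and that padded positions in `all_vecs` are already
# zero, so dividing the sum by the true length yields a per-graph mean. Lengths
# are given as floats to match the dtype of `all_vecs`.
import paddle

all_vecs = paddle.randn([3, 5, 8])          # 3 graphs, up to 5 nodes, hidden 8
scope = [(0, 5.0), (5, 3.0), (8, 4.0)]      # made-up (start, length) pairs
pooled = avg_pool(all_vecs, scope, dim=1)   # [3, 8]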
def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
                   im_shape, scale_factor):
    h = paddle.cast(im_shape[0], 'int32')[0]
    w = paddle.cast(im_shape[1], 'int32')[0]
    upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4]

    y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32')
    inds = paddle.where(cate_preds > self.score_threshold, cate_preds, y)
    inds = paddle.nonzero(inds)
    cate_preds = paddle.reshape(cate_preds, shape=[-1])
    # Prevent empty and increase fake data
    ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64')
    ind_b = paddle.zeros(shape=[1], dtype='int64')
    inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0)
    inds = paddle.concat([inds, inds_end])
    kernel_preds_end = paddle.ones(
        shape=[1, self.kernel_out_channels], dtype='float32')
    kernel_preds = paddle.concat([kernel_preds, kernel_preds_end])
    cate_preds = paddle.concat(
        [cate_preds, paddle.zeros(shape=[1], dtype='float32')])

    # cate_labels & kernel_preds
    cate_labels = inds[:, 1]
    kernel_preds = paddle.gather(kernel_preds, index=inds[:, 0])
    cate_score_idx = paddle.add(inds[:, 0] * 80, cate_labels)
    cate_scores = paddle.gather(cate_preds, index=cate_score_idx)

    size_trans = np.power(self.seg_num_grids, 2)
    strides = []
    for _ind in range(len(self.segm_strides)):
        strides.append(
            paddle.full(
                shape=[int(size_trans[_ind])],
                fill_value=self.segm_strides[_ind],
                dtype="int32"))
    strides = paddle.concat(strides)
    strides = paddle.gather(strides, index=inds[:, 0])

    # mask encoding.
    kernel_preds = paddle.unsqueeze(kernel_preds, [2, 3])
    seg_preds = F.conv2d(seg_preds, kernel_preds)
    seg_preds = F.sigmoid(paddle.squeeze(seg_preds, [0]))
    seg_masks = seg_preds > self.mask_threshold
    seg_masks = paddle.cast(seg_masks, 'float32')
    sum_masks = paddle.sum(seg_masks, axis=[1, 2])

    y = paddle.zeros(shape=paddle.shape(sum_masks), dtype='float32')
    keep = paddle.where(sum_masks > strides, sum_masks, y)
    keep = paddle.nonzero(keep)
    keep = paddle.squeeze(keep, axis=[1])
    # Prevent empty and increase fake data
    keep_other = paddle.concat(
        [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')])
    keep_scores = paddle.concat(
        [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')])
    cate_scores_end = paddle.zeros(shape=[1], dtype='float32')
    cate_scores = paddle.concat([cate_scores, cate_scores_end])

    seg_masks = paddle.gather(seg_masks, index=keep_other)
    seg_preds = paddle.gather(seg_preds, index=keep_other)
    sum_masks = paddle.gather(sum_masks, index=keep_other)
    cate_labels = paddle.gather(cate_labels, index=keep_other)
    cate_scores = paddle.gather(cate_scores, index=keep_scores)

    # mask scoring.
    seg_mul = paddle.cast(seg_preds * seg_masks, 'float32')
    seg_scores = paddle.sum(seg_mul, axis=[1, 2]) / sum_masks
    cate_scores *= seg_scores
    # Matrix NMS
    seg_preds, cate_scores, cate_labels = self.mask_nms(
        seg_preds, seg_masks, cate_labels, cate_scores, sum_masks=sum_masks)
    ori_shape = im_shape[:2] / scale_factor + 0.5
    ori_shape = paddle.cast(ori_shape, 'int32')
    seg_preds = F.interpolate(
        paddle.unsqueeze(seg_preds, 0),
        size=upsampled_size_out,
        mode='bilinear',
        align_corners=False,
        align_mode=0)
    seg_preds = paddle.slice(
        seg_preds, axes=[2, 3], starts=[0, 0], ends=[h, w])
    seg_masks = paddle.squeeze(
        F.interpolate(
            seg_preds,
            size=ori_shape[:2],
            mode='bilinear',
            align_corners=False,
            align_mode=0),
        axis=[0])
    seg_masks = paddle.cast(seg_masks > self.mask_threshold, 'uint8')
    return seg_masks, cate_labels, cate_scores
def beam_search_infilling(model, token_ids, token_type_ids, sos_id, eos_id,
                          attn_id, pad_id, unk_id, vocab_size,
                          max_encode_len=640, max_decode_len=100,
                          beam_width=5, tgt_type_id=3, length_penalty=1.0):
    _, __, info = model(token_ids, token_type_ids)
    d_batch, d_seqlen = token_ids.shape
    state = BeamSearchState(
        log_probs=paddle.zeros([d_batch, beam_width], 'float32'),
        lengths=paddle.zeros([d_batch, beam_width], 'int64'),
        finished=paddle.zeros([d_batch, beam_width], 'int64'))
    outputs = []

    def reorder_(t, parent_id):
        """Reorder cache rows according to each beam's parent id."""
        gather_idx = paddle.nonzero(
            parent_id != -1)[:, 0] * beam_width + paddle.reshape(
                parent_id, [-1])
        t = paddle.gather(t, gather_idx)
        return t

    def tile_(t, times):
        """Tile along a new beam axis: [batch, ...] -> [batch * times, ...]."""
        _shapes = list(t.shape[1:])
        new_shape = [t.shape[0], times] + list(t.shape[1:])
        ret = paddle.reshape(
            paddle.expand(paddle.unsqueeze(t, [1]), new_shape),
            [-1] + _shapes)
        return ret

    cached_k, cached_v = info['caches']
    cached_k = [tile_(k, beam_width) for k in cached_k]
    cached_v = [tile_(v, beam_width) for v in cached_v]
    past_cache = (cached_k, cached_v)

    token_ids = tile_(token_ids, beam_width)
    seqlen = paddle.sum(paddle.cast(token_ids != 0, 'int64'), 1, keepdim=True)

    cls_ids = paddle.ones([d_batch * beam_width], dtype='int64') * sos_id
    attn_ids = paddle.ones([d_batch * beam_width], dtype='int64') * attn_id  # SOS
    ids = paddle.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(token_ids, ids, step)
        pos_ids = paddle.to_tensor(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch * beam_width, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                paddle.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)

        # Mask out padding, unknown, and attention placeholder tokens.
        if logits.shape[-1] > vocab_size:
            logits[:, :, vocab_size:] = 0
        logits[:, :, pad_id] = 0
        logits[:, :, unk_id] = 0
        logits[:, :, attn_id] = 0

        output, state = beam_search_step(state,
                                         logits[:, 1],
                                         eos_id=eos_id,
                                         beam_width=beam_width,
                                         is_first_step=(step == 0),
                                         length_penalty=length_penalty)
        outputs.append(output)
        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        # Concatenate the newest key/value slice onto the cache, then
        # reorder the cache rows to follow the surviving beams.
        cached_k = [
            reorder_(paddle.concat([pk, k[:, :1, :]], 1),
                     output.beam_parent_ids)
            for pk, k in zip(past_cached_k, cached_k)
        ]
        cached_v = [
            reorder_(paddle.concat([pv, v[:, :1, :]], 1),
                     output.beam_parent_ids)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        pred_ids_flatten = paddle.reshape(output.predicted_ids,
                                          [d_batch * beam_width])
        ids = paddle.stack([pred_ids_flatten, attn_ids], 1)

        if state.finished.numpy().all():
            break

    final_ids = paddle.stack([o.predicted_ids for o in outputs], 0)
    final_parent_ids = paddle.stack([o.beam_parent_ids for o in outputs], 0)
    final_ids = nn.functional.gather_tree(final_ids, final_parent_ids)
    # a [:, :, 0] slice here would keep only the best beam
    final_ids = paddle.transpose(
        paddle.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0])
    return final_ids.numpy()
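# A tiny worked example of the tile_/reorder_ pattern above, with
# illustrative values: tile_ repeats each batch row beam_width times
# ([batch, ...] -> [batch * beam_width, ...]), and reorder_'s gather index
# maps each beam slot to its chosen parent beam within the same batch row.
import paddle

t = paddle.to_tensor([[1.0], [2.0]])   # a batch of 2 "cache" rows
beam_width = 3
tiled = paddle.reshape(
    paddle.expand(paddle.unsqueeze(t, [1]), [2, beam_width, 1]), [-1, 1])
print(tiled.numpy().ravel())           # [1. 1. 1. 2. 2. 2.]

parent_id = paddle.to_tensor([[0, 0, 2], [1, 1, 0]])  # parent beam per slot
gather_idx = (paddle.nonzero(parent_id != -1)[:, 0] * beam_width +
              paddle.reshape(parent_id, [-1]))
print(gather_idx.numpy())              # [0 0 2 4 4 3]
reordered = paddle.gather(tiled, gather_idx)  # cache rows now follow the beams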
def forward(self, predict, label):
    r"""
    Computes cross entropy loss with or without label smoothing.

    Args:
        predict (Tensor): The predict results of `TransformerModel` with shape
            `[batch_size, sequence_length, vocab_size]` whose data type can be
            float32 or float64.
        label (Tensor): The label for corresponding results with shape
            `[batch_size, sequence_length, 1]`.

    Returns:
        tuple: A tuple with items: (`sum_cost`, `avg_cost`, `token_num`).

        With the corresponding fields:

        - `sum_cost` (Tensor):
            The sum of loss of current batch whose data type can be float32, float64.
        - `avg_cost` (Tensor):
            The average loss of current batch whose data type can be float32, float64.
            The relation between `sum_cost` and `avg_cost` can be described as:

            .. math::

                avg\_cost = sum\_cost / token\_num

        - `token_num` (Tensor):
            The number of tokens of current batch.

    Example:
        .. code-block:: python

            import paddle
            from paddlenlp.transformers import CrossEntropyCriterion

            criterion = CrossEntropyCriterion(label_smooth_eps=0.1, pad_idx=0)

            batch_size = 1
            seq_len = 2
            vocab_size = 30000
            predict = paddle.rand(shape=[batch_size, seq_len, vocab_size])
            label = paddle.randint(
                low=3, high=vocab_size, shape=[batch_size, seq_len, 1])

            criterion(predict, label)
    """
    weights = paddle.cast(label != self.pad_idx,
                          dtype=paddle.get_default_dtype())
    if self.label_smooth_eps:
        label = paddle.squeeze(label, axis=[2])
        label = F.label_smooth(label=F.one_hot(x=label,
                                               num_classes=predict.shape[-1]),
                               epsilon=self.label_smooth_eps)
        if paddle.get_default_dtype() != "float32":
            label = paddle.cast(label, dtype=paddle.get_default_dtype())

    cost = F.cross_entropy(
        input=predict,
        label=label,
        reduction='none',
        soft_label=True if self.label_smooth_eps else False)
    weighted_cost = cost * weights
    sum_cost = paddle.sum(weighted_cost)
    token_num = paddle.sum(weights)
    token_num.stop_gradient = True
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, token_num
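# A small numeric check of the label-smoothing branch above; eps = 0.1 and
# the toy 4-word vocabulary are illustrative. F.label_smooth turns the
# one-hot target into (1 - eps) * one_hot + eps / vocab_size.
import paddle
import paddle.nn.functional as F

label = paddle.to_tensor([2])
one_hot = F.one_hot(label, num_classes=4)
smoothed = F.label_smooth(one_hot, epsilon=0.1)
print(smoothed.numpy())  # [[0.025 0.025 0.925 0.025]]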
def binary_cross_entropy_with_logits(logit,
                                     label,
                                     weight=None,
                                     reduction='mean',
                                     pos_weight=None,
                                     name=None):
    """
    This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
    It can also be seen as the combination of the ``sigmoid_cross_entropy_with_logits``
    layer and some reduce operations.

    This measures the element-wise probability error in classification tasks
    in which each class is independent.
    This can be thought of as predicting labels for a data-point, where labels
    are not mutually exclusive. For example, a news article can be about
    politics, technology or sports at the same time or none of these.

    First, this operator calculates the loss as follows:

    .. math::
           Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))

    We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:

    .. math::
           Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})

    For stability and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
    we reformulate the loss as follows:

    .. math::
           Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-\\lvert Logit \\rvert})

    Then, if ``weight`` or ``pos_weight`` is not None, this operator multiplies the
    weight tensor on the loss `Out`. The ``weight`` tensor assigns a different
    weight to every item in the batch. The ``pos_weight`` assigns a different
    weight to the positive label of each class.

    Finally, this operator applies the reduce operation on the loss.
    If :attr:`reduction` is set to ``'none'``, the operator will return the original loss `Out`.
    If :attr:`reduction` is set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`.
    If :attr:`reduction` is set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`.

    Note that the target labels ``label`` should be numbers between 0 and 1.

    Args:
        logit (Tensor): The input predictions tensor. 2-D tensor with shape: [N, *],
            N is batch_size, `*` means number of additional dimensions. The ``logit``
            is usually the output of a Linear layer. Available dtype is float32, float64.
        label (Tensor): The target labels tensor. 2-D tensor with the same shape as
            ``logit``. The target labels whose values should be numbers between 0 and 1.
            Available dtype is float32, float64.
        weight (Tensor, optional): A manual rescaling weight given to the loss of each
            batch element. If given, it has to be a 1D Tensor whose size is `[N, ]`.
            The data type is float32, float64. Default is ``'None'``.
        reduction (str, optional): Indicate how to average the loss by batch_size,
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            If :attr:`reduction` is ``'sum'``, the summed loss is returned.
            Default is ``'mean'``.
        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector
            with length equal to the number of classes. The data type is float32, float64.
            Default is ``'None'``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        output (Tensor): If ``reduction`` is ``'none'``, the shape of output is
            same as ``logit`` , else the shape of output is scalar.

    Examples:

        .. code-block:: python

            import paddle
            paddle.disable_static()
            logit = paddle.to_tensor([5.0, 1.0, 3.0])
            label = paddle.to_tensor([1.0, 0.0, 1.0])
            output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label)
            print(output.numpy())  # [0.45618808]

    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "The value of 'reduction' in binary_cross_entropy_with_logits "
            "should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
            % reduction)

    if in_dygraph_mode():
        one = _varbase_creator(dtype=logit.dtype)
        core.ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                               'dtype', one.dtype, 'str_value', '1.0',
                               'shape', [1])
        out = core.ops.sigmoid_cross_entropy_with_logits(logit, label)
        if pos_weight is not None:
            log_weight = core.ops.elementwise_add(
                core.ops.elementwise_mul(
                    label, core.ops.elementwise_sub(pos_weight, one)), one)
            out = core.ops.elementwise_mul(out, log_weight)
        if weight is not None:
            out = core.ops.elementwise_mul(out, weight)

        if reduction == "sum":
            return core.ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            return core.ops.mean(out)
        else:
            return out

    fluid.data_feeder.check_variable_and_dtype(
        logit, 'logit', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
    fluid.data_feeder.check_variable_and_dtype(
        label, 'label', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
    sigmoid_name = None
    if reduction == 'none' and pos_weight is None and weight is None:
        sigmoid_name = name

    out = paddle.nn.functional.sigmoid_cross_entropy_with_logits(
        logit, label, name=sigmoid_name)

    one = paddle.fill_constant(shape=[1], value=1.0, dtype=logit.dtype)
    if pos_weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            pos_weight, 'pos_weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        log_weight = paddle.add(
            paddle.multiply(label, paddle.elementwise_sub(pos_weight, one)),
            one)
        pos_weight_name = name if reduction == 'none' and weight is None else None
        out = paddle.multiply(out, log_weight, name=pos_weight_name)

    if weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            weight, 'weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        weight_name = name if reduction == 'none' else None
        out = paddle.multiply(out, weight, name=weight_name)

    if reduction == "sum":
        return paddle.sum(out, name=name)
    elif reduction == "mean":
        return paddle.mean(out, name=name)
    return out
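# A quick numerical check of the stable formulation documented above,
# reusing the docstring's example values (plain NumPy, for illustration):
# max(x, 0) - x * y + log(1 + exp(-|x|)).
import numpy as np

logit = np.array([5.0, 1.0, 3.0])
label = np.array([1.0, 0.0, 1.0])
loss = np.maximum(logit, 0) - logit * label + np.log1p(np.exp(-np.abs(logit)))
print(loss.mean())  # ~0.4562, matching the docstring example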