def __call__(self, pred, target):
    """Calculate the loss.

    Args:
        pred (Tensor): heatmap prediction
        target (Tensor): target for positive samples
    Return:
        ct_focal_loss (Tensor): Focal Loss used in CornerNet & CenterNet.
            Note that the values in target are in [0, 1] since gaussian is
            used to reduce the punishment and we treat [0, 1) as neg example.
    """
    fg_map = paddle.cast(target == 1, 'float32')
    fg_map.stop_gradient = True
    bg_map = paddle.cast(target < 1, 'float32')
    bg_map.stop_gradient = True

    neg_weights = paddle.pow(1 - target, 4) * bg_map
    pos_loss = 0 - paddle.log(pred) * paddle.pow(1 - pred,
                                                 self.gamma) * fg_map
    neg_loss = 0 - paddle.log(1 - pred) * paddle.pow(
        pred, self.gamma) * neg_weights
    pos_loss = paddle.sum(pos_loss)
    neg_loss = paddle.sum(neg_loss)

    fg_num = paddle.sum(fg_map)
    ct_focal_loss = (pos_loss + neg_loss) / (
        fg_num + paddle.cast(fg_num == 0, 'float32'))
    return ct_focal_loss * self.loss_weight
def bbox2delta_v2(src_boxes,
                  tgt_boxes,
                  means=(0.0, 0.0, 0.0, 0.0),
                  stds=(1.0, 1.0, 1.0, 1.0)):
    """Encode bboxes to deltas.

    Modified from ppdet.modeling.bbox_utils.bbox2delta.

    Args:
        src_boxes (Tensor[..., 4]): base bboxes
        tgt_boxes (Tensor[..., 4]): target bboxes
        means (list[float]): the mean that will be used to normalize delta
        stds (list[float]): the std that will be used to normalize delta
    """
    if src_boxes.size == 0:
        return paddle.empty_like(src_boxes)
    src_w = src_boxes[..., 2] - src_boxes[..., 0]
    src_h = src_boxes[..., 3] - src_boxes[..., 1]
    src_ctr_x = src_boxes[..., 0] + 0.5 * src_w
    src_ctr_y = src_boxes[..., 1] + 0.5 * src_h

    tgt_w = tgt_boxes[..., 2] - tgt_boxes[..., 0]
    tgt_h = tgt_boxes[..., 3] - tgt_boxes[..., 1]
    tgt_ctr_x = tgt_boxes[..., 0] + 0.5 * tgt_w
    tgt_ctr_y = tgt_boxes[..., 1] + 0.5 * tgt_h

    dx = (tgt_ctr_x - src_ctr_x) / src_w
    dy = (tgt_ctr_y - src_ctr_y) / src_h
    dw = paddle.log(tgt_w / src_w)
    dh = paddle.log(tgt_h / src_h)

    deltas = paddle.stack((dx, dy, dw, dh), axis=1)  # [n, 4]
    means = paddle.to_tensor(means, place=src_boxes.place)
    stds = paddle.to_tensor(stds, place=src_boxes.place)
    deltas = (deltas - means) / stds
    return deltas
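# Usage sketch (not from the source above; tensor values are illustrative):
# encode one anchor / ground-truth pair and inspect (dx, dy, dw, dh).
import paddle

src = paddle.to_tensor([[0., 0., 10., 10.]])   # anchor box x1, y1, x2, y2
tgt = paddle.to_tensor([[1., 1., 11., 13.]])   # matched ground-truth box
deltas = bbox2delta_v2(src, tgt)
# center shift / anchor size gives dx = 0.1, dy = 0.2; dw = log(10/10) = 0; dh = log(12/10)
print(deltas.numpy())  # approx [[0.1, 0.2, 0.0, 0.1823]]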
def bev_box_encode(boxes, anchors, encode_angle_to_vector=False,
                   smooth_dim=False):
    """Box encode for VoxelNet in bird's-eye view.

    Args:
        boxes ([N, 5] Tensor): BEV boxes: x, y, w, l, r
        anchors ([N, 5] Tensor): BEV anchors: x, y, w, l, r
    """
    xa, ya, wa, la, ra = paddle.split(anchors, 5, axis=-1)
    xg, yg, wg, lg, rg = paddle.split(boxes, 5, axis=-1)
    diagonal = paddle.sqrt(la**2 + wa**2)
    xt = (xg - xa) / diagonal
    yt = (yg - ya) / diagonal
    if smooth_dim:
        lt = lg / la - 1
        wt = wg / wa - 1
    else:
        lt = paddle.log(lg / la)
        wt = paddle.log(wg / wa)
    if encode_angle_to_vector:
        rgx = paddle.cos(rg)
        rgy = paddle.sin(rg)
        rax = paddle.cos(ra)
        ray = paddle.sin(ra)
        rtx = rgx - rax
        rty = rgy - ray
        return paddle.concat([xt, yt, wt, lt, rtx, rty], axis=-1)
    else:
        rt = rg - ra
        return paddle.concat([xt, yt, wt, lt, rt], axis=-1)
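# Usage sketch (illustrative values, not from the source): encode one BEV box
# against one anchor; the output is [N, 5] = (xt, yt, wt, lt, rt).
import paddle

anchors = paddle.to_tensor([[0.0, 0.0, 1.6, 3.9, 0.0]])   # x, y, w, l, r
boxes = paddle.to_tensor([[0.5, -0.2, 1.8, 4.2, 0.1]])
print(bev_box_encode(boxes, anchors).numpy())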
def hierarchical_self_supervision(self, em, adj):
    def row_shuffle(embedding):
        return embedding[paddle.randperm(paddle.shape(embedding)[0])]

    def row_column_shuffle(embedding):
        embedding = paddle.transpose(embedding, perm=[1, 0])
        corrupted_embedding = paddle.transpose(
            embedding[paddle.randperm(paddle.shape(embedding)[0])],
            perm=[1, 0])
        return corrupted_embedding[paddle.randperm(
            paddle.shape(corrupted_embedding)[0])]

    def score(x1, x2):
        return paddle.sum(paddle.multiply(x1, x2), axis=1)

    user_embeddings = em
    edge_embeddings = paddle.matmul(adj, user_embeddings)
    # Local MIN
    pos = score(user_embeddings, edge_embeddings)
    neg1 = score(row_shuffle(user_embeddings), edge_embeddings)
    neg2 = score(row_column_shuffle(edge_embeddings), user_embeddings)
    local_loss = paddle.sum(-paddle.log(F.sigmoid(pos - neg1)) -
                            paddle.log(F.sigmoid(neg1 - neg2)))
    # Global MIN
    graph = paddle.mean(edge_embeddings, axis=0)
    pos = score(edge_embeddings, graph)
    neg1 = score(row_column_shuffle(edge_embeddings), graph)
    global_loss = paddle.sum(-paddle.log(F.sigmoid(pos - neg1)))
    return global_loss + local_loss
def forward(self, prediction, target):
    """forward

    Args:
        prediction (paddle.Tensor): model prediction
        target (paddle.Tensor): ground truth

    Returns:
        paddle.Tensor: focal loss
    """
    positive_index = (target == 1).astype("float32")
    negative_index = (target < 1).astype("float32")
    negative_weights = paddle.pow(1 - target, self.beta)
    loss = 0.

    positive_loss = paddle.log(prediction) \
        * paddle.pow(1 - prediction, self.alpha) * positive_index
    negative_loss = paddle.log(1 - prediction) \
        * paddle.pow(prediction, self.alpha) * negative_weights * negative_index

    num_positive = positive_index.sum()
    positive_loss = positive_loss.sum()
    negative_loss = negative_loss.sum()

    if num_positive == 0:
        loss -= negative_loss
    else:
        loss -= (positive_loss + negative_loss) / num_positive
    return loss
def _neg_loss(pred, gt):
    '''Modified focal loss. Exactly the same as CornerNet.
    Runs faster and costs a little bit more memory.

    Arguments:
        pred (batch x c x h x w)
        gt (batch x c x h x w)
    '''
    # pos_inds = gt.eq(1).float()
    # neg_inds = gt.lt(1).float()
    pos_inds = gt.equal(paddle.ones(gt.shape, dtype=gt.dtype)).cast('float32')
    neg_inds = gt.less_than(paddle.ones(gt.shape,
                                        dtype=gt.dtype)).cast('float32')

    # neg_weights = torch.pow(1 - gt, 4)
    neg_weights = paddle.pow(1 - gt, 4)

    loss = 0
    # pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
    # neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
    pos_loss = paddle.log(pred) * paddle.pow(1 - pred, 2) * pos_inds
    neg_loss = paddle.log(1 - pred) * paddle.pow(pred,
                                                 2) * neg_weights * neg_inds

    # num_pos = pos_inds.float().sum()
    num_pos = pos_inds.cast('float32').sum()
    pos_loss = pos_loss.sum()
    neg_loss = neg_loss.sum()

    if num_pos == 0:
        loss = loss - neg_loss
    else:
        loss = loss - (pos_loss + neg_loss) / num_pos
    return loss
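# Usage sketch (illustrative tensors, not from the source): a 1x1x2x2 heatmap
# with one positive cell; pred must lie in (0, 1), e.g. after a sigmoid.
import paddle

gt = paddle.to_tensor([[[[1.0, 0.6], [0.2, 0.0]]]])
pred = paddle.to_tensor([[[[0.9, 0.3], [0.1, 0.05]]]])
print(_neg_loss(pred, gt))  # scalar focal loss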
def compute_align_loss(model, desc_enc, example):
    """model: a nl2code decoder"""
    # find relevant columns
    root_node = example.tree
    rel_cols = list(
        reversed([
            val for val in model.ast_wrapper.find_all_descendants_of_type(
                root_node, 'column')
        ]))
    rel_tabs = list(
        reversed([
            val for val in model.ast_wrapper.find_all_descendants_of_type(
                root_node, 'table')
        ]))
    rel_vals = np.abs(
        list(
            reversed([
                val for val in model.ast_wrapper.find_all_descendants_of_type(
                    root_node, 'value')
            ])))

    rel_cols_t = paddle.to_tensor(sorted(list(set(rel_cols))), dtype='int64')
    rel_tabs_t = paddle.to_tensor(sorted(list(set(rel_tabs))), dtype='int64')
    rel_vals_t = paddle.to_tensor(sorted(list(set(rel_vals))), dtype='int64')

    mc_att_on_rel_col = desc_enc.m2c_align_mat.index_select(rel_cols_t, axis=1)
    mc_max_rel_att = mc_att_on_rel_col.max(axis=0)
    mc_max_rel_att = mc_max_rel_att.clip(min=1e-9)

    mt_att_on_rel_tab = desc_enc.m2t_align_mat.index_select(rel_tabs_t, axis=1)
    mt_max_rel_att = mt_att_on_rel_tab.max(axis=0)
    mt_max_rel_att = mt_max_rel_att.clip(min=1e-9)

    mv_att_on_rel_val = desc_enc.m2v_align_mat.index_select(rel_vals_t, axis=1)
    mv_max_rel_att = mv_att_on_rel_val.max(axis=0)
    mv_max_rel_att = mv_max_rel_att.clip(min=1e-9)

    #c_num = desc_enc.m2c_align_mat.shape[1]
    #un_rel_cols_t = paddle.to_tensor(sorted(list(set(range(c_num)) - set(rel_cols))), dtype='int64')
    #mc_att_on_unrel_col = desc_enc.m2c_align_mat.index_select(un_rel_cols_t, axis=1)
    #mc_max_unrel_att = mc_att_on_unrel_col.max(axis=0)
    #mc_max_unrel_att = mc_max_unrel_att.clip(min=1e-9)
    #mc_margin = paddle.log(mc_max_unrel_att).mean() - paddle.log(mc_max_rel_att).mean()

    #t_num = desc_enc.m2t_align_mat.shape[1]
    #if t_num > len(set(rel_tabs)):
    #    un_rel_tabs_t = paddle.to_tensor(sorted(list(set(range(t_num)) - set(rel_tabs))), dtype='int64')
    #    mt_att_on_unrel_tab = desc_enc.m2t_align_mat.index_select(un_rel_tabs_t, axis=1)
    #    mt_max_unrel_att = mt_att_on_unrel_tab.max(axis=0)
    #    mt_max_unrel_att = mt_max_unrel_att.clip(min=1e-9)
    #    mt_margin = paddle.log(mt_max_unrel_att).mean() - paddle.log(mt_max_rel_att).mean()
    #else:
    #    mt_margin = paddle.to_tensor([0.0])

    value_loss_weight = 2.0
    align_loss = - paddle.log(mc_max_rel_att).mean() \
                 - paddle.log(mt_max_rel_att).mean() \
                 - value_loss_weight * paddle.log(mv_max_rel_att).mean()
    return align_loss
def _probs_to_logits(self, probs, is_binary=False):
    r"""
    Converts probabilities into logits. For the binary, probs denotes the
    probability of occurrence of the event indexed by `1`. For the
    multi-dimensional, values of last axis denote the probabilities of
    occurrence of each of the events.
    """
    return (paddle.log(probs) - paddle.log1p(-probs)) \
        if is_binary else paddle.log(probs)
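# Quick check (illustrative, not from the source): for binary probs the
# conversion is logit(p) = log(p) - log(1 - p), so sigmoid(logit) recovers p.
import paddle
import paddle.nn.functional as F

p = paddle.to_tensor([0.1, 0.5, 0.9])
logits = paddle.log(p) - paddle.log1p(-p)
print(F.sigmoid(logits))  # ~[0.1, 0.5, 0.9]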
def focal_loss(inputs, targets, alpha=-1, gamma=2):
    class_range = torch.arange(1, inputs.shape[1] + 1)

    pos_pred = (1 - inputs) ** gamma * torch.log(inputs)
    neg_pred = inputs ** gamma * torch.log(1 - inputs)

    pos_loss = (targets == class_range) * pos_pred * alpha
    neg_loss = (targets != class_range) * neg_pred * (1 - alpha)
    loss = -(pos_loss + neg_loss)
    return loss.sum(axis=1)
def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)

    log_out1 = paddle.log(out1)
    log_out2 = paddle.log(out2)
    loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
            F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
    return {"DMLLoss": loss}
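# Sketch of the symmetric KL (DML) term above with two softmax outputs
# (illustrative tensors; the surrounding loss class and self.act are assumed):
import paddle
import paddle.nn.functional as F

out1 = F.softmax(paddle.randn([4, 10]), axis=-1)
out2 = F.softmax(paddle.randn([4, 10]), axis=-1)
dml = (F.kl_div(paddle.log(out1), out2, reduction='batchmean') +
       F.kl_div(paddle.log(out2), out1, reduction='batchmean')) / 2.0
print(dml)  # scalar, >= 0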
def kl_divergence(self, other):
    """The KL-divergence between two Categorical distributions.

    Args:
        other (Categorical): instance of Categorical. The data type is float32.

    Returns:
        Tensor: kl-divergence between two Categorical distributions.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            paddle.seed(200) # on CPU device
            y = paddle.rand([6])
            print(y)
            # [0.77663314 0.90824795 0.15685187
            #  0.04279523 0.34468332 0.7955718 ]

            cat = Categorical(x)
            cat2 = Categorical(y)

            cat.kl_divergence(cat2)
            # [0.071952]

    """
    name = self.name + '_kl_divergence'
    if not _non_static_mode():
        check_type(other, 'other', Categorical, 'kl_divergence')

    logits = self.logits - \
        paddle.max(self.logits, axis=-1, keepdim=True)
    other_logits = other.logits - paddle.max(
        other.logits, axis=-1, keepdim=True)
    e_logits = ops.exp(logits)
    other_e_logits = ops.exp(other_logits)
    z = paddle.sum(e_logits, axis=-1, keepdim=True)
    other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
    prob = e_logits / z
    kl = paddle.sum(
        prob * (logits - paddle.log(z) - other_logits + paddle.log(other_z)),
        axis=-1,
        keepdim=True,
        name=name)

    return kl
def _gumbel_softmax_sample(self, logit, tau=1, eps=1e-10):
    """
    Draw a sample from the Gumbel-Softmax distribution.

    Based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb
    (MIT license)
    """
    gumbel_noise = paddle.rand(logit.shape)
    gumbel_noise = -paddle.log(eps - paddle.log(gumbel_noise + eps))
    logit = logit + gumbel_noise
    return F.softmax(logit / tau, axis=1)
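# Usage sketch (illustrative; the enclosing class is assumed): lower tau pushes
# the sample toward one-hot, higher tau toward uniform.
import paddle
import paddle.nn.functional as F

logit = paddle.to_tensor([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]])
gumbel_noise = -paddle.log(1e-10 - paddle.log(paddle.rand(logit.shape) + 1e-10))
sample = F.softmax((logit + gumbel_noise) / 0.5, axis=1)
print(sample.sum(axis=1))  # each row sums to 1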
def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)
    if len(out1.shape) < 2:
        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
    else:
        loss = self.jskl_loss(out1, out2)
    return loss
def get_l1_target(self, l1_target, gt, stride, x_shifts, y_shifts, eps=1e-8):
    l1_target[:, 0] = gt[:, 0] / stride - x_shifts
    l1_target[:, 1] = gt[:, 1] / stride - y_shifts
    l1_target[:, 2] = paddle.log(gt[:, 2] / stride + eps)
    l1_target[:, 3] = paddle.log(gt[:, 3] / stride + eps)
    l1_target.stop_gradient = True
    return l1_target
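# Worked sketch (illustrative numbers; the enclosing detection head is assumed):
# for a ground-truth box (cx, cy, w, h) on a stride-8 level, the L1 target is
# the center offset from the assigned grid cell plus log-scaled width/height.
import paddle

gt = paddle.to_tensor([[100.0, 60.0, 32.0, 24.0]])   # cx, cy, w, h in pixels
x_shifts = paddle.to_tensor([12.0])                  # grid x of the matched cell
y_shifts = paddle.to_tensor([7.0])                   # grid y of the matched cell
l1_target = paddle.zeros([1, 4])
stride, eps = 8, 1e-8
l1_target[:, 0] = gt[:, 0] / stride - x_shifts         # 100/8 - 12 = 0.5
l1_target[:, 1] = gt[:, 1] / stride - y_shifts         # 60/8 - 7 = 0.5
l1_target[:, 2] = paddle.log(gt[:, 2] / stride + eps)  # log(4)
l1_target[:, 3] = paddle.log(gt[:, 3] / stride + eps)  # log(3)
print(l1_target.numpy())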
def sample_from_softmax(self, logits, use_softmax_sample=True):
    if use_softmax_sample:
        #uniform_noise = paddle.uniform(logits.shape, dtype="float32", min=0, max=1)
        uniform_noise = paddle.rand(logits.shape, dtype="float32")
        gumbel_noise = -paddle.log(-paddle.log(uniform_noise + 1e-9) + 1e-9)
    else:
        gumbel_noise = paddle.zeros_like(logits)
    # softmax_sample equal to sampled_tokids.unsqueeze(-1)
    softmax_sample = paddle.argmax(F.softmax(logits + gumbel_noise), axis=-1)
    # one hot
    return F.one_hot(softmax_sample, logits.shape[-1])
def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)
    if self.use_log:
        # for recognition distillation, log is needed for feature map
        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        loss = (
            self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
    else:
        # for detection distillation log is not needed
        loss = self.jskl_loss(out1, out2)
    return loss
def __init__(self, range_max, n_sample):
    with paddle.no_grad():
        self.range_max = range_max
        log_indices = paddle.log(
            paddle.arange(1., range_max + 2., 1., dtype=global_dtype))
        self.dist = (log_indices[1:] - log_indices[:-1]) / log_indices[-1]

        self.log_q = paddle.cast(
            paddle.log(
                paddle.exp(-(
                    paddle.log1p(-paddle.cast(self.dist, dtype=global_dtype))
                    * 2 * n_sample)) - 1),
            dtype=global_dtype)

    self.n_sample = n_sample
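# Sanity-check sketch (not from the source; global_dtype is assumed to be
# 'float32'): `dist` is the log-uniform (Zipfian) distribution
# P(k) = (log(k + 2) - log(k + 1)) / log(range_max + 1), which sums to 1.
import paddle

range_max = 1000
log_indices = paddle.log(paddle.arange(1., range_max + 2., 1., dtype='float32'))
dist = (log_indices[1:] - log_indices[:-1]) / log_indices[-1]
print(dist.shape)  # [1000]
print(dist.sum())  # ~1.0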
def softmax_with_cross_entropy(self, shard_logit, shard_one_hot):
    shard_max = paddle.max(shard_logit, axis=1, keepdim=True)
    global_max = shard_max
    paddle.distributed.all_reduce(global_max,
                                  op=paddle.distributed.ReduceOp.MAX)
    shard_logit_new = paddle.subtract(shard_logit, global_max)

    shard_exp = paddle.exp(shard_logit_new)
    shard_demon = paddle.sum(shard_exp, axis=1, keepdim=True)
    global_demon = shard_demon
    paddle.distributed.all_reduce(global_demon,
                                  op=paddle.distributed.ReduceOp.SUM)

    global_log_demon = paddle.log(global_demon)
    shard_log_prob = shard_logit_new - global_log_demon
    shard_prob = paddle.exp(shard_log_prob)

    target_log_prob = paddle.min(shard_log_prob * shard_one_hot,
                                 axis=1,
                                 keepdim=True)
    shard_loss = paddle.scale(target_log_prob, scale=-1.0)
    #TODO paddle.distributed.reducescatter not found
    global_loss = paddle.fluid.layers.collective._c_reducescatter(
        shard_loss, nranks=self.nranks, use_calc_stream=True)
    return global_loss, shard_prob
def log_prob(self, value):
    """Log probabilities of the given category. Refer to ``probs`` method.

    Args:
        value (Tensor): The input tensor represents the selected category index.

    Returns:
        Tensor: Log probability.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            cat = Categorical(x)

            value = paddle.to_tensor([2,1,3])
            cat.log_prob(value)
            # [-5.10271 -2.22287 -1.31061]

    """
    name = self.name + '_log_prob'

    return paddle.log(self.probs(value), name=name)
def entropy(self):
    """Shannon entropy in nats.

    Returns:
        Tensor: Shannon entropy of Categorical distribution. The data type is float32.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            cat = Categorical(x)

            cat.entropy()
            # [1.77528]

    """
    name = self.name + '_entropy'
    logits = self.logits - \
        paddle.max(self.logits, axis=-1, keepdim=True)
    e_logits = ops.exp(logits)
    z = paddle.sum(e_logits, axis=-1, keepdim=True)
    prob = e_logits / z

    neg_entropy = paddle.sum(prob * (logits - paddle.log(z)), axis=-1)
    entropy = paddle.scale(neg_entropy, scale=-1.0, name=name)
    return entropy
def forward(self):
    fpn_rois = self.input('FpnRois', 0)
    areas = self.bbox_area(fpn_rois)
    scale = paddle.sqrt(areas)
    num_level = self.max_level - self.min_level + 1
    target_level = paddle.log(scale / self.refer_scale + 1e-06) / np.log(2)
    target_level = paddle.floor(self.refer_level + target_level)
    target_level = paddle.clip(target_level,
                               min=self.min_level,
                               max=self.max_level)

    rois = list()
    rois_idx_order = list()

    for level in range(self.min_level, self.max_level + 1):
        level_tensor = paddle.full_like(target_level, fill_value=level)
        res = paddle.equal(target_level, level_tensor)
        res = paddle.squeeze(res, axis=1)
        res = paddle.cast(res, dtype='int32')
        index = paddle.nonzero(res)
        roi = paddle.gather(fpn_rois, index, axis=0)
        rois.append(roi)
        rois_idx_order.append(index)
    rois_idx_order = paddle.concat(rois_idx_order, axis=0)
    size = paddle.shape(rois_idx_order)[0]
    _, rois_idx_restore = paddle.topk(rois_idx_order,
                                      axis=0,
                                      sorted=True,
                                      largest=False,
                                      k=size)
    #rois_idx_restore = paddle.cast(rois_idx_restore, dtype='int32')
    return {'MultiFpnRois': rois, 'RestoreIndex': [rois_idx_restore]}
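# Worked example (illustrative; refer_level=4 and refer_scale=224 from the FPN
# paper are assumptions, not taken from the source): a RoI's target level is
# floor(refer_level + log2(sqrt(area) / refer_scale)), clipped to [min, max].
import numpy as np
import paddle

refer_level, refer_scale, min_level, max_level = 4, 224, 2, 5
areas = paddle.to_tensor([[112.0 * 112.0], [448.0 * 448.0]])  # two RoI areas
scale = paddle.sqrt(areas)
target_level = paddle.log(scale / refer_scale + 1e-06) / np.log(2)
target_level = paddle.floor(refer_level + target_level)
target_level = paddle.clip(target_level, min=min_level, max=max_level)
print(target_level.numpy())  # [[3.], [5.]] -> assigned to P3 and P5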
def warp_coordinates(self, coordinates):
    theta = self.theta.astype('float32')
    theta = theta.unsqueeze(1)
    coordinates = coordinates.unsqueeze(-1)
    # If x1: (1, 5, 2, 2), x2: (10, 100, 2, 1),
    # torch.matmul can broadcast x1, x2 to (10, 100, ...).
    # In PDPD, it should be done manually.
    theta_part_a = theta[:, :, :, :2]
    theta_part_b = theta[:, :, :, 2:]

    # TODO: paddle.matmul has no double_grad_op, use 'paddle.fluid.layers.matmul'
    transformed = paddle.fluid.layers.matmul(
        *broadcast(theta_part_a, coordinates)) + theta_part_b
    transformed = transformed.squeeze(-1)

    if self.tps:
        control_points = self.control_points.astype('float32')
        control_params = self.control_params.astype('float32')
        distances = coordinates.reshape(
            (coordinates.shape[0], -1, 1, 2)) - control_points.reshape(
                (1, 1, -1, 2))
        distances = distances.abs().sum(-1)

        result = distances * distances
        result = result * paddle.log(distances + 1e-6)
        result = result * control_params
        result = result.sum(2).reshape((self.bs, coordinates.shape[1], 1))
        transformed = transformed + result

    return transformed
def forward(self, data, target, *mems):
    if not mems:
        batch_size = data.shape[0]
        mems = self.init_mems(batch_size, self.d_model)

    hidden, new_mems = self._forward(data, mems=mems)

    # TODO(FrostML): use getitem.
    tgt_len = target.shape[1]
    pred_hid = paddle.slice(hidden, [1], [-tgt_len], [hidden.shape[1]])
    if self.sample_softmax > 0 and self.training:
        assert self.tie_weight, "tie_weight must be True if sample_softmax > 0"
        logit = sample_logits(self.word_emb, self.out_layer.bias, target,
                              pred_hid, self.sampler)
        loss = -paddle.log(F.softmax(logit, axis=-1))[:, :, 0]
    else:
        loss = self.crit(
            paddle.reshape(pred_hid, shape=[-1, pred_hid.shape[-1]]),
            paddle.reshape(target, shape=[-1]))

    if new_mems is None:
        return [loss.mean()]
    else:
        return [loss.mean()] + new_mems
def logstd(self):
    """The log standard deviation of the Normal distribution."""
    try:
        return self._logstd
    except AttributeError:
        # computed lazily on first access, then cached
        self._logstd = paddle.log(self._std)
        return self._logstd
def build_inv_delta_C_paddle(self, C):
    """ Return inv_delta_C which is needed to calculate T """
    F = self.F
    hat_C = paddle.zeros((F, F), dtype='float32')  # F x F
    for i in range(0, F):
        for j in range(i, F):
            if i == j:
                hat_C[i, j] = 1
            else:
                r = paddle.norm(C[i] - C[j])
                hat_C[i, j] = r
                hat_C[j, i] = r
    hat_C = (hat_C**2) * paddle.log(hat_C)
    delta_C = paddle.concat(  # F+3 x F+3
        [
            paddle.concat([paddle.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
            paddle.concat(
                [paddle.zeros((2, 3)), paddle.transpose(C, perm=[1, 0])],
                axis=1),  # 2 x F+3
            paddle.concat(
                [paddle.zeros((1, 3)), paddle.ones((1, F))], axis=1)  # 1 x F+3
        ],
        axis=0)
    inv_delta_C = paddle.inverse(delta_C)
    return inv_delta_C  # F+3 x F+3
def forward(self, x, bev=None):
    x = self.block1(x)
    up1 = self.deconv1(x)
    if self._use_bev:
        bev[:, -1] = paddle.clip(
            paddle.log(1 + bev[:, -1]) / np.log(16.0), max=1.0)
        x = paddle.concat([x, self.bev_extractor(bev)], axis=1)
    x = self.block2(x)
    up2 = self.deconv2(x)
    x = self.block3(x)
    up3 = self.deconv3(x)
    x = paddle.concat([up1, up2, up3], axis=1)
    box_preds = self.conv_box(x)
    cls_preds = self.conv_cls(x)
    # [N, C, y(H), x(W)]
    box_preds = box_preds.transpose((0, 2, 3, 1))
    cls_preds = cls_preds.transpose((0, 2, 3, 1))
    ret_dict = {
        "box_preds": box_preds,
        "cls_preds": cls_preds,
    }
    if self._use_direction_classifier:
        dir_cls_preds = self.conv_dir_cls(x)
        dir_cls_preds = dir_cls_preds.transpose((0, 2, 3, 1))
        ret_dict["dir_cls_preds"] = dir_cls_preds
    return ret_dict
def relative_position_bucket(relative_position,
                             bidirectional=True,
                             num_buckets=32,
                             max_distance=128):
    ret = 0
    if bidirectional:
        num_buckets //= 2
        ret += (relative_position > 0).astype(paddle.int64) * num_buckets
        n = paddle.abs(relative_position)
    else:
        # elementwise max against zero; paddle.max is a reduction, so use paddle.maximum
        n = paddle.maximum(-relative_position,
                           paddle.zeros_like(relative_position))
    # now n is in the range [0, inf)

    # half of the buckets are for exact increments in positions
    max_exact = num_buckets // 2
    is_small = n < max_exact

    # The other half of the buckets are for logarithmically bigger bins in
    # positions up to max_distance
    val_if_large = max_exact + (
        paddle.log(n.astype(paddle.float32) / max_exact) /
        math.log(max_distance / max_exact) *
        (num_buckets - max_exact)).astype(paddle.int64)
    val_if_large = paddle.minimum(
        val_if_large, paddle.full_like(val_if_large, num_buckets - 1))

    ret += paddle.where(is_small, n, val_if_large)
    return ret
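# Usage sketch (illustrative inputs): small relative distances each get their
# own bucket, larger ones fall into shared log-spaced buckets, as in the T5
# relative position bias.
import math
import paddle

rel_pos = paddle.to_tensor([-64, -3, -1, 0, 1, 3, 64], dtype=paddle.int64)
buckets = relative_position_bucket(rel_pos, bidirectional=True,
                                    num_buckets=32, max_distance=128)
print(buckets.numpy())  # exact buckets for nearby offsets, coarse ones for far offsets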
def __call__(self, preds, targets):
    heatmaps_gt, mask = targets
    heatmaps_pred = preds[0]
    scalemaps_pred = preds[1]

    heatmaps_scaled_gt = paddle.where(
        heatmaps_gt > 0,
        0.5 * heatmaps_gt *
        (1 + (1 + (scalemaps_pred - 1.) * paddle.log(heatmaps_gt + 1e-10))**2),
        heatmaps_gt)

    regularizer_loss = paddle.mean(
        paddle.pow((scalemaps_pred - 1.) * (heatmaps_gt > 0).astype(float), 2))
    omiga = 0.01
    # thres = 2**(-1/omiga), threshold for positive weight
    hm_weight = heatmaps_scaled_gt**(
        omiga) * paddle.abs(1 - heatmaps_pred) + paddle.abs(
            heatmaps_pred) * (1 - heatmaps_scaled_gt**(omiga))

    loss = (((heatmaps_pred - heatmaps_scaled_gt)**2) *
            mask.cast('float').unsqueeze(1)) * hm_weight
    loss = loss.mean()
    loss = self.loss_factor * (loss + 1.0 * regularizer_loss)
    return loss
def build_inv_delta_C_paddle(self, C):
    """ Return inv_delta_C which is needed to calculate T """
    F = self.F
    hat_eye = paddle.eye(F, dtype='float64')  # F x F
    hat_C = paddle.norm(
        C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
    hat_C = (hat_C**2) * paddle.log(hat_C)
    delta_C = paddle.concat(  # F+3 x F+3
        [
            paddle.concat(
                [paddle.ones((F, 1), dtype='float64'), C, hat_C],
                axis=1),  # F x F+3
            paddle.concat(
                [
                    paddle.zeros((2, 3), dtype='float64'),
                    paddle.transpose(C, perm=[1, 0])
                ],
                axis=1),  # 2 x F+3
            paddle.concat(
                [
                    paddle.zeros((1, 3), dtype='float64'),
                    paddle.ones((1, F), dtype='float64')
                ],
                axis=1)  # 1 x F+3
        ],
        axis=0)
    inv_delta_C = paddle.inverse(delta_C)
    return inv_delta_C  # F+3 x F+3
def forward(self, true_binary, rule_masks, raw_logits):
    """
    tbd
    """
    if cmd_args.loss_type == 'binary':
        exp_pred = paddle.exp(raw_logits) * rule_masks
        norm = paddle.sum(exp_pred, axis=2, keepdim=True)
        prob = paddle.divide(exp_pred, norm)
        return F.binary_cross_entropy(
            prob, true_binary) * cmd_args.max_decode_steps

    if cmd_args.loss_type == 'perplexity':
        my_perp_loss = MyPerpLoss()
        return my_perp_loss(true_binary, rule_masks, raw_logits)

    if cmd_args.loss_type == 'vanilla':
        exp_pred = paddle.exp(raw_logits) * rule_masks + 1e-30
        norm = paddle.sum(exp_pred, 2, keepdim=True)
        prob = paddle.divide(exp_pred, norm)

        ll = paddle.abs(paddle.sum(true_binary * prob, 2))
        mask = 1 - rule_masks[:, :, -1]
        logll = mask * paddle.log(ll)

        loss = -paddle.sum(logll) / true_binary.shape[1]

        return loss

    print('unknown loss type %s' % cmd_args.loss_type)
    raise NotImplementedError