def test_clamp():
    """Regression test for ``F.clip`` when one of the bounds is exactly 0.

    A bound of 0 used to be recognized as ``False``, which made ``F.clip``
    fall into the wrong condition. Both a zero ``lower`` and a zero ``upper``
    are checked against ``np.clip``.
    """
    samples = np.linspace(-6, 6, dtype="float32")

    # Zero as the lower bound.
    clipped_up = F.clip(tensor(samples) + 3, 0, 6)
    np.testing.assert_allclose(clipped_up.numpy(), np.clip(samples + 3, 0, 6))

    # Zero as the upper bound.
    clipped_down = F.clip(tensor(samples) - 3, -6, 0)
    np.testing.assert_allclose(clipped_down.numpy(), np.clip(samples - 3, -6, 0))
def get_clipped_boxes(boxes, hw):
    """Clip box coordinates into the image region.

    Columns ``0::4`` and ``2::4`` of ``boxes`` are clipped to ``[0, hw[1]]``
    and columns ``1::4`` and ``3::4`` to ``[0, hw[0]]``. The four clipped
    column groups are then concatenated along axis 1 in the order
    x1, y1, x2, y2.
    """
    # (column offset, index into ``hw`` used as the upper bound)
    column_limits = ((0, 1), (1, 0), (2, 1), (3, 0))
    clipped_columns = [
        F.clip(boxes[:, offset::4], lower=0, upper=hw[limit_idx])
        for offset, limit_idx in column_limits
    ]
    return F.concat(clipped_columns, axis=1)
def forward(self, input):
    """
    Forward pass of the function.

    ``swish_function`` is always applied with the module's configuration.
    Its output is additionally clipped from below at 0 only when neither
    ``self.swish`` nor ``self.eswish`` is enabled and ``self.flatten`` is.
    """
    # The only configuration whose result differs from the plain call.
    clip_below_zero = (
        self.swish is False
        and self.eswish is False
        and self.flatten is not False
    )

    activated = swish_function(
        input, self.swish, self.eswish, self.beta, self.param
    )
    if clip_below_zero:
        return F.clip(activated, lower=0)
    return activated
def SSIM(x, y, md=1):
    """Return an SSIM-based dissimilarity between ``x`` and ``y``.

    Local statistics are computed with average pooling over a window of
    size ``2 * md + 1``. The result is ``(1 - SSIM) / 2`` clipped to
    ``[0, 1]``.
    """
    window = 2 * md + 1
    c1, c2 = 0.01**2, 0.03**2

    def local_mean(t):
        # A fresh pooling module per call, exactly as each original call site did.
        return nn.AvgPool2d(window, 1, 0, mode="average")(t)

    mean_x = local_mean(x)
    mean_y = local_mean(y)
    mean_xy = mean_x * mean_y
    mean_x_sq = F.pow(mean_x, 2)
    mean_y_sq = F.pow(mean_y, 2)

    # Local (co)variances via E[ab] - E[a]E[b].
    var_x = local_mean(x * x) - mean_x_sq
    var_y = local_mean(y * y) - mean_y_sq
    cov_xy = local_mean(x * y) - mean_xy

    numerator = (2 * mean_xy + c1) * (2 * cov_xy + c2)
    denominator = (mean_x_sq + mean_y_sq + c1) * (var_x + var_y + c2)
    similarity = numerator / denominator

    return F.clip((1 - similarity) / 2, 0, 1)
def _weighted_ssim(x, y, weight, c1=float('inf'), c2=9e-6, weight_epsilon=0.01): def _avg_pool3x3(x): xx = F.avg_pool2d(x, kernel_size=3, stride=1) return xx if c1 == float('inf') and c2 == float('inf'): raise ValueError( 'Both c1 and c2 are infinite, SSIM loss is zero. This is ' 'likely unintended.') average_pooled_weight = _avg_pool3x3(weight) weight_plus_epsilon = weight + weight_epsilon inverse_average_pooled_weight = 1.0 / (average_pooled_weight + weight_epsilon) def weighted_avg_pool3x3(z): wighted_avg = _avg_pool3x3(z * weight_plus_epsilon) return wighted_avg * inverse_average_pooled_weight mu_x = weighted_avg_pool3x3(x) mu_y = weighted_avg_pool3x3(y) sigma_x = weighted_avg_pool3x3(x**2) - mu_x**2 sigma_y = weighted_avg_pool3x3(y**2) - mu_y**2 sigma_xy = weighted_avg_pool3x3(x * y) - mu_x * mu_y if c1 == float('inf'): ssim_n = (2 * sigma_xy + c2) ssim_d = (sigma_x + sigma_y + c2) elif c2 == float('inf'): ssim_n = 2 * mu_x * mu_y + c1 ssim_d = mu_x**2 + mu_y**2 + c1 else: ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2) ssim_d = (mu_x**2 + mu_y**2 + c1) * (sigma_x + sigma_y + c2) result = ssim_n / ssim_d return F.clip((1 - result) / 2, 0, 1), average_pooled_weight
def fake_quant(x, scale, qmin=-128, qmax=127):
    """Simulate quantization of ``x`` with step size ``scale``.

    ``x`` is divided by ``scale``, rounded, clamped to ``[qmin, qmax]`` and
    multiplied by ``scale`` again.

    Args:
        x: values to fake-quantize.
        scale: quantization step size.
        qmin: lower clamp bound on the rounded values. Defaults to -128.
        qmax: upper clamp bound on the rounded values. Defaults to 127.

    Returns:
        The fake-quantized values.

    The defaults reproduce the previously hard-coded signed 8-bit range, so
    existing two-argument callers get the same result.
    """
    x = x / scale
    x = F.round(x)
    x = F.clip(x, qmin, qmax)
    x = x * scale
    return x
def fake_quant(x, scale, qmin, qmax):
    """Simulate quantization of ``x`` with step size ``scale``.

    The input is divided by ``scale``, rounded, clamped to ``[qmin, qmax]``
    and multiplied by ``scale`` again.
    """
    quantized = F.clip(F.round(x / scale), qmin, qmax)
    return quantized * scale
def forward(self, inps):
    """Clip the first input to the ``lower``/``upper`` bounds in ``self.param``."""
    data = inps[0]
    lower_bound = self.param["lower"]
    upper_bound = self.param["upper"]
    return F.clip(data, lower_bound, upper_bound)
def f(x, lower, upper):
    """Return ``x`` clipped to the range ``[lower, upper]`` via ``F.clip``."""
    return F.clip(x, lower, upper)
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Build per-anchor training targets for every image in the batch.

    For each image, a set of candidate anchors is selected per ground-truth
    box, filtered by an adaptive IoU threshold (mean + std of the candidate
    IoUs), and each anchor is then matched to the ground-truth box with
    the highest remaining IoU.

    Args:
        anchors_list: per-level anchors, iterated together with
            ``self.cfg.stride``.
            NOTE(review): presumably (x, y) anchor points per feature
            level -- confirm against the anchor generator.
        batched_gt_boxes: ground-truth boxes indexed as ``[image, box, :]``;
            columns 0-3 are used as box coordinates and column 4 as label.
        batched_num_gts: number of valid boxes per image; only the first
            ``batched_num_gts[bid]`` boxes of image ``bid`` are used.

    Returns:
        A tuple ``(labels, offsets, ctrness)``, each stacked over the batch
        along axis 0 and detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    # Anchors of all levels merged along axis 0.
    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        # Keep only the valid ground-truth boxes of this image.
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        ious = []
        candidate_idxs = []
        # Running offset of the current level inside the concatenated anchors.
        base = 0
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # IoU between each gt box and a box spanning
            # +/- stride * anchor_scale / 2 around every anchor of this level.
            ious.append(
                layers.get_iou(
                    gt_boxes[:, :4],
                    F.concat([
                        anchors_i - stride * self.cfg.anchor_scale / 2,
                        anchors_i + stride * self.cfg.anchor_scale / 2,
                    ], axis=1)))
            # Euclidean distance between gt box centres and this level's anchors.
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            distances = F.sqrt(
                F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2, axis=2))
            # NOTE(review): relies on F.topk's default ordering; presumably
            # this selects the anchor_topk closest anchors -- confirm.
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            # Shift level-local indices so they address the concatenated anchors.
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)

        # Adaptive per-gt threshold: mean + std of the candidates' IoUs.
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                    F.std(candidate_ious, axis=1, keepdims=True))
        # Foreground = is a candidate AND IoU reaches the threshold.
        is_foreground = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

        # NOTE(review): presumably point_coder.encode yields the distances
        # from the anchor to the box sides, so a positive minimum means the
        # anchor lies inside the gt box -- confirm.
        is_in_boxes = F.min(self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)), axis=2) > 0

        # Invalidate (gt, anchor) pairs that failed either test.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1

        # For every anchor, pick the gt box with the highest remaining IoU.
        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        # Anchors with no valid gt box get label 0.
        labels[anchor_max_iou == -1] = 0
        offsets = self.point_coder.encode(all_level_anchors, gt_boxes_matched[:, :4])

        # Centerness: sqrt of the product of min/max ratios of the
        # (0, 2) and (1, 3) offset pairs, with each ratio clipped at 0.
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1), lower=0)
            * F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), lower=0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes, im_info):
    # pylint: disable=too-many-statements
    """Compute the positive-bag and negative-bag losses for a batch.

    Args:
        anchors: anchors shared by all images; decoded and encoded through
            ``self.box_coder`` and indexed by anchor id.
        pred_logits: predicted class logits, indexed ``[image, anchor, class]``.
        pred_offsets: predicted box offsets, indexed ``[image, anchor, :]``.
        gt_boxes: ground-truth boxes indexed ``[image, box, :]``; columns 0-3
            are box coordinates and column 4 is the class id (0 is
            background).
        im_info: per-image info; ``im_info[bid, 4]`` is read as the number
            of valid ground-truth boxes of image ``bid``.

    Returns:
        dict with keys ``"total_loss"``, ``"pos_loss"`` and ``"neg_loss"``.
    """

    def positive_bag_loss(logits, axis=1):
        # Weight each bag member by 1 / (1 - p), normalised over ``axis``,
        # then reduce the weighted probabilities over the same axis.
        weight = 1.0 / (1.0 - logits)
        weight /= weight.sum(axis=axis, keepdims=True)
        # Reduce over ``axis`` (previously hard-coded to 1, which ignored
        # the parameter; the result for the default axis=1 is unchanged).
        bag_prob = (weight * logits).sum(axis=axis)
        return -layers.safelog(bag_prob)

    def negative_bag_loss(logits, gamma):
        # p ** gamma * -log(1 - p)
        return (logits**gamma) * (-layers.safelog(1.0 - logits))

    pred_scores = F.sigmoid(pred_logits)
    box_prob_list = []
    positive_losses = []
    clamp_eps = 1e-7
    bucket_size = self.cfg.bucket_size

    for bid in range(im_info.shape[0]):
        boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
        # id 0 is used for background classes, so -1 first
        labels = boxes_info[:, 4].astype("int32") - 1

        pred_box = self.box_coder.decode(anchors, pred_offsets[bid]).detach()
        overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
        thresh1 = self.cfg.box_iou_threshold
        # Per-gt maximum overlap, kept strictly above thresh1 so that the
        # denominator (thresh2 - thresh1) below stays positive.
        thresh2 = F.clip(overlaps.max(axis=1, keepdims=True),
                         lower=thresh1 + clamp_eps,
                         upper=1.0)
        # Linearly rescale overlaps from [thresh1, thresh2] to [0, 1].
        gt_pred_prob = F.clip((overlaps - thresh1) / (thresh2 - thresh1),
                              lower=0,
                              upper=1.0)

        image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
        # guarantee that nonzero_idx is not empty
        if gt_pred_prob.max() > clamp_eps:
            _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
            # since nonzeros is only 1 dim, use num_anchor to get real indices
            num_anchors = gt_pred_prob.shape[1]
            anchors_idx = nonzero_idx % num_anchors
            gt_idx = nonzero_idx // num_anchors
            image_boxes_prob[anchors_idx, labels[gt_idx]] = gt_pred_prob[gt_idx, anchors_idx]

        box_prob_list.append(image_boxes_prob)

        # construct bags for objects
        match_quality_matrix = layers.get_iou(boxes_info[:, :4], anchors).detach()
        num_gt = match_quality_matrix.shape[0]
        # The ``bucket_size`` anchors with the highest IoU for every gt box.
        _, matched_idx = F.topk(
            match_quality_matrix,
            k=bucket_size,
            descending=True,
            no_sort=True,
        )
        matched_idx = matched_idx.detach()
        matched_idx_flatten = matched_idx.reshape(-1)

        # For every bag member, pick the predicted score of the gt's class.
        gather_idx = labels.reshape(-1, 1)
        gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))
        gather_src = pred_scores[bid, matched_idx_flatten]
        gather_src = gather_src.reshape(num_gt, bucket_size, -1)
        matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

        # Regression targets: each gt box repeated for all anchors of its bag.
        topk_anchors = anchors[matched_idx_flatten]
        boxes_broad_cast = F.broadcast_to(
            F.expand_dims(boxes_info[:, :4], axis=1),
            (num_gt, bucket_size, 4)).reshape(-1, 4)
        matched_offsets = self.box_coder.encode(topk_anchors, boxes_broad_cast)

        reg_loss = layers.smooth_l1_loss(
            pred_offsets[bid, matched_idx_flatten],
            matched_offsets,
            beta=self.cfg.smooth_l1_beta).sum(
                axis=-1) * self.cfg.reg_loss_weight
        # Turn the regression loss into a score: exp(-loss).
        matched_reg_scores = F.exp(-reg_loss)

        positive_losses.append(
            positive_bag_loss(matched_score *
                              matched_reg_scores.reshape(-1, bucket_size),
                              axis=1))

    # Normalise by the number of gt boxes in the batch, never below 1.
    num_foreground = im_info[:, 4].sum()
    pos_loss = F.concat(positive_losses).sum() / F.maximum(
        1.0, num_foreground)
    box_probs = F.stack(box_prob_list, axis=0)

    neg_loss = negative_bag_loss(
        pred_scores * (1 - box_probs),
        self.cfg.focal_loss_gamma).sum() / F.maximum(
            1.0, num_foreground * bucket_size)

    alpha = self.cfg.focal_loss_alpha
    pos_loss = pos_loss * alpha
    neg_loss = neg_loss * (1 - alpha)
    loss_dict = {
        "total_loss": pos_loss + neg_loss,
        "pos_loss": pos_loss,
        "neg_loss": neg_loss,
    }
    return loss_dict