def forward(self, classifications, regressions, anchors, annotations, **kwargs):
    """Focal classification loss + smooth-L1 box regression loss.

    Args:
        classifications: class scores indexed as [batch, anchor, class];
            treated as probabilities and clamped to (1e-4, 1 - 1e-4).
        regressions: predicted box deltas indexed as [batch, anchor, 4],
            ordered (dy, dx, dh, dw) to match the stacked targets below.
        anchors: anchor boxes indexed as [1, anchor, 4]; only anchors[0] is
            used (all images share one anchor set).  Column layout is
            (y1, x1, y2, x2), as the width/height computations below show.
        annotations: per-image ground truth; annotations[j] rows are
            (x1, y1, x2, y2, label), with label == -1 marking padding rows.
        **kwargs: optional 'imgs' (plus 'obj_list') triggers the debug
            visualization at the end.

    Returns:
        Tuple of (mean classification loss, mean regression loss * 50),
        each a tensor of shape (1,).
    """
    alpha = 0.25  # focal-loss class-balance factor
    gamma = 2.0  # focal-loss focusing exponent
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    # Anchor geometry; columns are (y1, x1, y2, x2).
    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j]
        # Drop padding rows (label == -1).
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        # Keep probabilities away from 0/1 so the logs below stay finite.
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] == 0:
            # No ground truth: every anchor is background, so only the
            # background part of the focal loss applies; regression loss is 0.
            if torch.cuda.is_available():
                alpha_factor = torch.ones_like(classification) * alpha
                alpha_factor = alpha_factor.cuda(self.gpu_id)
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                cls_loss = focal_weight * bce

                regression_losses.append(torch.tensor(0).to(dtype).cuda(self.gpu_id))
                classification_losses.append(cls_loss.sum())
            else:
                alpha_factor = torch.ones_like(classification) * alpha
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))
                cls_loss = focal_weight * bce

                regression_losses.append(torch.tensor(0).to(dtype))
                classification_losses.append(cls_loss.sum())
            continue

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        # Best-matching ground-truth box for every anchor.
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # compute the loss for classification
        # targets: -1 = ignore, 0 = background, 1 = assigned class.
        targets = torch.ones_like(classification) * -1
        if torch.cuda.is_available():
            targets = targets.cuda(self.gpu_id)

        # IoU < 0.4 -> background.
        targets[torch.lt(IoU_max, 0.4), :] = 0

        # IoU >= 0.5 -> positive anchor; 0.4..0.5 stays ignored (-1).
        positive_indices = torch.ge(IoU_max, 0.5)

        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        # One-hot: mark the matched ground-truth class of each positive anchor.
        targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

        alpha_factor = torch.ones_like(targets) * alpha
        if torch.cuda.is_available():
            alpha_factor = alpha_factor.cuda(self.gpu_id)

        # alpha for positives, (1 - alpha) for negatives.
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
        # (1 - p) for positives, p for negatives: down-weights easy examples.
        focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

        cls_loss = focal_weight * bce

        zeros = torch.zeros_like(cls_loss)
        if torch.cuda.is_available():
            zeros = zeros.cuda(self.gpu_id)
        # Zero-out entries in the ignore band (targets == -1).
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

        # Normalize by the positive-anchor count (at least 1).
        classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            # Ground-truth geometry; annotation columns are (x1, y1, x2, y2).
            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            # Stack as (dy, dx, dh, dw) to match the regression head's order.
            targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(targets - regression[positive_indices, :])

            # Smooth-L1 with beta = 1/9: quadratic below the threshold,
            # linear above it.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda(self.gpu_id))
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    # debug: draw and dump current predictions when the caller passes 'imgs'.
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        obj_list = kwargs.get('obj_list', None)
        out = postprocess(imgs.detach(),
                          torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                          regressions.detach(),
                          classifications.detach(),
                          regressBoxes, clipBoxes, 0.5, 0.3)
        imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
        # Undo ImageNet normalization back to uint8 RGB, then to BGR for OpenCV.
        imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
        imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
        display(out, imgs, obj_list, imshow=False, imwrite=True)

    # The *50 regression weight follows the automl EfficientDet config.
    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True) * 50  # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
def forward(self, act_classifications, obj_classifications, obj_regressions, anchors, inst_annotations, **kwargs):
    """Joint HOI loss: focal object-classification loss, BCE action loss,
    and smooth-L1 box regression loss.

    Args:
        act_classifications: action scores, [batch, anchor, num_act_classes].
        obj_classifications: object scores, [batch, anchor, num_obj_classes].
        obj_regressions: predicted box deltas, [batch, anchor, 4],
            ordered (dy, dx, dh, dw).
        anchors: [1, anchor, 4]; only anchors[0] is used (shared anchor set).
            Columns are (y1, x1, y2, x2) per the width/height math below.
        inst_annotations: [batch, num_inst, 5 + num_act_classes]; the first 5
            columns are (x1, y1, x2, y2, obj_label) with obj_label == -1 for
            padding, the rest is a multi-hot action vector.
        **kwargs: optional 'imgs' (plus 'obj_list') triggers the debug
            visualization at the end.

    Returns:
        Tuple of (mean action loss, mean object loss, mean regression loss),
        each a tensor of shape (1,).

    Bug fixed: the debug branch previously referenced an undefined name
    `regressions`; it now uses the `obj_regressions` parameter.
    """
    anchors = anchors.float()
    act_classifications = act_classifications.float()
    alpha = 0.25  # focal-loss class-balance factor
    gamma = 2.0  # focal-loss focusing exponent
    batch_size = act_classifications.shape[0]
    act_classification_losses = []
    obj_classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    # Anchor geometry; columns are (y1, x1, y2, x2).
    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        act_classification = act_classifications[j, :, :]  # (h*w*feat_num, num_act_classes)
        obj_classification = obj_classifications[j, :, :]  # (h*w*feat_num, num_obj_classes)
        regression = obj_regressions[j, :, :]  # (h*w*feat_num, num_anchor*4)

        bbox_annotation = inst_annotations[j, :, :5]
        act_annotation_oh = inst_annotations[j, :, 5:]
        # Drop padding rows (obj label == -1) from both views, box filter first
        # so the same mask applies to the action one-hot rows.
        act_annotation_oh = act_annotation_oh[bbox_annotation[:, 4] != -1]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]  # (num_boxes, 5)

        if bbox_annotation.shape[0] == 0:
            # No ground truth for this image: contribute zero to every loss.
            if torch.cuda.is_available():
                act_classification_losses.append(torch.tensor(0).to(dtype).cuda())
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
                obj_classification_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                act_classification_losses.append(torch.tensor(0).to(dtype))
                regression_losses.append(torch.tensor(0).to(dtype))
                obj_classification_losses.append(torch.tensor(0).to(dtype))
            continue

        # Keep probabilities away from 0/1 so the logs below stay finite.
        obj_classification = torch.clamp(obj_classification, 1e-4, 1.0 - 1e-4)
        act_classification = torch.clamp(act_classification, 1e-4, 1.0 - 1e-4)

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        # Best-matching ground-truth instance for every anchor.
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # compute the loss for classification
        # targets: -1 = ignore, 0 = background, 1 = assigned class.
        act_targets = torch.ones_like(act_classification) * -1
        obj_targets = torch.ones_like(obj_classification) * -1
        if torch.cuda.is_available():
            act_targets = act_targets.cuda()
            obj_targets = obj_targets.cuda()

        obj_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4 -> background
        act_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4 -> background

        positive_indices = torch.ge(IoU_max, 0.5)  # IoU >= 0.5 -> positive

        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]
        assigned_act_annotation = act_annotation_oh[IoU_argmax, :]

        act_targets[positive_indices, :] = 0
        obj_targets[positive_indices, :] = 0
        # set the corresponding categories as 1
        obj_targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
        # Copy the matched instance's multi-hot action vector.
        act_targets[positive_indices, :] = assigned_act_annotation[positive_indices, :]

        # Action loss is computed only on anchors matched to at least one action.
        foreground = torch.max(act_targets, dim=1)[0] > 0
        act_targets = act_targets[foreground]
        act_classification = act_classification[foreground]

        alpha_factor_obj = torch.ones_like(obj_targets) * alpha
        if torch.cuda.is_available():
            alpha_factor_obj = alpha_factor_obj.cuda()

        # alpha for positives, (1 - alpha) for negatives.
        alpha_factor_obj = torch.where(torch.eq(obj_targets, 1.), alpha_factor_obj, 1. - alpha_factor_obj)
        # (1 - p) for positives, p for negatives: down-weights easy examples.
        obj_focal_weight = torch.where(torch.eq(obj_targets, 1.), 1. - obj_classification, obj_classification)
        obj_focal_weight = alpha_factor_obj * torch.pow(obj_focal_weight, gamma)

        obj_bce = -(obj_targets * torch.log(obj_classification) + (1.0 - obj_targets) * torch.log(1.0 - obj_classification))
        act_bce = -(act_targets * torch.log(act_classification) + (1.0 - act_targets) * torch.log(1.0 - act_classification))

        obj_cls_loss = obj_focal_weight * obj_bce  # classification loss
        if self.dataset == "vcoco":
            act_cls_loss = act_bce
        else:
            # NOTE(review): unconditional .cuda() here assumes a GPU is present
            # on non-vcoco datasets — confirm for CPU-only runs.
            act_cls_loss = act_bce * self.verb_weight.to(dtype).cuda()

        obj_zeros = torch.zeros_like(obj_cls_loss)
        act_zeros = torch.zeros_like(act_cls_loss)
        if torch.cuda.is_available():
            obj_zeros = obj_zeros.cuda()
            act_zeros = act_zeros.cuda()
        # ignore loss if IoU is too small (targets == -1 band)
        obj_cls_loss = torch.where(torch.ne(obj_targets, -1.0), obj_cls_loss, obj_zeros)
        act_cls_loss = torch.where(torch.ne(act_targets, -1.0), act_cls_loss, act_zeros)

        # Normalize both by the positive-anchor count (at least 1).
        obj_classification_losses.append(obj_cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))
        act_classification_losses.append(act_cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            # Ground-truth geometry; annotation columns are (x1, y1, x2, y2).
            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            # Stack as (dy, dx, dh, dw) to match the regression head's order.
            targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(targets - regression[positive_indices, :])

            # Smooth-L1 with beta = 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    # debug: draw and dump current predictions when the caller passes 'imgs'.
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        obj_list = kwargs.get('obj_list', None)
        # BUG FIX: was `regressions.detach()` — undefined in this method.
        out = postprocess(imgs.detach(),
                          torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                          obj_regressions.detach(),
                          obj_classifications.detach(),
                          regressBoxes, clipBoxes, 0.5, 0.3)
        imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
        # Undo ImageNet normalization back to uint8 RGB, then to BGR for OpenCV.
        imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
        imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
        display(out, imgs, obj_list, imshow=False, imwrite=True)

    return torch.stack(act_classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(obj_classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
def forward(self, regressions, anchors, annotations, **kwargs):
    """Smooth-L1 box-regression loss (no classification term).

    Args:
        regressions: predicted box deltas, [batch, anchor, 4],
            ordered (dy, dx, dh, dw) to match the stacked targets below.
        anchors: [1, anchor, 4]; only anchors[0] is used (shared anchor set).
            Columns are (y1, x1, y2, x2) per the width/height math below.
        annotations: per-image ground truth; annotations[j] rows are
            (x1, y1, x2, y2, label), with label == -1 marking padding rows.
        **kwargs: optional 'imgs' plus 'classifications' (and 'obj_list')
            trigger the debug visualization at the end.

    Returns:
        Mean regression loss over the batch, a tensor of shape (1,).

    Fixes relative to the previous version:
      * an image with zero valid annotations no longer crashes (torch.max
        over an empty IoU matrix raises); it now contributes zero loss,
        matching the sibling losses in this file;
      * the debug branch referenced an undefined name `classifications`;
        it is now taken from kwargs and the branch is skipped when absent;
      * unused locals `alpha` / `gamma` removed.
    """
    batch_size = regressions.shape[0]
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    # Anchor geometry; columns are (y1, x1, y2, x2).
    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j]
        # Drop padding rows (label == -1).
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        if bbox_annotation.shape[0] == 0:
            # No ground truth: zero loss (calc_iou/torch.max would fail here).
            zero = torch.tensor(0).to(dtype)
            if torch.cuda.is_available():
                zero = zero.cuda()
            regression_losses.append(zero)
            continue

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        # Best-matching ground-truth box for every anchor.
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        positive_indices = torch.ge(IoU_max, 0.5)
        assigned_annotations = bbox_annotation[IoU_argmax, :]

        if positive_indices.sum() > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            # Ground-truth geometry; annotation columns are (x1, y1, x2, y2).
            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            # Stack as (dy, dx, dh, dw) to match the regression head's order.
            targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(targets - regression[positive_indices, :])

            # Smooth-L1 with beta = 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            if torch.cuda.is_available():
                regression_losses.append(torch.tensor(0).to(dtype).cuda())
            else:
                regression_losses.append(torch.tensor(0).to(dtype))

    # debug: draw and dump current predictions when the caller passes 'imgs'.
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        # BUG FIX: `classifications` was an undefined free name here; this
        # loss has no classification input, so take it from kwargs and skip
        # the visualization if the caller did not provide it.
        classifications = kwargs.get('classifications', None)
        if classifications is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(imgs.detach(),
                              torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                              regressions.detach(),
                              classifications.detach(),
                              regressBoxes, clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            # Undo ImageNet normalization to uint8 RGB, then to BGR for OpenCV.
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

    return torch.stack(regression_losses).mean(dim=0, keepdim=True)
def evaluate_coco(img_path, set_name, image_ids, coco, model, params, step, threshold=0.2, nms_threshold=0.5, compound_coef=4, use_cuda=True):
    """Run the detector over a COCO image set and dump COCO-format results.

    Args:
        img_path: directory prefix joined with each image's 'file_name'.
        set_name: split name; output goes to '{set_name}_bbox_results.json'.
        image_ids: COCO image ids to evaluate.
        coco: loaded COCO API object (used for loadImgs).
        model: detector returning (features, regression, classification, anchors).
        params: dict providing 'obj_list' for the display call.
        step: training step forwarded to display for tagging.
        threshold: score threshold for postprocess.
        nms_threshold: NMS IoU threshold for postprocess.
        compound_coef: index into input_sizes for the preprocessing size.
        use_cuda: move the input tensor to GPU 0 when True.

    Raises:
        Exception: when no image produced any detection.
    """
    box_transform = BBoxTransform()
    box_clipper = ClipBoxes()
    detections = []

    for image_id in tqdm(image_ids):
        info = coco.loadImgs(image_id)[0]
        raw = cv.imread(img_path + info['file_name'])
        ori_imgs, framed_imgs, framed_metas = preprocess([raw], max_size=input_sizes[compound_coef])

        tensor = torch.from_numpy(framed_imgs[0])
        if use_cuda:
            tensor = tensor.cuda(0)
        # HWC -> NCHW float batch of one.
        tensor = tensor.float().unsqueeze(0).permute(0, 3, 1, 2)

        features, regression, classification, anchors = model(tensor)
        preds = postprocess(tensor, anchors, regression, classification,
                            box_transform, box_clipper, threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]
        display([preds], [raw], params['obj_list'],
                imshow=False, imwrite=False, send=True, step=step, tag='val')

        rois = preds['rois']
        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h (COCO bbox convention)
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            for score, class_id, box in zip(preds['scores'], preds['class_ids'], rois):
                detections.append({
                    'image_id': image_id,
                    'category_id': int(class_id) + 1,  # COCO categories are 1-based
                    'score': float(score),
                    'bbox': box.tolist(),
                })

    if not len(detections):
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, 'w') as result_file:
        json.dump(detections, result_file, indent=4)