def test_merged():
    model = retinanet()
    model = model.cuda()
    focal = focal_loss()
    smooth_l1 = smooth_l1_loss()

    # Run the same shape / loss checks over both the training and the
    # validation loaders for every fold.
    for fetch_loader in (fetch_trn_loader, fetch_val_loader):
        for k in range(1, 6):
            dl = iter(fetch_loader(k))
            for i in range(100):
                img_batch, labels_batch, regression_batch = next(dl)
                img_batch = img_batch.cuda()
                labels_batch = labels_batch.cuda()
                regression_batch = regression_batch.cuda()

                classification, regression = model(img_batch)

                # `length` is expected to be the total anchor count over all pyramid levels.
                # The targets carry one extra column (the anchor state) compared to the head outputs.
                assert classification.shape == (config['batch_size'], length, config['num_classes'])
                assert labels_batch.shape == (config['batch_size'], length, config['num_classes'] + 1)
                assert regression.shape == (config['batch_size'], length, 4)
                assert regression_batch.shape == (config['batch_size'], length, 4 + 1)

                # Both losses must reduce to a scalar.
                assert focal(labels_batch, classification).shape == torch.Size([])
                assert smooth_l1(regression_batch, regression).shape == torch.Size([])

                results = model.predict(img_batch)
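# Hedged sketch: how a total anchor count such as `length` above is commonly
# derived for a RetinaNet-style pyramid. The input size (512x512), the strides
# for P3-P7 and the 9 anchors per location are illustrative assumptions, not
# values taken from this repository's config.
def total_anchor_count(image_size=512, strides=(8, 16, 32, 64, 128), anchors_per_loc=9):
    count = 0
    for s in strides:
        fm = (image_size + s - 1) // s  # feature-map side length at this stride
        count += fm * fm * anchors_per_loc
    return count

# Example: total_anchor_count() == 49104 for a 512x512 input under these assumptions.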
def test_focal():
    focal = focal_loss()
    y_pred = torch.rand(8, 3, 1)
    y_true = torch.rand(8, 3, 2)
    # The last channel of y_true is the anchor state: 1 = positive, -1 = ignore, 0 = negative.
    y_true[..., -1] = torch.tensor([1, -1, 0])
    assert (y_true[..., -1] == torch.tensor([1, -1, 0], dtype=torch.float32)).all()
    assert focal(y_true, y_pred)
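# Hedged sketch of a focal loss compatible with the (y_true, y_pred) call
# pattern used in these tests: y_true[..., :-1] holds the class targets and
# y_true[..., -1] the anchor state (1 positive, 0 negative, -1 ignore).
# The alpha/gamma defaults follow the RetinaNet paper; the repository's actual
# focal_loss implementation may differ.
import torch


class focal_loss_sketch:
    def __init__(self, alpha=0.25, gamma=2.0):
        self.alpha = alpha
        self.gamma = gamma

    def __call__(self, y_true, y_pred):
        labels = y_true[..., :-1]
        anchor_state = y_true[..., -1]
        mask = anchor_state != -1                      # drop ignored anchors
        labels, y_pred = labels[mask], y_pred[mask]

        p = torch.clamp(y_pred, 1e-6, 1 - 1e-6)
        alpha_t = self.alpha * labels + (1 - self.alpha) * (1 - labels)
        p_t = labels * p + (1 - labels) * (1 - p)
        loss = -alpha_t * (1 - p_t) ** self.gamma * torch.log(p_t)

        # Normalize by the number of positive anchors (at least 1) and return a scalar.
        normalizer = torch.clamp((anchor_state == 1).sum(), min=1)
        return loss.sum() / normalizer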
def loss(self, outputs: tuple, gt_bboxes: list, gt_labels: list, iou_threshs: tuple = (0.4, 0.5)) -> dict:
    """ Loss function

    Args:
        outputs (tuple): (predicted offsets, predicted confidences)
            * predicted offsets     : (B, P, 4)  (coord fmt: [Δcx, Δcy, Δw, Δh])
              (P: number of prior boxes. P = 32526 expected.)
            * predicted confidences : (B, P, num_classes + 1)
        gt_bboxes (list): ground-truth bbox coordinates [(G1, 4), (G2, 4), ...] (coord fmt: [cx, cy, w, h])
        gt_labels (list): ground-truth labels [(G1,), (G2,), ...]
        iou_threshs (tuple): IoU thresholds used to decide Positive / Negative

    Returns:
        dict: {
            loss: xxx,
            loss_loc: xxx,
            loss_conf: xxx
        }
    """
    out_locs, out_confs = outputs
    device = out_locs.device

    # [Step 1]
    #   Build the targets:
    #   - match each prediction to a GT
    #       - each prior box takes the BBox / Label with which its IoU is highest
    #       - the prior box with the highest IoU for a given BBox is force-assigned to that BBox
    #   - if the best IoU is below 0.4, set the label to 0 (background)
    #   - if the best IoU is below 0.5, set the label to -1 (void)
    B, P, C = out_confs.size()
    target_locs = torch.zeros(B, P, 4, device=device)
    target_labels = torch.zeros(B, P, dtype=torch.long, device=device)

    pboxes = self.pboxes.to(device)
    for i in range(B):
        bboxes = gt_bboxes[i].to(device)
        labels = gt_labels[i].to(device)

        bboxes_xyxy = box_convert(bboxes, in_fmt='cxcywh', out_fmt='xyxy')
        pboxes_xyxy = box_convert(pboxes, in_fmt='cxcywh', out_fmt='xyxy')
        ious = box_iou(pboxes_xyxy, bboxes_xyxy)
        best_ious, best_pbox_ids = ious.max(dim=0)
        max_ious, matched_bbox_ids = ious.max(dim=1)

        # For each BBox, force-assign the prior box with the highest IoU to that BBox.
        for j in range(len(best_pbox_ids)):
            matched_bbox_ids[best_pbox_ids[j]] = j
        max_ious[best_pbox_ids] = iou_threshs[1]

        bboxes = bboxes[matched_bbox_ids]
        locs = self._calc_delta(bboxes, pboxes)
        labels = labels[matched_bbox_ids]
        labels[max_ious.less(iou_threshs[1])] = -1  # void class: excluded from the loss
        labels[max_ious.less(iou_threshs[0])] = 0   # 0 is the background class; positive classes start at 1

        target_locs[i] = locs
        target_labels[i] = labels

    # [Step 2]
    #   Build pos_mask / neg_mask:
    #   - pos_mask: labels > 0
    #   - neg_mask: labels == 0
    pos_mask = target_labels > 0
    neg_mask = target_labels == 0
    N = pos_mask.sum()

    # [Step 3]
    #   Localization loss over the positives.
    loss_loc = F.smooth_l1_loss(out_locs[pos_mask], target_locs[pos_mask], reduction='sum') / N

    # [Step 4]
    #   Confidence loss over positives and negatives.
    loss_conf = focal_loss(out_confs[pos_mask | neg_mask], target_labels[pos_mask | neg_mask],
                           reduction='sum') / N

    # [Step 5]
    #   Total loss.
    loss = loss_conf + loss_loc

    return {'loss': loss, 'loss_loc': loss_loc, 'loss_conf': loss_conf}
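# Hedged sketch of the offset encoding that `self._calc_delta` above is expected
# to perform: express each matched GT box (cx, cy, w, h) relative to its prior
# box. The SSD-style variance terms (0.1, 0.2) are an assumption for
# illustration; the repository's actual implementation may omit or change them.
import torch


def calc_delta_sketch(bboxes: torch.Tensor, pboxes: torch.Tensor,
                      std: tuple = (0.1, 0.2)) -> torch.Tensor:
    # bboxes, pboxes: (P, 4) in [cx, cy, w, h]; returns (P, 4) as [Δcx, Δcy, Δw, Δh].
    d_cxcy = (bboxes[:, :2] - pboxes[:, :2]) / pboxes[:, 2:] / std[0]
    d_wh = torch.log(bboxes[:, 2:] / pboxes[:, 2:]) / std[1]
    return torch.cat([d_cxcy, d_wh], dim=1)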
if not os.path.isdir(args.save_dir):
    os.makedirs(args.save_dir)

# save config in file
with open(os.path.join(args.save_dir, 'config.json'), 'w') as f:
    config.update(vars(args))
    json.dump(config, f, indent=4)

utils.set_logger(os.path.join(args.save_dir, 'train.log'))
logging.info(' '.join(sys.argv[:]))
logging.info(args.save_dir)

# Create the input data pipeline
logging.info("Loading the datasets...")

# fetch dataloaders
train_dl = fetch_trn_loader(args.fold)
val_dl = fetch_val_loader(args.fold)

# Define the model and optimizer
Net = retinanet(config['backbone'])
model = Net.to(device)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# fetch loss functions
loss_fn = {'focal': focal_loss(alpha=config['focal_alpha']),
           'smooth_l1': smooth_l1_loss(sigma=config['l1_sigma'])}

# Train the model
logging.info("Starting training for {} epoch(s)".format(args.num_epochs))
train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, args, scheduler=None)
logging.info('Done')
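# Hedged sketch of a training/evaluation loop with the same call signature as
# train_and_evaluate above. The batch format (img, labels, regression) and the
# model outputs (classification, regression) are taken from the tests; the
# absence of checkpointing and the exact logging are assumptions, not this
# repository's implementation.
def train_and_evaluate_sketch(model, train_dl, val_dl, optimizer, loss_fn, args, scheduler=None):
    for epoch in range(args.num_epochs):
        model.train()
        for img_batch, labels_batch, regression_batch in train_dl:
            img_batch = img_batch.to(device)
            labels_batch = labels_batch.to(device)
            regression_batch = regression_batch.to(device)

            classification, regression = model(img_batch)
            loss = (loss_fn['focal'](labels_batch, classification)
                    + loss_fn['smooth_l1'](regression_batch, regression))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

        # Evaluate on the held-out fold without tracking gradients.
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for img_batch, labels_batch, regression_batch in val_dl:
                img_batch = img_batch.to(device)
                labels_batch = labels_batch.to(device)
                regression_batch = regression_batch.to(device)
                classification, regression = model(img_batch)
                val_loss += (loss_fn['focal'](labels_batch, classification)
                             + loss_fn['smooth_l1'](regression_batch, regression)).item()

        logging.info('Epoch {}: val loss {:.4f}'.format(epoch + 1, val_loss / max(len(val_dl), 1)))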
def loss(self, outputs: tuple, gt_bboxes: list, gt_labels: list) -> dict:
    """ Loss function

    Args:
        outputs (tuple): (predicted offsets, predicted confidences, predicted centerness)
            * predicted offsets     : (B, P, 4)  ([left, right, top, bottom] distances, log scale)
              (P: number of points. P = 32526 expected.)
            * predicted confidences : (B, P, num_classes + 1)
            * predicted centerness  : (B, P)
        gt_bboxes (list): ground-truth bbox coordinates [(G1, 4), (G2, 4), ...] (coord fmt: [cx, cy, w, h])
        gt_labels (list): ground-truth labels [(G1,), (G2,), ...]

    Returns:
        dict: {
            loss: xxx,
            loss_loc: xxx,
            loss_cent: xxx,
            loss_conf: xxx
        }
    """
    out_locs, out_confs, out_cents = outputs
    device = out_locs.device

    # [Step 1]
    #   Build the targets:
    #   - match each point to a GT
    #       - condition 1: the point lies inside the BBox
    #       - condition 2: the longest ray falls within this level's regress_range
    #       - condition 3: if a point matches several BBoxes (ambiguous sample),
    #         assign it to the BBox with the smallest area
    #   - if no GT matches, set the label to 0
    B, P, C = out_confs.size()
    target_locs = torch.zeros(B, P, 4)
    target_cents = torch.zeros(B, P)
    target_labels = torch.zeros(B, P, dtype=torch.long)

    points = self.points
    regress_ranges = self.regress_ranges
    for i in range(B):
        bboxes = gt_bboxes[i]
        labels = gt_labels[i]

        bboxes_xyxy = box_convert(bboxes, in_fmt='cxcywh', out_fmt='xyxy')
        areas = ((bboxes_xyxy[:, 2] - bboxes_xyxy[:, 0]) *
                 (bboxes_xyxy[:, 3] - bboxes_xyxy[:, 1])).repeat(len(points), 1)

        left = points[:, [0]] - bboxes_xyxy[:, 0]
        right = bboxes_xyxy[:, 2] - points[:, [0]]
        top = points[:, [1]] - bboxes_xyxy[:, 1]
        bottom = bboxes_xyxy[:, 3] - points[:, [1]]
        rays = torch.stack([left, right, top, bottom], dim=-1)

        # Condition 1: the point must lie inside the box.
        inside_bbox = rays.min(dim=-1).values > 0
        areas[~inside_bbox] = INF

        # Condition 2: the longest ray must fall inside this level's regress range.
        max_ray = rays.max(dim=-1).values
        inside_regress_range = (regress_ranges[:, [0]] <= max_ray) * (max_ray <= regress_ranges[:, [1]])
        areas[~inside_regress_range] = INF

        # Condition 3: ambiguous points go to the GT box with the smallest area.
        min_areas, matched_bbox_ids = areas.min(dim=1)

        locs = rays[range(len(points)), matched_bbox_ids]
        cents = (locs[:, 0:2].min(dim=1).values / locs[:, 0:2].max(dim=1).values *
                 locs[:, 2:4].min(dim=1).values / locs[:, 2:4].max(dim=1).values).sqrt()
        labels = labels[matched_bbox_ids]
        labels[min_areas == INF] = 0  # 0 is the background class; positive classes start at 1

        target_locs[i] = locs
        target_cents[i] = cents
        target_labels[i] = labels

    target_locs = target_locs.to(device)
    target_cents = target_cents.to(device)
    target_labels = target_labels.to(device)

    # [Step 2]
    #   Build pos_mask / neg_mask:
    #   - pos_mask: labels > 0
    #   - neg_mask: labels == 0
    pos_mask = target_labels > 0
    neg_mask = target_labels == 0
    N = pos_mask.sum()

    # [Step 3]
    #   Localization loss over the positives, normalized by the sum of centerness targets.
    loss_loc = iou_loss_with_distance(out_locs[pos_mask].exp(), target_locs[pos_mask],
                                      reduction='sum') / target_cents[pos_mask].sum()

    # [Step 4]
    #   Centerness loss over the positives.
    loss_cent = F.binary_cross_entropy_with_logits(out_cents[pos_mask], target_cents[pos_mask],
                                                   reduction='sum') / N

    # [Step 5]
    #   Confidence loss over positives and negatives.
    loss_conf = focal_loss(out_confs[pos_mask | neg_mask], target_labels[pos_mask | neg_mask],
                           reduction='sum') / N

    # [Step 6]
    #   Total loss.
    loss = loss_conf + loss_cent + loss_loc

    return {
        'loss': loss,
        'loss_loc': loss_loc,
        'loss_cent': loss_cent,
        'loss_conf': loss_conf
    }
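# Hedged sketch of an IoU loss computed directly from (left, right, top, bottom)
# distances, matching the call iou_loss_with_distance(pred.exp(), target) above.
# This follows the UnitBox/FCOS formulation; the repository's actual function may
# differ (e.g. GIoU variant, per-sample centerness weighting).
import torch


def iou_loss_with_distance_sketch(pred: torch.Tensor, target: torch.Tensor,
                                  reduction: str = 'sum', eps: float = 1e-7) -> torch.Tensor:
    # pred, target: (N, 4) rays [l, r, t, b] from each point to the box sides.
    pred_area = (pred[:, 0] + pred[:, 1]) * (pred[:, 2] + pred[:, 3])
    target_area = (target[:, 0] + target[:, 1]) * (target[:, 2] + target[:, 3])

    # Intersection of two boxes that share the same anchor point.
    w_inter = torch.min(pred[:, 0], target[:, 0]) + torch.min(pred[:, 1], target[:, 1])
    h_inter = torch.min(pred[:, 2], target[:, 2]) + torch.min(pred[:, 3], target[:, 3])
    inter = w_inter * h_inter
    union = pred_area + target_area - inter

    loss = -torch.log((inter + eps) / (union + eps))
    return loss.sum() if reduction == 'sum' else loss.mean()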