def test_merged():
    """Smoke-test the model and both losses on the train and validation data.

    The first 100 batches of every training fold (1..5) are processed first,
    followed by the first 100 batches of every validation fold (1..5).  For
    each batch the model output shapes and target shapes are checked, both
    losses must evaluate to a 0-dim tensor, and ``model.predict`` must run
    without raising.  Everything runs on the GPU.
    """
    model = retinanet()
    model = model.cuda()

    def _check_batch(batch):
        # Move the whole batch to the GPU, where the model lives.
        img_batch, labels_batch, regression_batch = batch
        img_batch = img_batch.cuda()
        labels_batch = labels_batch.cuda()
        regression_batch = regression_batch.cuda()

        classification, regression = model(img_batch)

        # Targets carry one extra trailing channel compared to predictions.
        batch_size = config['batch_size']
        num_classes = config['num_classes']
        assert classification.shape == (batch_size, length, num_classes)
        assert labels_batch.shape == (batch_size, length, num_classes + 1)
        assert regression.shape == (batch_size, length, 4)
        assert regression_batch.shape == (batch_size, length, 4 + 1)

        # Both criteria must reduce to a 0-dim (scalar) tensor.
        focal = focal_loss()
        smooth_l1 = smooth_l1_loss()
        scalar_shape = torch.Size([])
        assert focal(labels_batch, classification).shape == scalar_shape
        assert smooth_l1(regression_batch, regression).shape == scalar_shape

        # The return value is not inspected; this only checks inference runs.
        model.predict(img_batch)

    def _check_all_folds(fetch_loader):
        for fold in range(1, 6):
            batches = iter(fetch_loader(fold))
            for _ in range(100):
                _check_batch(next(batches))

    _check_all_folds(fetch_trn_loader)
    _check_all_folds(fetch_val_loader)
def test_focal():
    """Check that the focal loss accepts random inputs and is non-zero.

    The last channel of the random target is overwritten with the values
    ``[1, -1, 0]`` (broadcast over the batch dimension) before the loss is
    evaluated; the loss result must be truthy.
    """
    criterion = focal_loss()
    predictions = torch.rand(8, 3, 1)
    targets = torch.rand(8, 3, 2)

    # Overwrite the trailing channel; the (3,) tensor broadcasts to (8, 3).
    targets[..., -1] = torch.tensor([1, -1, 0])
    expected_last_channel = torch.tensor([1, -1, 0], dtype=torch.float32)
    assert torch.all(targets[..., -1] == expected_last_channel)

    assert criterion(targets, predictions)
Beispiel #3
0
    def loss(self,
             outputs: tuple,
             gt_bboxes: list,
             gt_labels: list,
             iou_threshs: tuple = (0.4, 0.5)) -> dict:
        """Compute the detection loss (localization + confidence).

        Args:
            outputs (tuple): (predicted offsets, predicted confidences)
                * predicted offsets     : (B, P, 4)
                  (coord fmt: [Δcx, Δcy, Δw, Δh]; P is the number of prior
                  boxes, expected to be 32526)
                * predicted confidences : (B, P, num_classes + 1)
            gt_bboxes (list): ground-truth boxes [(G1, 4), (G2, 4), ...]
                (coord fmt: [cx, cy, w, h])
            gt_labels (list): ground-truth labels [(G1,), (G2,), ...]
            iou_threshs (tuple): (negative threshold, positive threshold).
                A prior box whose best IoU is below the first value becomes
                background (label 0); one whose best IoU is below the second
                value (but not the first) becomes void (label -1) and is
                excluded from the loss.

        Returns:
            dict: {
                loss: total loss (loss_conf + loss_loc),
                loss_loc: localization loss,
                loss_conf: confidence loss
            }
        """
        out_locs, out_confs = outputs
        device = out_locs.device
        neg_thresh = iou_threshs[0]
        pos_thresh = iou_threshs[1]

        # [Step 1]
        #   Build the targets.
        #   - Match every prior box to the GT box it overlaps most (max IoU).
        #   - Additionally, for every GT box, force the prior box with the
        #     highest IoU to be matched to that GT box.
        #   - Best IoU < neg_thresh               -> label 0  (background)
        #   - neg_thresh <= best IoU < pos_thresh -> label -1 (void)

        B, P, _ = out_confs.size()
        target_locs = torch.zeros(B, P, 4, device=device)
        target_labels = torch.zeros(B, P, dtype=torch.long, device=device)

        pboxes = self.pboxes.to(device)
        # The prior boxes do not depend on the image: convert them only once.
        pboxes_xyxy = box_convert(pboxes, in_fmt='cxcywh', out_fmt='xyxy')
        for i in range(B):
            bboxes = gt_bboxes[i].to(device)
            if bboxes.numel() == 0:
                # No ground truth in this image: every prior box stays
                # background (the targets are already zero-initialised).
                continue
            labels = gt_labels[i].to(device)

            bboxes_xyxy = box_convert(bboxes, in_fmt='cxcywh', out_fmt='xyxy')
            ious = box_iou(pboxes_xyxy, bboxes_xyxy)  # (P, G)
            _, best_pbox_ids = ious.max(dim=0)  # best prior box per GT box
            max_ious, matched_bbox_ids = ious.max(dim=1)  # best GT per prior

            # Force-assign each GT box to its best prior box.  The write must
            # index `matched_bbox_ids` directly: indexing with a tensor first
            # (`matched_bbox_ids[best_pbox_ids][j] = j`) only modifies a
            # temporary copy.  If several GT boxes share the same best prior
            # box, the last one wins.
            for j, pbox_id in enumerate(best_pbox_ids.tolist()):
                matched_bbox_ids[pbox_id] = j
            # Lift the forced matches to the positive threshold so that they
            # survive the filtering below.
            max_ious[best_pbox_ids] = pos_thresh

            matched_bboxes = bboxes[matched_bbox_ids]
            locs = self._calc_delta(matched_bboxes, pboxes)
            # Indexing with a tensor copies, so the caller's labels are not
            # modified by the in-place writes below.
            labels = labels[matched_bbox_ids]
            labels[max_ious.less(pos_thresh)] = -1  # void class: ignored
            labels[max_ious.less(neg_thresh)] = 0  # 0 is background; positive classes start at 1

            target_locs[i] = locs
            target_labels[i] = labels

        # [Step 2]
        #   Build pos_mask and neg_mask.
        #   - pos_mask: label > 0 (a real class)
        #   - neg_mask: label == 0 (background)
        #   Void entries (label -1) belong to neither mask.

        pos_mask = target_labels > 0
        neg_mask = target_labels == 0

        # Normalise by the number of positives; clamp so that a batch without
        # any positive does not divide by zero.
        N = pos_mask.sum().clamp(min=1)

        # [Step 3]
        #   Localization loss over the positives.
        loss_loc = F.smooth_l1_loss(
            out_locs[pos_mask], target_locs[pos_mask], reduction='sum') / N

        # [Step 4]
        #   Confidence loss over positives and negatives.
        valid_mask = pos_mask | neg_mask
        loss_conf = focal_loss(out_confs[valid_mask],
                               target_labels[valid_mask],
                               reduction='sum') / N

        # [Step 5]
        #   Sum the losses.
        loss = loss_conf + loss_loc

        return {'loss': loss, 'loss_loc': loss_loc, 'loss_conf': loss_conf}
Beispiel #4
0
    # Make sure the output directory exists before anything is written to it.
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    # Merge the command-line arguments into `config` and save the result as
    # JSON.  `config.update` mutates `config` in place, so the later reads
    # below (e.g. config['backbone']) see any key overridden by `args`.
    with open(os.path.join(args.save_dir, 'config.json'), 'w') as f:
        config.update(vars(args))
        json.dump(config, f, indent=4)

    # NOTE(review): presumably routes log output to <save_dir>/train.log;
    # confirm against utils.set_logger.
    utils.set_logger(os.path.join(args.save_dir, 'train.log'))
    # Record the exact command line and the output directory in the log.
    logging.info(' '.join(sys.argv[:]))
    logging.info(args.save_dir)

    # Create the input data pipeline
    logging.info("Loading the datasets...")
    # fetch dataloaders (training and validation loaders for the same fold)
    train_dl = fetch_trn_loader(args.fold)
    val_dl = fetch_val_loader(args.fold)

    # Define the model and optimizer
    # `device` is not defined in this fragment; it must come from the
    # enclosing scope.
    Net = retinanet(config['backbone'])
    model = Net.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    # The learning-rate scheduler is disabled: the line below is commented
    # out and `scheduler=None` is passed to train_and_evaluate.
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # fetch loss function and metrics
    # The two losses are passed on as a dict keyed by 'focal' and 'smooth_l1'.
    loss_fn = {'focal': focal_loss(alpha=config['focal_alpha']), 'smooth_l1': smooth_l1_loss(sigma=config['l1_sigma'])}

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(args.num_epochs))
    train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, args, scheduler=None)
    logging.info('Done')
Beispiel #5
0
    def loss(self, outputs: tuple, gt_bboxes: list, gt_labels: list) -> dict:
        """Compute the detection loss (localization + centerness + confidence).

        Args:
            outputs (tuple): (predicted offsets, predicted confidences,
                predicted centerness)
                * predicted offsets     : (B, P, 4); ``exp`` is applied
                  before the loss, and the targets are the distances
                  [left, right, top, bottom] from a point to the box sides
                  (P is the number of points)
                * predicted confidences : (B, P, num_classes + 1)
                * predicted centerness  : logits, expected to be (B, P) so
                  that they line up with the centerness targets built here
            gt_bboxes (list): ground-truth boxes [(G1, 4), (G2, 4), ...]
                (coord fmt: [cx, cy, w, h])
            gt_labels (list): ground-truth labels [(G1,), (G2,), ...]

        Returns:
            dict: {
                loss: total loss (loss_conf + loss_cent + loss_loc),
                loss_loc: localization loss,
                loss_cent: centerness loss,
                loss_conf: confidence loss
            }
        """
        out_locs, out_confs, out_cents = outputs
        device = out_locs.device

        # [Step 1]
        #   Build the targets.
        #   - Assign each point to a GT box:
        #     - condition 1: the point lies inside the box
        #     - condition 2: the longest ray is within the point's regress
        #       range
        #     - condition 3: if several boxes qualify (ambiguous sample),
        #       pick the box with the smallest area
        #   - If no GT box qualifies, the label is set to 0.

        B, P, _ = out_confs.size()
        # NOTE(review): the targets are built on the default device and moved
        # to `device` afterwards, so self.points, self.regress_ranges and the
        # ground truth are presumably expected on that device - confirm.
        target_locs = torch.zeros(B, P, 4)
        target_cents = torch.zeros(B, P)
        target_labels = torch.zeros(B, P, dtype=torch.long)

        points = self.points
        regress_ranges = self.regress_ranges
        num_points = len(points)
        for i in range(B):
            bboxes = gt_bboxes[i]
            if bboxes.numel() == 0:
                # No ground truth in this image: every point stays background
                # (the targets are already zero-initialised).
                continue
            labels = gt_labels[i]

            bboxes_xyxy = box_convert(bboxes, in_fmt='cxcywh', out_fmt='xyxy')
            widths = bboxes_xyxy[:, 2] - bboxes_xyxy[:, 0]
            heights = bboxes_xyxy[:, 3] - bboxes_xyxy[:, 1]
            # (P, G): one row of box areas per point.  `repeat` materialises
            # the rows, which the masked in-place writes below require.
            areas = (widths * heights).repeat(num_points, 1)

            # Signed distances from every point to every box side; each
            # (P, 1) column of `points` broadcasts against the (G,) box
            # coordinates to give (P, G).
            left = points[:, [0]] - bboxes_xyxy[:, 0]
            right = bboxes_xyxy[:, 2] - points[:, [0]]
            top = points[:, [1]] - bboxes_xyxy[:, 1]
            bottom = bboxes_xyxy[:, 3] - points[:, [1]]
            rays = torch.stack([left, right, top, bottom], dim=-1)  # (P, G, 4)

            # Condition 1: all four distances positive <=> point inside box.
            inside_bbox = rays.min(dim=-1).values > 0
            areas[~inside_bbox] = INF

            # Condition 2: longest ray within this point's regress range.
            max_ray = rays.max(dim=-1).values
            inside_regress_range = (regress_ranges[:, [0]] <= max_ray) & (
                max_ray <= regress_ranges[:, [1]])
            areas[~inside_regress_range] = INF

            # Condition 3: the smallest remaining area wins.
            min_areas, matched_bbox_ids = areas.min(dim=1)
            point_ids = torch.arange(num_points, device=rays.device)
            locs = rays[point_ids, matched_bbox_ids]  # (P, 4)
            left_right = locs[:, 0:2]
            top_bottom = locs[:, 2:4]
            # Centerness target.  It is only meaningful for positive points;
            # for unmatched points the value may be NaN, but those entries
            # are never selected by pos_mask below.
            cents = (left_right.min(dim=1).values /
                     left_right.max(dim=1).values *
                     top_bottom.min(dim=1).values /
                     top_bottom.max(dim=1).values).sqrt()
            # Indexing with a tensor copies, so the caller's labels are not
            # modified by the in-place write below.
            labels = labels[matched_bbox_ids]
            labels[min_areas == INF] = 0  # 0 is background; positive classes start at 1

            target_locs[i] = locs
            target_cents[i] = cents
            target_labels[i] = labels

        target_locs = target_locs.to(device)
        target_cents = target_cents.to(device)
        target_labels = target_labels.to(device)

        # [Step 2]
        #   Build pos_mask and neg_mask.
        #   - pos_mask: label > 0 (a real class)
        #   - neg_mask: label == 0 (background)

        pos_mask = target_labels > 0
        neg_mask = target_labels == 0

        # Normalisers; both are clamped so that a batch without any positive
        # point does not divide by zero.
        N = pos_mask.sum().clamp(min=1)
        cent_norm = target_cents[pos_mask].sum().clamp(min=1e-6)

        # [Step 3]
        #   Localization loss over the positives, normalised by the sum of
        #   their centerness targets.
        loss_loc = iou_loss_with_distance(
            out_locs[pos_mask].exp(), target_locs[pos_mask],
            reduction='sum') / cent_norm

        # [Step 4]
        #   Centerness loss over the positives.
        loss_cent = F.binary_cross_entropy_with_logits(
            out_cents[pos_mask], target_cents[pos_mask], reduction='sum') / N

        # [Step 5]
        #   Confidence loss over positives and negatives.
        valid_mask = pos_mask | neg_mask
        loss_conf = focal_loss(out_confs[valid_mask],
                               target_labels[valid_mask],
                               reduction='sum') / N

        # [Step 6]
        #   Sum the losses.
        loss = loss_conf + loss_cent + loss_loc

        return {
            'loss': loss,
            'loss_loc': loss_loc,
            'loss_cent': loss_cent,
            'loss_conf': loss_conf
        }