Code example #1
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        """Focal loss for classification and smooth-L1 loss for box regression,
        computed per image against anchor-assigned targets."""
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        # assume all images in the batch share the same size, so one anchor set suffices
        anchor = anchors[0, :, :]
        dtype = anchors.dtype

        # anchors are laid out as (y1, x1, y2, x2)
        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                # no ground truth in this image: every anchor is a pure negative
                alpha_factor = torch.ones_like(classification) * alpha
                zero = torch.tensor(0).to(dtype)
                if torch.cuda.is_available():
                    alpha_factor = alpha_factor.cuda(self.gpu_id)
                    zero = zero.cuda(self.gpu_id)
                alpha_factor = 1. - alpha_factor
                focal_weight = alpha_factor * torch.pow(classification, gamma)

                bce = -(torch.log(1.0 - classification))

                cls_loss = focal_weight * bce

                regression_losses.append(zero)
                classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1  # -1 = ignore
            if torch.cuda.is_available():
                targets = targets.cuda(self.gpu_id)

            targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4: negative

            positive_indices = torch.ge(IoU_max, 0.5)  # IoU >= 0.5: positive

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda(self.gpu_id)

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda(self.gpu_id)
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(
                        torch.tensor(0).to(dtype).cuda(self.gpu_id))
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True) * 50  # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
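
For reference, the classification term above is the standard focal loss,
FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), applied element-wise to
sigmoid outputs. A minimal standalone sketch of the same computation (the
focal_loss helper is hypothetical, distilled from the snippet above):

import torch

def focal_loss(probs, targets, alpha=0.25, gamma=2.0):
    # probs: sigmoid outputs in (0, 1); targets: same shape, values in {0, 1}
    probs = torch.clamp(probs, 1e-4, 1.0 - 1e-4)
    alpha_factor = torch.where(targets == 1.0,
                               torch.full_like(probs, alpha),
                               torch.full_like(probs, 1.0 - alpha))
    # focal_weight plays the role of (1 - p_t): small for confident,
    # correct predictions, so easy examples are down-weighted
    focal_weight = torch.where(targets == 1.0, 1.0 - probs, probs)
    bce = -(targets * torch.log(probs) +
            (1.0 - targets) * torch.log(1.0 - probs))
    return alpha_factor * focal_weight.pow(gamma) * bce
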
Code example #2
    def forward(self, act_classifications, obj_classifications,
                obj_regressions, anchors, inst_annotations, **kwargs):
        """Two-headed variant: focal loss on object classes, multi-label BCE
        (optionally verb-weighted) on action classes, smooth-L1 regression."""
        anchors = anchors.float()
        act_classifications = act_classifications.float()

        alpha = 0.25
        gamma = 2.0
        batch_size = act_classifications.shape[0]
        act_classification_losses = []
        obj_classification_losses = []
        regression_losses = []

        # assume all images in the batch share the same size, so one anchor set suffices
        anchor = anchors[0, :, :]
        dtype = anchors.dtype

        # anchors are laid out as (y1, x1, y2, x2)
        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):
            act_classification = act_classifications[
                j, :, :]  # (h*w*feat_num, num_act_classes)
            obj_classification = obj_classifications[
                j, :, :]  # (h*w*feat_num, num_obj_classes)
            regression = obj_regressions[
                j, :, :]  # (h*w*feat_num, num_anchor*4)

            bbox_annotation = inst_annotations[j, :, :5]
            act_annotation_oh = inst_annotations[j, :, 5:]

            act_annotation_oh = act_annotation_oh[bbox_annotation[:, 4] != -1]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]  # (num_boxes, 5)

            if bbox_annotation.shape[0] == 0:
                zero = torch.tensor(0).to(dtype)
                if torch.cuda.is_available():
                    zero = zero.cuda()
                act_classification_losses.append(zero)
                regression_losses.append(zero)
                obj_classification_losses.append(zero)
                continue

            obj_classification = torch.clamp(obj_classification, 1e-4,
                                             1.0 - 1e-4)
            act_classification = torch.clamp(act_classification, 1e-4,
                                             1.0 - 1e-4)

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            act_targets = torch.ones_like(act_classification) * -1
            obj_targets = torch.ones_like(obj_classification) * -1
            if torch.cuda.is_available():
                act_targets = act_targets.cuda()
                obj_targets = obj_targets.cuda()

            obj_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4
            act_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4

            positive_indices = torch.ge(IoU_max, 0.5)  # IoU > 0.5

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]
            assigned_act_annotation = act_annotation_oh[IoU_argmax, :]

            act_targets[positive_indices, :] = 0
            obj_targets[positive_indices, :] = 0

            obj_targets[positive_indices, assigned_annotations[
                positive_indices,
                4].long()] = 1  # set the corresponding categories as 1
            act_targets[positive_indices, :] = assigned_act_annotation[
                positive_indices, :]  # copy the assigned box's multi-label action vector

            # keep only anchors whose action vector has at least one positive label
            foreground = torch.max(act_targets, dim=1)[0] > 0
            act_targets = act_targets[foreground]
            act_classification = act_classification[foreground]

            alpha_factor_obj = torch.ones_like(obj_targets) * alpha

            if torch.cuda.is_available():
                alpha_factor_obj = alpha_factor_obj.cuda()

            alpha_factor_obj = torch.where(torch.eq(obj_targets,
                                                    1.), alpha_factor_obj,
                                           1. - alpha_factor_obj)

            obj_focal_weight = torch.where(torch.eq(obj_targets, 1.),
                                           1. - obj_classification,
                                           obj_classification)
            obj_focal_weight = alpha_factor_obj * torch.pow(
                obj_focal_weight, gamma)

            obj_bce = -(
                obj_targets * torch.log(obj_classification) +
                (1.0 - obj_targets) * torch.log(1.0 - obj_classification))
            act_bce = -(
                act_targets * torch.log(act_classification) +
                (1.0 - act_targets) * torch.log(1.0 - act_classification))

            obj_cls_loss = obj_focal_weight * obj_bce  # classification loss

            if self.dataset == "vcoco":
                act_cls_loss = act_bce
            else:
                act_cls_loss = act_bce * self.verb_weight.to(dtype).cuda()

            obj_zeros = torch.zeros_like(obj_cls_loss)
            act_zeros = torch.zeros_like(act_cls_loss)
            if torch.cuda.is_available():
                obj_zeros = obj_zeros.cuda()
                act_zeros = act_zeros.cuda()
            obj_cls_loss = torch.where(
                torch.ne(obj_targets, -1.0), obj_cls_loss,
                obj_zeros)  # ignore loss if IoU is too small
            act_cls_loss = torch.where(
                torch.ne(act_targets, -1.0), act_cls_loss,
                act_zeros)  # ignore loss if IoU is too small

            obj_classification_losses.append(
                obj_cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))
            act_classification_losses.append(
                act_cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                obj_regressions.detach(), obj_classifications.detach(),
                regressBoxes, clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(act_classification_losses).mean(dim=0, keepdim=True), \
                torch.stack(obj_classification_losses).mean(dim=0, keepdim=True), \
                torch.stack(regression_losses).mean(dim=0, keepdim=True)
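
All the heads above share the same anchor assignment rule: an anchor whose best
IoU with any ground-truth box is below 0.4 is a negative, 0.5 or above is a
positive, and anything in between is ignored by the loss. A compact sketch of
just that rule (the helper name is hypothetical):

import torch

def assign_anchor_states(iou_max, neg_thresh=0.4, pos_thresh=0.5):
    # iou_max: per-anchor best IoU against any ground-truth box
    # returns -1.0 (ignore), 0.0 (negative) or 1.0 (positive) per anchor
    states = torch.full_like(iou_max, -1.0)
    states[iou_max < neg_thresh] = 0.0
    states[iou_max >= pos_thresh] = 1.0
    return states
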
Code example #3
    def forward(self, regressions, anchors, annotations, **kwargs):
        """Regression-only variant: smooth-L1 loss on anchors with IoU >= 0.5."""
        batch_size = regressions.shape[0]
        regression_losses = []

        # assume all images in the batch share the same size, so one anchor set suffices
        anchor = anchors[0, :, :]
        dtype = anchors.dtype

        # anchors are laid out as (y1, x1, y2, x2)
        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            positive_indices = torch.ge(IoU_max, 0.5)
            assigned_annotations = bbox_annotation[IoU_argmax, :]

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        # this regression-only loss receives no classification scores; assume the
        # caller passes them in via kwargs when visual debugging is wanted
        classifications = kwargs.get('classifications', None)
        if imgs is not None and classifications is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(regression_losses).mean(dim=0, keepdim=True)
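
The regression branch is a smooth-L1 (Huber) loss with transition point
beta = 1/9: the 0.5 * 9.0 * d^2 term above is exactly 0.5 * d^2 / beta. A
standalone sketch of the same piecewise function (hypothetical helper):

import torch

def smooth_l1(pred, target, beta=1.0 / 9.0):
    # quadratic for |d| <= beta, linear beyond, continuous at the join
    diff = torch.abs(target - pred)
    return torch.where(diff <= beta,
                       0.5 * diff.pow(2) / beta,
                       diff - 0.5 * beta)
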
Code example #4
def evaluate_coco(img_path, set_name, image_ids, coco, model, params, step, threshold=0.2, nms_threshold=0.5,
                  compound_coef=4, use_cuda=True):
    """Run the model over a COCO image set and write detections to
    '{set_name}_bbox_results.json' in COCO results format."""
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        image = cv2.imread(image_path)
        # input_sizes: module-level list of input resolutions indexed by compound_coef
        ori_imgs, framed_imgs, framed_metas = preprocess([image], max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(0)
        x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        display([preds], [image], params['obj_list'], imshow=False, imwrite=False, send=True, step=step, tag='val')

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)

    if not results:
        raise Exception('the model did not produce any valid detections; check the model architecture and input data')

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, 'w') as f:
        json.dump(results, f, indent=4)
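
The JSON file written above is in COCO results format, so it can be scored
directly with pycocotools. A minimal sketch of that follow-up step; VAL_GT and
SET_NAME are assumed placeholders, adjust them to your dataset:

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

VAL_GT = 'annotations/instances_val2017.json'  # assumed ground-truth file
SET_NAME = 'val2017'                           # must match set_name used above

coco_gt = COCO(VAL_GT)
coco_dt = coco_gt.loadRes(f'{SET_NAME}_bbox_results.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.params.imgIds = coco_gt.getImgIds()
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints the standard AP / AR table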