Example #1
import torch
import torch.nn.functional as F
# crop() and draw_lincomb() are project helpers defined elsewhere in the repo.

def after_nms(ids_p, class_p, box_p, coef_p, proto_p, img_h, img_w, cfg=None, img_name=None):
    if ids_p is None:
        return None, None, None, None

    if cfg and cfg.visual_thre > 0:
        keep = class_p >= cfg.visual_thre
        if not keep.any():
            return None, None, None, None

        ids_p = ids_p[keep]
        class_p = class_p[keep]
        box_p = box_p[keep]
        coef_p = coef_p[keep]

    if cfg and cfg.save_lincomb:
        draw_lincomb(proto_p, coef_p, img_name)

    masks = torch.sigmoid(torch.matmul(proto_p, coef_p.t()))

    if not cfg or not cfg.no_crop:  # Crop masks by box_p
        masks = crop(masks, box_p)

    masks = masks.permute(2, 0, 1).contiguous()

    ori_size = max(img_h, img_w)
    # cv2.resize in OpenCV behaves like `align_corners=False`, so match it here.
    masks = F.interpolate(masks.unsqueeze(0), (ori_size, ori_size), mode='bilinear', align_corners=False).squeeze(0)
    masks.gt_(0.5)  # Binarize the masks because of interpolation.
    masks = masks[:, 0: img_h, :] if img_h < img_w else masks[:, :, 0: img_w]

    box_p *= ori_size
    box_p = box_p.int()

    return ids_p, class_p, box_p, masks
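
crop() above is a project helper that is not shown in these examples. As a rough sketch (not the repo's actual implementation), a YOLACT-style crop zeroes every mask activation that falls outside its detection's box. The (h, w, n) mask layout and the normalized (x1, y1, x2, y2) box format are assumptions inferred from how crop is called above.

import torch

def crop_sketch(masks, boxes):
    # masks: (h, w, n) float tensor, one channel per detection
    # boxes: (n, 4) normalized (x1, y1, x2, y2)
    h, w, n = masks.size()
    x1 = (boxes[:, 0] * w).clamp(0, w).long().view(1, 1, -1)
    x2 = (boxes[:, 2] * w).clamp(0, w).long().view(1, 1, -1)
    y1 = (boxes[:, 1] * h).clamp(0, h).long().view(1, 1, -1)
    y2 = (boxes[:, 3] * h).clamp(0, h).long().view(1, 1, -1)

    rows = torch.arange(w, device=masks.device).view(1, -1, 1)  # broadcasts to (h, w, n)
    cols = torch.arange(h, device=masks.device).view(-1, 1, 1)

    keep = (rows >= x1) & (rows < x2) & (cols >= y1) & (cols < y2)
    return masks * keep.float()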
Example #2
    def lincomb_mask_loss(self, pos_bool, anchor_max_i, coef_p, proto_p,
                          mask_gt, anchor_max_gt):
        proto_h, proto_w = proto_p.shape[1:3]
        total_pos_num = pos_bool.sum()
        loss_m = 0
        for i in range(coef_p.size(0)):  # coef_p.shape: (n, 19248, 32)
            # downsample the gt mask to the size of 'proto_p'
            downsampled_masks = F.interpolate(mask_gt[i].unsqueeze(0),
                                              (proto_h, proto_w),
                                              mode='bilinear',
                                              align_corners=False).squeeze(0)
            downsampled_masks = downsampled_masks.permute(
                1, 2, 0).contiguous()  # (138, 138, num_objects)
            # binarize the gt mask because of the downsample operation
            downsampled_masks = downsampled_masks.gt(0.5).float()

            pos_anchor_i = anchor_max_i[i][pos_bool[i]]
            pos_anchor_box = anchor_max_gt[i][pos_bool[i]]
            pos_coef = coef_p[i][pos_bool[i]]

            if pos_anchor_i.size(0) == 0:
                continue

            # If the number of positives exceeds the training limit, select a random subset
            old_num_pos = pos_coef.size(0)
            if old_num_pos > self.cfg.masks_to_train:
                perm = torch.randperm(pos_coef.size(0))
                select = perm[:self.cfg.masks_to_train]
                pos_coef = pos_coef[select]
                pos_anchor_i = pos_anchor_i[select]
                pos_anchor_box = pos_anchor_box[select]

            num_pos = pos_coef.size(0)

            pos_mask_gt = downsampled_masks[:, :, pos_anchor_i]

            # mask assembly by linear combination
            # `@` is matrix multiplication
            mask_p = torch.sigmoid(
                proto_p[i] @ pos_coef.t())  # mask_p.shape: (138, 138, num_pos)
            mask_p = crop(mask_p,
                          pos_anchor_box)  # pos_anchor_box.shape: (num_pos, 4)
            # TODO: grad out of gt box is 0, should it be modified?
            # TODO: need an upsample before computing loss?
            mask_loss = F.binary_cross_entropy(torch.clamp(mask_p, 0, 1),
                                               pos_mask_gt,
                                               reduction='none')
            # (elementwise BCE: -gt * log(p) - (1 - gt) * log(1 - p))

            # Normalize the mask loss to emulate roi pooling's effect on loss.
            anchor_area = (pos_anchor_box[:, 2] - pos_anchor_box[:, 0]) * (
                pos_anchor_box[:, 3] - pos_anchor_box[:, 1])
            mask_loss = mask_loss.sum(dim=(0, 1)) / anchor_area

            if old_num_pos > num_pos:
                mask_loss *= old_num_pos / num_pos

            loss_m += torch.sum(mask_loss)

        return self.cfg.mask_alpha * loss_m / proto_h / proto_w / total_pos_num
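
A quick shape check of the "mask assembly by linear combination" step, using dummy tensors. The sizes (138x138 protos, 32 coefficients, 5 positives) follow the comments above and are illustrative only.

import torch

proto = torch.rand(138, 138, 32)  # proto_p[i]: (proto_h, proto_w, 32)
coef = torch.randn(5, 32)         # pos_coef: (num_pos, 32)

mask_p = torch.sigmoid(proto @ coef.t())  # matmul over the 32 prototype channels
print(mask_p.shape)  # torch.Size([138, 138, 5]): one assembled mask per positive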
Example #3
def after_nms(nms_outs,
              img_h,
              img_w,
              show_lincomb=False,
              crop_masks=True,
              visual_thre=0,
              img_name=None):
    if nms_outs is None:
        return [torch.Tensor()] * 4  # Warning: this is 4 references to the same tensor

    if visual_thre > 0:
        keep = nms_outs['class'] > visual_thre

        for k in nms_outs:
            if k != 'proto':
                nms_outs[k] = nms_outs[k][keep]

        if nms_outs['class'].size(0) == 0:
            return [torch.Tensor()] * 4

    class_ids = nms_outs['class_ids']
    boxes = nms_outs['box']
    classes = nms_outs['class']
    coefs = nms_outs['coef']

    # At this point, the masks are just the coefficients
    proto_data = nms_outs['proto']

    if show_lincomb:
        draw_lincomb(proto_data, coefs, img_name)

    masks = torch.sigmoid(torch.matmul(proto_data, coefs.t()))

    # Crop masks by boxes
    if crop_masks:
        masks = crop(masks, boxes)

    masks = masks.permute(2, 0, 1).contiguous()
    masks = F.interpolate(masks.unsqueeze(0), (img_h, img_w),
                          mode='bilinear',
                          align_corners=False).squeeze(0)
    # Binarize the masks
    masks.gt_(0.5)

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2],
                                                    img_w)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3],
                                                    img_h)
    boxes = boxes.long()

    return class_ids, classes, boxes, masks
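
sanitize_coordinates() is another project helper not shown here. A minimal sketch of the behavior the call sites above seem to rely on (scale normalized coordinates to pixels, put the pair in order, and clamp to the image boundary); the real helper may also handle padding and integer casting.

import torch

def sanitize_coordinates_sketch(x1, x2, img_size):
    # x1, x2: (n,) normalized coordinates along one axis
    x1, x2 = x1 * img_size, x2 * img_size
    lo = torch.min(x1, x2).clamp(min=0)
    hi = torch.max(x1, x2).clamp(max=img_size)
    return lo, hi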
Example #4
    def lincomb_mask_loss(positive_bool, prior_max_index, coef_p, proto_p, mask_gt, prior_max_box):
        proto_h = proto_p.size(1)  # 138
        proto_w = proto_p.size(2)  # 138

        loss_m = 0
        for i in range(coef_p.size(0)):  # coef_p.shape: (n, 19248, 32)
            with torch.no_grad():
                # downsample the gt mask to the size of 'proto_p'
                downsampled_masks = F.interpolate(mask_gt[i].unsqueeze(0), (proto_h, proto_w), mode='bilinear',
                                                  align_corners=False).squeeze(0)
                downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous()  # (138, 138, num_objects)
                # binarize the gt mask because of the downsample operation
                downsampled_masks = downsampled_masks.gt(0.5).float()

            pos_prior_index = prior_max_index[i, positive_bool[i]]  # pos_prior_index.shape: [num_positives]
            pos_prior_box = prior_max_box[i, positive_bool[i]]
            pos_coef = coef_p[i, positive_bool[i]]

            if pos_prior_index.size(0) == 0:
                continue

            # If the number of positives exceeds the training limit, select a random subset
            old_num_pos = pos_coef.size(0)
            if old_num_pos > cfg.masks_to_train:
                perm = torch.randperm(pos_coef.size(0))
                select = perm[:cfg.masks_to_train]

                pos_coef = pos_coef[select]
                pos_prior_index = pos_prior_index[select]
                pos_prior_box = pos_prior_box[select]

            num_pos = pos_coef.size(0)
            pos_mask_gt = downsampled_masks[:, :, pos_prior_index]

            # mask assembly by linear combination
            # `@` is matrix multiplication
            mask_p = torch.sigmoid(proto_p[i] @ pos_coef.t())  # mask_p.shape: (138, 138, num_pos)
            mask_p = crop(mask_p, pos_prior_box)  # pos_prior_box.shape: (num_pos, 4)

            mask_loss = F.binary_cross_entropy(torch.clamp(mask_p, 0, 1), pos_mask_gt, reduction='none')
            # Normalize the mask loss to emulate roi pooling's effect on loss.
            pos_get_csize = center_size(pos_prior_box)
            mask_loss = mask_loss.sum(dim=(0, 1)) / pos_get_csize[:, 2] / pos_get_csize[:, 3]

            if old_num_pos > num_pos:
                mask_loss *= old_num_pos / num_pos

            loss_m += torch.sum(mask_loss)

        loss_m *= cfg.mask_alpha / proto_h / proto_w

        return loss_m
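
center_size() in the normalization step above is also not shown. Assuming the usual corner-to-center box conversion, dividing by pos_get_csize[:, 2] and pos_get_csize[:, 3] divides the summed loss by box width times height, i.e. by box area, matching the anchor-area normalization in Example #2. A sketch of that conversion:

import torch

def center_size_sketch(boxes):
    # boxes: (n, 4) in (x1, y1, x2, y2) -> (cx, cy, w, h)
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,   # centers
                      boxes[:, 2:] - boxes[:, :2]), dim=1)  # widths, heights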
Example #5
def after_nms(nms_outs, img_h, img_w, cfg=None, img_name=None):
    if nms_outs is None:
        return [torch.Tensor()] * 4

    if cfg and cfg.visual_thre > 0:
        keep = nms_outs['class'] >= cfg.visual_thre

        for k in nms_outs:
            if k != 'proto':
                nms_outs[k] = nms_outs[k][keep]

        if nms_outs['class'].size(0) == 0:
            return [torch.Tensor()] * 4

    class_ids = nms_outs['class_ids']
    boxes = nms_outs['box']
    classes = nms_outs['class']
    coefs = nms_outs['coef']
    proto_data = nms_outs['proto']

    if cfg and cfg.save_lincomb:
        draw_lincomb(proto_data, coefs, img_name)

    masks = torch.sigmoid(torch.matmul(proto_data, coefs.t()))

    if not cfg or not cfg.no_crop:  # Crop masks by boxes
        masks = crop(masks, boxes)

    masks = masks.permute(2, 0, 1).contiguous()
    masks = F.interpolate(masks.unsqueeze(0), (img_h, img_w),
                          mode='bilinear',
                          align_corners=False).squeeze(0)
    masks.gt_(0.5)  # Binarize the masks

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2],
                                                    img_w)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3],
                                                    img_h)
    boxes = boxes.long()

    return class_ids, classes, boxes, masks
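
A hedged usage sketch of this after_nms variant, showing the input contract the function body implies. The dict keys come from the code above; the shapes (138x138 protos, 32 coefficients, 3 detections) are assumptions, and crop(), sanitize_coordinates(), and draw_lincomb() must be in scope from the repo for the call to run.

import torch

num_dets, num_protos = 3, 32
nms_outs = {
    'class_ids': torch.randint(0, 80, (num_dets,)),
    'class': torch.rand(num_dets),              # confidence scores
    'box': torch.rand(num_dets, 4),             # normalized (x1, y1, x2, y2)
    'coef': torch.randn(num_dets, num_protos),
    'proto': torch.rand(138, 138, num_protos),
}

# cfg=None skips the score filter and lincomb dump, and crops by default.
ids, classes, boxes, masks = after_nms(nms_outs, img_h=480, img_w=640)
print(masks.shape)  # torch.Size([3, 480, 640]) -- one binarized mask per detection
print(boxes.dtype)  # torch.int64 after .long()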