예제 #1
0
    def __call__(self, image, target):
        """Randomly crop ``image`` and remap the boxes in ``target`` to the crop.

        A sampling mode is drawn from ``self.sample_options``. ``None`` means
        "no crop"; otherwise the mode is a ``(min_iou, max_iou)`` pair (either
        bound may be ``None`` for "unbounded") that every ground-truth box must
        satisfy against the candidate patch. Up to five candidate patches are
        tried per mode before a new mode is drawn.

        Args:
            image: object exposing ``crop((left, top, right, bottom))`` with
                pixel coordinates (presumably a PIL image -- confirm with caller).
            target: dict with
                ``'boxes'``: tensor indexed as ``[num_boxes, 4]`` holding
                ``(left, top, right, bottom)`` in relative ``0-1`` coordinates,
                ``'labels'``: tensor indexable by a boolean mask over boxes,
                ``'height_width'``: ``(height, width)`` of ``image`` in pixels.

        Returns:
            ``(image, target)``. When a crop is applied, ``image`` is the
            cropped patch and ``target['boxes']`` / ``target['labels']`` are
            replaced by new tensors containing only the boxes whose centre lies
            inside the patch, clipped to it and re-normalised to ``0-1``.
            The tensor originally stored in ``target['boxes']`` is never
            modified in place.
        """
        # Ensure always return cropped image
        while True:
            mode = random.choice(self.sample_options)
            if mode is None:  # this mode means "do not crop"
                return image, target

            # Loop-invariant: a successful candidate returns immediately and a
            # rejected one never touches the boxes, so fetch them once here.
            boxes = target["boxes"]
            if len(boxes) == 0:
                # Without ground-truth boxes no patch can ever contain a box
                # centre, so searching would only spin; return unchanged.
                return image, target

            htot, wtot = target['height_width']

            min_iou, max_iou = mode
            min_iou = float('-inf') if min_iou is None else min_iou
            max_iou = float('+inf') if max_iou is None else max_iou

            # Implementation use 5 iteration to find possible candidate
            for _ in range(5):
                # Relative patch size; 0.3 * 0.3 is approx. 0.1 of the area
                w = random.uniform(0.3, 1.0)
                h = random.uniform(0.3, 1.0)

                # Keep the patch aspect ratio within [0.5, 2]
                if w / h < 0.5 or w / h > 2:
                    continue

                # Patch origin in relative coordinates: left in [0, 1 - w],
                # top in [0, 1 - h]
                left = random.uniform(0, 1.0 - w)
                top = random.uniform(0, 1.0 - h)

                right = left + w
                bottom = top + h

                # Box coordinates are relative (0-1), same as the patch
                ious = calc_iou_tensor(
                    boxes, torch.tensor([[left, top, right, bottom]]))

                # Every box must satisfy the IoU constraint of this mode
                if not ((ious > min_iou) & (ious < max_iou)).all():
                    continue

                # Discard any boxes whose centre is not in the cropped patch
                xc = 0.5 * (boxes[:, 0] + boxes[:, 2])
                yc = 0.5 * (boxes[:, 1] + boxes[:, 3])
                masks = (xc > left) & (xc < right) & (yc > top) & (yc < bottom)

                # If no box centre falls inside the patch, try another patch
                if not masks.any():
                    continue

                # Boolean-mask indexing returns a copy, so the clipping below
                # cannot leak into the tensor the caller passed in.
                kept = boxes[masks, :]
                labels = target['labels']
                labels = labels[masks]

                # Clip the surviving boxes to the patch borders
                kept[kept[:, 0] < left, 0] = left
                kept[kept[:, 1] < top, 1] = top
                kept[kept[:, 2] > right, 2] = right
                kept[kept[:, 3] > bottom, 3] = bottom

                # Crop the patch out of the image (pixel coordinates)
                left_idx = int(left * wtot)
                top_idx = int(top * htot)
                right_idx = int(right * wtot)
                bottom_idx = int(bottom * htot)
                image = image.crop((left_idx, top_idx, right_idx, bottom_idx))

                # Re-express the boxes relative to the cropped patch
                kept[:, 0] = (kept[:, 0] - left) / w
                kept[:, 1] = (kept[:, 1] - top) / h
                kept[:, 2] = (kept[:, 2] - left) / w
                kept[:, 3] = (kept[:, 3] - top) / h

                # Publish the updated boxes and labels
                target['boxes'] = kept
                target['labels'] = labels

                return image, target
예제 #2
0
    def forward(self, ploc, plabel, gloc, glabel):
        # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor
        """
        Compute the batch loss: IoU-augmented localisation loss plus a
        confidence loss with hard negative mining.

        Args:
            ploc: predicted location regression values, indexed as
                [N, 4, num_dboxes]. Not modified by this method.
            plabel: predicted class scores, forwarded unchanged to
                ``self.cross_entropy_Iou`` (presumably
                [N, num_classes, num_dboxes] -- confirm against the head).
            gloc: ground-truth boxes matched to every default box,
                indexed as [N, 4, num_dboxes].
            glabel: matched ground-truth labels, [N, num_dboxes]; entries
                ``> 0`` mark positive default boxes.

        Returns:
            Scalar tensor: per-image ``(loc_loss + con_loss) / num_positives``
            averaged over the batch, where images without any positive box
            contribute zero.

        Reads ``self.dboxes`` and ``self.scale_xy`` and relies on the helpers
        ``_location_vec``, ``_location_vec_inverse``, ``_xywh2ltrb``,
        ``location_loss`` and ``cross_entropy_Iou`` defined on the instance.
        """
        # Derive sizes and device from the inputs instead of assuming a fixed
        # batch size, default-box count and GPU.
        batch_size, num_dboxes = glabel.size(0), glabel.size(1)
        device = ploc.device

        dbox_ious_max = torch.zeros(batch_size, num_dboxes, device=device)
        dbox_ious_New = torch.zeros(batch_size, num_dboxes, device=device)

        # Mask of positive samples, Tensor: [N, num_dboxes]
        mask = glabel > 0

        # Decode predictions and convert both sides to corner format
        ploc_cxcy = self._location_vec_inverse(ploc)
        ploc_cxcy_ltwb = self._xywh2ltrb(ploc_cxcy)
        gloc_ltrb = self._xywh2ltrb(gloc)

        for kk in range(batch_size):
            # Predicted boxes of this image as [num_dboxes, 4]
            pboxes_one_img = ploc_cxcy_ltwb[kk, :, :].transpose(0, 1)
            # Ground-truth boxes of the positive default boxes, [num_pos, 4]
            gboxes_ltrb = gloc_ltrb[kk, :, mask[kk]].transpose(0, 1)

            # An image without positives has nothing to take a maximum over
            # (``max`` over a zero-sized dim raises); its row stays at zero.
            if gboxes_ltrb.size(0) > 0:
                ious_pbox_gt = calc_iou_tensor(
                    gboxes_ltrb, pboxes_one_img)  # [num_pos, num_dboxes]
                # Largest IoU each predicted box reaches with any of those boxes
                best_truth_ious, _ = ious_pbox_gt.max(dim=0)
                dbox_ious_max[kk, :] = best_truth_ious

            # IoU of every predicted box with its own matched ground truth
            # (assumes calc_iou_tensor_diag returns one value per default
            # box -- confirm against its definition)
            dbox_ious_New[kk, :] = calc_iou_tensor_diag(
                gloc_ltrb[kk, :, :].permute(1, 0), pboxes_one_img)

        iou_loss = 1 - dbox_ious_New

        # Number of positive samples per image, Tensor: [N]
        pos_num = mask.sum(dim=1)

        # Location regression targets of the ground truth, Tensor: [N, 4, num_dboxes]
        vec_gd = self._location_vec(gloc)

        # Decoded again rather than reusing ``ploc_cxcy`` from above, in case
        # ``_xywh2ltrb`` altered that tensor in place -- TODO confirm.
        pboxes = self._location_vec_inverse(ploc)

        # Per-box weight for the centre-offset terms, built from the widths and
        # heights of default, predicted and ground-truth boxes; it is treated
        # as a constant (no gradient flows through it).
        with torch.no_grad():
            tmp = 2 * self.dboxes[:, 2:, :] / (pboxes[:, 2:, :] +
                                               gloc[:, 2:, :]) / self.scale_xy

        # Out-of-place products: the caller's ``ploc`` must not be overwritten.
        vec_gd_xy = vec_gd[:, :2, :] * tmp
        ploc_xy = ploc[:, :2, :] * tmp

        # Localisation loss (positives only): IoU term plus the weighted
        # centre-offset regression term summed over x and y.
        loc_loss = 2 * (iou_loss + self.location_loss(
            ploc_xy, vec_gd_xy).sum(dim=1))  # Tensor: [N, num_dboxes]
        loc_loss = (mask.float() * loc_loss).sum(dim=1)  # Tensor: [N]

        # Confidence loss used for the positive samples
        con = self.cross_entropy_Iou(plabel,
                                     glabel,
                                     dbox_ious_New.unsqueeze(dim=1),
                                     isPositive=True)

        # Confidence loss used to rank negatives; positives are zeroed so that
        # they are never picked as hard negatives.
        con_neg = self.cross_entropy_Iou(plabel,
                                         glabel,
                                         dbox_ious_max.unsqueeze(dim=1),
                                         isPositive=False)
        con_neg[mask] = 0.0

        # Sorting the sort indices yields, for each box, its rank in the
        # descending order of negative confidence loss.
        _, con_idx = con_neg.sort(dim=1, descending=True)
        _, con_rank = con_idx.sort(dim=1)

        # Hard negative mining: keep three negatives per positive, capped at
        # the total number of default boxes.
        neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)
        neg_mask = con_rank < neg_num  # Tensor: [N, num_dboxes]

        # Final confidence loss = selected positives + selected negatives
        con_loss = (con * mask.float() + con_neg * neg_mask.float()).sum(
            dim=1)  # Tensor: [N]

        # Images without any ground-truth box contribute nothing; the clamp
        # keeps the division finite for them.
        total_loss = loc_loss + con_loss
        num_mask = (pos_num > 0).float()
        pos_num = pos_num.float().clamp(min=1e-6)
        ret = (total_loss * num_mask / pos_num).mean(dim=0)
        return ret