예제 #1
0
파일: loss.py 프로젝트: warisamin25/MTS
    def __call__(self, proposals, mask_logits, char_mask_logits, mask_targets,
                 char_mask_targets, char_mask_weights):
        """
        Compute the word-mask and per-character-mask losses.

        Arguments:
            proposals (list[BoxList]): sampled proposals (unused here; kept
                for interface compatibility)
            mask_logits (Tensor): predicted binary word-mask logits
            char_mask_logits (Tensor): predicted per-character class logits
            mask_targets (list[Tensor]): per-image binary mask targets
            char_mask_targets (list[Tensor]): per-image character-class targets
            char_mask_weights (list[Tensor]): per-image class-weight vectors

        Return:
            (mask_loss, char_mask_loss): scalar loss tensors
        """
        mask_targets = cat(mask_targets, dim=0)
        char_mask_targets = cat(char_mask_targets, dim=0)
        char_mask_weights = cat(char_mask_weights, dim=0)
        # Collapse the per-image weights into one per-class weight vector.
        char_mask_weights = char_mask_weights.mean(dim=0)

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately.
        # BUGFIX: the second zero used to be char_mask_targets.sum() * 0,
        # which is detached from the model — derive both zeros from the
        # *logits* so gradients still flow on the empty path.
        if mask_targets.numel() == 0 or char_mask_targets.numel() == 0:
            return mask_logits.sum() * 0, char_mask_logits.sum() * 0

        mask_loss = F.binary_cross_entropy_with_logits(
            mask_logits.squeeze(dim=1), mask_targets)
        if self.use_weighted_loss:
            char_mask_loss = F.cross_entropy(char_mask_logits,
                                             char_mask_targets,
                                             char_mask_weights,
                                             ignore_index=-1)
        else:
            char_mask_loss = F.cross_entropy(char_mask_logits,
                                             char_mask_targets,
                                             ignore_index=-1)
        return mask_loss, char_mask_loss
예제 #2
0
    def __call__(self, predictions):
        """
        Compute the Faster R-CNN losses for the proposals produced by a
        previous call to the subsample method.

        Arguments:
            predictions: model outputs forwarded to ``loss_computation``

        Returns:
            whatever ``self.loss_computation`` returns (classification and
            box-regression losses)
        """
        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        # Flatten per-image targets across the batch.
        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        return self.loss_computation(predictions, labels, regression_targets)
예제 #3
0
def concat_box_prediction_layers(box_cls, box_regression):
    """Flatten and concatenate per-level box predictions.

    For each feature level, permute the classification and regression
    outputs into the same layout as the labels (which were computed with
    all feature levels concatenated), then join every level into two flat
    tensors of shape (-1, C) and (-1, 4).
    """
    cls_flat = []
    reg_flat = []
    for cls_level, reg_level in zip(box_cls, box_regression):
        N, AxC, H, W = cls_level.shape
        A = reg_level.shape[1] // 4
        C = AxC // A
        cls_flat.append(permute_and_flatten(cls_level, N, A, C, H, W))
        reg_flat.append(permute_and_flatten(reg_level, N, A, 4, H, W))

    # Concatenate along dim 1 (the per-level predictions) to mirror how
    # the labels were generated with all feature maps concatenated.
    box_cls = cat(cls_flat, dim=1).reshape(-1, C)
    box_regression = cat(reg_flat, dim=1).reshape(-1, 4)
    return box_cls, box_regression
예제 #4
0
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor)
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)

        # Positive proposals and their ground-truth classes.
        pos = torch.nonzero(labels > 0).squeeze(1)
        pos_classes = labels[pos]

        # torch.mean (inside binary_cross_entropy_with_logits) rejects
        # empty tensors, so return a graph-connected zero in that case.
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0

        # Only the channel of each proposal's own class is scored.
        return F.binary_cross_entropy_with_logits(
            mask_logits[pos, pos_classes], mask_targets)
예제 #5
0
def binary_dice_loss_with_logits(mask_logits, mask_targets):
    r"""Function that measures Binary Dice Loss between target and output
    logits.

    Args:
        mask_logits: Tensor of arbitrary shape (raw logits; sigmoid is
            applied internally)
        mask_targets: Tensor of the same shape as input

    Returns:
        Scalar tensor ``1 - (2*intersection + 1) / (union + 1)``.

    Raises:
        ValueError: if the two tensors have different shapes.

    Examples::

         >>> input = torch.randn(3, requires_grad=True)
         >>> target = torch.empty(3).random_(2)
         >>> loss = binary_dice_loss_with_logits(input, target)
         >>> loss.backward()
    """
    mask_logits = mask_logits.sigmoid()
    if mask_logits.size() != mask_targets.size():
        raise ValueError("Mask_logits size ({}) must be the same as mask_targets size ({})".format(mask_logits.size(), mask_targets.size()))
    # NOTE: the original concatenated the slices of 3-D inputs before
    # summing; that reshape never changed the global sums below, so it
    # (and the leftover pdb debug comments) has been removed — the result
    # is identical for inputs of any shape.
    smooth = 1
    intersection = (mask_logits * mask_targets).sum()
    union = mask_logits.sum() + mask_targets.sum()
    return 1 - (2. * intersection + smooth) / (union + smooth)
예제 #6
0
 def obj_classification_loss(self, proposals, class_logits):
     """Cross-entropy loss over the proposals' ground-truth object labels."""
     logits = cat(class_logits, dim=0)
     device = logits.device
     gt_labels = cat(
         [p.get_field("gt_labels") for p in proposals], dim=0)
     return F.cross_entropy(logits, gt_labels)
예제 #7
0
    def __call__(self, proposals, keypoint_logits):
        """
        Keypoint head loss: cross-entropy between predicted heatmap logits
        and the discretized ground-truth keypoint locations.

        Arguments:
            proposals (list[BoxList]): proposals carrying a "keypoints" field
            keypoint_logits (Tensor): shape (N, K, H, W)

        Return:
            keypoint_loss (Tensor): scalar loss
        """
        heatmaps = []
        valid = []
        for proposals_per_image in proposals:
            kp = proposals_per_image.get_field("keypoints")
            heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
                kp, proposals_per_image, self.discretization_size)
            heatmaps.append(heatmaps_per_image.view(-1))
            valid.append(valid_per_image.view(-1))

        keypoint_targets = cat(heatmaps, dim=0)
        valid = cat(valid, dim=0).to(dtype=torch.uint8)
        valid = torch.nonzero(valid, as_tuple=False).squeeze(1)

        # torch.mean (inside cross_entropy) doesn't accept empty tensors,
        # so handle that case separately.  The original computed
        # keypoint_logits.sum() twice (once for zeros_like and once for a
        # redundant device= argument — zeros_like already inherits the
        # device); compute it once.
        if keypoint_targets.numel() == 0 or len(valid) == 0:
            return torch.zeros_like(keypoint_logits.sum())

        N, K, H, W = keypoint_logits.shape
        keypoint_logits = keypoint_logits.view(N * K, H * W)

        keypoint_loss = F.cross_entropy(keypoint_logits[valid],
                                        keypoint_targets[valid])
        return keypoint_loss
예제 #8
0
    def __call__(self, proposals, trans_pred, targets, loss_type):
        """
        Translation-regression loss over the positive proposals.

        :param proposals: (list[BoxList])
        :param trans_pred: predicted translations for the positive proposals
        :param targets: (list[BoxList])
        :param loss_type: one of 'MSE', 'L1' or 'HUBER'
        :return: (loss_trans, dis_trans) — the regression loss and the mean
            Euclidean prediction error (detached, for logging)
        :raises ValueError: if loss_type is not one of the supported values
        """
        labels, label_trans = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)
        label_trans = cat(label_trans, dim=0)

        positive_inds = torch.nonzero(labels > 0).squeeze(1)
        device_id = label_trans.get_device()

        if loss_type == 'MSE':
            loss = nn.MSELoss()
            loss_trans = loss(trans_pred, label_trans[positive_inds])
        elif loss_type == 'L1':
            loss = nn.L1Loss()
            loss_trans = loss(trans_pred, label_trans[positive_inds])
        elif loss_type == 'HUBER':
            beta = self.cfg.MODEL.TRANS_HEAD.TRANS_HUBER_THRESHOLD
            loss_trans = huber_loss(trans_pred, label_trans[positive_inds],
                                    device_id, beta)
        else:
            # Previously an unknown loss_type fell through and crashed
            # later with an opaque NameError on loss_trans; fail fast.
            raise ValueError("Unknown loss_type: {}".format(loss_type))

        # Mean Euclidean distance between prediction and target, detached
        # so it is logged without contributing gradients.
        dis_trans = torch.mean(
            torch.norm((trans_pred - label_trans[positive_inds]),
                       dim=1)).detach()

        return loss_trans, dis_trans
예제 #9
0
    def __call__(self, proposals, keypoint_logits):
        """Scaled MSE keypoint loss between predicted and target heatmaps."""
        heatmaps = []
        valid = []
        for props in proposals:
            kp = props.get_field("keypoints")
            hm, ok = project_keypoints_to_heatmap(
                kp, props, self.discretization_size)
            heatmaps.append(hm.view(-1))
            valid.append(ok.view(-1))

        # Flattened targets for all proposals / keypoints in the batch.
        keypoint_targets = cat(heatmaps, dim=0)
        valid = cat(valid, dim=0).to(dtype=torch.bool)
        valid = torch.nonzero(valid).squeeze(1)

        # torch.mean doesn't accept empty tensors, so return a
        # graph-connected zero when there is nothing to score.
        if keypoint_targets.numel() == 0 or len(valid) == 0:
            return keypoint_logits.sum() * 0

        N, K, H, W = keypoint_logits.shape
        # One (H*W)-sized heatmap per proposal/keypoint pair.
        keypoint_logits = keypoint_logits.view(N * K, H * W)
        keypoint_targets = keypoint_targets.view(N * K, H * W)

        # Mean-squared error over the valid keypoints, scaled by 1000.
        diff = keypoint_logits[valid] - keypoint_targets[valid]
        return torch.mean(diff ** 2) * 1000
예제 #10
0
파일: loss.py 프로젝트: zhangfx123/MPSR
    def __call__(self, class_logits, box_regression, closeup_logits,
                 closeup_labels):
        """
        Computes the loss for Faster R-CNN plus an auxiliary "closeup"
        classification loss.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])
            closeup_logits (list[Tensor] or None)
            closeup_labels (Tensor)

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
            extra_classification_loss (Tensor or None)
        """
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)

        # Auxiliary classification loss over the closeup crops, scaled
        # down by a factor of 10.
        if closeup_logits is None:
            extra_classification_loss = None
        else:
            closeup_labels = closeup_labels.repeat(len(closeup_logits))
            closeup_logits = torch.cat(closeup_logits, dim=0)
            extra_classification_loss = F.cross_entropy(
                closeup_logits, closeup_labels) / 10

        # For each positive sample, pick the four regression channels that
        # belong to its ground-truth class (advanced indexing).
        pos_inds = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[pos_inds]
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        # Summed smooth-L1, normalized by the total number of samples.
        box_loss = smooth_l1_loss(
            box_regression[pos_inds[:, None], map_inds],
            regression_targets[pos_inds],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss, extra_classification_loss
예제 #11
0
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList]): per-image RoIs, positives only
            mask_logits (Tensor): [num_pos_roi, num_classes, M, M]
            targets (list[BoxList])

        Matches each RoI to its ground-truth box by IoU, crops and resizes
        the corresponding ground-truth mask to the logits' MxM resolution,
        then scores the two with binary cross-entropy.
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)              # [num_roi]
        mask_targets = cat(mask_targets, dim=0)  # [num_roi, M, M]

        pos = torch.nonzero(labels > 0).squeeze(1)
        pos_classes = labels[pos]

        # torch.mean (inside binary_cross_entropy_with_logits) rejects
        # empty tensors, so short-circuit with a graph-connected zero.
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0

        # BCE-with-logits fuses sigmoid + cross-entropy; only the channel
        # of each RoI's ground-truth class contributes to the loss.
        return F.binary_cross_entropy_with_logits(
            mask_logits[pos, pos_classes], mask_targets)
예제 #12
0
    def reduced_bbox_result(self, box_regression, proposals):
        """
        Decode predicted and target boxes for the positive samples only,
        returning both as per-image BoxLists.

        Arguments:
            box_regression (list[Tensor]): class-specific box deltas
            proposals (list[BoxList]): proposals carrying "labels" and
                "regression_targets" fields

        Returns:
            (result, box_result): two lists of BoxList — decoded predictions
            and decoded regression targets, split back per image and clipped
            to the image bounds.
        """
        box_regression = cat(box_regression, dim=0)
        device = box_regression.device

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        # Positive samples and, for each one, the 4 regression channels of
        # its ground-truth class.
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]

        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                          device=device)

        image_shapes = [box.size for box in proposals]
        boxes_per_image = [len(box) for box in proposals]
        concat_boxes = torch.cat([a.bbox for a in proposals], dim=0)

        # Prefix sums of proposal counts: prefix_sum_boxes[j] is one past
        # the last proposal index of image j in the concatenated order.
        prefix_sum_boxes = [boxes_per_image[0]]
        for box_per_images in boxes_per_image[1:]:
            prefix_sum_boxes.append(box_per_images + prefix_sum_boxes[-1])

        # Count how many positive samples belong to each image.  This
        # two-pointer walk relies on sampled_pos_inds_subset being sorted
        # ascending (torch.nonzero returns indices in order).
        reduced_boxes_per_image = [0] * len(prefix_sum_boxes)
        i, j = 0, 0
        while i < len(sampled_pos_inds_subset):
            if sampled_pos_inds_subset[i] < prefix_sum_boxes[j]:
                reduced_boxes_per_image[j] += 1
                i += 1
            else:
                j += 1

        # Decode the class-specific deltas of the positives against their
        # proposal boxes, then split back into per-image chunks.
        proposals = self.box_coder.decode(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            concat_boxes[sampled_pos_inds_subset])

        proposals = proposals.split(reduced_boxes_per_image, dim=0)

        # Same decoding for the regression targets (class-agnostic here).
        box_targets = self.box_coder.decode(
            regression_targets[sampled_pos_inds_subset],
            concat_boxes[sampled_pos_inds_subset])

        box_targets = box_targets.split(reduced_boxes_per_image, dim=0)

        # Wrap the decoded predictions per image, clipped to image bounds.
        result = []
        for boxes, image_shape in zip(proposals, image_shapes):
            boxlist = BoxList(boxes, image_shape, mode="xyxy")
            boxlist = boxlist.clip_to_image(remove_empty=False)
            result.append(boxlist)

        # Same wrapping for the decoded targets.
        box_result = []
        for boxes, image_shape in zip(box_targets, image_shapes):
            boxlist = BoxList(boxes, image_shape, mode="xyxy")
            boxlist = boxlist.clip_to_image(remove_empty=False)
            box_result.append(boxlist)

        return result, box_result
예제 #13
0
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor)
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss

        Negative samples are already filtered out upstream, so every row
        of mask_logits is a positive; no positive-index search is needed —
        we only select each row's ground-truth class channel.
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)

        gt_classes = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately.
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0

        N, C, H, W = mask_logits.shape
        # Pick row i, channel gt_classes[i] via index_select on the
        # flattened (N*C, H, W) view.
        rows = torch.arange(0, N, device=gt_classes.device)
        flat_idx = (rows * mask_logits.size(1) + gt_classes).view(-1)
        selected = mask_logits.view(-1, H, W).index_select(
            0, flat_idx).view(N, H, W)

        return F.binary_cross_entropy_with_logits(selected, mask_targets)
예제 #14
0
    def __call__(self, proposals, word_logits, targets):
        """
        CTC word-recognition loss over the positive proposals.

        Arguments:
            proposals (list[BoxList])
            word_logits (Tensor): per-timestep character logits; indexed as
                (time, proposal, class) below — assumed time-major as CTC
                expects (TODO confirm against the caller)
            targets (list[BoxList])

        Return:
            scalar CTC loss, normalized by time steps and batch size
        """
        labels, words, word_lens = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)
        word_targets = cat(words, dim=0)
        word_lens = cat(word_lens, dim=0)

        ########################## positive samples ###########################
        positive_inds = torch.nonzero(labels > 0).squeeze(1)

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if word_targets.numel() == 0:
            return word_logits.sum() * 0
        pos_logits = word_logits[:, positive_inds].log_softmax(2)
        pos_wlens = word_lens[positive_inds]
        pos_target = word_targets[positive_inds]
        # print("word_lens:", word_lens, positive_inds)
        # Only keep words shorter than 18 characters.
        limited_ind = pos_wlens < 18
        word_lens_lim = pos_wlens[limited_ind]
        word_targets_lim = pos_target[limited_ind]
        pos_logits_lim = pos_logits[:, limited_ind]

        # Nothing survived the length filter: graph-connected zero.
        if word_targets_lim.numel() == 0:
            return pos_logits.sum() * 0

        batch_size = pos_logits_lim.size()[1]
        # Every prediction spans the full time dimension.
        predicted_length = torch.tensor([pos_logits_lim.size(0)] * batch_size)

        # print('words_targets:', word_targets)
        # Flatten the padded targets and drop the zero (padding) entries,
        # yielding the concatenated-target form ctc_loss accepts.
        word_targets_flatten = word_targets_lim.view(-1)
        positive_w_inds = torch.nonzero(word_targets_flatten > 0).squeeze(1)
        # print('positive_inds:', positive_inds)
        word_targets_flatten = word_targets_flatten[positive_w_inds]

        # Debug aid: every 100 calls, greedy-decode the predictions and
        # print them next to the ground-truth strings.
        if _DEBUG:
            self.show_cnt += 1
            if self.show_cnt % 100 == 0:
                pos_logits_show = pos_logits_lim.permute(1, 0, 2)
                pos_value, pos_inds = pos_logits_show.max(2)
                # print('word_lens_lim:', word_lens_lim)
                # print('pos_logits:', pos_inds, word_targets_flatten)
                predict_seq = pos_inds.data.cpu().numpy()
                word_targets_np = word_targets_lim.data.cpu().numpy()
                for a in range(predict_seq.shape[0]):
                    pred_str = ''
                    gt_str = ''
                    for b in range(predict_seq.shape[1]):
                        pred_str += self.alphabet[predict_seq[a, b]]
                    for c in range(word_targets_np.shape[1]):
                        if word_targets_np[a, c] != 0:
                            #print('use int?', word_targets_np[a, c])
                            gt_str += self.alphabet[int(word_targets_np[a, c])]
                    # print('lstr:', pred_str, gt_str)
                    print('lstr:', "|" + pred_str + "|", "|" + gt_str + "|")

        # Sum of per-word CTC losses, normalized by the number of time
        # steps and the (length-limited) batch size.
        return self.ctc_loss(
            pos_logits_lim,
            word_targets_flatten.long(), predicted_length.long(),
            word_lens_lim.long()).sum() / pos_logits.size()[0] / batch_size
예제 #15
0
    def __call__(self, proposals, keypoint_logits):
        """Cross-entropy keypoint loss over the valid keypoints."""
        heatmaps = []
        valid = []
        for props in proposals:
            kp = props.get_field("keypoints")
            hm, ok = project_keypoints_to_heatmap(
                kp, props, self.discretization_size)
            heatmaps.append(hm.view(-1))
            valid.append(ok.view(-1))

        keypoint_targets = cat(heatmaps, dim=0)
        valid_mask = cat(valid, dim=0).to(dtype=torch.uint8)
        valid_idx = torch.nonzero(valid_mask).squeeze(1)

        # torch.mean (inside cross_entropy) doesn't accept empty tensors,
        # so return a graph-connected zero when nothing is valid.
        if keypoint_targets.numel() == 0 or len(valid_idx) == 0:
            return keypoint_logits.sum() * 0

        N, K, H, W = keypoint_logits.shape
        flat_logits = keypoint_logits.view(N * K, H * W)

        return F.cross_entropy(flat_logits[valid_idx],
                               keypoint_targets[valid_idx])
예제 #16
0
    def __call__(self, proposals, ke_logits_x, ke_logits_y, targets):
        """
        Edge loss along the x and y axes for the positive proposals.

        Arguments:
            proposals (list[BoxList])
            ke_logits_x (Tensor): x-direction edge logits
            ke_logits_y (Tensor): y-direction edge logits
            targets (list[BoxList])

        Return:
            (mask_loss, mask_loss_x, mask_loss_y): total and per-axis
            scalar loss tensors
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)
        positive_inds = torch.nonzero(labels > 0).squeeze(1)

        # The non-empty path returns a 3-tuple; the original returned a
        # bare int 0 here, which broke any caller that unpacks three
        # values.  Return graph-connected zeros with the same shape.
        if mask_targets.numel() == 0:
            zero = ke_logits_x.sum() * 0 + ke_logits_y.sum() * 0
            return zero, zero, zero

        sb, sh, sw = mask_targets.shape
        # Score each axis' single-channel logits against the same targets.
        mask_loss_x = edge_loss(
            ke_logits_x[positive_inds, 0].view([sb, 1, sh, sw]),
            mask_targets.view([sb, 1, sh, sw]))
        mask_loss_y = edge_loss(
            ke_logits_y[positive_inds, 0].view([sb, 1, sh, sw]),
            mask_targets.view([sb, 1, sh, sw]))

        mask_loss = mask_loss_x + mask_loss_y

        return mask_loss, mask_loss_x, mask_loss_y
예제 #17
0
    def __call__(self, class_logits):
        """
        Computes the relation classification loss.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])

        Returns:
            classification_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposal_pairs"):
            raise RuntimeError("subsample needs to be called before")

        pairs = self._proposal_pairs
        labels = cat([p.get_field("labels") for p in pairs], dim=0)

        # Down-weight the background class (index 0) by the fg/bg ratio so
        # the many background pairs don't dominate the loss.
        num_fg = len(labels.nonzero())
        num_bg = labels.shape[0] - num_fg
        ce_weights = labels.new(class_logits.size(1)).fill_(1).float()
        ce_weights[0] = float(num_fg) / (num_bg + 1e-5)

        return F.cross_entropy(class_logits, labels, weight=ce_weights)
예제 #18
0
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        # Concatenate the per-image targets into flat batch tensors.
        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels.index_select(0, sampled_pos_inds_subset)
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                              device=device)

        # Gather box_regression[pos, map_inds] via index_select on the
        # flattened tensor — equivalent to advanced indexing; presumably
        # written this way for export (TorchScript/ONNX) friendliness —
        # TODO confirm.
        index_select_indices = (
            (sampled_pos_inds_subset[:, None]) * box_regression.size(1) +
            map_inds).view(-1)
        box_regression_sampled = box_regression.view(-1).index_select(
            0, index_select_indices).view(map_inds.shape[0], map_inds.shape[1])
        regression_targets_sampled = regression_targets.index_select(
            0, sampled_pos_inds_subset)

        # Summed smooth-L1 over the positives, normalized by the total
        # number of sampled proposals (positives and negatives).
        box_loss = smooth_l1_loss(
            box_regression_sampled,
            regression_targets_sampled,
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
예제 #19
0
def compute_multi_modal(targets, word2vec, devices):
    """Build multi-modal (category + spatial) features for relation pairs.

    Arguments:
        targets (list): per-image targets exposing ``get_field`` with
            subject/object category ids and bounding boxes, and ``size``
            (assumed BoxList-like -- TODO confirm against caller).
        word2vec (dict): mapping whose key ``'a'`` holds the embedding table.
        devices: torch device the features are placed on.

    Returns:
        tuple(Tensor, Tensor): concatenated subject+object word embeddings
        (float) and row-wise L2-normalized 14-d spatial features (float).
    """
    embeddings = torch.tensor(word2vec['a']).to(devices)
    # Small floor to keep divisions/logs finite on degenerate boxes.
    tiny = torch.tensor(10e-16).to(devices)

    def _columns(box):
        # x_min, y_min, x_max, y_max as column vectors
        return (box[:, 0].view(-1, 1), box[:, 1].view(-1, 1),
                box[:, 2].view(-1, 1), box[:, 3].view(-1, 1))

    category_chunks = []
    spatial_chunks = []
    for target in targets:
        # Category ids are 1-based; shift to index the embedding table.
        subj_idx = target.get_field('subject_category') - 1
        obj_idx = target.get_field('object_category') - 1
        category_chunks.append(
            torch.cat((embeddings[subj_idx], embeddings[obj_idx]), 1))

        subj_box = target.get_field('subject_boundingboxes')
        obj_box = target.get_field('object_boundingboxes')
        img_w, img_h = target.size
        img_w = float(img_w)
        img_h = float(img_h)

        xs_s, ys_s, xm_s, ym_s = _columns(subj_box)
        xs_o, ys_o, xm_o, ym_o = _columns(obj_box)

        # Normalized subject coords + area (features 0-4), then object
        # coords + area (features 5-9), matching the original ordering.
        subj_feats = [xs_s / img_w, ys_s / img_h, xm_s / img_w, ym_s / img_h,
                      ((ym_s - ys_s) * (xm_s - xs_s)) / (img_w * img_h)]
        obj_feats = [xs_o / img_w, ys_o / img_h, xm_o / img_w, ym_o / img_h,
                     ((ym_o - ys_o) * (xm_o - xs_o)) / (img_w * img_h)]

        # Relative offsets and log size ratios (features 10-13), with the
        # object/subject extents clamped away from zero.
        w_o = torch.max(xm_o - xs_o, tiny)
        h_o = torch.max(ym_o - ys_o, tiny)
        w_s = torch.max(xm_s - xs_s, tiny)
        h_s = torch.max(ym_s - ys_s, tiny)
        rel_feats = [(xs_s - xs_o) / w_o, (ys_s - ys_o) / h_o,
                     torch.log(w_s / w_o), torch.log(h_s / h_o)]

        spatial_chunks.append(
            torch.cat(subj_feats + obj_feats + rel_feats, dim=1))

    category_info = cat(category_chunks, dim=0).float()
    spatial_info = cat(spatial_chunks, dim=0)
    # Row-wise L2 normalization of the spatial features.
    spatial_info = torch.div(
        spatial_info,
        torch.norm(spatial_info, dim=1).unsqueeze(-1)).float()

    return category_info, spatial_info
예제 #20
0
    def __call__(self, class_logits, box_regression):
        """Compute the Faster R-CNN losses, re-weighted per Fitzpatrick
        category through ``self.augment_loss``.

        Requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")
        proposals = self._proposals

        def gather(field):
            # Concatenate one field across all per-image proposals.
            return cat([p.get_field(field) for p in proposals], dim=0)

        labels = gather("labels")
        regression_targets = gather("regression_targets")
        fitz_categories = gather("fitz_categories")

        # Per-sample CE so augment_loss can re-weight before reduction.
        per_sample_cls = F.cross_entropy(class_logits, labels,
                                         reduction="none")
        classification_loss = self.augment_loss(per_sample_cls,
                                                fitz_categories,
                                                use_mean=True)

        # Only foreground samples contribute to regression; pick the 4
        # regression outputs belonging to each sample's ground-truth class.
        pos_inds = torch.nonzero(labels > 0).squeeze(1)
        pos_labels = labels[pos_inds]
        map_inds = 4 * pos_labels[:, None] + \
            torch.tensor([0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[pos_inds[:, None], map_inds],
            regression_targets[pos_inds],
            size_average=None,
            beta=1,
        )
        box_loss = self.augment_loss(box_loss, fitz_categories[pos_inds])
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
예제 #21
0
    def __call__(self, class_logits, box_regression):
        """Compute the loss for Faster R-CNN.

        ``subsample`` must have been called beforehand so that
        ``self._proposals`` carries labels and regression targets.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)        # [num_roi, C]
        box_regression = cat(box_regression, dim=0)    # [num_roi, C*4]
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")
        proposals = self._proposals

        # Per-ROI ground-truth class labels (0 = background), attached to
        # the proposals during target matching/subsampling.
        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        # Per-ROI regression targets [t_x, t_y, t_w, t_h].
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        # cross_entropy fuses log_softmax with the NLL loss.
        classification_loss = F.cross_entropy(class_logits, labels)

        # Foreground ROIs and their ground-truth labels.
        fg_inds = torch.nonzero(labels > 0).squeeze(1)
        fg_labels = labels[fg_inds]

        if self.cls_agnostic_bbox_reg:
            # A single shared box prediction lives at columns 4..7.
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            # One box per class: select the 4 columns of the GT class.
            map_inds = 4 * fg_labels[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        # Regression loss over foreground ROIs only, averaged over all ROIs.
        box_loss = smooth_l1_loss(
            box_regression[fg_inds[:, None], map_inds],
            regression_targets[fg_inds],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
예제 #22
0
    def __call__(self, class_logits, box_regression):
        """Compute the Faster R-CNN losses, scaling the classification loss
        by ``self.guided_loss_weighter``.

        Requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")
        proposals = self._proposals

        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        # Guard clause: with no foreground samples, both losses are zero
        # tensors on the right device.
        pos_inds = torch.nonzero(labels > 0).squeeze(1)
        if pos_inds.numel() == 0:
            zero = pos_inds.new_tensor([0]).float()
            return zero, zero

        classification_loss = F.cross_entropy(class_logits, labels)

        if self.cls_agnostic_bbox_reg:
            # Single shared box prediction at columns 4..7.
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            # Select the 4 regression columns of each GT class.
            map_inds = 4 * labels[pos_inds][:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[pos_inds[:, None], map_inds],
            regression_targets[pos_inds],
            size_average=True,
            beta=1,
        )

        # Rescale classification loss relative to the box loss.
        classification_loss *= self.guided_loss_weighter(
            box_loss, classification_loss)
        return classification_loss, box_loss
예제 #23
0
    def __call__(self, class_logits, box_regression):
        """Compute the loss for Faster R-CNN.

        Requires that the subsample method has been called beforehand.
        Labels equal to -1 are skipped by the classification loss
        (NOTE(review): upstream is expected to mark not-trained classes
        with -1 -- a disabled remapping hint existed here previously).

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")
        proposals = self._proposals

        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        classification_loss = F.cross_entropy(class_logits,
                                              labels,
                                              ignore_index=-1)

        # Foreground samples and the regression slots of their GT classes
        # (advanced indexing into the [num_roi, C*4] regression tensor).
        fg_inds = torch.nonzero(labels > 0).squeeze(1)
        fg_labels = labels[fg_inds]
        if self.cls_agnostic_bbox_reg:
            map_inds = torch.tensor([4, 5, 6, 7], device=device)
        else:
            map_inds = 4 * fg_labels[:, None] + torch.tensor(
                [0, 1, 2, 3], device=device)

        box_loss = smooth_l1_loss(
            box_regression[fg_inds[:, None], map_inds],
            regression_targets[fg_inds],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
예제 #24
0
    def __call__(self, attr_logits, class_logits, box_regression):
        """Extend the base Fast R-CNN loss with an attribute loss.

        Arguments:
            attr_logits (list[Tensor])
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            attrs_loss (Tensor): attribute cross-entropy, weighted by 0.5;
                attribute labels of -1 are ignored.
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        base = super(FastRCNNLossWithAttrComputation, self)
        classification_loss, box_loss = base.__call__(class_logits,
                                                      box_regression)

        attr_logits = cat(attr_logits, dim=0)
        attrs = cat([p.get_field("attrs") for p in self._proposals], dim=0)
        attrs_loss = 0.5 * F.cross_entropy(attr_logits, attrs,
                                           ignore_index=-1)

        return attrs_loss, classification_loss, box_loss
예제 #25
0
    def __call__(self, proposals, all_mask_logits, targets):
        """Compute the mask loss (plain BCE or column/row MIL variant).

        Arguments:
            proposals (list[BoxList]): carry a "proto_labels" field.
            all_mask_logits (list[Tensor]): mask logits per head.
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss
        """
        labels = cat([p.get_field("proto_labels") for p in proposals])
        # Binarize: anything > 0 counts as foreground.
        labels = (labels > 0).long()
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        if not self.use_mil_loss:
            mask_logits = all_mask_logits[0]
            if self.use_box_mask:
                mask_targets = self.prepare_targets_boxes(proposals)
            else:
                _, mask_targets = self.prepare_targets(proposals, targets)
            mask_targets = cat(mask_targets, dim=0)
            labels_pos = labels[pos_inds]
            # torch.mean (in binary_cross_entropy_with_logits) doesn't
            # accept empty tensors, so handle it separately
            if mask_targets.numel() == 0:
                return mask_logits.sum() * 0
            mask_loss = F.binary_cross_entropy_with_logits(
                mask_logits[pos_inds, labels_pos], mask_targets[pos_inds])
            return mask_loss

        labels_cr = self.prepare_targets_cr(proposals)
        labels_cr = cat(labels_cr, dim=0)
        mil_losses = []
        for mask_logits in all_mask_logits:
            # Column/row max-pooling yields the per-sample MIL score.
            mil_score = mask_logits[:, 1]
            mil_score = torch.cat(
                [mil_score.max(2)[0], mil_score.max(1)[0]], 1)
            # torch.mean (in binary_cross_entropy_with_logits) doesn't
            # accept empty tensors, so handle it separately
            if mil_score.numel() == 0:
                mil_losses.append(mask_logits.sum() * 0)
                # BUGFIX: skip the BCE below for this head -- previously
                # control fell through and evaluated it on the empty tensor,
                # appending a second (invalid) loss.
                continue

            mil_loss = F.binary_cross_entropy_with_logits(
                mil_score[pos_inds], labels_cr[pos_inds])
            mil_losses.append(mil_loss)

        if self.use_aff:
            # Affinity regularizer: penalize activations whose local 3x3
            # affinity response (squared) is strong.
            mask_logits = all_mask_logits[0]
            mask_logits_n = mask_logits[:, 1:].sigmoid()
            aff_maps = F.conv2d(mask_logits_n,
                                self.aff_weights,
                                padding=(1, 1))
            affinity_loss = mask_logits_n * (aff_maps**2)
            affinity_loss = torch.mean(affinity_loss)
            return 1.2 * sum(mil_losses) / len(
                mil_losses) + 0.05 * affinity_loss
        else:
            return sum(mil_losses) / len(mil_losses)
예제 #26
0
    def __call__(self, proposals, keypoint_logits):
        """Compute the keypoint heatmap loss.

        Projects each proposal's keypoints onto a discretized heatmap and
        applies cross-entropy over the flattened spatial locations; only
        valid keypoints contribute. As a side effect, roughly once every 20
        calls the first four predicted heatmaps are saved as JPEGs under a
        hard-coded Colab path for debugging.

        Arguments:
            proposals (list[BoxList]): carry a "keypoints" field.
            keypoint_logits (Tensor): shape (N, K, H, W).

        Return:
            keypoint_loss (Tensor): scalar loss.
        """
        heatmaps = []
        valid = []
        for proposals_per_image in proposals:
            kp = proposals_per_image.get_field("keypoints")
            heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
                kp, proposals_per_image, self.discretization_size)
            heatmaps.append(heatmaps_per_image.view(-1))
            valid.append(valid_per_image.view(-1))

        keypoint_targets = cat(heatmaps, dim=0)
        valid = cat(valid, dim=0).to(dtype=torch.uint8)
        valid = torch.nonzero(valid).squeeze(1)

        # torch.mean (in cross_entropy) doesn't accept empty tensors,
        # so handle that case separately.
        if keypoint_targets.numel() == 0 or len(valid) == 0:
            return keypoint_logits.sum() * 0

        N, K, H, W = keypoint_logits.shape
        keypoint_logits = keypoint_logits.view(N * K, H * W)

        keypoint_loss = F.cross_entropy(keypoint_logits[valid],
                                        keypoint_targets[valid])

        # Debug visualization, ~1 call in 20 (randrange(20) == 0 is
        # equivalent to the old `randrange(20) % 20 == 0`). torch.sigmoid
        # replaces the deprecated F.sigmoid, and the sigmoid is now only
        # computed when actually dumping images; `probs` avoids shadowing
        # the builtin `input`.
        if random.randrange(20) == 0:
            probs = torch.sigmoid(keypoint_logits.view(N * K, H, W))
            filename = f'{time.time()}'
            for idx in range(4):
                save_image(
                    probs[idx, None, :, :],
                    os.path.join('/content/sample_data',
                                 f'{filename}_kp{idx}_input.jpg'))

        return keypoint_loss
예제 #27
0
    def __call__(self, proposals, mask_logits, mil_score, targets):
        """Compute the column/row MIL loss plus a pairwise smoothness term.

        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor): (N, C, H, W) mask logits.
            mil_score (Tensor): per-sample column/row MIL scores.
            targets (list[BoxList])

        Return:
            tuple(Tensor, Tensor): weighted MIL loss and pairwise loss.
        """
        # MIL term: with per-class predictions, first select each sample's
        # ground-truth class channel.
        if len(mil_score.shape) > 2 or mask_logits.size(1) > 1:  # multi-class
            class_labels, _ = self.prepare_targets(proposals, targets)
            class_labels = cat(class_labels, dim=0)
            if len(mil_score.shape) > 2:
                mil_score = [s[c] for s, c in zip(mil_score, class_labels)]
                mil_score = torch.stack(mil_score)
            if mask_logits.size(1) > 1:
                mask_logits = [m[c] for m, c in zip(mask_logits, class_labels)]
                mask_logits = torch.stack(mask_logits).unsqueeze(1)

        # Target labels for the col/row MIL loss (positive and negative
        # samples alike).
        labels = self.prepare_targets_cr(proposals, targets)
        labels = cat(labels, dim=0)

        mil_loss = F.binary_cross_entropy_with_logits(mil_score, labels)

        # Pairwise term on sigmoid-normalized masks ([0, 1] range).
        device = mask_logits.device
        mask_logits_normalize = mask_logits.sigmoid()
        pairwise_loss = []

        for w in self.pairwise_weights_list:
            # Functional conv replaces the old per-call nn.Conv2d module
            # (bias-free, frozen weights): identical output, no module
            # allocation per call, and consistent with the sibling MIL loss.
            weights = (self.center_weight - w).view(1, 1, 3, 3).to(device)
            aff_map = F.conv2d(mask_logits_normalize,
                               weights.detach(),
                               padding=(1, 1))
            cur_loss = torch.mean(aff_map**2)
            pairwise_loss.append(cur_loss)
        pairwise_loss = torch.mean(torch.stack(pairwise_loss))

        return 1.0 * mil_loss, 0.05 * pairwise_loss
예제 #28
0
 def convert_to_roi_format(self, boxes):
     """Concatenate per-image BoxLists into one ROI tensor whose rows are
     [batch_index, x1, y1, x2, y2]."""
     concat_boxes = cat([b.bbox for b in boxes], dim=0)
     device = concat_boxes.device
     dtype = concat_boxes.dtype
     # Tag every box with the index of the image it came from.
     per_image_ids = [
         torch.full((len(b), 1), img_idx, dtype=dtype, device=device)
         for img_idx, b in enumerate(boxes)
     ]
     ids = cat(per_image_ids, dim=0)
     return torch.cat([ids, concat_boxes], dim=1)
예제 #29
0
    def forward(self, roi_features, proposals, logger=None):
        """Object/edge context encoding for relation prediction.

        Embeds each proposal (ROI features + label/logit embedding + box
        position embedding), runs the object-context encoder, derives
        per-object class distributions and predictions, then builds
        edge-context features for downstream relation heads.

        Arguments:
            roi_features (Tensor): per-proposal visual features.
            proposals (list[BoxList]): must be in 'xyxy' mode.
            logger: unused in this method; kept for interface compatibility.

        Returns:
            obj_dists (Tensor): per-object class distributions (built via
                ``to_onehot`` from GT labels in 'predcls' mode).
            obj_preds (Tensor): per-object predicted (or GT) class indices.
            edge_ctx (Tensor): edge-context features.
        """
        # labels will be used in DecoderRNN during training
        use_gt_label = self.training or self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL
        obj_labels = cat(
            [proposal.get_field("labels")
             for proposal in proposals], dim=0) if use_gt_label else None

        # label/logits embedding will be used as input
        if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL:
            obj_embed = self.obj_embed1(obj_labels)
        else:
            # Soft embedding: softmax over detector logits times the
            # embedding table; detached so no gradient reaches the detector.
            obj_logits = cat([
                proposal.get_field("predict_logits") for proposal in proposals
            ],
                             dim=0).detach()
            obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed1.weight

        # bbox embedding will be used as input
        assert proposals[0].mode == 'xyxy'
        pos_embed = self.bbox_embed(encode_box_info(proposals))

        # encode objects with transformer
        obj_pre_rep = cat((roi_features, obj_embed, pos_embed), -1)
        num_objs = [len(p) for p in proposals]
        obj_pre_rep = self.lin_obj(obj_pre_rep)
        obj_feats = self.context_obj(obj_pre_rep, num_objs)

        # predict obj_dists and obj_preds
        if self.mode == 'predcls':
            # Ground-truth classes are given; use them directly.
            obj_preds = obj_labels
            obj_dists = to_onehot(obj_preds, self.num_obj_cls)
            edge_pre_rep = cat(
                (roi_features, obj_feats, self.obj_embed2(obj_labels)), dim=-1)
        else:
            obj_dists = self.out_obj(obj_feats)
            # In 'sgdet' inference, resolve predictions with per-class NMS.
            use_decoder_nms = self.mode == 'sgdet' and not self.training
            if use_decoder_nms:
                boxes_per_cls = [
                    proposal.get_field('boxes_per_cls')
                    for proposal in proposals
                ]
                obj_preds = self.nms_per_cls(obj_dists, boxes_per_cls,
                                             num_objs)
            else:
                # Argmax over foreground classes (column 0 is background).
                obj_preds = obj_dists[:, 1:].max(1)[1] + 1
            edge_pre_rep = cat(
                (roi_features, obj_feats, self.obj_embed2(obj_preds)), dim=-1)

        # edge context
        edge_pre_rep = self.lin_edge(edge_pre_rep)
        edge_ctx = self.context_edge(edge_pre_rep, num_objs)

        return obj_dists, obj_preds, edge_ctx
예제 #30
0
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor)
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)
        labels = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)

        positive_inds = torch.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[positive_inds]

        if DEBUG:
            # Dump the first target mask and the corresponding predicted
            # mask (resized using the first proposal's box coords) as JPEGs.
            first_target = (mask_targets[0, ...].data.cpu().numpy() *
                            255).astype(np.uint8)
            first_mask = (
                mask_logits[positive_inds][0, 1].sigmoid().data.cpu().numpy() *
                255).astype(np.uint8)
            first_box = proposals[0].bbox[0]
            out_size = (int(first_box[2]), int(first_box[3]))
            target_img = Image.fromarray(first_target).resize(out_size)
            mask_img = Image.fromarray(first_mask).resize(out_size)
            target_img.save('first_target.jpg', 'jpeg')
            mask_img.save('first_mask.jpg', 'jpeg')

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0

        return F.binary_cross_entropy_with_logits(
            mask_logits[positive_inds, labels_pos], mask_targets)
예제 #31
0
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """
        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")
        proposals = self._proposals

        labels = cat([p.get_field("labels") for p in proposals], dim=0)
        regression_targets = cat(
            [p.get_field("regression_targets") for p in proposals], dim=0)

        classification_loss = F.cross_entropy(class_logits, labels)

        # Foreground rows and, within each row, the 4 regression columns
        # belonging to the ground-truth class (advanced indexing).
        fg = torch.nonzero(labels > 0).squeeze(1)
        slot_offsets = torch.tensor([0, 1, 2, 3], device=device)
        map_inds = 4 * labels[fg][:, None] + slot_offsets

        box_loss = smooth_l1_loss(
            box_regression[fg[:, None], map_inds],
            regression_targets[fg],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss