Example #1
    def forward(self, confidence, predicted_locations, gts, anchors):
        # call-site argument names: cls_out, reg_out, anchor_gt_labels, anchor_gt_locations
        """

        Compute classification loss and smooth l1 loss.
        Args:
            confidence (batch_size, num_anchors, num_classes): class predictions.
            locations (batch_size, num_anchors, 4*seq_len): predicted locations.
            labels (batch_size, num_anchors): real labels of all the anchors.
            boxes (batch_size, num_anchors, 4*seq_len): real boxes corresponding all the anchors.

        """

        gt_locations = []
        labels = []
        with torch.no_grad():
            for b in range(len(gts)):
                gt_boxes = gts[b][:, :4]
                gt_labels = gts[b][:, 4]
                gt_labels = gt_labels.long()  # keep labels on their current device

                conf, loc = box_utils.match_anchors(gt_boxes, gt_labels,
                                                    anchors)

                labels.append(conf)
                gt_locations.append(loc)
            gt_locations = torch.stack(gt_locations, 0)
            labels = torch.stack(labels, 0)

        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        num_pos = max(1.0, float(pos_mask.sum()))  # guard against images with no positives
        smooth_l1_loss = F.smooth_l1_loss(
            predicted_locations, gt_locations, reduction='sum') / num_pos

        ## Compute classification loss
        confidence = torch.sigmoid(confidence)
        cls_conf = confidence[:, :, 1:]
        binary_preds = confidence[:, :, 0]
        num_classes = cls_conf.size(2)

        conf_labels = labels[pos_mask] - 1
        y_onehot = cls_conf.new_zeros(conf_labels.size(0), num_classes)
        y_onehot[range(y_onehot.shape[0]), conf_labels] = 1.0
        classification_loss = self.bce_loss(
            cls_conf[pos_mask].reshape(-1, num_classes), y_onehot)

        labels_bin = (labels > 0).float()
        neg_mask = labels < 1
        binary_loss_pos = self.bce_loss(binary_preds[pos_mask],
                                        labels_bin[pos_mask])
        binary_loss_neg = self.bce_loss(binary_preds[neg_mask],
                                        labels_bin[neg_mask])
        binary_loss = binary_loss_pos * self.pos_weight + binary_loss_neg * self.neg_weight

        return smooth_l1_loss, classification_loss + binary_loss
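
For context, here is a runnable toy of the one-hot target construction used above (the class count and label values are made up, not from the source):

import torch

labels = torch.tensor([[0, 3, 1, 0]])               # 0 = background anchor
pos_mask = labels > 0
conf_labels = labels[pos_mask] - 1                  # shift out the background class
y_onehot = torch.zeros(conf_labels.size(0), 20)     # 20 foreground classes, assumed
y_onehot[torch.arange(conf_labels.size(0)), conf_labels] = 1.0
print(y_onehot.nonzero())                           # one hot column per positive anchor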
Example #2
    def forward(self, confidence, predicted_locations, gts, anchors):
        # call-site argument names: cls_out, reg_out, anchor_gt_labels, anchor_gt_locations
        """

        Compute classification loss and smooth l1 loss.
        Args:
            confidence (batch_size, num_anchors, num_classes): class predictions.
            locations (batch_size, num_anchors, 4*seq_len): predicted locations.
            labels (batch_size, num_anchors): real labels of all the anchors.
            boxes (batch_size, num_anchors, 4*seq_len): real boxes corresponding all the anchors.

        """

        gt_locations = []
        labels = []
        with torch.no_grad():
            for b in range(len(gts)):
                gt_boxes = gts[b][:, :4]
                gt_labels = gts[b][:, 4]
                gt_labels = gt_labels.long()  # keep labels on their current device

                conf, loc = box_utils.match_anchors(gt_boxes, gt_labels,
                                                    anchors)

                labels.append(conf)
                gt_locations.append(loc)
            gt_locations = torch.stack(gt_locations, 0)
            labels = torch.stack(labels, 0)

        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        num_pos = max(1.0, float(pos_mask.sum()))  # guard against images with no positives
        smooth_l1_loss = F.smooth_l1_loss(
            predicted_locations, gt_locations, reduction='sum') / num_pos

        ## Compute classification loss
        confidence = torch.sigmoid(confidence)
        cls_conf = confidence[:, :, 1:]
        # binary_preds = confidence[:,:,0] # not needed in focal loss
        num_classes = cls_conf.size(2)
        cls_conf = cls_conf.reshape(-1, num_classes)

        # Build one-hot targets over *all* anchors so the shapes match cls_conf;
        # background anchors keep an all-zero target row.
        flat_labels = labels.reshape(-1)
        y_onehot = cls_conf.new_zeros(flat_labels.size(0), num_classes)
        pos_idx = (flat_labels > 0).nonzero(as_tuple=True)[0]
        y_onehot[pos_idx, flat_labels[pos_idx] - 1] = 1.0

        alpha_factor = self.alpha * y_onehot + (1.0 - self.alpha) * (1.0 - y_onehot)
        focal_weight = (1.0 - cls_conf) * y_onehot + cls_conf * (1.0 - y_onehot)  # = 1 - p_t
        weight = alpha_factor * focal_weight**self.gamma
        classification_loss = self.bce_loss(cls_conf, y_onehot,
                                            weight=weight) / num_pos

        return smooth_l1_loss, classification_loss
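
A quick sanity check of the focal modulation above; alpha = 0.25 and gamma = 2 are the usual RetinaNet defaults, assumed here rather than taken from the source:

import torch

p = torch.tensor([0.9, 0.1])                      # sigmoid scores
y = torch.tensor([1.0, 0.0])                      # one-hot targets
alpha, gamma = 0.25, 2.0
alpha_factor = alpha * y + (1.0 - alpha) * (1.0 - y)
focal_weight = (1.0 - p) * y + p * (1.0 - y)      # = 1 - p_t
print(alpha_factor * focal_weight ** gamma)       # confident examples get tiny weight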
Example #3
    def forward(self, confidence, predicted_locations, gts, anchors):
        # call-site argument names: cls_out, reg_out, anchor_gt_labels, anchor_gt_locations
        """


        Compute classification loss and smooth l1 loss.
        Args:
            confidence (batch_size, num_anchors, num_classes): class predictions.
            locations (batch_size, num_anchors, 4*seq_len): predicted locations.
            labels (batch_size, num_anchors): real labels of all the anchors.
            boxes (batch_size, num_anchors, 4*seq_len): real boxes corresponding all the anchors.


        """

        num_classes = confidence.size(2)
        gt_locations = []
        labels = []
        with torch.no_grad():
            for b in range(len(gts)):
                gt_boxes = gts[b][:, :4]
                gt_labels = gts[b][:, 4]
                gt_labels = gt_labels.long()  # keep labels on their current device

                conf, loc = box_utils.match_anchors(gt_boxes, gt_labels,
                                                    anchors)

                labels.append(conf)
                gt_locations.append(loc)
            gt_locations = torch.stack(gt_locations, 0)
            labels = torch.stack(labels, 0)
            # Background confidence, -log p(class 0), used to rank negative anchors.
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels,
                                                  self.neg_pos_ratio)

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes),
                                              labels[mask],
                                              reduction='sum')
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations,
                                          gt_locations,
                                          reduction='sum')
        num_pos = max(1.0, float(pos_mask.sum()))  # guard against images with no positives
        return smooth_l1_loss / num_pos, classification_loss / num_pos
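
box_utils.hard_negative_mining is not shown in this listing. Below is a minimal sketch of the semantics the call above assumes, following the common SSD implementation (keep every positive anchor plus the neg_pos_ratio hardest negatives per image):

import torch

def hard_negative_mining(loss, labels, neg_pos_ratio):
    # loss: (batch, num_anchors) background loss; labels: (batch, num_anchors).
    pos_mask = labels > 0
    num_pos = pos_mask.long().sum(dim=1, keepdim=True)
    loss = loss.clone()
    loss[pos_mask] = -float('inf')                  # exclude positives from the ranking
    _, indexes = loss.sort(dim=1, descending=True)
    _, orders = indexes.sort(dim=1)                 # per-anchor rank by loss
    neg_mask = orders < num_pos * neg_pos_ratio     # top-k hardest negatives
    return pos_mask | neg_mask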
Example #4
    def forward(self, confidence, predicted_locations, gts, counts, anchors):
        """
        
        Compute classification loss and smooth l1 loss.
        Args:
            confidence (batch_size, num_anchors, num_classes): class predictions.
            locations (batch_size, num_anchors, 4): predicted locations.
            boxes list of len = batch_size and nx4 arrarys
            anchors: (num_anchors, 4)

        """

        num_classes = confidence.size(2)
        gt_locations = []
        labels = []
        with torch.no_grad():
            for b in range(len(gts)):
                gt_boxes = gts[b, :counts[b], :4]
                gt_labels = gts[b, :counts[b], 4]
                gt_labels = gt_labels.long()  # keep labels on their current device

                conf, loc = box_utils.match_anchors(
                    gt_boxes,
                    gt_labels,
                    anchors,
                    iou_threshold=self.positive_threshold)

                labels.append(conf)
                gt_locations.append(loc)
            gt_locations = torch.stack(gt_locations, 0)
            labels = torch.stack(labels, 0)
            # Background confidence, -log p(class 0), used to rank negative anchors.
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels,
                                                  self.neg_pos_ratio)

        pos_mask = labels > 0
        num_pos = max(1.0, float(pos_mask.sum()))

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(
            confidence.reshape(-1, num_classes), labels[mask],
            reduction='sum') / (num_pos * 4.0)

        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)

        localisation_loss = F.smooth_l1_loss(predicted_locations,
                                             gt_locations,
                                             reduction='sum') / (num_pos * 4.0)

        return localisation_loss, classification_loss
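
The padded gts/counts convention here differs from the list-of-tensors input of the earlier examples; a toy illustration with made-up values:

import torch

gts = torch.zeros(2, 5, 5)      # (batch, max_boxes, [x1, y1, x2, y2, label])
gts[0, 0] = torch.tensor([0.1, 0.1, 0.4, 0.4, 3.0])
gts[1, 0] = torch.tensor([0.2, 0.2, 0.6, 0.6, 1.0])
gts[1, 1] = torch.tensor([0.5, 0.1, 0.9, 0.3, 2.0])
counts = torch.tensor([1, 2])   # number of valid (non-padding) boxes per image
for b in range(len(gts)):
    print(gts[b, :counts[b], :4].shape)   # only the real boxes reach match_anchors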