Example #1
    def forward(self, predictions, targets, size_average=False):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)
        priors = self.priors
        # priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        # loc_t = Variable(loc_t, requires_grad=False)
        # conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        num_pos = pos.sum()

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=size_average)
        loss_l /= num_pos.data.sum()

        # Confidence Loss (Focal loss)
        # Shape: [batch,num_priors,1]
        loss_c = self.focal_loss(conf_data.view(-1, self.num_classes),
                                 conf_t.view(-1, 1))

        return loss_l, loss_c
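
Example #1 delegates the confidence term to a self.focal_loss helper that is not shown on this page. Below is a minimal sketch of such a helper, assuming raw logits of shape (N, num_classes) and integer class targets of shape (N, 1); the function name, signature, and the gamma/alpha defaults are assumptions, not the original code.

import torch
import torch.nn.functional as F

def focal_loss(logits, targets, gamma=2.0, alpha=0.25):
    # Focal loss sketch (Lin et al., 2017): FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t)
    # logits:  (N, num_classes) raw class scores
    # targets: (N, 1) integer class indices
    targets = targets.view(-1)
    log_p = F.log_softmax(logits, dim=1)                        # stable log-probabilities
    log_p_t = log_p.gather(1, targets.unsqueeze(1)).squeeze(1)  # log-prob of the true class
    p_t = log_p_t.exp()
    # (1 - p_t)^gamma down-weights well-classified examples
    return (-alpha * (1.0 - p_t).pow(gamma) * log_p_t).sum()
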
Example #2
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)
        priors = self.priors
        # priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        # num_pos = pos.sum()

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c = loss_c.view(-1, num_priors)
        loss_c[pos] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(
            1, keepdim=True)  # keepdim=True so num_neg can broadcast below
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = num_pos.data.sum().float()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
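
Examples #2 through #4 also call a log_sum_exp helper that is not shown here. The following is a numerically stable sketch consistent with how it is used above (input of shape (N, num_classes), output of shape (N, 1)); the original implementation may differ in detail.

import torch

def log_sum_exp(x):
    # Stable log(sum(exp(x))) over the class dimension.
    # Subtracting the max before exponentiating avoids overflow;
    # returns shape (N, 1) so it can be subtracted from a gather() result.
    x_max = x.max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
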
Example #3
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, obj_data = predictions
        num = loc_data.size(0)
        priors = self.priors
        # priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))  # num_priors == number of predicted boxes
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        # num_pos = pos.sum()

        # Localization Loss (MSE, split into xy and wh terms)
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)

        loc_p_xy = loc_p[:, 0:2]
        loc_t_xy = loc_t[:, 0:2]
        loss_l_xy = F.mse_loss(torch.sigmoid(loc_p_xy), loc_t_xy,
                               size_average=False)

        loc_p_wh = loc_p[:, 2:4]
        loc_t_wh = loc_t[:, 2:4]
        loss_l_wh = F.mse_loss(loc_p_wh, loc_t_wh, size_average=False)
        loss_l = loss_l_xy + loss_l_wh

        # Per-prior confidence loss (left unreduced in this example)
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # objectness loss (target is all zeros in this example)
        batch_objn = obj_data.view(-1, 1)
        loss_o = F.binary_cross_entropy_with_logits(batch_objn,
                                                    torch.zeros_like(batch_objn),
                                                    size_average=False)

        return loss_l, loss_c, loss_o
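
For orientation, here is a hedged sketch of how a loss module with the forward signature of Example #3 might be driven. The class name and constructor arguments are hypothetical, inferred from the attributes the code reads (self.threshold, self.variance, self.num_classes, self.priors, ...); only the tensor shapes come from the docstrings above.

import torch

batch_size, num_priors, num_classes = 2, 8732, 21  # placeholder shapes

loc_preds = torch.randn(batch_size, num_priors, 4)             # box regressions
conf_preds = torch.randn(batch_size, num_priors, num_classes)  # class scores
obj_preds = torch.randn(batch_size, num_priors, 1)             # objectness logits

# One tensor per image: [num_objs, 5], last column is the class label.
targets = [torch.tensor([[0.10, 0.10, 0.40, 0.50, 12.0]]),
           torch.tensor([[0.20, 0.30, 0.60, 0.90, 4.0]])]

# Hypothetical construction and call -- not the original API:
# criterion = MultiBoxLoss(num_classes=num_classes, overlap_thresh=0.5,
#                          neg_pos=3, use_gpu=torch.cuda.is_available())
# loss_l, loss_c, loss_o = criterion((loc_preds, conf_preds, obj_preds), targets)
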
Example #4
    def forward(self, predictions, targets, mode='pos'):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data = predictions
        num = loc_data.size(0)
        priors = self.priors
        # priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes
        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        for idx in range(num):
            # print(type(targets[idx]))
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        if mode == 'pos':
            return pos
        # num_pos = pos.sum()

        # Per-prior confidence loss (computed here but not used further below)
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        loss_c = loss_c.view(pos.size(0), pos.size(1))

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        sizes = [361, 100, 25, 9, 4, 1]  # feature-map cells per detection layer (19^2 ... 1^2)
        boxes = [6, 6, 6, 6, 4, 4]  # prior boxes per cell at each layer

        activation = torch.zeros(6, 4)  # per layer: [correct_all, wrong_all, correct_pos, wrong_pos]
        start = 0
        end = 0
        for i in range(6):
            start += sizes[i] * boxes[i]
            # print('start',start)
            # print('end',end)
            pos_idxi = pos_idx[0, end:start]
            posi = pos[0, end:start]
            conf_datai = conf_data[0, end:start]
            conf_ti = conf_t[0, end:start]
            # print(conf_ti.size())
            end = start
            # print('end--',end)
            conf_p = conf_datai.view(-1, self.num_classes)
            conf_p = F.softmax(conf_p, dim=-1)
            # print('conf_p',conf_p.size())

            targets_weighted = conf_ti
            all_pos = torch.sum(
                targets_weighted.eq(torch.argmax(conf_p, dim=1)).float())
            all_neg = torch.sum(
                targets_weighted.ne(torch.argmax(conf_p, dim=1)).float())
            activation[i][0] = all_pos
            activation[i][1] = all_neg
            # print(all_pos)
            # print(all_neg)

            conf_p = conf_datai[pos_idxi].view(-1, self.num_classes)
            conf_p = F.softmax(conf_p, dim=-1)
            num_pos, num_neg = 0, 0
            if conf_p.size(0) != 0:
                # print('conf_p',conf_p.size())
                # print('conf_p',conf_p)
                # print(torch.argmax(conf_p, dim=1))

                targets_weighted = conf_ti[posi]
                num_pos = torch.sum(
                    targets_weighted.eq(torch.argmax(conf_p, dim=1)).float())
                num_neg = torch.sum(
                    targets_weighted.ne(torch.argmax(conf_p, dim=1)).float())
                # print(num_pos)
                # print(num_neg)
                # print('targets_weighted',targets_weighted)
            activation[i][2] = num_pos
            activation[i][3] = num_neg

        return activation
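
Example #4 returns a 6x4 statistics tensor instead of a loss: rows index the six detection layers, and, following the loop above, the columns hold correct and incorrect predictions over all priors, then over positive (matched) priors only, all for the first image in the batch. A small sketch of reading such a tensor; the placeholder below only mirrors the returned shape.

import torch

# Placeholder with the 6x4 shape Example #4 returns (all zeros here).
activation = torch.zeros(6, 4)

for i, (correct_all, wrong_all, correct_pos, wrong_pos) in enumerate(activation.tolist()):
    total = correct_all + wrong_all
    acc = correct_all / total if total else 0.0
    print("layer %d: overall acc %.3f, positives %d/%d correct"
          % (i, acc, correct_pos, correct_pos + wrong_pos))
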