Example #1
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw head activations into rotated boxes and, optionally, a loss.

        Per anchor the channel axis of ``x`` is split into ``num_classes + 7``
        attributes: centre x, centre y, width, height, imaginary and real part
        of the box angle, objectness, and the per-class scores.

        Args:
            x: Raw feature map. It is viewed as ``(num_samples, num_anchors,
                num_classes + 7, grid_size, grid_size)`` where ``grid_size`` is
                read from ``x.size(2)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            A pair ``(output, loss)``. ``output`` has shape ``(num_samples,
            num_anchors * grid_size ** 2, num_classes + 7)``; its first four
            columns are multiplied by ``self.stride``. ``loss`` is the summed
            loss tensor, or ``0`` when ``targets`` is ``None`` or when a
            ``RuntimeError`` was raised while building targets / computing
            the loss (the error is printed in that case).

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            overwrites ``self.metrics`` after a successful loss computation.
        """
        # Remember the device kind before ``x`` is rebound to the decoded centre.
        on_gpu = x.is_cuda

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Move the attribute axis last: (samples, anchors, grid, grid, attrs).
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        im = prediction[..., 4]  # angle imaginary part
        re = prediction[..., 5]  # angle real part
        pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=on_gpu)

        # Add offset and scale with anchors. The buffer is allocated on the
        # same device as the activations instead of via the legacy typed
        # constructors; every slot is written below, so no initialisation
        # is required.
        pred_boxes = torch.empty(
            prediction[..., :6].shape, dtype=torch.float32, device=prediction.device
        )
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
        pred_boxes[..., 4] = im
        pred_boxes[..., 5] = re

        # Only the four geometric columns are scaled by the stride; the two
        # angle components are passed through unchanged.
        output = torch.cat(
            (
                pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
                pred_boxes[..., 4:].view(num_samples, -1, 2),
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        # Original author's note: build_targets (utils/utils.py) can fail when
        # its IoU tensor is empty; in that case fall back to (output, 0).
        try:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
            loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
            loss_euler = loss_im + loss_re
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_euler + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # The 1e-16 terms guard against division by zero.
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "im": to_cpu(loss_im).item(),
                "re": to_cpu(loss_re).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
        except RuntimeError as err:
            print(err)
            return output, 0
Example #2
0
    def forward(self, x, targets=None):
        """Decode the raw head output and, when targets are given, compute losses.

        Args:
            x: Pre-activation tensor; it is viewed as ``(bs, num_anchors,
                bbox_attrs, g_dim, g_dim)`` with ``g_dim = x.size(2)``.
            targets: Ground truth passed to ``build_targets``. ``None``
                switches to inference.

        Returns:
            Training: ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf,
            loss_cls, recall)`` where ``loss`` is a tensor and the remaining
            entries are plain Python numbers.
            Inference: a detached tensor of shape ``(bs, num_anchors *
            g_dim * g_dim, 5 + num_classes)`` whose first four columns are
            multiplied by ``img_dim / g_dim``.
        """
        batch = x.size(0)
        cells = x.size(2)
        stride = self.img_dim / cells

        # Tensor constructors matching the device kind of the input.
        use_cuda = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor

        # Attribute axis goes last: (bs, anchors, g_dim, g_dim, attrs).
        prediction = x.view(batch, self.num_anchors, self.bbox_attrs, cells, cells)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Raw offsets predicted by the network.
        cx = torch.sigmoid(prediction[..., 0])
        cy = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Per-cell column / row indices, tiled over batch and anchors.
        axis = torch.linspace(0, cells - 1, cells).repeat(cells, 1)
        tiles = batch * self.num_anchors
        grid_x = axis.repeat(tiles, 1, 1).view(cx.shape).type(FloatTensor)
        grid_y = axis.t().repeat(tiles, 1, 1).view(cy.shape).type(FloatTensor)

        # Anchors expressed in grid units, broadcast to the prediction shape.
        scaled_anchors = [
            (width / stride, height / stride) for width, height in self.anchors
        ]
        anchor_table = FloatTensor(scaled_anchors)
        anchor_w = anchor_table.index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, cells * cells).view(w.shape)
        anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, cells * cells).view(h.shape)

        # Decoded boxes (grid units); built from detached values.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = cx.data + grid_x
        pred_boxes[..., 1] = cy.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Inference: return the decoded predictions and stop here.
        if targets is None:
            decoded = torch.cat(
                (
                    pred_boxes.view(batch, -1, 4) * stride,
                    conf.view(batch, -1, 1),
                    pred_cls.view(batch, -1, self.num_classes),
                ),
                -1,
            )
            return decoded.data

        # ---- training ----
        if use_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()

        scaled_all_anchors = [
            (width / stride, height / stride) for width, height in self.all_anchors
        ]
        built = build_targets(
            pred_boxes.cpu().data,
            targets.cpu().data,
            scaled_anchors,
            scaled_all_anchors,
            self.num_anchors,
            self.num_classes,
            cells,
            self.ignore_thres,
            self.img_dim,
        )
        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = built

        recall = float(nCorrect / nGT) if nGT else 1

        # Masks as float tensors on the working device.
        mask = Variable(mask.type(FloatTensor))  # localisation
        cls_mask = Variable(
            mask.unsqueeze(-1)
            .repeat(1, 1, 1, 1, self.num_classes)
            .type(FloatTensor)
        )  # classification
        conf_mask = Variable(conf_mask.type(FloatTensor))  # negative confidence

        # Positives are far fewer than negatives, so terms are normalised:
        # localisation / class terms by the positive count, the confidence
        # term by positives + negatives. Ignored boxes contribute nothing.
        balanced = False
        num_positive_box = (
            torch.sum(mask.view(batch, -1), -1).view(batch, 1, 1, 1) + 1e-16
        )
        num_negative_box = (
            torch.sum(conf_mask.view(batch, -1), -1).view(batch, 1, 1, 1) + 1e-16
        )

        # Targets never need gradients.
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(FloatTensor), requires_grad=False)

        def _loc_term(pred, tgt):
            # Positive-normalised MSE restricted to responsible cells.
            weighted = 1 / num_positive_box * self.mse_loss(pred, tgt)
            return torch.sum(weighted[mask == 1]) / batch

        loss_x = _loc_term(cx, tx)
        loss_y = _loc_term(cy, ty)
        loss_w = _loc_term(w, tw)
        loss_h = _loc_term(h, th)
        # Placeholder scaling (factor 1) kept from the original code.
        for coord_loss in (loss_x, loss_y, loss_w, loss_h):
            coord_loss.mul_(1)

        # Classification loss.
        num_cls_each_box = (
            torch.zeros(batch, self.num_anchors, cells, cells).type(FloatTensor)
            + 1e-16
        )
        if balanced:
            num_ref = torch.sum(tcls.reshape(batch, -1, self.num_classes), 1)
            for sample in range(batch):
                for cls_id in range(self.num_classes):
                    hits = tcls[sample][..., cls_id] == 1
                    num_cls_each_box[sample][hits] = num_ref[sample, cls_id]
            num_cls_each_box = num_cls_each_box.unsqueeze(-1)
            cls_terms = 1 / num_cls_each_box * self.bce_loss(pred_cls, tcls)
            loss_cls = torch.sum(cls_terms[cls_mask == 1]) / (
                batch * self.num_classes
            )
        else:
            selected = self.bce_loss(pred_cls, tcls)[cls_mask == 1]
            # With no positive entries a sum is used instead of a mean.
            if cls_mask.max().item() == 0.:
                loss_cls = torch.sum(selected)
            else:
                loss_cls = torch.mean(selected)

        # Confidence loss.
        if balanced:
            pos_norm, neg_norm = num_positive_box, num_negative_box
        else:
            pos_norm = num_positive_box + num_negative_box
            neg_norm = num_positive_box + num_negative_box
        loss_conf_all = self.bce_loss(conf, tconf)
        loss_conf_pos = torch.sum((1 / pos_norm * loss_conf_all)[mask == 1])
        loss_conf_neg = torch.sum((1 / neg_norm * loss_conf_all)[conf_mask == 1])
        loss_conf = (loss_conf_pos + loss_conf_neg) / batch

        loss = loss_x + loss_y + loss_w + loss_h + loss_cls + loss_conf

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
        )
Example #3
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw head activations into boxes and, optionally, a loss.

        Args:
            x: Raw feature map. It is viewed as ``(num_samples, num_anchors,
                num_classes + 5, grid_size, grid_size)`` where ``grid_size``
                is read from ``x.size(2)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            A pair ``(output, loss)``. ``output`` has shape ``(num_samples,
            num_anchors * grid_size ** 2, 5 + num_classes)`` with the box
            columns multiplied by ``self.stride``. ``loss`` is ``0`` in
            inference mode, the weighted no-object confidence loss when no
            cell is marked in ``obj_mask``, and the full summed loss
            otherwise.

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            overwrites ``self.metrics`` when the full loss is computed (the
            no-object early return leaves it untouched).
        """
        # Remember the device kind before ``x`` is rebound to the decoded centre.
        on_gpu = x.is_cuda

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Move the attribute axis last: (samples, anchors, grid, grid, attrs).
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=on_gpu)

        # Add offset and scale with anchors. Allocated on the activations'
        # device; every slot is written below.
        pred_boxes = torch.empty(
            prediction[..., :4].shape, dtype=torch.float32, device=prediction.device
        )
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # The confidence target is derived from obj_mask. It must exist before
        # the no-object shortcut below, which previously read it while it was
        # still unassigned.
        tconf = obj_mask.float()

        if not obj_mask.any():
            # No responsible cell: only the no-object confidence term remains.
            total_loss = self.noobj_scale * self.bce_loss(
                pred_conf[noobj_mask], tconf[noobj_mask])
            return output, total_loss

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
Example #4
0
    def forward(self, x, cls_targets=None):
        """Run the configured module stack and return the conf/class head output.

        Each entry of ``self.module_defs`` is paired with the module at the
        same position in ``self.module_list`` and dispatched on its
        ``"type"``:

        * ``"resnet"``, ``"convolutional"``, ``"maxpool"``: apply the module.
        * ``"shortcut"``: add the previous layer output to the output at
          index ``from``.
        * ``"cls_conv"``: when ``from`` is non-zero the module is fed the
          most recent layer output (the ``from`` value itself is not used as
          an index); when ``out`` is ``1`` the layer is recorded as the one
          whose output is returned; when ``out`` is truthy and targets are
          given, the confidence/class loss is computed from this layer.

        Args:
            x: Input tensor for the first module.
            cls_targets: Ground truth passed to ``build_targets``. ``None``
                selects inference mode.

        Returns:
            The recorded ``cls_conv`` output when ``cls_targets`` is ``None``,
            otherwise ``(loss_conf_cls, output)``.

        Raises:
            RuntimeError: If no ``cls_conv`` layer with ``out == 1`` exists,
                or if targets were given but no layer produced a loss.
                (Previously these cases surfaced as ``UnboundLocalError``.)
        """
        layer_outputs = []
        backbone_ind = -1  # a cls_conv with non-zero "from" reads the latest output
        conf_cls_output_ind = None
        loss_conf_cls = None

        if cls_targets is not None:
            tar_conf, tar_cls, obj_mask, no_obj_mask = build_targets(
                cls_targets)

        for module_def, module in zip(self.module_defs, self.module_list):
            layer_type = module_def["type"]

            if layer_type in ("resnet", "convolutional", "maxpool"):
                x = module(x)

            elif layer_type == "shortcut":
                layer_i = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[layer_i]

            elif layer_type == "cls_conv":
                layer_i = int(module_def["from"])
                if layer_i != 0:
                    x = layer_outputs[backbone_ind]
                x = module(x)

                out = int(module_def["out"])
                if out == 1:
                    # Index this layer's output will occupy once appended below.
                    conf_cls_output_ind = len(layer_outputs)
                if out and cls_targets is not None:
                    # Channels last; channel 0 is confidence, the rest classes.
                    pred_conf_cls = x.permute(0, 2, 3, 1)
                    pred_conf = torch.sigmoid(pred_conf_cls[:, :, :, 0])
                    pred_cls = torch.sigmoid(pred_conf_cls[:, :, :, 1:])

                    loss_conf_obj = self.bce_loss(pred_conf[obj_mask],
                                                  tar_conf[obj_mask])
                    loss_conf_noobj = self.bce_loss(pred_conf[no_obj_mask],
                                                    tar_conf[no_obj_mask])
                    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
                    loss_cls = self.bce_loss(pred_cls[obj_mask],
                                             tar_cls[obj_mask])
                    # If several layers qualify, the last one's loss wins.
                    loss_conf_cls = self.conf_scale * loss_conf + self.cls_scale * loss_cls

            layer_outputs.append(x)

        if conf_cls_output_ind is None:
            raise RuntimeError(
                "module_defs contains no 'cls_conv' layer with out == 1")
        dqnyolo_conf_cls_outputs = layer_outputs[conf_cls_output_ind]

        if cls_targets is None:
            return dqnyolo_conf_cls_outputs

        if loss_conf_cls is None:
            raise RuntimeError(
                "cls_targets were given but no 'cls_conv' layer produced a loss")
        return loss_conf_cls, dqnyolo_conf_cls_outputs
Example #5
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw head activations into boxes and, optionally, a loss.

        Args:
            x: Raw feature map. It is viewed as ``(num_samples, num_anchors,
                num_classes + 5, grid_size, grid_size)`` where ``grid_size``
                is read from ``x.size(2)``.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            A pair ``(output, loss)``. ``output`` has shape ``(num_samples,
            num_anchors * grid_size ** 2, 5 + num_classes)`` with the box
            columns multiplied by ``self.stride``. ``loss`` is ``0`` in
            inference mode and the summed loss tensor otherwise.

        Side effects:
            Sets ``self.img_dim``; may call ``self.compute_grid_offsets``;
            logs tensor sizes via ``logger``; overwrites ``self.metrics`` in
            training mode.
        """
        # Remember the device kind before ``x`` is rebound to the decoded centre.
        on_gpu = x.is_cuda

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        logger.info(
            f"YOLOLayer input: {x.size(0)}, {x.size(1)}, {x.size(2)}, {x.size(3)}"
        )

        # Move the attribute axis last: (samples, anchors, grid, grid, attrs).
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())
        logger.info(
            f"After resize, prediction: {prediction.size(0)}, {prediction.size(1)}, {prediction.size(2)}, {prediction.size(3)}, {prediction.size(4)}"
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # if grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=on_gpu)

        # Add offset and scale with anchors. Allocated on the activations'
        # device; every slot is written below.
        pred_bboxes = torch.empty(
            prediction[..., :4].shape, dtype=torch.float32, device=prediction.device
        )
        pred_bboxes[..., 0] = x.data + self.grid_x
        pred_bboxes[..., 1] = y.data + self.grid_y
        # Original note: anchor_w / anchor_h are the already-scaled anchors.
        pred_bboxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_bboxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat((
            pred_bboxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ), -1)
        logger.info(
            f"YOLOLayer output: {output.size(0)}, {output.size(1)}, {output.size(2)}\n"
        )

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_bboxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Loss : Mask outputs to ignore non-existing objects (except with conf loss)
        # Box terms use the MSE loss on the raw (undecoded) x / y / w / h values.
        # All four terms go through self.mse_loss; y and w previously called
        # the non-existent ``self.mse.loss``.
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

        # Confidence uses the BCE loss; object and no-object cells are
        # weighted by obj_scale and noobj_scale respectively.
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Classification term (BCE over the class scores of object cells).
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        # Total loss = coordinate + confidence + classification terms.
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        # cls_acc: mean of class_mask over object cells, as a percentage.
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        # detected_mask: product of the confidence > 0.5 indicator,
        # class_mask and tconf.
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
Example #6
0
    def forward(self, x, targets=None, img_dim=None):
        """Turn raw head activations into detections and, with targets, a loss.

        Args:
            x: Raw feature map, viewed as ``(num_samples, num_anchors,
                num_classes + 5, grid_size, grid_size)``. The original notes
                mention inputs of (N, 255, 13, 13), (N, 255, 26, 26) and
                (N, 255, 52, 52) for the three heads -- TODO confirm.
            targets: Ground truth handed to ``build_targets``; ``None`` means
                inference.
            img_dim: Stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` without targets, ``(output, total_loss)`` with
            targets. ``output`` has shape ``(num_samples, num_anchors *
            grid_size ** 2, 5 + num_classes)``; its box columns are multiplied
            by ``self.stride``.
        """
        # Tensor constructors matching the device kind of the input.
        if x.is_cuda:
            FloatTensor = torch.cuda.FloatTensor
            LongTensor = torch.cuda.LongTensor
            ByteTensor = torch.cuda.ByteTensor
        else:
            FloatTensor = torch.FloatTensor
            LongTensor = torch.LongTensor
            ByteTensor = torch.ByteTensor

        self.img_dim = img_dim
        num_samples, grid_size = x.size(0), x.size(2)

        # Split channels per anchor, then move the attribute axis last:
        # (N, num_anchors, grid_size, grid_size, num_classes + 5).
        # contiguous() returns a memory-contiguous tensor with the same data.
        per_anchor = x.view(
            num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size
        )
        prediction = per_anchor.permute(0, 1, 3, 4, 2).contiguous()

        # Individual outputs.
        x = prediction[..., 0].sigmoid()  # centre x
        y = prediction[..., 1].sigmoid()  # centre y
        w = prediction[..., 2]  # width
        h = prediction[..., 3]  # height
        pred_conf = prediction[..., 4].sigmoid()  # objectness
        pred_cls = prediction[..., 5:].sigmoid()  # class scores

        # Recompute the cached offsets whenever the grid size changed.
        if self.grid_size != grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Apply the grid offsets and anchor scaling on detached values.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        flat_boxes = pred_boxes.view(num_samples, -1, 4) * self.stride
        flat_conf = pred_conf.view(num_samples, -1, 1)
        flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        # Notes carried over from the original author (semantics of
        # build_targets are not visible here -- verify against its source):
        #   iou_scores: IoU between predicted and true box where a label exists
        #   class_mask: whether the predicted class is correct at those cells
        #   obj_mask:   cells/anchors with the best IoU for a labelled object
        #   noobj_mask: everything except the best-IoU boxes and boxes whose
        #               IoU exceeds the ignore threshold
        #   tx, ty, tw, th: target centre offsets and sizes
        #   tcls:       class targets
        #   tconf:      obj_mask.float()
        built = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = built

        # Loss: everything except the confidence term is restricted to
        # cells that are responsible for an object.
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics.
        cls_acc = 100 * class_mask[obj_mask].mean()  # classification accuracy (%)
        conf_obj = pred_conf[obj_mask].mean()  # mean confidence on object cells
        conf_noobj = pred_conf[noobj_mask].mean()  # mean confidence on no-object cells
        conf50 = (pred_conf > 0.5).float()  # confidence above 0.5
        iou50 = (iou_scores > 0.5).float()  # IoU above 0.5
        iou75 = (iou_scores > 0.75).float()  # IoU above 0.75
        detected_mask = conf50 * class_mask * tconf  # confident, correct class, on target
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        num_objects = obj_mask.sum() + 1e-16  # epsilon avoids division by zero
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / num_objects  # recall at IoU 0.5
        recall75 = hits75 / num_objects  # recall at IoU 0.75

        scalar_metrics = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {key: to_cpu(value).item() for key, value in scalar_metrics}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss
Example #7
0
    def forward(self, x, targets=None):
        """Decode one YOLO head and, when ``targets`` is given, compute its loss.

        Args:
            x: Raw feature map from the preceding layer. It is reshaped to
                (batch, num_anchors, bbox_attrs, grid, grid), so it must carry
                ``num_anchors * bbox_attrs`` channels on a square grid.
            targets: Ground-truth labels forwarded to ``build_targets``. When
                ``None`` the layer runs in inference mode.

        Returns:
            Inference: a tensor of shape
            (batch, num_anchors * grid * grid, 4 + 1 + num_classes) holding the
            boxes (scaled by the stride), the objectness and the class scores.
            Training: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision)``; every entry except
            ``loss`` is a plain Python number.
        """
        # NOTE(review): unconditional trace on every call; kept as-is, but a
        # logger at debug level would be a better fit.
        print("YOLO layers forward>>")
        # Per the original author's note, x arrives at three scales:
        # 255*13*13
        # 255*26*26
        # 255*52*52
        nA = self.num_anchors         # number of anchors (3 in the author's config)
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG  # pixels per grid cell, e.g. 416/13=32, 416/26=16, 416/52=8
        # print("x shape:",x.shape)

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # Every feature-map cell yields anchors * bbox_attrs predicted values;
        # move the attribute axis last.
        prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        # The centre coordinates go through a sigmoid so they stay in (0, 1)
        # and can be added to the per-cell offsets below. Width and height are
        # left untouched here.
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        # Objectness (foreground vs. background) goes through a sigmoid.
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        # Class scores go through a sigmoid as well.
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid cell (nG is e.g. 13, 26 or 52).
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
        # print("grid_x:",grid_x)
        # print("grid_y:",grid_y)

        # Rescale the anchors by the stride so they are expressed in grid cells.
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            # No prediction above 0.5 confidence would divide by zero; report
            # zero precision instead of crashing the training step.
            precision = float(nCorrect / nProposals) if nProposals else 0.0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true]
            )
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # If not in training phase return predictions
            output = torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output
Example #8
0
    def forward(self, x, target=None, img_dim=None):
        """Decode YOLO predictions for one scale and optionally compute the loss.

        Args:
            x: Raw head output; reshaped below to
                (batch, num_anchors, num_classes + 5, grid, grid).
            target: Ground-truth labels passed through to ``build_targets``;
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` in inference mode, ``(output, total_loss)``
            otherwise. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes): box x, y, w, h
            multiplied by ``self.stride``, then objectness, then class scores.
            In training mode ``self.metrics`` is refreshed as a side effect.
        """
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        # The view below uses this value for both trailing axes, so the
        # feature map is expected to be square and dim 2 gives its side.
        grid_size = x.size(2)

        # Split the channel axis into (anchor, attribute) and move the
        # attribute axis last; contiguous() is needed for the later view().
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # get output
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        # Every attribute from index 5 onward is a class score, so slice rather
        # than pick a single channel.
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # if the grid size does not match the current one, compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if target is None:
            return output, 0

        iou_score, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=target,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Coordinate and class losses only look at cells selected by obj_mask;
        # the confidence loss also covers the noobj_mask cells.
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        # Class scores are compared against the class targets, not the
        # objectness targets.
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_score > 0.5).float()
        iou75 = (iou_score > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # The small epsilon keeps the ratios finite when a denominator is zero.
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
Example #9
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode YOLO predictions for one scale and optionally compute the loss.

        Args:
            x: Raw head output; reshaped below to
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground-truth labels passed through to ``build_targets``;
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` in inference mode, ``(output, total_loss)``
            otherwise. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes): box x, y, w, h
            multiplied by ``self.stride``, then objectness, then class scores.
            In training mode ``self.metrics`` is refreshed and
            ``self.noobj_scale`` is overwritten (see the note at the end).
        """
        # Follow the input's device type instead of assuming CUDA, so the layer
        # also runs on CPU (compute_grid_offsets is already told x.is_cuda).
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # convert predictions
        # note: NCHW format -> grid_y, grid_x
        # e.g. nx255x13x13 -> nx3x85x13x13 -> nx3x13x13x85
        # 85: tx_ctr, ty_ctr, tw, th, objectness, 80 classes
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # get and parse outputs; each is [batch_size, anchors, grid_y, grid_x]
        x = torch.sigmoid(prediction[..., 0])  # tx_ctr range: (0, 1)
        y = torch.sigmoid(prediction[..., 1])  # ty_ctr range: (0, 1)
        w = prediction[..., 2]  # tw
        h = prediction[..., 3]  # th
        pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
        # format: [batch_size, anchors, grid_y, grid_x, cls]
        pred_cls = torch.sigmoid(prediction[..., 5:])

        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # add offset and scale with anchors (all in feature-map units)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                # multiply by the stride to leave feature-map units
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        # (tx, ty, tw, th): target offsets
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Positive samples: localisation + class + objectness losses.
        # x, obj_mask and tx share the shape [batch, anchors, grid_y, grid_x],
        # so x[obj_mask] is a flat vector with one entry per positive sample.
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

        # pred_cls[obj_mask] and tcls[obj_mask] are [n_positive, num_classes]
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])

        # Negative samples: objectness loss only.
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])

        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        # recall75 needs the stricter 0.75 threshold; with 0.5 it merely
        # duplicated recall50.
        iou75 = (iou_scores > 0.75).float()
        # objectness > 0.5 and predicted class is correct
        detected_mask = conf50 * class_mask * tconf
        # precision = TP / (TP + FP), where TP + FP counts objectness > 0.5
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        # recall = TP / (TP + FN), where TP + FN counts all positive samples
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "grid_size": grid_size,
            "loss": to_cpu(total_loss).item(),
            "loss-tx": to_cpu(loss_x).item(),
            "loss-ty": to_cpu(loss_y).item(),
            "loss-tw": to_cpu(loss_w).item(),
            "loss-th": to_cpu(loss_h).item(),
            "loss-conf": to_cpu(loss_conf).item(),
            "loss-cls": to_cpu(loss_cls).item(),
            "loss-obj": to_cpu(loss_conf_obj).item(),
            "loss-noobj x scale": to_cpu(loss_conf_noobj * self.noobj_scale).item(),
            "loss-noobj": to_cpu(loss_conf_noobj).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
        }

        # NOTE(review): this overwrites the configured no-object weight after
        # the first training call, so only that first batch uses the original
        # value. Kept to preserve training behaviour; confirm it is intended.
        self.noobj_scale = 100000

        return output, total_loss
Example #10
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode circle predictions (centre x, centre y, diameter) for one scale.

        Each anchor carries ``num_classes + 4`` attributes: x, y, diameter,
        objectness, then the class scores; there is no height channel.

        Args:
            x: Raw head output; reshaped below to
                (batch, num_anchors, num_classes + 4, grid, grid).
            targets: Ground-truth labels passed through to ``build_targets``;
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` in inference mode, ``(output, total_loss)``
            otherwise. ``output`` has shape
            (batch, num_anchors * grid * grid, 4 + num_classes). In training
            mode ``self.metrics`` is refreshed as a side effect.
        """
        self.img_dim = img_dim
        batch = x.size(0)
        cells = x.size(2)
        # Scratch tensors follow the device type of the input.
        float_type = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Split channels into (anchor, attribute) and move attributes last.
        raw = x.view(batch, self.num_anchors, self.num_classes + 4, cells, cells)
        raw = raw.permute(0, 1, 3, 4, 2).contiguous()

        centre_x = torch.sigmoid(raw[..., 0])
        centre_y = torch.sigmoid(raw[..., 1])
        diameter = raw[..., 2]
        objectness = torch.sigmoid(raw[..., 3])
        class_scores = torch.sigmoid(raw[..., 4:])

        # Recompute the cached grid offsets when the grid size changed.
        if cells != self.grid_size:
            self.compute_grid_offsets(cells, cuda=centre_x.is_cuda)

        # Only the diameter is scaled by an anchor (the anchor width).
        circles = float_type(raw[..., :3].shape)
        circles[..., 0] = centre_x.data + self.grid_x
        circles[..., 1] = centre_y.data + self.grid_y
        circles[..., 2] = torch.exp(diameter.data) * self.anchor_w

        pieces = (
            circles.view(batch, -1, 3) * self.stride,
            objectness.view(batch, -1, 1),
            class_scores.view(batch, -1, self.num_classes),
        )
        output = torch.cat(pieces, -1)

        if targets is None:
            return output, 0

        # build_targets still returns a height target; the width target is
        # used as the diameter target and the height target is ignored.
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, td, _unused_th, tcls, tconf) = build_targets(
            pred_boxes=circles,
            pred_cls=class_scores,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Coordinate and class terms use obj_mask cells only; the confidence
        # term also covers the noobj_mask cells.
        loss_x = self.mse_loss(centre_x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(centre_y[obj_mask], ty[obj_mask])
        loss_d = self.mse_loss(diameter[obj_mask], td[obj_mask])
        conf_loss_obj = self.bce_loss(objectness[obj_mask], tconf[obj_mask])
        conf_loss_noobj = self.bce_loss(objectness[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * conf_loss_obj + self.noobj_scale * conf_loss_noobj
        loss_cls = self.bce_loss(class_scores[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + 0.5 * loss_d + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = objectness[obj_mask].mean()
        conf_noobj = objectness[noobj_mask].mean()
        conf50 = (objectness > 0.5).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum((iou_scores > 0.5).float() * detected_mask)
        hits75 = torch.sum((iou_scores > 0.75).float() * detected_mask)
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / (obj_mask.sum() + 1e-16)
        recall75 = hits75 / (obj_mask.sum() + 1e-16)

        scalars = {
            "loss": total_loss,
            "x": loss_x,
            "y": loss_y,
            "d": loss_d,
            "conf": loss_conf,
            "cls": loss_cls,
            "cls_acc": cls_acc,
            "recall50": recall50,
            "recall75": recall75,
            "precision": precision,
            "conf_obj": conf_obj,
            "conf_noobj": conf_noobj,
        }
        metrics = {key: to_cpu(value).item() for key, value in scalars.items()}
        metrics["grid_size"] = cells
        self.metrics = metrics

        return output, total_loss
    def forward(self, x, targets=None, img_dim=None):
        """Decode YOLO predictions for one scale and optionally compute the loss.

        Args:
            x: Raw head output; reshaped below to
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground-truth labels passed through to ``build_targets``;
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` in inference mode, ``(output, total_loss)``
            otherwise. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes): box x, y, w, h
            multiplied by ``self.stride``, then objectness, then class scores.
        """
        self.img_dim = img_dim
        batch = x.size(0)
        cells = x.size(2)
        # Scratch tensors follow the device type of the input.
        float_type = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Split channels into (anchor, attribute) and move attributes last.
        raw = x.view(batch, self.num_anchors, self.num_classes + 5, cells, cells)
        raw = raw.permute(0, 1, 3, 4, 2).contiguous()

        centre_x = torch.sigmoid(raw[..., 0])
        centre_y = torch.sigmoid(raw[..., 1])
        raw_w = raw[..., 2]
        raw_h = raw[..., 3]
        objectness = torch.sigmoid(raw[..., 4])
        class_scores = torch.sigmoid(raw[..., 5:])

        # Recompute the cached grid offsets when the grid size changed.
        if cells != self.grid_size:
            self.compute_grid_offsets(cells, cuda=centre_x.is_cuda)

        # Centres get the per-cell offsets; sizes are exp() scaled by anchors.
        boxes = float_type(raw[..., :4].shape)
        boxes[..., 0] = centre_x.data + self.grid_x
        boxes[..., 1] = centre_y.data + self.grid_y
        boxes[..., 2] = torch.exp(raw_w.data) * self.anchor_w
        boxes[..., 3] = torch.exp(raw_h.data) * self.anchor_h

        pieces = (
            boxes.view(batch, -1, 4) * self.stride,
            objectness.view(batch, -1, 1),
            class_scores.view(batch, -1, self.num_classes),
        )
        output = torch.cat(pieces, -1)

        if targets is None:
            return output, 0

        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=boxes,
            pred_cls=class_scores,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Box regression terms, restricted to the cells picked by obj_mask.
        loss_x = self.mse_loss(centre_x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(centre_y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(raw_w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(raw_h[obj_mask], th[obj_mask])

        # Objectness BCE on the obj_mask and noobj_mask cells, each weighted
        # by its own scale before being summed.
        conf_loss_obj = self.bce_loss(objectness[obj_mask], tconf[obj_mask])
        conf_loss_noobj = self.bce_loss(objectness[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * conf_loss_obj + self.noobj_scale * conf_loss_noobj

        # Multi-label class BCE on the obj_mask cells.
        loss_cls = self.bce_loss(class_scores[obj_mask], tcls[obj_mask])

        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        return output, total_loss
Example #12
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode (u, v, Z, quaternion) predictions for one scale.

        Each anchor carries ``num_classes + 8`` attributes: the projected image
        point (u, v), the depth Z, a quaternion (Qw, Qx, Qy, Qz), objectness
        and the class scores.

        Args:
            x: Raw head output; reshaped below to
                (batch, num_anchors, num_classes + 8, grid, grid).
            targets: Ground-truth labels passed through to ``build_targets``;
                ``None`` selects inference mode.
            img_dim: Stored on ``self.img_dim``; not otherwise used here.

        Returns:
            ``(output, 0)`` in inference mode, ``(output, total_loss)``
            otherwise. ``output`` has shape
            (batch, num_anchors * grid * grid, 8 + num_classes); only u and v
            are multiplied by ``self.stride``. In training mode
            ``self.metrics`` is refreshed as a side effect.
        """
        self.img_dim = img_dim
        batch = x.size(0)
        cells = x.size(2)
        # Scratch tensors follow the device type of the input.
        float_type = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Split channels into (anchor, attribute) and move attributes last.
        raw = x.view(batch, self.num_anchors, self.num_classes + 8, cells, cells)
        raw = raw.permute(0, 1, 3, 4, 2).contiguous()

        # (u, v): projected point on the image plane
        u = torch.sigmoid(raw[..., 0])
        v = torch.sigmoid(raw[..., 1])
        # Z of the 3D coordinates
        depth = raw[..., 2]
        # Quaternion Qw + Qx*i + Qy*j + Qz*k (raw, pre-activation values)
        q_w = raw[..., 3]
        q_x = raw[..., 4]
        q_y = raw[..., 5]
        q_z = raw[..., 6]

        objectness = torch.sigmoid(raw[..., 7])
        class_scores = torch.sigmoid(raw[..., 8:])

        # Recompute the cached grid offsets when the grid size changed.
        if cells != self.grid_size:
            self.compute_grid_offsets(cells, cuda=u.is_cuda)

        # Decoded pose: grid offsets on (u, v), sigmoid on Qw, tanh on Qx/Qy/Qz.
        pose = float_type(raw[..., :7].shape)
        pose[..., 0] = u.data + self.grid_x
        pose[..., 1] = v.data + self.grid_y
        pose[..., 2] = depth.data
        pose[..., 3] = torch.sigmoid(q_w.data)
        pose[..., 4] = torch.tanh(q_x.data)
        pose[..., 5] = torch.tanh(q_y.data)
        pose[..., 6] = torch.tanh(q_z.data)

        pieces = (
            pose[..., :2].view(batch, -1, 2) * self.stride,
            pose[..., 2:].view(batch, -1, 5),
            objectness.view(batch, -1, 1),
            class_scores.view(batch, -1, self.num_classes),
        )
        output = torch.cat(pieces, -1)

        if targets is None:
            return output, 0

        (z_scores, class_mask, obj_mask, noobj_mask,
         tu, tv, tZ, tQw, tQx, tQy, tQz, tcls, tconf) = build_targets(
            pred_uvZQ=pose,
            pred_cls=class_scores,
            target=targets,
            anchors=self.anchors,
            ignore_thres=self.ignore_thres,
        )

        # Position terms carry a x10 weight; quaternion terms are unweighted.
        # NOTE(review): the quaternion losses use the raw values while the
        # decoded output applies sigmoid/tanh; confirm build_targets emits
        # targets in the matching space.
        loss_u = 10 * self.mse_loss(u[obj_mask], tu[obj_mask])
        loss_v = 10 * self.mse_loss(v[obj_mask], tv[obj_mask])
        loss_Z = 10 * self.mse_loss(depth[obj_mask], tZ[obj_mask])
        loss_Qw = self.mse_loss(q_w[obj_mask], tQw[obj_mask])
        loss_Qx = self.mse_loss(q_x[obj_mask], tQx[obj_mask])
        loss_Qy = self.mse_loss(q_y[obj_mask], tQy[obj_mask])
        loss_Qz = self.mse_loss(q_z[obj_mask], tQz[obj_mask])
        conf_loss_obj = self.bce_loss(objectness[obj_mask], tconf[obj_mask])
        conf_loss_noobj = self.bce_loss(objectness[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * conf_loss_obj + self.noobj_scale * conf_loss_noobj
        loss_cls = self.bce_loss(class_scores[obj_mask], tcls[obj_mask])
        total_loss = loss_u + loss_v + loss_Z + loss_Qw + loss_Qx + loss_Qy + loss_Qz + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = objectness[obj_mask].mean()
        conf_noobj = objectness[noobj_mask].mean()
        conf50 = (objectness > 0.5).float()
        detected_mask = conf50 * class_mask * tconf
        # Recall at two z_scores thresholds (below 0.5 and below 0.05).
        within5 = (z_scores < 0.5).float()
        within05 = (z_scores < 0.05).float()
        recall5 = torch.sum(within5 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall05 = torch.sum(within05 * detected_mask) / (obj_mask.sum() + 1e-16)

        scalars = {
            "loss": total_loss,
            "u": loss_u,
            "v": loss_v,
            "Z": loss_Z,
            "Qw": loss_Qw,
            "Qx": loss_Qx,
            "Qy": loss_Qy,
            "Qz": loss_Qz,
            "conf": loss_conf,
            "cls": loss_cls,
            "cls_acc": cls_acc,
            "recall5": recall5,
            "recall05": recall05,
            "conf_obj": conf_obj,
            "conf_noobj": conf_noobj,
        }
        metrics = {key: to_cpu(value).item() for key, value in scalars.items()}
        metrics["grid_size"] = cells
        self.metrics = metrics

        return output, total_loss
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw YOLO-layer activations and, when targets are given, compute the loss.

        Args:
            x: Feature map; dim 0 is the batch and dim 2 is used as the grid
                size. It is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                (the default) selects inference mode.
            img_dim: Stored on ``self.img_dim`` (original note: the size of
                the image fed to the model).

        Returns:
            ``(output, loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the four
            box values multiplied by ``self.stride``. ``loss`` is ``0`` when
            ``targets`` is ``None``, otherwise the summed loss tensor.

        Side effects:
            Prints debug information, sets ``self.img_dim`` and, when targets
            are given, ``self.metrics``.
        """
        # Debug trace. ``targets`` defaults to None for inference, so its
        # shape is only read when it is actually present.
        print("^" * 30)
        print("yolo layer input: ", x.shape)
        print("targets: ", None if targets is None else targets.shape)
        print("img_dim: ", img_dim)

        # Tensor constructor matching the device of the input.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        # Size of the image as fed to the model (original author's note).
        self.img_dim = img_dim
        num_samples = x.size(0)
        # Feature-map (grid) size.
        grid_size = x.size(2)

        # Reshape the channel axis into (anchor, attribute) and move the
        # attribute axis last, e.g. (N, 255, 13, 13) -> (N, 3, 85, 13, 13)
        # -> (N, 3, 13, 13, 85) for 3 anchors and 80 classes.
        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Split the last axis into its attributes.
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add the per-cell offsets and scale by the anchors to obtain boxes.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        # exp() follows the box-size formula of the YOLOv3 paper (original
        # author's note).
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        # Box values are multiplied by self.stride before being concatenated
        # with confidence and class scores.
        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            # build_targets (defined elsewhere) converts the raw targets into
            # the per-cell tensors and masks used by the loss below.
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )
            # Cast the masks to bool; the original author notes that indexing
            # without this cast emits a warning.
            obj_mask = obj_mask.bool()
            noobj_mask = noobj_mask.bool()

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # The 1e-16 terms keep the ratios finite when a denominator is 0.
            precision = torch.sum(
                iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(
                iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
Example #14
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode raw YOLO-layer activations and, when targets are given, compute the loss.

        Args:
            x: Feature map; dim 0 is the batch and dim 2 is used as the grid
                size. It is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode.
            img_dim: Stored on ``self.img_dim``.

        Returns:
            ``(output, loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the four
            box values multiplied by ``self.stride``. ``loss`` is ``0`` when
            ``targets`` is ``None``, otherwise the summed loss tensor.

        Side effects:
            Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
        """
        # Tensor constructor matching the device of the input.
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Split the channel axis into (anchor, attribute), move the attribute
        # axis last, then make the memory layout contiguous. Resulting shape:
        # (num_samples, num_anchors, grid_size, grid_size, num_classes + 5),
        # e.g. (1, 3, 13, 13, 85) for COCO per the original author's note.
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5,
                   grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # sigmoid of the x-offset
        y = torch.sigmoid(prediction[..., 1])  # sigmoid of the y-offset
        w = prediction[..., 2]
        h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # If grid size does not match current we compute new offsets.
        # NOTE(review): the original author notes self.grid_size starts at 0,
        # so the first call always takes this branch - confirm in __init__.
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors. Original author's note:
        # x.data is [1, 3, 13, 13] and self.grid_x is [1, 1, 13, 13], so the
        # addition broadcasts.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # The 1e-16 terms keep the ratios finite when a denominator is 0.
            precision = torch.sum(
                iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(
                iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
Example #15
0
    def forward(self, x, targets=None, img_dim=None, Half=False):
        """Decode raw YOLO-layer activations and, when targets are given, compute the loss.

        Args:
            x: Feature map; dim 0 is the batch and dim 2 is used as the grid
                size. It is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference mode.
            img_dim: Stored on ``self.img_dim`` and passed on to
                ``compute_grid_offsets``.
            Half: Passed through to ``compute_grid_offsets``.

        Returns:
            ``(output, loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the four
            box values multiplied by ``self.stride``. ``loss`` is ``0`` when
            ``targets`` is ``None``, otherwise the summed loss tensor.

        Side effects:
            Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
        """
        # Pick the constructor for pred_boxes from the input's device. CUDA
        # half inputs get a half buffer, other CUDA inputs a float buffer, and
        # CPU inputs a CPU float buffer (previously CPU inputs were wrongly
        # given the CUDA constructor).
        if x.is_cuda:
            FloatTensor = torch.cuda.HalfTensor if x.type() == "torch.cuda.HalfTensor" else torch.cuda.FloatTensor
        else:
            FloatTensor = torch.FloatTensor

        # Original author's notes: x is the feature map from the last
        # convolution; for a 416x416 input its shape is (batch, 255, 13, 13)
        # or (batch, 255, 26, 26). 255 = 3 * (4 + 1 + 80): 3 boxes per cell,
        # 4 box coordinates, 1 confidence, 80 classes. The box attributes are
        # ordered center x, center y, width, height.
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # prediction has shape
        # (batch, num_anchors, grid_size, grid_size, num_classes + 5).
        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
                .permute(0, 1, 3, 4, 2)  # move the attribute axis last
                .contiguous()
        )

        # Center x, center y, confidence and class scores are squashed to
        # (0, 1) with a sigmoid; w and h are left raw (original author's note:
        # values greater than 1 do occur).
        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf (box confidence)
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, img_dim, cuda=x.is_cuda, Half=Half)

        # pred_boxes holds the boxes predicted by the network.
        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        # The presence of targets distinguishes training from inference.
        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            # Object and no-object confidence losses carry their own weights
            # (obj_scale, noobj_scale).
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # The 1e-16 terms keep the ratios finite when a denominator is 0.
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
    def forward(self, x, targets=None, img_dim=None):
        """Turn a raw detection-head feature map into box predictions, plus a loss when training.

        Args:
            x: Feature map in N,C,H,W layout; it is viewed as
                (N, num_anchors, num_classes + 5, H, H).
            targets: Ground truth handed to ``build_targets``; ``None`` means
                inference. Original author's note: an [n, 6] tensor laid out
                as [image index, 0, cx, cy, dw, dh].
            img_dim: Input image size, stored on ``self.img_dim``.

        Returns:
            ``(output, loss)``. ``output`` has shape
            (N, num_anchors * H * H, 5 + num_classes) with the box values
            multiplied by ``self.stride``; ``loss`` is ``0`` without targets,
            otherwise the total loss tensor.

        Side effects:
            Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
        """
        on_gpu = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

        # Input image size.
        self.img_dim = img_dim
        batch = x.size(0)  # number of samples
        cells = x.size(2)  # side length of the current feature map

        # Reshape to [batch, num_anchors, cells, cells, num_classes + 5].
        prediction = x.view(batch, self.num_anchors, self.num_classes + 5, cells, cells)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Slice the innermost (num_classes + 5) axis. Per the original notes,
        # x/y are the box offsets relative to the current cell and w/h are the
        # log of the box size relative to the anchors on this feature map.
        cx = torch.sigmoid(prediction[..., 0])  # Center x
        cy = torch.sigmoid(prediction[..., 1])  # Center y
        raw_w = prediction[..., 2]  # Width
        raw_h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Recompute the cached offsets whenever the grid size changes.
        if cells != self.grid_size:
            self.compute_grid_offsets(cells, cuda=on_gpu)

        # Turn tx, ty, tw, th back into box coordinates: add the cell offsets
        # and scale by the anchors.
        box_columns = (
            cx.data + self.grid_x,
            cy.data + self.grid_y,
            torch.exp(raw_w.data) * self.anchor_w,
            torch.exp(raw_h.data) * self.anchor_h,
        )
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        for col, values in enumerate(box_columns):
            pred_boxes[..., col] = values

        scaled_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((scaled_boxes, flat_conf, flat_cls), -1)

        # Inference: nothing further to compute.
        if targets is None:
            return output, 0

        # Original author's notes: obj_mask marks the entries whose anchor has
        # the highest IoU; noobj_mask excludes entries whose IoU exceeds the
        # threshold.
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )
        pos = obj_mask.bool()  # convert int8 to bool
        neg = noobj_mask.bool()  # convert int8 to bool

        mse = self.mse_loss
        bce = self.bce_loss

        # Part 1: coordinate errors on the responsible (cell, anchor) entries.
        loss_x = mse(cx[pos], tx[pos])
        loss_y = mse(cy[pos], ty[pos])
        loss_w = mse(raw_w[pos], tw[pos])
        loss_h = mse(raw_h[pos], th[pos])

        # Parts 2 and 3: objectness errors on object / no-object entries,
        # combined with their respective weights.
        loss_conf_obj = bce(pred_conf[pos], tconf[pos])
        loss_conf_noobj = bce(pred_conf[neg], tconf[neg])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Part 4: class errors on the responsible entries.
        loss_cls = bce(pred_cls[pos], tcls[pos])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[pos].mean()
        conf_obj = pred_conf[pos].mean()
        conf_noobj = pred_conf[neg].mean()
        confident = (pred_conf > 0.5).float()
        detected_mask = confident * class_mask * tconf
        hits50 = torch.sum((iou_scores > 0.5).float() * detected_mask)
        hits75 = torch.sum((iou_scores > 0.75).float() * detected_mask)
        # The 1e-16 terms keep the ratios finite when a denominator is 0.
        precision = hits50 / (confident.sum() + 1e-16)
        recall50 = hits50 / (pos.sum() + 1e-16)
        recall75 = hits75 / (pos.sum() + 1e-16)

        tracked = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {key: to_cpu(value).item() for key, value in tracked}
        metrics["grid_size"] = cells
        self.metrics = metrics

        return output, total_loss
    def forward(self, x, targets=None):
        """Decode YOLO-layer activations; return losses when training, predictions otherwise.

        Args:
            x: Feature map; dim 0 is the batch and dim 2 is used as the grid
                size. It is viewed as
                (batch, num_anchors, bbox_attrs, grid, grid).
            targets: Ground truth forwarded to ``build_targets``; ``None``
                selects inference mode.

        Returns:
            With targets: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision)`` where ``loss`` is a
            tensor and the remaining entries are Python numbers.
            Without targets: a tensor of shape
            (batch, num_anchors * grid * grid, 5 + num_classes) whose box
            values are multiplied by the stride.
        """
        # Anchors per grid cell.
        nA = self.num_anchors
        # Images in the batch.
        nB = x.size(0)
        # Side length of the feature map (only one side is read).
        nG = x.size(2)
        # Ratio between the configured image size and the feature-map size.
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # Split the channel axis into (anchor, attribute) and move the
        # attribute axis last, e.g. 2x255x13x13 -> 2x3x85x13x13 -> 2x3x13x13x85.
        # The last axis holds (tx, ty, tw, th, conf, class scores...), with
        # tx, ty relative to the top-left corner of the anchor's cell.
        # contiguous() returns a contiguous copy after permute so that later
        # view() calls are valid.
        prediction = x.view(nB, nA, self.bbox_attrs, nG,
                            nG).permute(0, 1, 3, 4, 2).contiguous()
        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x: sigmoid(tx)
        y = torch.sigmoid(prediction[..., 1])  # Center y: sigmoid(ty)
        w = prediction[..., 2]  # Width: tw
        h = prediction[..., 3]  # Height: th
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf: objectness score
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.: per-class scores

        # Calculate offsets for each grid cell. arange(nG) gives one row of
        # column indices, repeat(nG, 1) tiles it to nG x nG, and view() adds
        # two leading singleton axes; grid_y is the transpose (row indices).
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG,
                                         1).t().view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        # Anchors expressed in feature-map units: one (w, h) row per anchor,
        # each divided by the stride.
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                      for a_w, a_h in self.anchors])
        # Scaled anchor widths / heights, shaped to broadcast over the grid.
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors to obtain the predicted boxes.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        # bx = sigmoid(tx) + cx; grid_x broadcasts over batch and anchors.
        pred_boxes[..., 0] = x.data + grid_x
        # by = sigmoid(ty) + cy
        pred_boxes[..., 1] = y.data + grid_y
        # bw = pw * e^(tw), with pw the scaled anchor width.
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        # bh = ph * e^(th), with ph the scaled anchor height.
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()
            # build_targets is defined elsewhere (original note: util.py).
            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            # Number of predictions whose confidence exceeds 0.5.
            nProposals = int((pred_conf > 0.5).sum().item())
            # Recall and precision. Both guard against an empty denominator:
            # no ground truth yields recall 1, no proposals yields precision 0.
            recall = float(nCorrect / nGT) if nGT else 1
            precision = float(nCorrect / nProposals) if nProposals else 0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt.
            # conf_mask_true: entries of mask (per the original notes, the
            # anchors responsible for an object).
            conf_mask_true = mask
            # conf_mask_false: entries in conf_mask that are not in mask.
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects. Indexing with the
            # mask selects the entries where the mask is non-zero; the
            # coordinate terms use mean squared error against the targets.
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            # Objectness loss: binary cross entropy summed over the
            # no-object entries and the object entries.
            loss_conf = self.bce_loss(pred_conf[conf_mask_false],
                                      tconf[conf_mask_false]) + self.bce_loss(
                                          pred_conf[conf_mask_true],
                                          tconf[conf_mask_true])
            # Class loss: pred_cls[mask] keeps the trailing class axis, and
            # argmax over tcls[mask] turns the per-class target rows into
            # class indices for the cross-entropy loss; scaled by 1 / nB.
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask],
                                               torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # If not in training phase return predictions
            output = torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output
    def forward(self, x, targets=None, img_dim=None):

        # Tensores para soporte cuda
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # obtener salidas
        x = torch.sigmoid(prediction[..., 0])  # centro de x
        y = torch.sigmoid(prediction[..., 1])  # centro de y
        w = prediction[..., 2]  # ancho
        h = prediction[..., 3]  # largo
        pred_conf = torch.sigmoid(prediction[..., 4])  # configuracion
        pred_cls = torch.sigmoid(prediction[..., 5:])  # predicciones

        # Si el tamaño de la cuadrícula no coincide con el actual, calculamos nuevas compensaciones
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Agregue desplazamiento y escala con anclajes
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Pérdida: enmascara las salidas para ignorar objetos no existentes (excepto con pérdida de configuración)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # metricas
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(
                iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(
                iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
Example #19
0
    def forward(self, x, targets=None):
        """Decode one detection head into boxes, or compute its training loss.

        The raw head output is viewed as
        ``(batch, num_anchors, bbox_attrs, grid, grid)`` and permuted so the
        attribute axis comes last.  Class predictions are disabled in this
        variant: no class scores are decoded, no class loss is added, and the
        inference output carries no class columns.

        Args:
            x: raw head output.  ``x.size(0)`` is used as the batch size and
                ``x.size(2)`` as the grid size (the reshape uses that value
                for both spatial axes).
            targets: ground truth handed to ``build_targets``.  ``None``
                selects the inference path.

        Returns:
            Inference: a tensor of shape ``(batch, anchors * grid * grid, 5)``
            with columns ``x, y, w, h`` (multiplied by the stride) followed
            by the objectness score.
            Training: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf)``; only ``loss`` is a tensor, the rest are Python
            numbers obtained with ``.item()``.
        """
        batch_size = x.size(0)
        cells = x.size(2)
        n_anchors = self.num_anchors
        # Number of image units covered by one grid cell.
        cell_size = self.image_dim / cells

        # Legacy tensor constructors matching the device of the input.
        on_gpu = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor
        ByteTensor = torch.cuda.ByteTensor if on_gpu else torch.ByteTensor

        raw = x.view(batch_size, n_anchors, self.bbox_attrs, cells, cells)
        raw = raw.permute(0, 1, 3, 4, 2).contiguous()

        # Per-anchor, per-cell network outputs.
        cx = torch.sigmoid(raw[..., 0])  # centre x offset inside the cell
        cy = torch.sigmoid(raw[..., 1])  # centre y offset inside the cell
        log_w = raw[..., 2]  # width, exponentiated below
        log_h = raw[..., 3]  # height, exponentiated below
        objectness = torch.sigmoid(raw[..., 4])

        # Column / row index of every cell, broadcastable over batch and anchor.
        ramp = torch.arange(cells).repeat(cells, 1)
        col_index = ramp.view([1, 1, cells, cells]).type(FloatTensor)
        row_index = ramp.t().view([1, 1, cells, cells]).type(FloatTensor)

        # Anchors expressed in grid cells instead of image units.
        anchors_in_cells = FloatTensor(
            [(a_w / cell_size, a_h / cell_size) for a_w, a_h in self.anchors]
        )
        anchor_w = anchors_in_cells[:, 0:1].view((1, n_anchors, 1, 1))
        anchor_h = anchors_in_cells[:, 1:2].view((1, n_anchors, 1, 1))

        # Decoded boxes in grid units; built from ``.data`` so no gradient
        # flows through them.
        boxes = FloatTensor(raw[..., :4].shape)
        boxes[..., 0] = cx.data + col_index
        boxes[..., 1] = cy.data + row_index
        boxes[..., 2] = torch.exp(log_w.data) * anchor_w
        boxes[..., 3] = torch.exp(log_h.data) * anchor_h

        # Inference: return the decoded predictions and stop.
        if targets is None:
            return torch.cat(
                (
                    boxes.view(batch_size, -1, 4) * cell_size,
                    objectness.view(batch_size, -1, 1),
                ),
                -1,
            )

        # Training from here on.
        if on_gpu:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            # Moved as well even though the class loss is disabled below.
            self.ce_loss = self.ce_loss.cuda()

        # The first two returned values (nGT, nCorrect) are not used here.
        _n_gt, _n_correct, mask, conf_mask, tx, ty, tw, th, tconf = build_targets(
            pred_boxes=boxes.cpu().data,
            pred_conf=objectness.cpu().data,
            target=targets.cpu().data,
            anchors=anchors_in_cells.cpu().data,
            num_anchors=n_anchors,
            num_classes=self.num_classes,
            grid_size=cells,
            ignore_thres=self.ignore_thres,
            img_dim=self.image_dim,
        )

        # Bring masks and regression targets onto the input's device/type.
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))
        tx, ty, tw, th, tconf = (
            Variable(t.type(FloatTensor), requires_grad=False)
            for t in (tx, ty, tw, th, tconf)
        )

        # Split the confidence mask into "has ground truth" and the remainder.
        with_gt = mask
        without_gt = conf_mask - mask

        mse = self.mse_loss
        bce = self.bce_loss

        # Coordinate losses only look at cells selected by ``mask``.
        loss_x = mse(cx[with_gt], tx[with_gt])
        loss_y = mse(cy[with_gt], ty[with_gt])
        loss_w = mse(log_w[with_gt], tw[with_gt])
        loss_h = mse(log_h[with_gt], th[with_gt])

        conf_loss_without_gt = bce(objectness[without_gt], tconf[without_gt])
        conf_loss_with_gt = bce(objectness[with_gt], tconf[with_gt])
        loss_conf = conf_loss_without_gt + conf_loss_with_gt

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
        )
Example #20
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode a YOLO head and, when targets are given, compute its loss.

        Args:
            x: raw head output, viewed as
                ``(batch, num_anchors, num_classes + 5, grid, grid)``.  The
                original author's example is ``b x 255 x 13 x 13``.
            targets: ground truth forwarded to ``build_targets``; ``None``
                skips the loss computation.
            img_dim: stored on ``self.img_dim``; not otherwise read here.

        Returns:
            ``(output, 0)`` when ``targets`` is ``None``, otherwise
            ``(output, total_loss)``.  ``output`` has shape
            ``(batch, num_anchors * grid * grid, 5 + num_classes)``: box
            ``x, y, w, h`` multiplied by ``self.stride``, then confidence,
            then per-class scores.  The training path also fills
            ``self.metrics``.
        """
        self.img_dim = img_dim
        batch = x.size(0)
        # Feature-map side length (author's note: 13, 26 or 52; the layer's
        # stored grid size starts at 0).
        grid_size = x.size(2)

        use_cuda = x.is_cuda
        FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

        # (b, A, 5 + C, G, G) -> (b, A, G, G, 5 + C)
        prediction = x.view(
            batch, self.num_anchors, self.num_classes + 5, grid_size, grid_size
        )
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Raw network outputs, each of shape (b, A, G, G); class scores carry
        # one extra trailing axis of size num_classes.
        cx = torch.sigmoid(prediction[..., 0])
        cy = torch.sigmoid(prediction[..., 1])
        log_w = prediction[..., 2]
        log_h = prediction[..., 3]
        pred_conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Refresh the cached offsets whenever the grid size changes.
        # Author's note: compute_grid_offsets (1) yields the top-left
        # coordinate of every cell for the current feature-map size and
        # (2) rescales the anchors from image range (e.g. 0..416) to grid
        # range (e.g. 0..13); grid_x / grid_y are (1, 1, G, G) and
        # anchor_w / anchor_h are (1, A, 1, 1).
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=use_cuda)

        # Decode step (author's note): sigmoid keeps x, y inside (0, 1), so
        # adding the cell's top-left coordinate places the centre in grid
        # units; w, h are predicted in log space, hence the exp.  These boxes
        # are the inference result.  They are built from ``.data`` and are
        # also given to build_targets to help construct the masks.
        pred_boxes = FloatTensor(prediction[..., :4].shape)  # (b, A, G, G, 4)
        pred_boxes[..., 0] = cx.data + self.grid_x
        pred_boxes[..., 1] = cy.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(log_w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(log_h.data) * self.anchor_h

        # Multiplying by the stride maps boxes from grid units back to image
        # units (author's example: 32 for a 13x13 grid on a 416 image).
        scaled_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((scaled_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        # Encode step.  Author's notes on the returned tensors, all shaped
        # (b, A, G, G) unless stated otherwise:
        #   iou_scores  IoU between pred_boxes and the ground truth
        #   class_mask  set where the predicted class is correct
        #   obj_mask    1 where a positive sample lands, 0 elsewhere
        #   noobj_mask  1 where the cell is definitely negative; 0 may mean
        #               positive or ignored (IoU above ignore_thres)
        #   tx/ty/tw/th regression targets
        #   tcls        (b, A, G, G, n_classes) class targets
        (
            iou_scores,
            class_mask,
            obj_mask,
            noobj_mask,
            tx,
            ty,
            tw,
            th,
            tcls,
            tconf,
        ) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        mse = self.mse_loss
        bce = self.bce_loss

        # Regression loss: raw offsets against encoded targets, restricted to
        # cells that own an object.
        loss_x = mse(cx[obj_mask], tx[obj_mask])
        loss_y = mse(cy[obj_mask], ty[obj_mask])
        loss_w = mse(log_w[obj_mask], tw[obj_mask])
        loss_h = mse(log_h[obj_mask], th[obj_mask])

        # Confidence loss is a binary object / no-object classification.
        # Author's note: the no-object term is usually small because those
        # cells are easy, so noobj_scale is often larger than obj_scale.
        loss_conf_obj = bce(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = bce(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        # Classification loss on object cells only.
        loss_cls = bce(pred_cls[obj_mask], tcls[obj_mask])

        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Monitoring metrics.
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        # Confident, correctly classified, and located on an object cell.
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        object_total = obj_mask.sum() + 1e-16
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / object_total
        recall75 = hits75 / object_total

        tracked = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        metrics = {key: to_cpu(value).item() for key, value in tracked}
        metrics["grid_size"] = grid_size
        self.metrics = metrics

        return output, total_loss
Example #21
0
    def forward(self, x, targets=None):
        """Decode a detection head; return predictions or training losses.

        Args:
            x: raw head output, viewed as
                ``(batch, num_anchors, bbox_attrs, grid, grid)`` with
                ``grid = x.size(2)``.
            targets: ground truth handed to ``build_targets``.  ``None``
                selects the inference path.

        Returns:
            Inference: ``output.data`` of shape
            ``(batch, anchors * grid * grid, 5 + num_classes)`` holding the
            boxes (multiplied by the stride), confidence and class scores.
            Training: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall)``; only ``loss`` is a tensor.
        """
        batch = x.size(0)
        g_dim = x.size(2)
        stride = self.img_dim / g_dim
        n_anchors = self.num_anchors

        # Legacy tensor constructors matching the device of the input.
        if x.is_cuda:
            FloatTensor, LongTensor = torch.cuda.FloatTensor, torch.cuda.LongTensor
        else:
            FloatTensor, LongTensor = torch.FloatTensor, torch.LongTensor

        prediction = x.view(batch, n_anchors, self.bbox_attrs, g_dim, g_dim)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        cx = torch.sigmoid(prediction[..., 0])  # centre x offset
        cy = torch.sigmoid(prediction[..., 1])  # centre y offset
        log_w = prediction[..., 2]  # width, exponentiated below
        log_h = prediction[..., 3]  # height, exponentiated below
        conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Column / row index of every cell, tiled to the full prediction shape.
        ramp = torch.linspace(0, g_dim - 1, g_dim).repeat(g_dim, 1)
        copies = batch * n_anchors
        grid_x = ramp.repeat(copies, 1, 1).view(cx.shape).type(FloatTensor)
        grid_y = ramp.t().repeat(copies, 1, 1).view(cy.shape).type(FloatTensor)

        # Anchors in grid units, tiled so each (batch, anchor) slice is
        # filled with that anchor's width / height.
        scaled_anchors = [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
        anchor_table = FloatTensor(scaled_anchors)
        cell_count = g_dim * g_dim
        anchor_w = anchor_table.index_select(1, LongTensor([0]))
        anchor_h = anchor_table.index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, cell_count).view(log_w.shape)
        anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, cell_count).view(log_h.shape)

        # Decoded boxes in grid units; built from ``.data`` so no gradient
        # flows through them.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = cx.data + grid_x
        pred_boxes[..., 1] = cy.data + grid_y
        pred_boxes[..., 2] = torch.exp(log_w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(log_h.data) * anchor_h

        # Inference: return the decoded predictions and stop.
        if targets is None:
            output = torch.cat(
                (
                    pred_boxes.view(batch, -1, 4) * stride,
                    conf.view(batch, -1, 1),
                    pred_cls.view(batch, -1, self.num_classes),
                ),
                -1,
            )
            return output.data

        # Training from here on.
        if cx.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()

        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes.cpu().data,
            targets.cpu().data,
            scaled_anchors,
            n_anchors,
            self.num_classes,
            g_dim,
            self.ignore_thres,
            self.img_dim,
        )

        # Not used further down; retained so the computation is unchanged.
        nProposals = int((conf > 0.25).sum().item())
        if nGT:
            recall = float(nCorrect / nGT)
        else:
            recall = 1

        # Float masks are multiplied into predictions and targets below.
        mask = Variable(mask.type(FloatTensor))
        per_class = mask.unsqueeze(-1).repeat(1, 1, 1, 1, self.num_classes)
        cls_mask = Variable(per_class.type(FloatTensor))
        conf_mask = Variable(conf_mask.type(FloatTensor))

        tx, ty, tw, th, tconf, tcls = (
            Variable(t.type(FloatTensor), requires_grad=False)
            for t in (tx, ty, tw, th, tconf, tcls)
        )

        bce = self.bce_loss
        mse = self.mse_loss
        coord_weight = self.lambda_coord

        # Masked-out entries become zero on both sides of each loss.
        loss_x = coord_weight * bce(cx * mask, tx * mask)
        loss_y = coord_weight * bce(cy * mask, ty * mask)
        loss_w = coord_weight * mse(log_w * mask, tw * mask) / 2
        loss_h = coord_weight * mse(log_h * mask, th * mask) / 2
        loss_conf = bce(conf * conf_mask, tconf * conf_mask)
        loss_cls = bce(pred_cls * cls_mask, tcls * cls_mask)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
        )
    def forward(self, p, targets=None, batch_report=False, var=None):
        """Decode a YOLO head; return predictions or the training loss tuple.

        Args:
            p: raw head output, viewed as
                ``(bs, nA, bbox_attrs, nG, nG)`` with ``nG = p.shape[2]``.
            targets: per-image ground truth passed to
                ``utils.build_targets``.  ``None`` selects inference.
            batch_report: when true, corner-format boxes are written into
                ``pred_boxes`` before target building and extra false
                positives from unassigned anchors are counted per class.
            var: accepted but not used by this method.

        Returns:
            Inference: ``output.data`` of shape ``(bs, nA * nG * nG, 5 + nC)``
            with boxes multiplied by the stride, sigmoid confidence, and the
            raw (un-squashed) class scores.
            Training: the 14-tuple ``(loss, loss.item(), lx, ly, lw, lh,
            lconf, lcls, nT, TP, FP, FPe, FN, TC)`` where the six per-term
            losses are Python numbers.
        """
        FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor

        bs = p.shape[0]  # batch size
        nG = p.shape[2]  # grid points per side
        stride = self.img_dim / nG

        # Move the cached buffers to the GPU the first time a CUDA input
        # arrives while they are still on the CPU.
        if p.is_cuda and not self.grid_x.is_cuda:
            self.grid_x = self.grid_x.cuda()
            self.grid_y = self.grid_y.cuda()
            self.anchor_w = self.anchor_w.cuda()
            self.anchor_h = self.anchor_h.cuda()
            self.weights = self.weights.cuda()
            self.loss_means = self.loss_means.cuda()

        # (bs, nA * bbox_attrs, nG, nG) -> (bs, nA, nG, nG, bbox_attrs)
        p = p.view(bs, self.nA, self.bbox_attrs, nG, nG)
        p = p.permute(0, 1, 3, 4, 2).contiguous()

        cx = torch.sigmoid(p[..., 0])  # centre x offset
        cy = torch.sigmoid(p[..., 1])  # centre y offset

        # Width / height use the exponential ("yolo") parameterisation.  The
        # original source also kept a commented-out "power" alternative:
        # sigmoid outputs mapped through ((v * 2) ** 2) * anchor.
        w = p[..., 2]
        h = p[..., 3]
        width = torch.exp(w.data) * self.anchor_w
        height = torch.exp(h.data) * self.anchor_h

        # Box buffer is allocated here and filled later, depending on path.
        pred_boxes = FT(bs, self.nA, nG, nG, 4)
        pred_conf = p[..., 4]  # confidence logit
        pred_cls = p[..., 5:]  # class logits

        # Inference: centre-format boxes in grid units, scaled by the stride.
        if targets is None:
            pred_boxes[..., 0] = cx.data + self.grid_x
            pred_boxes[..., 1] = cy.data + self.grid_y
            pred_boxes[..., 2] = width
            pred_boxes[..., 3] = height

            output = torch.cat(
                (
                    pred_boxes.view(bs, -1, 4) * stride,
                    torch.sigmoid(pred_conf.view(bs, -1, 1)),
                    pred_cls.view(bs, -1, self.nC),
                ),
                -1,
            )
            return output.data

        # Training from here on.
        mse = nn.MSELoss()
        bce_logits = nn.BCEWithLogitsLoss()
        cross_entropy = nn.CrossEntropyLoss()

        if batch_report:
            # Corner-format boxes (x1, y1, x2, y2); the cached grids are
            # cropped to the current grid size first.
            gx = self.grid_x[:, :, :nG, :nG]
            gy = self.grid_y[:, :, :nG, :nG]
            centre_x = cx.data + gx
            centre_y = cy.data + gy
            half_w = width / 2
            half_h = height / 2
            pred_boxes[..., 0] = centre_x - half_w
            pred_boxes[..., 1] = centre_y - half_h
            pred_boxes[..., 2] = centre_x + half_w
            pred_boxes[..., 3] = centre_y + half_h

        tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = utils.build_targets(
            pred_boxes,
            pred_conf,
            pred_cls,
            targets,
            self.scaled_anchors,
            self.nA,
            self.nC,
            nG,
            batch_report,
        )
        tcls = tcls[mask]
        if cx.is_cuda:
            tx, ty, tw, th, mask, tcls = (
                t.cuda() for t in (tx, ty, tw, th, mask, tcls)
            )

        nT = sum(len(per_image) for per_image in targets)  # number of targets
        nM = mask.sum().float()  # anchors assigned to targets
        nB = len(targets)  # batch size
        k = nM / nB  # every loss term is weighted by assigned anchors per image

        if nM > 0:
            lx = k * mse(cx[mask], tx[mask])
            ly = k * mse(cy[mask], ty[mask])
            lw = k * mse(w[mask], tw[mask])
            lh = k * mse(h[mask], th[mask])
            lcls = (k / 4) * cross_entropy(pred_cls[mask], torch.argmax(tcls, 1))
        else:
            # No anchor was assigned: coordinate and class terms are zero.
            lx, ly, lw, lh, lcls = (FT([0]) for _ in range(5))

        # Confidence loss is taken over every anchor, assigned or not.
        lconf = (k * 64) * bce_logits(pred_conf, mask.float())

        # Optional re-weighting by running loss means; switched off in code.
        balance_losses_flag = False
        if balance_losses_flag:
            inv_means = 1 / self.loss_means.clone()
            weighted = (
                lx * inv_means[0] + ly * inv_means[1] + lw * inv_means[2]
                + lh * inv_means[3] + lconf * inv_means[4] + lcls * inv_means[5]
            )
            loss = weighted / inv_means.mean()

            latest = FT([lx.data, ly.data, lw.data, lh.data, lconf.data, lcls.data])
            self.loss_means = self.loss_means * 0.99 + latest * 0.01
        else:
            loss = lx + ly + lw + lh + lconf + lcls

        # Per-class count of confident predictions on unassigned anchors.
        FPe = torch.zeros(self.nC)
        if batch_report:
            unassigned = ~mask
            confident = torch.sigmoid(pred_conf[unassigned]) > 0.5
            if confident.sum() > 0:
                FP_classes = torch.argmax(pred_cls[unassigned][confident], 1)
                FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu()

        return (
            loss,
            loss.item(),
            lx.item(),
            ly.item(),
            lw.item(),
            lh.item(),
            lconf.item(),
            lcls.item(),
            nT,
            TP,
            FP,
            FPe,
            FN,
            TC,
        )
Example #23
0
def yolo_loss(x, y, w, h, xdir, ydir, pred_boxes, pred_conf, pred_cls, targets,
              scaled_anchors, ignore_thres, clf_criterion, reg_criterion,
              obj_scale, noobj_scale, regr_weights, grid_size1):
    """Compute the YOLO loss with a two-component direction term, plus metrics.

    Args:
        x, y, w, h: Predicted box terms; each is indexed with ``obj_mask`` and
            compared against the matching target through ``reg_criterion``.
        xdir, ydir: Predicted direction components, regressed the same way.
        pred_boxes: Decoded boxes, forwarded to ``build_targets``.
        pred_conf: Predicted objectness tensor.
        pred_cls: Predicted class scores, or ``None`` to skip the class loss.
        targets: Ground truth, forwarded to ``build_targets``.
        scaled_anchors: Anchors, forwarded to ``build_targets``.
        ignore_thres: Threshold forwarded to ``build_targets``.
        clf_criterion: Callable used for the class loss.
        reg_criterion: Callable used for every regression term.
        obj_scale, noobj_scale: Passed to ``focal_loss`` as the weight pair
            ``(noobj_scale, obj_scale)``.
        regr_weights: Multiplier applied to the summed regression losses.
        grid_size1: Reported verbatim under the ``"grid_size"`` metric.

    Returns:
        ``(total_loss, metrics)`` where ``metrics`` maps names to Python
        scalars (plus ``grid_size1``).
    """
    (iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th,
     txdir, tydir, tcls, tconf) = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchors,
        ignore_thres=ignore_thres,
    )
    # Placeholders must live where the predictions live; probing global CUDA
    # availability picks the wrong device for CPU tensors on a GPU machine.
    device = pred_conf.device
    has_cls = pred_cls is not None

    # Loss: mask outputs to ignore non-existing objects (except the conf. loss)
    loss_x = reg_criterion(x[obj_mask], tx[obj_mask])
    loss_y = reg_criterion(y[obj_mask], ty[obj_mask])
    loss_w = reg_criterion(w[obj_mask], tw[obj_mask])
    loss_h = reg_criterion(h[obj_mask], th[obj_mask])
    # Loss: rotations
    loss_xdir = reg_criterion(xdir[obj_mask], txdir[obj_mask])
    loss_ydir = reg_criterion(ydir[obj_mask], tydir[obj_mask])

    weights = (noobj_scale, obj_scale)
    loss_conf = focal_loss(pred_conf, tconf, weights)

    if has_cls:
        loss_cls = clf_criterion(pred_cls[obj_mask], tcls[obj_mask])
    else:
        loss_cls = torch.tensor(0, device=device)

    regression = loss_x + loss_y + loss_w + loss_h + loss_xdir + loss_ydir
    total_loss = regr_weights * regression + loss_conf + loss_cls

    # Metrics
    # Decide on the presence of a class head, not on the loss value: a
    # class loss that is legitimately 0 must still report its accuracy.
    if has_cls:
        cls_acc = 100 * class_mask[obj_mask].mean()
    else:
        cls_acc = torch.tensor(0, device=device)
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    # The 1e-16 term keeps the ratios finite when a denominator is zero.
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (obj_mask.sum() + 1e-16)
    recall75 = hits75 / (obj_mask.sum() + 1e-16)

    metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size1,
        'rotation': to_cpu(loss_xdir + loss_ydir).item()
    }
    return total_loss, metrics
Example #24
0
    def forward(self, x, targets=None):
        """Decode the raw YOLO head output; compute the loss when targets are given.

        Args:
            x: Raw feature map. It is viewed as
                (batch, num_anchors, bbox_attrs, grid, grid), so it must be
                spatially square with ``num_anchors * bbox_attrs`` channels.
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects the inference path.

        Returns:
            Inference: a tensor of shape
            (batch, num_anchors * grid * grid, 4 + 1 + num_classes) holding
            boxes scaled by the stride, the objectness and the class scores.
            Training: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
            loss_conf, loss_cls, recall, precision)`` where only ``loss`` is
            a tensor.
        """
        batch_size = x.size(0)
        num_Grids = x.size(2)
        stride = self.image_dim / num_Grids

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda \
            else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # Move the attribute axis last:
        # (batch, num_anchors, num_Grids, num_Grids, bbox_attrs)
        prediction = x.view(batch_size, self.num_anchors, self.bbox_attrs,
                            num_Grids, num_Grids).permute(0, 1, 3, 4,
                                                          2).contiguous()

        # Get individual outputs
        pred_x = torch.sigmoid(prediction[..., 0])  # Center x
        pred_y = torch.sigmoid(prediction[..., 1])  # Center y
        pred_w = prediction[..., 2]  # Width
        pred_h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred

        # Per-cell offsets: grid_x varies along the last axis, grid_y along
        # the one before it.
        grid_x = torch.arange(num_Grids).repeat(num_Grids, 1).view(
            [1, 1, num_Grids, num_Grids]).type(FloatTensor)
        grid_y = torch.arange(num_Grids).repeat(num_Grids, 1).t().view(
            [1, 1, num_Grids, num_Grids]).type(FloatTensor)
        # Anchors expressed in grid-cell units
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                      for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

        # Add offset and scale with anchors (every slot is overwritten below)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = grid_x + pred_x.data
        pred_boxes[..., 1] = grid_y + pred_y.data
        pred_boxes[..., 2] = anchor_w * torch.exp(pred_w.data)
        pred_boxes[..., 3] = anchor_h * torch.exp(pred_h.data)

        if targets is not None:
            # Training
            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = \
                build_targets(
                    pred_boxes=pred_boxes.cpu().data,
                    pred_conf=pred_conf.cpu().data,
                    pred_cls=pred_cls.cpu().data,
                    target=targets.cpu().data,
                    anchors=scaled_anchors.cpu().data,
                    num_anchors=self.num_anchors,
                    num_classes=self.num_classes,
                    grid_size=num_Grids,
                    ignore_thres=self.ignore_thres,
                    img_dim=self.image_dim,
                )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            # With no prediction above 0.5 there is nothing to divide by;
            # report 0 instead of raising ZeroDivisionError.
            precision = float(nCorrect / nProposals) if nProposals else 0.0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(pred_x[mask], tx[mask])
            loss_y = self.mse_loss(pred_y[mask], ty[mask])
            loss_w = self.mse_loss(pred_w[mask], tw[mask])
            loss_h = self.mse_loss(pred_h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false],
                                      tconf[conf_mask_false]) + self.bce_loss(
                                          pred_conf[conf_mask_true],
                                          tconf[conf_mask_true])
            # tcls is reduced to class indices with argmax before ce_loss
            loss_cls = (1 / batch_size) * self.ce_loss(
                pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )
        else:
            # Inference: flatten anchors and cells into one box axis. The
            # class scores need -1 like the other two pieces; a literal 1
            # only fits a single box and breaks the concatenation.
            output = torch.cat(
                (pred_boxes.view(batch_size, -1, 4) * stride,
                 pred_conf.view(batch_size, -1, 1),
                 pred_cls.view(batch_size, -1, self.num_classes)), -1)

            return output
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO detection head and optionally compute its loss.

        ``x`` is viewed as (batch, num_anchors, num_classes + 5, grid, grid);
        for example [8, 255, 13, 13] with 3 anchors and 80 classes, since
        255 = 3 * (5 + 80).

        Returns ``(output, 0)`` when ``targets`` is None, otherwise
        ``(output, total_loss, self.metrics)``. ``output`` has shape
        (batch, num_anchors * grid * grid, 5 + num_classes) with the box
        terms multiplied by ``self.stride``.
        """
        on_gpu = x.is_cuda
        gpu_index = x.device.index
        float_type = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

        self.img_dim = img_dim
        batch = x.size(0)
        cells = x.size(2)

        # Put the per-box attributes on the last axis.
        raw = x.view(batch, self.num_anchors, self.num_classes + 5,
                     cells, cells)
        prediction = raw.permute(0, 1, 3, 4, 2).contiguous()

        # Split the attribute axis into its components.
        cx = torch.sigmoid(prediction[..., 0])  # centre x within the cell
        cy = torch.sigmoid(prediction[..., 1])  # centre y within the cell
        bw = prediction[..., 2]  # raw width
        bh = prediction[..., 3]  # raw height
        pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
        pred_cls = torch.sigmoid(prediction[..., 5:])  # class scores

        # Cached offsets are only valid for the grid they were built for.
        if cells != self.grid_size:
            self.compute_grid_offsets(cells, cuda=on_gpu,
                                      device_id=gpu_index)

        # Boxes in grid units; every slot is overwritten below.
        pred_boxes = float_type(prediction[..., :4].shape, device=gpu_index)
        pred_boxes[..., 0] = cx.data + self.grid_x
        pred_boxes[..., 1] = cy.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(bw.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(bh.data) * self.anchor_h

        flat_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, self.num_classes)
        output = torch.cat((flat_boxes, flat_conf, flat_cls), -1)

        if targets is None:
            return output, 0

        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Box and class terms only look at cells selected by obj_mask; the
        # confidence term also covers the noobj_mask cells.
        loss_x = self.mse_loss(cx[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(cy[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(bw[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(bh[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        tracked = (
            ("loss", total_loss),
            ("x", loss_x),
            ("y", loss_y),
            ("w", loss_w),
            ("h", loss_h),
            ("conf", loss_conf),
            ("cls", loss_cls),
            ("cls_acc", cls_acc),
            ("recall50", recall50),
            ("recall75", recall75),
            ("precision", precision),
            ("conf_obj", conf_obj),
            ("conf_noobj", conf_noobj),
        )
        report = {key: to_cpu(value).item() for key, value in tracked}
        report["grid_size"] = cells
        self.metrics = report

        return output, total_loss, self.metrics
Example #26
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode the YOLO head output and, with targets, compute the loss.

        Args:
            x: Raw feature map, viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference.
            img_dim: Stored on ``self.img_dim``.

        Returns:
            ``(output, 0)`` for inference, ``(output, total_loss)`` for
            training. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the box
            terms multiplied by ``self.img_stride``. Training also refreshes
            ``self.metrics``.
        """
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # Get outputs [x, y, width, height, confidence, class scores]
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size)

        # Add offset and scale with anchors
        pred_boxes = torch.zeros_like(prediction[..., :4], dtype=torch.float, device=self.device)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.img_stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0

        (iou_scores, class_mask, obj_mask, no_obj_mask,
            true_x, true_y, true_w, true_h, true_cls, true_conf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thr=self.ignore_thr
        )
        # The masks must be boolean: indexing with an integer (long) tensor
        # gathers rows 0/1 along the first axis instead of selecting the
        # flagged cells, which silently corrupts every masked term below.
        obj_mask = obj_mask.bool()
        no_obj_mask = no_obj_mask.bool()

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], true_x[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], true_y[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], true_w[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], true_h[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], true_conf[obj_mask])
        loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask], true_conf[no_obj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_no_obj
        loss_cls = self.bce_loss(pred_cls[obj_mask], true_cls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_no_obj = pred_conf[no_obj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * true_conf
        # The 1e-16 term keeps the ratios finite when a denominator is zero.
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        # Per-component values are exposed here rather than printed on
        # every forward pass.
        self.metrics = {
            "loss": total_loss.detach().cpu().item(),
            "x": loss_x.detach().cpu().item(),
            "y": loss_y.detach().cpu().item(),
            "w": loss_w.detach().cpu().item(),
            "h": loss_h.detach().cpu().item(),
            "conf": loss_conf.detach().cpu().item(),
            "cls": loss_cls.detach().cpu().item(),
            "cls_acc": cls_acc.detach().cpu().item(),
            "recall50": recall50.detach().cpu().item(),
            "recall75": recall75.detach().cpu().item(),
            "precision": precision.detach().cpu().item(),
            "conf_obj": conf_obj.detach().cpu().item(),
            "conf_no_obj": conf_no_obj.detach().cpu().item(),
            "grid_size": grid_size,
        }

        return output, total_loss
Example #27
0
    def forward(self, x, targets=None):
        """Decode the YOLO head output and, with targets, compute loss and metrics.

        Args:
            x: Raw feature map, viewed as
                (batch, num_anchors, bbox_attrs, grid, grid).
            targets: Ground truth forwarded to ``build_targets``. ``None``
                selects inference.

        Returns:
            Inference: ``output`` of shape
            (batch, num_anchors * grid * grid, 5 + num_classes) with the box
            terms multiplied by the stride.
            Training: ``(output, loss, metrics)`` where ``metrics`` is a
            dict of Python scalars.
        """
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        # Move the attribute axis last: (nB, nA, nG, nG, bbox_attrs)
        prediction = x.view(nB, nA, self.bbox_attrs, nG,
                            nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG,
                                         1).t().view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        # Anchors expressed in grid-cell units
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                      for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors (every slot is overwritten below)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        output = torch.cat(
            (pred_boxes.view(nB, -1, 4) * stride, pred_conf.view(
                nB, -1, 1), pred_cls.view(nB, -1, self.num_classes)),
            -1,
        )

        if targets is None:
            # Inference
            return output
        else:
            # Training
            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()

            num_targets, num_correct, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.data.cpu(),
                pred_conf=pred_conf.data.cpu(),
                pred_cls=pred_cls.data.cpu(),
                target=targets.data.cpu(),
                anchors=scaled_anchors.data.cpu(),
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            # Compute recall and precision
            num_proposals = (pred_conf > 0.5).sum().item()
            recall = num_correct / num_targets if num_targets else 1
            # No prediction above 0.5 means nothing to divide by; report 0
            # instead of raising ZeroDivisionError.
            precision = num_correct / num_proposals if num_proposals else 0.0

            # Masks
            obj_mask = Variable(obj_mask.type(ByteTensor), requires_grad=False)
            noobj_mask = Variable(noobj_mask.type(ByteTensor),
                                  requires_grad=False)

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(FloatTensor), requires_grad=False)

            # Mask outputs to ignore (except for conf. loss) non-existing objects
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = loss_conf_obj + loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Fraction of masked cells whose top predicted class matches the
            # top target class
            cls_acc = (pred_cls[obj_mask].argmax(1) == tcls[obj_mask].argmax(1)
                       ).float().mean().item()

            return (
                output,
                loss,
                {
                    "loss": loss.item(),
                    "x": loss_x.item(),
                    "y": loss_y.item(),
                    "w": loss_w.item(),
                    "h": loss_h.item(),
                    "conf": loss_conf.item(),
                    "cls": loss_cls.item(),
                    "cls_acc": cls_acc,
                    "recall": recall,
                    "precision": precision,
                },
            )
Example #28
0
    def forward(self, x, targets=None, img_dim=None):
        """Produce decoded predictions and, when targets are given, the total loss.

        ``x`` is viewed as (batch, num_anchors, 5 + num_classes, grid, grid),
        e.g. 255 = (80 + 4 + 1) * 3 channels on a 13 x 13 grid.

        Returns ``(output, 0)`` without targets and ``(output, total_loss)``
        with targets. ``output`` has shape
        (batch, num_anchors * grid * grid, 5 + num_classes). Training also
        stores the scalar metrics on ``self.metrics``.
        """
        # Tensor constructor matching the input's device
        make_float = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim  # image size
        n_img = x.size(0)  # batch size
        n_cell = x.size(2)  # feature-map size

        # (batch, num_anchors, grid, grid, 5 + num_classes)
        reshaped = x.view(n_img, self.num_anchors, 5 + self.num_classes,
                          n_cell, n_cell)
        prediction = reshaped.permute(0, 1, 3, 4, 2).contiguous()

        # Every grid cell carries num_anchors candidate boxes. The four box
        # terms and the confidence all have shape
        # (batch, num_anchors, grid, grid); the class scores add a last axis.
        box_x = torch.sigmoid(prediction[..., 0])  # centre x
        box_y = torch.sigmoid(prediction[..., 1])  # centre y
        box_w = prediction[..., 2]  # width
        box_h = prediction[..., 3]  # height
        pred_conf = torch.sigmoid(prediction[..., 4])  # confidence
        pred_cls = torch.sigmoid(prediction[..., 5:])  # class scores

        # Recompute the cached offsets when the grid size changes.
        if n_cell != self.grid_size:
            self.compute_grid_offsets(n_cell, cuda=box_x.is_cuda)

        # Allocate a tensor of the box shape and fill every slot. The
        # sigmoid outputs lie in (0, 1), i.e. inside one cell of unit
        # length, so adding the cell offset (broadcast) gives the position
        # in grid units. Width and height scale the anchor by exp() of the
        # raw prediction.
        pred_boxes = make_float(prediction[..., :4].shape)
        pred_boxes[..., 0] = box_x.data + self.grid_x
        pred_boxes[..., 1] = box_y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(box_w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(box_h.data) * self.anchor_h

        # Flatten anchors and cells; the stride scales boxes back to the
        # original input size.
        boxes_flat = pred_boxes.view(n_img, -1, 4) * self.stride
        conf_flat = pred_conf.view(n_img, -1, 1)
        cls_flat = pred_cls.view(n_img, -1, self.num_classes)
        output = torch.cat((boxes_flat, conf_flat, cls_flat), -1)

        # targets is the ground truth; without it only predictions are returned.
        if targets is None:
            return output, 0

        # Per the original author's notes (not verifiable from this block):
        # targets is (num, 6) = (batch_index, cls, cx, cy, w, h);
        # iou_scores holds the IoU of matched predictions; class_mask flags
        # correct class predictions; obj_mask flags the anchor assigned to
        # each object; noobj_mask flags anchors whose IoU is below the
        # threshold; tx/ty/tw/th are regression targets; tcls the target
        # classes; tconf the target confidence.
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Coordinate and size losses on object cells only
        loss_x = self.mse_loss(box_x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(box_y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(box_w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(box_h[obj_mask], th[obj_mask])
        # Confidence loss, weighted separately for object / no-object cells
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                        tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)
        # Class loss
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        # Sum of all loss terms
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf

        # The mask is cast to float before it is summed for the recall
        # denominators.
        obj_total = obj_mask.float().sum()
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / (obj_total + 1e-16)
        recall75 = hits75 / (obj_total + 1e-16)

        def as_scalar(tensor):
            return to_cpu(tensor).item()

        self.metrics = {
            "loss": as_scalar(total_loss),
            "x": as_scalar(loss_x),
            "y": as_scalar(loss_y),
            "w": as_scalar(loss_w),
            "h": as_scalar(loss_h),
            "conf": as_scalar(loss_conf),
            "cls": as_scalar(loss_cls),
            "cls_acc": as_scalar(cls_acc),
            "recall50": as_scalar(recall50),
            "recall75": as_scalar(recall75),
            "precision": as_scalar(precision),
            "conf_obj": as_scalar(conf_obj),
            "conf_noobj": as_scalar(conf_noobj),
            "grid_size": n_cell,
        }

        return output, total_loss
Example #29
0
    def forward(self, x, targets=None, img_dim=None, cls=None):
        """Decode one YOLO head and, when targets are given, compute its losses.

        Besides the per-cell detections, the layer produces image-level class
        scores: the raw class logits of every anchor/cell are weighted by that
        cell's objectness confidence and summed over anchors and grid cells.

        Args:
            x: Raw feature map; it is viewed as
                (batch, num_anchors, num_classes + 5, grid, grid).
            targets: Ground-truth boxes forwarded to ``build_targets``.
                ``None`` selects inference mode (no loss, no metrics).
            img_dim: Input image size; only stored on ``self.img_dim`` here.
            cls: Per-sample class labels for the image-level classification
                loss and the ``batch_acc`` metric. Required whenever
                ``targets`` is given.

        Returns:
            ``(output, weighted_class_scores, total_loss)``. ``output`` has
            shape (batch, num_anchors * grid * grid, 5 + num_classes) and
            holds stride-scaled boxes, objectness and raw class logits.
            ``total_loss`` is the int ``0`` in inference mode.

        Raises:
            ValueError: If ``targets`` is given without ``cls``.
        """
        # Fail early with a clear message instead of deep inside the loss call.
        if targets is not None and cls is None:
            raise ValueError("cls must be provided when targets are given")

        # Legacy typed constructor living on the same device kind as the input
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        # Class scores are kept as raw logits (no sigmoid); presumably
        # self.ce_loss applies the softmax itself -- TODO confirm.
        pred_cls = prediction[..., 5:]  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors (detached: boxes carry no gradient)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        # Weight the grid-wise predictions acc. to the object confidence and
        # collapse anchors and both grid axes into one score per class.
        weighted_class_scores = (pred_conf.unsqueeze(dim=-1) * pred_cls).sum(
            dim=(1, 2, 3))

        if targets is None:
            return output, weighted_class_scores, 0

        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Detection terms are only defined when at least one object is
        # assigned to this layer; otherwise they are left out entirely.
        detection_metrics = {}
        if obj_mask.sum() > 0:
            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask],
                                          tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.ce_loss(
                pred_cls[obj_mask].view(-1, self.num_classes),
                tcls[obj_mask].long().view(-1))
            detection_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # 1e-16 guards the divisions against an all-zero denominator
            precision = torch.sum(
                iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(
                iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            detection_metrics = {
                "x": loss_x,
                "y": loss_y,
                "w": loss_w,
                "h": loss_h,
                "conf": loss_conf,
                "cls": loss_cls,
                "cls_acc": cls_acc,
                "recall50": recall50,
                "recall75": recall75,
                "precision": precision,
                "conf_obj": conf_obj,
                "conf_noobj": conf_noobj,
            }
        else:
            detection_loss = 0.

        # Classification loss on the image-level scores
        classification_loss = self.ce_loss(weighted_class_scores, cls)

        total_loss = detection_loss + classification_loss

        # Rebuild the metrics dict on every call so that a batch without
        # objects never reports values left over from an earlier batch.
        metrics = {"loss": to_cpu(total_loss).item()}
        metrics.update(
            (name, to_cpu(value).item())
            for name, value in detection_metrics.items())
        metrics["grid_size"] = grid_size
        metrics["classification_loss"] = to_cpu(classification_loss).item()
        metrics["batch_acc"] = to_cpu(
            torch.sum(torch.argmax(weighted_class_scores, dim=-1) ==
                      cls)).item() / len(cls)
        self.metrics = metrics

        return output, weighted_class_scores, total_loss
Example #30
0
    def forward(self, x, targets=None, img_dim=None):
        """Decode one YOLO head that also predicts a per-box angle class.

        Args:
            x: Raw feature map; it is viewed as
                (batch, num_anchors, num_classes + num_angles + 5, grid, grid).
            targets: Ground truth handed to ``build_targets``; ``None`` means
                inference only.
            img_dim: Input image size; only stored on ``self.img_dim`` here.

        Returns:
            ``(output, loss)``. ``output`` has shape
            (batch, num_anchors * grid * grid, 5 + num_classes + num_angles)
            with stride-scaled boxes, objectness, class and angle-class
            probabilities. ``loss`` is the int ``0`` when ``targets`` is
            ``None``, otherwise the summed training loss. In training mode
            ``self.metrics`` is replaced with scalar diagnostics.
        """
        # Legacy typed tensor class living on the same device kind as the input
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        self.img_dim = img_dim
        batch = x.size(0)
        grid = x.size(2)
        n_cls = self.num_classes

        channels = n_cls + self.num_angles + 5
        prediction = x.view(batch, self.num_anchors, channels, grid, grid)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Split the last axis into its individual heads
        x = torch.sigmoid(prediction[..., 0])  # box centre x, cell-relative
        y = torch.sigmoid(prediction[..., 1])  # box centre y, cell-relative
        w = prediction[..., 2]  # log-scale width
        h = prediction[..., 3]  # log-scale height
        pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
        pred_cls = torch.sigmoid(prediction[..., 5:5 + n_cls])  # class scores
        pred_angle_cls = torch.sigmoid(prediction[..., 5 + n_cls:])  # angle-class scores

        # Cached offsets belong to another grid size: rebuild them first
        if grid != self.grid_size:
            self.compute_grid_offsets(grid, cuda=x.is_cuda)

        # Boxes in grid units, built from detached values (no gradient)
        pred_boxes = torch.stack(
            (
                x.data + self.grid_x,
                y.data + self.grid_y,
                torch.exp(w.data) * self.anchor_w,
                torch.exp(h.data) * self.anchor_h,
            ),
            dim=-1,
        ).type(FloatTensor)

        flat_boxes = pred_boxes.view(batch, -1, 4) * self.stride
        flat_conf = pred_conf.view(batch, -1, 1)
        flat_cls = pred_cls.view(batch, -1, n_cls)
        flat_angle = pred_angle_cls.view(batch, -1, self.num_angles)
        output = torch.cat((flat_boxes, flat_conf, flat_cls, flat_angle), -1)

        # Inference: nothing to score against
        if targets is None:
            return output, 0

        (iou_scores, class_mask, angle_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tacls, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            pred_angle_cls=pred_angle_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Fixed per-angle-class weights (19 entries, so this path presumably
        # requires num_angles == 19 -- TODO confirm against the config).
        weights = torch.tensor([1.12424274,13.3361754, 75.7716263, 50.10983982, 61.6845070, 71.0974026,  73.73063973, 22.52880658 ,
                                8.14052045, 5.87707998, 25.49243306,  10.36837121,  26.4468599,   77.92882562, 100.44954128,
                                82.9469697, 35.20578778, 8.97826978, 1.]).type(FloatTensor)

        # Box regression, restricted to cells that own an object
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

        # Element-wise angle BCE, re-weighted per class before averaging
        angle_bce = nn.BCELoss(reduction='none')(pred_angle_cls[obj_mask], tacls[obj_mask])
        loss_conf_angle = (angle_bce * weights / 100).mean()

        # Objectness is scored on object and non-object cells separately
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls + loss_conf_angle

        # Diagnostics
        cls_acc = 100 * class_mask[obj_mask].mean()
        angle_acc = 100 * angle_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        n_objects = obj_mask.sum()
        # 1e-16 guards the divisions against an all-zero denominator
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / (n_objects + 1e-16)
        recall75 = hits75 / (n_objects + 1e-16)

        scalar_tensors = {
            "loss": total_loss,
            "x": loss_x,
            "y": loss_y,
            "w": loss_w,
            "h": loss_h,
            "angle_acc": angle_acc,
            "angle": loss_conf_angle,
            "conf": loss_conf,
            "cls": loss_cls,
            "cls_acc": cls_acc,
            "recall50": recall50,
            "recall75": recall75,
            "precision": precision,
            "conf_obj": conf_obj,
            "conf_noobj": conf_noobj,
        }
        report = {key: to_cpu(value).item() for key, value in scalar_tensors.items()}
        report["grid_size"] = grid
        self.metrics = report

        return output, total_loss