Example #1
    def forward(self, output, dim_info, target, scaled_anchors, stride):

        # print(output.shape)
        # torch.Size([8, 3, 13, 13, 85])
        # print(target.shape)
        # torch.Size([6])

        pred_boxes = output[..., :4] / stride
        pred_conf = output[..., 4]
        pred_cls = output[..., 5:]
        x, y, w, h = dim_info[..., 0], dim_info[..., 1], dim_info[..., 2], dim_info[..., 3]

        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = utils.build_targets(
            pred_boxes = pred_boxes,
            pred_cls = pred_cls,
            target = target,
            anchors = scaled_anchors,
            ignore_thres = self.ignore_thres,
        )

        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        
        loss_conf = self.object_scale * loss_conf_obj + self.noobject_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return total_loss
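
None of these snippets ships build_targets itself. Below is a minimal sketch of the ten-tuple variant that Example #1 above (and several examples below) calls, patterned on the common PyTorch-YOLOv3 layout. Everything in it is an assumption about that helper: targets are taken to be (batch_idx, class, cx, cy, w, h) rows normalized to [0, 1], wh_iou is a hypothetical width/height-only IoU, and iou_scores/class_mask are returned as zeros (the real helper also fills them in for metric reporting).

import torch

def wh_iou(anchor, gwh):
    # IoU of boxes that share a center, compared by width/height only
    inter = torch.min(anchor, gwh).prod(1)
    return inter / (anchor.prod() + gwh.prod(1) - inter)

def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
    # pred_boxes: (nB, nA, nG, nG, 4) in grid units, pred_cls: (nB, nA, nG, nG, nC)
    # target: (nT, 6) rows of (batch_idx, class, cx, cy, w, h) normalized to [0, 1]
    nB, nA, nG, _, nC = pred_cls.shape
    obj_mask = torch.zeros(nB, nA, nG, nG, dtype=torch.bool)
    noobj_mask = torch.ones(nB, nA, nG, nG, dtype=torch.bool)
    class_mask = torch.zeros(nB, nA, nG, nG)  # left as zeros in this sketch
    iou_scores = torch.zeros(nB, nA, nG, nG)  # left as zeros in this sketch
    tx, ty = torch.zeros(nB, nA, nG, nG), torch.zeros(nB, nA, nG, nG)
    tw, th = torch.zeros(nB, nA, nG, nG), torch.zeros(nB, nA, nG, nG)
    tcls = torch.zeros(nB, nA, nG, nG, nC)

    # scale normalized target boxes to grid units
    target_boxes = target[:, 2:6] * nG
    gxy, gwh = target_boxes[:, :2], target_boxes[:, 2:]
    # assign each ground-truth box to the anchor with the best wh-IoU
    ious = torch.stack([wh_iou(a, gwh) for a in anchors])  # (nA, nT)
    _, best_n = ious.max(0)
    b, labels = target[:, :2].long().t()
    gi, gj = gxy.long().t()
    obj_mask[b, best_n, gj, gi] = True
    noobj_mask[b, best_n, gj, gi] = False
    # anchors whose wh-IoU exceeds the threshold contribute no no-object loss
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = False
    # regression targets relative to the responsible cell and anchor
    tx[b, best_n, gj, gi] = gxy[:, 0] - gxy[:, 0].floor()
    ty[b, best_n, gj, gi] = gxy[:, 1] - gxy[:, 1].floor()
    tw[b, best_n, gj, gi] = torch.log(gwh[:, 0] / anchors[best_n][:, 0] + 1e-16)
    th[b, best_n, gj, gi] = torch.log(gwh[:, 1] / anchors[best_n][:, 1] + 1e-16)
    tcls[b, best_n, gj, gi, labels] = 1
    tconf = obj_mask.float()
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf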
Example #2
def train(neural_network, tf, idf):
    inputs = build_inputs(len(tf))
    targets = build_targets(tf, idf)

    epochs = 1 if len(tf) > 10 else 10
    for i in range(epochs):
        for input_vector, target_vector in zip(inputs, targets):
            backpropagate(neural_network, input_vector, target_vector)
    return neural_network
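
Example #2 pulls build_targets from a completely different domain. A hedged guess at what it might do there, assuming tf is a list of per-document term-frequency vectors and idf a parallel list of inverse-document-frequency weights; both names and shapes are assumptions, not taken from the source:

def build_targets(tf, idf):
    # hypothetical: one tf-idf target vector per document
    return [[t * w for t, w in zip(doc_tf, idf)] for doc_tf in tf]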
Example #3
def train_from_tensors(dataset_directory, epochs=4,
                       batch_size=1, dropout_rate=0.3):
    files = list_files(dataset_directory)
    all_tensors = build_tensors(files)
    targets = build_targets(files)
    model = HeadClass(dropout_rate=dropout_rate)
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # ckpt = tf.train.Checkpoint()
    model.fit(all_tensors, targets, validation_split=0.2,
              epochs=epochs, batch_size=batch_size)
    model.summary()
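
A hypothetical invocation of Example #3, with placeholder path and hyperparameter values; note the function fits the model and prints a summary but never returns it:

train_from_tensors('data/train', epochs=8, batch_size=32, dropout_rate=0.5)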
Example #4
    def forward(self,
                fmaps: Dict[str, torch.Tensor],
                rois: List[torch.Tensor],
                img_dims: List[Tuple[int, int]],
                targets: List[Dict[str, torch.Tensor]] = None):

        # assign rois to gt and generate cls(Ntotal,) and reg(Ntotal,4) targets
        if targets is not None:
            # match/build targets
            matches, target_objectness, target_labels, target_offsets = build_targets(
                rois,
                targets,
                self._params['fg_iou_threshold'],
                self._params['bg_iou_threshold'],
                add_best_matches=False)

            # sample fg and bg with given ratio
            positives, negatives = sample_fg_bg(matches,
                                                self._params['num_of_samples'],
                                                self._params['positive_ratio'])

            matches[:] = 0
            matches[positives] = 1
            matches[negatives] = -1
            sample_mask = torch.logical_or(matches == 1, matches == -1)
            positive_mask = matches == 1

            target_objectness = target_objectness[sample_mask]
            target_labels = target_labels[sample_mask]
            target_offsets = target_offsets[positive_mask]
            current = 0
            for roi_index, boxes in enumerate(rois):
                N = boxes.size(0)
                batch_sample_mask = sample_mask[current:current + N]
                current += N
                rois[roi_index] = boxes[batch_sample_mask]

        # extract all rois from feature maps (Ntotal,(C*output_size[0]*output_size[1]))
        # outputs: (Ntotal,output_features*output_size**2)

        outputs = self.roi_pool(fmaps, rois, img_dims).flatten(start_dim=1)

        # feed to the hidden units and get cls_logits and reg_deltas

        outputs = self.hidden_unit(outputs)  # Ntotal,hidden_channels
        cls_logits = self.cls_unit(outputs)  # Ntotal,num_classes
        reg_deltas = self.reg_unit(outputs)  # Ntotal,num_classes*4
        reg_deltas = reg_deltas.reshape(-1, self.num_classes, 4)

        if targets is None:
            # inference path: no targets were built above
            return (cls_logits, reg_deltas), (None, None, None)

        return (cls_logits, reg_deltas), (target_objectness, target_labels,
                                          target_offsets)
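
Example #4 (and Example #5 below) also depends on an external sample_fg_bg. A minimal sketch, assuming the torchvision-style match encoding that the relabelling above suggests (1 = foreground, -1 = background, 0 = ignore); the exact encoding is not confirmed by the source:

import torch

def sample_fg_bg(matches, num_samples, positive_ratio):
    # candidate foreground and background indices
    fg = torch.where(matches == 1)[0]
    bg = torch.where(matches == -1)[0]
    # cap the foreground share at positive_ratio, fill the rest with background
    num_fg = min(int(num_samples * positive_ratio), fg.numel())
    num_bg = min(num_samples - num_fg, bg.numel())
    fg = fg[torch.randperm(fg.numel(), device=matches.device)[:num_fg]]
    bg = bg[torch.randperm(bg.numel(), device=matches.device)[:num_bg]]
    return fg, bg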
Example #5
    def forward(self, fmaps: torch.Tensor, img_dims: Tuple[int, int],
                targets: List[Dict[str, torch.Tensor]] = None):
        (keep_pre_nms, keep_post_nms), (
            batch_size_per_image, batch_positive_ratio,
            fg_iou_threshold, bg_iou_threshold,
            nms_threshold) = self.get_params()

        dtype = fmaps.dtype
        device = fmaps.device
        bs = fmaps.size(0)

        fmap_dims = fmaps.shape[-2:]

        # cls_logits: (bs x (h'*w'*nA) x 1)
        # reg_deltas: (bs x (h'*w'*nA) x 4) as dx,dy,dw,dh
        cls_logits, reg_deltas = self.prediction_layer(fmaps)

        batched_dets = self.detection_layer(cls_logits, reg_deltas, fmap_dims,
            img_dims, nms_threshold=nms_threshold,
            keep_pre_nms=keep_pre_nms, keep_post_nms=keep_post_nms,
            dtype=dtype, device=device)

        if targets is not None:
            # merge batches
            cls_logits = cls_logits.reshape(-1)
            reg_deltas = reg_deltas.reshape(-1, 4)

            # match/build targets
            matches, target_objectness, target_labels, target_offsets = build_targets(
                self.detection_layer.anchors.repeat(bs, 1, 1),
                targets, fg_iou_threshold, bg_iou_threshold,
                add_best_matches=True)

            # sample fg and bg with given ratio
            positives, negatives = sample_fg_bg(matches, batch_size_per_image,
                                                batch_positive_ratio)
            samples = torch.cat([positives, negatives])

            # compute loss
            cls_loss, reg_loss = self.compute_loss(
                cls_logits[samples], target_objectness[samples],
                reg_deltas[positives], target_offsets[positives])

            losses = {'cls_loss': cls_loss, 'reg_loss': reg_loss}

            return batched_dets, losses

        return batched_dets
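
compute_loss in Example #5 is likewise external. A common RPN pairing, sketched here as an assumption rather than the repository's actual method: binary cross-entropy with logits on the sampled objectness scores, and smooth L1 on the positive regression deltas.

import torch.nn.functional as F

def compute_loss(self, cls_logits, target_objectness, reg_deltas, target_offsets):
    # objectness over fg+bg samples, box regression over positives only
    cls_loss = F.binary_cross_entropy_with_logits(cls_logits, target_objectness)
    reg_loss = F.smooth_l1_loss(reg_deltas, target_offsets)
    return cls_loss, reg_loss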
Example #6
    def forward(self, x, targets=None):
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        prediction = x.view(nB, nA, self.bbox_attrs, nG,
                            nG).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG,
                                         1).t().view([1, 1, nG,
                                                      nG]).type(FloatTensor)
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                      for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=pred_boxes.cpu().data,
                pred_conf=pred_conf.cpu().data,
                pred_cls=pred_cls.cpu().data,
                target=targets.cpu().data,
                anchors=scaled_anchors.cpu().data,
                num_anchors=nA,
                num_classes=self.num_classes,
                grid_size=nG,
                ignore_thres=self.ignore_thres,
                img_dim=self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            precision = float(nCorrect / nProposals) if nProposals else 0

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            loss_conf = self.bce_loss(pred_conf[conf_mask_false],
                                      tconf[conf_mask_false]) + self.bce_loss(
                                          pred_conf[conf_mask_true],
                                          tconf[conf_mask_true])
            loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask],
                                               torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )

        else:
            # If not in training phase return predictions
            output = torch.cat(
                (
                    pred_boxes.view(nB, -1, 4) * stride,
                    pred_conf.view(nB, -1, 1),
                    pred_cls.view(nB, -1, self.num_classes),
                ),
                -1,
            )
            return output
Example #7
    def forward(self, x, CUDA, targets=None):
        detections = []
        modules = self.blocks[1:]
        outputs = {}  #We cache the outputs for the route layer

        write = 0
        for i in range(len(modules)):

            module_type = (modules[i]["type"])
            if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":

                x = self.module_list[i](x)
                outputs[i] = x

            elif module_type == "route":
                layers = modules[i]["layers"]
                layers = [int(a) for a in layers]

                if (layers[0]) > 0:
                    layers[0] = layers[0] - i

                if len(layers) == 1:
                    x = outputs[i + (layers[0])]

                else:
                    if (layers[1]) > 0:
                        layers[1] = layers[1] - i

                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]

                    x = torch.cat((map1, map2), 1)
                outputs[i] = x

            elif module_type == "shortcut":
                from_ = int(modules[i]["from"])
                x = outputs[i - 1] + outputs[i + from_]
                outputs[i] = x

            elif module_type == 'yolo':

                anchors = self.module_list[i][0].anchors
                #Get the input dimensions
                inp_dim = int(self.net_info["height"])

                #Get the number of classes
                num_classes = int(modules[i]["classes"])

                #Output the result
                x = x.data

                if not write:
                    detections = x
                    write = 1

                else:
                    detections = torch.cat((detections, x), 1)

                outputs[i] = outputs[i - 1]

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            stride_ = inp_dim // detections.size(2)
            grid_size = inp_dim // stride_
            num_anchors = len(anchors)
            FloatTensor = torch.cuda.FloatTensor if CUDA else torch.FloatTensor
            LongTensor = torch.cuda.LongTensor if CUDA else torch.LongTensor
            ByteTensor = torch.cuda.ByteTensor if CUDA else torch.ByteTensor

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                pred_boxes=detections[:, :, :4].cpu().data,
                pred_conf=detections[:, :, 4:5].cpu().data,
                pred_cls=detections[:, :, 5:].cpu().data,
                target=targets.cpu().data,
                anchors=anchors.cpu().data,
                num_anchors=num_anchors,
                num_classes=num_classes,
                grid_size=grid_size,
                ignore_thres=0.3,
                img_dim=inp_dim,
            )

            nProposals = int((detections[:, :, 4:5] > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            precision = 0
            if nProposals > 0:
                precision = float(nCorrect / nProposals)

            # Handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            # Handle target variables
            tx = Variable(tx.type(FloatTensor), requires_grad=False)
            ty = Variable(ty.type(FloatTensor), requires_grad=False)
            tw = Variable(tw.type(FloatTensor), requires_grad=False)
            th = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls = Variable(tcls.type(LongTensor), requires_grad=False)

            x = torch.sigmoid(detections[..., 0])  # Center x
            y = torch.sigmoid(detections[..., 1])  # Center y
            w = detections[..., 2]  # Width
            h = detections[..., 3]  # Height
            pred_conf = torch.sigmoid(detections[..., 4])  # Conf
            pred_cls = torch.sigmoid(detections[..., 5:])  # Cls pred.

            # Get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            # Mask outputs to ignore non-existing objects
            mse_loss = nn.MSELoss()
            bce_loss = nn.BCELoss()
            ce_loss = nn.CrossEntropyLoss()
            loss_x = mse_loss(x[mask], tx[mask])
            loss_y = mse_loss(y[mask], ty[mask])
            loss_w = mse_loss(w[mask], tw[mask])
            loss_h = mse_loss(h[mask], th[mask])
            loss_conf = bce_loss(
                pred_conf[conf_mask_false], tconf[conf_mask_false]) + bce_loss(
                    pred_conf[conf_mask_true], tconf[conf_mask_true])
            loss_cls = (1 / detections.size(0)) * ce_loss(
                pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return (
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )
Example #8
    def forward(self, x, targets=None):
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

        numBatch = x.shape[0]
        gridSiz = x.shape[2]

        # Create grid offset if not created/different
        if gridSiz != self.gridSiz:
            self.compute_grid_offsets(gridSiz, cuda=x.is_cuda)

        # Separate output into manageable blocks
        prediction = (x.view(numBatch, self.numAnchors, self.numOfClass+5, gridSiz, -1)
                      .permute(0, 1, 3, 4, 2).contiguous())
        # Note:prediction is (b,anc,grid,grid,numclasses+5)
        # Note: output sequence is (x,y,w,h,conf,classes:)
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = prediction[..., 2]
        h = prediction[..., 3]
        predConf = torch.sigmoid(prediction[..., 4])
        # Note: x,y,w,h,predConf -> (b,box,grid,grid)
        predCls = torch.sigmoid(prediction[..., 5:])  # Question: use sigmoid?
        # Note: predCls -> (b,box,grid,grid,numClass)

        # Convert to grid space
        pred_boxes = FloatTensor(prediction[..., :4].shape)  # Note:becomes (b,box,grid,grid,4)
        pred_boxes[..., 0] = x.data + self.gridX  # Note:convert x to grid space
        pred_boxes[..., 1] = y.data + self.gridY
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(numBatch, -1, 4) * self.stride,  # Note:becomes (b,box*grid*grid,4)
                predConf.view(numBatch, -1, 1),  # Note: (b,box*grid*grid,1)
                predCls.view(numBatch, -1, self.numOfClass)), -1)  # Note: (b,box*grid*grid,numClass)

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=predCls,
                target=targets,
                anchors=self.scaledAnchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(predConf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(predConf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(predCls[obj_mask], tcls[obj_mask])
            total_loss = ((loss_y + loss_x) * self.coord_scale +
                          (loss_w + loss_h) * self.coord_scale +
                          loss_conf + loss_cls)

            return output, total_loss
Example #9
    def forward(self, x, target=None):

        features = self.backbone(x)

        total_loss = []
        output = []

        for idx, x in enumerate(features):

            FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

            batch_size = x.size(0)
            grid_size = x.size(2)
            current_anchors = [self.anchors[i] for i in self.anchors_mask[idx]]
            stride = self.input_size // grid_size

            prediction = x.view(batch_size, self.num_anchors,
                                self.num_classes + 5, grid_size, grid_size)
            prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

            # Get outputs
            x = torch.sigmoid(prediction[..., 0])  # Center x
            y = torch.sigmoid(prediction[..., 1])  # Center y
            w = prediction[..., 2]  # Width
            h = prediction[..., 3]  # Height
            pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
            pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

            # Calculate offsets for each grid
            grid_x = torch.arange(grid_size).repeat(grid_size, 1).view(
                [1, 1, grid_size, grid_size]).type(FloatTensor)
            grid_y = torch.arange(grid_size).repeat(grid_size, 1).t().view(
                [1, 1, grid_size, grid_size]).type(FloatTensor)
            scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                          for a_w, a_h in current_anchors])
            anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
            anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

            # Add offset and scale with anchors
            pred_boxes = FloatTensor(prediction[..., :4].shape)
            pred_boxes[..., 0] = x.data + grid_x
            pred_boxes[..., 1] = y.data + grid_y
            pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
            pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

            output.append(
                torch.cat((pred_boxes.view(batch_size, -1, 4) * stride,
                           pred_conf.view(batch_size, -1, 1),
                           pred_cls.view(batch_size, -1, self.num_classes)),
                          -1))

            if target is not None:

                iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = utils.build_targets(
                    pred_boxes=pred_boxes,
                    pred_cls=pred_cls,
                    target=target,
                    anchors=scaled_anchors,
                    ignore_thres=self.ignore_thres,
                )

                # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
                loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
                loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
                loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
                loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

                loss_conf_obj = self.bce_loss(pred_conf[obj_mask],
                                              tconf[obj_mask])
                loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                                tconf[noobj_mask])

                loss_conf = self.object_scale * loss_conf_obj + self.noobject_scale * loss_conf_noobj
                loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
                loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
                total_loss.append(loss)
            # prediction[..., 0] = torch.sigmoid(prediction[..., 0])  # Center x
            # prediction[..., 1] = torch.sigmoid(prediction[..., 1])  # Center y
            # prediction[..., 4] = torch.sigmoid(prediction[..., 4])   # object coofidence
            # prediction[..., 5:] = torch.sigmoid(prediction[..., 5:])  # classs prediction

            # dim_info = prediction[..., :4].clone()

            # # Add offset and scale with anchors
            # grid = np.arange(grid_size)
            # m,n = np.meshgrid(grid, grid)
            # x_offset = FloatTensor(m).view(-1,1)
            # y_offset = FloatTensor(n).view(-1,1)
            # x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,self.num_anchors).view(-1, 2).unsqueeze(0)
            # x_y_offset = x_y_offset.repeat(batch_size, 1, 1)

            # _scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in current_anchors])
            # scaled_anchors = _scaled_anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
            # scaled_anchors = scaled_anchors.repeat(batch_size, 1, 1)

            # prediction[..., 0:2] = prediction[..., 0:2] + x_y_offset
            # prediction[..., 2:4] = torch.exp(prediction[..., 2:4]) * scaled_anchors
            # prediction[..., 0:4] = prediction[..., 0:4] * stride

            # output.append(prediction)

            # if target is not None:
            #     total_loss.append(self.loss(prediction.view(batch_size, self.num_anchors, grid_size, grid_size, self.num_classes+5),
            #                                 dim_info.view(batch_size, self.num_anchors, grid_size, grid_size, 4),
            #                                 target, _scaled_anchors, stride))

        return torch.cat(output, 1), sum(total_loss)  # built-in sum keeps the autograd graph
Example #10
    def forward(self, x, targets=None, img_dim=None):

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (
            x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            obj_mask = obj_mask.bool()      # convert int8 to bool
            noobj_mask = noobj_mask.bool()  # convert int8 to bool
            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(loss_conf).item(),
                "cls": to_cpu(loss_cls).item(),
                "cls_acc": to_cpu(cls_acc).item(),
                "recall50": to_cpu(recall50).item(),
                "recall75": to_cpu(recall75).item(),
                "precision": to_cpu(precision).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
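
Examples #10 and #12 also lean on a to_cpu helper when logging metrics. Presumably a one-liner along these lines (an assumption, not the repository's code):

def to_cpu(tensor):
    # detach from the autograd graph and move to host memory
    return tensor.detach().cpu()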
Example #11
    def forward(self, x, targets=None, img_dim=416):

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        batch_size = x.shape[0]
        grid_size = x.shape[2]
        prediction = x.view(batch_size, self.num_anchors, self.num_classes + 5,
                            grid_size, grid_size).permute(0, 1, 3, 4,
                                                          2).contiguous()

        # Get outputs
        tx_hat = torch.sigmoid(
            prediction[:, :, :, :, 0]
        )  # For Center-x, we apply sigmoid on prediction to ensure value is between 0 and 1
        ty_hat = torch.sigmoid(
            prediction[:, :, :, :, 1]
        )  # For Center-y, we apply sigmoid on prediction to ensure value is between 0 and 1
        tw_hat = prediction[:, :, :, :, 2]  # Width
        th_hat = prediction[:, :, :, :, 3]  # Height
        pred_conf = torch.sigmoid(prediction[:, :, :, :,
                                             4])  # Object Confidence
        pred_class = torch.sigmoid(
            prediction[:, :, :, :, 5:])  # Class Prediction Probability

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, img_dim)

        # The Log-Space Transformations (Adding the offsets and scaling with the anchors)
        pred_boxes = FloatTensor(prediction[:, :, :, :, :4].shape)
        pred_boxes[:, :, :, :, 0] = tx_hat + self.grid_x
        pred_boxes[:, :, :, :, 1] = ty_hat + self.grid_y
        pred_boxes[:, :, :, :, 2] = torch.exp(tw_hat) * self.anchor_w
        pred_boxes[:, :, :, :, 3] = torch.exp(th_hat) * self.anchor_h
        output = torch.cat(
            (
                pred_boxes.view(batch_size, -1, 4) * self.stride,
                pred_conf.view(batch_size, -1, 1),
                pred_class.view(batch_size, -1, self.num_classes),
            ),
            -1,
        )

        if targets is not None:

            obj_mask, noobj_mask, tx, ty, tw, th, tclass, tconf = build_targets(
                pred_boxes=pred_boxes,
                pred_class=pred_class,
                targets=targets,
                anchors=self.scaled_anchors,
                ignore_thresh=self.ignore_thresh)

            loss_x = self.mse_loss(tx_hat[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(ty_hat[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(tw_hat[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(th_hat[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            loss_class = self.bce_loss(pred_class[obj_mask], tclass[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_class

            # Metrics
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()

            self.metrics = {
                "loss": total_loss.cpu().item(),
                "x": loss_x.cpu().item(),
                "y": loss_y.cpu().item(),
                "w": loss_w.cpu().item(),
                "h": loss_h.cpu().item(),
                "conf": loss_conf.cpu().item(),
                "cls": loss_class.cpu().item(),
                "conf_obj": conf_obj.cpu().item(),
                "conf_noobj": conf_noobj.cpu().item(),
                "grid_size": grid_size,
            }
            return output, total_loss
        else:
            return output, 0
Example #12
    def forward(self, x, targets=None, img_dim=None):

        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        prediction = (x.view(num_samples, self.num_anchors,
                             self.num_classes + 4, grid_size,
                             grid_size).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
        pred_conf = pred_conf.unsqueeze(-1)

        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
            ),
            -1,
        )
        if targets is None:
            return output, 0

        else:
            iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf = build_targets(
                pred_boxes, targets, self.scaled_anchors, self.ignore_thres)

            ## losses
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])

            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

            obj_loss = self.bce_loss(pred_conf[obj_mask],
                                     tconf[obj_mask].reshape(-1, 1))
            noobj_loss = self.bce_loss(pred_conf[noobj_mask],
                                       tconf[noobj_mask].reshape(-1, 1))

            conf_loss = self.obj_scale * obj_loss + self.noobj_scale * noobj_loss
            total_loss = loss_x + loss_y + loss_w + loss_h + conf_loss

            # Metrics
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()

            self.metrics = {
                "loss": to_cpu(total_loss).item(),
                "x": to_cpu(loss_x).item(),
                "y": to_cpu(loss_y).item(),
                "w": to_cpu(loss_w).item(),
                "h": to_cpu(loss_h).item(),
                "conf": to_cpu(conf_loss).item(),
                "conf_obj": to_cpu(conf_obj).item(),
                "conf_noobj": to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

        return output, total_loss
Example #13
    def forward(self, x, targets):
        # x : feature map -> [batch_size, final_ch, grid, grid]
        # targets : ground truth -> [num_gt_boxes, 6] -> 6 = batch index, class, x, y, w, h

        num_batches = x.size(0)
        grid_size = x.size(2)

        # reshape the raw output
        prediction = (x.view(num_batches, self.num_anchors,
                             self.num_classes + 5, grid_size,
                             grid_size).permute(0, 1, 3, 4,
                                                2).contiguous())  # make memory contiguous

        # get outputs
        # format : [batch, anchors, grid, grid]
        cx = torch.sigmoid(prediction[..., 0])  # center x of the predicted box
        cy = torch.sigmoid(prediction[..., 1])  # center y of the predicted box
        w = prediction[..., 2]  # predicted box width
        h = prediction[..., 3]  # predicted box height
        pred_conf = torch.sigmoid(prediction[..., 4])  # confidence
        pred_cls = torch.sigmoid(prediction[..., 5:])  # class probabilities

        # compute grid offsets
        stride = self.img_size / grid_size
        ''' grid_x=([[0],[1],[2],[3],...,[16]],
                    [[0],[1],[2],[3],...,[16]],
                            ...
                    [[0],[1],[2],[3],...,[16]])
            grid_y=([[0],[0],[0],...,[0]],
                    [[1],[1],[1],...,[1]],
                            ...
                    [[16],[16],[16],...,[16]])'''
        grid_x = torch.arange(grid_size, dtype=torch.float).repeat(
            grid_size, 1).view([1, 1, grid_size, grid_size])
        grid_y = torch.arange(grid_size, dtype=torch.float).repeat(
            grid_size, 1).t().view([1, 1, grid_size, grid_size])
        scaled_anchor = torch.as_tensor([(a_w / stride, a_h / stride)
                                         for a_w, a_h in self.anchors],
                                        dtype=torch.float)
        anchor_w = scaled_anchor[:, 0].view((1, self.num_anchors, 1, 1))
        anchor_h = scaled_anchor[:, 1].view((1, self.num_anchors, 1, 1))

        # compute the predicted box coordinates
        pred_boxes = torch.zeros_like(prediction[..., :4])  #x, y, w, h
        pred_boxes[..., 0] = cx + grid_x
        pred_boxes[..., 1] = cy + grid_y
        pred_boxes[..., 2] = torch.exp(w) * anchor_w
        pred_boxes[..., 3] = torch.exp(h) * anchor_h

        pred = (
            pred_boxes.view(num_batches, -1, 4) * stride,  #(1, 3*grid*grid, 4)
            pred_conf.view(num_batches, -1, 1),  #(1, 3*grid*grid, 1)
            pred_cls.view(num_batches, -1,
                          self.num_classes))  #(1, 3*grid*grid, 80)

        output = torch.cat(pred, -1)  #(1, 3*grid*grid, 85)

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, no_obj_mask, tx, ty, tw, th, tcls, tconf = utils.build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=scaled_anchor,
            ignore_thres=self.ignore_th)

        # compute the losses
        loss_x = self.mse_loss(cx[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(cy[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_bbox = loss_x + loss_y + loss_w + loss_h

        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask],
                                         tconf[no_obj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_no_obj  # weighted to penalize false confidence harder

        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        loss_layer = loss_bbox + loss_conf + loss_cls

        # Metrics
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_no_obj = pred_conf[no_obj_mask].mean()
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        # Write loss and metrics
        self.metrics = {
            "loss_x": loss_x.detach().cpu().item(),
            "loss_y": loss_y.detach().cpu().item(),
            "loss_w": loss_w.detach().cpu().item(),
            "loss_h": loss_h.detach().cpu().item(),
            "loss_bbox": loss_bbox.detach().cpu().item(),
            "loss_conf": loss_conf.detach().cpu().item(),
            "loss_cls": loss_cls.detach().cpu().item(),
            "loss_layer": loss_layer.detach().cpu().item(),
            "cls_acc": cls_acc.detach().cpu().item(),
            "conf_obj": conf_obj.detach().cpu().item(),
            "conf_no_obj": conf_no_obj.detach().cpu().item(),
            "precision": precision.detach().cpu().item(),
            "recall50": recall50.detach().cpu().item(),
            "recall75": recall75.detach().cpu().item()
        }

        return output, loss_layer
Example #14
    def forward(self, x, targets=None):
        bs = x.size(0)
        g_dim = x.size(2)
        stride = self.img_dim / g_dim
        # Tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        # print(x.shape, self.num_anchors, self.bbox_attrs, g_dim, g_dim)
        prediction = x.view(bs,  self.num_anchors, self.bbox_attrs, g_dim, g_dim).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])          # Center x
        y = torch.sigmoid(prediction[..., 1])          # Center y
        w = prediction[..., 2]                         # Width
        h = prediction[..., 3]                         # Height
        conf = torch.sigmoid(prediction[..., 4])       # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid
        grid_x = torch.linspace(0, g_dim-1, g_dim).repeat(g_dim,1).repeat(bs*self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, g_dim-1, g_dim).repeat(g_dim,1).t().repeat(bs*self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
        scaled_anchors = [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, g_dim*g_dim).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, g_dim*g_dim).view(h.shape)

        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        # Training
        if targets is not None:

            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()

            nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes.cpu().data,
                                                                            targets.cpu().data,
                                                                            scaled_anchors,
                                                                            self.num_anchors,
                                                                            self.num_classes,
                                                                            g_dim,
                                                                            self.ignore_thres,
                                                                            self.img_dim)

            nProposals = int((conf > 0.25).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1

            # Handle masks
            mask = Variable(mask.type(FloatTensor))
            cls_mask = Variable(mask.unsqueeze(-1).repeat(1, 1, 1, 1, self.num_classes).type(FloatTensor))
            conf_mask = Variable(conf_mask.type(FloatTensor))

            # Handle target variables
            tx    = Variable(tx.type(FloatTensor), requires_grad=False)
            ty    = Variable(ty.type(FloatTensor), requires_grad=False)
            tw    = Variable(tw.type(FloatTensor), requires_grad=False)
            th    = Variable(th.type(FloatTensor), requires_grad=False)
            tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
            tcls  = Variable(tcls.type(FloatTensor), requires_grad=False)

            # Mask outputs to ignore non-existing objects
            loss_x = self.lambda_coord * self.bce_loss(x * mask, tx * mask)
            loss_y = self.lambda_coord * self.bce_loss(y * mask, ty * mask)
            loss_w = self.lambda_coord * self.mse_loss(w * mask, tw * mask) / 2
            loss_h = self.lambda_coord * self.mse_loss(h * mask, th * mask) / 2
            loss_conf = self.bce_loss(conf * conf_mask, tconf * conf_mask)
            loss_cls = self.bce_loss(pred_cls * cls_mask, tcls * cls_mask)
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), recall

        else:
            # If not in training phase return predictions
            output = torch.cat((pred_boxes.view(bs, -1, 4) * stride, conf.view(bs, -1, 1), pred_cls.view(bs, -1, self.num_classes)), -1)
            return output.data
Example #15
    def forward(self, x, targets=None):
        batch_size = x.size(0)
        grid_size = x.size(2)

        # reshape the raw output
        prediction = (
            x.view(batch_size, self.num_anchors, self.num_classes + 5,
                   grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
        )  # contiguous() makes neighboring elements actually adjacent in memory

        # outputs
        bx = torch.sigmoid(prediction[..., 0])  # Center x; "..." keeps all leading dims, index 0 of the last
        by = torch.sigmoid(prediction[..., 1])  # Center y
        bw = prediction[..., 2]  # Width
        bh = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(
            prediction[..., 4])  # Object confidence (objectness)
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Class prediction

        # compute offsets for each grid cell
        stride = self.image_size / grid_size
        cx = torch.arange(grid_size, dtype=torch.float).repeat(
            grid_size, 1).view([1, 1, grid_size, grid_size])
        # arange yields consecutive integers; repeat tiles them grid_size times along dim 0 and once along dim 1
        cy = torch.arange(grid_size, dtype=torch.float).repeat(
            grid_size, 1).t().view([1, 1, grid_size, grid_size])
        scaled_anchors = torch.as_tensor([(a_w / stride, a_h / stride)
                                          for a_w, a_h in self.anchors],
                                         dtype=torch.float)
        # question: scaled_anchors only holds w and h, so why write [:, 0:1]? (the slice keeps the dim for broadcasting)
        anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

        # add offsets and scale by the anchors
        pred_boxes = torch.zeros_like(prediction[..., :4])
        pred_boxes[..., 0] = bx + cx
        pred_boxes[..., 1] = by + cy
        pred_boxes[..., 2] = torch.exp(bw) * anchor_w
        pred_boxes[..., 3] = torch.exp(bh) * anchor_h

        # concatenate x, y, w, h with conf and cls
        # multiply by stride to map back to real image coordinates
        pred = (
            pred_boxes.view(batch_size, -1, 4) *
            stride,  # question: why does batch_size need to appear here?
            pred_conf.view(batch_size, -1, 1),
            pred_cls.view(batch_size, -1, self.num_classes))
        output = torch.cat(pred, -1)

        if targets is None:
            return output, 0

        iou_scores, class_mask, obj_mask, no_obj_mask, tx, ty, tw, th, tcls, tconf = utils.build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=scaled_anchors,
            ignore_thres=self.ignore_thres)

        # compute losses (mask out cells with no object, except for the conf. loss)
        loss_x = self.mse_loss(bx[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(by[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(bw[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(bh[obj_mask], th[obj_mask])
        loss_bbox = loss_x + loss_y + loss_w + loss_h

        # loss on whether an object is present in the bounding box
        # question: why BCE loss?
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask],
                                         tconf[no_obj_mask])
        # the scales act as penalties: predicting an object where there is none is punished harder
        loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_no_obj

        # loss on the class prediction
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

        loss_layer = loss_bbox + loss_conf + loss_cls

        return output, loss_layer