Code example #1
    def get_target(self, target, anchors, in_w, in_h, pred_box, ignore_threshold):
        bs = target.size(0)
        n_obj = 0
        mask = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        noobj_mask = torch.ones(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes, requires_grad=False)
        scales = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        for b in range(bs):
            box_p = pred_box[b].view(-1, 4)
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    continue
                n_obj += 1
                # Convert to position relative to box
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Get shape of gt box
                gt_box_match = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
                gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),
                                                                  np.array(anchors)), 1))
                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box_match, anchor_shapes, True)
                pred_ious = bbox_iou(gt_box, box_p, True).view(self.num_anchors, in_h, in_w)
                # Where the overlap is larger than threshold set mask to zero (ignore)
                noobj_mask[b][pred_ious >= ignore_threshold] = 0
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)
                best_conf = pred_ious[best_n, gj, gi]
                # Masks
                mask[b, best_n, gj, gi] = 1
                noobj_mask[b, best_n, gj, gi] = 0
                # Coordinates
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj
                # Width and height
                tw[b, best_n, gj, gi] = torch.log(gw/anchors[best_n][0] + 1e-16)
                th[b, best_n, gj, gi] = torch.log(gh/anchors[best_n][1] + 1e-16)
                # object
                tconf[b, best_n, gj, gi] = best_conf
                # One-hot encoding of label
                tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
                scales[b, best_n, gj, gi] = 2 - target[b, t, 3] * target[b, t, 4]

        return n_obj, mask, noobj_mask, tx, ty, tw, th, tconf, tcls, scales
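
For context, the tensors returned above are typically consumed by the loss forward pass, where `mask` selects the anchor/cell responsible for each object and `noobj_mask` selects background cells. The following sketch is not from the project; it only illustrates that consumption under the assumption that the decoded predictions (`x`, `y`, `w`, `h`, `conf`, `cls`) share the `(bs, num_anchors, in_h, in_w[, num_classes])` layout of the targets and have already been passed through a sigmoid where appropriate.

import torch
import torch.nn as nn

def yolo_layer_loss(preds, targets):
    # Hypothetical glue code combining get_target() outputs with decoded predictions.
    n_obj, mask, noobj_mask, tx, ty, tw, th, tconf, tcls, scales = targets
    mse = nn.MSELoss(reduction='sum')
    bce = nn.BCELoss(reduction='sum')
    # Coordinate losses only at responsible cells, weighted by the 2 - w*h scale.
    loss_x = mse(preds['x'] * mask * scales, tx * mask * scales)
    loss_y = mse(preds['y'] * mask * scales, ty * mask * scales)
    loss_w = mse(preds['w'] * mask * scales, tw * mask * scales)
    loss_h = mse(preds['h'] * mask * scales, th * mask * scales)
    # Objectness: positives against tconf, background pushed towards zero.
    loss_conf = bce(preds['conf'] * mask, tconf * mask) \
        + bce(preds['conf'] * noobj_mask, torch.zeros_like(tconf))
    # Classification only at responsible cells.
    loss_cls = bce(preds['cls'][mask == 1], tcls[mask == 1])
    return (loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls) / max(n_obj, 1)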
Code example #2
File: yolo_loss.py  Project: jacke121/yolov3_nmslayer
    def get_target(self, pred_boxes, target, anchors, in_w, in_h, ignore_threshold, conf_mask, noobj_mask, tx, ty, tw, th, tconf, tcls):
        bs = target.size(0)
        # tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes, requires_grad=False)
        nGT = 0
        nCorrect = 0

        gx_ = target[:, :, 1:5] * in_w

        gt_box = target[:, :, 1:5] * in_w
        gt_box[:, :, 0:2] = 0
        targetbox = torch.FloatTensor(
            np.concatenate((np.zeros((1, self.num_anchors, 2)), np.array([anchors])), 2)).repeat(self.batch_size, 1, 1)
        batch_anchor = torch.FloatTensor(anchors).repeat(self.batch_size, 1, 1).cuda()
        for t in range(target.shape[1]):
            if target[:, t].sum() == 0:
                continue
            gi = np.array(gx_[:, t, 0].int())  # .type(torch.uint8)
            gj = np.array(gx_[:, t, 1].int())  # .type(torch.uint8)
            r_gt_box = gt_box[:, t, :]
            anch_ious = bbox_iou(r_gt_box.view(self.batch_size, 1, 4), targetbox.cuda())
            # 80 3,11,11

            noobj_mask[anch_ious > ignore_threshold] = 0

            values = torch.max(anch_ious, 1, keepdim=True)[0]

            anch_ious[anch_ious == 0] = 1e+16
            c = anch_ious - values

            best_n = (c == 0)  # (c == 0).type(torch.uint8)
            conf_mask[best_n, gj, gi] = 1
            # Coordinates
            tx[best_n, gj, gi] = (gx_[:, t, 0] - gx_[:, t, 0].int().float())  # .cpu()
            ty[best_n, gj, gi] = (gx_[:, t, 1] - gx_[:, t, 1].int().float())  # .cpu()

            tw[best_n, gj, gi] = torch.log(gx_[:, t, 2] / batch_anchor[best_n][:, 0] + 1e-16)  # .cpu()
            th[best_n, gj, gi] = torch.log(gx_[:, t, 3] / batch_anchor[best_n][:, 1] + 1e-16)  # .cpu()
            # object
            tconf[best_n, gj, gi] = 1
            # One-hot encoding of label
            tcls[best_n, gj, gi, np.array(target[:, t, 0])] = 1
            r_gt_box[:, 0:2] = gx_[:, t, 0:2]
            pred_box = pred_boxes[best_n, gj, gi]
            iou = bbox_iou(r_gt_box.cuda(), pred_box, x1y1x2y2=False)
            nGT = nGT + self.batch_size
            nCorrect = nCorrect + int(sum(iou > 0.8))


        return nGT, nCorrect
Code example #3
File: yolo_loss.py  Project: qd213618/URVP
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        bs = target.size(0)
        mask = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           requires_grad=False)
        noobj_mask = torch.ones(bs,
                                self.num_anchors,
                                in_h,
                                in_w,
                                requires_grad=False)
        tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tconf = torch.zeros(bs,
                            self.num_anchors,
                            in_h,
                            in_w,
                            requires_grad=False)
        tcls = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           self.num_classes,
                           requires_grad=False)
        for b in range(bs):
            # Convert to position relative to box
            gx = target[b, 0, 1] * in_w
            gy = target[b, 0, 2] * in_h
            gw = target[b, 0, 3] * in_w
            gh = target[b, 0, 4] * in_h
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Get shape of gt box
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Get shape of anchor box
            anchor_shapes = torch.FloatTensor(
                np.concatenate((np.zeros(
                    (self.num_anchors, 2)), np.array(anchors)), 1))
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
            best_n = np.argmax(anch_ious)
            # Masks
            mask[b, best_n, gj, gi] = 1
            # Coordinates
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # object
            tconf[b, best_n, gj, gi] = 1
            # One-hot encoding of label
            tcls[b, best_n, gj, gi, int(target[b, 0, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls
Code example #4
def validate(net):
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels, image_paths = samples["image"], samples[
            "label"], samples["img_path"]
        labels = labels.cuda()
        with torch.no_grad():
            time1 = datetime.datetime.now()
            outputs = net(images)

            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            output = non_max_suppression(output, 1, conf_thres=0.5)
            if ((datetime.datetime.now() - time1).seconds > 5):
                logging.info('Batch %d time is too long ' % (step))
                n_gt = 1
                break
            # print("time2", (datetime.datetime.now() - time1).seconds*1000+(datetime.datetime.now() - time1).microseconds//1000)
            #  calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1, tx2 = config["img_w"] * (
                        tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (
                        ty - th / 2), config["img_h"] * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.cat([
                        coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]
                    ]).view(1, -1)
                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[
                                sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([
                                coord.unsqueeze(0)
                                for coord in [x1, y1, x2, y2]
                            ]).view(1, -1)
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thres"]:
                                correct += 1
                                break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct / n_gt)))

    logging.info('Mean Average Precision: %.5f' % float(correct / n_gt))
Code example #5
    def myloss(self, anchors, y_pred, y_true):
        self.reso = 352
        self.anchors = anchors

        loss = dict()

        # 1. Prepare
        # 1.1 re-organize y_pred
        # [bs, (5+nC)*nA, gs, gs] => [bs, num_anchors, gs, gs, 5+nC]
        bs, _, gs, _ = y_pred.size()
        nA = len(self.anchors)
        nC = self.num_classes
        y_pred = y_pred.view(bs, nA, 5 + nC, gs, gs)
        y_pred = y_pred.permute(0, 1, 3, 4, 2)

        # 1.3 prepare anchor boxes
        stride = self.reso // gs
        anchors = [(a[0] / stride, a[1] / stride) for a in self.anchors]
        anchor_bboxes = torch.zeros(3, 4).cuda()
        anchor_bboxes[:, 2:] = torch.Tensor(anchors)

        anchor_bboxes = anchor_bboxes.repeat(bs, 1, 1)

        # 2. Build gt [tx, ty, tw, th] and masks
        # TODO: f1 score implementation
        # total_num = 0
        gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False)
        non_obj_mask = torch.ones(bs, nA, gs, gs, requires_grad=False)
        cls_mask = torch.zeros(bs, nA, gs, gs, nC, requires_grad=False)
        start = time.time()
        # for batch_idx in range(bs):
        #     for box_idx, y_true_one in enumerate(y_true[batch_idx]):
        # total_num += 1
        gt_bbox = y_true[:, :, :4] * gs  # scale bbox relative to feature map
        gt_cls_label = y_true[:, :, 4].int()

        # gt_xc, gt_yc, gt_w, gt_h = gt_bbox[:,:,0:4]
        gt_xc = gt_bbox[:, :, 0]
        gt_yc = gt_bbox[:, :, 1]
        gt_w = gt_bbox[:, :, 2]
        gt_h = gt_bbox[:, :, 3]
        gt_i, gt_j = gt_xc.int(), gt_yc.int()
        gt_box_shape = y_true[:, :, :4] * gs
        gt_box_shape[:, :, 0:2] = 0
        # gt_box_shape = torch.Tensor([0, 0, gt_w, gt_h]).unsqueeze(0).cuda()
        anch_ious = bbox_iou(gt_box_shape.view(self.batch_size, 1, 4),
                             anchor_bboxes.cuda())
        anchor_ious = IoU(gt_box_shape, anchor_bboxes, format='center')
        best_anchor = np.argmax(anchor_ious)
        anchor_w, anchor_h = anchors[best_anchor]

        gt_tw[:, best_anchor, gt_i, gt_j] = torch.log(gt_w / anchor_w + 1e-16)
        gt_th[:, best_anchor, gt_i, gt_j] = torch.log(gt_h / anchor_h + 1e-16)
        gt_tx[:, best_anchor, gt_i, gt_j] = gt_xc - gt_i
        gt_ty[:, best_anchor, gt_i, gt_j] = gt_yc - gt_j

        obj_mask[:, best_anchor, gt_i, gt_j] = 1
        non_obj_mask[:, anchor_ious > 0.5] = 0  # FIXME: 0.5 as variable
        cls_mask[:, best_anchor, gt_i, gt_j, gt_cls_label] = 1

        # 3. activate raw y_pred
        end = time.time()
        print("yolo_losses", bs, len(y_true), end - start)
        pred_tx = torch.sigmoid(y_pred[..., 0])  # gt tx/ty are not deactivated
        pred_ty = torch.sigmoid(y_pred[..., 1])
        pred_tw = y_pred[..., 2]
        pred_th = y_pred[..., 3]
        pred_conf = y_pred[..., 4]
        pred_cls = y_pred[..., 5:]

        # 4. Compute loss
        obj_mask = obj_mask.cuda()
        non_obj_mask = non_obj_mask.cuda()
        cls_mask = cls_mask.cuda()
        gt_tx, gt_ty = gt_tx.cuda(), gt_ty.cuda()
        gt_tw, gt_th = gt_tw.cuda(), gt_th.cuda()

        # average over batch
        MSELoss = nn.MSELoss()
        BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
        BCELoss = nn.BCELoss()
        CrossEntropyLoss = nn.CrossEntropyLoss()

        loss['x'] = MSELoss(pred_tx[obj_mask == 1], gt_tx[obj_mask == 1])
        loss['y'] = MSELoss(pred_ty[obj_mask == 1], gt_ty[obj_mask == 1])
        loss['w'] = MSELoss(pred_tw[obj_mask == 1], gt_tw[obj_mask == 1])
        loss['h'] = MSELoss(pred_th[obj_mask == 1], gt_th[obj_mask == 1])
        loss['cls'] = CrossEntropyLoss(
            pred_cls[obj_mask == 1], torch.argmax(cls_mask[obj_mask == 1], 1))
        loss['conf'] = BCEWithLogitsLoss(pred_conf[obj_mask == 1],
                                         obj_mask[obj_mask == 1])
        loss['non_conf'] = BCEWithLogitsLoss(pred_conf[non_obj_mask == 1],
                                             non_obj_mask[non_obj_mask == 1])
        loss['total_loss'] = loss['x'] + loss['y'] + loss['w'] + loss[
            'h'] + loss['cls'] + loss['conf'] + loss['non_conf']
        #["total_loss", "x", "y", "w", "h", "conf", "cls", "recall"]
        return loss['total_loss'], loss['x'], loss['y'], loss['w'], loss[
            'h'], loss['cls'], loss['conf'], loss['non_conf']
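
As a usage note, the tuple returned by `myloss` would typically be unpacked in a training step with only the first element back-propagated. A minimal sketch under that assumption (the names `model`, `criterion`, `optimizer`, `anchors`, `images`, and `labels` are placeholders, not from the project):

def train_step(model, criterion, optimizer, anchors, images, labels):
    # Forward pass through the detection head for one scale.
    y_pred = model(images)
    losses = criterion.myloss(anchors, y_pred, labels)
    total_loss = losses[0]          # first element of the returned tuple
    optimizer.zero_grad()
    total_loss.backward()           # only the total loss is back-propagated
    optimizer.step()
    return [float(l) for l in losses]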
Code example #6
def evaluate(config):
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["val_path"], (config["img_w"], config["img_h"]),
        is_training=False),
                                             batch_size=config["batch_size"],
                                             shuffle=False,
                                             num_workers=16,
                                             pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            output = non_max_suppression(output, 80, conf_thres=0.2)
            #  calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1, tx2 = config["img_w"] * (
                        tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (
                        ty - th / 2), config["img_h"] * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.cat([
                        coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]
                    ]).view(1, -1)
                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[
                                sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([
                                coord.unsqueeze(0)
                                for coord in [x1, y1, x2, y2]
                            ]).view(1, -1)
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thres"]:
                                correct += 1
                                break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct / n_gt)))

    logging.info('Mean Average Precision: %.5f' % float(correct / n_gt))
Code example #7
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """

        :param target:
        :param anchors:
        :param in_w:
        :param in_h:
        :param ignore_threshold:
        :return:
        """
        bs = target.size(0)

        obj_mask = torch.zeros(bs,
                               self.num_anchors,
                               in_h,
                               in_w,
                               requires_grad=False)
        noobj_mask = torch.ones(bs,
                                self.num_anchors,
                                in_h,
                                in_w,
                                requires_grad=False)

        tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)

        tconf = torch.zeros(bs,
                            self.num_anchors,
                            in_h,
                            in_w,
                            requires_grad=False)
        tcls = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           self.num_classes,
                           requires_grad=False)

        for b in range(bs):
            for t in range(target.shape[1]):

                if target[b, t].sum() == 0:
                    continue

                # Convert to position relative to box
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h

                # Get grid box indices
                gi = int(gx)
                gj = int(gy)

                if gi >= in_w:
                    continue
                if gj >= in_h:
                    continue

                # Get shape of gt box
                gt_box = torch.FloatTensor(np.array([0, 0, gw,
                                                     gh])).unsqueeze(0)

                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (self.num_anchors, 2)), np.array(anchors)), 1))

                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box, anchor_shapes)

                # If the overlap is larger than threshold set mask to zero (ignore)
                noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0

                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)

                # Masks
                obj_mask[b, best_n, gj, gi] = 1

                # Coordinates
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj

                # Width and height
                tw[b, best_n, gj,
                   gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                th[b, best_n, gj,
                   gi] = math.log(gh / anchors[best_n][1] + 1e-16)

                # object
                tconf[b, best_n, gj, gi] = 1

                # One-hot encoding of label
                tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

        return obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls
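
All of these `get_target` variants encode width and height as log-ratios against the matched anchor, i.e. tw = log(gw / anchor_w + eps), which the detection head inverts at inference by scaling the anchor with the exponent of its raw output. A small hypothetical helper pair illustrating that round trip (not part of the project):

import math

def encode_wh(gw, gh, anchor_w, anchor_h, eps=1e-16):
    # Same encoding used above: targets are log-ratios between gt and anchor size.
    return math.log(gw / anchor_w + eps), math.log(gh / anchor_h + eps)

def decode_wh(tw, th, anchor_w, anchor_h):
    # Inverse mapping applied to raw network outputs at inference time.
    return anchor_w * math.exp(tw), anchor_h * math.exp(th)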
Code example #8
    def get_target(self, target, anchors, in_w, in_h):
        bs = target.size(0)

        mask = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           requires_grad=False)
        tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tconf = torch.zeros(bs,
                            self.num_anchors,
                            in_h,
                            in_w,
                            requires_grad=False)
        tcls = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           self.num_classes,
                           requires_grad=False)
        for b in range(bs):
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    continue
                # Convert to position relative to box
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Get shape of gt box
                gt_box = torch.FloatTensor(np.array([0, 0, gw,
                                                     gh])).unsqueeze(0)
                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (self.num_anchors, 2)), np.array(anchors)), 1))
                # Calculate iou between gt and anchor shape
                anch_ious = bbox_iou(gt_box, anchor_shapes)
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)

                # Masks
                if (gj < in_h) and (gi < in_w):
                    mask[b, best_n, gj, gi] = 1
                    # Coordinates
                    tx[b, best_n, gj, gi] = gx - gi
                    ty[b, best_n, gj, gi] = gy - gj
                    # Width and height
                    tw[b, best_n, gj,
                       gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                    th[b, best_n, gj,
                       gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                    # object
                    tconf[b, best_n, gj, gi] = 1
                    # One-hot encoding of label
                    tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(
                        gj, in_h, gi, in_w))
                    continue

        return mask, tx, ty, tw, th, tconf, tcls
def eval(config):
    """

    :param config:
    :return:
    """
    is_training = False

    # Load and initialize network
    # net = ProposalModel(config, is_training=is_training)
    net = ProposalAttention(config, is_training=is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)

    # YOLO loss with 3 scales
    val_losses = []
    for i in range(3):
        val_losses.append(
            ProposalLoss(config["yolo"]["anchors"][i],
                         (config["img_w"], config["img_h"])))

    # DataLoader
    val_loader = torch.utils.data.DataLoader(
        COCOvalDataset(config["val_path"], (config["img_w"], config["img_h"])),
        batch_size=16,  # set batch size by 1
        shuffle=False,
        num_workers=2,
        pin_memory=False)
    """ VALIDATION """
    total = 0.0
    proposal = 0.0
    correct = 0.0
    net.eval()
    img_cnt = 0
    recall_cnt = 0.0
    for step, samples in enumerate(val_loader):
        images, labels = samples["image"], samples["label"]
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(val_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output)

        # one image at a time !!!

        for label_i in range(labels.size(0)):

            total_avg = 0
            correct_avg = 0

            # calculate total
            targets = labels[label_i]
            for tx, ty, tw, th in targets:
                if tw > 0:
                    total += 1
                    total_avg += 1
                else:
                    continue

            # calculate proposal
            if batch_detections[label_i] is None:
                continue

            img_cnt += 1
            predictions = batch_detections[label_i]
            proposal += predictions.size(0)

            # calculate correct
            for tx, ty, tw, th in targets:
                x1, x2 = config["img_w"] * (tx - tw / 2.0), config["img_w"] * (
                    tx + tw / 2.0)
                y1, y2 = config["img_h"] * (ty - th / 2.0), config["img_h"] * (
                    ty + th / 2.0)
                box_gt = [x1, y1, x2, y2, 1.0]
                box_gt = torch.from_numpy(np.array(box_gt)).float().cuda()

                best_iou = 0.0
                for pred_i in range(predictions.size(0)):
                    iou = bbox_iou(predictions[pred_i].unsqueeze(0),
                                   box_gt.unsqueeze(0))
                    iou = iou.item()
                    best_iou = max(iou, best_iou)
                if best_iou >= 0.5:
                    correct += 1
                    correct_avg += 1
            recall_cnt += float(correct_avg / float(total_avg))
        if (step + 1) % 100 == 0:
            print('Total: %d\tProposal: %d\tCorrect: %d\tPrecision: %.4f\tRecall: %.4f' % (
                total, proposal, correct,
                correct / (proposal + 1e-6), correct / (total + 1e-6)))

    precision = correct / (proposal + 1e-6)
    recall = correct / (total + 1e-6)
    fscore = (2.0 * precision * recall) / (precision + recall + 1e-6)

    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" %
          (precision, recall, fscore))
    print("Avg Recall: %.4f" % (recall_cnt / float(img_cnt + 1e-6)))
Code example #10
def GetIOU(bbox1, bbox2):
    from common.utils import bbox_iou
    b1 = torch.Tensor(bbox1).unsqueeze(0)
    b2 = torch.Tensor(bbox2).unsqueeze(0)
    iou = bbox_iou(b1, b2, x1y1x2y2=True)
    return iou.item()
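
Every example on this page relies on a `bbox_iou` helper imported from the project's `common.utils`, whose implementation is not reproduced here. As an assumption for illustration only, a minimal compatible sketch with the same `x1y1x2y2` switch could look like this:

import torch

def bbox_iou_sketch(box1, box2, x1y1x2y2=True, eps=1e-16):
    # box1, box2: tensors of shape (N, 4). If x1y1x2y2 is False, boxes are
    # given as (cx, cy, w, h) and converted to corner format first.
    if not x1y1x2y2:
        box1 = torch.cat([box1[:, :2] - box1[:, 2:] / 2,
                          box1[:, :2] + box1[:, 2:] / 2], dim=1)
        box2 = torch.cat([box2[:, :2] - box2[:, 2:] / 2,
                          box2[:, :2] + box2[:, 2:] / 2], dim=1)
    # Intersection rectangle
    inter_x1 = torch.max(box1[:, 0], box2[:, 0])
    inter_y1 = torch.max(box1[:, 1], box2[:, 1])
    inter_x2 = torch.min(box1[:, 2], box2[:, 2])
    inter_y2 = torch.min(box1[:, 3], box2[:, 3])
    inter = (inter_x2 - inter_x1).clamp(min=0) * (inter_y2 - inter_y1).clamp(min=0)
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    return inter / (area1 + area2 - inter + eps)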
Code example #11
File: eval.py  Project: sztcmtj/yolov3_pytorch_
def evaluate(config):
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                    config["yolo"]["classes"], (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(dataset=COCODataset(config["test_path"], config["img_w"]),
                                             batch_size=config["batch_size"],
                                             shuffle=False, num_workers=8, pin_memory=False)

    # Start the eval loop
    #logging.info("Start eval.")
    n_gt = 0
    correct = 0
    #logging.debug('%s' % str(dataloader))

    gt_histro={}
    pred_histro = {}
    correct_histro = {}

    for i in range(config["yolo"]["classes"]):
        gt_histro[i] = 1
        pred_histro[i] = 1
        correct_histro[i] = 0

    # images holds all the images in a batch; labels holds all the labels in that batch
    for step, (images, labels) in enumerate(dataloader):
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))

            # Concatenate the predictions from the three scales along dim 1
            # (dim 0 indexes the images in a batch, dim 1 the prediction boxes of one image,
            # dim 2 the individual predicted values)
            output = torch.cat(output_list, dim=1)

            #logging.info('%s' % str(output.shape))

            # Apply non-maximum suppression
            #output = non_max_suppression(prediction=output, num_classes=config["yolo"]["classes"], conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            output = class_nms(prediction=output, num_classes=config["yolo"]["classes"],conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            #  calculate
            for sample_i in range(labels.size(0)):

                # 计算所有的预测数量
                sample_pred = output[sample_i]
                if sample_pred is not None:
                    #logging.debug(sample_pred.shape)
                    for i in range(sample_pred.shape[0]):
                        pred_histro[int(sample_pred[i,6])] +=  1

                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                # Each ground truth: class id obj_cls, relative center x/y, relative width w, relative height h
                n_gt=0
                correct=0
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    # Convert to input-pixel coordinates: top-left (tx1, ty1), bottom-right (tx2, ty2)
                    tx1, tx2 = config["img_w"] * (tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (ty - th / 2), config["img_h"] * (ty + th / 2)
                    # Count ground truths for the statistics
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # Convert to a tensor of shape (1, 4) for IoU computation
                    box_gt = torch.cat([coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)
                    # logging.info('%s' % str(box_gt.shape))

                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        # For each ground truth, iterate over the predictions
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[sample_pred[:, 6] == obj_cls]:  # predicted class == gt class
                            #logging.info("%d" % obj_cls)
                            box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                            #pred_histro[int(obj_pred)] += 1
                            iou = bbox_iou(box_pred, box_gt)
                            #if iou >= config["iou_thres"] and obj_conf >= config["obj_thresh"]:
                            if iou >= config["iou_thresh"]:
                                correct += 1
                                correct_histro[int(obj_pred)] += 1
                                break
                #logging.debug("----------------")
                #logging.debug(correct_histro[4])
                #logging.debug(pred_histro[4])
                #logging.debug(gt_histro[4])
    if n_gt:
        types = config["types"]

        reverse_types = {}  # build a reverse mapping of types
        for key in types.keys():
            reverse_types[types[key]] = key

        #logging.info('Batch [%d/%d] mAP: %.5f' % (step, len(dataloader), float(correct / n_gt)))
        logging.info('Precision:%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / pred_histro[i])) for i in range(config["yolo"]["classes"]) ]))
        logging.info('Recall   :%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / gt_histro[i])) for i in range(config["yolo"]["classes"])]))
def voc():
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'

    gnd_dir = '/home/yz/cde/ProposalYOLO/data/voc/Labels'
    roi_dir = '/home/yz/cde/MxRCNN/roi/voc100'
    img_dir = '/home/yz/cde/ProposalYOLO/data/voc/JPEGImages'

    rois = os.listdir(roi_dir)
    rois.sort()

    gnds = os.listdir(gnd_dir)
    gnds.sort()

    assert len(rois) == len(gnds)

    total = 0.0
    proposal = 0.0
    correct = 0.0

    for i in range(len(rois)):

        # 1 Prediction
        pred_boxes = np.loadtxt(os.path.join(roi_dir, rois[i]))

        # 2 Ground-truth
        cords = np.loadtxt(os.path.join(gnd_dir, gnds[i]))

        try:
            cords = cords[:, 1:]
        except:
            cords = cords[1:]
            cords = cords.reshape(1, cords.shape[0])

        # 3 Height & Width
        img = os.path.join(img_dir, gnds[i].split('.')[0] + '.jpg')
        print(img)
        im = cv2.imread(img, cv2.IMREAD_COLOR)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        height, width = im.shape[:2]

        gt_boxes = np.zeros(cords.shape)
        gt_boxes[:, 0] = (cords[:, 0] - cords[:, 2] / 2) * width
        gt_boxes[:, 1] = (cords[:, 1] - cords[:, 3] / 2) * height
        gt_boxes[:, 2] = (cords[:, 0] + cords[:, 2] / 2) * width
        gt_boxes[:, 3] = (cords[:, 1] + cords[:, 3] / 2) * height

        if i < 10:
            # Debug purpose
            plt.figure()
            fig, (ax1, ax2) = plt.subplots(1, 2)
            ax1.imshow(im)
            ax2.imshow(im)
            for idx in range(gt_boxes.shape[0]):
                bbox = patches.Rectangle((gt_boxes[idx][0], gt_boxes[idx][1]), gt_boxes[idx][2] - gt_boxes[idx][0],
                                         gt_boxes[idx][3] - gt_boxes[idx][1], linewidth=2, edgecolor='blue',
                                         facecolor='none')
                ax1.add_patch(bbox)
            for idx in range(pred_boxes.shape[0]):
                bbox = patches.Rectangle((pred_boxes[idx][0], pred_boxes[idx][1]),
                                         pred_boxes[idx][2] - pred_boxes[idx][0],
                                         pred_boxes[idx][3] - pred_boxes[idx][1], linewidth=2, edgecolor='red',
                                         facecolor='none')
                ax2.add_patch(bbox)
            ax1.axis('off')
            ax2.axis('off')
            # plt.gca().xaxis.set_major_locator(NullLocator())
            # plt.gca().yaxis.set_major_locator(NullLocator())
            plt.savefig('/home/yz/cde/ProposalYOLO/eval/RPN/voc100/{}'.format(gnds[i].split('.')[0]), bbox_inches='tight',
                        pad_inches=0.0)
            plt.close()

        total += gt_boxes.shape[0]
        proposal += pred_boxes.shape[0]
        for j in range(gt_boxes.shape[0]):

            best_iou = 0.0
            for k in range(pred_boxes.shape[0]):
                # print gt_boxes[j], pred_boxes[k]
                gt = torch.from_numpy(gt_boxes[j]).float().cuda()
                pd = torch.from_numpy(pred_boxes[k]).float().cuda()
                iou = bbox_iou(pd.unsqueeze(0), gt.unsqueeze(0))
                iou = iou.item()
                best_iou = max(iou, best_iou)
            if best_iou >= 0.5:
                correct += 1

        print(total, proposal, correct, correct / total)

    precision = correct / proposal
    recall = correct / total
    fscore = (2.0 * precision * recall) / (precision + recall)
    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" % (precision, recall, fscore))
def coco():
    os.environ["CUDA_VISIBLE_DEVICES"] = '4'

    gnd_dir = '/home/yz/cde/ProposalYOLO/data/coco/5kxLabels'
    roi_dir = '/home/yz/cde/MxRCNN/roi/coco10'
    img_dir = '/home/yz/cde/ProposalYOLO/data/coco/images/val2014'

    fn = '/home/yz/cde/ProposalYOLO/data/coco/5kx.txt'

    rois = os.listdir(roi_dir)
    rois.sort()

    gnds = os.listdir(gnd_dir)
    gnds.sort()

    assert len(rois) == len(gnds)

    total = 0.0
    proposal = 0.0
    correct = 0.0

    fn_strm = open(fn, 'r')
    for i in range(len(rois)):
        # 0 Name
        line = fn_strm.readline()
        name = line.split('\n')[0]


        # 1 Prediction
        pred_boxes = np.loadtxt(os.path.join(roi_dir, rois[i]))

        # 2 Ground-truth
        cords = np.loadtxt(os.path.join(gnd_dir, name.replace('jpg', 'txt')))

        try:
            cords = cords[:, 0:]
        except:
            cords = cords[0:]
            cords = cords.reshape(1, cords.shape[0])

        # 3 Height & Width
        img = os.path.join(img_dir, name)
        print(img)
        im = cv2.imread(img, cv2.IMREAD_COLOR)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        height, width = im.shape[:2]

        gt_boxes = cords
        if gt_boxes.shape == (1, 0):
            continue

        if i < 10:
            # Debug purpose
            plt.figure()
            fig, (ax1, ax2) = plt.subplots(1, 2)
            ax1.imshow(im)
            ax2.imshow(im)
            for idx in range(gt_boxes.shape[0]):
                bbox = patches.Rectangle((gt_boxes[idx][0], gt_boxes[idx][1]), gt_boxes[idx][2] - gt_boxes[idx][0],
                                         gt_boxes[idx][3] - gt_boxes[idx][1], linewidth=2, edgecolor='blue',
                                         facecolor='none')
                ax1.add_patch(bbox)
            for idx in range(pred_boxes.shape[0]):
                bbox = patches.Rectangle((pred_boxes[idx][0], pred_boxes[idx][1]),
                                         pred_boxes[idx][2] - pred_boxes[idx][0],
                                         pred_boxes[idx][3] - pred_boxes[idx][1], linewidth=1, edgecolor='red',
                                         facecolor='none')
                ax2.add_patch(bbox)
            ax1.axis('off')
            ax2.axis('off')
            # plt.gca().xaxis.set_major_locator(NullLocator())
            # plt.gca().yaxis.set_major_locator(NullLocator())
            plt.savefig('/home/yz/cde/ProposalYOLO/eval/RPN/test10/{}'.format(gnds[i].split('.')[0]), bbox_inches='tight',
                        pad_inches=0.0)
            plt.close()
        # continue
        total += gt_boxes.shape[0]
        proposal += pred_boxes.shape[0]
        for j in range(gt_boxes.shape[0]):

            best_iou = 0.0
            for k in range(pred_boxes.shape[0]):
                # print gt_boxes[j], pred_boxes[k]
                gt = torch.from_numpy(gt_boxes[j]).float().cuda()
                pd = torch.from_numpy(pred_boxes[k]).float().cuda()
                gt = gt.unsqueeze(0)
                pd = pd.unsqueeze(0)
                iou = bbox_iou(pd, gt)
                iou = iou.item()
                best_iou = max(iou, best_iou)
            if best_iou >= 0.5:
                correct += 1

        print(total, proposal, correct, correct / total)

    precision = correct / proposal
    recall = correct / total
    fscore = (2.0 * precision * recall) / (precision + recall)
    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" % (precision, recall, fscore))
Code example #14
def evaluate(config):
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                    config["yolo"]["classes"], (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(dataset=COCODataset(config["val_path"], config["img_w"]),
                                             batch_size=config["batch_size"],
                                             shuffle=True, num_workers=1, pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    logging.info('%s' % str(dataloader))

    gt_histro={}
    pred_histro = {}
    correct_histro = {}

    for i in range(config["yolo"]["classes"]):
        gt_histro[i] = 1
        pred_histro[i] = 1
        correct_histro[i] = 0

    # images holds all the images in a batch; labels holds all the labels in that batch
    for step, (images, labels) in enumerate(dataloader):
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))

            # Concatenate the predictions from the three scales along dim 1
            # (dim 0 indexes the images in a batch, dim 1 the prediction boxes of one image,
            # dim 2 the individual predicted values)
            batch_output = torch.cat(output_list, dim=1)

            logging.info('%s' % str(batch_output.shape))

            # Apply non-maximum suppression
            batch_output = non_max_suppression(prediction=batch_output, num_classes=config["yolo"]["classes"], conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            #  calculate
            for sample_index_in_batch in range(labels.size(0)):
                # fetched img sample in tensor( C(RxGxB) x H x W ), transform to cv2 format in  H x W x C(BxGxR)
                sample_image = images[sample_index_in_batch].numpy()
                sample_image = np.transpose(sample_image, (1, 2, 0))
                sample_image = cv2.cvtColor(sample_image, cv2.COLOR_RGB2BGR)

                logging.debug("fetched img %d size %s" % (sample_index_in_batch, sample_image.shape))
                # Get labels for sample where width is not zero (dummies)(init all labels to zeros in array)
                target_sample = labels[sample_index_in_batch, labels[sample_index_in_batch, :, 3] != 0]
                # get prediction for this sample
                sample_pred = batch_output[sample_index_in_batch]
                if sample_pred is not None:
                    for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred:  # for each prediction box
                        # logging.info("%d" % obj_cls)
                        box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                        sample_image = draw_prediction(sample_image,conf, obj_conf, int(obj_pred), (x1, y1, x2, y2), config)

                # Each ground truth: class id obj_cls, relative center x/y, relative width w, relative height h
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    # Convert to input-pixel coordinates: top-left (tx1, ty1), bottom-right (tx2, ty2)
                    tx1, tx2 = config["img_w"] * (tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (ty - th / 2), config["img_h"] * (ty + th / 2)
                    # Count ground truths for the statistics
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # Convert to a tensor of shape (1, 4) for IoU computation
                    box_gt = torch.cat([coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)
                    # logging.info('%s' % str(box_gt.shape))

                    sample_pred = batch_output[sample_index_in_batch]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        # For each ground truth, iterate over the predictions
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[sample_pred[:, 6] == obj_cls]:  # predicted class == gt class
                            #logging.info("%d" % obj_cls)
                            box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                            pred_histro[int(obj_pred)] += 1
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thresh"]:
                                correct += 1
                                correct_histro[int(obj_pred)] += 1
                                break
        if n_gt:
            types = config["types"]
            reverse_types = {}  # build a reverse mapping of types
            for key in types.keys():
                reverse_types[types[key]] = key

            logging.info('Batch [%d/%d] mAP: %.5f' % (step, len(dataloader), float(correct / n_gt)))
            logging.info('mAP Histro:%s' % str([  reverse_types[i] +':'+ str(int(100 * correct_histro[i] / gt_histro[i])) for i in range(config["yolo"]["classes"] )  ]))
            logging.info('Recall His:%s' % str([  reverse_types[i] +':'+ str(int(100 * correct_histro[i] / pred_histro[i])) for i in range(config["yolo"]["classes"]) ]))

    logging.info('Mean Average Precision: %.5f' % float(correct / n_gt))
Code example #15
File: auto_test.py  Project: jacke121/YOLOv3_PyTorch
def evaluate(config):
    # checkpoint_paths = {'58': r'\\192.168.25.58\Team-CV\checkpoints\torch_yolov3'}
    checkpoint_paths = {'39': r'F:\Team-CV\checkpoints\shuffle_v2/'}
    # checkpoint_paths = {'68': r'E:\github\YOLOv3_PyTorch\evaluate\weights'}
    post_weights = {k: 0 for k in checkpoint_paths.keys()}
    weight_index = {k: 0 for k in checkpoint_paths.keys()}
    time_inter = 10
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["train_path"], (config["img_w"], config["img_h"]),
        is_training=False,
        is_scene=True),
                                             batch_size=config["batch_size"],
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=False,
                                             drop_last=True)  # DataLoader
    net, yolo_losses = build_yolov3(config)
    while 1:
        for key, checkpoint_path in checkpoint_paths.items():
            os.makedirs(checkpoint_path + '/result', exist_ok=True)
            checkpoint_weights = os.listdir(checkpoint_path)
            checkpoint_result = os.listdir(checkpoint_path + '/result')
            checkpoint_result = [
                cweight.split("_")[2][:-4] for cweight in checkpoint_result
                if cweight.endswith('ini')
            ]
            checkpoint_weights = [
                cweight for cweight in checkpoint_weights
                if cweight.endswith('weights')
            ]

            if weight_index[key] >= len(checkpoint_weights):
                print('weight_index[key]', weight_index[key],
                      len(checkpoint_weights))
                time.sleep(time_inter)
                continue
            if post_weights[key] == checkpoint_weights[weight_index[key]]:
                print('post_weights[key]', post_weights[key])
                time.sleep(time_inter)
                continue
            post_weights[key] = checkpoint_weights[weight_index[key]]

            if post_weights[key].endswith("_.weights"):  # check whether the weights file has finished saving
                print("post_weights[key].split('_')",
                      post_weights[key].split('_'))
                time.sleep(time_inter)
                continue
            if checkpoint_weights[weight_index[key]].split(
                    "_")[1][:-8] in checkpoint_result:
                print('weight_index[key] +', weight_index[key])
                weight_index[key] += 1
                time.sleep(time_inter // 20)
                continue
            weight_index[key] += 1
            try:
                if config["pretrain_snapshot"]:  # Restore pretrain model
                    state_dict = torch.load(config["pretrain_snapshot"])
                    logging.info("loading model from %s" %
                                 config["pretrain_snapshot"])
                    net.load_state_dict(state_dict)
                else:
                    state_dict = torch.load(
                        os.path.join(checkpoint_path, post_weights[key]))
                    logging.info(
                        "loading model from %s" %
                        os.path.join(checkpoint_path, post_weights[key]))
                    net.load_state_dict(state_dict)
            except Exception as E:
                print(E)
                time.sleep(time_inter)
                continue
            logging.info("Start eval.")  # Start the eval loop
            n_gt = 0
            correct = 0
            imagepath_list = []
            for step, samples in enumerate(dataloader):
                images, labels, image_paths = samples["image"], samples[
                    "label"], samples["img_path"]
                labels = labels.cuda()
                with torch.no_grad():
                    time1 = datetime.datetime.now()
                    outputs = net(images)

                    output_list = []
                    for i in range(3):
                        output_list.append(yolo_losses[i](outputs[i]))
                    output = torch.cat(output_list, 1)
                    output = non_max_suppression(output, 1, conf_thres=0.5)
                    if ((datetime.datetime.now() - time1).seconds > 5):
                        logging.info('Batch %d time is too long ' % (step))
                        n_gt = 1
                        break
                    print(
                        "time2",
                        (datetime.datetime.now() - time1).seconds * 1000 +
                        (datetime.datetime.now() - time1).microseconds // 1000)
                    #  calculate
                    for sample_i in range(labels.size(0)):
                        # Get labels for sample where width is not zero (dummies)
                        target_sample = labels[sample_i,
                                               labels[sample_i, :, 3] != 0]
                        for obj_cls, tx, ty, tw, th in target_sample:
                            # Get rescaled gt coordinates
                            tx1, tx2 = config["img_w"] * (
                                tx - tw / 2), config["img_w"] * (tx + tw / 2)
                            ty1, ty2 = config["img_h"] * (
                                ty - th / 2), config["img_h"] * (ty + th / 2)
                            n_gt += 1
                            box_gt = torch.cat([
                                coord.unsqueeze(0)
                                for coord in [tx1, ty1, tx2, ty2]
                            ]).view(1, -1)
                            sample_pred = output[sample_i]
                            if sample_pred is not None:
                                # Iterate through predictions where the class predicted is same as gt
                                for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[
                                        sample_pred[:, 6] == obj_cls.cuda()]:
                                    box_pred = torch.cat([
                                        coord.unsqueeze(0)
                                        for coord in [x1, y1, x2, y2]
                                    ]).view(1, -1)
                                    iou = bbox_iou(box_pred, box_gt)
                                    if iou >= config["iou_thres"]:
                                        correct += 1
                                        break
                                    else:
                                        if image_paths[
                                                sample_i] not in imagepath_list:
                                            imagepath_list.append(
                                                image_paths[sample_i])
                            else:
                                if image_paths[sample_i] not in imagepath_list:
                                    imagepath_list.append(
                                        image_paths[sample_i])
                if n_gt:
                    logging.info('Batch [%d/%d] err_count:%d mAP: %.5f' %
                                 (step, len(dataloader), len(imagepath_list),
                                  float(correct / n_gt)))

            logging.info('Mean Average Precision: %.5f' %
                         float(correct / n_gt))
            Mean_Average = float(correct / n_gt)
            ini_name = os.path.join(
                checkpoint_path + '/result/',
                '%.4f_%s.ini' % ((float(post_weights[key].split("_")[0]) +
                                  float(correct / n_gt)) / 2,
                                 post_weights[key].replace(".weights", "")))
            write_ini(ini_name, Mean_Average, imagepath_list)
            break
Code example #16
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold,
                   pred_boxes):
        bs = target.size(0)

        mask = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           requires_grad=False)
        noobj_mask = torch.ones(bs,
                                self.num_anchors,
                                in_h,
                                in_w,
                                requires_grad=False)
        tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        gwxh = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           requires_grad=False)
        tconf = torch.zeros(bs,
                            self.num_anchors,
                            in_h,
                            in_w,
                            requires_grad=False)
        tcls = torch.zeros(bs,
                           self.num_anchors,
                           in_h,
                           in_w,
                           self.num_classes,
                           requires_grad=False)
        for b in range(bs):
            # print(pred_shapes.size())
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    continue
                # Convert to position relative to box
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Get shape of gt box
                gt_box = torch.FloatTensor([0, 0, gw, gh]).unsqueeze(0)
                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (self.num_anchors, 2)), np.array(anchors)), 1))
                # print('anchor_shapes:', anchor_shapes)
                # print('gt_box:', gt_box)
                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box, anchor_shapes, x1y1x2y2=False)
                # Where the overlap is larger than threshold set mask to zero (ignore)
                pred_ious = bbox_iou(torch.FloatTensor([gx, gy, gw,
                                                        gh]).unsqueeze(0),
                                     pred_boxes[b, :, gj, gi].cpu(),
                                     x1y1x2y2=False)
                # print(pred_ious.size())
                noobj_mask[b, pred_ious > ignore_threshold, gj, gi] = 0
                # noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)

                # Masks
                mask[b, best_n, gj, gi] = 1
                # Coordinates
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj
                # Width and height
                tw[b, best_n, gj,
                   gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                th[b, best_n, gj,
                   gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                gwxh[b, best_n, gj, gi] = torch.sigmoid(gw) * torch.sigmoid(gh)
                # object
                tconf[b, best_n, gj, gi] = 1
                # One-hot encoding of label
                tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, gwxh, tconf, tcls
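For context, a loss forward pass would typically consume these targets roughly as sketched below. This is only an illustration: the attribute names (self.mse_loss, self.bce_loss, self.ignore_threshold) and the exact weighting are assumptions, not taken from the snippet above.

    def forward_sketch(self, x, y, w, h, conf, pred_cls, pred_boxes,
                       target, anchors, in_w, in_h):
        # Build per-cell regression/classification targets on the same grid as the predictions.
        mask, noobj_mask, tx, ty, tw, th, gwxh, tconf, tcls = self.get_target(
            target, anchors, in_w, in_h, self.ignore_threshold, pred_boxes)
        # Coordinate losses are evaluated only at cells assigned a ground-truth box.
        loss_x = self.mse_loss(x * mask, tx * mask)
        loss_y = self.mse_loss(y * mask, ty * mask)
        loss_w = self.mse_loss(w * mask, tw * mask)
        loss_h = self.mse_loss(h * mask, th * mask)
        # Objectness: positives at matched cells, negatives wherever noobj_mask stayed 1.
        loss_conf = self.bce_loss(conf * mask, tconf * mask) + \
            0.5 * self.bce_loss(conf * noobj_mask, noobj_mask * 0.0)
        # Class loss only at positive cells.
        loss_cls = self.bce_loss(pred_cls[mask == 1], tcls[mask == 1])
        return loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls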
Code Example #17
    def eval_voc(self, val_dataset, classes, iou_thresh=0.5):
        logging.info('Start Evaling')
        results = {}

        def voc_ap(rec, prec, use_07_metric=False):
            """ ap = voc_ap(rec, prec, [use_07_metric])
            Compute VOC AP given precision and recall.
            If use_07_metric is true, uses the
            VOC 07 11 point method (default:False).
            """
            _rec = np.arange(0., 1.1, 0.1)
            _prec = []
            if use_07_metric:
                # 11 point metric
                ap = 0.
                for t in np.arange(0., 1.1, 0.1):
                    if np.sum(rec >= t) == 0:
                        p = 0
                    else:
                        p = np.max(prec[rec >= t])
                    _prec.append(p)
                    ap = ap + p / 11.
            else:
                # correct AP calculation
                # first append sentinel values at the end
                mrec = np.concatenate(([0.], rec, [1.]))
                mpre = np.concatenate(([0.], prec, [0.]))

                # compute the precision envelope
                for i in range(mpre.size - 1, 0, -1):
                    mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

                # to calculate area under PR curve, look for points
                # where X axis (recall) changes value
                i = np.where(mrec[1:] != mrec[:-1])[0]

                # and sum (\Delta recall) * prec
                ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

            return ap

        def caculate_ap(correct, conf, pred_cls, total, classes):
            correct, conf, pred_cls = np.array(correct), np.array(
                conf), np.array(pred_cls)
            index = np.argsort(-conf)
            correct, conf, pred_cls = correct[index], conf[index], pred_cls[
                index]

            ap = []
            AP = {}
            for i, c in enumerate(classes):
                k = pred_cls == i
                n_gt = total[c]
                n_p = sum(k)

                if n_gt == 0 and n_p == 0:
                    continue
                elif n_p == 0 or n_gt == 0:
                    ap.append(0)
                    AP[c] = 0
                else:
                    fpc = np.cumsum(1 - correct[k])
                    tpc = np.cumsum(correct[k])

                    rec = tpc / n_gt
                    prec = tpc / (tpc + fpc)

                    _ap = voc_ap(rec, prec)
                    ap.append(_ap)
                    AP[c] = _ap
            mAP = np.array(ap).mean()
            return mAP, AP

        def parse_rec(imagename, classes):
            filename = imagename.replace('jpg', 'xml')
            tree = ET.parse(filename)
            objects = []
            for obj in tree.findall('object'):
                difficult = obj.find('difficult').text
                cls = obj.find('name').text
                if cls not in classes or int(difficult) == 1:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                obj = [
                    float(xmlbox.find('xmin').text),
                    float(xmlbox.find('xmax').text),
                    float(xmlbox.find('ymin').text),
                    float(xmlbox.find('ymax').text), cls_id
                ]
                objects.append(obj)
            return np.asarray(objects)

        total = {}
        for cls in classes:
            total[cls] = 0

        correct = []
        conf_list = []
        pred_list = []
        for step, samples in enumerate(val_dataset):
            images, labels = samples['image'], samples['label']
            image_paths, origin_sizes = samples['image_path'], samples[
                'origin_size']

            logging.info("Now have finished [%.3d/%.3d]" %
                         (step, len(val_dataset)))
            with torch.no_grad():
                outputs = self.net(images)
                output_list = []
                for i in range(3):
                    output_list.append(self.yolo_loss[i](outputs[i]))
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(output,
                                                       self.config.num_classes,
                                                       conf_thres=0.001,
                                                       nms_thres=0.4)

            for idx, detections in enumerate(batch_detections):
                image_path = image_paths[idx]
                label = labels[idx]
                for t in range(label.size(0)):
                    if label[t, :].sum() == 0:
                        label = label[:t, :]
                        break
                label_cls = np.array(label[:, 0])
                for cls_id in label_cls:
                    total[classes[int(cls_id)]] += 1
                if detections is None:
                    if label.size(0) != 0:
                        label_cls = np.unique(label_cls)
                        for cls_id in label_cls:
                            correct.append(0)
                            conf_list.append(1)
                            pred_list.append(int(cls_id))
                    continue
                if label.size(0) == 0:
                    for *pred_box, conf, cls_conf, cls_pred in detections:
                        correct.append(0)
                        conf_list.append(float(conf))  # cast to float so json.dump below can serialize it
                        pred_list.append(int(cls_pred))
                else:
                    detections = detections[np.argsort(-detections[:, 4])]
                    detected = []

                    for *pred_box, conf, cls_conf, cls_pred in detections:
                        pred_box = torch.FloatTensor(pred_box).view(1, -1)
                        pred_box[:, 2:] = pred_box[:, 2:] - pred_box[:, :2]
                        pred_box[:, :2] = pred_box[:, :2] + pred_box[:, 2:] / 2
                        pred_box = pred_box / self.config.image_size
                        ious = bbox_iou(pred_box, label[:, 1:])
                        best_i = np.argmax(ious)
                        if ious[best_i] > iou_thresh and int(cls_pred) == int(
                                label[best_i, 0]) and best_i not in detected:
                            correct.append(1)
                            detected.append(best_i)
                        else:
                            correct.append(0)
                        pred_list.append(int(cls_pred))
                        conf_list.append(float(conf))

        results['correct'] = correct
        results['conf'] = conf_list
        results['pred_cls'] = pred_list
        results['total'] = total
        with open('results.json', 'w') as f:
            json.dump(results, f)
            logging.info('Having saved to results.json')

        logging.info('Begin calculating....')
        with open('results.json', 'r') as result_file:
            results = json.load(result_file)

        mAP, AP_class = caculate_ap(correct=results['correct'],
                                    conf=results['conf'],
                                    pred_cls=results['pred_cls'],
                                    total=results['total'],
                                    classes=classes)
        logging.info('mAP(IoU=0.5):{:.1f}'.format(mAP * 100))
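The nested voc_ap above computes AP as the area under the interpolated precision-recall curve. A standalone walk-through of its else-branch on a toy curve (independent of the class, purely for illustration):

import numpy as np

# Toy detector: three detections sorted by confidence, two ground-truth boxes.
# The first and third detections are true positives, the second is a false positive.
rec = np.array([0.5, 0.5, 1.0])         # cumulative recall after each detection
prec = np.array([1.0, 0.5, 2.0 / 3.0])  # precision after each detection

# Same steps as the non-07 branch of voc_ap:
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
for i in range(mpre.size - 1, 0, -1):
    mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
i = np.where(mrec[1:] != mrec[:-1])[0]
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
print(ap)  # 0.8333...: 0.5 * 1.0 + 0.5 * (2/3)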
Code Example #18
File: yolo_loss.py  Project: jacke121/YOLOv3_PyTorch
    def get_target(self, pred_boxes, target, anchors, in_w, in_h,
                   ignore_threshold):
        bs = target.size(0)
        # tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        # tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes, requires_grad=False)
        nGT = 0
        nCorrect = 0
        for b in range(bs):
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    continue
                nGT += 1
                # Convert to position relative to box
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Get shape of gt box
                gt_box = torch.FloatTensor(np.array([0, 0, gw,
                                                     gh])).unsqueeze(0)

                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (self.num_anchors, 2)), np.array(anchors)), 1))
                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box, anchor_shapes)
                # Where the overlap is larger than threshold set mask to zero (ignore)
                self.noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)

                if gi >= pred_boxes.shape[3]:
                    print(pred_boxes.shape, b, best_n, gj, gi)
                    gi = pred_boxes.shape[3] - 1

                if gj >= pred_boxes.shape[2]:
                    print(pred_boxes.shape, b, best_n, gj, gi)
                    gj = pred_boxes.shape[2] - 1

                gt_box = torch.FloatTensor(np.array([gx, gy, gw,
                                                     gh])).unsqueeze(0)
                pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
                # Masks
                self.conf_mask[b, best_n, gj, gi] = 1
                # Coordinates
                self.tx[b, best_n, gj, gi] = gx - gi
                self.ty[b, best_n, gj, gi] = gy - gj
                # Width and height
                self.tw[b, best_n, gj,
                        gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                self.th[b, best_n, gj,
                        gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                # object
                self.tconf[b, best_n, gj, gi] = 1
                # One-hot encoding of label
                self.tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

                iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
                if iou > 0.8:
                    nCorrect = nCorrect + 1
        return nGT, nCorrect
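Unlike the earlier variants, this get_target fills in pre-allocated buffers (self.conf_mask, self.noobj_mask, self.tx, ..., self.tcls) instead of creating them locally; the commented-out lines at its top hint at their shapes. Below is a minimal sketch of that initialization, assuming it runs once per forward pass before get_target is called (the method name reset_targets is an assumption):

    def reset_targets(self, bs, in_w, in_h):
        # Hypothetical helper: (re)allocate the target buffers that get_target writes into.
        self.conf_mask = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.noobj_mask = torch.ones(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
        self.tcls = torch.zeros(bs, self.num_anchors, in_h, in_w,
                                self.num_classes, requires_grad=False)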