Example #1
    def get_ignore(self, prediction, target, scaled_anchors, in_w, in_h,
                   noobj_mask):
        bs = len(target)
        anchor_index = [[0, 1, 2], [3, 4, 5],
                        [6, 7, 8]][self.feature_length.index(in_w)]
        scaled_anchors = np.array(scaled_anchors)[anchor_index]
        # print(scaled_anchors)
        # Offsets for the anchor (prior) box centers
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        # Width/height adjustment parameters for the anchor boxes
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height

        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

        # Generate the grid; anchor centers sit at the top-left corners of the grid cells
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_w, 1).repeat(
            int(bs * self.num_anchors / 3), 1,
            1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_h, 1).t().repeat(
            int(bs * self.num_anchors / 3), 1,
            1).view(y.shape).type(FloatTensor)

        # Generate the anchor box widths and heights
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))

        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1,
                                                 in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1,
                                                 in_h * in_w).view(h.shape)

        # Compute the adjusted anchor box centers and sizes
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        for i in range(bs):
            pred_boxes_for_ignore = pred_boxes[i]
            pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4)

            for t in range(target[i].shape[0]):
                gx = target[i][t, 0] * in_w
                gy = target[i][t, 1] * in_h
                gw = target[i][t, 2] * in_w
                gh = target[i][t, 3] * in_h
                gt_box = torch.FloatTensor(np.array(
                    [gx, gy, gw, gh])).unsqueeze(0).type(FloatTensor)

                anch_ious = bbox_iou(gt_box,
                                     pred_boxes_for_ignore,
                                     x1y1x2y2=False)
                anch_ious = anch_ious.view(pred_boxes[i].size()[:3])
                noobj_mask[i][anch_ious > self.ignore_threshold] = 0
                # print(torch.max(anch_ious))
        return noobj_mask
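Every snippet on this page calls a bbox_iou helper that the page itself does not reproduce. Below is a minimal sketch of the broadcast-style signature assumed by calls such as the one above (corner boxes by default, center-format boxes when x1y1x2y2=False); the actual helper differs between the repositories these examples come from, some of which pass box1 transposed or support GIoU/DIoU flags, so treat this as an illustration rather than the original utility.

import torch

def bbox_iou(box1, box2, x1y1x2y2=True, eps=1e-16):
    # box1: (1, 4) or (N, 4), box2: (M, 4); broadcasting yields element-wise IoUs
    if not x1y1x2y2:
        # convert center format (x, y, w, h) to corners (x1, y1, x2, y2)
        box1 = torch.cat((box1[..., :2] - box1[..., 2:4] / 2,
                          box1[..., :2] + box1[..., 2:4] / 2), dim=-1)
        box2 = torch.cat((box2[..., :2] - box2[..., 2:4] / 2,
                          box2[..., :2] + box2[..., 2:4] / 2), dim=-1)
    # intersection rectangle
    inter_w = (torch.min(box1[..., 2], box2[..., 2]) - torch.max(box1[..., 0], box2[..., 0])).clamp(0)
    inter_h = (torch.min(box1[..., 3], box2[..., 3]) - torch.max(box1[..., 1], box2[..., 1])).clamp(0)
    inter = inter_w * inter_h
    # union = area1 + area2 - intersection
    area1 = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    area2 = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])
    return inter / (area1 + area2 - inter + eps)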
Example #2
    def _first_loss(self, pred, target):
        """
        
        :param pred: type: tensor: tensor.size([image_num, anchor_num, grid_j, gird_i, 5+class_num])
        :param target: type: list: [[image_num, x, y, w, h, cls],...]
        :return: ignore_mask which ignores iou(pred, truth)  > ignore_thres_first_loss
        """
        
        # Init ignore_mask which ignores iou(pred, truth)  > ignore_thres_first_loss
        ignore_mask = self.ByteTensor(self.batch_size, self.num_anchors, self.grid_h, self.grid_w).fill_(1)

        if len(target):
            index_start = target[0][0]
            for i, pi0 in enumerate(pred):
                t = target[target[..., 0] == (i + index_start)]   # Targets for image i of the batch
                if len(t):
                    p_boxes = torch.zeros_like(pi0)
                    # transform predictions to YOLO-format boxes
                    p_boxes[..., 0] = (torch.sigmoid(pi0[..., 0]) + self.grid_x[i]) / self.grid_w
                    p_boxes[..., 1] = (torch.sigmoid(pi0[..., 1]) + self.grid_y[i]) / self.grid_h
                    p_boxes[..., 2] = (torch.exp(pi0[..., 2]) * self.anchor_w[i]) / self.grid_w
                    p_boxes[..., 3] = (torch.exp(pi0[..., 3]) * self.anchor_h[i]) / self.grid_h
                    p_boxes = p_boxes.view(pi0.size()[0] * pi0.size()[1] * pi0.size()[2], 6)
            
                    # compute iou between each predicted grid cell and all targets
                    ious = torch.stack(tuple([bbox_iou(x, p_boxes[:, :4], False) for x in t[:, 1:5]]))
                    best_ious, best_index = ious.max(0)
                    best_ious, best_index = best_ious.view(pi0.size()[0], pi0.size()[1], pi0.size()[2], 1), \
                                            best_index.view(pi0.size()[0], pi0.size()[1], pi0.size()[2], 1)
                    ignore_mask[i][torch.squeeze(best_ious > self.ignore_thres_first_loss, 3)] = 0
        
        return ignore_mask
Example #3
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Args:
        prediction.shape(batch_size, num_yolo*num_anchors*grid_size*grid_size, 85)
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """
    # From center(xywh) to corner(xyxy)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])

    output = [None for _ in range(len(prediction))]

    for image_i, image_pred in enumerate(prediction):

        # Filter out confidence scores below threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]

        # If none are remaining => process next image
        if not image_pred.size(0):
            continue

        # score = object_conf. * max_class_pred_prob.
        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]

        # Sort by it
        image_pred = image_pred[np.argsort(-score)]
        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)

        # detections.shape(unknown, 7_vals)
        # 7_vals=(x1, y1, x2, y2, object_conf., class_score, class_pred_label)
        detections = torch.cat(
            (image_pred[:, :5], class_confs.float(), class_preds.float()), 1)

        # Perform non-maximum suppression
        keep_boxes = []
        while detections.size(0):

            #=== Indices of boxes with large IOUs and matching labels ===
            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0),
                                     detections[:, :4]) > nms_thres
            label_match = detections[0, -1] == detections[:, -1]
            invalid = large_overlap & label_match

            #=== Merge overlapping bboxes weighted by their confidence ===
            weights = detections[invalid, 4:5]
            detections[0, :4] = (
                weights * detections[invalid, :4]).sum(0) / weights.sum()

            keep_boxes += [detections[0]]

            #=== remove the suppressed detections ===
            detections = detections[~invalid]

        if keep_boxes:
            output[image_i] = torch.stack(keep_boxes)

    return output
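Several snippets on this page also rely on an xywh2xyxy conversion that is never shown. A minimal sketch, assuming it operates on the last dimension and accepts either a torch tensor or a NumPy array:

import torch

def xywh2xyxy(x):
    # (..., 4) boxes: (center_x, center_y, w, h) -> (x1, y1, x2, y2)
    y = x.clone() if isinstance(x, torch.Tensor) else x.copy()
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom-right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom-right y
    return y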
Example #4
def build_target(raw_coord, pred, anchors_full, args):
    coord = Variable(torch.zeros(raw_coord.size(0), raw_coord.size(1)).cuda())
    batch, grid = raw_coord.size(0), args.size // args.gsize
    coord[:, 0] = (raw_coord[:, 0] + raw_coord[:, 2]) / (2 * args.size)
    coord[:, 1] = (raw_coord[:, 1] + raw_coord[:, 3]) / (2 * args.size)
    coord[:, 2] = (raw_coord[:, 2] - raw_coord[:, 0]) / (args.size)
    coord[:, 3] = (raw_coord[:, 3] - raw_coord[:, 1]) / (args.size)
    coord = coord * grid
    bbox = torch.zeros(coord.size(0), 9, 5, grid, grid)

    best_n_list, best_gi, best_gj = [], [], []

    for ii in range(batch):
        batch, grid = raw_coord.size(0), args.size // args.gsize
        gi = coord[ii, 0].long()
        gj = coord[ii, 1].long()
        tx = coord[ii, 0] - gi.float()
        ty = coord[ii, 1] - gj.float()
        gw = coord[ii, 2]
        gh = coord[ii, 3]

        anchor_idxs = range(9)
        anchors = [anchors_full[i] for i in anchor_idxs]
        scaled_anchors = [ (x[0] / (args.anchor_imsize/grid), \
            x[1] / (args.anchor_imsize/grid)) for x in anchors]

        ## Get shape of gt box
        gt_box = torch.FloatTensor(np.array([0, 0, gw, gh],
                                            dtype=np.float32)).unsqueeze(0)
        ## Get shape of anchor box
        anchor_shapes = torch.FloatTensor(
            np.concatenate((np.zeros(
                (len(scaled_anchors), 2)), np.array(scaled_anchors)), 1))
        ## Calculate iou between gt and anchor shapes
        # anch_ious = list(bbox_iou(gt_box, anchor_shapes))
        anch_ious = list(bbox_iou(gt_box, anchor_shapes, x1y1x2y2=False))
        ## Find the best matching anchor box
        best_n = np.argmax(np.array(anch_ious))

        tw = torch.log(gw / scaled_anchors[best_n][0] + 1e-16)
        th = torch.log(gh / scaled_anchors[best_n][1] + 1e-16)

        bbox[ii, best_n, :, gj, gi] = torch.stack(
            [tx, ty, tw, th, torch.ones(1).cuda().squeeze()])
        best_n_list.append(int(best_n))
        best_gi.append(gi)
        best_gj.append(gj)

    bbox = Variable(bbox.cuda())
    return bbox, best_gi, best_gj, best_n_list
Example #5
def get_batch_statistics(outputs, targets, iou_threshold):
    """ Compute true positives, predicted scores and predicted labels per sample.
    Args:
        targets.shape(num_bboxes, 6_vals), 6_vals=(idx, labels, x1,y1,x2,y2)
    """
    batch_metrics = []
    for sample_i in range(len(outputs)):

        # if there are no detections for this sample
        if outputs[sample_i] is None:
            continue

        # output.shape(num_bboxes, 7_vals)
        output = outputs[sample_i]
        pred_boxes = output[:, :4]  # pred_(x1, y1, x2, y2)
        pred_scores = output[:, 4]  # obj_conf.
        pred_labels = output[:, -1]  # class_pred_labels

        true_positives = np.zeros(pred_boxes.shape[0])

        # targets for i-th sample
        annotations = targets[targets[:, 0] == sample_i]
        # labels for i-th sample
        target_labels = annotations[:, 1] if len(annotations) else []

        if len(annotations):
            annotations = annotations[:, 1:]
            detected_boxes = []
            target_boxes = annotations[:, 1:]

            for pred_i, (pred_box,
                         pred_label) in enumerate(zip(pred_boxes,
                                                      pred_labels)):

                # If all targets have already been found, break
                if len(detected_boxes) == len(annotations):
                    break

                # Ignore if label is not one of the target labels
                if pred_label not in target_labels:
                    continue

                iou, box_index = bbox_iou(pred_box.unsqueeze(0),
                                          target_boxes).max(0)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes += [box_index]
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics
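A minimal sketch of how non_max_suppression and get_batch_statistics above are typically chained during evaluation; model, dataloader and ap_per_class are assumed to exist in the host repository, and the names here are purely illustrative:

import numpy as np
import torch

# hypothetical evaluation loop; targets follow the (idx, label, x1, y1, x2, y2) layout documented above
sample_metrics, target_labels = [], []
for imgs, targets in dataloader:
    target_labels += targets[:, 1].tolist()
    with torch.no_grad():
        outputs = non_max_suppression(model(imgs), conf_thres=0.5, nms_thres=0.4)
    sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=0.5)

# concatenate per-sample statistics and compute precision/recall/AP per class
true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in zip(*sample_metrics)]
precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, target_labels)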
Example #6
def inter_cls_nms(detections, threshold=0.8):
    """
    A NMS function similar to the one found in utils.utils but unlike that does suppression regardless of the class
    :param detections: output from network/ previous class level NMS stage
    :param threshold: NMS threshold level
    :return: Non overlapping detection regions
    """

    # detections are expected to be sorted by confidence (max to min), since the previous stage already does this
    # (x1, y1, x2, y2, object_conf, class_conf, class)
    det_max = []
    while (detections.shape[0]):
        det_max.append(detections[0, :])
        iou = bbox_iou(detections[0, :], detections[:, :])
        # print(iou)
        detections = detections[iou < threshold, :]
    det_max = torch.cat(det_max).reshape((-1, 7))
    return det_max
Example #7
def compute_loss_for_MFCP(output, targets, aux_model):
    ft = torch.cuda.FloatTensor if output[0].is_cuda else torch.Tensor
    lcls, lbox = ft([0]), ft([0])
    if type(aux_model) in (nn.parallel.DataParallel,
                           nn.parallel.DistributedDataParallel):
        aux_model = aux_model.module  # the aux hyperparameters live inside the model, so unwrap the (distributed) data-parallel shell
    hyp = aux_model.hyp
    ft = torch.cuda.FloatTensor if output.is_cuda else torch.Tensor
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([hyp['cls_pw']]),
                                  reduction='sum')
    txy, twh, tcls, tbox, index, anchors_vec = build_targets_for_MFCP(
        aux_model, targets)
    b, a, j, i = index
    nb = len(b)
    if nb:
        pn = output[b, a, j, i]  # predictions matched to targets
        pxy = torch.sigmoid(pn[:, 0:2])
        pbox = torch.cat(
            [pxy, torch.exp(pn[:, 2:4]).clamp(max=1E3) * anchors_vec], dim=1)
        DIoU = bbox_iou(pbox.t(), tbox, x1y1x2y2=False, DIoU=True)

        lbox += (1 - DIoU).sum()

        tclsm = torch.zeros_like(pn[:, 4:])
        tclsm[range(len(b)), tcls] = 1.0

        lcls += BCEcls(pn[:, 4:], tclsm)

    lbox *= hyp['diou']
    lcls *= hyp['cls']

    if nb:
        lbox /= nb
        lcls /= (nb * aux_model.nc)

    loss = lbox + lcls

    return loss, torch.cat((lbox, lcls, loss)).clone().detach()
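The DIoU=True flag above is assumed to add a distance-IoU penalty on top of plain IoU: IoU minus the squared distance between box centers, normalised by the squared diagonal of the smallest enclosing box (Zheng et al., 2020). A minimal sketch on corner-format boxes; the repo's own bbox_iou folds this into one function:

import torch

def diou(box1, box2, eps=1e-16):
    # box1, box2: (..., 4) corner-format boxes (x1, y1, x2, y2)
    inter_w = (torch.min(box1[..., 2], box2[..., 2]) - torch.max(box1[..., 0], box2[..., 0])).clamp(0)
    inter_h = (torch.min(box1[..., 3], box2[..., 3]) - torch.max(box1[..., 1], box2[..., 1])).clamp(0)
    inter = inter_w * inter_h
    area1 = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    area2 = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])
    iou = inter / (area1 + area2 - inter + eps)
    # squared distance between the two box centers
    rho2 = (((box1[..., :2] + box1[..., 2:4]) - (box2[..., :2] + box2[..., 2:4])) ** 2).sum(-1) / 4
    # squared diagonal of the smallest box enclosing both
    cw = torch.max(box1[..., 2], box2[..., 2]) - torch.min(box1[..., 0], box2[..., 0])
    ch = torch.max(box1[..., 3], box2[..., 3]) - torch.min(box1[..., 1], box2[..., 1])
    return iou - rho2 / (cw ** 2 + ch ** 2 + eps)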
Example #8
 def on_batch_end(self, last_output, last_target, **kwargs):
     bs = last_output[0].shape[0]
     iou_thres = torch.tensor((0.5, ))
     niou = iou_thres.numel()
     for batch_idx in range(0, bs):
         target_boxes = last_target[0][batch_idx].cpu()
         target_classes = last_target[1][batch_idx].cpu() - 1.0
         people_idxs = (torch.LongTensor(
             (0, )) == target_classes).nonzero().view(-1)
         target_boxes = target_boxes[people_idxs]
         target_classes = target_classes[people_idxs]
         yolo_out = grab_idx(last_output, batch_idx)
         pred = YoloCategoryList.yolo2pred(
             yolo_out)  # list([[x1, y1, x2, y2, conf, cls]])
         detections = pred[0]
         if detections is None:  # bs=1, first and only result
             if len(target_classes):
                 self.stats.append((torch.zeros(0, 1), torch.Tensor(),
                                    torch.Tensor(), target_classes))
             continue
         boxes = YoloCategoryList.bbox2fai(detections)
         correct = torch.zeros(len(detections), niou)
         if len(target_classes):
             for det_idx, det in enumerate(
                     detections):  # detections per image
                 # Break if all targets already located in image
                 pbox = boxes[det_idx]
                 iou, j = bbox_iou(pbox, target_boxes).max(0)
                 correct[det_idx] = iou > iou_thres
         conf = detections[:, 4]
         clazz = detections[:, 5]
         self.stats.append((correct, conf, clazz, target_classes))
     stats = [np.concatenate(x, 0)
              for x in list(zip(*self.stats))]  # to numpy
     p, r, ap, f1, ap_class = ap_per_class(*stats)
     self.apAt50 = ap.item()
Example #9
    def get_LCP_area(self, targets, predictions, anchors, feature_w,
                     feature_h):
        pred_info = []
        # normalize the anchors (divide by the 416 input size)
        anchors = torch.from_numpy(anchors).to(targets.dtype).to(
            targets.device) / 416.0
        # anchors_vec are the anchors expressed on the feature map
        anchors_vec = anchors * torch.tensor(
            [feature_h, feature_w], dtype=anchors.dtype, device=anchors.device)
        # compute the various index tensors
        gwh = targets[:, 4:6]
        iou_anchors = wh_iou(anchors, gwh)
        _, idx_a = iou_anchors.max(0)
        idx_p = idx_a // 3
        idx_a = idx_a % 3
        idx_b = targets[:, 0].long()
        # select the matched prediction cells
        for i, p in enumerate(predictions):
            mask = idx_p == i
            idx_x, idx_y = (targets[:, 2] *
                            p.size(-2)).long(), (targets[:, 3] *
                                                 p.size(-3)).long()
            pred_info.append(p[idx_b[mask], idx_a[mask], idx_y[mask],
                               idx_x[mask]])
        pred_info = torch.cat(pred_info, dim=0)

        # from here on pred_info_detach is only used for box selection, so gradients are no longer needed
        pred_info_detach = pred_info.clone().detach()
        pred_info_detach[:, 0:2] = torch.sigmoid(
            pred_info_detach[:, 0:2]) + torch.stack([idx_x, idx_y], dim=0).t()
        pred_info_detach[:, 2:4] = torch.exp(pred_info_detach[:, 2:4]).clamp(
            max=1E3) * anchors_vec[(idx_p + idx_a)]

        # convert the labels (previously normalized) to feature-map coordinates
        targets[:, [2, 4]] *= feature_w
        targets[:, [3, 5]] *= feature_h
        # compute which ones have IoU greater than 0.5
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])
        pred_info_detach[:, :4] = xywh2xyxy(pred_info_detach[:, :4])

        boxes_union, boxes_gt = torch.zeros(
            [len(targets), 5],
            device=targets.device), torch.zeros([len(targets), 5],
                                                device=targets.device)
        boxes_gt[:, 0] = idx_b
        boxes_gt[:, 1:] = targets[:, 2:]

        boxes_union[:, 0] = idx_b
        boxes_union[:, 1:3] = torch.min(pred_info_detach[:, :2], targets[:,
                                                                         2:4])
        boxes_union[:, 3:5] = torch.max(pred_info_detach[:, 2:4], targets[:,
                                                                          4:6])

        giou = bbox_iou(torch.cat([
            torch.sigmoid(pred_info[:, 0:2]) +
            torch.stack([idx_x, idx_y], dim=0).t(),
            torch.exp(pred_info[:, 2:4]).clamp(max=1E3) *
            anchors_vec[(idx_p + idx_a)]
        ],
                                  dim=1).t(),
                        targets[:, 2:6],
                        x1y1x2y2=False,
                        GIoU=True)

        return boxes_gt, boxes_union, (1 - giou).mean()
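The wh_iou call in get_LCP_area matches anchors to targets by width and height alone, i.e. both boxes are treated as if they shared the same center. A minimal sketch of what it is assumed to compute; the real helper may order its arguments differently:

import torch

def wh_iou(wh1, wh2):
    # wh1: (N, 2) anchor sizes, wh2: (M, 2) target sizes -> (N, M) IoU matrix
    wh1 = wh1[:, None]   # (N, 1, 2)
    wh2 = wh2[None]      # (1, M, 2)
    inter = torch.min(wh1, wh2).prod(2)  # overlap when both boxes share a center
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / union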
Example #10
def non_max_suppression(prediction,
                        num_classes,
                        conf_thres=0.5,
                        nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """

    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
        image_pred = image_pred[conf_mask]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes],
                                           1,
                                           keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat(
            (image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4],
                                            descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = (max_detections if output[image_i] is None else
                               torch.cat((output[image_i], max_detections)))

    return output
Example #11
def validate(*,
             dataloader,
             model,
             device,
             step=-1,
             bbox_all=False,
             debug_mode):
    # result = open("logs/result.txt", "w" )

    with torch.no_grad():
        t_start = time.time()
        conf_thres, nms_thres, iou_thres = model.get_threshs()
        width, height = model.img_size()
        model.eval()
        print("Calculating mAP - Model in evaluation mode")
        n_images = len(dataloader.dataset)
        mAPs = []
        mR = []
        mP = []
        for batch_i, (img_uris, imgs, targets) in enumerate(
                tqdm(dataloader, desc='Computing mAP')):
            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            # output,_,_,_ = model(imgs)
            output = model(imgs)

            for sample_i, (labels,
                           detections) in enumerate(zip(targets, output)):
                detections = detections[detections[:, 4] > conf_thres]
                if detections.size()[0] == 0:
                    predictions = torch.tensor([])
                else:
                    predictions = torch.argmax(detections[:, 5:], dim=1)
                # From (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, nms_thres)
                box_corner = box_corner[nms_indices]
                probabilities = probabilities[nms_indices]
                predictions = predictions[nms_indices]

                if nms_indices.shape[
                        0] == 0:  # there should always be at least one label
                    continue
                # Get detections sorted by decreasing confidence scores
                _, inds = torch.sort(-probabilities)
                box_corner = box_corner[inds]

                probabilities = probabilities[inds]
                predictions = predictions[inds]
                labels = labels[(labels[:, 1:5] <= 0).sum(
                    dim=1
                ) == 0]  # remove the 0-padding added by the dataloader
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 1:5])
                target_boxes[:, (0, 2)] *= width
                target_boxes[:, (1, 3)] *= height
                detected = torch.zeros(target_boxes.shape[0],
                                       device=target_boxes.device,
                                       dtype=torch.uint8)
                correct = torch.zeros(nms_indices.shape[0],
                                      device=box_corner.device,
                                      dtype=torch.uint8)
                # 0th dim is the detection
                # (repeat in the 1st dim)
                # 2nd dim is the coord
                ious = bbox_iou(
                    box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0],
                                                   -1),
                    target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1,
                                                     -1))
                # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou

                #######################################################
                ##### skip images without label #####
                if [] in ious.data.tolist():
                    continue
                #######################################################

                best_is = torch.argmax(ious, dim=1)

                # TODO fix for multi-class. Need to use predictions somehow?
                for i, iou in enumerate(ious):
                    best_i = best_is[i]
                    if ious[i, best_i] > iou_thres and detected[best_i] == 0:
                        correct[i] = 1
                        detected[best_i] = 1

                # Compute Average Precision (AP) per class
                ap, r, p = average_precision(tp=correct,
                                             conf=probabilities,
                                             n_gt=labels.shape[0])

                # Compute mean AP across all classes in this image, and append to image list
                mAPs.append(ap)
                mR.append(r)
                mP.append(p)
                if bbox_all or sample_i < 2:  # log the first two images in every batch
                    img_filepath = img_uris[sample_i]
                    if img_filepath is None:
                        print(
                            "NULL image filepath for image uri: {uri}".format(
                                uri=img_uris[sample_i]))
                    orig_img = Image.open(img_filepath)
                    # draw = ImageDraw.Draw(img_with_boxes)
                    w, h = orig_img.size
                    pad_h, pad_w, scale_factor = calculate_padding(
                        h, w, height, width)

                    ##################################
                    detect_box = copy.deepcopy(box_corner)
                    ##################################

                    box_corner /= scale_factor
                    box_corner[:, (0, 2)] -= pad_w
                    box_corner[:, (1, 3)] -= pad_h

                    #######################################################################################
                    if debug_mode:
                        pil_img = transforms.ToPILImage()(imgs.squeeze())
                        ##### getting the image's name #####
                        img_path = img_uris[0]
                        img_name = ("_".join(map(str,
                                                 img_path.split("_")[-5:])))
                        tmp_path = os.path.join(
                            visualization_tmp_path,
                            img_name[:-4] + "_predicted_vis.jpg")
                        vis_label = add_class_dimension_to_labels(detect_box)
                        visualize_and_save_to_local(pil_img,
                                                    vis_label,
                                                    tmp_path,
                                                    box_color="red")
                        print("Prediction visualization uploaded")
                    #######################################################################################

            mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
            mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
            mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        # Means of all images
        mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
        mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
        mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        dt = time.time() - t_start
        print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(
            mean_mAP, mean_R, mean_P))
        # result.write(str(1-mean_mAP))
        # result.close()
        return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
Example #12
def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
	rgb_imgs = []
	ious = []
	org_imgs = []
	label = np.array([line for line in open(f_label, 'r').readlines()])
	gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :], cls=cfg.DETECT_OBJ, coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max

	img = cv2.imread(f_rgb)
	warn("img value: {}".format(img[:3,:3,:3]))

	# warn("{} shape: {}".format(f_rgb, img.shape))
	img_height, img_width = img.shape[:2]
	# warn("height: {}, width: {}".format(img_height, img_width))

	for idx in range(len(gt_box2d)):
		box = gt_box2d[idx]
		# warn("box {}: {}".format(idx, box))
		x_min, y_min, x_max, y_max = box
		x_min = int(x_min)
		y_min = int(y_min)
		x_max = int(x_max)
		y_max = int(y_max)

		ori_img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (64, 64))
		org_imgs.append(ori_img)

		box_height = y_max - y_min
		box_width = x_max - x_min

		dx = int(jitter * box_width) + 1
		dy = int(jitter * box_height) + 1

		# warn("dx : {} dy : {}".format(dx, dy))

		lx = np.random.randint(-dx, dx)
		ly = np.random.randint(-dy, dy)

		lw = np.random.randint(-dx, dx)
		lh = np.random.randint(-dy, dy)

		x = (x_max + x_min)/2.0 + lx
		y = (y_max + y_min)/2.0 + ly
		box_height = box_height + lh
		box_width = box_width + lw

		x_min = int(max(0, x - box_width/2.0))
		x_max = int(min(img_width, x + box_width/2.0))
		y_min = int(max(0, y - box_height/2.0))
		y_max = int(min(img_height, y + box_height/2.0))


		flip = np.random.randint(1,10000)%2  

		img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max,x_min:x_max], (width, height))

		if flip:
		    img = cv2.flip(img, 1)
		img = random_distort_image(img, hue, saturation, exposure)
		# for ground truth img, calculate iou with its original location, size

		iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)


		rgb_imgs.append(img)
		ious.append(iou)



	# Randomly generate the same number of background candidates, which will have low or zero iou.
	# After generating the new boxes, the iou against each of gt_box2d needs to be calculated,
	# which will be used for inference:
	# if the inferred iou is low, the bounding box is empty, background, or falsely located;
	# if the inferred iou is high, the bounding box is correctly inferred from the 3D bounding boxes.
	# This is the strategy I am taking for a simple, mini 2D classifier.

	for idx in range(len(gt_box2d)*4):
		x = np.random.randint(0, img_width)
		y = np.random.randint(0, img_height)
		h = np.random.randint(40, 200)
		w = np.random.randint(40, 200)
		x_min = int(max(0, x - w/2.0))
		x_max = int(min(img_width, x + w/2.0))
		y_min = int(max(0, y - h/2.0))
		y_max = int(min(img_height, y + h/2.0))

		max_iou = 0

		for gt_idx in range(len(gt_box2d)):
			box = gt_box2d[gt_idx]
			iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
			if iou > max_iou:
				max_iou = iou

		img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max,x_min:x_max], (width, height))
		if flip:
			img = cv2.flip(img, 1)
		img = random_distort_image(img, hue, saturation, exposure)
		rgb_imgs.append(img)
		ious.append(max_iou)  # best iou of this background crop against any ground-truth box


	return org_imgs, rgb_imgs, ious
Example #13
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP',
                                 'F1')
    p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        val_loss += compute_loss(train_out, targets,
                                 model).item()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > 0.5 and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:,
                               4].cpu().numpy(), pred[:,
                                                      6].cpu().numpy(), tcls))
        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]

    # sync stats
    if dist.is_initialized():
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [
                torch.IntTensor([0]).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)
            if len(stat) < max_ls:
                stat = torch.cat(
                    [stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [
                torch.zeros(max_ls).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(stat_list, stat)
            stat_list = [
                stat_list[si][:ls_list[si]]
                for si in range(dist.get_world_size()) if ls_list[si] > 0
            ]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()

    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))

    # Print results per class
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
    # Return results
    mAPs = np.zeros(num_classes) + mAP
    for i, c in enumerate(ap_class):
        mAPs[c] = ap[i]
    # return (mp, mr, mAP, mf1, *(loss / len(dataloader)).tolist()), mAPs
    return mAP
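The clip_coords call above clamps predicted corner boxes to the image bounds before they are scored. A minimal sketch of what such a helper typically does (in-place clamping, shape given as (height, width)); treat the exact signature as an assumption:

def clip_coords(boxes, img_shape):
    # boxes: (N, 4+) tensor with (x1, y1, x2, y2) in the first four columns; img_shape: (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2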
Example #14
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        # number of images in the batch
        bs = len(target)
        # get the anchor (prior) boxes
        # self.feature_length holds the feature-map sizes after 32x, 16x and 8x downsampling of the original size
        # anchor_index selects the anchors belonging to the current feature-map size, e.g. 32x downsampling uses anchors 0, 1, 2
        anchor_index = [[0, 1, 2], [3, 4, 5],
                        [6, 7, 8]][self.feature_length.index(in_w)]
        subtract_index = [0, 3, 6][self.feature_length.index(in_w)]
        # create all-zero or all-one tensors
        mask = torch.zeros(bs,
                           int(self.num_anchors / 3),
                           in_h,
                           in_w,
                           requires_grad=False)
        noobj_mask = torch.ones(bs,
                                int(self.num_anchors / 3),
                                in_h,
                                in_w,
                                requires_grad=False)

        tx = torch.zeros(bs,
                         int(self.num_anchors / 3),
                         in_h,
                         in_w,
                         requires_grad=False)
        ty = torch.zeros(bs,
                         int(self.num_anchors / 3),
                         in_h,
                         in_w,
                         requires_grad=False)
        tw = torch.zeros(bs,
                         int(self.num_anchors / 3),
                         in_h,
                         in_w,
                         requires_grad=False)
        th = torch.zeros(bs,
                         int(self.num_anchors / 3),
                         in_h,
                         in_w,
                         requires_grad=False)
        t_box = torch.zeros(bs,
                            int(self.num_anchors / 3),
                            in_h,
                            in_w,
                            4,
                            requires_grad=False)
        tconf = torch.zeros(bs,
                            int(self.num_anchors / 3),
                            in_h,
                            in_w,
                            requires_grad=False)
        tcls = torch.zeros(bs,
                           int(self.num_anchors / 3),
                           in_h,
                           in_w,
                           self.num_classes,
                           requires_grad=False)

        box_loss_scale_x = torch.zeros(bs,
                                       int(self.num_anchors / 3),
                                       in_h,
                                       in_w,
                                       requires_grad=False)
        box_loss_scale_y = torch.zeros(bs,
                                       int(self.num_anchors / 3),
                                       in_h,
                                       in_w,
                                       requires_grad=False)
        for b in range(bs):
            # target[b].shape[0] is the number of ground-truth boxes in this image
            for t in range(target[b].shape[0]):
                # compute the position on this feature map
                # target holds the normalized ground-truth box center and w, h
                # convert the ground-truth center and size to coordinates at the current scale
                gx = target[b][t, 0] * in_w
                gy = target[b][t, 1] * in_h

                gw = target[b][t, 2] * in_w
                gh = target[b][t, 3] * in_h

                # which grid cell the ground-truth box falls into
                gi = int(gx)
                gj = int(gy)

                gt_box = torch.FloatTensor(np.array([0, 0, gw,
                                                     gh])).unsqueeze(0)

                # build the (0, 0, w, h) shapes of all anchor boxes
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (self.num_anchors, 2)), np.array(anchors)), 1))
                # compute the overlap
                # iou between this ground-truth box and all 9 anchors
                anch_ious = bbox_iou(gt_box, anchor_shapes)

                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)
                if best_n not in anchor_index:  # e.g. 0, 1, 2 at the first scale; if the best-matching anchor does not belong to this scale's three anchors, move on to the next ground-truth box
                    continue
                # Masks
                # in_h, in_w are the feature-map size; if the best anchor belongs to this scale and the ground-truth center cell lies inside the feature map, mark the cell as containing an object
                # gj, gi: grid cell containing the ground-truth center
                if (gj < in_h) and (gi < in_w):
                    best_n = best_n - subtract_index
                    # determine which anchors actually contain an object
                    # the no-object mask is set to 0 at this cell
                    noobj_mask[b, best_n, gj, gi] = 0
                    # the object mask is set to 1 at this cell
                    mask[b, best_n, gj, gi] = 1
                    # center adjustment targets for the anchor
                    tx[b, best_n, gj, gi] = gx
                    ty[b, best_n, gj, gi] = gy
                    # width/height adjustment targets for the anchor
                    tw[b, best_n, gj, gi] = gw
                    th[b, best_n, gj, gi] = gh
                    # used to obtain the xywh loss scale
                    box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                    box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
                    # objectness confidence
                    tconf[b, best_n, gj, gi] = 1
                    # class
                    tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(
                        gj, in_h, gi, in_w))
                    continue
        t_box[..., 0] = tx
        t_box[..., 1] = ty
        t_box[..., 2] = tw
        t_box[..., 3] = th
        # returns the encoded ground-truth data for the whole batch:
        # mask (bs, num_anchors/3, in_h, in_w): grid cells that contain an object according to the ground truth
        # noobj_mask (bs, num_anchors/3, in_h, in_w): grid cells that contain no object according to the ground truth
        # t_box (bs, num_anchors/3, in_h, in_w, 4): ground-truth box center and w, h at the current scale
        # tcls (bs, num_anchors/3, in_h, in_w, num_classes): one-hot class of the object in each positive cell
        # tconf (bs, num_anchors/3, in_h, in_w): objectness confidence for the positive cells
        return mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y
Example #15
    def __getitem__(self, idx):
        l_bound = idx * self.config['BATCH_SIZE']
        r_bound = (idx + 1) * self.config['BATCH_SIZE']

        if r_bound > len(self.images):
            r_bound = len(self.images)
            l_bound = r_bound - self.config['BATCH_SIZE']

        instance_count = 0

        x_batch = np.zeros((r_bound - l_bound, self.config['IMAGE_H'],
                            self.config['IMAGE_W'], 3))  # input images
        b_batch = np.zeros(
            (r_bound - l_bound, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4)
        )  # list of self.config['TRUE_BOX_BUFFER'] GT boxes
        y_batch = np.zeros(
            (r_bound - l_bound, self.config['GRID_H'], self.config['GRID_W'],
             self.config['BOX'],
             4 + 1 + len(self.config['LABELS'])))  # desired network output

        for train_instance in self.images[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj[
                        'ymin'] and obj['name'] in self.config['LABELS']:
                    center_x = .5 * (obj['xmin'] + obj['xmax'])
                    center_x = center_x / (float(self.config['IMAGE_W']) /
                                           self.config['GRID_W'])
                    center_y = .5 * (obj['ymin'] + obj['ymax'])
                    center_y = center_y / (float(self.config['IMAGE_H']) /
                                           self.config['GRID_H'])

                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    if grid_x < self.config['GRID_W'] and grid_y < self.config[
                            'GRID_H']:
                        obj_indx = self.config['LABELS'].index(obj['name'])

                        center_w = (obj['xmax'] - obj['xmin']) / (
                            float(self.config['IMAGE_W']) /
                            self.config['GRID_W'])  # unit: grid cell
                        center_h = (obj['ymax'] - obj['ymin']) / (
                            float(self.config['IMAGE_H']) /
                            self.config['GRID_H'])  # unit: grid cell

                        box = [center_x, center_y, center_w, center_h]

                        # find the anchor that best predicts this box
                        best_anchor = -1
                        max_iou = -1

                        shifted_box = BoundBox(0, 0, center_w, center_h)

                        for i in range(len(self.anchors)):
                            anchor = self.anchors[i]
                            iou = bbox_iou(shifted_box, anchor)

                            if max_iou < iou:
                                best_anchor = i
                                max_iou = iou

                        # assign ground truth x, y, w, h, confidence and class probs to y_batch
                        y_batch[instance_count, grid_y, grid_x, best_anchor,
                                0:4] = box
                        y_batch[instance_count, grid_y, grid_x, best_anchor,
                                4] = 1.
                        y_batch[instance_count, grid_y, grid_x, best_anchor,
                                5 + obj_indx] = 1

                        # assign the true box to b_batch
                        b_batch[instance_count, 0, 0, 0, true_box_index] = box

                        true_box_index += 1
                        true_box_index = true_box_index % self.config[
                            'TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm != None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1],
                                      (obj['xmin'], obj['ymin']),
                                      (obj['xmax'], obj['ymax']), (255, 0, 0),
                                      3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                    1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

            # increase instance counter in current batch
            instance_count += 1

        #print(' new batch created', idx)

        return [x_batch, b_batch], y_batch
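Unlike the other snippets, the example above passes BoundBox objects rather than tensors to bbox_iou, with anchors represented as BoundBox(0, 0, w, h). A minimal sketch of that object-based variant, assuming the real BoundBox class also carries a confidence and class scores:

class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax):
        self.xmin, self.ymin, self.xmax, self.ymax = xmin, ymin, xmax, ymax

def _interval_overlap(a_min, a_max, b_min, b_max):
    # length of the overlap between two 1-D intervals
    return max(0, min(a_max, b_max) - max(a_min, b_min))

def bbox_iou(box1, box2):
    intersect = _interval_overlap(box1.xmin, box1.xmax, box2.xmin, box2.xmax) * \
                _interval_overlap(box1.ymin, box1.ymax, box2.ymin, box2.ymax)
    union = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin) + \
            (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin) - intersect
    return intersect / union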
Example #16
def batch_statistics(outputs, targets, iou_threshold):
    """
    Compute true positives, predicted scores and predicted labels per sample
    :param outputs: List of Tensors of predictions [x0, y0, x1, y1, confidence, class label]
    :param targets: List of Dicts of Tensors (Dict keys: 'boxes', 'labels', 'image_id', 'area')
    :param iou_threshold:
    :return:list of True positive, pred_scores and pred_labels for each image in the batch
    """
    batch_metrics = []
    for sample_i in range(
            len(outputs)
    ):  # outputs[sample_i] is one tensor of several detections, corresponding to one image in the batch
        if outputs[sample_i] is None:
            continue

        output = outputs[sample_i]
        pred_boxes = output[:, :4]
        pred_scores = output[:, 4].cpu()
        pred_labels = output[:, -1].cpu()
        # print("\npred_boxes=", pred_boxes)
        #print("pred_scores=", pred_scores)
        #print("pred_labels=", pred_labels)

        true_positives = np.zeros(pred_boxes.shape[0])
        #print("true positives empty=", true_positives)

        annotations = targets[sample_i]['boxes']
        #print("GT boxes=", annotations)
        target_labels = targets[sample_i]['labels'].cpu(
        ) if len(annotations) > 0 else []
        #print("GT labels=", target_labels)

        if len(annotations) > 0:
            detected_boxes = []
            target_boxes = annotations

            for pred_i, (pred_box,
                         pred_label) in enumerate(zip(pred_boxes,
                                                      pred_labels)):
                #print("\nPrediction ", pred_i)
                #print("pred_box=", pred_box)
                #print("pred_label=", pred_label)
                # If all targets have already been found, break
                if len(detected_boxes) == len(annotations):
                    break

                # Ignore if label is not one of the target labels
                if pred_label.item() not in target_labels:
                    continue
                # print("Input to bbox iou 1 =", pred_box.unsqueeze(0))
                iou, box_index = bbox_iou(pred_box.unsqueeze(0),
                                          target_boxes).max(0)
                #print("iou=", iou)
                #print("box_index=", box_index)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    #print("True Positive")
                    detected_boxes += [box_index]

        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics
Example #17
def test_det(
        opt,
        batch_size=12,
        img_size=(1088, 608),
        iou_thres=0.5,
        print_interval=40,
):
    data_cfg = opt.data_cfg
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    nC = 1
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    #model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root,
                         test_path,
                         img_size,
                         augment=False,
                         transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes,
                  targets_len) in enumerate(dataloader):
        t = time.time()
        #seen += batch_size

        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None
        opt.K = 200
        detections, inds = mot_decode(hm,
                                      wh,
                                      reg=reg,
                                      cat_spec_wh=opt.cat_spec_wh,
                                      K=opt.K)
        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, labels in enumerate(targets):
            seen += 1
            #path = paths[si]
            #img0 = cv2.imread(path)
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]

            #remain_inds = dets[:, 4] > opt.det_thres
            #dets = dets[remain_inds]
            if dets is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # If no labels add number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= width
                target_boxes[:, 2] *= width
                target_boxes[:, 1] *= height
                target_boxes[:, 3] *= height
                '''
                path = paths[si]
                img0 = cv2.imread(path)
                img1 = cv2.imread(path)
                for t in range(len(target_boxes)):
                    x1 = target_boxes[t, 0]
                    y1 = target_boxes[t, 1]
                    x2 = target_boxes[t, 2]
                    y2 = target_boxes[t, 3]
                    cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('gt.jpg', img0)
                for t in range(len(dets)):
                    x1 = dets[t, 0]
                    y1 = dets[t, 1]
                    x2 = dets[t, 2]
                    y2 = dets[t, 3]
                    cv2.rectangle(img1, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('pred.jpg', img1)
                abc = ace
                '''

                detected = []
                for *pred_bbox, conf in dets:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[
                            best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
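
A note on the AP computation above: ap_per_class ranks all detections of a class by confidence and integrates the precision-recall curve. A minimal single-class sketch of that idea, assuming the same (tp, conf) inputs built above; the helper name average_precision_single_class and the interpolation details are ours, not from this snippet:

import numpy as np

def average_precision_single_class(tp, conf, n_gt):
    """Sketch: AP for one class from per-detection correctness flags and confidences."""
    order = np.argsort(-np.asarray(conf, dtype=float))  # sort detections by decreasing confidence
    tp = np.asarray(tp, dtype=float)[order]
    cum_tp = np.cumsum(tp)                               # true positives seen so far
    cum_fp = np.cumsum(1.0 - tp)                         # false positives seen so far
    recall = cum_tp / (n_gt + 1e-16)
    precision = cum_tp / (cum_tp + cum_fp + 1e-16)
    # integrate precision over recall using a monotonically decreasing envelope
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))

# e.g. three detections (two correct) against four ground-truth boxes
print(average_precision_single_class(tp=[1, 0, 1], conf=[0.9, 0.8, 0.6], n_gt=4))  # ~0.417
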
Exemple #18
0
    def __call__(self, y_pred, y_true):
        device = y_pred[0].device
        loss_cls = torch.zeros(1, device=device)  # Tensor(0)
        loss_box = torch.zeros(1, device=device)  # Tensor(0)
        loss_obj = torch.zeros(1, device=device)  # Tensor(0)

        target_cls, target_box, indices, anchors = self.build_targets(y_pred, y_true)  # targets

        # Define criteria
        reduction = 'mean'  # Loss reduction (sum or mean)
        BCE_cls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([self.hyp['cls_pw']], device=device),
                                       reduction=reduction)
        BCE_obj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([self.hyp['obj_pw']], device=device),
                                       reduction=reduction)

        # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        label_pos, label_neg = smooth_BCE(eps=0.0)

        # focal loss
        fl_gamma = self.hyp['fl_gamma']  # focal loss gamma
        if fl_gamma > 0:
            BCE_cls, BCE_obj = FocalLoss(BCE_cls, fl_gamma), FocalLoss(BCE_obj, fl_gamma)

        # per output
        count_targets = 0  # targets
        for jdx, pred in enumerate(y_pred):  # layer index, layer predictions
            idx_img, idx_anchor, grid_y, grid_x = indices[jdx]  # image, anchor, grid_y, grid_x
            target_obj = torch.zeros_like(pred[..., 0], device=device)  # target obj

            num_target = idx_img.shape[0]  # number of targets
            if num_target:
                count_targets += num_target  # cumulative targets
                # prediction subset corresponding to the matched positive samples
                pred_sub = pred[idx_img, idx_anchor, grid_y, grid_x]

                # GIoU
                pred_xy = pred_sub[..., :2].sigmoid()
                pred_wh = pred_sub[..., 2:4].exp().clamp(max=1E3) * anchors[jdx]
                pred_box = torch.cat((pred_xy, pred_wh), 1)  # predicted box
                giou = bbox_iou(pred_box.t(), target_box[jdx].t(), ltrb=False,
                                iou_type='GIoU')  # giou(prediction, target)
                loss_box += (1.0 - giou).mean()  # giou loss

                # Obj giou ratio
                target_obj[idx_img, idx_anchor, grid_y, grid_x] = \
                    (1.0 - self.giou_ratio) + self.giou_ratio * giou.detach().clamp(0).type(target_obj.dtype)

                # Class
                if self.num_cls > 1:  # cls loss (only if multiple classes)
                    pred_tar = torch.full_like(pred_sub[:, 5:], label_neg, device=device)  # targets
                    pred_tar[range(num_target), target_cls[jdx]] = label_pos
                    loss_cls += BCE_cls(pred_sub[:, 5:], pred_tar)  # BCE

            loss_obj += BCE_obj(pred[..., 4], target_obj)  # obj loss

        # Scale each loss by its corresponding weight
        loss_box *= self.hyp['giou']
        loss_obj *= self.hyp['obj']
        loss_cls *= self.hyp['cls']

        # loss = loss_box + loss_obj + loss_cls
        return {"box_loss": loss_box, "obj_loss": loss_obj, "class_loss": loss_cls}
    def nms_suppress(self, pred):
        '''
        'OR'   : standard NMS; this is what people usually mean by NMS
        'AND'  : similar to OR, but if a class ends up with only a single box it is treated
                 as invalid; a real object usually produces several overlapping boxes, so a
                 lone box is likely a false positive
        'MERGE': uses every box above the threshold; each box's conf value acts as a weight,
                 and the final box is the conf-weighted average of the x1y1x2y2 coordinates
                 (weighted mixture box). Higher accuracy, but somewhat slower.
        'SOFT' : soft-NMS https://arxiv.org/abs/1704.04503

        :param pred:
        :param nms_thres:
        :param nms_style: NMS method, one of 'OR' (default), 'AND', 'MERGE' (experimental), 'SOFT'
        :return: list[tensor[], tensor[]]
        '''
        if len(pred) == 0:
            return torch.tensor([])

        det_max = []
        for c in pred[:, -1].unique():  # iterate only over classes that appear, instead of all 80 -- more efficient
            dc = pred[pred[:, -1] == c]  # select class c, e.g. torch.Size([21, 7]) means 21 boxes for this class
            n = len(dc)  # number of boxes predicted for the current class
            if n == 1:
                det_max.append(dc)  # No NMS required if only 1 prediction
                continue
            elif n > 100:
                # too many boxes: keep only the top 100; usually fine, but dense scenes may need a higher limit
                dc = dc[:100]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117

            # Non-maximum suppression
            if self.nms_style == 'OR':  # default
                # e.g. torch.Size([21, 7]): 21 boxes to start with
                # dc.shape[0] is the number of remaining boxes; exit the loop when it reaches 0
                while dc.shape[0]:  # e.g. 21->14->9->3->0
                    det_max.append(dc[:1])  # keep the box with the highest conf
                    if len(dc) == 1:  # only one box left, stop
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # IoU of the highest-conf box with the remaining boxes
                    dc = dc[1:][iou < self.nms_thres]  # remove boxes whose IoU with it exceeds the threshold

            elif self.nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:  # 21->14->9->3->0
                    # IoU of the highest-conf box with the remaining boxes
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes dc[1:]: torch.Size([20])
                    if iou.max() > 0.5:  # keep it only if another box overlaps it by more than 0.5
                        det_max.append(dc[:1])  # add the highest-conf box to det_max
                    dc = dc[1:][iou < self.nms_thres]  # remove ious > threshold

            elif self.nms_style == 'MERGE':  # weighted mixture box, often used as the default here; higher accuracy but somewhat slower
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    i = bbox_iou(dc[0], dc) > self.nms_thres  # boxes whose IoU exceeds the NMS threshold
                    weights = dc[i, 4:5]  # their conf values act as weights, e.g. torch.Size([7, 1])
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[i == 0]  # drop the boxes that were just merged

            elif self.nms_style == 'SOFT':  # soft-NMS https://arxiv.org/abs/1704.04503
                sigma = 0.5  # soft-nms sigma parameter
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    det_max.append(dc[:1])
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:]
                    dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay confidences
                    dc = dc[dc[:, 4] > self.nms_thres]  # new line per https://github.com/ultralytics/yolov3/issues/362

        if len(det_max) > 0:
            det_max_tensor = det_max[0]
            for det in det_max[1:]:
                det_max_tensor = torch.cat((det_max_tensor, det))
        else:
            det_max_tensor = torch.Tensor([])
        return det_max_tensor
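
The docstring above lists four suppression styles; 'SOFT' is the least obvious one, since it decays scores instead of deleting boxes. A minimal, self-contained sketch of soft-NMS on (x1, y1, x2, y2) boxes, using an illustrative iou_xyxy helper rather than the repo's bbox_iou (function names and the score threshold here are ours, not from the snippet):

import torch

def iou_xyxy(box, boxes):
    """IoU of one (x1, y1, x2, y2) box against a batch of boxes."""
    x1 = torch.max(box[0], boxes[:, 0])
    y1 = torch.max(box[1], boxes[:, 1])
    x2 = torch.min(box[2], boxes[:, 2])
    y2 = torch.min(box[3], boxes[:, 3])
    inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter + 1e-16)

def soft_nms_sketch(boxes, scores, sigma=0.5, score_thres=0.1):
    """Keep the best box, decay the scores of the others by exp(-iou**2 / sigma),
    and drop a box only once its decayed score falls below score_thres."""
    if not len(boxes):
        return boxes, scores
    keep_boxes, keep_scores = [], []
    while len(boxes):
        top = torch.argmax(scores)
        keep_boxes.append(boxes[top])
        keep_scores.append(scores[top])
        rest = torch.arange(len(boxes), device=boxes.device) != top
        boxes, scores = boxes[rest], scores[rest]
        if not len(boxes):
            break
        decay = torch.exp(-iou_xyxy(keep_boxes[-1], boxes) ** 2 / sigma)  # decay confidences of overlapping boxes
        scores = scores * decay
        keep = scores > score_thres
        boxes, scores = boxes[keep], scores[keep]
    return torch.stack(keep_boxes), torch.stack(keep_scores)
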
Exemple #20
0
def test(
        model,
        dataloader,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    
    
    nC = 1
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        out = model(imgs.cuda())
        # out = model(imgs)
        output = []
        for i,o in enumerate(out):
            boxes = xyxy2xywh(o['boxes']).cpu()
            scores = o['scores'].cpu().view(-1,1)
            labels = o['labels'].cpu().view(-1,1).float()
            output.append(torch.Tensor(torch.cat((boxes,scores,scores,labels),dim=1)))
        output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
        for i, o in enumerate(output):
            if o is not None:
                output[i] = o[:, :6]

        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
        for si, (labels, detections) in enumerate(zip(targets, output)):
            seen += 1

            if detections is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # Get detections sorted by decreasing confidence scores
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]


            # If no labels add number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = torch.zeros_like(labels[:, 0])
                target_boxes = labels[:, 2:6]

                detected = []
                for *pred_bbox, conf, obj_conf in detections:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(tp=correct,
                                              conf=detections[:, 4],
                                              pred_cls=np.zeros_like(detections[:, 5]), # detections[:, 6]
                                              target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
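
Both test loops above shuttle boxes between the center-size (x, y, w, h) layout and the corner (x1, y1, x2, y2) layout via xywh2xyxy and xyxy2xywh. A minimal sketch of what such helpers typically do; the repo's actual implementations may differ in details such as in-place behavior:

import torch

def xywh2xyxy_sketch(x):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    y = x.clone()
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # x1 = cx - w/2
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # y1 = cy - h/2
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # x2 = cx + w/2
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # y2 = cy + h/2
    return y

def xyxy2xywh_sketch(x):
    # (x1, y1, x2, y2) -> (cx, cy, w, h)
    y = x.clone()
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # cx
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # cy
    y[..., 2] = x[..., 2] - x[..., 0]        # w
    y[..., 3] = x[..., 3] - x[..., 1]        # h
    return y
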
Exemple #21
0
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres, stride):
    '''
    :param pred_boxes: predicted box centers and sizes, (num_samples, self.num_anchors, grid_size, grid_size, 4)
    :param pred_cls: predicted class probabilities
    :param target: ground truth
    :param anchors: the anchors, stored as a matrix
    :param ignore_thres: defaults to 0.5
    :return:
    '''
    BoolTensor = torch.cuda.BoolTensor if pred_boxes.is_cuda else torch.BoolTensor
    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
    device = torch.device("cuda") if pred_boxes.is_cuda else torch.device(
        "cpu")

    nB = pred_boxes.size(0)  # number of samples
    nA = pred_boxes.size(1)  # number of anchors: 3
    nC = pred_cls.size(-1)  # number of classes
    nG = pred_boxes.size(2)  # grid size, e.g. 13

    # Output tensors
    obj_mask = torch.zeros(nB, nA, nG, nG, requires_grad=False).to(
        device)  # object mask: 1 where a grid cell holds a target, 0 elsewhere
    noobj_mask = torch.ones(nB, nA, nG, nG,
                            requires_grad=False).to(device)  # no-object mask
    class_mask = torch.zeros(nB, nA, nG, nG,
                             requires_grad=False).to(device)  # class mask
    iou_scores = torch.zeros(nB, nA, nG, nG,
                             requires_grad=False).to(device)  # IoU scores
    tx = torch.zeros(nB, nA, nG, nG,
                     requires_grad=False).to(device)  # target x offset from the grid cell
    ty = torch.zeros(nB, nA, nG, nG,
                     requires_grad=False).to(device)  # target y offset from the grid cell
    tw = torch.zeros(nB, nA, nG, nG,
                     requires_grad=False).to(device)  # target width encoding
    th = torch.zeros(nB, nA, nG, nG,
                     requires_grad=False).to(device)  # target height encoding
    tcls = torch.zeros(nB, nA, nG, nG, nC,
                       requires_grad=False).to(device)  # target class (one-hot)

    # Convert to position relative to box
    # target holds the ground truth for every sample: the box position and its class.
    # target has 6 columns: sample index, label, then x, y, w, h.
    # One sample may contain several objects, so the first column records which sample a box belongs to.
    target_boxes = target[:, 2:6] * nG  # positions are stored normalized, so scale them by the feature-map size
    gxy = target_boxes[:, :2]  # ground-truth centers
    gwh = target_boxes[:, 2:]  # ground-truth widths and heights
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
    best_ious, best_n = ious.max(0)  # IoU of every anchor with each ground truth; keep the best anchor
    # Separate target values
    b, target_labels = target[:, :2].long().t()  # .long() casts the dtype; .t() transposes (and copies)
    gx, gy = gxy.t()  # ground-truth centers
    gw, gh = gwh.t()  # ground-truth widths and heights
    gi, gj = gxy.long().t()  # grid-cell indices
    ########## TODO(arthur77wang):
    gi[gi < 0] = 0
    gj[gj < 0] = 0
    gi[gi > nG - 1] = nG - 1
    gj[gj > nG - 1] = nG - 1
    # Set masks
    obj_mask[b, best_n, gj, gi] = 1  # 1 marks cells that contain a target, at the best-matching anchor
    noobj_mask[b, best_n, gj, gi] = 0  # those same cells are removed from the no-object mask

    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i],
                   gi[i]] = 0  # anchors whose IoU exceeds the threshold are ignored by the no-object loss

    # Coordinates
    # compute the offsets relative to the grid cell

    tx[b, best_n, gj, gi] = gx - gx.floor()
    ty[b, best_n, gj, gi] = (gy - gy.floor() + 0.5) / 2
    # Width and height
    # tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    # th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    tw[b, best_n, gj, gi] = torch.sqrt(gw / anchors[best_n][:, 0]) / 2
    th[b, best_n, gj, gi] = torch.sqrt(gh / anchors[best_n][:, 1]) / 2

    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1  # ground-truth class probabilities
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj, gi] = (
        pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels
    ).float()  # 1 where the predicted class matches the label
    iou_scores[b, best_n, gj, gi] = bbox_iou(
        pred_boxes[b, best_n, gj, gi],
        target_boxes,
        x1y1x2y2=False)  # IoU between the predictions at target cells and the ground-truth boxes

    tconf = obj_mask  # ground-truth confidence
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
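
build_targets picks the best anchor for each ground-truth box with bbox_wh_iou, which compares only widths and heights, i.e. it treats both boxes as if they shared the same center. A minimal sketch of such a helper, matching how it is called above (one anchor against an N x 2 tensor of ground-truth sizes); the real implementation may differ:

import torch

def bbox_wh_iou_sketch(anchor_wh, gwh):
    """IoU between one anchor (w, h) and N ground-truth (w, h) pairs, boxes assumed concentric."""
    anchor_wh = torch.as_tensor(anchor_wh, dtype=gwh.dtype)
    w1, h1 = anchor_wh[0], anchor_wh[1]
    w2, h2 = gwh[:, 0], gwh[:, 1]
    inter = torch.min(w1, w2) * torch.min(h1, h2)  # overlap of boxes sharing a center
    union = w1 * h1 + w2 * h2 - inter
    return inter / (union + 1e-16)
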
Exemple #22
0
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """test the metrics of the trained model

    :param str cfg: model cfg file
    :param str data: data dict
    :param str weights: weights path
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold
    :param float conf_thres: confidence threshold
    :param float nms_thres: nms threshold
    :param bool save_json: whether to dump detections to a COCO-format JSON file and run the pycocotools evaluation
    :param dict hyp: hyperparameters
    :param model: yolov4 model (optional; built from cfg and weights when None)
    :param bool single_cls: treat the dataset as a single class
    :return: results
    """

    if model is None:
        device = select_device(opt.device)
        verbose = False
        # Initialize model
        model = Model(cfg, img_size).to(device)
        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(det[6])],
                        'bbox': [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score': float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and mask[
                            best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = \
            precision.mean(), recall.mean(), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
    def new_detection(self, frame, detections, xywh=False):
        if detections is None or not len(detections):
            self.logger.info("No detections added")
            return
        detections = detections.cpu().numpy()
        if not xywh:
            detections[:, 2] = detections[:, 2] - detections[:, 0]
            detections[:, 3] = detections[:, 3] - detections[:, 1]
        to_remove = []
        self.logger.info("Adding new detections")
        for i, obj in enumerate(self.objects):
            if detections is None or not len(detections):
                return

            cur_bbox = np.array(obj.bbox)
            bboxes_array = detections[:, :4]
            ious = bbox_iou(cur_bbox, bboxes_array)
            relevant_idx = np.where(
                obj.class_type == detections[:, 5])[0]  # detections with a matching class
            if relevant_idx.size == 0:
                continue

            greatest_overlap = relevant_idx[
                ious[relevant_idx].argmax()]  # largest overlap among matching-class detections
            # check for greatest intersection over union detection
            if ious[greatest_overlap] > self.iou_thres:
                self.logger.debug(
                    f"Reinitialize Object id: {obj.id} of type: {obj.class_type} due to large overlap"
                )
                bbox = tuple(map(int, bboxes_array[greatest_overlap]))
                #                 bbox = tuple(np.around(bboxes_array[greatest_overlap]).astype(int)) # conversion for opencv
                obj.reinitialize(frame, bbox)
                detections = np.delete(detections, greatest_overlap, axis=0)
                continue

            # check for closest detection
            # Todo make distance thres relative to speed and frames without detection.
            distances = bbox_distance(cur_bbox, bboxes_array)
            closest_box = relevant_idx[
                distances[relevant_idx].argmin()]  # smallest distance among matching-class detections
            if distances[closest_box] < self.dist_thres:
                self.logger.debug(
                    f"Reinitialize Object id: {obj.id} of type: {obj.class_type} due to close distance"
                )
                bbox = tuple(map(int, bboxes_array[closest_box]))
                #                 bbox = tuple(np.around(bboxes_array[closest_box]).astype(int)) # conversion for opencv
                obj.reinitialize(frame, bbox)
                detections = np.delete(detections, closest_box, axis=0)
                continue
            if obj.frames_without_detection > self.no_detection_thres:
                to_remove.append(i)

        # remove undetected objects
        self.objects = [
            self.objects[i] for i in range(len(self.objects))
            if i not in to_remove
        ]

        # add new detections
        for det in detections:
            self.add(self.default_tracker,
                     frame,
                     tuple(det[:4]),
                     int(det[5]),
                     xyxy=False)
        self.logger.info(self.get_objects_metadata())
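
new_detection falls back to the nearest same-class detection when no box overlaps the tracked object strongly enough. The bbox_distance helper is not shown in these snippets; a plausible reading, purely an assumption on our part, is a center-to-center Euclidean distance over (x, y, w, h) boxes with (x, y) the top-left corner, as produced by the conversion at the top of the method:

import numpy as np

def bbox_distance_sketch(box, boxes):
    """Assumed behavior of bbox_distance: Euclidean distance between box centers.
    box is (x, y, w, h); boxes is an N x 4 array in the same layout."""
    cx, cy = box[0] + box[2] / 2, box[1] + box[3] / 2
    cxs = boxes[:, 0] + boxes[:, 2] / 2
    cys = boxes[:, 1] + boxes[:, 3] / 2
    return np.sqrt((cxs - cx) ** 2 + (cys - cy) ** 2)
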
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        # how many images are in the batch
        bs = len(target)
        # pick the anchors that belong to this feature map
        anchor_index = [[0,1,2],[3,4,5],[6,7,8]][[13,26,52].index(in_w)]
        subtract_index = [0,3,6][[13,26,52].index(in_w)]
        # create arrays filled with zeros or ones
        mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)

        tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False)

        box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        for b in range(bs):
            for t in range(target[b].shape[0]):
                # position of the target on this feature map
                gx = target[b][t, 0] * in_w
                gy = target[b][t, 1] * in_h
                gw = target[b][t, 2] * in_w
                gh = target[b][t, 3] * in_h

                # grid cell that the target center falls in
                gi = int(gx)
                gj = int(gy)

                # ground-truth box, width/height only (centered at the origin)
                gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)

                # all anchor boxes, also centered at the origin
                anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),
                                                                  np.array(anchors)), 1))
                # overlap between the ground truth and every anchor
                anch_ious = bbox_iou(gt_box, anchor_shapes)
               
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)
                if best_n not in anchor_index:
                    continue
                # Masks
                if (gj < in_h) and (gi < in_w):
                    best_n = best_n - subtract_index
                    # mark which anchors actually contain an object
                    noobj_mask[b, best_n, gj, gi] = 0
                    mask[b, best_n, gj, gi] = 1
                    # center offset targets
                    tx[b, best_n, gj, gi] = gx - gi
                    ty[b, best_n, gj, gi] = gy - gj
                    # width/height scaling targets
                    tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n+subtract_index][0])
                    th[b, best_n, gj, gi] = math.log(gh / anchors[best_n+subtract_index][1])
                    # relative box size, used to rescale the xywh loss
                    box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                    box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
                    # objectness confidence
                    tconf[b, best_n, gj, gi] = 1
                    # class
                    tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
                    continue

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
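
get_target encodes every ground-truth box relative to the grid cell that owns its center and to the best-matching anchor: tx, ty are the fractional offsets inside the cell, and tw, th are log ratios of the box size to the anchor size. A small numeric round trip with made-up values shows what those targets look like:

import math

# made-up example: one ground-truth box on a 13x13 feature map, sizes in grid units
gx, gy, gw, gh = 6.4, 3.7, 4.0, 2.5
anchor_w, anchor_h = 3.625, 2.8125   # the matched anchor, also in grid units

gi, gj = int(gx), int(gy)            # owning grid cell: (6, 3)
tx, ty = gx - gi, gy - gj            # center offsets inside the cell: (0.4, 0.7)
tw = math.log(gw / anchor_w)         # width target
th = math.log(gh / anchor_h)         # height target

# decoding inverts the size encoding: exp(tw) * anchor_w recovers gw
assert abs(math.exp(tw) * anchor_w - gw) < 1e-9
assert abs(math.exp(th) * anchor_h - gh) < 1e-9
print(gi, gj, round(tx, 3), round(ty, 3), round(tw, 4), round(th, 4))
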
Exemple #25
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):

    # 0. Initialize some parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format as well
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # 20200704_50epoch_modify_noobj   # TODO:map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Predict: forward pass
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', 'mAP@0.5', 'F1')

    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):

        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor
            )  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # number of labels for this image
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # handle the case where there are no predicted boxes at all, e.g. when conf_thres is set too high
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds   TODO: necessary, since the labels are clipped; removing this should lower mAP
            clip_coords(pred, (height, width))  #  mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            '''
            Example of one stats entry:
                pred flag (correct): [1, 0, 1, 0, 0, 1, 0, 0, 1]
                pred conf:           tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069, 0.06449, 0.06222, 0.05580, 0.05452])
                pred cls:            tensor([2., 2., 2., 2., 2., 2., 2., 2., 2.])
                label cls:           [2.0, 2.0, 2.0, 2.0, 2.0]
            stats is a list of such tuples, one per image.
            '''
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(),
                 tcls))  # Append statistics (correct, conf, pcls, tcls)

    # after collecting the stats for all images ...
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: guard against printing before the tqdm bar above has flushed
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
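
Both of the last two evaluation loops clip predictions to the image bounds with clip_coords before matching them against the labels. A minimal sketch of what that helper does for (x1, y1, x2, y2, ...) prediction rows; the repo's version may differ slightly (argument order, in-place vs copy):

import torch

def clip_coords_sketch(boxes, img_shape):
    """Clamp x1, y1, x2, y2 in place to the image bounds; img_shape is (height, width)."""
    height, width = img_shape
    boxes[:, 0].clamp_(0, width)   # x1
    boxes[:, 1].clamp_(0, height)  # y1
    boxes[:, 2].clamp_(0, width)   # x2
    boxes[:, 3].clamp_(0, height)  # y2

# dummy prediction sticking out of a 416x416 image
pred = torch.tensor([[-5.0, 10.0, 430.0, 200.0, 0.9]])
clip_coords_sketch(pred, (416, 416))
print(pred)  # x1 clamped to 0, x2 clamped to 416
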