Example #1
    def _data_generator(self, batch_size):
        i = 0
        n = self.sample_nums
        while True:
            total_img_data = []
            total_labels = []
            total_deltas = []
            for b in range(batch_size):
                if i == 0:
                    self._random_shuffle()
                annotation = self._annotations[i]
                image_path, gt_boxes = self._parse_annotation(annotation)
                img = cv2.imread(image_path)
                # height/width/channel
                height, width, _ = img.shape
                # img resize
                img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)

                # BGR -> RGB, then a simple normalization
                img = img[:, :, (2, 1, 0)]
                img = img.astype(np.float32)
                img = img / 255
                # gt_box resize
                gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
                gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

                # each region holds x1, y1, x2, y2
                _, regions = selective_search(img, scale=200, sigma=0.9, min_size=50)

                rects = np.asarray([list(region['rect']) for region in regions])
                selected_imgs = []
                kept_rects = []
                candidates = set()
                # filter out duplicate and tiny boxes
                for r in rects:
                    x1, y1, x2, y2 = r
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(round(x2)), int(round(y2))
                    if (x1, y1, x2, y2) in candidates:
                        continue
                    if (x2 - x1) * (y2 - y1) < 220:
                        continue
                    crop_img = img[y1:y2, x1:x2, :]
                    # resize the crop back to the network input size
                    crop_img = cv2.resize(crop_img, im_size, interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)
                    candidates.add((x1, y1, x2, y2))
                    kept_rects.append([x1, y1, x2, y2])
                # keep insertion order so rects stay aligned with selected_imgs
                # (iterating over the set would scramble the correspondence)
                rects = np.asarray(kept_rects)
                # append the ground-truth boxes as proposals too
                for idx in range(len(gt_boxes)):
                    x1, y1, x2, y2 = gt_boxes[idx, 0:4]
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(round(x2)), int(round(y2))
                    # resize after cropping
                    crop_img = img[y1:y2, x1:x2, :]
                    crop_img = cv2.resize(crop_img, im_size, interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)

                rects = np.vstack((rects, gt_boxes[:, 0:4]))
                # compute IoU
                overlaps = bbox_overlaps(rects, gt_boxes)
                # for each rect, the index of the gt_box with the largest IoU
                argmax_overlaps = np.argmax(overlaps, axis=1)
                # decide the class label
                max_overlaps = np.max(overlaps, axis=1)
                keep = np.where(max_overlaps > threshold)[0]
                labels = np.empty(len(argmax_overlaps))
                labels.fill(0)
                labels[keep] = gt_boxes[argmax_overlaps[keep], 4]
                # do reg
                deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

                total_deltas.append(deltas)
                total_labels.append(labels)
                total_img_data.append(selected_imgs)
                i = (i + 1) % n
            total_img_data = np.concatenate(total_img_data, axis=0)
            total_labels = np.concatenate(total_labels, axis=0)
            total_deltas = np.concatenate(total_deltas, axis=0)
            yield total_img_data, total_labels, total_deltas

#
# voc_data = VocData('~/segment_data', 2007, 'train', './data/voc_classes.txt')
# g = voc_data.data_generator_wrapper()
# x, y, z = next(g)
# print(x.shape)
# print(y.shape)
# print(z.shape)
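Both of the R-CNN-style generators in this collection rely on `bbox_transform` to turn (proposal, matched ground truth) pairs into regression deltas. For reference, a minimal NumPy sketch of the usual (tx, ty, tw, th) parameterization, assuming corner-format boxes; the repo's actual helper may differ in details such as the +1 width convention:

import numpy as np

def bbox_transform_sketch(boxes, gt_boxes):
    """(tx, ty, tw, th) targets for (N, 4) corner-format box pairs."""
    w = boxes[:, 2] - boxes[:, 0] + 1.0
    h = boxes[:, 3] - boxes[:, 1] + 1.0
    cx = boxes[:, 0] + 0.5 * w
    cy = boxes[:, 1] + 0.5 * h
    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
    gt_cx = gt_boxes[:, 0] + 0.5 * gt_w
    gt_cy = gt_boxes[:, 1] + 0.5 * gt_h
    # center offsets are normalized by the proposal size; scales become log ratios
    tx = (gt_cx - cx) / w
    ty = (gt_cy - cy) / h
    tw = np.log(gt_w / w)
    th = np.log(gt_h / h)
    return np.stack((tx, ty, tw, th), axis=1)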
Example #2
    def _get_labels(self, regions, gt_boxes):
        """
        Build the labels used for training.
        :param regions:  n * 4  (x1, y1, x2, y2)
        :param gt_boxes: m * 5  (x1, y1, x2, y2, cls)
        :return:
        """
        # stack gt_boxes onto regions to increase the number of positive samples
        all_regions = np.vstack((regions, gt_boxes[:, :4]))
        # 1. compute IoU
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_regions, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        # assign each region the class of the gt_box it overlaps most
        labels = gt_boxes[gt_assignment, 4]
        # 2. set the number of positive/negative samples
        fg_inds = np.where(max_overlaps >= cfg.TRAIN_FG_THRESH)[0]
        # e.g. 128 * 0.25
        fg_rois_per_image = int(cfg.TRAIN_BATCH_SIZE * cfg.TRAIN_FG_FRACTION)
        fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
        if fg_inds.size > 0:
            # sample without replacement
            fg_inds = np.random.choice(fg_inds,
                                       size=fg_rois_per_this_image,
                                       replace=False)
        # regions with IoU in [0.1, 0.5) would be background, but that range may not
        # yield enough samples, so the lower bound is dropped here:
        # bg_inds = np.where((max_overlaps < cfg.TRAIN_BG_THRESH_HI) &
        #                    (max_overlaps >= cfg.TRAIN_BG_THRESH_LO))[0]
        bg_inds = np.where(max_overlaps < cfg.TRAIN_BG_THRESH_HI)[0]
        bg_rois_per_this_image = cfg.TRAIN_BATCH_SIZE - fg_rois_per_this_image
        bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)

        if bg_inds.size > 0:
            bg_inds = np.random.choice(bg_inds,
                                       size=bg_rois_per_this_image,
                                       replace=False)
        # The indices that we're selecting (both fg and bg)
        keep_inds = np.append(fg_inds, bg_inds)
        # keep 128 labels and regions for training
        # the fast-rcnn paper uses batch_size=2 with 64 samples per image; this code
        # follows the faster-rcnn setting instead: batch_size=1 with 128 samples per image
        labels = labels[keep_inds]
        labels[fg_rois_per_this_image:] = 0
        regions_target = all_regions[keep_inds]
        # convert regions to regression values tx, ty, tw, th
        bbox_target_data = self._transform_regions(
            regions_target, gt_boxes[gt_assignment[keep_inds], :4])
        bbox_targets, bbox_inside_weights = self._get_bbox_regression_labels(
            bbox_target_data, labels)
        # prepend the batch index (all zeros) so each region is (batch, x1, y1, x2, y2)
        regions_target = np.vstack(
            (np.zeros(regions_target.shape[0], ), regions_target[:, 0],
             regions_target[:, 1], regions_target[:, 2],
             regions_target[:, 3])).transpose()

        labels = labels.reshape((1, -1))
        regions_target = regions_target.reshape((1, -1, 5))
        # background class excluded, hence num_classes - 1
        bbox_targets = bbox_targets.reshape(
            (1, -1, (self._num_classes - 1) * 4))
        bbox_inside_weights = bbox_inside_weights.reshape(
            (1, -1, (self._num_classes - 1) * 4))
        bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(
            np.float32)
        return labels, regions_target, bbox_targets, bbox_inside_weights, bbox_outside_weights
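`_get_bbox_regression_labels` is not shown here; given the `(self._num_classes - 1) * 4` reshape above, it presumably expands the class-agnostic deltas into one 4-wide slot per foreground class. A sketch under that assumption:

import numpy as np

def get_bbox_regression_labels_sketch(bbox_target_data, labels, num_classes):
    """Expand (N, 4) deltas into per-class slots of width 4, background excluded."""
    num_fg_classes = num_classes - 1  # matches the (num_classes - 1) * 4 reshape above
    bbox_targets = np.zeros((len(labels), 4 * num_fg_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for i in np.where(labels > 0)[0]:
        start = 4 * (int(labels[i]) - 1)  # class 1 occupies the first slot
        bbox_targets[i, start:start + 4] = bbox_target_data[i]
        bbox_inside_weights[i, start:start + 4] = 1.0
    return bbox_targets, bbox_inside_weights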
Example #3
    def _data_generator(self, batch_size):
        data_augment = DataAugment(augment=self._data_augment,
                                   horizontal_flip=True,
                                   vertical_flip=True)
        im_size = self._cfg.im_size
        feat_stride = self._cfg.feat_stride
        feature_width, feature_height = round(im_size[0] / feat_stride), round(
            im_size[1] / feat_stride)
        anchor = Anchors(feature_size=(feature_height, feature_width),
                         feat_stride=feat_stride)
        i = 0
        while True:
            for _ in self._annotations:
                img_data = []
                rpn_labels = []
                rpn_bbox_targets = []
                rpn_bbox_inside_weights = []
                rpn_bbox_outside_weights = []
                total_gt_boxes = []
                for b in range(batch_size):
                    if i == 0 and self._shuffle:
                        self._random_shuffle()
                    # index by i so each batch draws successive (shuffled) samples,
                    # instead of repeating the same annotation batch_size times
                    annotation = self._annotations[i]
                    image_path, gt_boxes = self._parse_annotation(annotation)
                    # data augmentation to reduce overfitting
                    img, gt_boxes = data_augment(image_path, gt_boxes)
                    height, width = img.shape[:2]
                    # resize img
                    img = cv2.resize(img,
                                     im_size,
                                     interpolation=cv2.INTER_CUBIC)
                    # BGR -> RGB
                    img = img[:, :, (2, 1, 0)]
                    img = img.astype(np.float32)
                    img[:, :, 0] -= self._cfg.img_channel_mean[0]
                    img[:, :, 1] -= self._cfg.img_channel_mean[1]
                    img[:, :, 2] -= self._cfg.img_channel_mean[2]
                    img /= self._cfg.img_scaling_factor
                    img_data.append(img)
                    # resize gt_boxes
                    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
                    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)
                    # get anchors
                    all_anchors, A = anchor.get_anchors()
                    # total anchor count: h * w * k (e.g. 50 * 38 * 9)
                    total_anchors_num = len(all_anchors)
                    # keep only anchors fully inside the image:
                    # x bounded by the width (im_size[0]), y by the height (im_size[1])
                    allow_border = 0
                    inds_inside = np.where(
                        (all_anchors[:, 0] >= allow_border)
                        & (all_anchors[:, 1] >= allow_border)
                        & (all_anchors[:, 2] <= (im_size[0] + allow_border))
                        & (all_anchors[:, 3] <= (im_size[1] + allow_border)))[0]
                    anchors = all_anchors[inds_inside, :]
                    labels = np.empty(len(inds_inside), dtype=np.float32)
                    labels.fill(-1)
                    # compute IoU
                    overlaps = bbox_overlaps(np.ascontiguousarray(anchors),
                                             np.ascontiguousarray(gt_boxes))
                    argmax_overlaps = np.argmax(overlaps, axis=1)
                    max_overlaps = overlaps[np.arange(len(inds_inside)),
                                            argmax_overlaps]
                    gt_argmax_overlaps = np.argmax(overlaps, axis=0)
                    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                               np.arange(overlaps.shape[1])]
                    gt_argmax_overlaps = np.where(
                        overlaps == gt_max_overlaps)[0]
                    if not self._cfg.train_rpn_clobber_positives:
                        labels[max_overlaps <
                               self._cfg.train_rpn_negative_overlap] = 0
                    labels[gt_argmax_overlaps] = 1
                    labels[max_overlaps >
                           self._cfg.train_rpn_positive_overlap] = 1
                    if self._cfg.train_rpn_clobber_positives:
                        labels[max_overlaps <
                               self._cfg.train_rpn_negative_overlap] = 0
                    # cap the number of training anchors per image: positives + negatives = 256,
                    # kept as balanced as possible; if there are fewer than 128 positives,
                    # extra negatives fill the batch up to 256
                    num_fg = int(self._cfg.train_rpn_fg_fraction *
                                 self._cfg.train_rpn_batch_size)
                    fg_inds = np.where(labels == 1)[0]
                    if len(fg_inds) > num_fg:
                        # random sampling; replace=False means no anchor is drawn twice
                        disabled_inds = np.random.choice(fg_inds,
                                                         size=len(fg_inds) -
                                                         num_fg,
                                                         replace=False)
                        labels[disabled_inds] = -1
                    num_bg = self._cfg.train_rpn_batch_size - np.sum(
                        labels == 1)
                    bg_inds = np.where(labels == 0)[0]
                    if len(bg_inds) > num_bg:
                        disabled_inds = np.random.choice(bg_inds,
                                                         size=len(bg_inds) -
                                                         num_bg,
                                                         replace=False)
                        labels[disabled_inds] = -1

                    bbox_targets = self._compute_targets(
                        anchors, gt_boxes[argmax_overlaps, :])
                    bbox_inside_weights = np.zeros((len(inds_inside), 4),
                                                   dtype=np.float32)
                    bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                                    dtype=np.float32)
                    bbox_inside_weights[labels == 1, :] = np.array(
                        [1.0, 1.0, 1.0, 1.0])
                    if self._cfg.train_rpn_positive_weight < 0:
                        # uniform weighting of examples (given non-uniform sampling):
                        # every kept sample contributes equally to the loss
                        num_examples = np.sum(labels >= 0)
                        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
                        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
                    else:
                        assert ((self._cfg.train_rpn_positive_weight > 0) &
                                (self._cfg.train_rpn_positive_weight < 1))
                        # otherwise weight positives by p / num_positives and negatives by
                        # (1 - p) / num_negatives, i.e. loss = p * loss(pos) + (1 - p) * loss(neg),
                        # so the more numerous class cannot dominate the total loss
                        positive_weights = (
                            self._cfg.train_rpn_positive_weight /
                            np.sum(labels == 1))
                        negative_weights = (
                            (1.0 - self._cfg.train_rpn_positive_weight) /
                            np.sum(labels == 0))
                    # assign the sampling weights to the training samples
                    bbox_outside_weights[labels == 1] = positive_weights
                    bbox_outside_weights[labels == 0] = negative_weights
                    labels = self._unmap(labels,
                                         total_anchors_num,
                                         inds_inside,
                                         fill=-1)
                    # map the inside-anchor bbox_targets back onto the full anchor set (out-of-image anchors padded with 0)
                    bbox_targets = self._unmap(bbox_targets,
                                               total_anchors_num,
                                               inds_inside,
                                               fill=0)
                    # [H * W * A, 4]
                    bbox_inside_weights = self._unmap(bbox_inside_weights,
                                                      total_anchors_num,
                                                      inds_inside,
                                                      fill=0)
                    bbox_outside_weights = self._unmap(bbox_outside_weights,
                                                       total_anchors_num,
                                                       inds_inside,
                                                       fill=0)

                    # reshape:  [H * W * A] --> [H, W, A] --> [A, H, W]
                    labels = labels.reshape(
                        (feature_height, feature_width, A)).transpose(
                            (2, 0, 1))
                    labels = labels.reshape(
                        (A * feature_height, feature_width))
                    #  (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                    bbox_targets = bbox_targets.reshape(
                        (feature_height, feature_width, A * 4)).transpose(
                            (2, 0, 1))
                    #  (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                    bbox_inside_weights = bbox_inside_weights.reshape(
                        (feature_height, feature_width, A * 4)).transpose(
                            (2, 0, 1))
                    #  (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                    bbox_outside_weights = bbox_outside_weights.reshape(
                        (feature_height, feature_width, A * 4)).transpose(
                            (2, 0, 1))
                    rpn_labels.append(labels)
                    rpn_bbox_inside_weights.append(bbox_inside_weights)
                    rpn_bbox_outside_weights.append(bbox_outside_weights)
                    rpn_bbox_targets.append(bbox_targets)
                    total_gt_boxes.append(gt_boxes)
                    i = (i + 1) % self.sample_nums
                rpn_labels = np.array(rpn_labels)
                rpn_bbox_inside_weights = np.array(rpn_bbox_inside_weights)
                rpn_bbox_targets = np.array(rpn_bbox_targets)
                rpn_bbox_outside_weights = np.array(rpn_bbox_outside_weights)
                img_data = np.array(img_data)
                total_gt_boxes = np.asarray(total_gt_boxes)
                # (batch, 3 * 4 * A, H, W): bbox targets, inside weights and
                # outside weights concatenated along the channel axis
                rpn_bbox_targets = np.concatenate([
                    rpn_bbox_targets, rpn_bbox_inside_weights,
                    rpn_bbox_outside_weights
                ],
                                                  axis=1)
                yield img_data, [rpn_labels, rpn_bbox_targets], total_gt_boxes
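`_unmap` is also external to this snippet; judging from how it is called (data, total anchor count, inside indices, fill value), it likely scatters the subset back into a full-size array. A sketch under that assumption:

import numpy as np

def unmap_sketch(data, count, inds, fill=0):
    """Scatter rows computed on the inside-image anchors back to all `count` anchors."""
    if data.ndim == 1:
        ret = np.full((count,), fill, dtype=np.float32)
        ret[inds] = data
    else:
        ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
        ret[inds, :] = data
    return ret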
Example #4
from PIL import Image, ImageDraw
from utils.bbox_overlaps import bbox_overlaps
import torch

image_size = 500
base_image = Image.new("RGB", (image_size, image_size), color="#FFF")
boxes = [[15, 15, 28, 28], [45, 45, 10, 10], [30, 30, 30, 30]]
gt_boxes = [[15, 15, 30, 30], [45, 45, 15, 14]]
draw = ImageDraw.Draw(base_image)


def bbox_to_coords(bbox):
    # bbox is (cx, cy, h, w); convert to corner coordinates (x1, y1, x2, y2)
    cx, cy, h, w = bbox
    x1 = cx - w / 2
    y1 = cy - h / 2
    return x1, y1, cx + w / 2, cy + h / 2


for box in gt_boxes:
    draw.rectangle(bbox_to_coords(box), outline="#f00")

for box in boxes:
    draw.rectangle(bbox_to_coords(box), outline="#00f")

base_image.save("overlap.png", "PNG")
bboxes = torch.tensor(boxes).float()
gt_boxes = torch.tensor(gt_boxes).float()
out = bbox_overlaps(bboxes, gt_boxes)
print(out)
print(out.shape)
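Note that `bbox_to_coords` treats each box as (cx, cy, h, w), while IoU routines usually want corners; whether `utils.bbox_overlaps` expects centers or corners has to be checked against its implementation. For comparison, a corner-format IoU matrix in PyTorch:

import torch

def iou_corner_sketch(boxes, gt_boxes):
    """IoU matrix for (N, 4) vs (M, 4) boxes in (x1, y1, x2, y2) format."""
    area1 = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area2 = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
    lt = torch.max(boxes[:, None, :2], gt_boxes[None, :, :2])  # intersection top-left
    rb = torch.min(boxes[:, None, 2:], gt_boxes[None, :, 2:])  # intersection bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area1[:, None] + area2[None, :] - inter)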
Example #5
    def _data_generator(self, batch_size, is_svm):
        i = 0
        n = self.samples_num
        while True:
            total_img_data = []
            total_labels = []
            total_deltas = []
            for b in range(batch_size):
                if i == 0:
                    self._random_shuffle()
                annotation = self._annotations[i]
                image_path, gt_boxes = self._parse_annotation(annotation)
                img = cv2.imread(image_path)
                # height/width/channel
                height, width, _ = img.shape
                # img resize
                img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)

                # BGR -> RGB, then a simple normalization
                img = img[:, :, (2, 1, 0)]
                img = img.astype(np.float32)
                img = img / 255.
                # gt_box resize
                gt_boxes[:,
                         [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
                gt_boxes[:,
                         [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

                # each region holds x1, y1, x2, y2
                _, regions = selective_search(img,
                                              scale=200,
                                              sigma=0.9,
                                              min_size=50)

                rects = np.asarray(
                    [list(region['rect']) for region in regions])
                selected_imgs = []
                rects_kept = []
                candidates = set()
                # filter out duplicate and tiny boxes
                for r in rects:
                    x1, y1, x2, y2 = r
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(
                        round(x2)), int(round(y2))
                    if (x1, y1, x2, y2) in candidates:
                        continue
                    if (x2 - x1) * (y2 - y1) < 220:
                        continue
                    crop_img = img[y1:y2, x1:x2, :]
                    # resize the crop back to the network input size
                    crop_img = cv2.resize(crop_img,
                                          im_size,
                                          interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)
                    candidates.add((x1, y1, x2, y2))
                    rects_kept.append([x1, y1, x2, y2])

                # keep insertion order so rects stay aligned with selected_imgs
                # (iterating over the set would scramble the correspondence)
                rects = rects_kept
                # append the ground-truth boxes as proposals too
                for idx in range(len(gt_boxes)):
                    x1, y1, x2, y2 = gt_boxes[idx, 0:4]
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(
                        round(x2)), int(round(y2))
                    # resize after cropping
                    crop_img = img[y1:y2, x1:x2, :]
                    try:
                        crop_img = cv2.resize(crop_img,
                                              im_size,
                                              interpolation=cv2.INTER_CUBIC)
                        selected_imgs.append(crop_img)
                        rects.append(gt_boxes[idx, 0:4])
                    except cv2.error:
                        # skip degenerate gt boxes that cannot be cropped/resized
                        continue

                rects = np.asarray(rects)
                # compute IoU
                overlaps = bbox_overlaps(rects, gt_boxes)
                # for each rect, the index of the gt_box with the largest IoU
                argmax_overlaps = np.argmax(overlaps, axis=1)
                # decide the class label
                max_overlaps = np.max(overlaps, axis=1)
                threshold = cfg.THRESHOLD if is_svm else cfg.FINE_TUNE_THRESHOLD
                keep = np.where(max_overlaps >= threshold)[0]
                labels = np.empty(len(argmax_overlaps))
                # svm and fine-tune use different IoU thresholds
                if is_svm:
                    # svm copes well with small training sets, so the paper restricts
                    # the IoU ranges tightly to shrink the svm training set
                    # fill with -1 (ignored samples)
                    labels.fill(-1)
                    # ground-truth boxes are the positives; regions whose IoU exceeds
                    # the threshold (e.g. 0.3) are kept as "hard negative" background
                    bg_ids = np.where(max_overlaps > threshold)[0]
                    labels[bg_ids] = 0
                    # regions with IoU > 0.7 also count as positives of the matched gt class
                    fg_ids = np.where(max_overlaps > 0.7)[0]
                    labels[fg_ids] = gt_boxes[argmax_overlaps[fg_ids], 4]
                else:
                    labels.fill(0)
                    # regions above the threshold take the class of their matched gt
                    labels[keep] = gt_boxes[argmax_overlaps[keep], 4]
                # compute regression targets
                deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])
                total_deltas.append(deltas)
                total_labels.append(labels)
                total_img_data.append(selected_imgs)
                i = (i + 1) % n
            total_img_data = np.concatenate(total_img_data, axis=0)
            total_labels = np.concatenate(total_labels, axis=0)
            total_deltas = np.concatenate(total_deltas, axis=0)
            yield total_img_data, total_labels, total_deltas
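Assuming the same VocData wrapper as in Example #1 (the constructor arguments below are placeholders carried over from that example), the generator could be smoke-tested like this:

# hypothetical smoke test; VocData and its arguments are assumptions from Example #1
voc_data = VocData('~/segment_data', 2007, 'train', './data/voc_classes.txt')
g = voc_data._data_generator(batch_size=2, is_svm=True)
imgs, labels, deltas = next(g)
print(imgs.shape, labels.shape, deltas.shape)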
Example #6
def detect(save_img=False):
    imgsz = (
        320, 192
    ) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    weights, half = opt.weights, opt.half

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)

    # Initialize model
    model = Darknet(opt.cfg, imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Get names and colors
    names = load_classes(opt.names)
    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()
              ) if device.type != 'cpu' else None  # run once
    cls2id = dict(zip(names, range(0, len(names))))
    gt_cls_num = np.zeros((len(names)))
    tp = np.zeros((len(names)))
    fp = np.zeros((len(names)))
    fn = np.zeros((len(names)))
    tn = np.zeros((len(names)))
    acc = 0.0
    tot = 0.0
    with open(test_path, "r") as f:
        filenames = f.readlines()
        for filename in filenames:
            img_file = filename.strip() + ".jpg"
            xml_file = filename.strip() + ".xml"
            source = os.path.join(test_img_path, img_file)
            dataset = LoadImages(source, img_size=imgsz)
            xml_path = os.path.join(test_xml_path, xml_file)
            coords = read_xml(xml_path)
            if len(coords) == 0:
                print("No annotations\n")
                continue
            gt_bboxes = [coord[:4] for coord in coords]
            gt_labels = [coord[4] for coord in coords]
            for label in gt_labels:
                gt_cls_num[cls2id[label]] += 1
                tot += 1
            for path, img, im0s, vid_cap in dataset:
                img = torch.from_numpy(img).to(device)
                img = img.half() if half else img.float()  # uint8 to fp16/32
                img /= 255.0  # 0 - 255 to 0.0 - 1.0
                if img.ndimension() == 3:
                    img = img.unsqueeze(0)

                # Inference
                t1 = torch_utils.time_synchronized()
                pred = model(img, augment=opt.augment)[0]
                t2 = torch_utils.time_synchronized()

                # to float
                if half:
                    pred = pred.float()

                # Apply NMS
                pred = non_max_suppression(pred,
                                           opt.conf_thres,
                                           opt.iou_thres,
                                           multi_label=False,
                                           classes=opt.classes,
                                           agnostic=opt.agnostic_nms)

                # Apply Classifier
                if classify:
                    pred = apply_classifier(pred, modelc, img, im0s)

                # Process detections
                for j, det in enumerate(pred):  # detections for image j
                    p, s, im0 = path, '', im0s

                    s += '%gx%g ' % img.shape[2:]  # print string
                    gn = torch.tensor(
                        im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh
                    if det is not None and len(det):
                        # Rescale boxes from imgsz to im0 size
                        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                                  im0.shape).round()
                        det_bboxes, det_labels, det_scores = get_result(det)
                        ious = bbox_overlaps(np.array(det_bboxes),
                                             np.array(gt_bboxes))
                        ious_max = ious.max(axis=1)
                        ious_argmax = ious.argmax(axis=1)
                        gt_matched_det = np.ones((len(gt_bboxes))) * -1
                        det_matched_gt = np.ones((len(det_bboxes))) * -1
                        gt_matched_scores = np.zeros((len(gt_bboxes)))
                        for i in range(0, len(det_bboxes)):
                            if ious_max[i] > 0.5:
                                target_gt = ious_argmax[i]
                                if gt_matched_scores[target_gt] < det_scores[i]:
                                    gt_matched_scores[target_gt] = det_scores[
                                        i]
                                    gt_matched_det[target_gt] = i
                                    det_matched_gt[i] = target_gt
                            else:
                                fp[det_labels[i]] += 1

                        for i in range(0, len(det_matched_gt)):
                            gt = int(det_matched_gt[i])
                            if gt > -1:
                                if op.eq(names[det_labels[i]], gt_labels[gt]):
                                    tp[det_labels[i]] += 1
                                    assert (tp[det_labels[i]] <=
                                            gt_cls_num[det_labels[i]])
                                    acc += 1
                                else:
                                    fp[det_labels[i]] += 1

                        # count gt boxes never matched by any detection as misses;
                        # otherwise fn stays zero and recall always reads 1.0
                        for g in range(0, len(gt_matched_det)):
                            if gt_matched_det[g] == -1:
                                fn[cls2id[gt_labels[g]]] += 1

    mat = np.zeros((len(names), len(TABLE_HEAD)))
    for i in range(0, len(names)):
        mat[i][0] = i
        mat[i][1] = gt_cls_num[i]
        mat[i][2] = tp[i]
        mat[i][3] = fp[i]
        mat[i][4] = fn[i]
        mat[i][5] = tp[i] / (tp[i] + fp[i])
        mat[i][6] = tp[i] / (tp[i] + fn[i])
        print("%s: %.0f gt, %.0f det, %.0f tp, precision: %.6f, recall: %.6f" %
              (names[i], gt_cls_num[i], tp[i] + fp[i], tp[i], tp[i] /
               (tp[i] + fp[i]), tp[i] / (tp[i] + fn[i])))

    if os.path.exists("rzx_statistics.xlsx"):
        os.remove("rzx_statistics.xlsx")
    # Workbook() takes no filename; the path is passed to save() below
    workbook = openpyxl.Workbook()
    sheet = workbook.create_sheet("sheet")
    sheet.append(TABLE_HEAD)
    for i in range(0, len(names)):
        label = names[i]
        sheet.append([
            label,
            "%.0f" % gt_cls_num[i],
            "%.0f" % tp[i],
            "%.0f" % fp[i],
            "%.0f" % fn[i],
            "%.6f" % (tp[i] / (tp[i] + fp[i])),
            "%.6f" % (tp[i] / (tp[i] + fn[i]))
        ])

    workbook.save("rzx_statistics.xlsx")
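One caveat with the report above: classes with zero detections make tp + fp zero, so the precision/recall divisions emit nan (with a NumPy warning). A guarded helper, as a sketch:

import numpy as np

def safe_ratio(num, den):
    """Elementwise num / den, returning 0 where den == 0."""
    num = np.asarray(num, dtype=np.float64)
    den = np.asarray(den, dtype=np.float64)
    out = np.zeros_like(num)
    np.divide(num, den, out=out, where=den != 0)
    return out

# e.g. precision = safe_ratio(tp, tp + fp), recall = safe_ratio(tp, tp + fn)
print(safe_ratio(np.array([3.0, 0.0]), np.array([4.0, 0.0])))  # -> [0.75 0.  ]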