def _apply_mosaic(self, images, targets):
        """Tile four images into a 2x2 mosaic and merge their targets.

        Each image is centred inside its own ``(sw, sh)`` cell of a
        ``(2 * sh, 2 * sw)`` canvas that is pre-filled with the mean colour of
        the four inputs. The ``bbox`` field of every target is shifted by the
        same offset as its image and the four targets are merged into a single
        ``ParamList``. The mosaic is finally passed through
        ``transforms.RandomAffine2D`` with ``border=(-sh // 2, -sw // 2)``
        merged with (and overridable by) the entries of ``self._config``.

        Args:
            images: sequence of exactly four HxWxC arrays. Each image is
                presumably no larger than one cell (h <= sh, w <= sw) --
                TODO confirm against the augmentation that runs earlier.
            targets: sequence of exactly four targets exposing a ``bbox``
                field whose even columns are x and odd columns are y
                coordinates. The bbox arrays are shifted in place.

        Returns:
            The result of calling the ``RandomAffine2D`` transform on the
            mosaic image and the merged target.
        """
        assert len(images) == 4 and len(targets) == 4
        sw, sh = self._img_size
        c = images[0].shape[2]

        # Per-channel mean over the four images. The accumulator is sized by
        # the channel count (not by ``ndim``) so that the fill value always
        # broadcasts against the ``c``-channel canvas below.
        sum_rgb = np.zeros(c)
        for img in images:
            # cv2.mean returns a 4-element scalar; keep the real channels only.
            sum_rgb += np.array(cv2.mean(img))[:c]
        mean_rgb = sum_rgb / len(images)
        img4 = np.full((sh * 2, sw * 2, c), mean_rgb, dtype=np.uint8)  # base image with 4 tiles

        # Top-left corner (x, y) of each cell: TL, TR, BL, BR.
        offsets = [(0, 0), (sw, 0), (0, sh), (sw, sh)]
        target4 = ParamList((sw, sh))
        for (x_off, y_off), img, target in zip(offsets, images, targets):
            h, w, _ = img.shape
            # Centre the image inside its cell.
            pad_w = int(sw - w) // 2
            pad_h = int(sh - h) // 2
            y_st = pad_h + y_off
            x_st = pad_w + x_off
            img4[y_st:y_st + h, x_st:x_st + w] = img

            # Move the boxes along with the image.
            bbox = target.get_field('bbox')
            bbox[:, 0::2] += x_st
            bbox[:, 1::2] += y_st
            target.update_field('bbox', bbox)
            # np.clip(bbox[:, 0::2], 0, 2 * sw, out=bbox[:, 0::2])  # use with random_affine
            # np.clip(bbox[:, 1::2], 0, 2 * sh, out=bbox[:, 1::2])
            target4.merge(target)

        raff = transforms.RandomAffine2D()

        param = {
            'border': (-sh//2, -sw//2)
        }
        param.update(self._config)
        return raff(img4, target4, **param)
Esempio n. 2
0
 def collate_fn(batch):
     """Collate a list of dataset samples into one batch.

     Every sample is an ``(img, target, path, shape)`` tuple. The ``img_id``
     field of each target is overwritten with the sample's position in the
     batch, then all targets are merged into a single ``ParamList``.

     Returns:
         ``(stacked_images, merged_target, paths, shapes)`` where ``paths``
         and ``shapes`` are tuples in batch order.
     """
     imgs, sample_targets, paths, shapes = zip(*batch)  # transposed
     merged = ParamList((None, None))
     for batch_idx, sample_target in enumerate(sample_targets):
         # Tag every object of this sample with its batch position.
         img_ids = sample_target.get_field('img_id')
         img_ids[:] = batch_idx
         sample_target.update_field('img_id', img_ids)
         merged.merge(sample_target)
     # merged.to_tensor()
     return torch.stack(imgs, 0), merged, paths, shapes
Esempio n. 3
0
    def __getitem__(self, index):
        """Load, annotate, augment and encode the sample at ``index``.

        When mosaic augmentation is enabled during training, three further
        samples are drawn at random and combined with the requested one by
        ``_apply_mosaic``; otherwise the single sample goes through
        ``_apply_padding``. The merged target is then encoded by
        ``_build_targets`` and passed, together with the image, through
        ``self._transform``.

        Label columns as read here: 0 = class, 1:5 = bbox, 5:8 = dimension,
        8 = alpha, 9 = Ry, last three = location.

        Returns:
            ``(img, target, path, shape)`` where ``path`` and ``shape`` always
            refer to the requested ``index`` (not to the extra mosaic samples).
        """
        indices = [index]
        if self._is_mosaic and self.is_training:
            indices += [
                random.randint(0,
                               len(self._labels) - 1) for _ in range(3)
            ]  # 3 additional image indices
        images = []
        targets = []

        for idx in indices:
            img = self._load_image(idx)
            target = ParamList((img.shape[1], img.shape[0]))
            K = self._K[idx]
            _labels = self._labels[idx].copy()
            cls, noise_mask, repeats = self._transform_obj_label(
                self._labels[idx][:, 0].copy())
            # Rows are repeated so that they line up with the transformed classes.
            _labels = np.repeat(_labels, repeats=repeats, axis=0)
            N = len(cls)
            target.add_field('class', cls)
            # ``np.int`` was removed in NumPy 1.24; it was an alias of ``int``.
            target.add_field('img_id', np.zeros((N, ), dtype=int))
            target.add_field('bbox', _labels[:, 1:5])
            target.add_field('dimension', _labels[:, 5:8])
            target.add_field('alpha', _labels[:, 8])
            target.add_field('Ry', _labels[:, 9])
            target.add_field('location', _labels[:, -3:])
            # Objects whose class is -1 are masked out.
            mask = np.ones((N, ), dtype=int)
            mask[cls == -1] = 0
            target.add_field('mask', mask)
            target.add_field('noise_mask', noise_mask)
            # One flattened 3x3 intrinsic matrix per object.
            target.add_field(
                'K', np.repeat(K.copy().reshape(1, 9), repeats=N, axis=0))

            if self._augment is not None:
                img, target = self._augment(img,
                                            targets=target,
                                            **self._aug_params)
            images.append(img)
            targets.append(target)
        if self._is_mosaic and self.is_training:
            img, target = self._apply_mosaic(images, targets)
        else:
            img, target = self._apply_padding(images, targets)

        # Convert
        img = np.ascontiguousarray(img)
        params = {'device': self._config.DEVICE}
        target = self._build_targets(target)
        params.update(self._norm_params)
        img, target = self._transform(img, targets=target, **params)
        path = os.path.join(self._root, 'training',
                            'image_2/{}.png'.format(self._image_files[index]))
        return img, target, path, self.__shapes[index]
Esempio n. 4
0
    def _build_targets(self, targets):
        """Encode raw targets into the training targets stored in a ParamList.

        Produces, on the down-sampled grid:
          * ``m_proj`` / ``m_off``: integer 2D box centres and their sub-pixel
            remainders;
          * ``v_proj`` / ``v_off`` / ``v_coor_off`` / ``v_mask``: integer
            projected 3D-box vertices, their remainders, their offsets from
            the 2D centre, and whether each vertex lies inside the grid;
          * ``mask_3d``: as returned by ``kitti_utils.calc_proj2d_bbox3d``;
          * ``m_hm``: per-class centre heat map of shape (1, num_cls, H, W).

        ``img_id``, ``mask``, ``noise_mask`` and ``K`` are copied from
        ``targets``. Note that the ``K`` array obtained from ``targets`` is
        divided by the down-sample ratio in place.

        Args:
            targets: ParamList holding 'bbox', 'mask', 'noise_mask', 'class',
                'location', 'Ry', 'dimension' and 'K' fields.

        Returns:
            A new ParamList with the fields listed above.
        """
        outputs = ParamList(self._img_size, is_training=self.is_training)
        outputs.copy_field(targets, ['img_id', 'mask', 'noise_mask', 'K'])
        down_ratio = self._config.MODEL.DOWN_SAMPLE
        bboxes = targets.get_field('bbox') / down_ratio
        m_masks = targets.get_field('mask')

        # NOTE(review): the grid size uses a hard-coded 4 while the boxes are
        # scaled by MODEL.DOWN_SAMPLE -- confirm the two always agree.
        W, H = self._img_size[0] // 4, self._img_size[1] // 4
        N = m_masks.shape[0]
        centers = data_utils.bbox_center(bboxes)
        # ``np.long`` is unavailable on NumPy 1.24-1.26; it aliased ``int``.
        m_projs = centers.astype(int)
        m_offs = centers - m_projs
        outputs.add_field('m_proj', m_projs)
        outputs.add_field('m_off', m_offs)

        locations = targets.get_field('location')
        Rys = targets.get_field('Ry')
        dimensions = targets.get_field('dimension')
        Ks = targets.get_field('K')
        # Scale the first two rows of every flattened 3x3 intrinsic matrix
        # (in place) to the down-sampled resolution.
        Ks[:, 0:6] /= down_ratio
        vertexs, _, mask_3ds = kitti_utils.calc_proj2d_bbox3d(
            dimensions, locations, Rys, Ks.reshape(-1, 3, 3))
        # Swap the last two axes and drop the final entry along axis 1.
        vertexs = np.ascontiguousarray(np.transpose(vertexs, axes=[0, 2,
                                                                   1]))[:, :-1]
        v_projs = vertexs.astype(int)
        v_offs = vertexs - v_projs
        v_coor_offs = vertexs - centers.reshape(-1, 1, 2)
        v_masks = (v_projs[..., 0] >= 0) & (v_projs[..., 0] < W) & (
            v_projs[..., 1] >= 0) & (v_projs[..., 1] < H)
        outputs.add_field('v_proj', v_projs)
        outputs.add_field('v_off', v_offs)
        outputs.add_field('v_coor_off', v_coor_offs)
        outputs.add_field('v_mask', v_masks)
        outputs.add_field('mask_3d', mask_3ds)

        if self._config.DATASET.GAUSSIAN_GEN_TYPE == 'dynamic_radius':
            gaussian_sigma, gaussian_radius = data_utils.dynamic_radius(bboxes)
        else:
            gaussian_sigma, gaussian_radius = data_utils.dynamic_sigma(
                bboxes, self._config.DATASET.BBOX_AREA_MAX,
                self._config.DATASET.BBOX_AREA_MIN)
        clses = targets.get_field('class')
        num_cls = len(self._classes)
        noise_masks = targets.get_field('noise_mask')
        # ``np.float`` was removed in NumPy 1.24; it was an alias of ``float``.
        m_hm = np.zeros((num_cls, H, W), dtype=float)
        for i in range(N):
            m_mask = m_masks[i]
            noise_mask = noise_masks[i]
            mask_3d = mask_3ds[i]
            gaussian_kernel, xs, ys = None, None, None
            if m_mask | mask_3d:
                gaussian_kernel, xs, ys = data_utils.gaussian2D(
                    gaussian_sigma[i], gaussian_radius[i])
                if noise_mask:
                    # Presumably the middle sample is the kernel peak; it is
                    # capped just below 1 for noisy objects -- TODO confirm.
                    gaussian_kernel[len(xs) // 2] = 0.9999
            if m_mask:
                # to-do
                m_proj = m_projs[i]
                cls = clses[i]
                m_xs = xs + m_proj[0]
                m_ys = ys + m_proj[1]
                # Keep only kernel samples that land inside the heat map and
                # merge them with an element-wise maximum.
                valid = (m_xs >= 0) & (m_xs < W) & (m_ys >= 0) & (m_ys < H)
                m_hm[cls, m_ys[valid], m_xs[valid]] = np.maximum(
                    m_hm[cls, m_ys[valid], m_xs[valid]],
                    gaussian_kernel[valid])
            # TODO: a per-vertex heat map ('v_hm') for objects with mask_3d
            # set is not generated yet.
        outputs.add_field('m_hm', np.expand_dims(m_hm, axis=0))
        return outputs
Esempio n. 5
0
def optim_decode_bbox3d(clses, bbox3d_projs, K, ref_dim, ref_loc):
    '''
    Recover 3D boxes from their projected corners by numerical optimisation.

    :param clses: (N, ) class index per object, used to look up ``ref_dim``
    :param bbox3d_projs: (N, 8, 2) projected corner coordinates per object
    :param K: camera intrinsics, reshaped to (3, 3)
    :param ref_dim: per-class reference dimensions, indexed by class
    :param ref_loc: list of three values appended to the start vector
    :return: ParamList with 'class', 'Ry', 'dimension', 'location' and 'K'
             for the objects whose final objective value is below 0.1
    '''
    # Corner signs of a unit cube (x varies slowest, z fastest), scaled to 0.5.
    signs = (1, -1)
    triples = [(sx, sy, sz) for sx in signs for sy in signs for sz in signs]
    Cor = np.vstack(list(zip(*triples))) * 0.5

    K = K.reshape(3, 3)
    cons = constraint()
    options = {'disp': None, 'maxcor': 10, 'ftol': 2.220446049250313e-09, 'gtol': 1e-05, 'eps': 1e-08,
               'maxfun': 15000, 'maxiter': 15000, 'iprint': -1, 'maxls': 20, 'finite_diff_rel_step': None}

    kept_dims, kept_rys, kept_locs, kept_clses, kept_Ks = [], [], [], [], []
    for cls, UV in zip(clses, bbox3d_projs):
        dim = ref_dim[cls]
        # Start vector: two orientation terms, reordered reference dims, location.
        X0 = np.array([0, 1] + [dim[2], dim[0], dim[1]] + ref_loc)
        res = minimize(aimFun(Cor, K, UV.T),
                       X0,
                       method='L-BFGS-B',
                       jac=jac(Cor, K, UV.T),
                       constraints=cons,
                       options=options)

        # Discard fits whose final objective is not small enough.
        if not (res.fun < 0.1):
            continue

        x = res.x
        kept_rys.append(np.arctan2(x[0], x[1]))
        kept_dims.append(np.array([x[3], x[4], x[2]]).reshape(1, 3))
        kept_locs.append(np.array([x[-3], x[-2], x[-1]]).reshape(1, 3))
        kept_clses.append(cls)
        kept_Ks.append(K.reshape(1, 9))

    def _stack(rows, width):
        # All result lists grow together, so one emptiness check covers them.
        if len(kept_dims):
            return np.concatenate(rows, axis=0)
        return np.zeros((0, width))

    out = ParamList((640, 640))
    out.add_field('class', kept_clses)
    out.add_field('Ry', np.array(kept_rys))
    out.add_field('dimension', _stack(kept_dims, 3))
    out.add_field('location', _stack(kept_locs, 3))
    out.add_field('K', _stack(kept_Ks, 9))
    return out
Esempio n. 6
0
def train(config):
    """Run the training loop described by ``config``.

    Steps: seed the RNGs, delete artefacts of a previous run from
    ``config['logdir']``, build the train and test dataloaders, create the
    model, solver and loss, optionally load a ``.pt`` checkpoint, then for
    every epoch train on all batches, evaluate with ``test.test`` (unless
    ``config['notest']`` is set and it is not the final epoch), append a line
    to ``results.txt``, log to TensorBoard and save checkpoints.

    ``config`` is modified in place (loss weights and scopes are overridden
    for the ``only_3d`` / ``only_2d`` modes and ``config['cls']`` is rescaled).

    The function relies on names that are not defined inside it and must be
    provided by the enclosing module: ``device``, ``tb_writer``, ``last``,
    ``best`` and ``cfg``.

    Args:
        config: dict-like training configuration.

    Returns:
        The most recent evaluation results tuple
        (P, R, mAP, F1, val GIoU, val objectness, val classification); all
        zeros if no evaluation ran. Training stops early and returns the
        current results if the loss becomes non-finite.
    """
    utils.init_seeds(1)
    results_file = os.path.join(config['logdir'], 'results.txt')
    # Remove previous results
    for f in glob.glob(os.path.join(
            config['logdir'], 'train_batch*.jpg')) + glob.glob(results_file):
        os.remove(f)

    epochs = config['epochs']  # 300
    batch_size = config['batch_size']  # 64
    weights = config['weights']  # initial training weights
    imgsz, imgsz_test = config['img_size']
    strides = config['detect_strides']
    num_classes = config['num_classes']
    if config['only_3d']:
        # Train the 3D head only: skip testing and zero the 2D loss weights.
        config['notest'] = True
        config['include_scopes'] = ['model.24.bbox3d_headers']
        config['giou'] = 0.
        config['obj'] = 0.
        config['cls'] = 0.
    elif config['only_2d']:
        # Train everything except the 3D head: zero the 3D loss weights.
        config['exclude_scopes'] = ['model.24.bbox3d_headers']
        config['conf'] = 0.
        config['orient'] = 0.
        config['dim'] = 0.

    config[
        'cls'] *= num_classes / 80.  # scale coco-tuned config['cls'] to current dataset
    gs = int(max(strides))

    # dataset
    with open(config['data']) as f:
        # NOTE(review): FullLoader is not safe for untrusted YAML; prefer
        # yaml.safe_load if the data file can come from outside.
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    dataset_path = data_dict['dataset_path']

    # Trainloader
    test_cfg = {}
    test_cfg.update(config)
    # NOTE(review): ``cfg`` is not defined in this function while every other
    # lookup uses ``config`` -- confirm a module-level ``cfg`` exists.
    dataloader, dataset = create_dataloader(dataset_path,
                                            config,
                                            transform=TrainAugmentation(
                                                cfg['img_size'][0],
                                                mean=config['brg_mean']),
                                            is_training=True)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    assert mlc < num_classes, \
        'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, num_classes, config['cfg'])

    # Testloader
    test_cfg['is_rect'] = True
    test_cfg['is_mosaic'] = False
    testloader = create_dataloader(dataset_path,
                                   test_cfg,
                                   transform=TestTransform(
                                       cfg['img_size'][0],
                                       mean=config['brg_mean']),
                                   is_training=False,
                                   split='test')[0]

    # Create model
    model = Model(config).to(device)
    nb = len(dataloader)  # number of batches
    max_step_burn_in = max(
        3 * nb, 1e3)  # burn-in iterations, max(3 epochs, 1k iterations)
    solver = Solver(model,
                    config,
                    max_steps_burn_in=max_step_burn_in,
                    apex=None)
    losser = losses.YoloLoss(model)
    # Load Model
    start_epoch, best_fitness = 0, 0.0
    checkpointer = model_utils.CheckPointer(model,
                                            solver,
                                            save_dir='./weights',
                                            save_to_disk=True,
                                            device=device)
    if weights.endswith('.pt'):  # pytorch format
        # NOTE(review): solver state and start epoch are restored only when
        # config['resume'] is falsy -- confirm this is the intended meaning.
        ckpt = checkpointer.load(weights,
                                 use_latest=False,
                                 load_solver=(not config['resume']))
        # load results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt
        if not config['resume']:
            start_epoch = ckpt['epoch'] + 1
        best_fitness = ckpt['best_fitness']
        del ckpt
    else:
        solver.build_optim_and_scheduler()

    if tb_writer:
        # Class frequency
        labels = np.concatenate(dataset.labels, 0)
        c = torch.tensor(labels[:, 0])  # classes
        visual_utils.plot_labels(labels, config['logdir'])
        tb_writer.add_histogram('classes', c, 0)

    # Check anchors
    if not config['noautoanchor']:
        anchor_utils.check_anchors(dataset,
                                   model=model,
                                   thr=config['anchor_t'],
                                   imgsz=imgsz)

    # Start training
    t0 = time.time()
    results = (
        0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
    print('Using %g dataloader workers' % dataloader.num_workers)
    print('Starting training for %g epochs...' % epochs)
    for epoch in range(start_epoch, epochs):
        # epoch ---------------------------------------------------------------
        model.train()
        mloss = torch.zeros(7, device=device)  # mean losses
        print(('\n' + '%10s' * 12) %
              ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'conf', 'orient',
               'dim', 'total', 'targets', 'img_size', 'lr'))
        pbar = tqdm.tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:
            # batch -----------------------------------------------------------
            targets.delete_by_mask()
            targets.to_float32()
            # ``targ`` keeps the structured targets for the loss; ``targets``
            # is flattened below to (img_id, class, bbox...) rows for plotting.
            targ = ParamList(targets.size, True)
            targ.copy_from(targets)
            img_id = targets.get_field('img_id')
            classes = targets.get_field('class')
            bboxes = targets.get_field('bbox')

            targets = torch.cat(
                [img_id.unsqueeze(-1),
                 classes.unsqueeze(-1), bboxes], dim=-1)
            ni = i + nb * epoch  # number integrated batches (since train start)
            # Cast to float32; dividing by 1.0 leaves the value range unchanged.
            imgs = imgs.to(device).float() / 1.0
            solver.update(epoch)
            # Multi-scale
            if config['multi_scale']:
                # randrange requires integer bounds (floats raise TypeError on
                # Python 3.12+).
                sz = random.randrange(int(imgsz * 0.5),
                                      int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Forward
            pred = model(imgs)

            # Loss
            # loss, loss_items = losses.calc_loss(pred, targets.to(device), model)
            loss, loss_items = losser(pred, targ)
            # print(loss_items)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            solver.optimizer_step(loss)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() /
                             1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.4g' * 10) % (
                '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0],
                imgs.shape[-1], solver.learn_rate)
            pbar.set_description(s)

            # Plot
            if ni < 3:
                f = os.path.join(config['logdir'],
                                 'train_batch%g.jpg' % ni)  # filename
                result = visual_utils.plot_images(images=imgs,
                                                  targets=targets,
                                                  paths=paths,
                                                  fname=f)
                if tb_writer and result is not None:
                    tb_writer.add_image(f,
                                        result,
                                        dataformats='HWC',
                                        global_step=epoch)
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch -------------------------------------------------------
        solver.scheduler_step()
        # mAP
        solver.ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not config['notest'] or final_epoch:  # Calculate mAP
            results, maps, times = test.test(
                config['data'],
                batch_size=batch_size,
                imgsz=imgsz_test,
                save_json=final_epoch
                and config['data'].endswith(os.sep + 'kitti.yaml'),
                model=solver.ema.model,
                logdir=config['logdir'],
                dataloader=testloader)

        # Write
        with open(results_file, 'a') as f:
            f.write(s + '%10.4g' * 7 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        # Tensorboard
        if tb_writer:
            # NOTE(review): 13 values (6 mean losses + 7 results) are zipped
            # with 10 tags, so tags after the third do not line up with the
            # values they are paired with -- confirm and extend the tag list.
            tags = [
                'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/F1', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss'
            ]
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        fi = utils.fitness(np.array(results).reshape(
            1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi

        # Save model
        save = (not config['nosave']) or final_epoch
        if save:
            with open(results_file, 'r') as f:  # create checkpoint
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'training_results': f.read()
                }

            # Save last, best and delete
            checkpointer.save(last, **ckpt)
            if (best_fitness == fi) and not final_epoch:
                checkpointer.save(best, **ckpt)
            del ckpt

        # end epoch -----------------------------------------------------------
    # end training
    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1,
                                                    (time.time() - t0) / 3600))

    torch.cuda.empty_cache()
    return results
Esempio n. 7
0
def test(
        data,
        weights=None,
        batch_size=16,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        verbose=False,
        model=None,
        dataloader=None,
        logdir='./runs',
        merge=False):
    """Evaluate a detector and return precision/recall/mAP statistics.

    Two modes:
      * ``model is None``: standalone evaluation. The model is loaded from
        ``weights`` and the device, NMS merge flag and task are read from a
        module-level ``opt`` object.
      * ``model`` given: called from training. The model's own device is used
        and the validation loss is accumulated as well.

    Args:
        data: path to the dataset YAML (read for 'num_classes', 'names' and,
            in standalone mode, the 'test'/'val' paths).
        weights: checkpoint path, used only when ``model`` is None (and for
            the JSON file name).
        batch_size: batch size for a dataloader created here.
        imgsz: image size for a dataloader created here.
        conf_thres: confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        save_json: collect detections and write/evaluate a COCO-style JSON.
        verbose: also print per-class results.
        model: model to evaluate, or None to load from ``weights``.
        dataloader: dataloader to use, or None to create one.
        logdir: directory for the plotted first batch.
        merge: use merge-NMS (overwritten by ``opt.merge`` when a dataloader
            is created here).

    Returns:
        ``((mp, mr, map50, map, *mean_losses), maps, t)`` where ``maps`` holds
        one AP value per class and ``t`` is
        (inference ms, NMS ms, total ms, imgsz, imgsz, batch_size).
    """
    # Initialize/load model and set device
    if model is None:
        training = False
        device = torch_utils.select_device(opt.device, batch_size=batch_size)

        # Remove previous
        for f in glob.glob(os.path.join(logdir, 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = torch.load(
            weights, map_location=device)['model'].float()  # load to FP32
        torch_utils.model_info(model)
        model.fuse()
        model.to(device)

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device

    # Half
    half = device.type != 'cpu' and torch.cuda.device_count(
    ) == 1  # half precision only supported on single-GPU
    half = False  # half precision is force-disabled regardless of the device
    if half:
        model.half()  # to FP16

    # Configure
    model.eval()
    with open(data) as f:
        # NOTE(review): FullLoader is not safe for untrusted YAML.
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = int(data['num_classes'])  # number of classes
    iouv = torch.linspace(0.5, 0.95,
                          10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    losser = YoloLoss(model)
    # Dataloader
    if dataloader is None:  # not training
        merge = opt.merge  # use Merge NMS
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data[
            'val']  # path to val/test images
        dataloader = kitti.create_dataloader(path,
                                             imgsz,
                                             batch_size,
                                             int(max(model.stride)),
                                             config=None,
                                             augment=False,
                                             cache=False,
                                             pad=0.5,
                                             rect=True)[0]

    seen = 0
    names = data['names']
    kitti8class = data_utils.kitti8_classes()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm.tqdm(dataloader, desc=s)):
        targets.delete_by_mask()
        targets.to_float32()
        # ``targ`` keeps the structured targets for the loss; ``targets`` is
        # flattened to (img_id, class, bbox...) rows for the metric code.
        targ = ParamList(targets.size, True)
        targ.copy_from(targets)
        img_id = targets.get_field('img_id')
        classes = targets.get_field('class')
        bboxes = targets.get_field('bbox')
        targets = torch.cat(
            [img_id.unsqueeze(-1),
             classes.unsqueeze(-1), bboxes], dim=-1)
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        # img /= 1.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                # loss += calc_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls
                loss += losser([x.float() for x in train_out], targ)[1][:3]
            # Run NMS
            t = torch_utils.time_synchronized()
            output = postprocess.apply_nms(inf_out,
                                           nc,
                                           conf_thres=conf_thres,
                                           iou_thres=iou_thres,
                                           merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            utils.clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary

            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                utils.scale_coords(img[si].shape[1:], box, shapes[si][0],
                                   shapes[si][1])  # to original shape
                box = data_utils.xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # Distinct loop names so the precision variable ``p`` is not
                # overwritten.
                for pred_row, box_row in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': kitti8class[int(pred_row[5])],
                        'bbox': [round(x, 3) for x in box_row],
                        'score': round(pred_row[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = data_utils.xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = metrics_utils.box_iou(
                            pred[pi, :4],
                            tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = os.path.join(logdir,
                             'test_batch%g_gt.jpg' % batch_i)  # filename
            visual_utils.plot_images(img, targets, paths, f,
                                     names)  # ground truth
            f = os.path.join(logdir, 'test_batch%g_pred.jpg' % batch_i)
            visual_utils.plot_images(img,
                                     utils.output_to_target(
                                         output, width, height), paths, f,
                                     names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = metrics_utils.ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Save JSON
    if save_json and map50 and len(jdict):
        imgIds = [
            int(Path(x).stem.split('_')[-1])
            for x in dataloader.dataset.img_files
        ]
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(
                glob.glob('../coco/annotations/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:
                                        2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # COCO evaluation is best effort: report the failure and keep the
            # locally computed metrics. ``Exception`` (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            print(
                'WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                'See https://github.com/cocodataset/cocoapi/issues/356')
            print('pycocotools evaluation failed: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
    def __getitem__(self, index):
        """Load, annotate, augment and convert the sample at ``index``.

        When mosaic augmentation is enabled during training, three further
        samples are drawn at random and combined with the requested one by
        ``_apply_mosaic``; otherwise the single sample goes through
        ``_apply_padding``. The result is then converted by a fixed transform
        chain (float32 image, normalisation, percent coordinates, XYWH boxes,
        tensors, NCHW layout).

        Label columns as read here: 0 = class, 1:5 = bbox, 5:8 = dimension,
        8 = alpha, 9 = Ry, last three = location.

        Returns:
            ``(img, target, path, shape)`` where ``path`` and ``shape`` always
            refer to the requested ``index`` (not to the extra mosaic samples).
        """
        indices = [index]
        if self._is_mosaic and self.is_training:
            indices += [random.randint(0, len(self._labels) - 1) for _ in range(3)]  # 3 additional image indices
        images = []
        targets = []
        transform = transforms.Compose([
            transforms.ImageTo(np.float32),
            transforms.Normalize(),
            transforms.ToPercentCoords(),
            transforms.ToXYWH(),
            transforms.ToTensor(),
            transforms.ToNCHW()
        ])
        for idx in indices:
            img = self._load_image(idx)
            K = self._K[idx]
            # K = self._load_calib_param(idx)
            N = len(self._labels[idx])
            target = ParamList((img.shape[1], img.shape[0]))
            # ``np.int`` was removed in NumPy 1.24; it was an alias of ``int``.
            target.add_field('img_id', np.zeros((N,), dtype=int))
            target.add_field('class', self._labels[idx][:, 0].copy())
            target.add_field('bbox', self._labels[idx][:, 1:5].copy())
            target.add_field('dimension', self._labels[idx][:, 5:8].copy())
            target.add_field('alpha', self._labels[idx][:, 8].copy())
            target.add_field('Ry', self._labels[idx][:, 9].copy())
            target.add_field('location', self._labels[idx][:, -3:].copy())
            target.add_field('mask', np.ones((N,), dtype=int))
            # One flattened 3x3 intrinsic matrix per object.
            target.add_field('K', np.repeat(K.copy().reshape(1, 9), repeats=N, axis=0))
            if self._augment is not None:
                img, target = self._augment(img, targets=target, **self._config)
            images.append(img)
            targets.append(target)
        if self._is_mosaic and self.is_training:
            img, target = self._apply_mosaic(images, targets)
        else:
            img, target = self._apply_padding(images, targets)

        # Convert
        # img = img[:, :, ::-1]
        img = np.ascontiguousarray(img)
        img, target = transform(img, targets=target)
        path = os.path.join(self._root, 'training', 'image_2/{}.png'.format(self._image_files[index]))
        # print('K: ', target.get_field('K'))
        # print('shape: ', self.__shapes[index])
        # print('')
        return img, target, path, self.__shapes[index]