Example #1
0
def optim_decode_bbox3d(clses, bbox3d_projs, K, ref_dim, ref_loc):
    '''
    Fit a 3D box to each set of projected corner points with L-BFGS-B.

    The optimisation vector starts as
    ``[0, 1, dim[2], dim[0], dim[1]] + ref_loc`` where ``dim = ref_dim[cls]``.
    A fit is kept only when its final objective value is below 0.1.

    :param clses: (N, ) class ids, each used as a key/index into ``ref_dim``
    :param bbox3d_projs: (N, 8, 2)
    :param K: array with 9 elements, reshaped to (3, 3) (presumably the
        camera intrinsics -- confirm against the caller)
    :param ref_dim: per-class reference dimensions; ``ref_dim[cls]`` must
        expose at least three entries
    :param ref_loc: initial guess appended to the start vector; may be a
        list, tuple or 1-D array (presumably of length 3, since the last
        three entries of the solution are reported as the location)
    :return: ParamList with fields 'class', 'Ry', 'dimension', 'location'
        and 'K'; the array fields have zero rows when no fit is kept
    '''

    # Corner signs of a unit cube: x varies slowest, z fastest. After the
    # transpose the rows are x, y, z and the columns are the 8 corners.
    signs = [(i, j, k) for i in (1, -1) for j in (1, -1) for k in (1, -1)]
    Cor = np.array(list(zip(*signs))) * 0.5
    K = K.reshape(3, 3)
    K_row = K.reshape(1, 9)  # loop-invariant, stored once per kept fit
    # NOTE(review): SciPy documents `constraints` as unsupported by L-BFGS-B
    # (they appear to be ignored with a warning). Kept to preserve behaviour;
    # confirm whether a constrained method was intended.
    cons = constraint()
    # Accept tuples / arrays as well as lists: `list + tuple` raises and
    # `list + ndarray` would broadcast instead of concatenating.
    start_loc = list(ref_loc)

    dims = []
    Rys = []
    locs = []
    clses_new = []
    Ks = []

    options = {'disp': None, 'maxcor': 10, 'ftol': 2.220446049250313e-09, 'gtol': 1e-05, 'eps': 1e-08,
               'maxfun': 15000, 'maxiter': 15000, 'iprint': -1, 'maxls': 20, 'finite_diff_rel_step': None}
    for cls, UV in zip(clses, bbox3d_projs):
        dim = ref_dim[cls]
        X0 = np.array([0, 1, dim[2], dim[0], dim[1]] + start_loc)
        res = minimize(aimFun(Cor, K, UV.T), X0, method='L-BFGS-B',
                       jac=jac(Cor, K, UV.T), constraints=cons, options=options)

        # Reject fits whose residual is too large (NaN is rejected as well).
        if res.fun < 0.1:
            x = res.x
            # x[0], x[1] encode the yaw angle as the two arctan2 arguments.
            Rys.append(np.arctan2(x[0], x[1]))
            # Undo the (dim[2], dim[0], dim[1]) ordering used in X0.
            dims.append(np.array([x[3], x[4], x[2]]).reshape(1, 3))
            locs.append(np.array([x[-3], x[-2], x[-1]]).reshape(1, 3))
            clses_new.append(cls)
            Ks.append(K_row)

    out = ParamList((640, 640))
    out.add_field('class', clses_new)
    out.add_field('Ry', np.array(Rys))
    out.add_field('dimension', np.concatenate(dims, axis=0) if dims else np.zeros((0, 3)))
    out.add_field('location', np.concatenate(locs, axis=0) if locs else np.zeros((0, 3)))
    out.add_field('K', np.concatenate(Ks, axis=0) if Ks else np.zeros((0, 9)))
    return out
Example #2
0
    def __getitem__(self, index):
        '''
        Build one training/evaluation sample.

        When mosaic is enabled and the dataset is in training mode, three
        extra randomly chosen samples are loaded and merged with
        ``_apply_mosaic``; otherwise the single sample goes through
        ``_apply_padding``.

        :param index: index of the requested sample
        :return: (img, target, path, shape) where ``path`` is the image path
            of ``index`` and ``shape`` is the stored shape entry for ``index``
        '''
        use_mosaic = self._is_mosaic and self.is_training
        indices = [index]
        if use_mosaic:
            # 3 additional image indices
            indices += [
                random.randint(0, len(self._labels) - 1) for _ in range(3)
            ]
        images = []
        targets = []

        for idx in indices:
            img = self._load_image(idx)
            target = ParamList((img.shape[1], img.shape[0]))
            K = self._K[idx]
            _labels = self._labels[idx].copy()
            cls, noise_mask, repeats = self._transform_obj_label(
                self._labels[idx][:, 0].copy())
            # Duplicate label rows according to the per-row repeat counts
            # returned by the label transform.
            _labels = np.repeat(_labels, repeats=repeats, axis=0)
            N = len(cls)
            target.add_field('class', cls)
            # `np.int` was removed in NumPy 1.24; builtin `int` is the same dtype.
            target.add_field('img_id', np.zeros((N, ), dtype=int))
            target.add_field('bbox', _labels[:, 1:5])
            target.add_field('dimension', _labels[:, 5:8])
            target.add_field('alpha', _labels[:, 8])
            target.add_field('Ry', _labels[:, 9])
            target.add_field('location', _labels[:, -3:])
            # Entries whose class is -1 are masked out.
            mask = np.ones((N, ), dtype=int)
            mask[cls == -1] = 0
            target.add_field('mask', mask)
            target.add_field('noise_mask', noise_mask)
            # One flattened copy of K per object.
            target.add_field(
                'K', np.repeat(K.copy().reshape(1, 9), repeats=N, axis=0))

            if self._augment is not None:
                img, target = self._augment(img,
                                            targets=target,
                                            **self._aug_params)
            images.append(img)
            targets.append(target)

        if use_mosaic:
            img, target = self._apply_mosaic(images, targets)
        else:
            img, target = self._apply_padding(images, targets)

        # Convert
        img = np.ascontiguousarray(img)
        params = {'device': self._config.DEVICE}
        target = self._build_targets(target)
        params.update(self._norm_params)
        img, target = self._transform(img, targets=target, **params)
        path = os.path.join(self._root, 'training',
                            'image_2/{}.png'.format(self._image_files[index]))
        return img, target, path, self.__shapes[index]
Example #3
0
    def _build_targets(self, targets):
        '''
        Convert per-object annotations into training targets.

        Produces integer centre / vertex projections with their sub-pixel
        offsets, vertex validity masks and a per-class centre heatmap.

        :param targets: ParamList providing the fields 'img_id', 'mask',
            'noise_mask', 'K', 'bbox', 'location', 'Ry', 'dimension', 'class'
        :return: ParamList with the copied fields plus 'm_proj', 'm_off',
            'v_proj', 'v_off', 'v_coor_off', 'v_mask', 'mask_3d' and 'm_hm'
        '''
        outputs = ParamList(self._img_size, is_training=self.is_training)
        outputs.copy_field(targets, ['img_id', 'mask', 'noise_mask', 'K'])
        down_ratio = self._config.MODEL.DOWN_SAMPLE
        bboxes = targets.get_field('bbox') / down_ratio
        m_masks = targets.get_field('mask')

        # NOTE(review): the heatmap size uses a fixed factor of 4 while the
        # boxes are scaled by MODEL.DOWN_SAMPLE -- confirm these always agree.
        W, H = self._img_size[0] // 4, self._img_size[1] // 4
        N = m_masks.shape[0]
        centers = data_utils.bbox_center(bboxes)
        # `np.long` was removed in NumPy 1.24; use an explicit 64-bit integer.
        m_projs = centers.astype(np.int64)
        m_offs = centers - m_projs
        outputs.add_field('m_proj', m_projs)
        outputs.add_field('m_off', m_offs)

        locations = targets.get_field('location')
        Rys = targets.get_field('Ry')
        dimensions = targets.get_field('dimension')
        Ks = targets.get_field('K')
        # NOTE(review): scales the first six entries of each flattened K in
        # place; if get_field returns the stored array this also changes the
        # field held by `targets` -- confirm this is intended.
        Ks[:, 0:6] /= down_ratio
        vertexs, _, mask_3ds = kitti_utils.calc_proj2d_bbox3d(
            dimensions, locations, Rys, Ks.reshape(-1, 3, 3))
        # Swap the last two axes and drop the final entry along axis 1.
        vertexs = np.ascontiguousarray(
            np.transpose(vertexs, axes=[0, 2, 1]))[:, :-1]
        v_projs = vertexs.astype(np.int64)
        v_offs = vertexs - v_projs
        v_coor_offs = vertexs - centers.reshape(-1, 1, 2)
        # A vertex is valid when its integer projection lies inside the map.
        v_masks = (v_projs[..., 0] >= 0) & (v_projs[..., 0] < W) & (
            v_projs[..., 1] >= 0) & (v_projs[..., 1] < H)
        outputs.add_field('v_proj', v_projs)
        outputs.add_field('v_off', v_offs)
        outputs.add_field('v_coor_off', v_coor_offs)
        outputs.add_field('v_mask', v_masks)
        outputs.add_field('mask_3d', mask_3ds)

        if self._config.DATASET.GAUSSIAN_GEN_TYPE == 'dynamic_radius':
            gaussian_sigma, gaussian_radius = data_utils.dynamic_radius(bboxes)
        else:
            gaussian_sigma, gaussian_radius = data_utils.dynamic_sigma(
                bboxes, self._config.DATASET.BBOX_AREA_MAX,
                self._config.DATASET.BBOX_AREA_MIN)
        clses = targets.get_field('class')
        num_cls = len(self._classes)
        noise_masks = targets.get_field('noise_mask')
        # `np.float` was removed in NumPy 1.24; float64 is the same dtype.
        m_hm = np.zeros((num_cls, H, W), dtype=np.float64)
        # Only the centre heatmap is generated; no per-vertex heatmap is built.
        for i in range(N):
            m_mask = m_masks[i]
            noise_mask = noise_masks[i]
            mask_3d = mask_3ds[i]
            gaussian_kernel, xs, ys = None, None, None
            if m_mask | mask_3d:
                gaussian_kernel, xs, ys = data_utils.gaussian2D(
                    gaussian_sigma[i], gaussian_radius[i])
                if noise_mask:
                    # Overwrite the middle kernel entry for noisy objects.
                    gaussian_kernel[len(xs) // 2] = 0.9999
            if m_mask:
                m_proj = m_projs[i]
                cls = clses[i]
                m_xs = xs + m_proj[0]
                m_ys = ys + m_proj[1]
                # Keep only kernel samples that fall inside the heatmap.
                valid = (m_xs >= 0) & (m_xs < W) & (m_ys >= 0) & (m_ys < H)
                # Overlapping objects keep the element-wise maximum response.
                m_hm[cls, m_ys[valid], m_xs[valid]] = np.maximum(
                    m_hm[cls, m_ys[valid], m_xs[valid]],
                    gaussian_kernel[valid])
        outputs.add_field('m_hm', np.expand_dims(m_hm, axis=0))
        return outputs
    def __getitem__(self, index):
        '''
        Build one sample, optionally as a 4-image mosaic.

        When mosaic is enabled and the dataset is in training mode, three
        extra randomly chosen samples are loaded and merged with
        ``_apply_mosaic``; otherwise the single sample goes through
        ``_apply_padding``. The result is then passed through a fixed
        transform pipeline.

        :param index: index of the requested sample
        :return: (img, target, path, shape) where ``path`` is the image path
            of ``index`` and ``shape`` is the stored shape entry for ``index``
        '''
        use_mosaic = self._is_mosaic and self.is_training
        indices = [index]
        if use_mosaic:
            # 3 additional image indices
            indices += [random.randint(0, len(self._labels) - 1) for _ in range(3)]
        images = []
        targets = []
        transform = transforms.Compose([
            transforms.ImageTo(np.float32),
            transforms.Normalize(),
            transforms.ToPercentCoords(),
            transforms.ToXYWH(),
            transforms.ToTensor(),
            transforms.ToNCHW()
        ])
        for idx in indices:
            img = self._load_image(idx)
            K = self._K[idx]
            labels = self._labels[idx]
            N = len(labels)
            target = ParamList((img.shape[1], img.shape[0]))
            # `np.int` was removed in NumPy 1.24; builtin `int` is the same dtype.
            target.add_field('img_id', np.zeros((N,), dtype=int))
            target.add_field('class', labels[:, 0].copy())
            target.add_field('bbox', labels[:, 1:5].copy())
            target.add_field('dimension', labels[:, 5:8].copy())
            target.add_field('alpha', labels[:, 8].copy())
            target.add_field('Ry', labels[:, 9].copy())
            target.add_field('location', labels[:, -3:].copy())
            target.add_field('mask', np.ones((N,), dtype=int))
            # One flattened copy of K per object.
            target.add_field('K', np.repeat(K.copy().reshape(1, 9), repeats=N, axis=0))
            if self._augment is not None:
                # NOTE(review): `_config` is unpacked as keyword arguments, so
                # it must be a mapping here -- confirm against the constructor.
                img, target = self._augment(img, targets=target, **self._config)
            images.append(img)
            targets.append(target)

        if use_mosaic:
            img, target = self._apply_mosaic(images, targets)
        else:
            img, target = self._apply_padding(images, targets)

        # Convert
        img = np.ascontiguousarray(img)
        img, target = transform(img, targets=target)
        path = os.path.join(self._root, 'training', 'image_2/{}.png'.format(self._image_files[index]))
        return img, target, path, self.__shapes[index]