def optim_decode_bbox3d(clses, bbox3d_projs, K, ref_dim, ref_loc):
    '''
    Recover 3D boxes from projected box corners by numerical optimisation.

    For every detection an optimisation is started from the class reference
    dimensions and ``ref_loc``; only solutions whose final objective value is
    below 0.1 are kept.

    :param clses: (N, ) class ids; each one is used to index ``ref_dim``
    :param bbox3d_projs: (N, 8, 2) projected corners, one (8, 2) array per detection
    :param K: camera intrinsics, reshaped to (3, 3) internally
    :param ref_dim: per-class reference dimensions; entries 0, 1 and 2 of
        ``ref_dim[cls]`` are read
    :param ref_loc: initial location values appended to the start vector; any
        sequence (list, tuple or 1-D array) is accepted
    :return: ParamList with the fields 'class', 'Ry', 'dimension', 'location'
        and 'K' for the detections that were kept
    '''
    # Corner sign pattern of a unit cube scaled to +/-0.5: row 0 holds the x
    # signs, row 1 the y signs, row 2 the z signs. The column order follows
    # nested x -> y -> z iteration over (1, -1).
    signs = (1, -1)
    corner_signs = [(sx, sy, sz) for sx in signs for sy in signs for sz in signs]
    Cor = np.array(list(zip(*corner_signs))) * 0.5

    K = K.reshape(3, 3)
    k_row = K.reshape(1, 9)

    # NOTE(review): SciPy documents ``constraints`` as supported only by
    # COBYLA, SLSQP and trust-constr; with 'L-BFGS-B' they are not applied.
    # Kept as-is to preserve current behaviour -- confirm the intended method.
    cons = constraint()
    options = {'disp': None, 'maxcor': 10, 'ftol': 2.220446049250313e-09,
               'gtol': 1e-05, 'eps': 1e-08, 'maxfun': 15000, 'maxiter': 15000,
               'iprint': -1, 'maxls': 20, 'finite_diff_rel_step': None}

    dims = []
    Rys = []
    locs = []
    clses_new = []
    Ks = []
    for cls, UV in zip(clses, bbox3d_projs):
        dim = ref_dim[cls]
        # Start vector: two rotation terms, three (reordered) reference
        # dimensions, then the reference location. ``list(ref_loc)`` lets
        # tuples and arrays be concatenated like a plain list.
        X0 = np.array([0, 1, dim[2], dim[0], dim[1]] + list(ref_loc))
        uv = UV.T
        res = minimize(aimFun(Cor, K, uv), X0, method='L-BFGS-B',
                       jac=jac(Cor, K, uv), constraints=cons,
                       options=options)
        if res.fun < 0.1:
            x = res.x
            # The first two solution entries are fed to arctan2 to get Ry.
            Rys.append(np.arctan2(x[0], x[1]))
            # Undo the reordering applied to the dimensions in X0.
            dims.append(np.array([x[3], x[4], x[2]]).reshape(1, 3))
            locs.append(np.array([x[-3], x[-2], x[-1]]).reshape(1, 3))
            clses_new.append(cls)
            Ks.append(k_row)

    # dims, locs and Ks always grow together, so one emptiness check covers
    # all three concatenations.
    has_result = len(dims) > 0
    out = ParamList((640, 640))
    out.add_field('class', clses_new)
    out.add_field('Ry', np.array(Rys))
    out.add_field('dimension',
                  np.concatenate(dims, axis=0) if has_result else np.zeros((0, 3)))
    out.add_field('location',
                  np.concatenate(locs, axis=0) if has_result else np.zeros((0, 3)))
    out.add_field('K',
                  np.concatenate(Ks, axis=0) if has_result else np.zeros((0, 9)))
    return out
def __getitem__(self, index):
    '''
    Load the sample at ``index`` and turn it into network inputs and targets.

    When mosaic mode is enabled and the dataset is in training mode, three
    further randomly chosen samples are loaded and combined with the
    requested one; otherwise the single sample is padded.

    :param index: position of the requested sample
    :return: tuple ``(img, target, path, shape)`` where ``path`` is the image
        file path built from ``self._root`` and ``shape`` is the entry of
        ``self.__shapes`` for ``index``
    '''
    use_mosaic = self._is_mosaic and self.is_training

    indices = [index]
    if use_mosaic:
        # 3 additional image indices
        indices += [
            random.randint(0, len(self._labels) - 1) for _ in range(3)
        ]

    images = []
    targets = []
    for idx in indices:
        img = self._load_image(idx)
        target = ParamList((img.shape[1], img.shape[0]))
        K = self._K[idx]

        _labels = self._labels[idx].copy()
        cls, noise_mask, repeats = self._transform_obj_label(
            self._labels[idx][:, 0].copy())
        # Rows are repeated as dictated by the label transform.
        # presumably this keeps rows aligned with the returned ``cls`` --
        # verify against _transform_obj_label.
        _labels = np.repeat(_labels, repeats=repeats, axis=0)
        N = len(cls)

        # Objects whose transformed class is -1 are masked out.
        # ``int`` is the builtin that the removed ``np.int`` alias pointed to.
        mask = np.ones((N, ), dtype=int)
        mask[cls == -1] = 0

        target.add_field('class', cls)
        target.add_field('img_id', np.zeros((N, ), dtype=int))
        target.add_field('bbox', _labels[:, 1:5])
        target.add_field('dimension', _labels[:, 5:8])
        target.add_field('alpha', _labels[:, 8])
        target.add_field('Ry', _labels[:, 9])
        target.add_field('location', _labels[:, -3:])
        target.add_field('mask', mask)
        target.add_field('noise_mask', noise_mask)
        # One flattened copy of the intrinsics per object.
        target.add_field(
            'K', np.repeat(K.copy().reshape(1, 9), repeats=N, axis=0))

        if self._augment is not None:
            img, target = self._augment(img, targets=target,
                                        **self._aug_params)
        images.append(img)
        targets.append(target)

    if use_mosaic:
        img, target = self._apply_mosaic(images, targets)
    else:
        img, target = self._apply_padding(images, targets)

    # Convert
    img = np.ascontiguousarray(img)
    params = {'device': self._config.DEVICE}
    # Targets are built before the normalisation parameters are merged in,
    # matching the original statement order.
    target = self._build_targets(target)
    params.update(self._norm_params)
    img, target = self._transform(img, targets=target, **params)

    path = os.path.join(self._root, 'training',
                        'image_2/{}.png'.format(self._image_files[index]))
    return img, target, path, self.__shapes[index]
def _build_targets(self, targets):
    '''
    Build the training targets from per-object annotations.

    Copies 'img_id', 'mask', 'noise_mask' and 'K' from ``targets`` and adds
    the fields 'm_proj', 'm_off', 'v_proj', 'v_off', 'v_coor_off', 'v_mask',
    'mask_3d' and 'm_hm' (the per-class centre heat-map).

    :param targets: ParamList providing the fields 'bbox', 'mask', 'location',
        'Ry', 'dimension', 'K', 'class' and 'noise_mask'
    :return: a new ParamList holding the fields listed above
    '''
    outputs = ParamList(self._img_size, is_training=self.is_training)
    outputs.copy_field(targets, ['img_id', 'mask', 'noise_mask', 'K'])

    down_ratio = self._config.MODEL.DOWN_SAMPLE
    bboxes = targets.get_field('bbox') / down_ratio
    m_masks = targets.get_field('mask')
    # NOTE(review): the heat-map size uses a hard-coded factor of 4 while the
    # boxes are scaled by MODEL.DOWN_SAMPLE -- confirm the two always agree.
    W, H = self._img_size[0] // 4, self._img_size[1] // 4
    N = m_masks.shape[0]

    # Box centres: truncated integer cell plus the remaining offset.
    # np.int64 replaces the ``np.long`` alias removed from NumPy.
    centers = data_utils.bbox_center(bboxes)
    m_projs = centers.astype(np.int64)
    m_offs = centers - m_projs
    outputs.add_field('m_proj', m_projs)
    outputs.add_field('m_off', m_offs)

    locations = targets.get_field('location')
    Rys = targets.get_field('Ry')
    dimensions = targets.get_field('dimension')
    Ks = targets.get_field('K')
    # In-place scaling of the first six intrinsics entries of each row.
    # NOTE(review): this mutates the array held by ``targets`` (and by
    # ``outputs`` if copy_field shares it) -- confirm this is intended.
    Ks[:, 0:6] /= down_ratio

    vertexs, _, mask_3ds = kitti_utils.calc_proj2d_bbox3d(
        dimensions, locations, Rys, Ks.reshape(-1, 3, 3))
    # Swap the last two axes, then drop the final entry along axis 1.
    vertexs = np.ascontiguousarray(
        np.transpose(vertexs, axes=[0, 2, 1]))[:, :-1]
    v_projs = vertexs.astype(np.int64)
    v_offs = vertexs - v_projs
    v_coor_offs = vertexs - centers.reshape(-1, 1, 2)
    # A vertex is valid only when its integer cell lies inside the heat-map.
    v_masks = ((v_projs[..., 0] >= 0) & (v_projs[..., 0] < W)
               & (v_projs[..., 1] >= 0) & (v_projs[..., 1] < H))
    outputs.add_field('v_proj', v_projs)
    outputs.add_field('v_off', v_offs)
    outputs.add_field('v_coor_off', v_coor_offs)
    outputs.add_field('v_mask', v_masks)
    outputs.add_field('mask_3d', mask_3ds)

    if self._config.DATASET.GAUSSIAN_GEN_TYPE == 'dynamic_radius':
        gaussian_sigma, gaussian_radius = data_utils.dynamic_radius(bboxes)
    else:
        gaussian_sigma, gaussian_radius = data_utils.dynamic_sigma(
            bboxes, self._config.DATASET.BBOX_AREA_MAX,
            self._config.DATASET.BBOX_AREA_MIN)

    clses = targets.get_field('class')
    num_cls = len(self._classes)
    noise_masks = targets.get_field('noise_mask')

    # np.float64 replaces the ``np.float`` alias removed from NumPy.
    m_hm = np.zeros((num_cls, H, W), dtype=np.float64)
    # Per-vertex heat-map generation is currently disabled:
    # num_vertex = vertexs.shape[1]
    # v_hm = np.zeros((num_vertex, H, W), dtype=np.float64)
    for i in range(N):
        m_mask = m_masks[i]
        mask_3d = mask_3ds[i]

        gaussian_kernel, xs, ys = None, None, None
        if m_mask | mask_3d:
            gaussian_kernel, xs, ys = data_utils.gaussian2D(
                gaussian_sigma[i], gaussian_radius[i])
            if noise_masks[i]:
                # Overwrite the middle kernel entry for noisy objects.
                # presumably this keeps their peak just below 1 -- TODO confirm.
                gaussian_kernel[len(xs) // 2] = 0.9999

        if m_mask:
            # to-do
            m_proj = m_projs[i]
            cls = clses[i]
            m_xs = xs + m_proj[0]
            m_ys = ys + m_proj[1]
            valid = (m_xs >= 0) & (m_xs < W) & (m_ys >= 0) & (m_ys < H)
            rows, cols = m_ys[valid], m_xs[valid]
            # Overlapping objects keep the element-wise maximum response.
            m_hm[cls, rows, cols] = np.maximum(m_hm[cls, rows, cols],
                                               gaussian_kernel[valid])

        # if mask_3d:
        #     # to-do
        #     v_proj = v_projs[i]
        #     for j, v in enumerate(v_proj):
        #         v_xs = xs + v[0]
        #         v_ys = ys + v[1]
        #         valid = (v_xs >= 0) & (v_xs < W) & (v_ys >= 0) & (v_ys < H)
        #         v_hm[j, v_ys[valid], v_xs[valid]] = np.maximum(
        #             v_hm[j, v_ys[valid], v_xs[valid]], gaussian_kernel[valid])

    outputs.add_field('m_hm', np.expand_dims(m_hm, axis=0))
    # outputs.add_field('v_hm', np.expand_dims(v_hm, axis=0))
    return outputs
def __getitem__(self, index):
    '''
    Load the sample at ``index`` and return it in network-ready form.

    When mosaic mode is enabled and the dataset is in training mode, three
    further randomly chosen samples are loaded and combined with the
    requested one; otherwise the single sample is padded. The result is then
    passed through a fixed chain of transforms.

    :param index: position of the requested sample
    :return: tuple ``(img, target, path, shape)`` where ``path`` is the image
        file path built from ``self._root`` and ``shape`` is the entry of
        ``self.__shapes`` for ``index``
    '''
    use_mosaic = self._is_mosaic and self.is_training

    indices = [index]
    if use_mosaic:
        # 3 additional image indices
        indices += [random.randint(0, len(self._labels) - 1)
                    for _ in range(3)]

    transform = transforms.Compose([
        transforms.ImageTo(np.float32),
        transforms.Normalize(),
        transforms.ToPercentCoords(),
        transforms.ToXYWH(),
        transforms.ToTensor(),
        transforms.ToNCHW()
    ])

    images = []
    targets = []
    for idx in indices:
        img = self._load_image(idx)
        K = self._K[idx]
        labels = self._labels[idx]
        N = len(labels)

        target = ParamList((img.shape[1], img.shape[0]))
        # ``int`` is the builtin that the removed ``np.int`` alias pointed to.
        target.add_field('img_id', np.zeros((N,), dtype=int))
        # Every label column is copied so the stored labels are not modified
        # through the target fields.
        target.add_field('class', labels[:, 0].copy())
        target.add_field('bbox', labels[:, 1:5].copy())
        target.add_field('dimension', labels[:, 5:8].copy())
        target.add_field('alpha', labels[:, 8].copy())
        target.add_field('Ry', labels[:, 9].copy())
        target.add_field('location', labels[:, -3:].copy())
        target.add_field('mask', np.ones((N,), dtype=int))
        # One flattened copy of the intrinsics per object.
        target.add_field('K', np.repeat(K.copy().reshape(1, 9),
                                        repeats=N, axis=0))

        if self._augment is not None:
            img, target = self._augment(img, targets=target, **self._config)
        images.append(img)
        targets.append(target)

    if use_mosaic:
        img, target = self._apply_mosaic(images, targets)
    else:
        img, target = self._apply_padding(images, targets)

    # Convert
    # NOTE(review): a channel reversal (img[:, :, ::-1]) is left disabled here.
    img = np.ascontiguousarray(img)
    img, target = transform(img, targets=target)

    path = os.path.join(self._root, 'training',
                        'image_2/{}.png'.format(self._image_files[index]))
    return img, target, path, self.__shapes[index]