# Common imports assumed by the loaders in this section. The helper functions
# used throughout (get_affine_transform, affine_transform, color_aug,
# gaussian_radius, draw_msra_gaussian, draw_umich_gaussian, draw_dense_reg)
# are the standard CenterNet utilities; the DEBUG paths additionally assume
# matplotlib.pyplot as plt and scipy's Rotation as R.
import copy
import math
import os
import os.path as osp
from pathlib import Path

import cv2
import numpy as np
import torch


def __getitem__(self, index):
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    s = max(img.shape[0], img.shape[1]) * 1.0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            c[0] += img.shape[1] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += img.shape[0] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]

    trans_input = get_affine_transform(
        c, s, 0, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_classes = self.opt.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    num_hm = 1 if self.opt.agnostic_ex else num_classes
    hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
    hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
    hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
    hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
    hm_c = np.zeros((num_classes, output_res, output_res), dtype=np.float32)
    reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind_t = np.zeros((self.max_objs), dtype=np.int64)
    ind_l = np.zeros((self.max_objs), dtype=np.int64)
    ind_b = np.zeros((self.max_objs), dtype=np.int64)
    ind_r = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)
    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    for k in range(num_objs):
        ann = anns[k]
        # bbox = self._coco_box_to_bbox(ann['bbox'])
        # extreme points in t, l, b, r order
        pts = np.array(ann['extreme_points'], dtype=np.float32).reshape(4, 2)
        # cls_id = int(self.cat_ids[ann['category_id']] - 1)  # bug
        cls_id = int(self.cat_ids[ann['category_id']])
        hm_id = 0 if self.opt.agnostic_ex else cls_id
        if flipped:
            pts[:, 0] = width - pts[:, 0] - 1
            # horizontal flip swaps the left and right extreme points
            pts[1], pts[3] = pts[3].copy(), pts[1].copy()
        for j in range(4):
            pts[j] = affine_transform(pts[j], trans_output)
        pts = np.clip(pts, 0, self.opt.output_res - 1)
        h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            pt_int = pts.astype(np.int32)
            draw_gaussian(hm_t[hm_id], pt_int[0], radius)
            draw_gaussian(hm_l[hm_id], pt_int[1], radius)
            draw_gaussian(hm_b[hm_id], pt_int[2], radius)
            draw_gaussian(hm_r[hm_id], pt_int[3], radius)
            reg_t[k] = pts[0] - pt_int[0]
            reg_l[k] = pts[1] - pt_int[1]
            reg_b[k] = pts[2] - pt_int[2]
            reg_r[k] = pts[3] - pt_int[3]
            ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
            ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
            ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
            ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]
            ct = [int((pts[3, 0] + pts[1, 0]) / 2),
                  int((pts[0, 1] + pts[2, 1]) / 2)]
            draw_gaussian(hm_c[cls_id], ct, radius)
            reg_mask[k] = 1

    ret = {'input': inp, 'hm_t': hm_t, 'hm_l': hm_l, 'hm_b': hm_b,
           'hm_r': hm_r, 'hm_c': hm_c}
    if self.opt.reg_offset:
        ret.update({'reg_mask': reg_mask,
                    'reg_t': reg_t, 'reg_l': reg_l,
                    'reg_b': reg_b, 'reg_r': reg_r,
                    'ind_t': ind_t, 'ind_l': ind_l,
                    'ind_b': ind_b, 'ind_r': ind_r})
    return ret
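# The random-crop branch above assumes a `_get_border` helper. A minimal
# sketch following the standard CenterNet utility of the same name: it
# shrinks the border until valid crop centers exist even for small images.
def _get_border(self, border, size):
    i = 1
    while size - border // i <= border // i:
        i *= 2
    return border // i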
def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            lt = np.array([bbox[0], bbox[1]], dtype=np.float32)
            lt_int = lt.astype(np.int32)
            rb = np.array([bbox[2], bbox[3]], dtype=np.float32)
            rb_int = rb.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            draw_gaussian(hm[cls_id], lt_int, radius)
            draw_gaussian(hm[cls_id], rb_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
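# `_coco_box_to_bbox` used above converts COCO's [x, y, w, h] box format to
# [x1, y1, x2, y2]; a minimal sketch matching the CenterNet reference helper.
def _coco_box_to_bbox(self, box):
    bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                    dtype=np.float32)
    return bbox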
def __getitem__(self, index):
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    img = self.coco.loadImgs(ids=[img_id])[0]
    w_img = int(img['width'])
    h_img = int(img['height'])

    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        if anno['iscrowd'] == 1 or not isinstance(anno['segmentation'], list):
            # Excludes crowd objects
            continue
        if len(anno['segmentation']) > 1:
            # keep the largest connected polygon
            obj_contours = [np.array(s).reshape((-1, 2)).astype(np.int32)
                            for s in anno['segmentation']]
            obj_contours = sorted(obj_contours, key=cv2.contourArea)
            polygons = obj_contours[-1]
        else:
            polygons = anno['segmentation'][0]
        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))

        # Downsample the contour to a fixed number of vertices
        if cv2.contourArea(contour.astype(np.int32)) < 6:
            continue
        fixed_contour = uniformsample(contour, self.n_vertices)
        fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # skip degenerate shapes (the original compared `== np.nan`, which is
        # always False; NaN must be tested with np.isnan/np.isfinite)
        if contour_std < 1e-6 or not np.isfinite(contour_std):
            continue
        updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
                        np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])]
        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    if len(bboxes) == 0:
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    # bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(160, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros((self.max_objs, 2),
                              dtype=np.float32)  # gt mass centers to bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    activated_codes = np.zeros((self.max_objs, self.n_codes),
                               dtype=np.int64)  # keep track of codes that are activated
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # regression for offsets of shape center
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # generate gt shape mean and std from contours
        for m in range(self.n_vertices):  # apply scale and crop transform to shapes
            shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2],
                                                      trans_fmap)
        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()
        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)
        mass_center = np.mean(indexed_shape, axis=0)
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue
        norm_shape = (indexed_shape - mass_center) / np.array([w / 2., h / 2.])

        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)),
                                                self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary,
                                     lmbda=self.sparse_alpha, max_iter=60)
            activated_codes[k] = (np.abs(codes_[k]) > 1e-4) * 1  # active codes annotated 1
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

    return {'image': img, 'shapes': shapes_, 'codes': codes_,
            'offsets': center_offsets, 'active': activated_codes,
            'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds,
            'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}
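# `check_clockwise_polygon` is assumed above; a sketch of one plausible
# implementation via the shoelace formula. In image coordinates (y grows
# downward) a positive signed area corresponds to clockwise vertex order;
# the exact convention in the original repo may differ.
def check_clockwise_polygon(polygon):
    x, y = polygon[:, 0], polygon[:, 1]
    signed_area = np.sum(x * np.roll(y, -1) - np.roll(x, -1) * y) / 2.0
    return signed_area > 0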
def __get_default_coco(self, img, anns, num_objs):
    boxes = []
    if self.num_keypoints > 0:
        kpts = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        boxes.append(BoundingBox(*bbox))
        if self.num_keypoints > 0:
            if 'keypoints' not in ann:
                ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))
            kpt = [Keypoint(*x)
                   for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]]
            kpts.extend(kpt)
    bbs = BoundingBoxesOnImage(boxes, shape=img.shape)
    if self.num_keypoints > 0:
        kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        if self.num_keypoints > 0:
            img_aug, bbs_aug, kpts_aug = self.augmentation(
                image=img, bounding_boxes=bbs, keypoints=kpts)
        else:
            img_aug, bbs_aug = self.augmentation(image=img, bounding_boxes=bbs)
    else:
        if self.num_keypoints > 0:
            kpts_aug = kpts.copy()
        img_aug, bbs_aug = np.copy(img), bbs.copy()

    if self.num_keypoints > 0:
        img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                 bounding_boxes=bbs_aug,
                                                 keypoints=kpts_aug)
    else:
        img_aug, bbs_aug = self.resize(image=img_aug, bounding_boxes=bbs_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 2), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    if self.num_keypoints > 0:
        kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                      dtype=np.float32)
        gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                         dtype=np.float32)
        kp_reg_mask = np.zeros((self.max_detections, self.num_keypoints * 2),
                               dtype=np.uint8)
        bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                            keypoints=kpts_aug)
    else:
        bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

    for k in range(num_objs):
        ann = anns[k]
        bbox_aug = bbs_aug[k].clip_out_of_image((output_w, output_h))
        bbox = np.array([bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])
        cls_id = int(self.cat_mapping[ann['category_id']])
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((np.ceil(h), np.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_det[k] = ([ct[0] - w / 2, ct[1] - h / 2,
                          ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
            if self.num_keypoints > 0:
                valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                for i, p in enumerate(
                        kpts_aug[k * self.num_keypoints:
                                 k * self.num_keypoints + self.num_keypoints]):
                    kp[k][i * 2] = p.x - ct_int[0]
                    kp[k][i * 2 + 1] = p.y - ct_int[1]
                    # NOTE: the original passed (output_w, output_w) here;
                    # imgaug expects an (h, w) shape tuple.
                    is_valid = valid[i] == 2 and not p.is_out_of_image(
                        (output_h, output_w))
                    kp_reg_mask[k, i * 2] = int(is_valid)
                    kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                    gt_kp[k][i] = p.x, p.y
            if "area" not in ann:
                gt_areas[k] = w * h
            else:
                gt_areas[k] = ann["area"]

    del bbs
    del bbs_aug
    del img_aug
    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 6), dtype=np.float32)
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    if self.num_keypoints > 0:
        ret['kps'] = kp
        ret['gt_kps'] = gt_kp
        ret['kp_reg_mask'] = kp_reg_mask
        del kpts_aug
    return ret
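# `gaussian_radius`, used by every loader here, is the CornerNet/CenterNet
# heuristic: the largest radius such that a corner displaced by it still
# yields at least `min_overlap` IoU with the ground-truth box (three
# quadratic cases, take the minimum root). Reproduced for reference,
# verbatim including the well-known (b + sq) / 2 rather than / (2 * a).
def gaussian_radius(det_size, min_overlap=0.7):
    height, width = det_size

    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
    r1 = (b1 + sq1) / 2

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
    r2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)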
def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # TODO: this is where multi-scale training would be changed.
            s = s  # * np.random.choice(np.arange(0.8, 1.5, 0.1))  # changed from (0.6, 1.4)
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if self.opt.angle_norm and self.split == 'train':
            # First read the annotations: from each box center and head
            # keypoint, collect the set of orientation angles.
            angle_list = []
            for k in range(num_objs):
                ann = anns[k]
                bbox = self._coco_box_to_bbox(ann['bbox'])
                pts = np.array(ann['keypoints'][0:3], np.float32).reshape(
                    self.num_joints, 3)  # tmjx
                ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
                angle_list.append(angle)
            # The code below computes the rotation angle to apply: take the
            # most populated 20-degree bin and average the angles inside it.
            angle_list = np.array(angle_list) % np.pi  # first normalize to [0, pi)
            angle_int = (angle_list // (np.pi / 9)).astype('int')
            angle_b = np.bincount(angle_int)
            index_rot = np.argmax(angle_b)
            ind_rot = (angle_list > (index_rot) * np.pi / 9) * \
                      (angle_list <= (index_rot + 1) * np.pi / 9)
            angle_rot = np.average(angle_list[ind_rot])
            rot = angle_rot * (-180) / np.pi

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res), dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # TODO: change (marker: wwlekeuihx)
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'][0:3], np.float32).reshape(num_joints, 3)  # tmjx
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # for e in self.flip_idx:
            #     pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
        # bbox[:2] = affine_transform(bbox[:2], trans_output)
        # bbox[2:] = affine_transform(bbox[2:], trans_output)
        # bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        center_obj = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2]
        center_obj = affine_transform(center_obj, trans_output_rot)
        scale_trans = self.opt.output_res / s
        h *= scale_trans
        w *= scale_trans
        h = np.clip(h, 0, output_res - 1)
        w = np.clip(w, 0, output_res - 1)
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w))) * 1.2
            sqrt_wh = np.sqrt(np.sqrt(h * w))
            radius_w = radius * np.sqrt(w) / sqrt_wh
            radius_h = radius * np.sqrt(h) / sqrt_wh
            radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(0, np.ceil(radius_w))
            radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(0, np.ceil(radius_h))
            # radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
            ct = np.array(center_obj, dtype=np.float32)
            # if self.opt.Rguass:
            if ct[0] < 0 or ct[0] > output_res - 1 or \
                    ct[1] < 0 or ct[1] > output_res - 1:
                # continue
                ct[0] = np.clip(ct[0], 0, output_res - 1)
                ct[1] = np.clip(ct[1], 0, output_res - 1)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0
            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2: j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            # TODO: change
            angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
            if self.opt.Rguass:
                draw_gaussian(hm[cls_id], ct_int, [radius_w, radius_h, angle])
            else:
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
                draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1] +
                          pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
    # if rot != 0:
    #     hm = hm * 0 + 0.9999
    #     reg_mask *= 0
    #     kps_mask *= 0
    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'hps': kps, 'hps_mask': kps_mask}
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res, output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res, output_res)
        ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    # Debug code for visualizing the generated feature maps:
    # debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
    #                     theme=self.opt.debugger_theme)
    # inp1 = inp.transpose(1, 2, 0)
    # inp1 = (inp1 * self.std + self.mean) * 255.
    # self.debug(debugger, inp1, ret)
    return ret
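# The `Rguass` branch above passes [radius_w, radius_h, angle] where the
# other loaders pass a scalar radius, implying an anisotropic, rotated
# Gaussian splat. A hedged sketch of what such a draw function could look
# like (an assumption for illustration, not this repo's implementation):
def draw_rotated_gaussian(heatmap, center, radius_w, radius_h, angle):
    h, w = heatmap.shape
    sigma_x, sigma_y = max(radius_w, 1) / 3., max(radius_h, 1) / 3.
    ys, xs = np.ogrid[:h, :w]
    dx, dy = xs - center[0], ys - center[1]
    # rotate offsets into the box frame before the anisotropic falloff
    rx = dx * np.cos(angle) + dy * np.sin(angle)
    ry = -dx * np.sin(angle) + dy * np.cos(angle)
    g = np.exp(-(rx ** 2 / (2 * sigma_x ** 2) + ry ** 2 / (2 * sigma_y ** 2)))
    np.maximum(heatmap, g.astype(heatmap.dtype), out=heatmap)
    return heatmap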
def __getitem__(self, index):
    img_id = self.ids[index]
    file_name = self.hoi_annotations[img_id]['file_name']
    img_path = os.path.join(self.root, self.image_dir, file_name)
    anns = self.hoi_annotations[img_id]['annotations']
    hoi_anns = self.hoi_annotations[img_id]['hoi_annotation']
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    hm_human = np.zeros((1, output_h, output_w), dtype=np.float32)
    hm_rel = np.zeros((self.num_classes_verb, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
    obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    bbox_ct = []
    num_rels = min(len(hoi_anns), self.max_rels)
    for k in range(num_objs):
        ann = anns[k]
        bbox = np.asarray(ann['bbox'])
        if isinstance(ann['category_id'], str):
            ann['category_id'] = int(ann['category_id'].replace('\n', ''))
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        bbox_ct.append(ct_int.tolist())
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            if cls_id == 0:
                # class 0 (human) gets its own single-channel heatmap
                draw_gaussian(hm_human[cls_id], ct_int, radius)
            else:
                draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
    rel_ind = np.zeros((self.max_rels), dtype=np.int64)
    for k in range(num_rels):
        hoi = hoi_anns[k]
        if isinstance(hoi['category_id'], str):
            hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
        hoi_cate = int(self.cat_ids_verb[hoi['category_id']])
        sub_ct = bbox_ct[hoi['subject_id']]
        obj_ct = bbox_ct[hoi['object_id']]
        offset_mask[k] = 1
        # the interaction point is the midpoint of subject and object centers
        rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                           (sub_ct[1] + obj_ct[1]) / 2], dtype=np.float32)
        radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                  math.ceil(abs(sub_ct[1] - obj_ct[1]))))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        rel_ct_int = rel_ct.astype(np.int32)
        draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)
        rel_sub_offset = np.array([rel_ct_int[0] - sub_ct[0],
                                   rel_ct_int[1] - sub_ct[1]], dtype=np.float32)
        rel_obj_offset = np.array([rel_ct_int[0] - obj_ct[0],
                                   rel_ct_int[1] - obj_ct[1]], dtype=np.float32)
        sub_offset[k] = 1. * rel_sub_offset[0], 1. * rel_sub_offset[1]
        obj_offset[k] = 1. * rel_obj_offset[0], 1. * rel_obj_offset[1]
        rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

    ret = {'input': inp, 'hm': hm, 'hm_human': hm_human,
           'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'hm_rel': hm_rel,
           'sub_offset': sub_offset, 'obj_offset': obj_offset,
           'offset_mask': offset_mask, 'rel_ind': rel_ind}
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    return ret
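# All of these loaders encode object locations as flat indices
# (ind = y * output_w + x). A sketch of how a training step typically
# gathers per-object predictions from a [B, C, H, W] head output with
# them, modeled on CenterNet's _gather_feat/_tranpose_and_gather_feat:
import torch

def gather_feat(feat, ind):
    # feat: [B, C, H, W], ind: [B, K] -> returns [B, K, C]
    feat = feat.permute(0, 2, 3, 1).contiguous()      # [B, H, W, C]
    feat = feat.view(feat.size(0), -1, feat.size(3))  # [B, H*W, C]
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), feat.size(2))
    return feat.gather(1, ind)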
def __getitem__(self, index):
    img_id = self.images[index]
    video_info = self.coco.loadImgs(ids=[img_id])[0]
    file_name = video_info['file_name']
    image_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    input_h, input_w = self.opt.input_h, self.opt.input_w

    # take the first of every 9 keypoints (the 3-D box center), in
    # normalized coordinates, and keep only objects fully inside the image
    centers = np.array([ann['keypoints_2d'] for ann in anns])[:, 0::9, :2]
    centers = centers.reshape(-1, 2)
    keep = np.where(np.all((centers > 0) & (centers < 1), axis=1))
    centers = centers[keep]
    anns = [anns[i] for i in keep[0]]

    img = cv2.imread(image_path)
    # resize, pad, and color augs
    centers[:, 0], centers[:, 1] = centers[:, 0] * img.shape[1], centers[:, 1] * img.shape[0]
    augmented = self.augs(image=img, keypoints=centers)
    inp, centers = augmented['image'], np.array(augmented['keypoints'])
    num_objs = min(len(centers), self.max_objs)
    wh_ratio = img.shape[1] / img.shape[0]

    c = np.array([inp.shape[1] / 2., inp.shape[0] / 2.], dtype=np.float32)
    s = max(inp.shape[0], inp.shape[1]) * 1.0

    aug = False
    if self.split == 'train' and np.random.random() < self.opt.aug_ddd and num_objs > 0:
        aug = True
        sf = self.opt.scale
        # cf = self.opt.shift
        scale_rand = np.random.random()
        s = s * np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(inp, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    centers = np.concatenate([centers, np.ones((centers.shape[0], 1))], axis=1)
    centers = np.matmul(trans_input, centers.T).T
    if num_objs > 0:
        centers[:, 0], centers[:, 1] = centers[:, 0] / inp.shape[1], centers[:, 1] / inp.shape[0]

    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio

    # empty targets
    heat_map = np.zeros([self.num_classes, output_h, output_w], dtype=np.float32)
    scales = np.zeros([self.max_objs, 3], dtype=np.float32)
    translations = np.zeros([self.max_objs, 3], dtype=np.float32)
    rotvecs = np.zeros([self.max_objs, 3], dtype=np.float32)
    reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)

    for k in range(num_objs):
        ann = anns[k]
        bbox = np.array(ann['bbox'])
        scale = np.array(ann['scale'])
        rot_angles = np.array(ann['rot'])
        translation = np.array(ann['translation'])
        if aug:
            translation[2] *= np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)
            # translation[0] += translation[0] * y_shift * cf
            # translation[1] -= (x_shift * cf) * 0.3
        ct = centers[k][:2]
        # NOTE: the original scaled x by output_h and y by output_w, and
        # clipped y with output_w; x should scale/clip by width, y by height.
        ct[0], ct[1] = ct[0] * output_w, ct[1] * output_h
        ct[0], ct[1] = np.clip(ct[0], 0, output_w - 1), np.clip(ct[1], 0, output_h - 1)
        cls_id = int(self.cat_ids[ann['category_id']])
        bbox[[0, 2]] *= output_w
        bbox[[1, 3]] *= output_h
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius / 2))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(heat_map[cls_id], ct_int, radius)
            scales[k] = scale
            translations[k] = translation
            rotvecs[k] = rot_angles
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            if DEBUG:
                lines = (
                    [1, 5], [2, 6], [3, 7], [4, 8],  # lines along x-axis
                    [1, 3], [5, 7], [2, 4], [6, 8],  # lines along y-axis
                    [1, 2], [3, 4], [5, 6], [7, 8],  # lines along z-axis
                )
                plt.scatter(ct_int[0], ct_int[1])
                r = R.from_euler('zyx', rot_angles).as_matrix()
                box_3d = Box.from_transformation(r, translation, scale).vertices
                points_2d = project_points(box_3d,
                                           np.array(video_info['projection_matrix']))
                points_2d[:, 0] = points_2d[:, 0] * (128 * wh_ratio) + 128 * (1 - wh_ratio) / 2
                points_2d[:, 1] *= 128
                points_2d = points_2d.astype(int)
                for ids in lines:
                    plt.plot(
                        (points_2d[ids[0]][0], points_2d[ids[1]][0]),
                        (points_2d[ids[0]][1], points_2d[ids[1]][1]),
                        color='r',
                    )
                # points_2d = np.array(ann['keypoints_2d'])
                # points_2d[:, 0] *= 128
                # points_2d[:, 1] *= 128
                # points_2d = points_2d.astype(int)
                # for ids in lines:
                #     plt.plot(
                #         (points_2d[ids[0]][0], points_2d[ids[1]][0]),
                #         (points_2d[ids[0]][1], points_2d[ids[1]][1]),
                #         color='b',
                #     )

    ret = {'input': inp, 'hm': heat_map, 'reg_mask': reg_mask, 'ind': ind,
           'dim': scales, 'rot': rotvecs, 'loc': translations}
    if self.opt.reg_offset:
        ret.update({'reg': reg})

    if DEBUG:
        if inp.shape[0] == 3:
            plot_img = inp.transpose(1, 2, 0)
            plot_img = (plot_img * self.std) + self.mean
        else:
            plot_img = inp.copy()
        plot_img = cv2.resize(plot_img, (output_w, output_h))
        plot_img = cv2.cvtColor(plot_img, cv2.COLOR_BGR2RGB)
        plt.imshow(plot_img)
        plt.show()
        plt.imshow(heat_map[0])
        plt.show()
    return ret
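# `project_points` in the DEBUG block is not defined here. A hedged sketch
# of a plausible pinhole projection (names and conventions are assumptions;
# Objectron-style data stores a 4x4 projection matrix, and the caller above
# rescales the result to pixel coordinates itself):
def project_points(points_3d, projection_matrix):
    ones = np.ones((points_3d.shape[0], 1), dtype=np.float32)
    p_hom = np.concatenate([points_3d, ones], axis=1)  # [N, 4]
    p_proj = (projection_matrix @ p_hom.T).T           # [N, 4]
    # perspective divide by the homogeneous w component
    return p_proj[:, :2] / p_proj[:, 3:4]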
def __getitem__(self, index):
    img_id = self.images[index]
    # loadImgs(ids=[img_id]) returns a list whose length is 1
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    cropped = False
    if self.split == 'train':
        if np.random.random() < 1:
            cropped = True
            file_name = file_name.split('.')[0] + 'crop.jpg'
            img_path = os.path.join(self.img_dir, file_name)
    if self.split == 'val':
        cropped = True

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    rotted = False
    if self.split == 'train':
        if not self.opts.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self.get_border(128, img.shape[1])
            h_border = self.get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opts.scale
            cf = self.opts.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opts.aug_rot:
            rotted = True
            rf = self.opts.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        if np.random.random() < self.opts.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

        # input_res is max(input_h, input_w); "input" is the size of the original img
        if np.random.random() < self.opts.keep_inp_res_prob and max(
                (height | 127) + 1, (width | 127) + 1) < 1024:
            self.opts.input_h = (height | 127) + 1
            self.opts.input_w = (width | 127) + 1
            self.opts.output_h = self.opts.input_h // self.opts.down_ratio
            self.opts.output_w = self.opts.input_w // self.opts.down_ratio
            self.opts.input_res = max(self.opts.input_h, self.opts.input_w)
            self.opts.output_res = max(self.opts.output_h, self.opts.output_w)

    trans_input = get_affine_transform(
        c, s, rot, [self.opts.input_res, self.opts.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opts.input_res, self.opts.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opts.no_color_aug:
        color_aug(self.data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    # change data shape to [3, input_size, input_size]
    inp = inp.transpose(2, 0, 1)

    # output_res is max(output_h, output_w); "output" is the size after downsampling
    output_res = self.opts.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res), dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, 2 * num_joints), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opts.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        if cropped:
            bbox = np.array(ann['bbox'])
        else:
            bbox = np.array(ann['org_bbox'])
        cls_id = int(ann['category_id']) - 1
        if cropped:
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        else:
            pts = np.array(ann['org_keypoints'], np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            for joint_idx in self.flip_idx:
                # don't forget to copy first
                pts[joint_idx[0]], pts[joint_idx[1]] = \
                    pts[joint_idx[1]].copy(), pts[joint_idx[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        if rotted:
            pts_rot = np.zeros((num_joints, 2))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts_rot[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
            bbox[:2] = np.min(pts_rot, axis=0)
            bbox[2:] = np.max(pts_rot, axis=0)
        bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opts.hm_gauss if self.opts.mse_loss else max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int  # the error of center [x, y]
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()  # whether joints can be seen or not
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0  # means this obj can't be seen
            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)), min_overlap=1)
            hp_radius = self.opts.hm_gauss if self.opts.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:  # means this joint can be seen
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opts.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        hp1 = draw_gaussian(hm_hp[j], pt_int, hp_radius)
                        # plt.imsave('/home/mry/Desktop/testimg/hp_{}_{}.jpg'.format(k, j), hp1)
            draw_gaussian(hm[cls_id], ct_int, radius)
            # gt_det: x0, y0, x1, y1, score, joint1_x, joint1_y, ...,
            # joint17_x, joint17_y, cls_id
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1] +
                          pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
    # if rot != 0:
    #     hm = hm * 0 + 0.9999
    #     reg_mask *= 0
    #     kps_mask *= 0
    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'hps': kps, 'hps_mask': kps_mask}
    if self.opts.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res, output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res, output_res)
        ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
        del ret['hps'], ret['hps_mask']
    if self.opts.reg_offset:
        ret.update({'reg': reg})
    if self.opts.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opts.reg_hp_offset:
        ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
    if self.opts.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
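# `affine_transform` applies the 2x3 matrices produced by
# `get_affine_transform` to a single point; this is the standard CenterNet
# helper, reproduced for reference.
def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]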
def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    img_show = copy.deepcopy(img)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    # flipped = False  # flip augmentation removed
    # ---------- image preprocessing (random crop / shift / flip) removed ----------
    # if self.split == 'train':
    #     if not self.opt.not_rand_crop:
    #         s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
    #         w_border = self._get_border(128, img.shape[1])
    #         h_border = self._get_border(128, img.shape[0])
    #         c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
    #         c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
    #     else:
    #         sf = self.opt.scale
    #         cf = self.opt.shift
    #         c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
    #         c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
    #         s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
    #
    #     if np.random.random() < self.opt.flip:
    #         flipped = True
    #         img = img[:, ::-1, :]
    #         c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    # plot:
    # cv2.imwrite('/Workspace/CenterNet/in_{}'.format(file_name), inp)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    # plot:
    # inp_out = cv2.warpAffine(img_show, trans_output, (output_w, output_h),
    #                          flags=cv2.INTER_LINEAR)
    # for k in range(num_objs):
    #     ann = anns[k]
    #     bbox_show = copy.deepcopy(ann['bbox'])
    #     bbox_show[:2] = affine_transform(bbox_show[:2], trans_output)
    #     cv2.circle(inp_out, tuple(list(map(int, bbox_show[:2]))), 2, (0, 0, 255), -1)
    # print('file {} num {}'.format(file_name, num_objs))
    # cv2.imwrite('/Workspace/CenterNet/out_{}'.format(file_name), inp_out)

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    # angle regression target
    reg_angle = np.zeros((self.max_objs, 1), dtype=np.float32)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    # show gt:
    # for k in range(num_objs):
    #     ann = anns[k]
    #     bbox = ann['bbox']
    #     bbox[:2] = affine_transform(bbox[:2], trans_output)
    #     bbox[2:4] = affine_transform(bbox[2:4], trans_output)
    #     bbox[0] = np.clip(bbox[0], 0, output_w - 1)
    #     bbox[1] = np.clip(bbox[1], 0, output_h - 1)
    #     self.getfourpoints(bbox, inp_out)
    # cv2.imwrite('/Workspace/CenterNet/gt_{}'.format(file_name), inp_out)

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        # bbox = self._coco_box_to_bbox(ann['bbox'])
        bbox = ann['bbox']  # rotated box: [cx, cy, w, h, angle]
        cls_id = int(self.cat_ids[ann['category_id']])
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:4] = affine_transform(bbox[2:4], trans_output)
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        h, w = bbox[3], bbox[2]
        if h > 0 and w > 0:
            ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            reg_angle[k] = bbox[4]
            if not self.opt.ellipse:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                draw_gaussian(hm[cls_id], ct_int, radius)
            else:
                draw_ellipse_gaussian(hm[cls_id], ct_int, w, h, reg_angle[k])
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
    if self.opt.ellipse:
        hm = np.where(hm > 1e-2, hm, 0)

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'angle': reg_angle}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
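# `draw_umich_gaussian`, selected above when mse_loss is off, splats a
# radius-dependent Gaussian onto the heatmap with an element-wise max so
# overlapping objects keep the stronger peak; the CenterNet reference
# implementation, for context.
def gaussian2D(shape, sigma=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap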
def __getitem__(self, index):
    image_fn = self.flist[index]
    image = cv2.imread(image_fn)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    box_fn = str(Path(self.box_root) / (Path(image_fn).stem + '.txt'))
    if osp.exists(box_fn):
        xywh = np.loadtxt(box_fn)
        xx, yy, ww, hh = xywh
        x1, y1, x2, y2 = xx - ww / 2, yy - hh / 2, xx + ww / 2, yy + hh / 2
        boxes = np.array([[x1, y1, x2, y2]]).astype('float32')
    else:
        boxes = np.array([[0.0, 0.0, 1.0, 1.0]]).astype('float32')
    if self.transform:
        image, boxes = self.transform(image, boxes)

    # generate box_gt for the loss; boxes are x1, y1, x2, y2 in [0, 1]
    output_h, output_w, grid_wh = self.configs.hh, self.configs.ww, self.configs.grid_wh
    hin, win = self.configs.image_size
    hm = np.zeros((self.configs.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    dense_xy = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.configs.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.configs.max_objs), dtype=np.uint8)
    num_objs = min(boxes.shape[0], self.configs.max_objs)

    for k in range(num_objs):
        bbox = boxes[k]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h * grid_wh), math.ceil(w * grid_wh)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2.0 * grid_wh,
                           (bbox[1] + bbox[3]) / 2.0 * grid_wh], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            ct_int = np.clip(ct_int, 0, grid_wh - 1)
            draw_umich_gaussian(hm[k], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            draw_dense_reg(dense_xy, hm.max(axis=0), ct_int, reg[k], radius)

    hm_a = hm.max(axis=0, keepdims=True)
    dense_mask = np.concatenate([hm_a, hm_a], axis=0)
    ret = {'hm': hm, 'wh': wh, 'xy': reg, 'ind': ind, 'dense_xy': dense_xy,
           'dense_wh': dense_wh, 'dense_mask': dense_mask, 'boxes': boxes}
    return image, ret
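# Every loader above writes flat indices as ind = y * output_w + x; decoding
# at inference simply inverts that with integer division and modulo:
def decode_flat_ind(ind, output_w):
    ys = ind // output_w
    xs = ind % output_w
    return xs, ys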
def __getitem__(self, index): img_id = self.images[index] img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name']) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) annotations = self.coco.loadAnns(ids=ann_ids) img = self.coco.loadImgs(ids=[img_id])[0] w_img = int(img['width']) h_img = int(img['height']) labels = [] bboxes = [] shapes = [] for anno in annotations: if anno['iscrowd'] == 1: # Excludes crowd objects continue polygons = get_connected_polygon_using_mask(anno['segmentation'], (h_img, w_img), n_vertices=self.n_vertices, closing_max_kernel=50) gt_x1, gt_y1, gt_w, gt_h = anno['bbox'] contour = np.array(polygons).reshape((-1, 2)) # Downsample the contour to fix number of vertices if len(contour) > self.n_vertices: fixed_contour = resample(contour, num=self.n_vertices) else: fixed_contour = turning_angle_resample(contour, self.n_vertices) fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w) fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h) # contour_mean = np.mean(fixed_contour, axis=0) contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2)) if contour_std < 1e-6 or contour_std == np.inf or contour_std == np.nan: # invalid shapes continue updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]), np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])] shapes.append(np.ndarray.flatten(fixed_contour).tolist()) labels.append(self.cat_ids[anno['category_id']]) bboxes.append(updated_bbox) labels = np.array(labels) bboxes = np.array(bboxes, dtype=np.float32) shapes = np.array(shapes, dtype=np.float32) if len(bboxes) == 0: bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) labels = np.array([[0]]) shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32) # in xyxy format img = cv2.imread(img_path) height, width = img.shape[0], img.shape[1] center = np.array([width / 2., height / 2.], dtype=np.float32) # center of image scale = max(height, width) * 1.0 flipped = False if self.split == 'train': scale = scale * np.random.choice(self.rand_scales) w_border = get_border(128, width) h_border = get_border(128, height) center[0] = np.random.randint(low=w_border, high=width - w_border) center[1] = np.random.randint(low=h_border, high=height - h_border) if np.random.random() < 0.5: flipped = True img = img[:, ::-1, :] center[0] = width - center[0] - 1 trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']]) img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h'])) # -----------------------------------debug--------------------------------- # image_show = img.copy() # for bbox, label in zip(bboxes, labels): # if flipped: # bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # bbox[:2] = affine_transform(bbox[:2], trans_img) # bbox[2:] = affine_transform(bbox[2:], trans_img) # bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1) # bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1) # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # cv2.putText(image_show, self.class_name[label + 1], (int(bbox[0]), int(bbox[1])), # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) # cv2.imshow('img', image_show) # cv2.waitKey() # -----------------------------------debug--------------------------------- img = img.astype(np.float32) / 255. 
if self.split == 'train': color_aug(self.data_rng, img, self.eig_val, self.eig_vec) img -= self.mean img /= self.std img = img.transpose(2, 0, 1) # from [H, W, C] to [C, H, W] trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']]) hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) # heatmap w_h_ = np.zeros((self.max_objs, 4), dtype=np.float32) # width and height of the shape real_ = torch.FloatTensor(np.zeros((self.max_objs, self.n_vertices), dtype=np.float32)) imaginary_ = torch.FloatTensor(np.zeros((self.max_objs, self.n_vertices), dtype=np.float32)) regs = np.zeros((self.max_objs, 2), dtype=np.float32) # regression for 4 offsets of center of mass to the bbox inds = np.zeros((self.max_objs,), dtype=np.int64) ind_masks = np.zeros((self.max_objs,), dtype=np.uint8) # detections = [] for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)): if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # Flip the contour for m in range(self.n_vertices): shape[2 * m] = width - shape[2 * m] - 1 bbox[:2] = affine_transform(bbox[:2], trans_fmap) bbox[2:] = affine_transform(bbox[2:], trans_fmap) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] # generate gt shape mean and std from contours for m in range(self.n_vertices): # apply scale and crop transform to shapes shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap) shape_clipped = np.reshape(shape, (self.n_vertices, 2)) shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1) shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1) clockwise_flag = check_clockwise_polygon(shape_clipped) if not clockwise_flag: fixed_contour = np.flip(shape_clipped, axis=0) else: fixed_contour = shape_clipped.copy() # Indexing from the left-most vertex, argmin x-axis idx = np.argmin(fixed_contour[:, 0]) indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0) # box_center = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.], dtype=np.float32) mass_center = np.mean(indexed_shape, axis=0) if h < 1e-6 or w < 1e-6: # remove small bboxes continue centered_shape = indexed_shape - mass_center if h > 0 and w > 0: obj_c = mass_center obj_c_int = obj_c.astype(np.int32) radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou))) draw_umich_gaussian(hmap[label], obj_c_int, radius) fourier_results = torch.fft(torch.FloatTensor(centered_shape), signal_ndim=1) / 32. 
real_[k] = fourier_results[:, 0] imaginary_[k] = fourier_results[:, 1] w_h_[k] = mass_center[1] - bbox[1], bbox[3] - mass_center[1], \ mass_center[0] - bbox[0], bbox[2] - mass_center[0] # [top, bottom, left, right] distance regs[k] = obj_c - obj_c_int # discretization error inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0] ind_masks[k] = 1 # detections = np.array(detections, dtype=np.float32) \ # if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32) # -----------------------------------debug--------------------------------- # canvas = np.zeros((self.fmap_size['h'] * 2, self.fmap_size['w'] * 2, 3), dtype=np.float32) # canvas[0:self.fmap_size['h'], 0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[0], 2), (1, 1, 3)) # canvas[0:self.fmap_size['h'], self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[1], 2), (1, 1, 3)) # canvas[self.fmap_size['h']:, 0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[2], 2), (1, 1, 3)) # canvas[self.fmap_size['h']:, self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[3], 2), (1, 1, 3)) # print(w_h_[0], regs[0]) # cv2.imshow('hmap', canvas) # cv2.waitKey() # -----------------------------------debug--------------------------------- return {'image': img, 'real': real_.numpy(), 'imaginary': imaginary_.numpy(), 'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}
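The loader above encodes each centered contour with the legacy `torch.fft(x, signal_ndim=1)` call, which treats the trailing dimension of size 2 as real/imaginary pairs, i.e. the contour is transformed as the complex signal x + iy. A minimal numpy sketch of that round trip, assuming the legacy call matches `np.fft.fft` (function names here are illustrative, not part of the repo):

import numpy as np

def contour_to_fourier(centered_shape, scale=32.):
    z = centered_shape[:, 0] + 1j * centered_shape[:, 1]  # contour as one complex signal
    coeffs = np.fft.fft(z) / scale                        # same /32. scaling as above
    return coeffs.real.astype(np.float32), coeffs.imag.astype(np.float32)

def fourier_to_contour(real_, imaginary_, scale=32.):
    z = np.fft.ifft((real_ + 1j * imaginary_) * scale)    # invert the descriptor
    return np.stack([z.real, z.imag], axis=1)             # back to (n_vertices, 2)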
def __getitem__(self, index): img_id = self.images[index] file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] img_path = os.path.join(self.img_dir, file_name) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) anns = self.coco.loadAnns(ids=ann_ids) num_objs = min(len(anns), self.max_objs) channel_counter = 1 # len(self.coco.getCatIds()) # target # target_img = cv2.imread(img_path) N_FRAMES = self.opt.nbr_frames middle = int(N_FRAMES/2) index = os.path.basename(img_path).replace('.jpg', '').replace('img', '').replace('.JPEG', '') rest = img_path.replace(index + '.jpg', '').replace(os.path.dirname(img_path), '') length = len(index) modulo = '1' for i in range(length): modulo += '0' img_paths = [] for i in range(N_FRAMES): new_img_path = os.path.dirname(img_path) \ + rest \ + str((int(index) - (i-middle)) % int(modulo)).zfill(length) + '.jpg' if not os.path.exists(new_img_path): new_img_path = img_path img_paths.append(new_img_path) imgs = [] for path in img_paths: imgs.append(cv2.imread(path)) img = np.concatenate(imgs, -1) bboxes = {} for ann in anns: if str(ann['category_id']) in bboxes: bboxes[str(ann['category_id'])].append([int(ann['bbox'][0]), int(ann['bbox'][1]), int(ann['bbox'][0] + ann['bbox'][2]), int(ann['bbox'][1] + ann['bbox'][3])]) else: bboxes[str(ann['category_id'])] = [[int(ann['bbox'][0]), int(ann['bbox'][1]), int(ann['bbox'][0] + ann['bbox'][2]), int(ann['bbox'][1] + ann['bbox'][3])]] # """ PYFLOW = True if PYFLOW: if 'uav' in self.opt.dataset: seg_path = os.path.join('/store/datasets/UAV/bgsubs', os.path.dirname(file_name).split('/')[-1], os.path.basename(file_name).replace('jpg', 'png')) else: seg_path = os.path.join('/store/datasets/OlderUA-Detrac/pyflow-bgsubs', os.path.dirname(file_name).split('/')[-1], os.path.basename(file_name).replace('jpg', 'png')) # """ if PYFLOW: seg_img = cv2.imread(seg_path, 0) # hughes else: seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]]) for label in range(1, channel_counter+1): if str(label) in bboxes: for bbox in bboxes[str(label)]: seg_img[label-1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255 height, width = img.shape[0], img.shape[1] c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) if self.opt.keep_res: input_h = (height | self.opt.pad) + 1 input_w = (width | self.opt.pad) + 1 s = np.array([input_w, input_h], dtype=np.float32) else: s = max(img.shape[0], img.shape[1]) * 1.0 input_h, input_w = self.opt.input_h, self.opt.input_w flipped = False if self.split == 'train': if not self.opt.not_rand_crop: s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) w_border = self._get_border(128, img.shape[1]) h_border = self._get_border(128, img.shape[0]) c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) else: sf = self.opt.scale cf = self.opt.shift c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) if np.random.random() < self.opt.flip: flipped = True # target # target_img = target_img[:, ::-1, :] seg_img = seg_img[:, ::-1] img = img[:, ::-1, :] c[0] = width - c[0] - 1 trans_input = get_affine_transform( c, s, 0, [input_w, input_h]) seg_inp = cv2.warpAffine(seg_img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) # print('pre: ', img.shape) # target # target_inp = cv2.warpAffine(target_img, trans_input,(input_w, input_h),flags=cv2.INTER_LINEAR) inp = np.zeros((input_h, input_w, N_FRAMES*3)) # fixed axis order: cv2.warpAffine returns (h, w, c) arrays, so the original (input_w, input_h, ...) only worked for square inputs
if inp.shape[2] == N_FRAMES*3: for i in range(N_FRAMES): inp[:, :, i*3:i*3+3] = cv2.warpAffine(img[:, :, i*3:i*3+3], trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) # print('post: ', inp.shape) # target # target_inp = (target_inp.astype(np.float32) / 255.) inp = (inp.astype(np.float32) / 255.) seg_inp = (seg_inp.astype(np.float32) / 255.) # hughes # print('np.mean(inp), PRE: ', np.mean(inp)) if inp.shape[2] == N_FRAMES*3: for i in range(N_FRAMES): if self.split == 'train' and not self.opt.no_color_aug: color_aug(self._data_rng, inp[:, :, i*3:i*3+3], self._eig_val, self._eig_vec) else: if self.split == 'train' and not self.opt.no_color_aug: color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) # target # color_aug(self._data_rng, target_inp, self._eig_val, self._eig_vec) # print('np.mean(inp), POST: ', np.mean(inp)) if inp.shape[2] == N_FRAMES*3: for i in range(N_FRAMES): inp[:, :, i*3:i*3+3] = (inp[:, :, i*3:i*3+3] - self.mean) / self.std else: inp = (inp - self.mean) / self.std # target # target_inp = (target_inp - self.mean) / self.std inp = inp.transpose(2, 0, 1) # target # target_inp = target_inp.transpose(2, 0, 1) output_h = input_h // self.opt.down_ratio output_w = input_w // self.opt.down_ratio num_classes = self.num_classes trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) wh = np.zeros((self.max_objs, 2), dtype=np.float32) dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) reg = np.zeros((self.max_objs, 2), dtype=np.float32) ind = np.zeros((self.max_objs), dtype=np.int64) reg_mask = np.zeros((self.max_objs), dtype=np.uint8) cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ draw_umich_gaussian gt_det = [] for k in range(num_objs): ann = anns[k] bbox = self._coco_box_to_bbox(ann['bbox']) cls_id = int(self.cat_ids[ann['category_id']]) if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 bbox[:2] = affine_transform(bbox[:2], trans_output) bbox[2:] = affine_transform(bbox[2:], trans_output) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) radius = self.opt.hm_gauss if self.opt.mse_loss else radius ct = np.array( [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) draw_gaussian(hm[cls_id], ct_int, radius) wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0] reg[k] = ct - ct_int reg_mask[k] = 1 cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k] cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1 if self.opt.dense_wh: draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) gt_det.append([ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) # write_hm = cv2.resize(((hm-np.min(hm)/np.max(hm))*255).astype(np.uint8).squeeze(0), (512, 512)) # cv2.imwrite('/store/datasets/UA-Detrac/test_sample/VID_HM/' + 'inp_' + os.path.basename(file_name) + '_' + 'HM.jpg', write_hm) # ReScale 1/4 # scale_percent = 25 # percent of original size # width = int(seg_inp.shape[1] * scale_percent / 100) # height = int(seg_inp.shape[0] * scale_percent / 100) # dim = (width, height) # seg_inp = cv2.resize(seg_inp, dim, interpolation=cv2.INTER_AREA) seg_inp = np.expand_dims(seg_inp, 0) ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'seg': seg_inp} # 'seg': np.expand_dims(seg_inp, 0)} if self.opt.dense_wh: hm_a = hm.max(axis=0, keepdims=True) dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) del ret['wh'] elif self.opt.cat_spec_wh: ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask}) del ret['wh'] if self.opt.reg_offset: ret.update({'reg': reg}) if self.opt.debug > 0 or not self.split == 'train': gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ np.zeros((1, 6), dtype=np.float32) meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} ret['meta'] = meta # if inp.shape[0] == N_FRAMES*3: # for i in range(N_FRAMES): # img_test = (inp[i*3:i*3+3, :, :].transpose(1, 2, 0) * 255).astype(np.uint8) # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/test_sample/VID_HM/", 'inp_' + os.path.basename(file_name) + '_' + str(i)), img_test) #img_test = (target_inp.transpose(1, 2, 0) * 255).astype(np.uint8) # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/exp/tensors/VID_HM/", os.path.basename(file_name) + '_target'), img_test) # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), (seg_inp.transpose(1, 2, 0) * 255).astype(np.uint8)) # exit() return ret
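The frame-window logic above rebuilds neighboring file names by zero-padded index arithmetic with modulo wraparound, falling back to the center frame if a neighbor is missing. A standalone sketch of that rule (the helper name and the '.jpg' assumption are illustrative):

import os

def neighbor_frame_paths(img_path, n_frames):
    stem, middle = os.path.basename(img_path), n_frames // 2
    digits = ''.join(ch for ch in stem if ch.isdigit())    # e.g. '00042' from 'img00042.jpg'
    prefix = img_path[:img_path.rfind(digits + '.jpg')]
    paths = []
    for i in range(n_frames):
        idx = (int(digits) - (i - middle)) % (10 ** len(digits))  # wrap like the modulo string above
        cand = prefix + str(idx).zfill(len(digits)) + '.jpg'
        paths.append(cand if os.path.exists(cand) else img_path)  # fall back to the center frame
    return paths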
def __getitem__(self, index): img_id = self.images[index] img_path = os.path.join( self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name']) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) annotations = self.coco.loadAnns(ids=ann_ids) labels = np.array( [self.cat_ids[anno['category_id']] for anno in annotations]) bboxes = np.array([anno['bbox'] for anno in annotations], dtype=np.float32) if len(bboxes) == 0: bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) labels = np.array([[0]]) bboxes[:, 2:] += bboxes[:, :2] # xywh to xyxy img = cv2.imread(img_path) height, width = img.shape[0], img.shape[1] center = np.array([width / 2., height / 2.], dtype=np.float32) # center of image scale = max(height, width) * 1.0 ''' imgs = coco_a.getImgIds() [391895, 522418, 184613, 318219, ...] list of image ids bboxes: all boxes annotated on one image [[359.17 146.17 112.45 213.57] [339.88 22.16 153.88 300.73] [471.64 172.82 35.92 48.1 ] [486.01 183.31 30.63 34.98]] ''' flipped = False if self.split == 'train': scale = scale * np.random.choice(self.rand_scales) w_border = get_border(128, width) h_border = get_border(128, height) center[0] = np.random.randint(low=w_border, high=width - w_border) center[1] = np.random.randint(low=h_border, high=height - h_border) if np.random.random() < 0.5: flipped = True img = img[:, ::-1, :] center[0] = width - center[0] - 1 trans_img = get_affine_transform( center, scale, 0, [self.img_size['w'], self.img_size['h']]) img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h'])) # -----------------------------------debug--------------------------------- # for bbox, label in zip(bboxes, labels): # if flipped: # bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # bbox[:2] = affine_transform(bbox[:2], trans_img) # bbox[2:] = affine_transform(bbox[2:], trans_img) # bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1) # bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1) # cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # cv2.putText(img, self.class_name[label + 1], (int(bbox[0]), int(bbox[1])), # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) # cv2.imshow('img', img) # cv2.waitKey() # -----------------------------------debug--------------------------------- img = img.astype(np.float32) / 255.
if self.split == 'train': color_aug(self.data_rng, img, self.eig_val, self.eig_vec) img -= self.mean img /= self.std img = img.transpose(2, 0, 1) # from [H, W, C] to [C, H, W] trans_fmap = get_affine_transform( center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']]) hmap = np.zeros( (self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) # heatmap w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32) # width and height regs = np.zeros((self.max_objs, 2), dtype=np.float32) # regression inds = np.zeros((self.max_objs, ), dtype=np.int64) ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8) # detections = [] ''' bbox = bboxes[0] print(bbox[[0, 2]]) print(bbox) print(bbox[0:2]) [359.17 112.45] [359.17 146.17 112.45 213.57] [359.17 146.17] <class 'numpy.ndarray'> ''' for k, (bbox, label) in enumerate(zip(bboxes, labels)): if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 bbox[:2] = affine_transform(bbox[:2], trans_fmap) bbox[2:] = affine_transform(bbox[2:], trans_fmap) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) obj_c_int = obj_c.astype(np.int32) radius = max( 0, int( gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou))) draw_umich_gaussian(hmap[label], obj_c_int, radius) w_h_[k] = 1. * w, 1. * h regs[k] = obj_c - obj_c_int # discretization error inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0] ind_masks[k] = 1 # groundtruth bounding box coordinate with class # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2, # obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label]) # detections = np.array(detections, dtype=np.float32) \ # if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32) return { 'image': img, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id }
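These loaders all flatten the object center into `inds` as `ind = y * fmap_w + x`, with `regs` holding the sub-pixel remainder and `w_h_` the box size. A minimal sketch (not part of the dataset class) inverting those targets back into xyxy boxes on the feature map, to document the encoding:

import numpy as np

def decode_targets(w_h_, regs, inds, ind_masks, fmap_w):
    ys, xs = inds // fmap_w, inds % fmap_w            # undo the flattened index
    cx, cy = xs + regs[:, 0], ys + regs[:, 1]         # add back the discretization error
    w, h = w_h_[:, 0], w_h_[:, 1]
    boxes = np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)
    return boxes[ind_masks.astype(bool)]              # keep only real objects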
def __getitem__(self, index): img_id = self.images[index] file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] img_path = os.path.join(self.img_dir, file_name) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) anns = self.coco.loadAnns(ids=ann_ids) num_objs = min(len(anns), self.max_objs) img = cv2.imread(img_path) # load the matching keypoint annotation file based on the image file name (filepath, tempfilename) = os.path.split(img_path) (filename, extension) = os.path.splitext(tempfilename) kps_path = os.path.join( '/media/srt/dataset/L_Shelf_0114/Kps_Ann', filename + '_kps.npy' ) #/media/srt/resource/Halcon_Project/L_shelf_dataset/HG_Dataset/kps kps_raw = np.load(kps_path) c3 = np.ones(6) kps_ann = np.column_stack((kps_raw, c3)) # expand the keypoint array to shape [6, 3] height, width = img.shape[0], img.shape[1] c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) s = max(img.shape[0], img.shape[1]) * 1.0 # values used below for crop and shift input_h, input_w = self.opt.input_h, self.opt.input_w # resolution defined in opt rot = 0 flipped = False if self.split == 'train': if not self.opt.not_rand_crop: s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) w_border = self._get_border(128, img.shape[1]) h_border = self._get_border(128, img.shape[0]) c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) else: sf = self.opt.scale #0 cf = self.opt.shift #0 c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) # add the random rotation from multi-pose if np.random.random() < self.opt.aug_rot: rf = self.opt.rotate rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) # if np.random.random() < self.opt.flip: # flipped = True # img = img[:, ::-1, :] # c[0] = width - c[0] - 1 # apply the affine transform to the input trans_input = get_affine_transform(c, s, rot, [input_w, input_h]) inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) inp = (inp.astype(np.float32) / 255.) if self.split == 'train' and not self.opt.no_color_aug: color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) inp = (inp - self.mean) / self.std inp = inp.transpose(2, 0, 1) test_image = inp[1] # used for visualization together with kps_hp output_h = input_h // self.opt.down_ratio output_w = input_w // self.opt.down_ratio num_classes = self.num_classes num_kps = 6 # does the number of points need +1 ?
trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) trans_output_rot = get_affine_transform(c, s, rot, [output_w, output_h]) hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) # heatmap for the centers hm_hp = np.zeros((num_kps, output_h, output_w), dtype=np.float32) # heatmap for the keypoints # only initialized here, not yet assigned dense_kps = np.zeros((num_kps, 2, output_h, output_w), dtype=np.float32) dense_kps_mask = np.zeros((num_kps, output_h, output_w), dtype=np.float32) wh = np.zeros((self.max_objs, 2), dtype=np.float32) dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) kps = np.zeros((num_kps, num_kps * 2), dtype=np.float32) # vectors between the other keypoints and a given keypoint reg = np.zeros((self.max_objs, 2), dtype=np.float32) ind = np.zeros((self.max_objs), dtype=np.int64) reg_mask = np.zeros((self.max_objs), dtype=np.uint8) kps_mask = np.zeros((self.max_objs, self.num_kps * 2), dtype=np.uint8) hp_offset = np.zeros((self.max_objs * num_kps, 2), dtype=np.float32) hp_ind = np.zeros((self.max_objs * num_kps), dtype=np.int64) hp_mask = np.zeros((self.max_objs * num_kps), dtype=np.int64) cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ draw_umich_gaussian # flags for reading out each annotation field gt_det = [] for k in range(num_objs): ann = anns[k] bbox = self._coco_box_to_bbox(ann['bbox']) cls_id = int(self.cat_ids[ann['category_id']]) # pts can be loaded in any user-defined way pts = np.array(kps_ann, np.float32).reshape(num_kps, 3) # originally read from the COCO-style json keypoint annotations bbox[:2] = affine_transform(bbox[:2], trans_output) bbox[2:] = affine_transform(bbox[2:], trans_output) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) radius = self.opt.hm_gauss if self.opt.mse_loss else radius ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0] reg[k] = ct - ct_int reg_mask[k] = 1 num_kpts = pts[:, 2].sum() if num_kpts == 0: hm[cls_id, ct_int[1], ct_int[0]] = 0.9999 reg_mask[k] = 0 cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k] cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1 hp_radius = gaussian_radius((math.ceil(h), math.ceil(w))) hp_radius = self.opt.hm_gauss \ if self.opt.mse_loss else max(0, int(hp_radius)) for j in range(num_kps): if pts[j, 2] > 0: # if the keypoint's third value (its visibility flag) > 0, transform it pts[j, :2] = affine_transform(pts[j, :2], trans_output) # transform the keypoint if pts[j, 0] >= 0 and pts[j, 0] < output_w and \ pts[j, 1] >= 0 and pts[j, 1] < output_h: # compute the vectors between the other points and this point kps[j] = (pts[:, :2] - pts[j, :2]).reshape(num_kps * 2) # fixed: the original 2-slot slice kps[j, j*2:j*2+2] cannot hold the (num_kps, 2) difference array; store the full vector set for point j kps_mask[k, j * 2:j * 2 + 2] = 1 pt_int = pts[j, :2].astype(np.int32) hp_offset[k * num_kps + j] = pts[j, :2] - pt_int hp_ind[k * num_kps + j] = pt_int[1] * output_w + pt_int[0] # added: hp_ind was never filled, leaving all-zero keypoint indices hp_mask[k * num_kps + j] = 1 if self.opt.dense_hp: # must be drawn before the center hm gaussian print('draw dense hp!!!') draw_dense_reg(dense_kps[j], hm[cls_id], ct_int, pts[j, :2] - ct_int, radius, is_offset=True) draw_gaussian(dense_kps_mask[j], ct_int, radius) draw_gaussian(hm_hp[j], pt_int, hp_radius) heatmap = np.squeeze(hm_hp[j]) #(1,160,240) heatmap = cv2.resize(heatmap, (960, 640), interpolation=cv2.INTER_CUBIC) new_image = test_image + heatmap * 2 array_name = 'forbidden_s_c_kps_hp/visual_kps_' + str( index) + '_' + str(j) + '.png' # matplotlib.image.imsave(array_name, new_image) # draw the gaussian map of the center point draw_gaussian(hm[cls_id], ct_int, radius) heatmap = np.squeeze(hm[cls_id]) # (1,160,240) heatmap = cv2.resize(heatmap, (960, 640), interpolation=cv2.INTER_CUBIC) new_image = test_image + heatmap * 2 array_name = 'visual_center_' + str(index) + '.png' # matplotlib.image.imsave(array_name, new_image) if self.opt.dense_wh: draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) gt_det.append([ ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1 ] + pts[:, :2].reshape(num_kps * 2).tolist() + [cls_id]) # gt_det.append([ct[0] - w / 2, ct[1] - h / 2, # ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) # 'hps' and 'hps_mask' are added on top of the original return dict ret = { 'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'hps': kps, 'hps_mask': kps_mask } if self.opt.dense_hp: dense_kps = dense_kps.reshape(num_kps * 2, output_h, output_w) dense_kps_mask = dense_kps_mask.reshape(num_kps, 1, output_h, output_w) dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1) dense_kps_mask = dense_kps_mask.reshape(num_kps * 2, output_h, output_w) ret.update({ 'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask }) del ret['hps'], ret['hps_mask'] if self.opt.dense_wh: hm_a = hm.max(axis=0, keepdims=True) dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) del ret['wh'] elif self.opt.cat_spec_wh: ret.update({ 'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask }) del ret['wh'] if self.opt.reg_offset: ret.update({'reg': reg}) if self.opt.hm_hp: ret.update({'hm_hp': hm_hp}) if self.opt.reg_hp_offset: ret.update({ 'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask }) if self.opt.debug > 0 or not self.split == 'train': gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ np.zeros((1, 6), dtype=np.float32) meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} ret['meta'] = meta return ret
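The keypoint targets above store an integer cell index plus a sub-pixel offset per keypoint. A small sketch inverting them (assuming hp_ind = y * output_w + x and hp_offset = float_pt - int_pt, as built above):

import numpy as np

def decode_keypoints(hp_ind, hp_offset, hp_mask, output_w):
    ys, xs = hp_ind // output_w, hp_ind % output_w            # undo the flattened index
    pts = np.stack([xs + hp_offset[:, 0], ys + hp_offset[:, 1]], axis=1)
    return pts[hp_mask.astype(bool)]                          # keep only annotated keypoints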
def _get_data(self, position): img_id = self.images[self._indexes[position]] file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] img_path = os.path.join(self.img_dir, file_name) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) anns = self.coco.loadAnns(ids=ann_ids) num_objs = min(len(anns), self.params.max_objs) img = cv2.imread(img_path) height, width = img.shape[0], img.shape[1] c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) if self.opt.keep_res: input_h = (height | self.opt.pad) + 1 input_w = (width | self.opt.pad) + 1 s = np.array([input_w, input_h], dtype=np.float32) else: s = max(img.shape[0], img.shape[1]) * 1.0 input_h, input_w = self.opt.input_h, self.opt.input_w flipped = False if self.split == 'train': if not self.opt.not_rand_crop: s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) w_border = self._get_border(128, img.shape[1]) h_border = self._get_border(128, img.shape[0]) c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) else: sf = self.opt.scale cf = self.opt.shift c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) if np.random.random() < self.opt.flip: flipped = True assert ( len(img.shape) == 3 ), f"The dimensions of img should be 3. Filename: {img_path}, shape: {img.shape}" img = img[:, ::-1, :] c[0] = width - c[0] - 1 trans_input = get_affine_transform(c, s, 0, [input_w, input_h]) inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) inp = (inp.astype(np.float32) / 255.) if self.split == 'train' and not self.opt.no_color_aug: color_aug(self._rng, inp, self.params._eig_val, self.params._eig_vec) inp = (inp - self.params.mean) / self.params.std if self.mixed_precision: inp = fast_pad(inp) # Transpose to NCHW if channel_last is not enabled if not self.channel_last: inp = inp.transpose(2, 0, 1) output_h = input_h // self.opt.down_ratio output_w = input_w // self.opt.down_ratio num_classes = self.params.num_classes trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) ind = np.zeros((self.params.max_objs), dtype=np.int32) wh = np.zeros((self.params.max_objs, 2), dtype=np.float32) reg = np.zeros((self.params.max_objs, 2), dtype=np.float32) reg_mask = np.zeros((self.params.max_objs, 1), dtype=np.float32) cls = np.zeros((self.params.max_objs, 1), dtype=np.int32) draw_gaussian = draw_umich_gaussian for k in range(num_objs): ann = anns[k] bbox = self._coco_box_to_bbox(ann['bbox']) cls_id = int(self.params.cat_ids[ann['category_id']]) if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 bbox[:2] = affine_transform(bbox[:2], trans_output) bbox[2:] = affine_transform(bbox[2:], trans_output) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) draw_gaussian(hm[cls_id], ct_int, radius) wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0] reg[k] = ct - ct_int reg_mask[k] = 1 cls[k] = cls_id # Transpose heatmap to NHWC if channel last is enabled if self.channel_last: hm = np.transpose(hm, (1, 2, 0)) ret = (inp, hm, ind, wh, reg, reg_mask, cls) return ret
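Every loader in this file calls `gaussian_radius((math.ceil(h), math.ceil(w)))`. A sketch of that function as shipped in the reference CenterNet/CornerNet utilities, under the assumption this repo follows it (note the reference divides by 2 rather than 2a in the first two cases, a well-known quirk of that code):

import numpy as np

def gaussian_radius_sketch(det_size, min_overlap=0.7):
    height, width = det_size
    # case 1: both corners shift inward
    a1, b1 = 1, height + width
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    r1 = (b1 + np.sqrt(b1 ** 2 - 4 * a1 * c1)) / 2
    # case 2: both corners shift outward
    a2, b2 = 4, 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    r2 = (b2 + np.sqrt(b2 ** 2 - 4 * a2 * c2)) / 2
    # case 3: one corner in, one corner out
    a3, b3 = 4 * min_overlap, -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    r3 = (b3 + np.sqrt(b3 ** 2 - 4 * a3 * c3)) / 2
    return min(r1, r2, r3)  # largest radius that keeps IoU >= min_overlap in all cases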
def __getitem__(self, index): img_id = self.images[index] img_path = os.path.join( self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name']) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) annotations = self.coco.loadAnns(ids=ann_ids) img = self.coco.loadImgs(ids=[img_id])[0] w_img = int(img['width']) h_img = int(img['height']) labels = [] bboxes = [] shapes = [] for anno in annotations: if anno['iscrowd'] == 1: # Excludes crowd objects continue # polygons = anno['segmentation'][0] polygons = anno['segmentation'] if len(polygons) > 1: bg = np.zeros((h_img, w_img, 1), dtype=np.uint8) for poly in polygons: len_poly = len(poly) vertices = np.zeros((1, len_poly // 2, 2), dtype=np.int32) for i in range(len_poly // 2): vertices[0, i, 0] = int(poly[2 * i]) vertices[0, i, 1] = int(poly[2 * i + 1]) # cv2.fillPoly(bg, vertices, color=(255)) cv2.drawContours(bg, vertices, color=(255), contourIdx=-1, thickness=-1) pads = 5 while True: kernel = np.ones((pads, pads), np.uint8) bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel) obj_contours, _ = cv2.findContours(bg_closed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(obj_contours) > 1: pads += 5 else: polygons = obj_contours[0] break else: # continue polygons = anno['segmentation'][0] gt_x1, gt_y1, gt_w, gt_h = anno['bbox'] contour = np.array(polygons).reshape((-1, 2)) # Downsample the contour to a fixed number of vertices fixed_contour = resample(contour, num=self.n_vertices) fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w) fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h) # contour_mean = np.mean(fixed_contour, axis=0) contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2)) if contour_std < 1e-6 or not np.isfinite(contour_std): # invalid shapes (fixed: '== np.nan' is always False, use np.isfinite) continue shapes.append(np.ndarray.flatten(fixed_contour).tolist()) labels.append(self.cat_ids[anno['category_id']]) bboxes.append(anno['bbox']) labels = np.array(labels) bboxes = np.array(bboxes, dtype=np.float32) shapes = np.array(shapes, dtype=np.float32) if len(bboxes) == 0: bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) labels = np.array([[0]]) shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32) bboxes[:, 2:] += bboxes[:, :2] # xywh to xyxy # if img_id in self.all_annotations.keys(): # annotations = self.all_annotations[img_id] # shape_annots = self.all_shapes[img_id] # labels = annotations['cat_id'] # bboxes = annotations['bbox'] # xyxy format # shapes = shape_annots['shape'] # polygonal vertices format xyxyxyxyxy... 
# codes = annotations['codes'] # labels = np.array(labels) # bboxes = np.array(bboxes, dtype=np.float32) # codes = np.array(codes, dtype=np.float32) # shapes = np.array(shapes, dtype=np.float32) # else: # bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) # labels = np.array([[0]]) # codes = np.zeros(shape=(1, self.n_codes), dtype=np.float32) # shapes = np.zeros(shape=(1, self.n_vertices * 2), dtype=np.float32) img = cv2.imread(img_path) height, width = img.shape[0], img.shape[1] center = np.array([width / 2., height / 2.], dtype=np.float32) # center of image scale = max(height, width) * 1.0 flipped = False if self.split == 'train': scale = scale * np.random.choice(self.rand_scales) w_border = get_border(128, width) h_border = get_border(128, height) center[0] = np.random.randint(low=w_border, high=width - w_border) center[1] = np.random.randint(low=h_border, high=height - h_border) if np.random.random() < 0.5: flipped = True img = img[:, ::-1, :] center[0] = width - center[0] - 1 trans_img = get_affine_transform( center, scale, 0, [self.img_size['w'], self.img_size['h']]) img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h'])) # -----------------------------------debug--------------------------------- # image_show = img.copy() # for bbox, label, shape in zip(bboxes, labels, shapes): # if flipped: # bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # # Flip the contour # for m in range(self.n_vertices): # shape[2 * m] = width - shape[2 * m] - 1 # bbox[:2] = affine_transform(bbox[:2], trans_img) # bbox[2:] = affine_transform(bbox[2:], trans_img) # bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1) # bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1) # # # generate gt shape mean and std from contours # for m in range(self.n_vertices): # apply scale and crop transform to shapes # shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_img) # # contour = np.reshape(shape, (self.n_vertices, 2)) # # Indexing from the left-most vertex, argmin x-axis # idx = np.argmin(contour[:, 0]) # indexed_shape = np.concatenate((contour[idx:, :], contour[:idx, :]), axis=0) # # clockwise_flag = check_clockwise_polygon(indexed_shape) # if not clockwise_flag: # fixed_contour = np.flip(indexed_shape, axis=0) # else: # fixed_contour = indexed_shape # # contour[:, 0] = np.clip(fixed_contour[:, 0], 0, self.img_size['w'] - 1) # contour[:, 1] = np.clip(fixed_contour[:, 1], 0, self.img_size['h'] - 1) # # # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # # cv2.polylines(image_show, [contour.astype(np.int32)], True, (0, 0, 255), thickness=2) # cv2.drawContours(image_show, [contour.astype(np.int32)], # color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)), # contourIdx=-1, thickness=-1) # # cv2.imshow('img', image_show) # cv2.waitKey() # -----------------------------------debug--------------------------------- img = img.astype(np.float32) / 255. 
if self.split == 'train': color_aug(self.data_rng, img, self.eig_val, self.eig_vec) img -= self.mean img /= self.std img = img.transpose(2, 0, 1) # from [H, W, C] to [C, H, W] trans_fmap = get_affine_transform( center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']]) hmap = np.zeros( (self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) # heatmap # w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32) # width and height of the shape w_h_std = np.zeros((self.max_objs, 2), dtype=np.float32) # width and height of the shape codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32) # gt coefficients/codes for shapes regs = np.zeros( (self.max_objs, 2), dtype=np.float32) # regression for offsets of shape center inds = np.zeros((self.max_objs, ), dtype=np.int64) ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8) # detections = [] for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)): if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # Flip the contour for m in range(self.n_vertices): shape[2 * m] = width - shape[2 * m] - 1 bbox[:2] = affine_transform(bbox[:2], trans_fmap) bbox[2:] = affine_transform(bbox[2:], trans_fmap) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] # generate gt shape mean and std from contours for m in range(self.n_vertices ): # apply scale and crop transform to shapes shape[2 * m:2 * m + 2] = affine_transform( shape[2 * m:2 * m + 2], trans_fmap) contour = np.reshape(shape, (self.n_vertices, 2)) # Indexing from the left-most vertex, argmin x-axis idx = np.argmin(contour[:, 0]) indexed_shape = np.concatenate( (contour[idx:, :], contour[:idx, :]), axis=0) clockwise_flag = check_clockwise_polygon(indexed_shape) if not clockwise_flag: fixed_contour = np.flip(indexed_shape, axis=0) else: fixed_contour = indexed_shape.copy() contour[:, 0] = np.clip(fixed_contour[:, 0], 0, self.fmap_size['w'] - 1) contour[:, 1] = np.clip(fixed_contour[:, 1], 0, self.fmap_size['h'] - 1) contour_mean = np.mean(contour, axis=0) contour_std = np.std(contour, axis=0) if np.sqrt(np.sum(contour_std**2)) <= 1e-6: continue else: norm_shape = (contour - contour_mean) / np.sqrt( np.sum(contour_std**2)) if h > 0 and w > 0 and np.sqrt(np.sum(contour_std**2)) > 1e-6: obj_c = contour_mean obj_c_int = obj_c.astype(np.int32) radius = max( 0, int( gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou))) draw_umich_gaussian(hmap[label], obj_c_int, radius) w_h_std[k] = contour_std temp_codes, _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary, lmbda=self.sparse_alpha, max_iter=80) codes_[k] = np.exp(temp_codes) regs[k] = obj_c - obj_c_int # discretization error inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0] ind_masks[k] = 1 # groundtruth bounding box coordinate with class # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2, # obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label]) # detections = np.array(detections, dtype=np.float32) \ # if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32) # -----------------------------------debug--------------------------------- # canvas = np.zeros((self.fmap_size['h'] * 2, self.fmap_size['w'] * 2, 3), dtype=np.float32) # canvas[0:self.fmap_size['h'], 0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[0], 2), (1, 1, 3)) # canvas[0:self.fmap_size['h'], self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[1], 2), (1, 1, 3)) # canvas[self.fmap_size['h']:, 
0:self.fmap_size['w'], :] = np.tile(np.expand_dims(hmap[2], 2), (1, 1, 3)) # canvas[self.fmap_size['h']:, self.fmap_size['w']:, :] = np.tile(np.expand_dims(hmap[3], 2), (1, 1, 3)) # print(w_h_[0], regs[0]) # cv2.imshow('hmap', canvas) # cv2.waitKey() # -----------------------------------debug--------------------------------- # -----------------------------------debug--------------------------------- # image_show = img.copy() # for bbox, label, shape in zip(bboxes, labels, shapes): # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # cv2.polylines(image_show, [contour.astype(np.int32)], True, (0, 0, 255), thickness=2) # cv2.imshow('img', image_show) # cv2.waitKey() # -----------------------------------debug--------------------------------- return { 'image': img, 'codes': codes_, 'hmap': hmap, 'w_h_std': w_h_std, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id }
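The loader above calls `fast_ista` to fit sparse codes so that the learned `self.dictionary` approximately reconstructs each normalized contour; its internals are not shown here. A generic ISTA sketch under the assumption that it solves min_z 0.5*||x - z D||^2 + lmbda*||z||_1 for a row vector x and dictionary D of shape (n_codes, n_vertices * 2):

import numpy as np

def ista_sketch(x, D, lmbda=0.1, max_iter=80):
    L = np.linalg.norm(D, 2) ** 2                      # Lipschitz constant of the data-term gradient
    z = np.zeros((x.shape[0], D.shape[0]), dtype=np.float32)
    for _ in range(max_iter):
        grad = (z @ D - x) @ D.T                       # gradient of 0.5*||x - zD||^2
        z = z - grad / L                               # gradient step
        z = np.sign(z) * np.maximum(np.abs(z) - lmbda / L, 0.)  # soft-thresholding (the L1 prox)
    return z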
def __next__(self): load_vid_time, img_transform_time, create_heatmap_time = 0, 0, 0 start = time.time() if self.cap is None or self.count >= self.length: if self.cap is not None and self.vid_i == self.num_videos and self.loop: self.vid_i = 0 elif self.cap is not None and self.vid_i == self.num_videos: raise StopIteration if self.opt.vidstream == 'skvideo': self.cap = skvideo.io.vread(self.video_paths[self.vid_i]) metadata = skvideo.io.ffprobe(self.video_paths[self.vid_i]) fr_lst = metadata['video']['@avg_frame_rate'].split('/') self.rate = int(fr_lst[0]) / int(fr_lst[1]) self.length = int(metadata['video']['@nb_frames']) else: self.cap = cv2.VideoCapture(self.video_paths[self.vid_i]) width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) self.rate = self.cap.get(cv2.CAP_PROP_FPS) self.length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) self.frame_gen = self._frame_from_video(self.cap) # self.detections = pickle.load(open(self.annotation_path[self.vid_i], 'rb')) self.count = 0 self.vid_i += 1 end_load_vid = time.time() load_vid_time = end_load_vid - start # load image depending on stream start_resize = time.time() if self.opt.vidstream == 'skvideo': img = self.cap[self.count] else: img = next(self.frame_gen) # in_h = int(original_img.shape[0] / self.opt.downsample) # in_w = int(original_img.shape[1] / self.opt.downsample) # img = cv2.resize(original_img, (in_w, in_h)) # cv2.imwrite("/home/jl5/CenterNet/tmp.png", img) start_img_transform = time.time() anns = self.mmdetect_pred2inst(self.count) num_objs = min(len(anns), self.max_objs) height, width = img.shape[0], img.shape[1] c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) if self.opt.keep_res: input_h = (height | self.opt.pad) + 1 input_w = (width | self.opt.pad) + 1 s = np.array([input_w, input_h], dtype=np.float32) else: s = max(img.shape[0], img.shape[1]) * 1.0 input_h, input_w = self.opt.input_h, self.opt.input_w flipped = False if self.split == 'train': if not self.opt.not_rand_crop: s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) w_border = self._get_border(128, img.shape[1]) h_border = self._get_border(128, img.shape[0]) c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) else: sf = self.opt.scale cf = self.opt.shift c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) if np.random.random() < self.opt.flip: flipped = True img = img[:, ::-1, :] c[0] = width - c[0] - 1 # send to gpu trans_input = get_affine_transform(c, s, 0, [input_w, input_h]) inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) inp = torch.from_numpy(inp).cuda() inp = (inp.float() / 255.) 
# if self.split == 'train' and not self.opt.no_color_aug: # color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) inp = (inp - torch.from_numpy(self.mean).cuda()) / torch.from_numpy( self.std).cuda() inp = inp.permute(2, 0, 1) end_img_transform = time.time() img_transform_time = end_img_transform - start_img_transform output_h = input_h // self.opt.down_ratio output_w = input_w // self.opt.down_ratio num_classes = self.num_classes trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) wh = np.zeros((self.max_objs, 2), dtype=np.float32) dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) reg = np.zeros((self.max_objs, 2), dtype=np.float32) ind = np.zeros((self.max_objs), dtype=np.int64) reg_mask = np.zeros((self.max_objs), dtype=np.uint8) cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) unconfident_hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ draw_umich_gaussian gt_det = [] def show_bbox(im): fig, ax = plt.subplots(1) ax.imshow(im) for i in range(num_objs): bbox = np.array(anns[i]['bbox'], dtype=np.int32) bbox = bbox / self.opt.downsample rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], linewidth=1, edgecolor='r', facecolor='none') ax.add_patch(rect) plt.savefig('/home/jl5/CenterNet/tmp.png') pdb.set_trace() # detect = self.detections[self.count] # if self.opt.task == 'ctdet_semseg': # seg_mask, weight_mask = batch_segmentation_masks(1, (720, 1280), np.array([detect['boxes']]), np.array([detect['classes']]), detect['masks'], # np.array([detect['scores']]), [len(detect['boxes'])], True, coco_class_groups, mask_threshold=0.5, box_threshold=self.opt.center_thresh, scale_boxes=False) # unbatch_seg = seg_mask[0].astype(np.uint8) # unbatch_weight = weight_mask[0].astype(np.uint8) # seg_mask = np.expand_dims(cv2.resize(unbatch_seg, (1280, 736)), axis=0).astype(np.int32) # weight_mask = np.expand_dims(cv2.resize(unbatch_weight, (1280, 736)), axis = 0).astype(bool) start_detect = time.time() for k in range(num_objs): ann = anns[k] bbox = np.array( ann['bbox'], dtype=np.float32) # self._coco_box_to_bbox(ann['bbox']) # bbox = bbox / self.opt.downsample # if need to downsample cls_id = int(self.cat_ids[ann['category_id']]) if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 bbox[:2] = affine_transform(bbox[:2], trans_output) bbox[2:] = affine_transform(bbox[2:], trans_output) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h > 0 and w > 0: radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) radius = self.opt.hm_gauss if self.opt.mse_loss else radius ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) if ann['score'] >= 0.3 and ann['score'] < 0.5: draw_gaussian(unconfident_hm[cls_id], ct_int, radius) reg_mask[k] = 0 else: draw_gaussian(hm[cls_id], ct_int, radius) reg_mask[k] = 1 wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0] reg[k] = ct - ct_int cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k] cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1 if self.opt.dense_wh: draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) gt_det.append([ ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1, cls_id ]) # note: seg_mask / weight_mask only exist when the commented-out block above is restored; the 'ctdet_semseg' branch raises NameError as written if self.opt.task == 'ctdet_semseg': ret = { 'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'seg': seg_mask, 'weight_seg': weight_mask } else: ret = { 'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'unconf_hm': unconfident_hm } if self.opt.dense_wh: hm_a = hm.max(axis=0, keepdims=True) dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) del ret['wh'] elif self.opt.cat_spec_wh: ret.update({ 'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask }) del ret['wh'] if self.opt.reg_offset: ret.update({'reg': reg}) if self.opt.debug > 0 or not self.split == 'train': gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ np.zeros((1, 6), dtype=np.float32) meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': self.count} ret['meta'] = meta self.count += 1 end_detect_time = time.time() create_heatmap_time = end_detect_time - start_detect # print("load vid {:.4f} | img transform {:.4f} | create instance {:.4f} \n".format(load_vid_time, img_transform_time, create_heatmap_time)) return ret
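The streaming loader above gates pseudo-labels from the upstream detector by score: mid-confidence detections go to a separate "unconfident" heatmap and have their regression targets masked out. A tiny sketch of that routing rule (thresholds 0.3 and 0.5 as hard-coded above):

def route_pseudo_label(score, low=0.3, high=0.5):
    # mid-confidence detections train only the unconfident heatmap, no size/offset loss
    if low <= score < high:
        return 'unconf_hm', 0   # draw on unconfident_hm, reg_mask = 0
    return 'hm', 1              # draw on hm, reg_mask = 1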
def __getitem__(self, index): img_id = self.images[index] img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name']) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) annotations = self.coco.loadAnns(ids=ann_ids) img = self.coco.loadImgs(ids=[img_id])[0] w_img = int(img['width']) h_img = int(img['height']) labels = [] bboxes = [] shapes = [] for anno in annotations: # add some fields for evaluation # anno['iscrowd'] = 0 # anno['segmentation'] = anno['a_segm'] # only evaluate amodal segmentation # anno['bbox'] = anno['i_bbox'] # only evaluate inmodal detection if anno['category_id'] not in KINS_IDS: continue # excludes 3: person-sitting class for evaluation polygons = get_connected_polygon_using_mask(anno['segmentation'], (h_img, w_img), n_vertices=self.n_vertices, closing_max_kernel=50) gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox'] # this is used to clip resampled polygons contour = np.array(polygons).reshape((-1, 2)) # Downsample the contour to a fixed number of vertices if len(contour) > self.n_vertices: fixed_contour = resample(contour, num=self.n_vertices) else: fixed_contour = turning_angle_resample(contour, self.n_vertices) fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w) fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h) contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2)) if contour_std < 1e-6 or not np.isfinite(contour_std): # invalid shapes (fixed: '== np.nan' is always False, use np.isfinite) continue shapes.append(np.ndarray.flatten(fixed_contour).tolist()) labels.append(self.cat_ids[anno['category_id']]) bboxes.append(anno['bbox']) labels = np.array(labels) bboxes = np.array(bboxes, dtype=np.float32) shapes = np.array(shapes, dtype=np.float32) if len(bboxes) == 0: bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) labels = np.array([[0]]) shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32) bboxes[:, 2:] += bboxes[:, :2] # xywh to xyxy img = cv2.imread(img_path) height, width = img.shape[0], img.shape[1] center = np.array([width / 2., height / 2.], dtype=np.float32) # center of image scale = max(height, width) * 1.0 flipped = False if self.split == 'train': scale = scale * np.random.choice(self.rand_scales) w_border = get_border(400, width) h_border = get_border(180, height) center[0] = np.random.randint(low=w_border, high=width - w_border) center[1] = np.random.randint(low=h_border, high=height - h_border) if np.random.random() < 0.5: flipped = True img = img[:, ::-1, :] center[0] = width - center[0] - 1 trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']]) # -----------------------------------debug--------------------------------- # image_show = img.copy() img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h'])) img = img.astype(np.float32) / 255.
if self.split == 'train': color_aug(self.data_rng, img, self.eig_val, self.eig_vec) img -= self.mean img /= self.std img = img.transpose(2, 0, 1) # from [H, W, C] to [C, H, W] trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']]) # -----------------------------------debug--------------------------------- # image_show = cv2.warpAffine(image_show, trans_fmap, (self.fmap_size['w'], self.fmap_size['h'])) hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) # heatmap of centers occ_map = np.zeros((1, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) # grayscale map for occlusion levels w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32) # width and height of inmodal bboxes shapes_ = np.zeros((self.max_objs, self.n_vertices * 2), dtype=np.float32) # gt amodal segmentation polygons center_offsets = np.zeros((self.max_objs, 2), dtype=np.float32) # gt amodal mass centers to inmodal bbox center codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32) # gt amodal coefficients regs = np.zeros((self.max_objs, 2), dtype=np.float32) # regression for quantization error inds = np.zeros((self.max_objs,), dtype=np.int64) ind_masks = np.zeros((self.max_objs,), dtype=np.uint8) for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)): if flipped: bbox[[0, 2]] = width - bbox[[2, 0]] - 1 # Flip the contour x-axis for m in range(self.n_vertices): shape[2 * m] = width - shape[2 * m] - 1 bbox[:2] = affine_transform(bbox[:2], trans_fmap) bbox[2:] = affine_transform(bbox[2:], trans_fmap) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] # generate gt shape mean and std from contours for m in range(self.n_vertices): # apply scale and crop transform to shapes shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap) shape_clipped = np.reshape(shape, (self.n_vertices, 2)) shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1) shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1) clockwise_flag = check_clockwise_polygon(shape_clipped) if not clockwise_flag: fixed_contour = np.flip(shape_clipped, axis=0) else: fixed_contour = shape_clipped.copy() # Indexing from the left-most vertex, argmin x-axis idx = np.argmin(fixed_contour[:, 0]) indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0) mass_center = np.mean(indexed_shape, axis=0) if h < 1e-6 or w < 1e-6: # remove small bboxes continue centered_shape = indexed_shape - mass_center if h > 0 and w > 0: obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) # obj_c = mass_center obj_c_int = obj_c.astype(np.int32) radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou))) draw_umich_gaussian(hmap[label], obj_c_int, radius) shapes_[k] = centered_shape.reshape((1, -1)) # shapes_[k] = indexed_shape.reshape((1, -1)) # only for debugging center_offsets[k] = mass_center - obj_c codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)), self.dictionary, lmbda=self.sparse_alpha, max_iter=60) w_h_[k] = 1. * w, 1. * h
# w_h_[k] = mass_center[1] - bbox[1], bbox[3] - mass_center[1], \ # mass_center[0] - bbox[0], bbox[2] - mass_center[0] # [top, bottom, left, right] distance regs[k] = obj_c - obj_c_int # discretization error inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0] ind_masks[k] = 1 # occlusion level map gt occ_map[0] += self.polys_to_mask([np.ndarray.flatten(indexed_shape).tolist()], self.fmap_size['h'], self.fmap_size['w']) * 1. occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ # -----------------------------------debug--------------------------------- # for bbox, label, shape in zip(bboxes, labels, shapes_): # # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1) # cv2.putText(image_show, str(self.reverse_labels[label]), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) # # print(shape, shape.shape) # cv2.polylines(image_show, [shape.reshape(self.n_vertices, 2).astype(np.int32)], True, (0, 0, 255), # thickness=1) # # cv2.imshow('img', image_show) # # cv2.imshow('occ', occ_map.astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]) * 255) # m_img = cv2.cvtColor((occ_map * 255).astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]), # code=cv2.COLOR_GRAY2BGR) # cat_img = np.concatenate([m_img, image_show], axis=0) # cv2.imshow('segm', cat_img) # cv2.waitKey() # -----------------------------------debug--------------------------------- return {'image': img, 'shapes': shapes_, 'codes': codes_, 'offsets': center_offsets, 'occ_map': occ_map, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}
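The occlusion target above accumulates one rasterized amodal polygon per instance and normalizes the overlap count into [0, 1]. A compact sketch, assuming `polys_to_mask` rasterizes a polygon list into a {0, 1} mask of shape (h, w):

import numpy as np

def occlusion_map(masks, max_occ):
    occ = np.zeros_like(masks[0], dtype=np.float32)
    for m in masks:                          # one binary mask per amodal instance
        occ += m                             # overlapping instances stack up
    return np.clip(occ, 0, max_occ) / max_occ  # clip the count and normalize to [0, 1]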
def __getitem__(self, index): img_id = self.img_paths[index] img_set, img_vid, img_name = img_id.split("_", 2) img_name = img_name.replace("txt", "jpg") img_path = os.path.join(self.img_dir, img_set, img_vid) img_rgb = cv2.imread(os.path.join(img_path, "visible", img_name), cv2.IMREAD_COLOR) img_ir = cv2.imread(os.path.join(img_path, "lwir", img_name), cv2.IMREAD_GRAYSCALE) with open(os.path.join(self.annot_path, self.img_paths[index])) as annot_file: annot_data = [line.rstrip('\n') for line in annot_file][1:] bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32) if len(annot_data) != 0: bboxes = bboxes.repeat(len(annot_data), axis=0) for i in range(len(annot_data)): line_data = annot_data[i].split() label = line_data[0] if self.split == "train": if label not in ["person", "person?", "people"]: continue elif label != "person": continue bboxes[i, :] = list(map(int, line_data[1:5])) bboxes[:, 2:] += bboxes[:, :2] # resize image and bbox height, width = img_rgb.shape[:2] img_rgb = cv2.resize(img_rgb, (self.img_size['w'], self.img_size['h'])) img_ir = cv2.resize(img_ir, (self.img_size['w'], self.img_size['h'])) img_ir = np.expand_dims(img_ir, axis=2) bboxes[:, 0::2] *= self.img_size['w'] / width bboxes[:, 1::2] *= self.img_size['h'] / height # discard non-valid bboxes bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1) bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1) keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0, (bboxes[:, 3] - bboxes[:, 1]) > 0) bboxes = bboxes[keep_inds] # randomly flip image and bboxes if self.split == 'train' and np.random.uniform() > 0.5: img_rgb[:] = img_rgb[:, ::-1, :] img_ir[:] = img_ir[:, ::-1, :] bboxes[:, [0, 2]] = img_rgb.shape[1] - bboxes[:, [2, 0]] - 1 img_rgb = img_rgb.astype(np.float32) / 255. img_ir = img_ir.astype(np.float32) / 255. 
img_rgb -= self.mean[0, 0, :3] img_rgb /= self.std[0, 0, :3] img_ir -= self.mean[0, 0, 3] img_ir /= self.std[0, 0, 3] img_rgb = img_rgb.transpose((2, 0, 1)) # [H, W, C] to [C, H, W] img_ir = img_ir.transpose((2, 0, 1)) hmap_tl = np.zeros( (self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) hmap_br = np.zeros( (self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32) regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32) regs_br = np.zeros((self.max_objs, 2), dtype=np.float32) inds_tl = np.zeros((self.max_objs, ), dtype=np.int64) inds_br = np.zeros((self.max_objs, ), dtype=np.int64) num_objs = np.array(min(bboxes.shape[0], self.max_objs)) ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8) ind_masks[:num_objs] = 1 for i, (xtl, ytl, xbr, ybr) in enumerate(bboxes): fxtl = (xtl * self.fmap_size['w'] / self.img_size['w']) fytl = (ytl * self.fmap_size['h'] / self.img_size['h']) fxbr = (xbr * self.fmap_size['w'] / self.img_size['w']) fybr = (ybr * self.fmap_size['h'] / self.img_size['h']) ixtl = int(fxtl) iytl = int(fytl) ixbr = int(fxbr) iybr = int(fybr) if self.gaussian: width = xbr - xtl height = ybr - ytl width = math.ceil(width * self.fmap_size['w'] / self.img_size['w']) height = math.ceil(height * self.fmap_size['h'] / self.img_size['h']) radius = max( 0, int(gaussian_radius((height, width), self.gaussian_iou))) draw_gaussian(hmap_tl[0], [ixtl, iytl], radius) draw_gaussian(hmap_br[0], [ixbr, iybr], radius) else: hmap_tl[0, iytl, ixtl] = 1 hmap_br[0, iybr, ixbr] = 1 regs_tl[i, :] = [fxtl - ixtl, fytl - iytl] regs_br[i, :] = [fxbr - ixbr, fybr - iybr] inds_tl[i] = iytl * self.fmap_size['w'] + ixtl inds_br[i] = iybr * self.fmap_size['w'] + ixbr return { 'img_rgb': img_rgb, 'img_ir': img_ir, 'hmap_tl': hmap_tl, 'hmap_br': hmap_br, 'regs_tl': regs_tl, 'regs_br': regs_br, 'inds_tl': inds_tl, 'inds_br': inds_br, 'ind_masks': ind_masks }
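The corner targets above map image-space coordinates to feature-map space by a pure rescale, storing the fractional remainder as the regression target for each integer corner cell. A sketch of exactly that arithmetic, factored out for clarity:

def corner_target(x, y, img_size, fmap_size):
    fx = x * fmap_size['w'] / img_size['w']   # rescale to feature-map space
    fy = y * fmap_size['h'] / img_size['h']
    ix, iy = int(fx), int(fy)                 # integer cell the corner lands in
    return (ix, iy), (fx - ix, fy - iy)       # cell index, sub-cell offset (regs_tl / regs_br)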
def __getitem__(self, index):
    img_id = self.images[index]
    inp, ann_list, output_w, output_h, meta = self.get_img_ann(index, scale_lv=2)
    # TBD: Mosaic augmentation requires a large input image size.
    # Increase the input size from 512x512 to 800x800 or larger and adjust
    # the scale level so that the mosaic seams do not become salient
    # object boundaries.
    # inp, ann_list, output_w, output_h, meta = self.mosaic_mix(index)

    if False:  # Augmentation visualization
        img = inp.transpose(1, 2, 0)
        img = (img * self.std + self.mean) * 255
        for an in ann_list:
            bbox, cls_id, bbox2 = an
            bbox = bbox.astype(np.int32)
            bbox2 = bbox2.astype(np.int32)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, img.shape[1])
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, img.shape[0])
            if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0:
                cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (255, 0, 0), 3)
            if bbox2.shape[0] > 0:
                cv2.rectangle(img, (bbox2[0], bbox2[1]), (bbox2[2], bbox2[3]),
                              (0, 255, 0), 2)
        cv2.imwrite('temp_%d.jpg' % (index), img)

    num_objs = min(len(ann_list), self.max_objs)
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_reg = np.zeros((4, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    dense_wh_mask = np.zeros((4, output_h, output_w), dtype=np.float32)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # Random background points (currently unused).
    xs = np.random.randint(output_w, size=(self.max_objs, 1))
    ys = np.random.randint(output_h, size=(self.max_objs, 1))
    bgs = np.concatenate([xs, ys], axis=1)

    for k in range(num_objs):
        bbox, cls_id, bbox2 = ann_list[k]
        bbox /= self.opt.down_ratio
        bbox2 /= self.opt.down_ratio
        oh, ow = bbox[3] - bbox[1], bbox[2] - bbox[0]
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Fall back to the secondary box if clipping removed more than 10%.
        if (h / (oh + 0.01) < 0.9 or w / (ow + 0.01) < 0.9) and bbox2.shape[0] > 0:
            bbox = bbox2
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Get the center of the box.
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        if (h > 2 or h / (oh + 0.01) > 0.5) and (w > 2 or w / (ow + 0.01) > 0.5):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            draw_dense_reg(dense_reg, dense_wh_mask, ct_int, bbox, radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h  # was missing: cat_spec_wh otherwise copies zeros
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    dense_wh = dense_reg[:2, :, :]
    dense_off = dense_reg[2:, :, :]
    ret = {
        'input': inp,
        'hm': hm,
        'dense_wh': dense_wh,
        'dense_off': dense_off,
        'dense_wh_mask': dense_wh_mask[:2]
    }
    if self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        # `del ret['wh']` raised KeyError here: 'wh' is never added to ret
        # in this dense variant, so pop defensively.
        ret.pop('wh', None)
    if self.opt.reg_offset:
        # NOTE: reg/ind/reg_mask are returned unfilled; offsets are encoded
        # densely in dense_off for this variant.
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': meta[0], 's': meta[1], 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
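# Every loader in this collection calls gaussian_radius(...) without defining
# it. The sketch below is the CornerNet-style computation these snippets
# presumably rely on: the largest radius r such that a corner displaced by r
# still produces a box with IoU >= min_overlap against the ground truth,
# taken as the minimum over the three quadratic cases (both corners inside,
# both outside, one of each).
import numpy as np

def gaussian_radius(det_size, min_overlap=0.7):
    height, width = det_size

    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
    r1 = (b1 + sq1) / 2

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
    r2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)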
def __getitem__(self, index):
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations],
                      dtype=np.float32)
    if len(bboxes) == 0:
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([0])  # keep labels 1-D, matching the non-empty case
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))
    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

    return {
        'image': img, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs,
        'inds': inds, 'ind_masks': ind_masks,
        'c': center, 's': scale, 'img_id': img_id
    }
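# The draw_umich_gaussian used above (and throughout) is also assumed but not
# defined here. A sketch of the common CenterNet definition: render a 2D
# Gaussian of the given radius and take the elementwise maximum with the
# existing heatmap, so overlapping objects keep the stronger peak.
import numpy as np

def gaussian2D(shape, sigma=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    # clip the Gaussian window against the heatmap borders
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap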
def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    anns = list(
        filter(
            lambda x: x['category_id'] in self._valid_ids and
            x['iscrowd'] != 1, anns))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if self.cfg.DATASET.RANDOM_CROP:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.cfg.DATASET.SCALE
            cf = self.cfg.DATASET.SHIFT
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.cfg.DATASET.AUG_ROT:
            rf = self.cfg.DATASET.ROTATE
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        if np.random.random() < self.cfg.DATASET.FLIP:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
        c, s, rot, [self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES])
    inp = cv2.warpAffine(
        img, trans_input,
        (self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES),
        flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - np.array(self.cfg.DATASET.MEAN).astype(np.float32)) / \
        np.array(self.cfg.DATASET.STD).astype(np.float32)
    inp = inp.transpose(2, 0, 1)

    output_res = self.cfg.MODEL.OUTPUT_RES
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
    trans_seg_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    seg = np.zeros((self.max_objs, output_res, output_res), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        segment = self.coco.annToMask(ann)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            segment = segment[:, ::-1]
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        segment = cv2.warpAffine(segment, trans_seg_output,
                                 (output_res, output_res),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else \
                max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # mask: mark everything outside a padded box around the object
            pad_rate = 0.3
            segment_mask = np.ones_like(segment)
            # np.int was removed in NumPy 1.24; use np.int32
            x, y = (np.clip([ct[0] - (1 + pad_rate) * w / 2,
                             ct[0] + (1 + pad_rate) * w / 2],
                            0, output_res - 1) * 2).astype(np.int32), \
                   (np.clip([ct[1] - (1 + pad_rate) * h / 2,
                             ct[1] + (1 + pad_rate) * h / 2],
                            0, output_res - 1) * 2).astype(np.int32)
            segment_mask[y[0]:y[1], x[0]:x[1]] = 0
            segment[segment_mask == 1] = 255
            seg[k] = segment

            # keypoints
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0
            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.cfg.hm_gauss \
                if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.cfg.LOSS.DENSE_HP:
                            # must be before drawing the center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1] +
                          pts[:, :2].reshape(num_joints * 2).tolist() +
                          [cls_id])

    if rot != 0:
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
        'hps': kps, 'hps_mask': kps_mask, 'seg': seg
    }
    if self.cfg.LOSS.DENSE_HP:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1,
                                                output_res, output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2,
                                                output_res, output_res)
        ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
        del ret['hps'], ret['hps_mask']
    if self.cfg.LOSS.REG_OFFSET:
        ret.update({'reg': reg})
    if self.cfg.LOSS.HM_HP:
        ret.update({'hm_hp': hm_hp})
    if self.cfg.LOSS.REG_HP_OFFSET:
        ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind,
                    'hp_mask': hp_mask})
    if self.cfg.DEBUG > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
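# affine_transform is applied to single points everywhere above. For
# reference, the usual definition is just the 2x3 warp matrix applied to a
# homogeneous point; a sketch:
import numpy as np

def affine_transform(pt, t):
    # t: 2x3 affine matrix (as returned by get_affine_transform)
    new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]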
def __get_rotated_coco(self, img, anns, num_objs):
    kpts = []
    kpts_tmp = []
    for k in range(num_objs):
        ann = anns[k]
        ann_rotated = get_annotation_with_angle(ann)
        rot = rotate_bbox(*ann_rotated)
        kpts.extend([Keypoint(*x) for x in rot])
        if self.num_keypoints > 0:
            if 'keypoints' not in ann:
                ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))
            kpt = [
                Keypoint(*x)
                for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
            ]
            kpts_tmp.extend(kpt)
    idx_boxes = len(kpts)
    if self.num_keypoints > 0:
        kpts.extend(kpts_tmp)
    kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
    else:
        img_aug, kpts_aug = np.copy(img), kpts.copy()
    img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 3), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros(
        (self.max_detections, 7 if self.use_rotated_boxes else 6),
        dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    if self.num_keypoints > 0:
        kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                      dtype=np.float32)
        gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                         dtype=np.float32)
        kp_reg_mask = np.zeros((self.max_detections, self.num_keypoints * 2),
                               dtype=np.uint8)

    kpts_aug = self.resize_out(keypoints=kpts_aug)
    box_kpts_aug, kpts_aug = kpts_aug[:idx_boxes], kpts_aug[idx_boxes:]
    assert num_objs == len(box_kpts_aug) // 4

    for k in range(num_objs):
        ann = anns[k]
        points = []
        for p in box_kpts_aug[k * 4:k * 4 + 4]:
            box_kp = list((np.clip(p.x, 0, output_w - 1),
                           np.clip(p.y, 0, output_h - 1)))
            points.append(box_kp)
        points = np.array(points).astype(np.float32)
        cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)
        if cv_wh[0] == 0 or cv_wh[1] == 0:
            continue
        cx, cy, w, h, angle = get_annotation_with_angle({
            'rbbox':
            np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
        })
        ct = np.array((cx, cy))
        cls_id = int(self.cat_mapping[ann['category_id']])
        if h > 0 and w > 0:
            radius = gaussian_radius((np.ceil(h), np.ceil(w)))
            radius = max(0, int(radius))
            ct_int = ct.astype(np.int32)
            # draw_gaussian is expected to be bound at module scope
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = w, h, angle
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])
            if self.num_keypoints > 0:
                valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                for i, p in enumerate(
                        kpts_aug[k * self.num_keypoints:
                                 k * self.num_keypoints + self.num_keypoints]):
                    kp[k][i * 2] = p.x - ct_int[0]
                    kp[k][i * 2 + 1] = p.y - ct_int[1]
                    # was (output_w, output_w); the shape should be (h, w)
                    is_valid = valid[i] == 2 and not p.is_out_of_image(
                        (output_h, output_w))
                    kp_reg_mask[k, i * 2] = int(is_valid)
                    kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                    gt_kp[k][i] = p.x, p.y
            if "area" not in ann:
                gt_areas[k] = w * h
            else:
                gt_areas[k] = ann["area"]

    del box_kpts_aug
    del img_aug
    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 7), dtype=np.float32)
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    if self.num_keypoints > 0:
        ret['kps'] = kp
        ret['gt_kps'] = gt_kp
        ret['kp_reg_mask'] = kp_reg_mask
    del kpts_aug
    return ret
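# A note on the cv2.minAreaRect call above: OpenCV returns
# ((cx, cy), (w, h), angle) with the angle in degrees, and (w, h) are not
# ordered, so the same physical rectangle can come back with swapped sides
# and an angle shifted by 90 degrees (the exact convention also changed in
# recent OpenCV releases). A quick check:
import cv2
import numpy as np

pts = np.array([[10, 10], [50, 12], [48, 30], [8, 28]], dtype=np.float32)
(cx, cy), (w, h), angle = cv2.minAreaRect(pts)
print(cx, cy, w, h, angle)
# get_annotation_with_angle presumably normalizes this ambiguity before the
# (w, h, angle) triple is written into the 'wh' target.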
def __getitem__(self, index):
    # index = 45236  # debugging leftover: hard-coding the index made every
    #                # call return the same sample
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    assert os.path.exists(img_path), \
        'Image path does not exist: {}'.format(img_path)
    # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox',
    # 'category_id'}
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    target = self.coco.loadAnns(ids=ann_ids)
    # Separate out crowd annotations. These are annotations that signify a
    # large crowd of objects of said class, where there is no annotation for
    # each individual object.
    target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    if len(target) > 0:
        # Pool all the masks for this image into one
        # [num_objects, height, width] matrix
        masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
        masks = np.vstack(masks)
        masks = masks.reshape(-1, height, width)
        # if it isn't transposed, an error occurs in augmentation (line 100)
        masks = masks.transpose(1, 2, 0)
    else:
        # was missing: without this, the flip and warp below raise NameError
        masks = np.zeros((height, width, 1), dtype=np.uint8)
    # labels = [int(self.cat_ids[obj['category_id']]) for obj in target]

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            masks = masks[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    if self.rgb:
        inp = inp[..., ::-1]
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    d1 = masks.shape[2]
    masks = cv2.warpAffine(masks, trans_input, (input_w, input_h),
                           flags=cv2.INTER_LINEAR)
    masks = np.expand_dims(masks, 2) if masks.ndim != 3 else masks
    d2 = masks.shape[2]
    assert d1 == d2
    masks = masks.transpose(2, 0, 1)
    masks = (masks >= 0.5).astype(np.uint8)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    # centers = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    segm_masks = []
    gt_det = []
    num_objs = min(len(target), self.max_objs)
    for k in range(num_objs):
        ann = target[k]
        # convert bboxes to point_form (xmin, ymin, xmax, ymax)
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # After augmentation some masks will be empty.
        if h > 0 and w > 0 and masks[k].sum() > 0.0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            # centers[k] = ct_int[0], ct_int[1]
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            det = [ct[0] - w / 2, ct[1] - h / 2,
                   ct[0] + w / 2, ct[1] + h / 2, cls_id]
            gt_det.append(det)
            segm_masks.append(masks[k])

    if len(segm_masks) > 0:
        masks = np.stack(segm_masks)
        gt_det = np.stack(gt_det)
    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'masks': masks, 'gt_bbox_lbl': gt_det}
    # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
    #        'wh': wh, 'masks': masks, 'centers': centers,
    #        'gt_bbox_lbl': gt_det}
    # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
    #        'wh': wh, 'masks': masks, 'labels': labels, 'crowd': crowd,
    #        'centers': centers, 'gt_bbox': gt_det}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
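# Several loaders above call self._get_border(128, size) when sampling a
# random crop center. A sketch of the definition they presumably share (it
# matches the CenterNet codebase): halve the border until the range
# [border, size - border) passed to np.random.randint is non-empty.
def _get_border(self, border, size):
    i = 1
    while size - border // i <= border // i:
        i *= 2
    return border // i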
def __getitem__(self, files_index):
    # Map the global index to the right sub-dataset.
    for i, c in enumerate(self.cds):
        if files_index >= c:
            ds = list(self.label_files.keys())[i]
            start_index = c

    img_path = self.img_files[ds][files_index - start_index]
    label_path = self.label_files[ds][files_index - start_index]

    imgs, labels, img_path, (input_h, input_w) = self.get_data(
        img_path, label_path)
    # Offset track ids so they are unique across sub-datasets.
    for i, _ in enumerate(labels):
        if labels[i, 1] > -1:
            labels[i, 1] += self.tid_start_index[ds]

    output_h = imgs.shape[1] // self.opt.down_ratio
    output_w = imgs.shape[2] // self.opt.down_ratio
    num_classes = self.num_classes
    num_objs = labels.shape[0]
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    if self.opt.ltrb:
        wh = np.zeros((self.max_objs, 4), dtype=np.float32)
    else:
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs, ), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs, ), dtype=np.uint8)
    ids = np.zeros((self.max_objs, ), dtype=np.int64)
    bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian
    for k in range(num_objs):
        label = labels[k]
        bbox = label[2:]
        cls_id = int(label[0])
        # Labels are normalized (cx, cy, w, h); scale to the output map.
        bbox[[0, 2]] = bbox[[0, 2]] * output_w
        bbox[[1, 3]] = bbox[[1, 3]] * output_h
        bbox_amodal = copy.deepcopy(bbox)
        bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
        bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
        bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
        bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        h = bbox[3]
        w = bbox[2]

        bbox_xy = copy.deepcopy(bbox)
        bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
        bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
        bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
        bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = 6 if self.opt.mse_loss else radius
            # radius = max(1, int(radius)) if self.opt.mse_loss else radius
            ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            if self.opt.ltrb:
                wh[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                        bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
            else:
                wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            ids[k] = label[1]
            bbox_xys[k] = bbox_xy

    ret = {'input': imgs, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'reg': reg, 'ids': ids, 'bbox': bbox_xys}
    return ret
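# With opt.ltrb the 'wh' target above stores (left, top, right, bottom)
# distances from the clipped center to the amodal box edges instead of a
# width/height pair. A minimal sketch of inverting it at decode time; the
# function name is illustrative, not from the source:
import numpy as np

def decode_ltrb(ct, ltrb):
    # ct: (cx, cy) on the output map; ltrb: distances to the box edges
    l, t, r, b = ltrb
    return np.array([ct[0] - l, ct[1] - t, ct[0] + r, ct[1] + b],
                    dtype=np.float32)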
def _get_pre_dets(self, anns, trans_input, trans_output):
    hm_h, hm_w = self.opt.input_h, self.opt.input_w
    down_ratio = self.opt.down_ratio
    trans = trans_input
    return_hm = self.opt.pre_hm  # was misspelled 'reutrn_hm'
    pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32) if return_hm else None
    pre_cts, pre_whs, track_ids = [], [], []
    pre_bboxes, pre_bbox_amodals = [], []

    # Collect regions whose centers should be ignored (crowds, invalid classes).
    ignore_regions = []
    for ann in anns:
        cls_id = int(ann['category_id'])
        if cls_id > self.opt.num_classes or cls_id <= -999 or cls_id <= 0 or \
                ('iscrowd' in ann and ann['iscrowd'] > 0):
            bbox, _ = self._get_bbox_output(ann['bbox'], trans_output,
                                            hm_h, hm_w)
            ignore_regions.append(bbox)

    for ann in anns:
        cls_id = int(ann['category_id'])
        if cls_id > self.opt.num_classes or cls_id <= -99 or \
                ('iscrowd' in ann and ann['iscrowd'] > 0):
            continue
        # bbox input
        bbox = self._coco_box_to_bbox(ann['bbox'])
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        max_rad = 1
        ignored = False
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            max_rad = max(max_rad, radius)
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct0 = ct.copy()
            conf = 1
            for area in ignore_regions:
                if (area[0] <= ct[0] and ct[0] <= area[2]) and \
                        (area[1] <= ct[1] and ct[1] <= area[3]):
                    ignored = True
                    break
            if ignored:
                continue
            else:
                # Jitter the previous-frame center to simulate tracker drift.
                ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
                ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
                conf = 1 if np.random.random() > self.opt.lost_disturb else 0

            ct_int = ct.astype(np.int32)
            if conf == 0:
                pre_cts.append(ct / down_ratio)  # output ct
            else:
                pre_cts.append(ct0 / down_ratio)

            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)
            if return_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

            # Occasionally render an extra false-positive peak.
            if np.random.random() < self.opt.fp_disturb and return_hm:
                ct2 = ct0.copy()
                # Hard-coded heatmap disturb ratio; other values untried.
                ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                ct2_int = ct2.astype(np.int32)
                draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)

            # get the bbox out
            bbox_out, bbox_amodal = self._get_bbox_output(ann['bbox'],
                                                          trans_output)
            pre_bboxes.append(np.array(bbox_out))
            pre_bbox_amodals.append(np.array(bbox_amodal))
            h_out, w_out = bbox_out[3] - bbox_out[1], bbox_out[2] - bbox_out[0]
            pre_wh = np.array([w_out, h_out], dtype=np.float32)
            pre_whs.append(pre_wh)

    return pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes, pre_bbox_amodals
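# The pre_hm built here is consumed by the network, not by the loss. A
# minimal sketch of CenterTrack-style wiring with hypothetical stem modules
# (the real model fuses these inside its backbone; names below are
# illustrative only):
import torch
import torch.nn as nn

base_stem = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
pre_img_stem = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
pre_hm_stem = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3)

cur_img = torch.randn(1, 3, 512, 512)
pre_img = torch.randn(1, 3, 512, 512)
pre_hm = torch.rand(1, 1, 512, 512)   # the heatmap rendered above

# current frame, previous frame, and prior heatmap fused at the stem
feat = base_stem(cur_img) + pre_img_stem(pre_img) + pre_hm_stem(pre_hm)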
def _add_instance(self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann,
                  trans_output, aug_s, calib, pre_cts=None, track_ids=None):
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    if h <= 0 or w <= 0:
        return
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    ct_int = ct.astype(np.int32)

    ret['cat'][k] = cls_id - 1
    ret['mask'][k] = 1
    if 'wh' in ret:
        ret['wh'][k] = 1. * w, 1. * h
        ret['wh_mask'][k] = 1
    ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
    if 'reg' in ret:
        ret['reg'][k] = ct - ct_int
        ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

    gt_det['bboxes'].append(
        np.array([ct[0] - w / 2, ct[1] - h / 2,
                  ct[0] + w / 2, ct[1] + h / 2], dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(cls_id - 1)
    gt_det['cts'].append(ct)

    if 'tracking' in self.opt.heads:
        if ann['track_id'] in track_ids and ann['track_id'] >= 0:
            pre_ct = pre_cts[track_ids.index(ann['track_id'])]
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = pre_ct - ct_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if 'ltrb' in self.opt.heads:
        ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
                         bbox[2] - ct_int[0], bbox[3] - ct_int[1]
        ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in self.opt.heads:
        ret['ltrb_amodal'][k] = \
            bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
            bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in self.opt.heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in self.opt.heads:
        if ('velocity' in ann) and min(ann['velocity']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
            ret['velocity_mask'][k] = 1
        gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in self.opt.heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w)

    if 'rot' in self.opt.heads:
        self._add_rot(ret, ann, k, gt_det)

    if 'dep' in self.opt.heads:
        if 'depth' in ann:
            ret['dep_mask'][k] = 1
            ret['dep'][k] = ann['depth'] * aug_s
            gt_det['dep'].append(ret['dep'][k])
        else:
            gt_det['dep'].append(2)

    if 'dim' in self.opt.heads:
        if 'dim' in ann:
            ret['dim_mask'][k] = 1
            ret['dim'][k] = ann['dim']
            gt_det['dim'].append(ret['dim'][k])
        else:
            gt_det['dim'].append([1, 1, 1])

    if 'amodel_offset' in self.opt.heads:
        if 'amodel_center' in ann:
            amodel_center = affine_transform(ann['amodel_center'], trans_output)
            ret['amodel_offset_mask'][k] = 1
            ret['amodel_offset'][k] = amodel_center - ct_int
            gt_det['amodel_offset'].append(ret['amodel_offset'][k])
        else:
            gt_det['amodel_offset'].append([0, 0])
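# _add_instance writes per-object targets at flat indices
# ret['ind'][k] = cy * output_w + cx. On the loss side those indices pull K
# per-object vectors out of a dense [B, C, H, W] head; a sketch of the
# gather that CenterNet-style code typically uses (PyTorch):
import torch

def gather_feat(feat, ind):
    # feat: [B, H*W, C], ind: [B, K] -> [B, K, C]
    dim = feat.size(2)
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
    return feat.gather(1, ind)

def transpose_and_gather_feat(feat, ind):
    # feat: [B, C, H, W] -> per-object vectors at the flat indices 'ind'
    feat = feat.permute(0, 2, 3, 1).contiguous()
    feat = feat.view(feat.size(0), -1, feat.size(3))
    return gather_feat(feat, ind)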
def __getitem__(self, index):
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations],
                      dtype=np.float32)
    if len(bboxes) == 0:
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # Get the image center point
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    # Affine-transform augmentation
    flipped = False
    if self.split == 'train':
        # Randomly pick a training scale
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    # Affine transform
    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    # Normalize
    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
        # Perturb brightness, contrast, and other photometric properties
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Affine transform for the ground-truth heatmap;
    # at this point it is already downsampled to a quarter of the input
    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    # The three most important targets
    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    # indices: which feature-map cells are selected
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        # Apply the same affine transform to the boxes
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        # Guard against out-of-bounds coordinates
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        # Height and width on the feature map
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)  # float center
            obj_c_int = obj_c.astype(np.int32)  # integer center
            # Minimum radius from the quadratic IoU constraint
            radius = max(
                0,
                int(gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            # Splat the Gaussian onto the heatmap
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            # Record the sub-pixel offset
            regs[k] = obj_c - obj_c_int  # discretization error
            # The k-th object lands at flat index fmap_w * cy + cx
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            # Mark this slot as valid
            ind_masks[k] = 1

    return {
        'image': img, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs,
        'inds': inds, 'ind_masks': ind_masks,
        'c': center, 's': scale, 'img_id': img_id
    }
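# The inverse mapping at decode time: the flat index recovers the integer
# cell, regs restores the sub-pixel center, and w_h_ spans the box. A minimal
# numpy sketch for a single object (peak selection and the inverse affine
# back to image coordinates, driven by 'c' and 's', are omitted):
import numpy as np

def cell_to_box(ind, reg, w_h, fmap_w):
    cx, cy = ind % fmap_w, ind // fmap_w    # undo inds[k] = cy * fmap_w + cx
    cx, cy = cx + reg[0], cy + reg[1]       # undo the discretization error
    w, h = w_h
    return np.array([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])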
def __getitem__(self, index):
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)
    if 'calib' in img_info:
        calib = np.array(img_info['calib'], dtype=np.float32)
    else:
        calib = self.calib

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    if self.opt.keep_res:
        s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
    else:
        s = np.array([width, height], dtype=np.int32)

    aug = False
    if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
        aug = True
        sf = self.opt.scale
        cf = self.opt.shift
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        c[0] += img.shape[1] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += img.shape[0] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)

    trans_input = get_affine_transform(
        c, s, 0, [self.opt.input_w, self.opt.input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_w, self.opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    # if self.split == 'train' and not self.opt.no_color_aug:
    #     color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    num_classes = self.opt.num_classes
    trans_output = get_affine_transform(
        c, s, 0, [self.opt.output_w, self.opt.output_h])

    hm = np.zeros((num_classes, self.opt.output_h, self.opt.output_w),
                  dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    dep = np.zeros((self.max_objs, 1), dtype=np.float32)
    rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
    rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
    dim = np.zeros((self.max_objs, 3), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    rot_mask = np.zeros((self.max_objs), dtype=np.uint8)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)
    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian
    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id <= -99:
            continue
        # if flipped:
        #     bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((h, w))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if cls_id < 0:
                # Negative class ids mark ignore regions.
                ignore_id = [_ for _ in range(num_classes)] \
                    if cls_id == -1 else [-cls_id - 2]
                if self.opt.rect_mask:
                    hm[ignore_id,
                       int(bbox[1]):int(bbox[3]) + 1,
                       int(bbox[0]):int(bbox[2]) + 1] = 0.9999
                else:
                    for cc in ignore_id:
                        draw_gaussian(hm[cc], ct, radius)
                    hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
                continue
            draw_gaussian(hm[cls_id], ct, radius)

            wh[k] = 1. * w, 1. * h
            gt_det.append([ct[0], ct[1], 1] +
                          self._alpha_to_8(self._convert_alpha(ann['alpha'])) +
                          [ann['depth']] +
                          (np.array(ann['dim']) / 1).tolist() + [cls_id])
            if self.opt.reg_bbox:
                gt_det[-1] = gt_det[-1][:-1] + [w, h] + [gt_det[-1][-1]]
            # if (not self.opt.car_only) or cls_id == 1:
            #     # Only estimate ADD for cars !!!
            if 1:
                alpha = self._convert_alpha(ann['alpha'])
                # print('img_id cls_id alpha rot_y', img_path, cls_id, alpha,
                #       ann['rotation_y'])
                # Two-bin encoding: bin 0 is centered at -pi/2, bin 1 at pi/2.
                if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
                    rotbin[k, 0] = 1
                    rotres[k, 0] = alpha - (-0.5 * np.pi)
                if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
                    rotbin[k, 1] = 1
                    rotres[k, 1] = alpha - (0.5 * np.pi)
                dep[k] = ann['depth']
                dim[k] = ann['dim']
                # print(' cat dim', cls_id, dim[k])
                ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1 if not aug else 0
                rot_mask[k] = 1

    # print('gt_det', gt_det)
    ret = {
        'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind,
        'rotbin': rotbin, 'rotres': rotres,
        'reg_mask': reg_mask, 'rot_mask': rot_mask
    }
    if self.opt.reg_bbox:
        ret.update({'wh': wh})
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not ('train' in self.split):
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 18), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
                'image_path': img_path, 'img_id': img_id}
        ret['meta'] = meta
    return ret
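# For reference, the two-bin rotation targets written above can be inverted
# as follows. This is a sketch under the assumption that, at test time, the
# bin with the higher classification score wins (as in CenterNet-style ddd
# decoding); the function name is illustrative:
import numpy as np

def decode_alpha(bin_scores, residuals):
    # mirrors rotres[k, 0] = alpha + 0.5 * pi (bin 0, centered at -pi/2)
    # and     rotres[k, 1] = alpha - 0.5 * pi (bin 1, centered at +pi/2)
    if bin_scores[0] > bin_scores[1]:
        return residuals[0] - 0.5 * np.pi
    return residuals[1] + 0.5 * np.pi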
def __getitem__(self, index):
    img_id = self.images[index]
    img = cv2.imread(img_id)
    height, width = img.shape[0], img.shape[1]

    # Convert YOLO annotations (cls, cx, cy, w, h, all normalized) to
    # absolute xyxy corners.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        anns = np.loadtxt(self.anno[index]).reshape(-1, 5)
    if anns.size:
        x1 = width * (anns[:, 1] - anns[:, 3] / 2)
        y1 = height * (anns[:, 2] - anns[:, 4] / 2)
        x2 = width * (anns[:, 1] + anns[:, 3] / 2)
        y2 = height * (anns[:, 2] + anns[:, 4] / 2)
        anns[:, 1] = x1
        anns[:, 2] = y1
        anns[:, 3] = x2
        anns[:, 4] = y2
    num_objs = min(len(anns), self.max_objs)

    # Geometric augmentation
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    rotation = 0
    shear = 0
    input_h, input_w = self.opt.input_h, self.opt.input_w
    hflipped = False
    vflipped = False
    if self.split == 'train':
        if self.shear:
            shear = np.clip(np.random.randn() * self.shear,
                            -self.shear, self.shear)
        if shear:
            if shear < 0:
                img = img[:, ::-1, :]
                anns[:, [1, 3]] = width - anns[:, [3, 1]] - 1
            M = np.array([[1, abs(shear), 0], [0, 1, 0]])
            nW = width + abs(shear * height)
            anns[:, [1, 3]] += ((anns[:, [2, 4]]) * abs(shear)).astype(int)
            img = cv2.warpAffine(img, M, (int(nW), height))
            if shear < 0:
                img = img[:, ::-1, :]
                anns[:, [1, 3]] = nW - anns[:, [3, 1]] - 1
            c[0] = nW / 2.
            s = max(nW, s)
            width = nW
        sf = self.scale
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if self.hflip and np.random.random() < self.hflip:
            hflipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1
        if self.vflip and np.random.random() < self.vflip:
            vflipped = True
            img = img[::-1, :, :]
            c[1] = height - c[1] - 1
        # Rotation parameters
        if self.rotation:
            rotation = np.clip(np.random.randn() * self.rotation,
                               -self.rotation, self.rotation)

    trans_input = get_affine_transform(c, s, rotation, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, rotation, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    obj = np.zeros((output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)
    target = np.zeros((self.max_objs, 5), dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    if self.opt.task in ['fcos']:  # , 'ttf']:
        # using original target
        trans_output = trans_input
        output_w, output_h = input_w, input_h
    for k in range(num_objs):
        bbox = anns[k, 1:]
        cls_id = int(anns[k, 0])
        if hflipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        if vflipped:
            bbox[[1, 3]] = height - bbox[[3, 1]] - 1
        # Transform all four corners so rotation/shear keep the full extent.
        lt = affine_transform(bbox[:2], trans_output)
        rb = affine_transform(bbox[2:], trans_output)
        rt = affine_transform(bbox[[2, 1]], trans_output)
        lb = affine_transform(bbox[[0, 3]], trans_output)
        bbox[:2] = np.min([lt, rb, rt, lb], axis=0)
        bbox[2:] = np.max([lt, rb, rt, lb], axis=0)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if self.opt.task in ['fcos']:  # , 'ttf']:
            target[k] = cls_id, bbox[0], bbox[1], bbox[2], bbox[3]
            if h > 0 and w > 0:
                reg_mask[k] = 1
            continue
        if h > 0 and w > 0:
            obj[int(bbox[1]):int(bbox[3]) + 1,
                int(bbox[0]):int(bbox[2]) + 1] = 1
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = 2 * radius / 3 if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            # reg_mask[k] = 2 - w * h / output_w / output_h
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    if self.opt.task in ['fcos']:  # , 'ttf']:
        ret = {'input': inp, 'target': target, 'mask': reg_mask}
        return ret

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.reg_obj:
        ret.update({'obj': obj[np.newaxis]})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
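# All of these __getitem__ variants return plain dicts of fixed-size numpy
# arrays, so they batch directly under PyTorch's default collate. A
# hypothetical usage sketch; the dataset class name and 'opt' object are
# placeholders, not from the source:
import torch

dataset = YoloCenterDataset(opt, split='train')  # hypothetical class / ctor
loader = torch.utils.data.DataLoader(
    dataset, batch_size=16, shuffle=True, num_workers=4,
    pin_memory=True, drop_last=True)
for batch in loader:
    hm = batch['hm']     # [B, num_classes, output_h, output_w]
    ind = batch['ind']   # [B, max_objs] flat cell indices
    break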