def __getitem__(self, index):
    """Build the network input and targets for the face sample at ``index``.

    The image is read, optionally augmented (random scale, shift and
    horizontal flip when ``self.split == 'train'``), warped to
    ``self.default_resolution`` and normalised.  Box and five-point
    landmark annotations are mapped onto the output grid
    (input resolution // ``self.down_ratio``) and encoded as heat-map,
    size, centre-offset and landmark-offset targets.

    Args:
        index: Position of the sample in ``self.imgs_path`` / ``self.words``.

    Returns:
        dict with keys ``input``, ``hm``, ``lm``, ``reg_mask``, ``ind``,
        ``wh``, ``reg``, ``lm_ind`` and ``lm_mask``.  Outside the training
        split a ``meta`` entry containing ``gt_det`` is added.

    Raises:
        FileNotFoundError: the image path does not exist.
        OSError: the file exists but OpenCV could not decode it.
    """
    img_path = self.imgs_path[index]
    if not os.path.exists(img_path):
        # Previously this only printed and then crashed on the unbound `img`.
        raise FileNotFoundError("%s not exists" % img_path)
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("failed to read image: %s" % img_path)

    # Columns 0..3 are the box; landmark i is read from columns 4 + 3*i
    # (x) and 5 + 3*i (y).
    anns = np.array(self.words[index])
    bboxes = np.array([self._coco_box_to_bbox(bb) for bb in anns[:, :4]])
    lms = np.zeros((anns.shape[0], 10), dtype=np.float32)
    if self.split == "train":
        for idx, ann in enumerate(anns):
            # -1 marks "no landmarks annotated" for this face.
            lm = np.zeros(10, dtype=np.float32) - 1
            if ann[4] >= 0:
                for i in range(5):
                    lm[i * 2] = ann[4 + 3 * i]
                    lm[i * 2 + 1] = ann[4 + 3 * i + 1]
            lms[idx] = lm

    num_objs = min(len(anns), self.max_objs)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    input_h, input_w = self.default_resolution[0], self.default_resolution[1]

    flipped = False
    if self.split == 'train':
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, width)
        h_border = self._get_border(128, height)
        c[0] = np.random.randint(low=w_border, high=width - w_border)
        c[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    # NOTE: colour augmentation runs for every split, as in the original.
    color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.down_ratio
    output_w = input_w // self.down_ratio
    num_classes = 1
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    landmarks = np.zeros((self.max_objs, 10), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    lm_ind = np.zeros((self.max_objs), dtype=np.int64)
    lm_mask = np.zeros((self.max_objs), dtype=np.uint8)

    gt_det = []
    cls_id = 0
    for k in range(num_objs):
        bbox = bboxes[k]
        lm = lms[k]
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            if lm[0] >= 0:
                lm[0::2] = width - lm[0::2] - 1
                # Mirroring exchanges landmark 0 with 1 and 3 with 4
                # (presumably the left/right eye and mouth-corner pairs).
                l_tmp = lm.copy()
                lm[0:2] = l_tmp[2:4]
                lm[2:4] = l_tmp[0:2]
                lm[6:8] = l_tmp[8:10]
                lm[8:10] = l_tmp[6:8]

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        if lm[0] >= 0:
            for j in range(0, 10, 2):
                lm[j:j + 2] = affine_transform(lm[j:j + 2], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # Coarse in-bounds test on selected landmark coordinates,
            # kept exactly as in the original.
            if lm[0] > 0 and lm[1] < output_h and lm[2] < output_w \
                    and lm[3] < output_h and lm[6] > 0 and lm[7] > 0 \
                    and lm[8] < output_w and lm[9] > 0:
                lm_ind[k] = ct_int[1] * output_w + ct_int[0]
                if h * w > 10:
                    # Only faces covering more than 10 output cells
                    # contribute to the landmark mask.
                    lm_mask[k] = 1
                # Landmarks are stored as offsets from the integer centre.
                lm_temp = lm.copy()
                lm_temp[[0, 2, 4, 6, 8]] = lm_temp[[0, 2, 4, 6, 8]] - ct_int[0]
                lm_temp[[1, 3, 5, 7, 9]] = lm_temp[[1, 3, 5, 7, 9]] - ct_int[1]
                landmarks[k] = lm_temp

            # Scale the box from the output grid back to input resolution
            # (the factor was hard-coded as 4 before).
            ratio = self.down_ratio
            gt_det.append([
                ratio * (ct[0] - w / 2), ratio * (ct[1] - h / 2),
                ratio * (ct[0] + w / 2), ratio * (ct[1] + h / 2)
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'lm': landmarks,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'lm_ind': lm_ind,
        'lm_mask': lm_mask
    }
    if not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 4), dtype=np.float32)
        ret['meta'] = {'gt_det': gt_det}
    return ret
def _get_label(self, c, s, rot, width, flipped, anns):
    """Encode the annotations ``anns`` as multi-pose training targets.

    Args:
        c, s, rot: Centre, scale and rotation passed to
            ``get_affine_transform`` for the output-resolution mapping.
        width: Source image width, used to mirror coordinates on flip.
        flipped: Whether the image was horizontally flipped.
        anns: Sequence of dicts with ``bbox``, ``category_id`` and
            ``keypoints`` entries.

    Returns:
        Tuple ``(gt_det, hm, reg, reg_mask, ind, wh, kps, kps_mask, hm_hp,
        hp_offset, hp_ind, hp_mask, dense_kps, dense_kps_mask)``.
    """
    res = self.opt.output_res
    n_joints = self.num_joints
    max_objs = self.max_objs

    # Keypoints use the rotated transform, boxes the unrotated one.
    rot_affine = get_affine_transform(c, s, rot, [res, res])
    box_affine = get_affine_transform(c, s, 0, [res, res])

    hm = np.zeros((self.num_classes, res, res), dtype=np.float32)
    hm_hp = np.zeros((n_joints, res, res), dtype=np.float32)
    dense_kps = np.zeros((n_joints, 2, res, res), dtype=np.float32)
    dense_kps_mask = np.zeros((n_joints, res, res), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    kps = np.zeros((max_objs, n_joints * 2), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs), dtype=np.int64)
    reg_mask = np.zeros((max_objs), dtype=np.uint8)
    kps_mask = np.zeros((max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((max_objs * n_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((max_objs * n_joints), dtype=np.int64)
    hp_mask = np.zeros((max_objs * n_joints), dtype=np.int64)

    if self.opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(min(len(anns), max_objs)):
        ann = anns[k]
        raw = ann['bbox']
        # (x, y, w, h) -> (x1, y1, x2, y2)
        bbox = np.array([raw[0], raw[1], raw[0] + raw[2], raw[1] + raw[3]],
                        dtype=np.float32)
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(n_joints, 3)

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            for pair in self.flip_idx:
                first, second = pair[0], pair[1]
                # Fancy indexing copies the right-hand side, so this is a
                # safe row swap.
                pts[[first, second]] = pts[[second, first]]

        # Map the box onto the output feature map and clamp it.
        bbox[:2] = affine_transform(bbox[:2], box_affine)
        bbox[2:] = affine_transform(bbox[2:], box_affine)
        bbox = np.clip(bbox, 0, res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        if not ((h > 0 and w > 0) or rot != 0):
            continue

        # Box centre targets.
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss
        else:
            radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * res + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        if pts[:, 2].sum() == 0:
            # No keypoint flagged: mark the centre in hm and drop the object
            # from the regression mask.
            hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
            reg_mask[k] = 0

        # Keypoint targets.
        hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        if self.opt.mse_loss:
            hp_radius = self.opt.hm_gauss
        else:
            hp_radius = max(0, int(hp_radius))

        for j in range(n_joints):
            if not pts[j, 2] > 0:
                continue
            pts[j, :2] = affine_transform(pts[j, :2], rot_affine)
            inside = (0 <= pts[j, 0] < res) and (0 <= pts[j, 1] < res)
            if not inside:
                continue
            slot = k * n_joints + j
            kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
            kps_mask[k, j * 2: j * 2 + 2] = 1
            pt_int = pts[j, :2].astype(np.int32)
            hp_offset[slot] = pts[j, :2] - pt_int
            hp_ind[slot] = pt_int[1] * res + pt_int[0]
            hp_mask[slot] = 1
            if self.opt.dense_hp:
                # Must run before the centre gaussian is drawn into hm.
                draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                               pts[j, :2] - ct_int, radius, is_offset=True)
                draw_gaussian(dense_kps_mask[j], ct_int, radius)
            draw_gaussian(hm_hp[j], pt_int, hp_radius)

        draw_gaussian(hm[cls_id], ct_int, radius)
        # Row layout: box (4), constant 1, flattened keypoint xy, class id.
        row = [bbox[0], bbox[1], bbox[2], bbox[3], 1]
        row = row + pts[:, :2].reshape(n_joints * 2).tolist()
        gt_det.append(row + [cls_id])

    if rot != 0:
        # With rotation, the heat-map is filled with 0.9999 and the
        # regression / keypoint masks are cleared.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0
    return gt_det, hm, reg, reg_mask, ind, wh, kps, kps_mask, hm_hp, \
        hp_offset, hp_ind, hp_mask, dense_kps, dense_kps_mask
def __getitem__(self, index):
    """Build the network input and targets for the face sample at ``index``.

    The image is read, optionally augmented (random scale, shift and
    horizontal flip when ``self.split == 'train'``), warped to
    ``self.default_resolution`` and normalised.  Boxes are encoded as
    heat-map, size and centre-offset targets on the output grid; the five
    landmarks are stored relative to the original box (offset from its
    top-left corner divided by its width / height).

    Args:
        index: Position of the sample in ``self.imgs_path`` / ``self.words``.

    Returns:
        dict with keys ``input``, ``hm``, ``lm``, ``reg_mask``, ``ind``,
        ``wh`` and ``reg``.  Outside the training split a ``meta`` entry
        with ``gt_det`` (one ``x1, y1, x2, y2`` row per object, zero padded
        to ``self.max_objs``), ``h`` and ``w`` is added.

    Raises:
        FileNotFoundError: the image path does not exist.
        OSError: the file exists but OpenCV could not decode it.
    """
    img_path = self.imgs_path[index]
    if not os.path.exists(img_path):
        # Previously this only printed and then crashed on the unbound `img`.
        raise FileNotFoundError("%s not exists" % img_path)
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("failed to read image: %s" % img_path)

    anns = self.words[index]
    num_objs = min(len(anns), self.max_objs)

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    input_h, input_w = self.default_resolution[0], self.default_resolution[1]

    flipped = False
    if self.split == 'train':
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, width)
        h_border = self._get_border(128, height)
        c[0] = np.random.randint(low=w_border, high=width - w_border)
        c[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    # NOTE: colour augmentation runs for every split (the train-only guard
    # was already commented out in the original).
    color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.down_ratio
    output_w = input_w // self.down_ratio
    num_classes = 1
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    landmarks = np.zeros((self.max_objs, 10), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    cls_id = 0
    for k in range(num_objs):
        ann = anns[k]
        x_o, y_o, w_o, h_o = ann[0], ann[1], ann[2], ann[3]
        bbox = self._coco_box_to_bbox(np.array(ann[:4].copy()))

        # Landmarks normalised by the original box; all zeros when the
        # sample has none or when not training.
        lm = np.zeros(10, dtype=np.float32)
        if self.split == 'train' and ann[4] > 0:
            for i in range(5):
                # The tiny epsilon guards against zero-sized boxes.
                lm[2 * i] = (ann[4 + 3 * i] - x_o) / (w_o + 1e-14)
                lm[2 * i + 1] = (ann[4 + 3 * i + 1] - y_o) / (h_o + 1e-14)

        if flipped:
            # NOTE(review): only the box is mirrored here; the box-relative
            # landmarks are left as-is -- confirm this is intended.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            landmarks[k] = lm

    ret = {
        'input': inp,
        'hm': hm,
        'lm': landmarks,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg
    }
    if not self.split == 'train':
        gt_det = np.zeros((self.max_objs, 4), dtype=np.float32)
        for k in range(num_objs):
            box = np.array(anns[k][:4].copy())
            # Was `gt_det[k:4] = bbox`, a slice assignment that overwrote
            # rows k..3 and did nothing at all for k >= 4.
            gt_det[k] = self._coco_box_to_bbox(box)
        if len(gt_det) == 0:
            gt_det = np.zeros((1, 4), dtype=np.float32)
        ret['meta'] = {'gt_det': gt_det, 'h': height, 'w': width}
    return ret
def __getitem__(self, index):
    """Return the normalised image and detection targets for ``index``.

    The image and its labels are loaded with ``load_data_detection``; when
    ``self.train`` is set, random scale/shift (unless ``self.not_rand_crop``)
    and a horizontal flip with probability ``self.flip`` are applied.
    Targets are built on a grid four times smaller than ``self.shape``.

    Args:
        index: Position of the sample in ``self.lines``.

    Returns:
        Tuple ``(img, ret)`` where ``img`` is the float32 CHW image and
        ``ret`` is a dict with keys ``hm``, ``reg_mask``, ``ind``, ``wh``
        and ``reg``.

    Raises:
        IndexError: ``index`` is past the end of the dataset.
    """
    # Was `assert index <= len(self)`: off by one and stripped under -O.
    if index >= len(self):
        raise IndexError('index range error')
    imgpath = self.lines[index].rstrip()
    img, label = load_data_detection(imgpath, self.shape)
    testlabel = label.copy()  # untouched copy, only used by the Debug view

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    input_h, input_w = self.shape[0], self.shape[1]

    flipped = False
    if self.train:
        if not self.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < self.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.train and not self.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    output_h = input_h // 4
    output_w = input_w // 4
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    img = ((inp - self.mean) / self.std).astype(np.float32)
    img = img.transpose(2, 0, 1)
    img = img.astype(np.float32)

    hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    # Each label row is read as [class id, x1, y1, x2, y2].
    label = np.array(label)
    # Cap at max_objs: the target arrays have only that many rows, so extra
    # labels used to raise IndexError.
    num_objs = min(label.shape[0], self.max_objs)
    for k in range(num_objs):
        cls_id = int(label[k, 0])
        bbox = label[k, 1:5]  # view: `label` is updated in place
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

    ret = {
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        "reg": reg
    }
    if Debug:
        # NOTE(review): `Debug` and `test_img` are not defined in this
        # method; presumably module-level names -- verify before enabling.
        testlabel = np.array(testlabel)
        cv2.rectangle(test_img, (int(testlabel[0, 1]), int(testlabel[0, 2])),
                      (int(testlabel[0, 3]), int(testlabel[0, 4])),
                      (255, 0, 0), 2)
        cv2.imshow("heatmap", hm[int(label[0, 0])])
        cv2.imshow("tests", test_img)
        print(label)
        cv2.waitKey(0)
    return (img, ret)
rotation_y = float( tmp[7]) # Rotation around Y-axis in camera coords. [-Pi; Pi] bbox2D = [ float(tmp[8]), float(tmp[9]), float(tmp[10]), float(tmp[11]) ] # (0-based) bounding box of the object: Left, top, right, bottom image coordinates alpha = float(tmp[12]) xmin, ymin, xmax, ymax = int(bbox2D[0]), int(bbox2D[1]), int( bbox2D[2]), int(bbox2D[3]) width_2d = xmax - xmin height_2d = ymax - ymin bbox = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) bbox[:2] = affine_transform( bbox[:2], trans_output) # (112, 112)에 맞게 BR 점의 위치를 옮겨줌 bbox[2:] = affine_transform( bbox[2:], trans_output) # (112, 112)에 맞게 TL 점의 위치를 옮겨줌 # #np.clip함수를 사용하여 bbox행렬의 범위를 output_w, output-h 내의 범위로 바꿔줌 bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, 640 - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, 480 - 1) cv2.line(out, (bbox[0], bbox[1]), (bbox[2], bbox[1]), (0, 255, 0), 2, lineType=cv2.LINE_AA) cv2.line(out, (bbox[2], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2, lineType=cv2.LINE_AA) cv2.line(out, (bbox[2], bbox[3]), (bbox[0], bbox[3]), (0, 255, 0), 2, lineType=cv2.LINE_AA) cv2.line(out, (bbox[0], bbox[3]), (bbox[0], bbox[1]), (0, 255, 0),
def __getitem__(self, index):
    """Load one COCO image and encode its boxes as detection targets.

    The image is warped to ``self.img_size`` without augmentation and the
    boxes are mapped onto the ``self.fmap_size`` feature map, where each
    valid box contributes a heat-map peak, its width/height, its
    half-diagonal and angle (``pxpy``), the sub-pixel centre offset and
    the flattened centre index.

    Args:
        index: Position of the sample in ``self.images``.

    Returns:
        dict with keys ``image``, ``hmap``, ``w_h_``, ``pxpy``, ``regs``,
        ``inds``, ``ind_masks``, ``c``, ``s`` and ``img_id``.

    Raises:
        OSError: OpenCV could not read the image file.
    """
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir,
                            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = np.array([self.cat_ids[anno['category_id']]
                       for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations],
                      dtype=np.float32)
    if len(bboxes) == 0:
        # No annotations: use one degenerate box so the array code below
        # still works; it is rejected by the h > 0 and w > 0 test.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # x1 y1 w h -> x1 y1 x2 y2

    img = cv2.imread(img_path)
    if img is None:
        raise OSError("failed to read image: %s" % img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    img_w, img_h = self.img_size['w'], self.img_size['h']
    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']

    trans_img = get_affine_transform(center, scale, 0, [img_w, img_h])
    img = cv2.warpAffine(img, trans_img, (img_w, img_h))
    img = img.astype(np.float32) / 255.  # [0, 1]
    img = img.transpose(2, 0, 1)  # [H, W, C] -> [C, H, W]

    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width, height
    pxpy = np.zeros((self.max_objs, 2), dtype=np.float32)  # length, theta
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # centre offset
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    # Cap at max_objs: the target arrays have only that many rows.
    limit = self.max_objs
    for k, (bbox, label) in enumerate(zip(bboxes[:limit], labels[:limit])):
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            # Computed inside the guard so h / w never divides by zero.
            d = math.sqrt(h * h + w * w) / 2  # half of the box diagonal
            theta = math.pi - math.atan(h / w)

            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(0, int(gaussian_radius(
                (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            pxpy[k] = 1. * d, 1. * theta
            regs[k] = obj_c - obj_c_int  # discretisation error
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]  # fmap_w*cy + cx
            ind_masks[k] = 1

    return {'image': img, 'hmap': hmap, 'w_h_': w_h_, 'pxpy': pxpy,
            'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
            'c': center, 's': scale, 'img_id': img_id}
def __getitem__(self, index):
    """Load one COCO image and produce the detection training sample.

    Applies the configured crop/scale/shift/flip augmentation in the
    training split, warps the image to the network input size and encodes
    every annotated box as heat-map, size and offset targets on the
    down-sampled output grid.

    Args:
        index: Position of the sample in ``self.images``.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind`` and, depending on
        the options, ``wh`` / ``dense_wh`` + ``dense_wh_mask`` /
        ``cat_spec_wh`` + ``cat_spec_mask``, ``reg`` and ``meta``.
    """
    opt = self.opt
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    if opt.keep_res:
        # Round each side up to the next multiple of (pad + 1).
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        scale = np.array([input_w, input_h], dtype=np.float32)
    else:
        scale = max(height, width) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if opt.not_rand_crop:
            sf = opt.scale
            cf = opt.shift
            center[0] += scale * np.clip(np.random.randn() * cf,
                                         -2 * cf, 2 * cf)
            center[1] += scale * np.clip(np.random.randn() * cf,
                                         -2 * cf, 2 * cf)
            scale = scale * np.clip(np.random.randn() * sf + 1,
                                    1 - sf, 1 + sf)
        else:
            scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            center[0] = np.random.randint(low=w_border,
                                          high=width - w_border)
            center[1] = np.random.randint(low=h_border,
                                          high=height - h_border)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.split == 'train' and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(center, scale, 0,
                                        [output_w, output_h])

    max_objs = self.max_objs
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs), dtype=np.int64)
    reg_mask = np.zeros((max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        if opt.mse_loss:
            radius = opt.hm_gauss
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)

        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        lo, hi = cls_id * 2, cls_id * 2 + 2
        cat_spec_wh[k, lo:hi] = wh[k]
        cat_spec_mask[k, lo:hi] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        ret['dense_wh'] = dense_wh
        ret['dense_wh_mask'] = np.concatenate([hm_a, hm_a], axis=0)
        ret.pop('wh')
    elif opt.cat_spec_wh:
        ret['cat_spec_wh'] = cat_spec_wh
        ret['cat_spec_mask'] = cat_spec_mask
        ret.pop('wh')
    if opt.reg_offset:
        ret['reg'] = reg
    if opt.debug > 0 or not self.split == 'train':
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': center, 's': scale, 'gt_det': gt_det,
                       'img_id': img_id}
    return ret
def transform_preds(coords, center, scale, output_size):
    """Map predicted points through the inverse affine transform.

    Args:
        coords: Array whose rows start with an (x, y) pair.
        center, scale, output_size: Forwarded to ``get_affine_transform``
            (called with rotation 0 and ``inv=1``).

    Returns:
        New array with the shape of ``coords``; the first two columns hold
        the transformed points and any remaining columns stay zero.
    """
    mapped = np.zeros(coords.shape)
    inverse = get_affine_transform(center, scale, 0, output_size, inv=1)
    row = 0
    while row < coords.shape[0]:
        mapped[row, 0:2] = affine_transform(coords[row, 0:2], inverse)
        row += 1
    return mapped