def _get_input(self, img, trans_input):
    """Warp ``img`` to the network input size and normalise it.

    Args:
        img: Image array handed to ``cv2.warpAffine``.
        trans_input: Affine matrix handed to ``cv2.warpAffine``.

    Returns:
        The warped image as float32 scaled by 1/255, optionally passed
        through ``color_aug`` (training split only, unless
        ``self.opt.no_color_aug`` is set), standardised with ``self.mean`` /
        ``self.std`` and reordered with ``transpose(2, 0, 1)``.
    """
    target_size = (self.opt.input_w, self.opt.input_h)
    warped = cv2.warpAffine(img, trans_input, target_size,
                            flags=cv2.INTER_LINEAR)
    pixels = warped.astype(np.float32) / 255.

    wants_color_aug = self.split == 'train' and not self.opt.no_color_aug
    if wants_color_aug:
        # The return value is ignored, so color_aug presumably edits
        # ``pixels`` in place -- TODO confirm against its definition.
        color_aug(self._data_rng, pixels, self._eig_val, self._eig_vec)

    standardised = (pixels - self.mean) / self.std
    return standardised.transpose(2, 0, 1)
def _get_data(self, position):
    """Load the sample at ``position`` and build its input and targets.

    The image id is looked up via ``self._indexes[position]``. The image is
    read with OpenCV, randomly scaled / shifted / flipped when
    ``self.split == 'train'``, warped to the input size and standardised.
    For at most ``self.params.max_objs`` annotations, a Gaussian is drawn on
    the class heatmap and the per-object targets are filled in.

    Args:
        position: Index into ``self._indexes``.

    Returns:
        Tuple ``(inp, hm, ind, wh, reg, reg_mask, cls)`` where

        * ``inp`` is the normalised image (transposed with ``(2, 0, 1)``
          unless ``self.channel_last`` is set),
        * ``hm`` is the ``(num_classes, output_h, output_w)`` heatmap
          (transposed with ``(1, 2, 0)`` when ``self.channel_last`` is set),
        * ``ind`` holds the flattened centre index ``y * output_w + x``,
        * ``wh`` holds the box width/height in output pixels,
        * ``reg`` holds the sub-pixel centre offset ``ct - ct_int``,
        * ``reg_mask`` is 1 for slots that hold a valid object,
        * ``cls`` holds the class id of each valid object.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.images[self._indexes[position]]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.params.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the path rather than on ``img.shape`` below.
        raise OSError(f"cv2.imread could not load image: {img_path}")

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop extent in source pixels.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale in {0.6, ..., 1.3} and a random centre that stays
            # at least ``border`` pixels away from the image edges.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            assert (
                len(img.shape) == 3
            ), f"The dimensions of img should be 3. Filename: {img_path}, shape: {img.shape}"
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Return value is ignored; presumably augments ``inp`` in place.
        color_aug(self._rng, inp, self.params._eig_val, self.params._eig_vec)
    inp = (inp - self.params.mean) / self.params.std

    if self.mixed_precision:
        inp = fast_pad(inp)

    # Transpose to NCHW if channel_last is not enabled
    if not self.channel_last:
        inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.params.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    ind = np.zeros((self.params.max_objs), dtype=np.int32)
    wh = np.zeros((self.params.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.params.max_objs, 2), dtype=np.float32)
    reg_mask = np.zeros((self.params.max_objs, 1), dtype=np.float32)
    cls = np.zeros((self.params.max_objs, 1), dtype=np.int32)

    draw_gaussian = draw_umich_gaussian

    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.params.cat_ids[ann['category_id']])
        if flipped:
            # Mirror the x-coordinates to follow the flipped image.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Boxes that collapse after clipping leave their slot zeroed.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cls[k] = cls_id

    # Transpose heatmap to NHWC if channel last is enabled
    if self.channel_last:
        hm = np.transpose(hm, (1, 2, 0))

    ret = (inp, hm, ind, wh, reg, reg_mask, cls)
    return ret
def __getitem__(self, index):
    """Build the detection training sample for ``self.images[index]``.

    The image is read with OpenCV, randomly scaled / shifted / flipped when
    ``self.split == 'train'``, warped to the input size and standardised.
    For at most ``self.max_objs`` annotations a Gaussian is drawn on the
    class heatmap and the size / offset / index targets are filled in.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``. Depending on ``self.opt``:

        * ``dense_wh``: ``'dense_wh'`` / ``'dense_wh_mask'`` replace ``'wh'``;
        * else ``cat_spec_wh``: ``'cat_spec_wh'`` / ``'cat_spec_mask'``
          replace ``'wh'``;
        * ``reg_offset``: ``'reg'`` is added;
        * ``debug > 0`` or a non-train split: ``'meta'`` is added with the
          crop centre ``c``, scale ``s``, ``gt_det`` and ``img_id``.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure. The previous code printed the
        # path and then crashed on ``img.shape``; raise with the path instead.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop extent in source pixels.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale in {0.6, ..., 1.3} and a random centre that stays
            # at least ``border`` pixels away from the image edges.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Return value is ignored; presumably augments ``inp`` in place.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror the x-coordinates to follow the flipped image.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Boxes that collapse after clipping leave their slot zeroed.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # Fall back to a single all-zero row when no box survived.
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 6), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the multi-person pose training sample for ``self.images[index]``.

    The image is read with OpenCV, randomly scaled / shifted / rotated /
    flipped when ``self.split == 'train'``, warped to a square of side
    ``self.opt.input_res`` and standardised. For at most ``self.max_objs``
    annotations the box-centre heatmap, box size, centre offset and the
    per-joint keypoint targets are filled in.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``,
        ``'hps'`` and ``'hps_mask'``. Depending on ``self.opt``:

        * ``dense_hp``: ``'dense_hps'`` / ``'dense_hps_mask'`` replace
          ``'hps'`` / ``'hps_mask'``;
        * ``reg_offset``: ``'reg'`` is added;
        * ``hm_hp``: ``'hm_hp'`` is added;
        * ``reg_hp_offset``: ``'hp_offset'``, ``'hp_ind'``, ``'hp_mask'``
          are added;
        * ``debug > 0`` or a non-train split: ``'meta'`` is added with the
          crop centre ``c``, scale ``s``, ``gt_det`` and ``img_id``.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the path rather than on ``img.shape`` below.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop extent in source pixels.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale in {0.6, ..., 1.3} and a random centre that stays
            # at least ``border`` pixels away from the image edges.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Return value is ignored; presumably augments ``inp`` in place.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    # Keypoints follow the rotated transform; boxes use the unrotated one.
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        # One (x, y, visibility) row per joint.
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Swap the joint pairs listed in flip_idx after mirroring.
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                # No labelled joint: mark the centre pixel with 0.9999 and
                # exclude this object from the regression mask.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = (self.opt.hm_gauss if self.opt.mse_loss
                         else max(0, int(hp_radius)))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        # With rotation the box targets were computed with the unrotated
        # transform, so the centre heatmap is filled with 0.9999 and the
        # box / keypoint regression masks are cleared.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        # Each gt_det row is 4 box coords + score + 2 per joint + class id.
        # The fallback width is derived from num_joints so it matches real
        # rows for any joint count (it equals 40 when num_joints == 17).
        gt_det_width = 4 + 1 + num_joints * 2 + 1
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, gt_det_width), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build a detection sample plus a segmentation/attention mask.

    Works like the plain detection loader (random scale / shift / flip on
    the training split, affine warp, heatmap and regression targets) and
    additionally loads or synthesises a foreground mask that is warped with
    the same transform and returned under ``'seg'``.

    Three local switches select the mask source:

    * ``HM_ATT``: when true, no mask is prepared at all.
      NOTE(review): ``'seg'`` is still read when building the result, so
      enabling it as-is would fail -- confirm intended behaviour.
    * ``PYFLOW``: read a pre-computed single-channel mask from disk.
    * ``ONE_CLASS_ONLY``: for bbox-derived masks, paint every class into one
      channel instead of one channel per category.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``,
        ``'seg'`` and ``'ct_att'`` (the same array as ``'hm'``). Depending on
        ``self.opt``: ``dense_wh`` or ``cat_spec_wh`` replace ``'wh'``;
        ``reg_offset`` adds ``'reg'``; ``debug > 0`` or a non-train split
        adds ``'meta'``.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image or the mask.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)

    # Hard-coded experiment switches (see docstring).
    HM_ATT = False
    PYFLOW = True
    ONE_CLASS_ONLY = True

    if not HM_ATT:
        if PYFLOW:
            # Mask path: <root>/<last directory of file_name>/<basename>
            # with 'jpg' replaced by 'png'.
            if 'uav' in self.opt.dataset:
                seg_path = os.path.join(
                    '/store/datasets/UAV/bgsubs',
                    os.path.dirname(file_name).split('/')[-1],
                    os.path.basename(file_name).replace('jpg', 'png'))
            else:
                seg_path = os.path.join(
                    '/store/datasets/OlderUA-Detrac/pyflow-bgsubs',
                    os.path.dirname(file_name).split('/')[-1],
                    os.path.basename(file_name).replace('jpg', 'png'))

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)

    channel_counter = len(self.coco.getCatIds())

    if not HM_ATT:
        # Integer [x1, y1, x2, y2] boxes grouped by str(category_id).
        bboxes = {}
        for ann in anns:
            box = ann['bbox']
            bboxes.setdefault(str(ann['category_id']), []).append([
                int(box[0]),
                int(box[1]),
                int(box[0] + box[2]),
                int(box[1] + box[3]),
            ])

    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    if not HM_ATT:
        if PYFLOW:
            seg_img = cv2.imread(seg_path, 0)
            if seg_img is None:
                raise OSError(
                    'cv2.imread could not load mask: {}'.format(seg_path))
        if not PYFLOW:
            if 'coco' in img_path:
                if 'val' in img_path:
                    seg_dir = '/store/datasets/coco/annotations/stuff_val2017_pixelmaps'
                else:
                    seg_dir = '/store/datasets/coco/annotations/stuff_train2017_pixelmaps'
                stuff_img = cv2.imread(
                    os.path.join(seg_dir, file_name.replace('.jpg', '.png')))
                # A pixel becomes 255 only when all three channels match
                # (0, 214, 255); every other pixel becomes 0.
                seg_img = np.zeros([img.shape[0], img.shape[1]])
                seg_img[stuff_img[:, :, 0] == 0] += 1
                seg_img[stuff_img[:, :, 1] == 214] += 1
                seg_img[stuff_img[:, :, 2] == 255] += 1
                seg_img[seg_img == 3] = 255
                seg_img[seg_img < 255] = 0
            else:
                if not ONE_CLASS_ONLY:
                    # One channel per category, boxes painted as 255.
                    seg_img = np.zeros(
                        [channel_counter, img.shape[0], img.shape[1]])
                    for label in range(1, channel_counter + 1):
                        if str(label) in bboxes:
                            for bbox in bboxes[str(label)]:
                                seg_img[label - 1, bbox[1]:bbox[3],
                                        bbox[0]:bbox[2]] = 255
                else:
                    # All categories painted into a single channel.
                    seg_img = np.zeros([img.shape[0], img.shape[1]])
                    for label in range(1, channel_counter + 1):
                        if str(label) in bboxes:
                            for bbox in bboxes[str(label)]:
                                seg_img[bbox[1]:bbox[3],
                                        bbox[0]:bbox[2]] = 255

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop extent in source pixels.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            if not HM_ATT:
                if seg_img.ndim == 2:
                    seg_img = seg_img[:, ::-1]
                else:
                    # Channel-first (C, H, W) mask: width is the LAST axis.
                    # The previous ``[:, ::-1, :]`` reversed the height.
                    seg_img = seg_img[:, :, ::-1]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    if not HM_ATT:
        if seg_img.ndim == 2:
            seg_inp = cv2.warpAffine(seg_img, trans_input,
                                     (input_w, input_h),
                                     flags=cv2.INTER_LINEAR)
        else:
            # warpAffine takes dsize as (width, height) and returns an
            # (height, width) array, so the buffer is (C, input_h, input_w).
            seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))
            for channel in range(seg_img.shape[0]):
                seg_inp[channel, :, :] = cv2.warpAffine(
                    seg_img[channel, :, :], trans_input,
                    (input_w, input_h), flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    if not HM_ATT:
        seg_inp = (seg_inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Return value is ignored; presumably augments ``inp`` in place.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    if self.opt.elliptical_gt:
        draw_gaussian = draw_ellipse_gaussian
    else:
        draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                         else draw_umich_gaussian)

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror the x-coordinates to follow the flipped image.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Boxes that collapse after clipping leave their slot zeroed.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if self.opt.elliptical_gt:
                # Stretch the radius along the longer box side.
                radius_x = radius if h > w else int(radius * (w / h))
                radius_y = radius if w >= h else int(radius * (h / w))
                draw_gaussian(hm[cls_id], ct_int, radius_x, radius_y)
            else:
                draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id
            ])

    if not HM_ATT:
        if seg_inp.ndim == 2:
            # Add a leading channel axis to a single-channel mask.
            seg_inp = np.expand_dims(seg_inp, 0)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'seg': seg_inp,
        'ct_att': hm
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # Fall back to a single all-zero row when no box survived.
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 6), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the human-object-interaction sample for ``self.ids[index]``.

    Object boxes produce the usual centre heatmap / size / offset targets.
    Each interaction produces a Gaussian on the verb heatmap at the midpoint
    between the subject and object centres, plus the offsets from that
    midpoint to both centres.

    Args:
        index: Position in ``self.ids``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``,
        ``'hm_rel'``, ``'sub_offset'``, ``'obj_offset'``, ``'offset_mask'``
        and ``'rel_ind'``; ``'reg'`` is added when ``self.opt.reg_offset``
        is set.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.ids[index]
    file_name = self.hoi_annotations[img_id]['file_name']
    img_path = os.path.join(self.root, self.image_dir, file_name)
    anns = self.hoi_annotations[img_id]['annotations']
    hoi_anns = self.hoi_annotations[img_id]['hoi_annotation']
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the path rather than on ``img.shape`` below.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop extent in source pixels.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale in {0.7, ..., 1.3} and a random centre that stays
            # at least ``border`` pixels away from the image edges.
            s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Return value is ignored; presumably augments ``inp`` in place.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    hm_rel = np.zeros((self.num_classes_verb, output_h, output_w),
                      dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
    obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    # Integer centre of every processed object, indexed like ``anns``;
    # interactions refer to objects through these positions.
    bbox_ct = []
    num_rels = min(len(hoi_anns), self.max_rels)
    for k in range(num_objs):
        ann = anns[k]
        # NOTE(review): np.asarray keeps the dtype of ann['bbox'] and returns
        # the same object if it is already an ndarray. With integer boxes the
        # in-place assignments below truncate the transformed coordinates --
        # confirm the annotation dtype.
        bbox = np.asarray(ann['bbox'])
        if isinstance(ann['category_id'], str):
            # Normalise string ids (possibly with a trailing newline) once;
            # the converted value is stored back on the annotation.
            ann['category_id'] = int(ann['category_id'].replace('\n', ''))
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror the x-coordinates to follow the flipped image.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # Recorded even for degenerate boxes so indices stay aligned.
        bbox_ct.append(ct_int.tolist())
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            draw_gaussian(hm[cls_id], ct_int, radius)

    offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
    rel_ind = np.zeros((self.max_rels), dtype=np.int64)
    for k in range(num_rels):
        hoi = hoi_anns[k]
        if isinstance(hoi['category_id'], str):
            hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
        hoi_cate = int(self.cat_ids_verb[hoi['category_id']])

        sub_idx = hoi['subject_id']
        obj_idx = hoi['object_id']
        if not (0 <= sub_idx < len(bbox_ct) and 0 <= obj_idx < len(bbox_ct)):
            # The referenced object was dropped by the max_objs cap (or the
            # id is invalid). Skip the relation and leave offset_mask[k] at
            # 0 instead of failing with an IndexError.
            continue
        sub_ct = bbox_ct[sub_idx]
        obj_ct = bbox_ct[obj_idx]
        offset_mask[k] = 1

        # Interaction point: midpoint between subject and object centres.
        rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                           (sub_ct[1] + obj_ct[1]) / 2], dtype=np.float32)
        radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                  math.ceil(abs(sub_ct[1] - obj_ct[1]))))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        rel_ct_int = rel_ct.astype(np.int32)
        draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)

        rel_sub_offset = np.array(
            [rel_ct_int[0] - sub_ct[0], rel_ct_int[1] - sub_ct[1]],
            dtype=np.float32)
        rel_obj_offset = np.array(
            [rel_ct_int[0] - obj_ct[0], rel_ct_int[1] - obj_ct[1]],
            dtype=np.float32)
        sub_offset[k] = 1. * rel_sub_offset[0], 1. * rel_sub_offset[1]
        obj_offset[k] = 1. * rel_obj_offset[0], 1. * rel_obj_offset[1]
        rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hm_rel': hm_rel,
        'sub_offset': sub_offset,
        'obj_offset': obj_offset,
        'offset_mask': offset_mask,
        'rel_ind': rel_ind
    }
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    return ret
def __getitem__(self, index):
    """Build one sample: the normalised network input and its detection targets.

    The image and boxes come from ``self.load_mosaic(index)`` whenever the
    mosaic draw is positive; otherwise they are read through ``self.coco``.
    Every object is assigned class id 0.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and, depending
        on ``self.opt``, ``'wh'`` or ``'dense_wh'``/``'dense_wh_mask'`` or
        ``'cat_spec_wh'``/``'cat_spec_mask'``, plus optional ``'reg'`` and
        ``'meta'``.
    """
    mosaic_pro = random.random()
    use_mosaic = mosaic_pro > 0
    if use_mosaic:
        img_id = self.images[index]
        img, labels = self.load_mosaic(index)
        # Flatten the per-tile label arrays into one list of rows.
        all_ann = [row for tile_labels in labels for row in tile_labels.tolist()]
        num_objs = min(len(all_ann), self.max_objs)
    elif random.random() > 2:
        # random.random() is always < 1, so this paste augmentation is
        # effectively switched off; the code is kept as written.
        donor_index = random.randint(0, self.num_samples - 1)
        img_id = self.images[index]
        donor_id = self.images[donor_index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        donor_name = self.coco.loadImgs(ids=[donor_id])[0]['file_name']
        path_num = random.random()  # value unused; the draw itself is kept
        img_path = os.path.join(self.img_dir, file_name)
        donor_path = os.path.join(self.img_dir, donor_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        donor_ann_ids = self.coco.getAnnIds(imgIds=[donor_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        donor_anns = self.coco.loadAnns(ids=donor_ann_ids)
        img = cv2.imread(img_path)
        donor_img = cv2.imread(donor_path)
        for donor_ann in donor_anns:
            ran_id = random.randint(0, 26000)  # value unused; draw kept
            hand_x = donor_ann['bbox'][0]
            hand_y = donor_ann['bbox'][1]
            hand_w = donor_ann['bbox'][2]
            hand_h = donor_ann['bbox'][3]
            patch = donor_img[hand_y:hand_y + hand_h, hand_x:hand_x + hand_w]
            patch_h, patch_w, _ = patch.shape
            src_h, src_w, _ = img.shape
            # Skip patches that are too large relative to the target image.
            too_big = max(patch_h, patch_w) > 0.5 * min(src_w, src_h)
            if too_big or src_w < patch_w or src_h < patch_h:
                continue
            for _attempt in range(100):
                x_tmp = random.randint(0, src_w - patch_w)
                y_tmp = random.randint(0, src_h - patch_h)
                src_rect = [x_tmp, y_tmp, x_tmp + patch_w, y_tmp + patch_h]
                iou_all = sum(
                    self.compute_iou(
                        [
                            gt['bbox'][0],
                            gt['bbox'][1],
                            gt['bbox'][0] + gt['bbox'][2],
                            gt['bbox'][1] + gt['bbox'][3],
                        ],
                        src_rect,
                    )
                    for gt in anns
                )
                if iou_all == 0:
                    # The location overlaps no existing box: paste and record it.
                    img[y_tmp:y_tmp + patch_h, x_tmp:x_tmp + patch_w] = patch
                    anns.append({
                        'bbox': [x_tmp, y_tmp, patch_w, patch_h],
                        'category_id': 1
                    })
                    break
        num_objs = min(len(anns), self.max_objs)
    else:
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)

    gray_pro = random.random()
    if gray_pro > 2:  # never true for random.random(); grayscale aug disabled
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if self.opt.not_rand_crop:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        else:
            # Random scale plus a random crop centre kept away from the borders.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    iaa_pro = random.random()
    if iaa_pro > 2:  # never true for random.random(); imgaug step disabled
        aug_seq = iaa.Sequential(
            [iaa.MultiplyHueAndSaturation((0.5, 1.5), per_channel=True)])
        inp, _ = aug_seq(image=inp, bounding_boxes=None)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    # ind is the flattened centre index; reg is the sub-pixel centre offset.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    if self.opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        if use_mosaic:
            ann = all_ann[k]
            bbox = np.array([float(ann[i]) for i in range(4)],
                            dtype=np.float32)
        else:
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = 0
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box vanished after warping/clipping
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        ret['dense_wh'] = dense_wh
        ret['dense_wh_mask'] = np.concatenate([hm_a, hm_a], axis=0)
        ret.pop('wh')
    elif self.opt.cat_spec_wh:
        ret['cat_spec_wh'] = cat_spec_wh
        ret['cat_spec_mask'] = cat_spec_mask
        ret.pop('wh')
    if self.opt.reg_offset:
        ret['reg'] = reg
    if self.opt.debug > 0 or self.split != 'train':
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
    return ret
def __getitem__(self, index):
    """Build one sample from an image and its XML annotation file.

    The annotation path is derived from the image path by replacing ``'Data'``
    with ``'Annotations'`` and swapping the file extension for ``.xml``.
    Objects whose ``name`` is not in ``self.id2idx`` are ignored.

    Args:
        index: Position in ``self.images`` (a list of image paths).

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and, depending
        on ``self.opt``, ``'wh'`` or ``'dense_wh'``/``'dense_wh_mask'`` or
        ``'cat_spec_wh'``/``'cat_spec_mask'``, plus optional ``'reg'`` and
        ``'meta'``.
    """
    img_path = self.images[index]
    # Swap only the trailing extension. A plain str.replace(ext, '.xml') would
    # also rewrite the same text elsewhere in the path, and would corrupt the
    # whole path when the extension is empty.
    ann_path = os.path.splitext(img_path)[0].replace('Data', 'Annotations') + '.xml'

    anns = []
    root = ET.parse(ann_path).getroot()
    for obj in root.findall('object'):
        cls = obj.find('name').text
        if cls not in self.id2idx:
            continue
        cls = self.id2idx[cls]
        x1 = int(obj.find('bndbox/xmin').text)
        y1 = int(obj.find('bndbox/ymin').text)
        x2 = int(obj.find('bndbox/xmax').text)
        y2 = int(obj.find('bndbox/ymax').text)
        # Row layout: [class index, x1, y1, x2, y2].
        anns.append(np.array([cls, x1, y1, x2, y2], dtype=np.float32))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale plus a random crop centre kept away from the borders.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = ann[1:]  # view into the local annotation row
        cls_id = int(ann[0])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        # This dataset has no numeric ids, so the image path serves as img_id.
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_path}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample with left-mid, right-mid and centre keypoint targets.

    Annotations are read from ``<self.annot_path>/<name>.txt``; each non-empty
    line holds six whitespace-separated fields: an integer class id followed
    by five floats. Fields 1-2 and 3-4 are warped as two points; field 5 is
    passed on to ``self._bbox_to_points`` (presumably a rotation angle --
    TODO confirm). Reading stops at the first blank line.

    Args:
        index: Position in ``self.images`` (entries may carry a trailing
            newline, which is stripped).

    Returns:
        dict with ``'input'``, the heatmaps ``'lm'``/``'rm'``/``'ct'``, the
        flattened indices ``'lm_tag'``/``'rm_tag'``/``'ct_tag'``, the sub-pixel
        offsets ``'lm_reg'``/``'rm_reg'``/``'ct_reg'`` and ``'reg_mask'``.
    """
    template_name = self.images[index].split('\n')[0]
    img_path = os.path.join(self.img_dir, template_name + '.png')
    anno_path = os.path.join(self.annot_path, template_name + '.txt')

    anns = []
    with open(anno_path, 'r') as f:
        line = f.readline().split()
        while line:
            anns.append([int(line[0]), float(line[1]), float(line[2]),
                         float(line[3]), float(line[4]), float(line[5])])
            line = f.readline().split()
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale plus a random crop centre kept away from the borders.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(64, img.shape[1])
            h_border = self._get_border(64, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    num_ct_classes = self.num_ct_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
    # Largest valid index into a flattened (output_h, output_w) map.
    max_flat_index = output_h * output_w - 1

    lm_heatmaps = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    rm_heatmaps = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    ct_heatmaps = np.zeros((num_ct_classes, output_h, output_w), dtype=np.float32)
    lm_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    rm_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ct_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    lm_tag = np.zeros((self.max_objs), dtype=np.int64)
    rm_tag = np.zeros((self.max_objs), dtype=np.int64)
    ct_tag = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

    for k in range(num_objs):
        ann = anns[k]
        # Width in source-image pixels, taken before the points are warped.
        width_origin = ann[3] - ann[1]
        cls_id = int(self.cat_ids[ann[0]])
        ann[1:3] = affine_transform(ann[1:3], trans_output)
        ann[3:5] = affine_transform(ann[3:5], trans_output)
        ftl_p, fbl_p, fbr_p, ftr_p = self._bbox_to_points(ann[1:5], ann[5])
        flm_p = np.array([(ftl_p[0] + fbl_p[0]) / 2, (ftl_p[1] + fbl_p[1]) / 2],
                         dtype=np.float32)
        frm_p = np.array([(ftr_p[0] + fbr_p[0]) / 2, (ftr_p[1] + fbr_p[1]) / 2],
                         dtype=np.float32)
        fct_p = np.array([(ftl_p[0] + fbr_p[0]) / 2, (ftl_p[1] + fbr_p[1]) / 2],
                         dtype=np.float32)
        # Skip boxes whose left/right mid-points fall outside the output map
        # after the affine transformation and rotation.
        if flm_p[0] < 0 or flm_p[0] > output_w - 1 or flm_p[1] < 0 or flm_p[1] > output_h - 1 or \
                frm_p[0] < 0 or frm_p[0] > output_w - 1 or frm_p[1] < 0 or frm_p[1] > output_h - 1:
            continue
        lm_p = flm_p.astype(np.int32)
        rm_p = frm_p.astype(np.int32)
        ct_p = fct_p.astype(np.int32)
        w = np.sqrt(np.power(flm_p[0] - frm_p[0], 2) + np.power(flm_p[1] - frm_p[1], 2))
        h = w / width_origin * 20.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            draw_gaussian(lm_heatmaps[cls_id], lm_p, radius)
            draw_gaussian(rm_heatmaps[cls_id], rm_p, radius)
            draw_gaussian(ct_heatmaps[0], ct_p, radius)
            lm_tag[k] = lm_p[1] * output_w + lm_p[0]
            rm_tag[k] = rm_p[1] * output_w + rm_p[0]
            ct_flat = ct_p[1] * output_w + ct_p[0]
            ct_tag[k] = ct_flat
            # Diagnostics only. They report the image path, because no
            # `file_name` variable exists in this method.
            if ct_flat > max_flat_index:
                print(img_path)
                print("Out of upper bound!")
            elif ct_flat < 0:
                print(img_path)
                print("Out of lower bound!")
            lm_reg[k] = flm_p - lm_p
            rm_reg[k] = frm_p - rm_p
            ct_reg[k] = fct_p - ct_p
            reg_mask[k] = 1

    # Offsets are float minus truncated int, so a value above 1 is unexpected.
    if (ct_reg > 1).any():
        print("Float precision error!")

    ret = {'input': inp, 'lm': lm_heatmaps, 'rm': rm_heatmaps, 'ct': ct_heatmaps,
           'lm_tag': lm_tag, 'rm_tag': rm_tag, 'ct_tag': ct_tag,
           'lm_reg': lm_reg, 'rm_reg': rm_reg, 'ct_reg': ct_reg, 'reg_mask': reg_mask}
    return ret
def __getitem__(self, index):
    """Build one multi-frame sample: 11 stacked frames, detection targets and a box mask.

    The numeric frame index is parsed from the annotated image's file name and
    ``N_FRAMES`` neighbouring ``.jpg`` frames are loaded around it; a neighbour
    that does not exist on disk is replaced by the annotated frame itself.
    Frames are concatenated along the channel axis. A one-channel mask with
    the category-``1`` boxes filled in is warped with the same transform and
    returned as ``'seg'``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'`` (``N_FRAMES * 3`` channels), ``'hm'``,
        ``'reg_mask'``, ``'ind'``, ``'seg'`` and, depending on ``self.opt``,
        ``'wh'`` or ``'dense_wh'``/``'dense_wh_mask'`` or
        ``'cat_spec_wh'``/``'cat_spec_mask'``, plus optional ``'reg'`` and
        ``'meta'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)
    channel_counter = 1  # number of mask channels (category ids 1..channel_counter)

    N_FRAMES = 11
    middle = int(N_FRAMES / 2)
    # Digits of the frame number, e.g. 'img00012.jpg' -> '00012'.
    frame_idx = os.path.basename(img_path).replace('.jpg', '').replace('img', '').replace('.JPEG', '')
    img_dir = os.path.dirname(img_path)
    # File-name prefix (including the leading separator) before the digits.
    rest = img_path.replace(frame_idx + '.jpg', '').replace(img_dir, '')
    length = len(frame_idx)
    modulo = 10 ** length  # keeps the wrapped frame number within `length` digits

    img_paths = []
    for i in range(N_FRAMES):
        neighbour = str((int(frame_idx) - (i - middle)) % modulo).zfill(length)
        new_img_path = img_dir + rest + neighbour + '.jpg'
        if not os.path.exists(new_img_path):
            new_img_path = img_path
        img_paths.append(new_img_path)
    imgs = [cv2.imread(path) for path in img_paths]
    img = np.concatenate(imgs, -1)

    # Group the boxes as [x1, y1, x2, y2] ints by category id.
    bboxes = {}
    for ann in anns:
        x, y, bw, bh = (ann['bbox'][0], ann['bbox'][1],
                        ann['bbox'][2], ann['bbox'][3])
        bboxes.setdefault(str(ann['category_id']), []).append(
            [int(x), int(y), int(x + bw), int(y + bh)])

    seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]])
    for label in range(1, channel_counter + 1):
        for bbox in bboxes.get(str(label), []):
            seg_img[label - 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale plus a random crop centre kept away from the borders.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed bounds.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            # Mirror the mask as well so that 'seg' stays aligned with the
            # flipped frames and with the flipped boxes used for 'hm'.
            seg_img = np.ascontiguousarray(seg_img[:, :, ::-1])
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])

    # cv2.warpAffine takes dsize as (width, height) and returns an array of
    # shape (height, width), so the buffers are allocated height-first.
    seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))
    for channel in range(seg_img.shape[0]):
        seg_inp[channel, :, :] = cv2.warpAffine(seg_img[channel, :, :], trans_input,
                                                (input_w, input_h),
                                                flags=cv2.INTER_LINEAR)

    inp = np.zeros((input_h, input_w, N_FRAMES * 3))
    for i in range(N_FRAMES):
        inp[:, :, i * 3:i * 3 + 3] = cv2.warpAffine(img[:, :, i * 3:i * 3 + 3], trans_input,
                                                    (input_w, input_h),
                                                    flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    seg_inp = (seg_inp.astype(np.float32) / 255.)

    # Colour augmentation and normalisation are applied to each frame separately.
    if self.split == 'train' and not self.opt.no_color_aug:
        for i in range(N_FRAMES):
            color_aug(self._data_rng, inp[:, :, i * 3:i * 3 + 3],
                      self._eig_val, self._eig_vec)
    for i in range(N_FRAMES):
        inp[:, :, i * 3:i * 3 + 3] = (inp[:, :, i * 3:i * 3 + 3] - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'seg': seg_inp}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample using the module-level ``cfg`` for sizes and statistics.

    In the ``'train'`` split the image is randomly scaled, cropped, optionally
    flipped and colour-augmented at ``cfg.train_resolution``; in any other
    split the input size is derived from the image size and ``self.opt.pad``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'`` and, when debugging or outside the train split, ``'meta'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), cfg.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)

    training = self.split == 'train'
    flipped = False
    if training:
        scale = max(height, width) * 1.0
        input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
        # Random scale plus a random crop centre kept away from the borders.
        scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1
    else:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        scale = np.array([input_w, input_h], dtype=np.float32)

    input_trans = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         input_trans, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    # TODO: inp can contain values below 0 after color_aug.
    if training:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - cfg.mean) / cfg.std).transpose(2, 0, 1)

    output_h = input_h // cfg.down_ratio
    output_w = input_w // cfg.down_ratio
    output_trans = get_affine_transform(center, scale, 0, [output_w, output_h])

    hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros(cfg.max_objs, dtype=np.int64)
    reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)

    gt_box = []
    for obj_idx in range(num_objs):
        ann = anns[obj_idx]
        bbox = coco2x1y1x2y2(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], output_trans)
        bbox[2:] = affine_transform(bbox[2:], output_trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box vanished after warping/clipping
        # Object-size-adaptive Gaussian radius.
        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_umich_gaussian(hm[cls_id], ct_int, radius)
        wh[obj_idx] = 1. * w, 1. * h
        ind[obj_idx] = ct_int[1] * output_w + ct_int[0]
        reg[obj_idx] = ct - ct_int
        reg_mask[obj_idx] = 1
        gt_box.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg': reg,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.debug > 0 or not training:
        if len(gt_box) > 0:
            gt_box = np.array(gt_box, dtype=np.float32)
        else:
            gt_box = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': center, 's': scale, 'gt_det': gt_box, 'img_id': img_id}
    return ret
def __getitem__(self, index):
    """Build one stereo sample with randomly perturbed 3D-box estimates.

    The partner image id is ``img_id + 7481`` for ids below 7481 and
    ``img_id - 7481`` otherwise; its file is ``'{:06d}.png'`` in
    ``self.img_dir``. For the ``'val'`` and ``'train'`` splits every one of
    the ``self.max_objs`` slots is filled: real annotations first, then
    randomly re-drawn annotations as padding. Each slot holds a noisy
    estimate (``*_est``), the ground truth (``*_real``) and, for dimensions
    and position, the residual ``real - est``; ``'ori'`` holds the drawn
    orientation noise.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with both normalised inputs, the per-slot arrays described above,
        the two 3x4 calibration matrices, the per-level output transforms,
        ``'reg_mask'`` and ``'meta'``.
    """
    img_id = self.images[index]
    img_id_r = img_id + 7481 if img_id < 7481 else img_id - 7481
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    file_name_r = "{:06d}".format(img_id_r) + '.png'
    img_path = os.path.join(self.img_dir, file_name)
    img_path_r = os.path.join(self.img_dir, file_name_r)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    img_r = cv2.imread(img_path_r)
    img_shape = img.shape[:2]
    rot = 0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    c_r = np.array([img_r.shape[1] / 2., img_r.shape[0] / 2.],
                   dtype=np.float32)
    s_r = max(img_r.shape[0], img_r.shape[1]) * 1.0

    net_size = [self.opt.input_w, self.opt.input_h]
    trans_input_l = get_affine_transform(c, s, rot, net_size)
    trans_input_r = get_affine_transform(c_r, s_r, rot, net_size)

    def prepare(image, trans):
        # Warp to the network size, scale to [0, 1], optionally colour-augment,
        # normalise and move channels first.
        out = cv2.warpAffine(image,
                             trans, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        out = (out.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, out, self._eig_val, self._eig_vec)
        out = (out - self.mean) / self.std
        return out.transpose(2, 0, 1)

    inp = prepare(img, trans_input_l)
    inp_r = prepare(img_r, trans_input_r)

    # One output transform per pyramid level, at down ratios 2, 4, 8, ...
    trans_output_l = np.zeros((self.opt.pynum, 2, 3), dtype=np.float32)
    trans_output_r = np.zeros((self.opt.pynum, 2, 3), dtype=np.float32)
    for level in range(self.opt.pynum):
        down_ratio = math.pow(2, level + 1)
        level_size = [
            self.opt.input_w // down_ratio, self.opt.input_h // down_ratio
        ]
        trans_output_l[level, :, :] = get_affine_transform(
            c, s, rot, level_size)
        trans_output_r[level, :, :] = get_affine_transform(
            c_r, s_r, rot, [
                self.opt.input_w // down_ratio,
                self.opt.input_h // down_ratio
            ])

    dim = np.zeros((self.max_objs, 3), dtype=np.float32)
    ori = np.zeros((self.max_objs), dtype=np.float32)
    pos = np.zeros((self.max_objs, 3), dtype=np.float32)
    dim_real = np.zeros((self.max_objs, 3), dtype=np.float32)
    ori_real = np.zeros((self.max_objs), dtype=np.float32)
    pos_real = np.zeros((self.max_objs, 3), dtype=np.float32)
    dim_est = np.zeros((self.max_objs, 3), dtype=np.float32)
    ori_est = np.zeros((self.max_objs, 3, 3), dtype=np.float32)
    ori_est_scalar = np.zeros((self.max_objs), dtype=np.float32)
    pos_est = np.zeros((self.max_objs, 3), dtype=np.float32)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    # Calibration is stored on the annotations; the first one is used.
    calib = np.reshape(np.array(anns[0]['calib_l'], dtype=np.float32), (3, 4))
    calib_r = np.reshape(np.array(anns[0]['calib_r'], dtype=np.float32),
                         (3, 4))

    if self.split == 'val' or self.split == 'train':
        for k in range(self.max_objs):
            if k + 1 > num_objs:
                # Padding slot: reuse a randomly chosen real annotation.
                ann = anns[random.randint(0, num_objs - 1)]
            else:
                ann = anns[k]
                reg_mask[k] = 1

            # Noise half-widths: (dimension, orientation, (x, y, z) position).
            if self.split == 'val':
                dim_noise, ori_noise, pos_noise = 0.8, 0.3, (1, 0.5, 2)
            elif np.random.random() < 0.7:
                dim_noise, ori_noise, pos_noise = 1.5, 0.6, (2, 0.8, 3)
            else:
                dim_noise, ori_noise, pos_noise = 0.5, 0.3, (0.8, 0.3, 1)

            # Draw order is dimensions, orientation, position.
            for axis in range(3):
                dim_est[k][axis] = ann['dim'][axis] + random.uniform(
                    -dim_noise, dim_noise)
            ori[k] = random.uniform(-ori_noise, ori_noise)
            ori_est_scalar[k] = ann['rotation_y'] - ori[k]
            ori_est[k] = self.E2R(ann['rotation_y'] - ori[k])
            for axis in range(3):
                pos_est[k][axis] = ann['location'][axis] + random.uniform(
                    -pos_noise[axis], pos_noise[axis])

            for axis in range(3):
                dim[k][axis] = ann['dim'][axis] - dim_est[k][axis]
            for axis in range(3):
                pos[k][axis] = ann['location'][axis] - pos_est[k][axis]
            for axis in range(3):
                dim_real[k][axis] = ann['dim'][axis]
            for axis in range(3):
                pos_real[k][axis] = ann['location'][axis]
            ori_real[k] = ann['rotation_y']

    meta = {
        'img_shape': img_shape,
        'num_objs': num_objs,
        'img_name': file_name,
    }
    ret = {
        'input': inp,
        'input_r': inp_r,
        'dim': dim,
        'ori': ori,
        'pos': pos,
        'dim_real': dim_real,
        'ori_real': ori_real,
        'pos_real': pos_real,
        'dim_est': dim_est,
        'ori_est': ori_est,
        'pos_est': pos_est,
        'ori_est_scalar': ori_est_scalar,
        'calib_l': calib,
        'calib_r': calib_r,
        'trans_output_l': trans_output_l,
        'trans_output_r': trans_output_r,
        'reg_mask': reg_mask,
        'meta': meta
    }
    return ret
def __getitem__(self, index):
    """Build one detection training sample for the image at ``index``.

    The image is read from ``self.img_dir``, optionally randomly
    cropped/scaled/flipped (train split only), warped to the network input
    size, colour-augmented and normalised with ``self.mean`` / ``self.std``.
    For each of the first ``self.max_objs`` annotations a Gaussian is drawn
    on the class heatmap and the size / offset / index targets are filled.

    Side effect: when at least one object was drawn, an overlay of the
    heatmap on channel 1 of the normalised input is written to
    ``visual_ann_<index>.png`` in the current working directory.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh``
        (``wh`` is replaced by ``dense_wh``/``dense_wh_mask`` or
        ``cat_spec_wh``/``cat_spec_mask`` depending on ``self.opt``), plus
        ``reg`` when ``opt.reg_offset`` is set and ``meta`` when
        ``opt.debug > 0`` or the split is not ``'train'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale, then a random centre that stays inside the border.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to the configured range.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Affine warp of the image into the network input frame.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # A single channel of the 3-channel input is enough for the overlay.
    test_image = inp[1]

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    drew_any = False
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            drew_any = True
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    # Save the Gaussian heatmap overlaid on the input. This used to run once
    # per object (rewriting the same file each time) with a hard-coded
    # 960x640 target; writing once after the loop yields the same final file
    # and sizing from the input plane keeps it valid for any resolution.
    # NOTE(review): this writes to disk on every sample; consider gating it
    # behind opt.debug.
    if drew_any:
        # max over classes equals np.squeeze(hm) for the single-class case
        # and still yields a 2-D map when there are several classes.
        heatmap = cv2.resize(hm.max(axis=0),
                             (test_image.shape[1], test_image.shape[0]),
                             interpolation=cv2.INTER_CUBIC)
        new_image = test_image + heatmap * 2
        array_name = 'visual_ann_' + str(index) + '.png'
        matplotlib.image.imsave(array_name, new_image)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # Evaluation / debugging needs the ground-truth boxes; keep a dummy
        # row so the array always has shape (n, 6).
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 6), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one detection training sample from ``self.all_frames[index]``.

    Each entry of ``self.all_frames`` is read as ``(img_path, anns)``;
    ``anns`` is indexed with ``.iloc`` and the columns ``topLeftX``,
    ``topLeftY``, ``bottomRightX``, ``bottomRightY`` and ``name``
    (presumably a pandas DataFrame -- confirm against the loader).

    If the image cannot be read, the path is recorded in
    ``self.failed_images`` and the previously loaded image
    (``self.last_img``) is used instead.
    NOTE(review): in that case the annotations still belong to the failed
    image, and ``self.last_img`` must already exist.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh``
        (``wh`` is replaced by the dense or category-specific variants
        depending on ``self.opt``), plus ``reg`` when ``opt.reg_offset`` is
        set and ``meta`` when ``opt.debug > 0`` or the split is not
        ``'train'``.
    """
    curr_example = self.all_frames[index]
    img_path = curr_example[0]
    anns = curr_example[1]

    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print("Image '{}' failed!!!".format(img_path))
        self.failed_images.add(img_path)
        img = self.last_img
    else:
        self.last_img = img

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Colour augmentation is on unless opt.no_color_aug is set.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    # GEO: mean and std need to be calculated in datasets/dataset/gaila.py
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = len(self.cat_ids)
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    for k in range(num_objs):
        ann = anns.iloc[k]
        bbox = np.asarray([
            ann["topLeftX"], ann["topLeftY"], ann['bottomRightX'],
            ann['bottomRightY']
        ], dtype=np.float32)
        cls_id = int(self.cat_ids[ann['name']])

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 6), dtype=np.float32))
        # The image id is the integer file stem, e.g. ".../000123.jpg" -> 123.
        _id = int(os.path.basename(img_path).split('.')[0])
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': _id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one amodal-segmentation training sample for image ``index``.

    Annotations whose category is not in ``KINS_IDS`` or whose inmodal
    contour area is below 5 are dropped. The amodal (``segmentation``) and
    inmodal (``i_segm``) polygons are resampled to ``self.n_vertices``
    points, the image is randomly scaled/cropped/flipped (train split only),
    and per-object targets are written for at most ``self.max_objs``
    objects.

    Returns:
        dict with keys ``image``, ``shapes``, ``codes``, ``offsets``,
        ``occ_map``, ``hmap``, ``w_h_``, ``regs``, ``inds``, ``ind_masks``,
        ``votes``, ``c``, ``s`` and ``img_id``.
    """
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = []
    bboxes = []
    a_bboxes = []
    shapes = []
    a_shapes = []

    for anno in annotations:
        if anno['category_id'] not in KINS_IDS:
            continue  # excludes 3: person-sitting class for evaluation

        a_polygons = anno['segmentation'][0]  # only one mask per instance
        polygons = anno['i_segm'][0]
        a_contour = np.array(a_polygons).reshape((-1, 2))
        contour = np.array(polygons).reshape((-1, 2))

        if cv2.contourArea(contour.astype(np.int32)) < 5:
            continue  # remove tiny objects

        # Downsample both contours to a fixed number of vertices.
        fixed_contour = uniformsample(a_contour, self.n_vertices)
        i_contour = uniformsample(contour, self.n_vertices)

        shapes.append(np.ndarray.flatten(i_contour).tolist())
        a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(anno['bbox'])
        a_bboxes.append(anno['a_bbox'])

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    a_bboxes = np.array(a_bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    a_shapes = np.array(a_shapes, dtype=np.float32)

    if len(bboxes) == 0:
        # Placeholder object; its zero-sized box is skipped in the loop below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
    a_bboxes[:, 2:] += a_bboxes[:, :2]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(360, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    fmap_w = self.fmap_size['w']
    fmap_h = self.fmap_size['h']

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap of centers
    occ_map = np.zeros((1, fmap_h, fmap_w),
                       dtype=np.float32)  # grayscale map for occlusion levels
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of inmodal bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros(
        (self.max_objs, 2),
        dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes),
                      dtype=np.float32)  # gt amodal coefficients
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # regression for quantization error
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    votes_ = np.zeros((self.max_objs, self.vote_length),
                      dtype=np.float32)  # voting for heatmaps

    for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
            zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
        if k >= self.max_objs:
            # The per-object target arrays only have max_objs rows; writing
            # past them would raise IndexError.
            break

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                a_shape[2 * m] = width - a_shape[2 * m] - 1
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        # This box is the inmodal box.
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
        a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
        a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0, fmap_w - 1)
        a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0, fmap_h - 1)

        # Apply the scale and crop transform to both polygons.
        for m in range(self.n_vertices):
            a_shape[2 * m:2 * m + 2] = affine_transform(
                a_shape[2 * m:2 * m + 2], trans_fmap)
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)

        shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, fmap_w - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, fmap_h - 1)

        i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                        fmap_w - 1)
        i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                        fmap_h - 1)

        # Reverse the vertex order when the polygon is not clockwise.
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()

        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

        mass_center = np.mean(indexed_shape, axis=0)
        if not (h >= 1e-6 and w >= 1e-6):  # remove small bboxes
            continue

        centered_shape = indexed_shape - mass_center  # amodal mask shapes

        obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                         dtype=np.float32)
        obj_c_int = obj_c.astype(np.int32)

        radius = max(
            0,
            int(
                gaussian_radius((math.ceil(h), math.ceil(w)),
                                self.gaussian_iou)))
        draw_umich_gaussian(hmap[label], obj_c_int, radius)
        shapes_[k] = centered_shape.reshape((1, -1))
        center_offsets[k] = mass_center - obj_c
        codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                 self.dictionary,
                                 lmbda=self.sparse_alpha,
                                 max_iter=60)

        # Crop the amodal shape to the amodal bbox.
        a_shifted_poly = indexed_shape - np.array([a_bbox[0], a_bbox[1]])
        amodal_obj_mask = self.polys_to_mask(
            [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
            a_bbox[3], a_bbox[2])

        # Crop the inmodal shape to the same amodal bbox.
        i_shifted_poly = i_shape_clipped - np.array([a_bbox[0], a_bbox[1]])
        inmodal_obj_mask = self.polys_to_mask(
            [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
            a_bbox[3], a_bbox[2])

        # Convert to float type in image scale.
        obj_mask = (amodal_obj_mask + inmodal_obj_mask) * 255. / 2
        obj_mask = cv2.resize(obj_mask.astype(np.uint8),
                              dsize=(self.vote_vec_dim, self.vote_vec_dim),
                              interpolation=cv2.INTER_LINEAR) * 1.
        votes_[k] = obj_mask.reshape((1, -1)) / 255.

        w_h_[k] = 1. * w, 1. * h
        regs[k] = obj_c - obj_c_int  # discretization error
        inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
        ind_masks[k] = 1

        # occlusion level map gt
        occ_map[0] += self.polys_to_mask(
            [np.ndarray.flatten(indexed_shape).tolist()], fmap_h,
            fmap_w) * 1.

    occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ

    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'occ_map': occ_map,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'votes': votes_,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Build one rotated-box detection training sample for image ``index``.

    Each annotation supplies ``rbbox``, read here as
    ``(cx, cy, w, h, angle)``: element 0 is mirrored on flip, elements
    0/1 are passed as the centre, elements 2/3 as the sizes and the last
    element as the angle. The image is randomly cropped/scaled/flipped for
    the ``'train'`` and ``'debug1'`` splits, warped to the network input
    size and normalised. The box corners are mapped into the output frame,
    from which the centre, width and height targets are derived; the angle
    target depends on ``self.opt.dataset``.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and
        ``angle`` (the last two may be replaced by dense / category-specific
        variants according to ``self.opt``), plus ``reg`` when
        ``opt.reg_offset`` is set and ``meta`` when ``opt.debug > 0`` or the
        split is not ``'train'``. With the module flag ``DEBUG`` set,
        ``raw_img``, ``gt_det`` and ``img_id`` are added and the annotated
        image is written under ``./cache``.

    Raises:
        Exception: if ``self.opt.dataset`` is not ``'hrsc'``, ``'dota'`` or
            ``'rosku'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']  # image name
    img_path = os.path.join(self.img_dir, file_name)  # full image path
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)  # ground-truth boxes of the image
    num_objs = min(len(anns), self.max_objs)

    # Read the image and preprocess it.
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train' or self.split == 'debug1':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.7, 1.3, 0.1))
            w_border = self._get_border(512, width)
            h_border = self._get_border(512, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # The transform derived from the shifted c and s is reused for the boxes.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    if DEBUG:
        raw_img = inp.copy()
    # Scale 0-255 to 0-1.
    inp = (inp.astype(np.float32) / 255.)
    # Colour jitter.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    # Subtract the mean and divide by the std.
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)
    # End of image preprocessing.

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w),
                  dtype=np.float32)  # heatmap
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w),
                        dtype=np.float32)  # dense wh
    angle = np.zeros((self.max_objs, 1), dtype=np.float32)
    dense_angle = np.zeros((1, output_h, output_w),
                           dtype=np.float32)  # dense angle
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)  # centre offset
    ind = np.zeros((self.max_objs), dtype=np.int64)  # flat index of the centre
    reg_mask = np.zeros((self.max_objs),
                        dtype=np.uint8)  # stays 0 for objects lost by the warp
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)  # per-class wh
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)  # per-class wh mask
    cat_spec_angle = np.zeros((self.max_objs, num_classes),
                              dtype=np.float32)  # per-class angle
    cat_spec_angle_mask = np.zeros((self.max_objs, num_classes),
                                   dtype=np.uint8)  # per-class angle mask

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    # Iterate over all objects.
    for k in range(num_objs):
        ann = anns[k]
        # Copy: loadAnns hands back the stored annotation, and the flip
        # below writes into the box. Without the copy the mirrored centre
        # would persist in the annotation for later reads of this image.
        bbox = list(ann['rbbox'])
        cls_id = int(self.cat_ids[ann['category_id']])

        # Apply the same geometric changes to the box as to the image.
        if flipped:
            # Mirror cx.
            bbox[0] = width - bbox[0] - 1

        # Get the four corner points.
        pt1, pt2, pt3, pt4 = self._get_four_points(
            (bbox[0], bbox[1]), bbox[-1], bbox[2], bbox[3])
        pt1 = affine_transform((pt1[0, 0], pt1[0, 1]), trans_output)
        pt2 = affine_transform((pt2[0, 0], pt2[0, 1]), trans_output)
        pt3 = affine_transform((pt3[0, 0], pt3[0, 1]), trans_output)
        pt4 = affine_transform((pt4[0, 0], pt4[0, 1]), trans_output)

        # Centre, width and height in the output frame.
        ct = np.array([(pt1[0] + pt3[0]) / 2, (pt1[1] + pt3[1]) / 2],
                      dtype=np.float32)
        w = np.linalg.norm(pt1 - pt2)
        h = np.linalg.norm(pt1 - pt4)

        # Angle target; the convention depends on the dataset.
        if self.opt.dataset == 'hrsc':
            a = bbox[-1]
            if flipped:
                a = np.pi - a
        elif self.opt.dataset == 'dota':
            a = bbox[-1]
            # The dota json stores angles in 0..2pi.
            if flipped:
                a = 2 * np.pi - a
        elif self.opt.dataset == 'rosku':
            # The rosku json stores angles in -0.5pi..0.5pi.
            a = bbox[-1] / math.pi
            if flipped:
                a = -1 * a
            a = np.clip(a, -0.5, 0.5)
            a = a + 0.5
        else:
            raise Exception('Wrong dataset.')

        if DEBUG:
            color = [255, 0, 0]
            line_width = 2
            temp_a = a
            npt1, npt2, npt3, npt4 = self._get_four_points(
                (ct[0], ct[1]), temp_a, w, h)
            npt1 = self._float_to_int(npt1)
            npt2 = self._float_to_int(npt2)
            npt3 = self._float_to_int(npt3)
            npt4 = self._float_to_int(npt4)
            cv2.line(raw_img, npt1, npt2, color, line_width)
            cv2.line(raw_img, npt2, npt3, color, line_width)
            cv2.line(raw_img, npt3, npt4, color, line_width)
            cv2.line(raw_img, npt4, npt1, color, line_width)

        if 0 <= ct[0] <= output_w - 1 and 0 <= ct[1] <= output_h - 1:
            # Heatmap: spread the ground truth with a Gaussian.
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct_int = ct.astype(np.int32)
            # Draw the ground truth at the centre point.
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            angle[k] = 1. * a
            ind[k] = ct_int[1] * output_w + ct_int[0]  # index on the feature map
            reg[k] = ct - ct_int  # offset between real and integer centre
            reg_mask[k] = 1
            # wh targets
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            # angle targets
            cat_spec_angle[k, cls_id] = angle[k]
            cat_spec_angle_mask[k, cls_id] = 1
            if self.opt.dense_angle or self.opt.fsm:
                draw_dense_reg(dense_angle, hm.max(axis=0), ct_int,
                               angle[k], radius)
            # Store the scalar angle: angle[k] is a length-1 array and would
            # make the gt_det rows ragged when converted to an ndarray.
            gt_det.append([ct[0], ct[1], w, h, angle[k, 0], 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'angle': angle}
    # wh
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    # angle
    if self.opt.dense_angle or self.opt.fsm:
        dense_angle_mask = hm.max(axis=0, keepdims=True)
        ret.update({'dense_angle': dense_angle,
                    'dense_angle_mask': dense_angle_mask})
        if self.opt.dense_angle:
            del ret['angle']
    elif self.opt.cat_spec_angle:
        ret.update({'cat_spec_angle': cat_spec_angle,
                    'cat_spec_angle_mask': cat_spec_angle_mask})
        del ret['angle']

    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 7), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id,
                'img_name': file_name}
        ret['meta'] = meta
    if DEBUG:
        ret['raw_img'] = raw_img
        ret['gt_det'] = gt_det
        ret['img_id'] = img_id
        cv2.imwrite(os.path.join('./cache', '%s.jpg' % img_id), raw_img)
    return ret
def __getitem__(self, index):
    """Build one box + landmark training sample for the image at ``index``.

    The image is read from ``<data_dir>/JPEGImages/<img_id>.jpg`` and the
    annotations from ``_get_annotation_lm`` (when ``self.has_landmark == 1``)
    or ``_get_annotation``; each annotation unpacks to
    ``(box, landmarks, label)``. Ten landmark values are read per object,
    as five (x, y) pairs. Landmark targets are stored relative to the
    integer box centre and masked by ``kpts_mask`` when a point falls
    outside the output map.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh`` (``wh``
        is replaced by dense / category-specific variants depending on
        ``self.opt``), plus ``reg`` when ``opt.reg_offset`` is set,
        ``kpts_reg`` / ``kpts_mask`` when ``opt.kpts_reg`` is set and
        ``meta`` when ``opt.debug > 0`` or the split is not ``'train'``.
    """
    img_id = self.images[index]
    img_path = self.data_dir + f"/JPEGImages/{img_id}.jpg"
    if self.has_landmark == 1:
        anns = self._get_annotation_lm(img_id)
    else:
        anns = self._get_annotation(img_id)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    # Standardise: subtract the mean and divide by the std. This used to
    # multiply by the std, unlike every other loader in this file.
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kpts_reg = np.zeros((self.max_objs, 10), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    # kpts_mask: landmarks are regressed directly, not predicted by heatmap.
    kpts_mask = np.zeros((self.max_objs, 10), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = (draw_msra_gaussian if self.opt.mse_loss
                     else draw_umich_gaussian)

    gt_det = []
    for k in range(num_objs):
        box, landmarks, label = anns[k]
        bbox = np.array(box, dtype=np.float32)
        lm = np.array(landmarks, dtype=np.float32)
        cls_id = int(self.cat_ids[label])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # NOTE(review): the landmarks are not mirrored here, so on
            # flipped samples they no longer line up with the image.
            # Mirroring also needs the left/right pairs swapped, which
            # depends on the landmark ordering -- confirm before fixing.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # Landmarks are only used when their coordinate sum exceeds 1
            # (presumably missing landmarks are stored as zeros or
            # negatives -- confirm against the annotation reader).
            flag_lm = sum(lm[idx] for idx in range(10))
            if flag_lm > 1:
                for idx in range(0, 10, 2):
                    lm[idx:idx + 2] = affine_transform(
                        lm[idx:idx + 2], trans_output)
                    if (0 <= lm[idx] < output_w
                            and 0 <= lm[idx + 1] < output_h):
                        kpts_mask[k, idx:idx + 2] = 1
                        kpts_reg[k][idx] = (lm[idx] - ct_int[0])
                        kpts_reg[k][idx + 1] = (lm[idx + 1] - ct_int[1])

            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.kpts_reg:
        # Landmark regression targets.
        ret.update({'kpts_reg': kpts_reg})
        ret.update({'kpts_mask': kpts_mask})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = (np.array(gt_det, dtype=np.float32) if len(gt_det) > 0
                  else np.zeros((1, 6), dtype=np.float32))
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the sample (network input plus targets) for ``self.images[index]``.

    The image and its annotations are read through ``self.coco``; only
    annotations whose category is in ``self._valid_ids`` and that are not
    crowd regions are kept.  On the ``'train'`` split the crop center/scale
    are randomised, and rotation and horizontal flip are applied with the
    probabilities configured under ``cfg.DATASET``.  The image is warped to
    ``cfg.MODEL.INPUT_RES`` and every target is produced at
    ``cfg.MODEL.OUTPUT_RES``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``
        and ``'seg'``, plus ``'hps'``/``'hps_mask'`` (replaced by
        ``'dense_hps'``/``'dense_hps_mask'`` when ``cfg.LOSS.DENSE_HP``).
        ``'reg'``, ``'hm_hp'`` and ``'hp_offset'``/``'hp_ind'``/``'hp_mask'``
        are added when the matching ``cfg.LOSS`` flag is set, and ``'meta'``
        when ``cfg.DEBUG > 0`` or the split is not ``'train'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Keep only non-crowd annotations of the categories this dataset uses.
    anns = [
        ann for ann in anns
        if ann['category_id'] in self._valid_ids and ann['iscrowd'] != 1
    ]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    width = img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if self.cfg.DATASET.RANDOM_CROP:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.cfg.DATASET.SCALE
            cf = self.cfg.DATASET.SHIFT
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.cfg.DATASET.AUG_ROT:
            rf = self.cfg.DATASET.ROTATE
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.cfg.DATASET.FLIP:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    input_res = self.cfg.MODEL.INPUT_RES
    trans_input = get_affine_transform(c, s, rot, [input_res, input_res])
    inp = cv2.warpAffine(img, trans_input, (input_res, input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    mean = np.array(self.cfg.DATASET.MEAN).astype(np.float32)
    std = np.array(self.cfg.DATASET.STD).astype(np.float32)
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_res = self.cfg.MODEL.OUTPUT_RES
    num_joints = self.num_joints
    # Keypoints follow the rotated transform; boxes and masks use the
    # un-rotated one.
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
    trans_seg_output = get_affine_transform(c, s, 0,
                                            [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    seg = np.zeros((self.max_objs, output_res, output_res),
                   dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        segment = self.coco.annToMask(ann)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Swap the joint pairs listed in flip_idx after mirroring.
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            segment = segment[:, ::-1]
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        segment = cv2.warpAffine(segment, trans_seg_output,
                                 (output_res, output_res),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            # NOTE(review): every other option here is read from a nested
            # cfg group (LOSS/DATASET/MODEL); confirm cfg really exposes a
            # top-level ``hm_gauss``.
            radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else max(
                0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # Mask target: everything outside a window around the box
            # (box enlarged by pad_rate) is overwritten with 255.
            # NOTE(review): the window coordinates are multiplied by 2
            # although ``segment`` is output_res x output_res -- confirm
            # this scaling is intended.
            pad_rate = 0.3
            half_w = (1 + pad_rate) * w / 2
            half_h = (1 + pad_rate) * h / 2
            segment_mask = np.ones_like(segment)
            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is
            # the type it used to alias.
            x = (np.clip([ct[0] - half_w, ct[0] + half_w],
                         0, output_res - 1) * 2).astype(int)
            y = (np.clip([ct[1] - half_h, ct[1] + half_h],
                         0, output_res - 1) * 2).astype(int)
            segment_mask[y[0]:y[1], x[0]:x[1]] = 0
            segment[segment_mask == 1] = 255
            seg[k] = segment

            # Keypoints.
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                # No labelled joints: mark the center cell and drop the
                # object from the regression mask.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.cfg.hm_gauss \
                if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.cfg.LOSS.DENSE_HP:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        # With a rotation applied the whole center heatmap is set to 0.9999
        # and the box/keypoint regression masks are zeroed.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask,
        'seg': seg
    }
    if self.cfg.LOSS.DENSE_HP:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.cfg.LOSS.REG_OFFSET:
        ret.update({'reg': reg})
    if self.cfg.LOSS.HM_HP:
        ret.update({'hm_hp': hm_hp})
    if self.cfg.LOSS.REG_HP_OFFSET:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.cfg.DEBUG > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the detection sample for ``self.images[index]``.

    The heatmap peak of every object is placed at the center of its
    segmentation polygon (via ``self._get_polygon_center``) instead of the
    box center; ``reg`` then stores the offset from that integer peak to
    the floating-point box center.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``.  ``'wh'`` is replaced by ``'dense_wh'``/``'dense_wh_mask'``
        or ``'cat_spec_wh'``/``'cat_spec_mask'`` when the matching option is
        set; ``'reg'`` is added when ``opt.reg_offset``; ``'meta'`` is added
        when ``opt.debug > 0`` or the split is not ``'train'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)

    # All annotations of this image.
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # Center of the original image.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        # The scale is kept per axis (width, height) rather than
        # max(height, width), so ``s`` is a length-2 array in both branches.
        s = np.array([width, height], dtype=np.float32)
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale is disabled in this variant; only the crop
            # center is jittered, within +/- 1/8 of the image size around
            # the image center.
            w_range, h_range = img.shape[1] // 8, img.shape[0] // 8
            c[0] = np.random.randint(low=img.shape[1] // 2 - w_range,
                                     high=img.shape[1] // 2 + w_range)
            c[1] = np.random.randint(low=img.shape[0] // 2 - h_range,
                                     high=img.shape[0] // 2 + h_range)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            # ``s`` is a (w, h) array, so each center coordinate is shifted
            # by its own axis' extent; adding the whole array to a single
            # element raises ValueError.
            c[0] += s[0] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s[1] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        # Random horizontal flip.
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Warp the original image to the network input size.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)

    # Color augmentation (training only), then normalisation.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # Targets live on the down-sampled output grid.
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # sparse w/h
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)  # dense
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss \
        else draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # First polygon of the annotation as an (N, 2) array of x, y.
        segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))
        # Map the dataset category id through self.cat_ids.
        cls_id = int(self.cat_ids[ann['category_id']])

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            segmentation[:, 0] = width - segmentation[:, 0] - 1

        # Transform the two box corners to the output grid and clip.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Only the polygon center needs to be transformed.
        # NOTE(review): unlike the box, the polygon center is not clipped
        # to the output grid, so ``ind[k]`` can fall outside it -- confirm
        # annotations guarantee the center stays inside the crop.
        polygon_center = self._get_polygon_center(segmentation)
        polygon_center = affine_transform(polygon_center, trans_output)

        if h > 0 and w > 0:
            # Radius derived from the box extent, then shrunk to a third.
            radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
            radius = max(0, int(math.ceil(radius / 3)))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius

            box_center = np.array([(bbox[0] + bbox[2]) / 2,
                                   (bbox[1] + bbox[3]) / 2],
                                  dtype=np.float32)

            # The heatmap peak sits at the polygon center.
            ct = polygon_center
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)

            wh[k] = 1. * w, 1. * h
            # Flattened index of the peak on the output grid.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Offset: float box center minus integer polygon center.
            reg[k] = box_center - ct_int
            reg_mask[k] = 1

            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)

            # The ground-truth box is rebuilt around the (integer) box
            # center, not the polygon center.
            ct = box_center.astype(np.int32)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }

    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']

    if self.opt.reg_offset:
        ret.update({'reg': reg})

    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta

    return ret
def __getitem__(self, index):
    """Build the circle-detection sample for ``self.images[index]``.

    Each annotation supplies a ``'circle_center'`` and ``'circle_radius'``
    in addition to its ``'bbox'``.  The heatmap peak is drawn at the
    transformed circle center and ``cl`` holds the transformed radius.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` (index of
        the circle center) and ``'cl'``.  ``'dense_wh'``/``'dense_wh_mask'``
        or ``'cat_spec_wh'``/``'cat_spec_mask'`` are added when the matching
        option is set, ``'reg'`` (circle-center offset) when
        ``opt.reg_offset``, and ``'meta'`` when ``opt.debug > 0`` or the
        split is not ``'train'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random crop (default): random scale and random center.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            # Shift the center ...
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            # ... then jitter the scale.
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        # Random horizontal flip.
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # NOTE(review): the rotation is applied to the image only (on every
    # split); the annotations below are not rotated and c/s were computed
    # before it -- confirm this matches how rotated data is supplied.
    if self.opt.rotate > 0:
        if self.opt.rotate == 90:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        if self.opt.rotate == 180:
            img = cv2.rotate(img, cv2.ROTATE_180)
        if self.opt.rotate == 270:
            # cv2 has no ``img_rotate_90_counterclockwise`` attribute; the
            # rotate flag is ROTATE_90_COUNTERCLOCKWISE.
            img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # Scale pixel values to [0, 1], augment colors, then normalise.
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    # Circle targets.
    cl = np.zeros((self.max_objs, 1), dtype=np.float32)
    reg_cl = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind_cl = np.zeros((self.max_objs), dtype=np.int64)
    cat_spec_cl = np.zeros((self.max_objs, num_classes * 1),
                           dtype=np.float32)
    cat_spec_clmask = np.zeros((self.max_objs, num_classes * 1),
                               dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[int(ann['category_id'])])
        # Copy the center: the annotation object comes from self.coco, and
        # flipping it in place would change the stored annotation for
        # every later access to this image.
        center_point = list(ann['circle_center'])
        center_radius = ann['circle_radius']

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Same mirror mapping (x -> width - x - 1) as the box above.
            center_point[0] = width - center_point[0] - 1

        # Move box, circle center and radius onto the output grid.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        center_point_aff = affine_transform(center_point, trans_output)
        center_radius_aff = center_radius * trans_output[0][0]
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Keep the object only if its box is non-empty and its circle
        # center lies strictly inside the output grid.
        if h > 0 and w > 0 and center_point_aff[0] > 0 \
                and center_point_aff[1] > 0 \
                and center_point_aff[0] < output_w \
                and center_point_aff[1] < output_h:
            if self.opt.ez_guassian_radius:
                radius = center_radius_aff
            else:
                radius = gaussian_radius(
                    (math.ceil(center_radius_aff * 2),
                     math.ceil(center_radius_aff * 2)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius

            cp = center_point_aff
            cp_int = cp.astype(np.int32)
            draw_gaussian(hm[cls_id], cp_int, radius)
            ind_cl[k] = cp_int[1] * output_w + cp_int[0]
            reg_cl[k] = cp - cp_int
            reg_mask[k] = 1
            cr = center_radius_aff
            cl[k] = 1. * cr
            cat_spec_cl[k, cls_id * 1: cls_id * 1 + 1] = cl[k]
            cat_spec_clmask[k, cls_id * 1: cls_id * 1 + 1] = 1

            # Optionally leave circles that cross the border out of gt_det
            # (their training targets above are still set).
            if self.opt.filter_boarder:
                if cp[0] - cr < 0 or cp[0] + cr > output_w:
                    continue
                if cp[1] - cr < 0 or cp[1] + cr > output_h:
                    continue
            gt_det.append([cp[0], cp[1], cr, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask,
           'ind': ind_cl, 'cl': cl}
    # ``ret`` never holds a 'wh' entry in this variant, so it is removed
    # with pop() instead of ``del`` (which raised KeyError).
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        ret.pop('wh', None)
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        ret.pop('wh', None)
    if self.opt.reg_offset:
        ret.update({'reg': reg_cl})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 5), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Assemble the extreme-point sample for ``self.images[index]``.

    Every annotation provides four ``'extreme_points'`` which are used here
    in the order top, left, bottom, right.  One heatmap is produced per
    side plus a center heatmap, together with per-object sub-pixel offsets
    and flattened indices for each side.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm_t'``, ``'hm_l'``, ``'hm_b'``,
        ``'hm_r'`` and ``'hm_c'``; when ``opt.reg_offset`` is set it also
        carries ``'reg_mask'`` and the ``'reg_*'`` / ``'ind_*'`` arrays.
    """
    image_id = self.images[index]
    info = self.coco.loadImgs(ids=[image_id])[0]
    image = cv2.imread(os.path.join(self.img_dir, info['file_name']))

    rows, cols = image.shape[0], image.shape[1]
    center = np.array([cols / 2., rows / 2.])
    scale = max(rows, cols) * 1.0

    mirrored = False
    if self.split == 'train':
        if self.opt.not_rand_crop:
            # Scale jitter first, then center shift (order fixes the
            # sequence of random draws).
            scale_jit = self.opt.scale
            shift_jit = self.opt.shift
            scale = scale * np.clip(np.random.randn() * scale_jit + 1,
                                    1 - scale_jit, 1 + scale_jit)
            center[0] += cols * np.clip(np.random.randn() * shift_jit,
                                        -2 * shift_jit, 2 * shift_jit)
            center[1] += rows * np.clip(np.random.randn() * shift_jit,
                                        -2 * shift_jit, 2 * shift_jit)
        else:
            scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
            x_margin = self._get_border(128, cols)
            y_margin = self._get_border(128, rows)
            center[0] = np.random.randint(low=x_margin,
                                          high=cols - x_margin)
            center[1] = np.random.randint(low=y_margin,
                                          high=rows - y_margin)

        if np.random.random() < self.opt.flip:
            mirrored = True
            image = image[:, ::-1, :]

    # Network input: warp, scale to [0, 1], augment, normalise, CHW.
    in_res = self.opt.input_res
    trans_input = get_affine_transform(center, scale, 0, [in_res, in_res])
    inp = cv2.warpAffine(image, trans_input, (in_res, in_res),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    out_res = self.opt.output_res
    n_classes = self.opt.num_classes
    trans_output = get_affine_transform(center, scale, 0,
                                        [out_res, out_res])
    # Side heatmaps collapse to one channel when class-agnostic.
    n_side_maps = 1 if self.opt.agnostic_ex else n_classes

    hm_t, hm_l, hm_b, hm_r = (
        np.zeros((n_side_maps, out_res, out_res), dtype=np.float32)
        for _ in range(4))
    hm_c = np.zeros((n_classes, out_res, out_res), dtype=np.float32)
    reg_t, reg_l, reg_b, reg_r = (
        np.zeros((self.max_objs, 2), dtype=np.float32) for _ in range(4))
    ind_t, ind_l, ind_b, ind_r = (
        np.zeros((self.max_objs), dtype=np.int64) for _ in range(4))
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[image_id]))
    n_objects = min(len(anns), self.max_objs)

    paint = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian
    # Targets of each side, in the same order as the rows of ``extremes``.
    side_targets = (
        (hm_t, reg_t, ind_t),
        (hm_l, reg_l, ind_l),
        (hm_b, reg_b, ind_b),
        (hm_r, reg_r, ind_r),
    )

    for k in range(n_objects):
        ann = anns[k]
        extremes = np.array(ann['extreme_points'],
                            dtype=np.float32).reshape(4, 2)
        cls_id = int(self.cat_ids[ann['category_id']])
        hm_id = 0 if self.opt.agnostic_ex else cls_id

        if mirrored:
            extremes[:, 0] = cols - extremes[:, 0] - 1
            # Mirroring exchanges the left and right points.
            extremes[1], extremes[3] = (extremes[3].copy(),
                                        extremes[1].copy())
        for j in range(4):
            extremes[j] = affine_transform(extremes[j], trans_output)
        extremes = np.clip(extremes, 0, self.opt.output_res - 1)

        h = extremes[2, 1] - extremes[0, 1]
        w = extremes[3, 0] - extremes[1, 0]
        if not (h > 0 and w > 0):
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        cells = extremes.astype(np.int32)
        for side, (hm_side, reg_side, ind_side) in enumerate(side_targets):
            paint(hm_side[hm_id], cells[side], radius)
            reg_side[k] = extremes[side] - cells[side]
            ind_side[k] = cells[side, 1] * out_res + cells[side, 0]

        # Center: midpoint of left/right in x and of top/bottom in y.
        ct = [
            int((extremes[3, 0] + extremes[1, 0]) / 2),
            int((extremes[0, 1] + extremes[2, 1]) / 2)
        ]
        paint(hm_c[cls_id], ct, radius)
        reg_mask[k] = 1

    ret = {
        'input': inp,
        'hm_t': hm_t,
        'hm_l': hm_l,
        'hm_b': hm_b,
        'hm_r': hm_r,
        'hm_c': hm_c
    }
    if self.opt.reg_offset:
        ret.update({
            'reg_mask': reg_mask,
            'reg_t': reg_t,
            'reg_l': reg_l,
            'reg_b': reg_b,
            'reg_r': reg_r,
            'ind_t': ind_t,
            'ind_l': ind_l,
            'ind_b': ind_b,
            'ind_r': ind_r
        })
    return ret
def __getitem__(self, index):
    """Build the rotated-box detection sample for ``self.images[index]``.

    Boxes returned by ``self._coco_box_to_bbox`` carry a fifth element that
    is used as the box angle; it is mirrored as ``180 - angle`` on a
    horizontal flip and returned per object in ``'ang'``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``
        and ``'ang'``.  ``'wh'`` is replaced by
        ``'dense_wh'``/``'dense_wh_mask'`` or
        ``'cat_spec_wh'``/``'cat_spec_mask'`` when the matching option is
        set; ``'reg'`` is added when ``opt.reg_offset``; ``'meta'`` is added
        when ``opt.debug > 0`` or the split is not ``'train'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Number of objects, capped at self.max_objs.
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    # Image center.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        # Scale is the longest image side.
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    # Augment and warp the image; the result is the network input ``inp``.
    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale is disabled in this variant; only the crop
            # center is randomised.
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    # Affine warp to the input size.
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # Heatmap and regression targets on the down-sampled output grid.
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    # Box width/height per object.
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    # Box angle per object, shape (max_objs, 1).
    angs = np.zeros((self.max_objs, 1), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    # Fractional part of the center lost by truncating to the grid.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    # Flattened grid index of each object's center.
    ind = np.zeros((self.max_objs), dtype=np.int64)
    # 1 for slots that hold a real object, 0 otherwise.
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Mirror the angle as well.
            bbox[[4]] = 180 - bbox[[4]]
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:4] = affine_transform(bbox[2:4], trans_output)
        # The box corners are deliberately NOT clipped to the output grid:
        # the original author notes that clipping them was the key cause of
        # center-point drift in ship detection.  Only h and w are clipped.
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        ang = bbox[4]
        h = np.clip(h, 0, output_h - 1)
        w = np.clip(w, 0, output_w - 1)
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            # Because the corners are unclipped the center may fall off
            # the grid; such objects are skipped.
            if ct[0] < 0 or ct[0] > output_w - 1 or \
                    ct[1] < 0 or ct[1] > output_h - 1:
                continue
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            angs[k] = 1. * ang
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Offset target, e.g.
            # [98.97667, 2.3566666] - [98, 2] = [0.97667, 0.3566666].
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            # Seven columns: x1, y1, x2, y2, angle, score, class.
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                ang, 1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'ang': angs
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # The empty placeholder must have the same 7 columns as real rows.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 7), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one keypoint-detection sample (image tensor plus targets).

    Loads the image and COCO annotations for ``self.images[index]``, applies
    the training augmentations (random crop/shift/scale, optional rotation and
    horizontal flip), and rasterises the object-centre heatmap, keypoint
    heatmap and the per-object regression targets.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
        ``hps`` and ``hps_mask``. Depending on ``self.opt`` it additionally
        holds ``dense_hps``/``dense_hps_mask`` (replacing ``hps``/``hps_mask``),
        ``reg``, ``hm_hp``, ``hp_offset``/``hp_ind``/``hp_mask`` and ``meta``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # The target arrays below only have max_objs rows.
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop size (longest image side by default).
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # TODO: this is the place to change for multi-scale training.
            s = s  # * np.random.choice(np.arange(0.8, 1.5, 0.1))  # change 0.6 1.4
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            # Rotation augmentation: random angle clipped to +/- 2 * rotate.
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # The block below computes the dominant object angle and rotates by it.
    # NOTE(review): this branch reads angle_list, hm, hm_hp, output_res and
    # inp, all of which are only assigned further down in this function, so
    # enabling opt.angle_norm during training fails with an unbound-local
    # error here. It looks like it belongs after the object loop -- confirm
    # the intended position before turning the option on.
    if self.opt.angle_norm and self.split == 'train':
        angle_list = np.array(angle_list) % np.pi  # first normalise to [0, pi)
        # Histogram the angles into bins of pi / 9 and pick the fullest bin.
        angle_int = (angle_list // (np.pi / 9)).astype('int')
        angle_b = np.bincount(angle_int)
        index_rot = np.argmax(angle_b)
        ind_rot = (angle_list > (index_rot) * np.pi / 9) * (
            angle_list <= (index_rot + 1) * np.pi / 9)
        angle_rot = np.average(angle_list[ind_rot])
        # Rotate the image, the centre heatmap and the keypoint heatmap.
        angle_img_rot = angle_rot * (-180) / np.pi
        hm_rotate = hm.transpose(1, 2, 0)
        M = cv2.getRotationMatrix2D(
            ((output_res) / 2.0, (output_res) / 2.0), angle_img_rot, 1)
        hm_rotate = cv2.warpAffine(hm_rotate, M, (output_res, output_res))
        hm = hm_rotate.transpose(2, 0, 1)
        hp_rotate = hm_hp.transpose(1, 2, 0)
        hp_rotate = cv2.warpAffine(hp_rotate, M, (output_res, output_res))
        hm_hp = hp_rotate[np.newaxis, :]
        M = cv2.getRotationMatrix2D(
            ((self.opt.input_res) / 2.0, (self.opt.input_res) / 2.0),
            angle_img_rot, 1)
        inp = inp.transpose(1, 2, 0)
        inp = cv2.warpAffine(inp, M,
                             (self.opt.input_res, self.opt.input_res))
        inp = inp.transpose(2, 0, 1)
        # inp1=cv2.warpAffine(inp1,M,(self.opt.input_res,self.opt.input_res))
    # End of the angle-normalisation block.

    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    # inp1 = cv2.warpAffine(img, trans_input,
    #                      (self.opt.input_res, self.opt.input_res),
    #                      flags=cv2.INTER_LINEAR)
    inp = cv2.warpAffine(img,
                         trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    # Scale to [0, 1], optionally colour-augment, standardise, then HWC -> CHW.
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    # Boxes use the un-rotated output transform, keypoints the rotated one.
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    # Dense (per-pixel) targets.
    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    # Sparse (per-object / per-keypoint) targets, one row per slot.
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    angle_list = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # TODO: changed -- the class index is the raw category id minus one.
        cls_id = int(ann['category_id']) - 1
        # Only the first (x, y, visibility) triple is used, so the reshape
        # below requires num_joints == 1.
        pts = np.array(ann['keypoints'][0:3],
                       np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # for e in self.flip_idx:
            #     pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        # bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        h = np.clip(h, 0, output_res - 1)
        w = np.clip(w, 0, output_res - 1)
        if (h > 0 and w > 0) or (rot != 0):
            # Split the (enlarged) radius into per-axis radii in proportion
            # to sqrt(w) and sqrt(h).
            radius = gaussian_radius((math.ceil(h), math.ceil(w))) * 1.2
            sqrt_wh = np.sqrt(np.sqrt(h * w))
            radius_w = radius * np.sqrt(w) / sqrt_wh
            radius_h = radius * np.sqrt(h) / sqrt_wh
            radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, np.ceil(radius_w))
            radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, np.ceil(radius_h))
            # radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            # Keep the centre inside the output map.
            ct[0] = np.clip(ct[0], 0, output_res - 1)
            ct[1] = np.clip(ct[1], 0, output_res - 1)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the centre cell.
            ind[k] = ct_int[1] * output_res + ct_int[0]
            # Sub-pixel remainder lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                # No labelled keypoint: pin the centre cell and drop the
                # object from the regression mask.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        # Keypoint offset relative to the integer centre.
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints +
                               j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j],
                                           hm[cls_id],
                                           ct_int,
                                           pts[j, :2] - ct_int,
                                           radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            # TODO: changed -- atan2 of the (x, y) offset from the centre to
            # the first keypoint; passed on to the Gaussian drawer together
            # with the per-axis radii.
            angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
            angle_list.append(angle)
            draw_gaussian(hm[cls_id], ct_int, [radius_w, radius_h, angle])
            # draw_gaussian(hm[cls_id], ct_int, radiusw,radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
    if rot != 0:
        # Rotated samples: fill the whole centre heatmap with 0.9999 and zero
        # the box / keypoint regression masks.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        # Flatten the (joint, xy) axes and duplicate the mask for x and y.
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        # Fall back to a single all-zero row when no object was kept.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    # Debug helper for visualising the generated feature maps:
    # debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
    #                     theme=self.opt.debugger_theme)
    # self.debug(debugger, inp1, ret)
    return ret
def __getitem__(self, index):
    """Build one detection sample with an additional multi-channel mask target.

    Besides the usual centre heatmap / size / offset targets, every kept
    object's instance mask is warped to the output resolution and written
    into ``allmask``: nine channels receive overlapping top/middle/bottom x
    left/centre/right cells of the object's box (via ``self.assignroi``), and
    channels starting at ``self.opt.num_maskclasses`` receive the whole mask,
    selected by ``size2level``.

    Side effects: during training ``self.getcount`` is advanced to cycle the
    input size through ``self.patch_sizes``; for every ``index`` divisible by
    30 debug images are written under ``./results``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and
        ``allmask`` (float32, channels first), plus ``dense_wh`` /
        ``dense_wh_mask`` or ``cat_spec_wh`` / ``cat_spec_mask`` (each
        replacing ``wh``), ``reg`` and ``meta`` depending on ``self.opt`` and
        the split.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # The per-object target arrays only have max_objs rows.
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop size.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        # (x | pad) + 1 rounds up to a multiple of pad + 1 when pad + 1 is a
        # power of two -- presumably the network stride; TODO confirm.
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    if self.split == 'train':
        # Multi-scale training: a square patch size is picked from
        # patch_sizes, advancing one entry every batch_size calls.
        input_w = self.patch_sizes[(self.getcount//self.opt.batch_size) % len(self.patch_sizes)]
        input_h = input_w
        self.getcount = 0 if self.getcount == self.num_samples else self.getcount + 1

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
            c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
            s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    # Scale to [0, 1], optionally colour-augment, standardise, then HWC -> CHW.
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # levelnum is not defined in this function -- presumably a module-level
    # constant giving the number of size-level channels; TODO confirm.
    allmask = np.zeros((output_h, output_w, self.opt.num_maskclasses+levelnum), dtype=np.uint8)
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        if ann['category_id'] not in self._valid_ids:
            continue
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        # Integer box corners used to slice the mask ROI.
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            ### gen mask begin ###
            # clsbase = cls_id*9
            clsbase = 0*9
            mask = self.coco.annToMask(ann)
            if flipped:
                mask = mask[:, ::-1]
            mask = cv2.warpAffine(mask, trans_output,
                                  (output_w, output_h),
                                  flags=cv2.INTER_LINEAR)
            roi = mask[y1:y2, x1:x2]
            roi_h, roi_w = roi.shape
            # Objects smaller than 6 px on either side are dropped entirely
            # (no heatmap peak and no regression target either).
            if roi_h < 6 or roi_w < 6:
                continue

            # OR the full mask into two adjacent size-level channels.
            l = size2level(output_w*output_h, roi_w*roi_h)
            allmask[:,:,self.opt.num_maskclasses+l] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l], mask)
            allmask[:,:,self.opt.num_maskclasses+l+1] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l+1], mask)

            # Cell geometry: the centre cell spans +/- cell_w (cell_h) around
            # the ROI centre; ww / hh is the overlap margin between cells.
            roi_cx = roi_w//2
            roi_cy = roi_h//2
            cell_w = (roi_w+5)//6
            cell_h = (roi_h+5)//6
            # View into allmask, so assignroi can write through it.
            allmaskroi = allmask[y1:y2, x1:x2, :]
            ww = max(6,cell_w//4)
            hh = max(6,cell_h//4)
            # TOP
            self.assignroi(0, allmaskroi, roi, 0, 0, roi_cx-cell_w+ww, roi_cy-cell_h+hh)
            self.assignroi(1, allmaskroi, roi, roi_cx-cell_w-ww, 0, roi_cx+cell_w+ww, roi_cy-cell_h+hh)
            self.assignroi(2, allmaskroi, roi, roi_cx+cell_w-ww, 0, roi_w, roi_cy-cell_h+hh)
            # MIDDLE
            self.assignroi(3, allmaskroi, roi, 0, roi_cy-cell_h-hh, roi_cx-cell_w+ww, roi_cy+cell_h+hh)
            self.assignroi(4, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy-cell_h-hh, roi_cx+cell_w+ww, roi_cy+cell_h+hh)
            self.assignroi(5, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy-cell_h-hh, roi_w, roi_cy+cell_h+hh)
            # BOTTOM
            self.assignroi(6, allmaskroi, roi, 0, roi_cy+cell_h-hh, roi_cx-cell_w+ww, roi_h)
            self.assignroi(7, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy+cell_h-hh, roi_cx+cell_w+ww, roi_h)
            self.assignroi(8, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy+cell_h-hh, roi_w, roi_h)
            ### gen mask end ###

            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if self.opt.mse_loss:
                radius = self.opt.hm_gauss
                draw_gaussian(hm[cls_id], ct_int, radius)
            else:
                # Separate radii per axis, each computed from a square of
                # side w (resp. h).
                #draw_gaussian(hm[cls_id], ct_int, radius)
                xradius = int(gaussian_radius((math.ceil(w),math.ceil(w))))
                yradius = int(gaussian_radius((math.ceil(h),math.ceil(h))))
                draw_elipse_gaussian(hm[cls_id], ct_int, (xradius,yradius))

            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the centre cell.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel remainder lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    #cv2.imwrite("./results/hehe.jpg", (hm.max(axis=0).squeeze()*255).astype(np.uint8))
    # NOTE(review): debug dump executed from inside the dataset; it indexes
    # allmask channels up to 15 and writes to a relative ./results path.
    if index % 30 == 0:
        cv2.imwrite("./results/top.jpg", (allmask[:,:,0:3]*255).astype(np.uint8))
        cv2.imwrite("./results/middle.jpg", (allmask[:,:,3:6]*255).astype(np.uint8))
        cv2.imwrite("./results/bottom.jpg", (allmask[:,:,6:9]*255).astype(np.uint8))
        cv2.imwrite("./results/full.jpg", (((allmask[:,:,0:3]+allmask[:,:,3:6]+allmask[:,:,6:9]) > 0)*255).astype(np.uint8))
        cv2.imwrite("./results/large.jpg", (((allmask[:,:,9:12]) > 0)*255).astype(np.uint8))
        cv2.imwrite("./results/small.jpg", (((allmask[:,:,12:15]) > 0)*255).astype(np.uint8))

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'allmask': allmask.astype(np.float32).transpose(2, 0, 1)}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    #if self.opt.debug > 0 or not self.split == 'train':
    if not self.split == 'train':
        # Fall back to a single all-zero row when no object was kept.
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta

    # img = cv2.warpAffine(img, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR)
    # img = img*allmask[:,:,:3]
    # cv2.imwrite("./results/maskit.jpg", img)
    return ret
def __getitem__(self, index):
    """Build one sample: normalised image, centre heatmap and contour targets.

    For every non-crowd annotation the segmentation is converted to a single
    polygon, resampled to ``self.n_vertices`` vertices and clipped to the
    annotated box. After the (optionally random) crop/scale/flip, each contour
    is mapped to feature-map coordinates, re-ordered to start at its left-most
    vertex, normalised by its mass centre and spread, and encoded with
    ``fast_ista`` against ``self.dictionary``.

    At most ``self.max_objs`` objects are encoded; additional ones are
    dropped because the target arrays have exactly ``max_objs`` rows.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with
        ``image`` (float32, channels first),
        ``shapes`` (max_objs, 2 * n_vertices) normalised contours,
        ``codes`` (max_objs, n_codes),
        ``offsets`` (max_objs, 2) mass centre minus box centre,
        ``std`` (max_objs, 1) contour spread,
        ``hmap`` (num_classes, fmap_h, fmap_w),
        ``w_h_`` / ``regs`` (max_objs, 2), ``inds`` / ``ind_masks``
        (max_objs,), and the crop parameters ``c``, ``s`` plus ``img_id``.
    """
    img_id = self.images[index]
    # One lookup serves both the file name and the annotated image size.
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    w_img = int(img_info['width'])
    h_img = int(img_info['height'])

    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        if anno['iscrowd'] == 1:  # Excludes crowd objects
            continue

        polygons = get_connected_polygon_using_mask(
            anno['segmentation'], (h_img, w_img),
            n_vertices=self.n_vertices,
            closing_max_kernel=50)
        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))

        # Bring the contour to exactly n_vertices points.
        if len(contour) > self.n_vertices:
            fixed_contour = resample(contour, num=self.n_vertices)
        else:
            fixed_contour = turning_angle_resample(contour, self.n_vertices)

        # Resampling may overshoot; keep vertices inside the annotated box.
        fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                      gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                      gt_y1 + gt_h)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
        # Skip degenerate shapes. NaN never compares equal to anything (not
        # even itself), so finiteness has to be tested with np.isfinite.
        if contour_std < 1e-6 or not np.isfinite(contour_std):
            continue

        # Tight box around the resampled contour, in xyxy form.
        updated_bbox = [
            np.min(fixed_contour[:, 0]),
            np.min(fixed_contour[:, 1]),
            np.max(fixed_contour[:, 0]),
            np.max(fixed_contour[:, 1])
        ]

        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)

    if len(bboxes) == 0:
        # Zero-size placeholder; it is rejected by the size check below, so
        # all targets stay zero for images without usable objects.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(160, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    # Scale to [0, 1], colour-augment (train only), standardise in place.
    img = img.astype(np.float32) / 255.

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w = self.fmap_size['w']
    fmap_h = self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # normalised gt contours
    center_offsets = np.zeros((self.max_objs, 2),
                              dtype=np.float32)  # mass centre - bbox centre
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    contour_std_ = np.zeros((self.max_objs, 1),
                            dtype=np.float32)  # spread used to normalise
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # sub-pixel centre offsets
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    # The target arrays have max_objs rows; writing slot k for more objects
    # than that would raise IndexError, so surplus objects are dropped.
    num_objs = min(len(bboxes), self.max_objs)
    for k, (bbox, label, shape) in enumerate(
            zip(bboxes[:num_objs], labels[:num_objs], shapes[:num_objs])):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply the same scale/crop transform to every contour vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)

        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, fmap_w - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, fmap_h - 1)

        # Canonical vertex order: reverse the contour unless the checker
        # accepts it (a horizontal flip reverses the winding).
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()

        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

        mass_center = np.mean(indexed_shape, axis=0)
        # Small epsilon keeps the normalisation below away from zero.
        contour_std = np.std(indexed_shape, axis=0) + 1e-4
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue

        norm_shape = (indexed_shape - mass_center) / np.sqrt(
            np.sum(contour_std**2))

        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha,
                                     max_iter=60)
            contour_std_[k] = np.sqrt(np.sum(contour_std**2))
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            # Flattened (row-major) index of the centre cell.
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'std': contour_std_,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Build one detection sample from an image and its cached label array.

    Labels come from ``self.labels[index]`` (only when the matching file in
    ``self.label_files`` exists). Each row is read as
    ``[class, x_center, y_center, width, height]`` with the box normalised by
    the image size, and is converted to pixel ``[class, x_min, y_min, width,
    height]`` before being passed to ``self._coco_box_to_bbox``.

    At most ``self.max_objs`` label rows are encoded; additional ones are
    dropped because the target arrays have exactly ``max_objs`` rows.

    Args:
        index: Position in ``self.images`` / ``self.label_files``; it is also
            used as the ``img_id`` in the returned ``meta``.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh``,
        plus ``dense_wh`` / ``dense_wh_mask`` or ``cat_spec_wh`` /
        ``cat_spec_mask`` (each replacing ``wh``), ``reg`` and ``meta``
        depending on ``self.opt`` and the split.
    """
    img_id = index
    img_path = self.images[index]
    label_path = self.label_files[index]

    img = cv2.imread(img_path)
    height, width, _ = img.shape

    labels = []
    if os.path.isfile(label_path):
        x = self.labels[index]
        if x.size > 0:
            # Normalised centre/size -> pixel top-left corner + width/height.
            labels = x.copy()
            labels[:, 1] = width * (x[:, 1] - x[:, 3] / 2)
            labels[:, 2] = height * (x[:, 2] - x[:, 4] / 2)
            labels[:, 3] = width * (x[:, 3])
            labels[:, 4] = height * (x[:, 4])

    # c: crop centre (x, y); s: crop size.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    # Scale to [0, 1], optionally colour-augment, standardise, then HWC -> CHW.
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # The target arrays have max_objs rows; writing slot k for more label
    # rows than that would raise IndexError, so surplus rows are dropped.
    for k, ann in enumerate(labels[:self.max_objs]):
        bbox = self._coco_box_to_bbox(ann[1:5])
        cls_id = int(ann[0])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the centre cell.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel remainder lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # Fall back to a single all-zero row when no object was kept.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load one image and build its centre-point detection targets.

    The image is cropped/scaled (randomly during training), optionally
    flipped, warped to ``self.img_size`` and standardised. Every annotated
    box is mapped to feature-map coordinates and contributes a Gaussian peak
    to the class heatmap plus one row of size / offset / index targets.

    At most ``self.max_objs`` boxes are encoded; additional ones are dropped
    because the target arrays have exactly ``max_objs`` rows.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with
        ``image`` (float32, channels first),
        ``hmap`` (num_classes, fmap_h, fmap_w),
        ``w_h_`` and ``regs`` (max_objs, 2),
        ``inds`` (max_objs,) flattened centre indices,
        ``ind_masks`` (max_objs,) set to 1 for filled rows,
        and the crop parameters ``c``, ``s`` plus ``img_id``.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir,
        self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations],
                      dtype=np.float32)
    if len(bboxes) == 0:
        # Zero-size placeholder so the array maths below still works; it is
        # rejected by the size check in the loop.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    # Random scale / crop centre / horizontal flip (training only).
    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    # Warp the image to the network input size.
    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    # Scale to [0, 1], colour-augment (train only), standardise in place.
    img = (img.astype(np.float32) / 255.)

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w = self.fmap_size['w']
    fmap_h = self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    # Targets: class heatmap, box size and sub-pixel centre offset.
    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    # The target arrays have max_objs rows; writing slot k for more boxes
    # than that would raise IndexError, so surplus boxes are dropped.
    num_objs = min(len(bboxes), self.max_objs)
    for k, (bbox, label) in enumerate(
            zip(bboxes[:num_objs], labels[:num_objs])):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            # Gaussian radius derived from the box size.
            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            # Splat the Gaussian onto this class's heatmap.
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            # Record the offset lost by the integer cast.
            regs[k] = obj_c - obj_c_int  # discretization error
            # k-th object -> flattened feature-map index fmap_w * cy + cx.
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            # Mark row k as holding a real object.
            ind_masks[k] = 1

    return {
        'image': img,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load one image and build detection plus per-object segmentation targets.

    The image is read, optionally augmented (random crop/scale or shift/scale,
    horizontal flip, color augmentation when ``self.split == 'train'``), warped
    to the network input size and normalized. Each kept annotation contributes
    a heatmap peak, a size/offset entry and, when it has a segmentation, a
    mask warped to output resolution.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh', 'reg' and 'seg'.
        'seg' has shape (max_objs, output_h, output_w); inside the padded box
        of an object it holds 1 on mask pixels and 0 elsewhere, and 255
        everywhere else (presumably an ignore value; confirm against the
        loss). A 'meta' entry with 'c', 's', 'gt_det' and 'img_id' is added
        when ``self.opt.debug > 0`` or the split is not 'train'.

    Raises:
        OSError: If the image file cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Keep only annotations of valid categories (crowd boxes are not removed).
    anns = [ann for ann in anns if ann['category_id'] in self._valid_ids]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        raise OSError(f'Failed to read image: {img_path}')

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    # Boxes and masks share the same image-to-output transform.
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    seg = np.ones((self.max_objs, output_h, output_w), dtype=np.float32) * 255
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # Fraction by which the box is enlarged before masking the segment.
    pad_rate = 0.1
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        has_seg = ann['segmentation'] is not None
        segment = self.coco.annToMask(ann) if has_seg else None

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            if has_seg:
                segment = segment[:, ::-1]

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        if has_seg:
            segment = cv2.warpAffine(segment, trans_output,
                                     (output_w, output_h),
                                     flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)

        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            if has_seg:
                # Everything outside the padded box is overwritten with 255;
                # inside it the warped mask is binarized to 0/1.
                segment_mask = np.ones_like(segment)
                half_w = (1 + pad_rate) * w / 2
                half_h = (1 + pad_rate) * h / 2
                # Builtin int replaces the np.int alias removed in NumPy 1.24.
                x = np.clip([ct[0] - half_w, ct[0] + half_w],
                            0, output_w - 1).astype(int)
                y = np.clip([ct[1] - half_h, ct[1] + half_h],
                            0, output_h - 1).astype(int)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment > 0] = 1
                segment[segment_mask == 1] = 255
                seg[k] = segment

            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
                cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'seg': seg
    }

    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def img_transform(self, img, anns, flip_en=True, scale_lv=2, out_shift=None, crop=None):
    """Warp an image into the network input and transform its annotations.

    In the 'train' split a random target size (chosen from a list selected by
    ``scale_lv``), a random aspect distortion, an optional horizontal flip, an
    optional rotation (when ``self.opt.rotate > 0``) and a random shift of the
    output center are applied. Otherwise the image is mapped to the input
    size without the random size, flip or rotation.

    Args:
        img: Image array indexed as [row, column, channel].
        anns: Annotation dicts; at most ``self.max_objs`` are used.
        flip_en: Allow the random horizontal flip during training.
        scale_lv: Selects the list of candidate sizes (2: full range,
            1: larger sizes, anything else: smaller sizes).
        out_shift: Optional (x, y) center of the warped image inside the
            output canvas; defaults to the canvas center. It is not modified.
        crop: Optional [x0, y0, x1, y1] region; defaults to the full input.

    Returns:
        Tuple ``(inp, ann_list, output_w, output_h, meta)`` where ``inp`` is
        the normalized channels-first image, ``ann_list`` holds
        ``[bbox, cls_id, bbox2]`` per object (``bbox2`` comes from
        ``mask2box``), and ``meta`` is ``(c, s)``.
    """
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = [width, height]
        input_h, input_w = self.opt.input_h, self.opt.input_w
    crop = [0, 0, input_w, input_h] if crop is None else crop

    flipped = False
    rot_en = self.opt.rotate > 0
    rot = crpsh_x = crpsh_y = 0
    img_s = [width, height]
    if self.split == 'train':
        if scale_lv == 2:
            s = np.random.choice([192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896])
        elif scale_lv == 1:
            s = np.random.choice([512, 576, 640, 704, 768, 832])
        else:
            s = np.random.choice([192, 256, 320, 384, 448, 512])

        # Random aspect distortion factor in [1 - distortion, 1 + distortion).
        distortion = 0.6
        sd = np.random.random() * distortion * 2 - distortion + 1
        # The shorter image side gets the sampled size; the longer side keeps
        # the aspect ratio, scaled by the distortion factor.
        if height > width:
            s = [s, s * (height / width) * sd]
        else:
            s = [s * (width / height) * sd, s]

        # Maximum random shift of the output center along each axis.
        crop_w = crop[2] - crop[0]
        crop_h = crop[3] - crop[1]
        crpsh_x = max((s[0] - crop_w) / 2, crop_w * 0.2)
        crpsh_y = max((s[1] - crop_h) / 2, crop_h * 0.2)

        if flip_en and np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
        if rot_en:
            rot = np.random.random() * self.opt.rotate * 2 - self.opt.rotate
    elif not self.opt.keep_res:
        s = np.array([input_w, input_h], dtype=np.float32)

    # Build a fresh list so the caller's `out_shift` is never mutated. Both
    # random draws are always made (x first, then y), even when the shift
    # range is zero.
    base_center = [input_w / 2, input_h / 2] if out_shift is None else out_shift
    out_center = [
        base_center[0] + (np.random.random() * 2 - 1) * crpsh_x,
        base_center[1] + (np.random.random() * 2 - 1) * crpsh_y,
    ]

    trans_input = get_affine_transform(c, img_s, rot, s, out_center)
    trans_inv = get_affine_transform(c, img_s, rot, s, out_center, inv=1)
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio

    num_objs = min(len(anns), self.max_objs)
    ann_list = []
    border_xy, border_idx = get_border_coord(trans_inv, width, height, crop)
    for ann in anns[:num_objs]:
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # NOTE(review): no "- 1" here; confirm this matches the flip
            # convention used inside mask2box.
            bbox[[0, 2]] = width - bbox[[2, 0]]

        bbox[:2] = affine_transform(bbox[:2], trans_input)
        bbox[2:] = affine_transform(bbox[2:], trans_input)

        # Second box derived from the segmentation mask via mask2box.
        m = self.coco.annToMask(ann)
        bbox2 = mask2box(m, trans_input, border_xy, border_idx, flipped, width, height, crop)
        if rot_en:
            # With rotation enabled the mask-derived box replaces the
            # transformed annotation box.
            bbox = bbox2.astype(np.float32)

        ann_list.append([bbox, cls_id, bbox2])

    meta = (c, s)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)
    return inp, ann_list, output_w, output_h, meta
def __getitem__(self, index):
    """Load one image and build detection targets plus instance masks.

    Crowd annotations are dropped. The image and the stacked instance masks
    receive the same augmentation (random crop/scale or shift/scale and
    horizontal flip in the 'train' split) and the same affine warp to the
    network input size. Objects whose box or mask becomes empty after the
    warp are left out of the targets.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh', 'masks' and
        'gt_bbox_lbl'. 'masks' is a uint8 stack at input resolution and
        'gt_bbox_lbl' holds ``[x1, y1, x2, y2, cls_id]`` per kept object.
        Depending on ``self.opt``: 'wh' is replaced by 'dense_wh' /
        'dense_wh_mask' or 'cat_spec_wh' / 'cat_spec_mask', 'reg' is added,
        and 'meta' is added when debugging or outside the 'train' split.

    Raises:
        OSError: If the image file cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    assert os.path.exists(img_path), 'Image path does not exist: {}'.format(img_path)

    # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    target = self.coco.loadAnns(ids=ann_ids)

    # Separate out crowd annotations. These are annotations that signify a large crowd of
    # objects of said class, where there is no annotation for each individual object.
    target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

    img = cv2.imread(img_path)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    height, width = img.shape[0], img.shape[1]

    if len(target) > 0:
        # Pool all masks into one [height, width, num_objects] array; the
        # object axis goes last so the image-style flip and warp below apply.
        masks = np.stack([self.coco.annToMask(obj) for obj in target], axis=-1)
    else:
        # No annotations left: use an empty stack so the steps below still work.
        masks = np.zeros((height, width, 0), dtype=np.uint8)

    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            masks = masks[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    if self.rgb:
        inp = inp[..., ::-1]
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    num_masks = masks.shape[2]
    if num_masks > 0:
        masks = cv2.warpAffine(masks, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        # The warp can return a 2-D array for a single mask; restore the axis.
        if masks.ndim != 3:
            masks = np.expand_dims(masks, 2)
        assert masks.shape[2] == num_masks
        masks = masks.transpose(2, 0, 1)
        masks = (masks >= 0.5).astype(np.uint8)
    else:
        masks = np.zeros((0, input_h, input_w), dtype=np.uint8)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

    segm_masks = []
    gt_det = []
    num_objs = min(len(target), self.max_objs)
    for k in range(num_objs):
        ann = target[k]
        # convert bboxes to point_form (xmin, ymin, xmax, ymax)
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # After augmentation some masks will be empty.
        if h > 0 and w > 0 and masks[k].sum() > 0.0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            det = [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, cls_id]
            gt_det.append(det)
            segm_masks.append(masks[k])

    # Keep only the masks of objects that made it into the targets so that
    # masks and boxes stay index-aligned. If none were kept, `masks` is left
    # as warped above and `gt_det` remains an empty list.
    if len(segm_masks) > 0:
        masks = np.stack(segm_masks)
        gt_det = np.stack(gt_det)

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'masks': masks, 'gt_bbox_lbl': gt_det}

    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret