def __getitem__(self, index):
    """Load sample ``index`` and build the network input and detection targets.

    The image is read from ``self.img_dir``, optionally augmented when
    ``self.split == 'train'`` (random scale/crop or shift/scale jitter,
    horizontal flip, colour augmentation), warped to the network input size,
    normalised with ``self.mean`` / ``self.std`` and transposed with
    ``(2, 0, 1)``.  Targets are rendered on an output grid that is
    ``self.opt.down_ratio`` times smaller than the input.

    Args:
        index: Position in ``self.images`` of the image to load.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``.  ``'wh'`` is replaced by ``'dense_wh'``/``'dense_wh_mask'``
        or by ``'cat_spec_wh'``/``'cat_spec_mask'`` when the matching option
        is set; ``'reg'`` is added when ``self.opt.reg_offset`` is set and
        ``'meta'`` when ``self.opt.debug > 0`` or outside the training split.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """

    def _coco_box_to_bbox(box):
        # Annotation boxes are [x, y, w, h]; convert to [x1, y1, x2, y2].
        bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                        dtype=np.float32)
        return bbox

    def _get_border(border, size):
        # Halve the border until ``size - border > border`` so that the
        # random-centre range [border, size - border) is not empty.
        i = 1
        while size - border // i <= border // i:
            i *= 2
        return border // i

    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    # iscrowd=0 removes crowd annotations.
    ann_ids = self.coco.getAnnIds(imgIds=[img_id], iscrowd=0)
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Randomly choose a scale factor from np.arange(0.6, 1.4, 0.1).
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = _get_border(128, img.shape[1])
            h_border = _get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Affine warp crops/resizes the image to (input_w, input_h).
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # Output feature-map geometry.
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    # Same centre/scale as the input warp, but targeting the output grid.
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = _coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        # Clip to the output grid; a clipped box means a truncated object.
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Zero width or height after clipping: the box left the crop.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel offset between the float and the integer centre.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            # Ground-truth box on the output grid: x1, y1, x2, y2, 1, class.
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load sample ``index`` with detection targets and a segmentation mask.

    Besides the usual centre-heatmap targets, this variant returns a
    foreground mask under ``'seg'`` that receives the same flip and affine
    warp as the image.  The mask source is selected by three hard-coded
    flags: with ``PYFLOW`` it is read as a grayscale PNG from a fixed
    directory; otherwise it comes from COCO stuff pixel maps (when
    ``'coco'`` is in the image path) or is painted from the annotation
    boxes.

    Args:
        index: Position in ``self.images`` of the image to load.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``, ``'wh'``,
        ``'seg'`` and ``'ct_att'`` (the same array object as ``'hm'``).
        ``'wh'`` is replaced by the dense or category-specific variants when
        the matching option is set; ``'reg'`` and ``'meta'`` are added as
        configured.

    Raises:
        OSError: If the image, the mask or the stuff map cannot be loaded.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)

    # Hard-coded experiment switches.
    # NOTE(review): with HM_ATT = True no mask is built, yet 'seg' is still
    # put into the result below and would raise NameError -- confirm intent.
    HM_ATT = False
    PYFLOW = True
    ONE_CLASS_ONLY = True

    if not HM_ATT:
        if PYFLOW:
            if 'uav' in self.opt.dataset:
                seg_path = os.path.join(
                    '/store/datasets/UAV/bgsubs',
                    os.path.dirname(file_name).split('/')[-1],
                    os.path.basename(file_name).replace('jpg', 'png'))
            else:
                seg_path = os.path.join(
                    '/store/datasets/OlderUA-Detrac/pyflow-bgsubs',
                    os.path.dirname(file_name).split('/')[-1],
                    os.path.basename(file_name).replace('jpg', 'png'))

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    channel_counter = len(self.coco.getCatIds())

    if not HM_ATT:
        # Integer [x1, y1, x2, y2] boxes grouped by str(category_id).
        bboxes = {}
        for ann in anns:
            x, y, bw, bh = (ann['bbox'][0], ann['bbox'][1],
                            ann['bbox'][2], ann['bbox'][3])
            bboxes.setdefault(str(ann['category_id']), []).append(
                [int(x), int(y), int(x + bw), int(y + bh)])

    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    if not HM_ATT:
        if PYFLOW:
            seg_img = cv2.imread(seg_path, 0)
            if seg_img is None:
                raise OSError(
                    'cv2.imread could not load mask: {}'.format(seg_path))
        else:
            if 'coco' in img_path:
                if 'val' in img_path:
                    seg_dir = '/store/datasets/coco/annotations/stuff_val2017_pixelmaps'
                else:
                    seg_dir = '/store/datasets/coco/annotations/stuff_train2017_pixelmaps'
                stuff_path = os.path.join(seg_dir,
                                          file_name.replace('.jpg', '.png'))
                stuff_img = cv2.imread(stuff_path)
                if stuff_img is None:
                    raise OSError(
                        'cv2.imread could not load mask: {}'.format(
                            stuff_path))
                # A pixel becomes foreground (255) only when all three
                # channels equal (0, 214, 255); everything else becomes 0.
                seg_img = np.zeros([img.shape[0], img.shape[1]])
                seg_img[stuff_img[:, :, 0] == 0] += 1
                seg_img[stuff_img[:, :, 1] == 214] += 1
                seg_img[stuff_img[:, :, 2] == 255] += 1
                seg_img[seg_img == 3] = 255
                seg_img[seg_img < 255] = 0
            else:
                if not ONE_CLASS_ONLY:
                    # One mask channel per category label 1..channel_counter.
                    seg_img = np.zeros(
                        [channel_counter, img.shape[0], img.shape[1]])
                    for label in range(1, channel_counter + 1):
                        if str(label) in bboxes:
                            for bbox in bboxes[str(label)]:
                                seg_img[label - 1, bbox[1]:bbox[3],
                                        bbox[0]:bbox[2]] = 255
                else:
                    # Single mask shared by all categories.
                    seg_img = np.zeros([img.shape[0], img.shape[1]])
                    for label in range(1, channel_counter + 1):
                        if str(label) in bboxes:
                            for bbox in bboxes[str(label)]:
                                seg_img[bbox[1]:bbox[3],
                                        bbox[0]:bbox[2]] = 255

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            if not HM_ATT:
                # Width is the LAST axis for both (H, W) and (C, H, W) masks;
                # flipping axis 1 of a (C, H, W) mask would mirror it
                # vertically and desynchronise it from the image.
                seg_img = seg_img[..., ::-1]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    if not HM_ATT:
        # Dispatch on the actual rank: file-based masks are 2-D whatever
        # ONE_CLASS_ONLY says.
        if seg_img.ndim == 2:
            seg_inp = cv2.warpAffine(seg_img, trans_input,
                                     (input_w, input_h),
                                     flags=cv2.INTER_LINEAR)
        else:
            # warpAffine with dsize (input_w, input_h) yields arrays of
            # shape (input_h, input_w), so the buffer is (C, H, W).
            seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))
            for channel in range(seg_img.shape[0]):
                seg_inp[channel, :, :] = cv2.warpAffine(
                    seg_img[channel, :, :], trans_input,
                    (input_w, input_h), flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    if not HM_ATT:
        seg_inp = (seg_inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    if self.opt.elliptical_gt:
        draw_gaussian = draw_ellipse_gaussian
    else:
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if self.opt.elliptical_gt:
                # Stretch the radius along the longer box side by the
                # aspect ratio; the shorter side keeps the base radius.
                radius_x = radius if h > w else int(radius * (w / h))
                radius_y = radius if w >= h else int(radius * (h / w))
                draw_gaussian(hm[cls_id], ct_int, radius_x, radius_y)
            else:
                draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    if not HM_ATT:
        if seg_inp.ndim == 2:
            # Add a leading channel axis: (H, W) -> (1, H, W).
            seg_inp = np.expand_dims(seg_inp, 0)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'seg': seg_inp,
        'ct_att': hm
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load sample ``index`` and build the network input and detection targets.

    The image is read from ``self.img_dir``, optionally augmented when
    ``self.split == 'train'``, warped to the network input size and
    normalised with ``self.mean`` / ``self.std``.  Targets are rendered on
    an output grid ``self.opt.down_ratio`` times smaller than the input.

    Args:
        index: Position in ``self.images`` of the image to load.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``; ``'wh'`` is replaced by the dense or category-specific
        variants when the matching option is set, and ``'reg'`` / ``'meta'``
        are added as configured.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # Fail here with the offending path rather than printing it and then
        # crashing on ``img.shape`` one line later.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    # Same centre/scale as the input warp, but targeting the output grid.
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Boxes clipped down to zero width or height are skipped.
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel offset between the float and the integer centre.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load sample ``index`` with detection and 5-point landmark targets.

    The image is read from ``<self.data_dir>/JPEGImages/<img_id>.jpg``.
    Each annotation is unpacked as ``(box, landmarks, label)``; the first
    ten landmark values are treated as five (x, y) pairs and regressed as
    offsets from the integer object centre on the output grid.

    Args:
        index: Position in ``self.images`` of the image to load.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``; ``'wh'`` is replaced by the dense or category-specific
        variants when the matching option is set.  ``'reg'`` is added when
        ``self.opt.reg_offset`` is set, ``'kpts_reg'`` / ``'kpts_mask'`` when
        ``self.opt.kpts_reg`` is set, and ``'meta'`` when debugging or
        outside the training split.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    img_id = self.images[index]
    img_path = self.data_dir + f"/JPEGImages/{img_id}.jpg"
    if self.has_landmark == 1:
        anns = self._get_annotation_lm(img_id)
    else:
        anns = self._get_annotation(img_id)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    # NOTE(review): this multiplies by self.std where the usual
    # normalisation divides -- confirm how self.std is defined for this
    # dataset before changing it; existing checkpoints depend on it.
    inp = (inp - self.mean) * self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kpts_reg = np.zeros((self.max_objs, 10), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    # kpts_mask: keypoints are regressed directly, not predicted through a
    # heatmap, so each coordinate carries its own validity flag.
    kpts_mask = np.zeros((self.max_objs, 10), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        box, landmarks, label = anns[k]
        bbox = np.array(box, dtype=np.float32)
        lm = np.array(landmarks, dtype=np.float32)
        cls_id = int(self.cat_ids[label])
        if flipped:
            # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # Landmarks are used only when the first ten values sum to more
            # than 1; presumably absent landmarks are stored as
            # non-positive placeholders -- verify against the annotation
            # loader.
            if sum(lm[:10]) > 1:
                if flipped:
                    # The image was mirrored, so the landmark x-coordinates
                    # must be mirrored like the box; otherwise the offsets
                    # below point at the un-flipped positions.
                    # NOTE(review): left/right landmark indices are not
                    # swapped because the landmark order is not visible
                    # here -- confirm whether a swap is needed.
                    lm[0:10:2] = width - lm[0:10:2] - 1
                for idx in range(0, 10, 2):
                    lm[idx:idx + 2] = affine_transform(
                        lm[idx:idx + 2], trans_output)
                    # Supervise only landmarks inside the output grid.
                    if lm[idx] >= 0 and lm[idx] < output_w and \
                            lm[idx + 1] >= 0 and lm[idx + 1] < output_h:
                        kpts_mask[k, idx:idx + 2] = 1
                        kpts_reg[k][idx] = (lm[idx] - ct_int[0])
                        kpts_reg[k][idx + 1] = (lm[idx + 1] - ct_int[1])

            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.kpts_reg:
        # Add the keypoint-regression targets.
        ret.update({'kpts_reg': kpts_reg})
        ret.update({'kpts_mask': kpts_mask})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load sample ``index`` (mosaic by default) and build detection targets.

    Three loading paths exist, selected by thresholds on ``random.random()``:

    * mosaic (``mosaic_pro > 0``, i.e. practically always): image and boxes
      come from ``self.load_mosaic``; the first four values of each label
      row are used directly as the box (no xywh conversion);
    * paste augmentation (``positive_aug > 2``, currently never taken since
      ``random.random()`` is below 1): object crops from a second random
      image are pasted where they overlap no existing box;
    * plain COCO loading otherwise.

    The grayscale (``gray_pro > 2``) and imgaug (``iaa_pro > 2``) steps are
    likewise disabled by their thresholds.  Every object is assigned class
    index 0.

    Args:
        index: Position in ``self.images`` of the image to load.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        ``'wh'``; ``'wh'`` is replaced by the dense or category-specific
        variants when the matching option is set, and ``'reg'`` / ``'meta'``
        are added as configured.

    Raises:
        OSError: If ``cv2.imread`` cannot load an image.
    """
    mosaic_pro = random.random()
    if mosaic_pro > 0:
        img_id = self.images[index]
        img, labels = self.load_mosaic(index)
        # Flatten the per-tile label arrays into one list of rows.
        all_ann = []
        for da_label in labels:
            all_ann.extend(da_label.tolist())
        num_objs = min(len(all_ann), self.max_objs)
    else:
        positive_aug = random.random()
        if positive_aug > 2:
            index1 = random.randint(0, self.num_samples - 1)
            img_id = self.images[index]
            img_id1 = self.images[index1]
            file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
            file_name1 = self.coco.loadImgs(ids=[img_id1])[0]['file_name']
            img_path = os.path.join(self.img_dir, file_name)
            img_path1 = os.path.join(self.img_dir, file_name1)
            ann_ids = self.coco.getAnnIds(imgIds=[img_id])
            ann_ids1 = self.coco.getAnnIds(imgIds=[img_id1])
            anns = self.coco.loadAnns(ids=ann_ids)
            anns1 = self.coco.loadAnns(ids=ann_ids1)
            img = cv2.imread(img_path)
            img1 = cv2.imread(img_path1)
            if img is None or img1 is None:
                raise OSError('cv2.imread could not load image: {}'.format(
                    img_path if img is None else img_path1))

            src_h, src_w = img.shape[0], img.shape[1]
            for ann1 in anns1:
                # Box values may be floats; slicing needs integers.
                hand_x = int(ann1['bbox'][0])
                hand_y = int(ann1['bbox'][1])
                hand_w = int(ann1['bbox'][2])
                hand_h = int(ann1['bbox'][3])
                temp = img1[hand_y:hand_y + hand_h, hand_x:hand_x + hand_w]
                temp_h, temp_w = temp.shape[0], temp.shape[1]
                # Try up to 100 random positions for this crop.
                for _ in range(100):
                    # Skip crops larger than half of the shorter image side
                    # or that do not fit into the target at all.
                    if max(temp_h, temp_w) > 0.5 * min(src_w, src_h):
                        break
                    if src_w < temp_w or src_h < temp_h:
                        break
                    x_tmp = random.randint(0, src_w - temp_w)
                    y_tmp = random.randint(0, src_h - temp_h)
                    src_rect = [
                        x_tmp, y_tmp, x_tmp + temp_w, y_tmp + temp_h
                    ]
                    iou_all = 0
                    for gt in anns:
                        gt = [
                            gt['bbox'][0], gt['bbox'][1],
                            gt['bbox'][0] + gt['bbox'][2],
                            gt['bbox'][1] + gt['bbox'][3]
                        ]
                        iou_all = iou_all + self.compute_iou(gt, src_rect)
                    # Paste only where no existing box is overlapped.
                    if iou_all == 0:
                        img[y_tmp:y_tmp + temp_h,
                            x_tmp:x_tmp + temp_w] = temp
                        anns.append({
                            'bbox': [x_tmp, y_tmp, temp_w, temp_h],
                            'category_id': 1
                        })
                        break
            num_objs = min(len(anns), self.max_objs)
        else:
            img_id = self.images[index]
            file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
            img_path = os.path.join(self.img_dir, file_name)
            ann_ids = self.coco.getAnnIds(imgIds=[img_id])
            anns = self.coco.loadAnns(ids=ann_ids)
            num_objs = min(len(anns), self.max_objs)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise OSError(
                    'cv2.imread could not load image: {}'.format(img_path))

    # Optional grayscale augmentation (disabled: the threshold is never met).
    gray_pro = random.random()
    if gray_pro > 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # Optional imgaug colour jitter (disabled: the threshold is never met).
    iaa_pro = random.random()
    if iaa_pro > 2:
        aug_seq = iaa.Sequential(
            [iaa.MultiplyHueAndSaturation((0.5, 1.5), per_channel=True)])
        inp, _ = aug_seq(image=inp, bounding_boxes=None)

    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    # ind is the flattened centre index; reg is the sub-pixel offset of the
    # centre on the output feature map.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        if mosaic_pro > 0:
            # Mosaic label rows are used as [x1, y1, x2, y2] without the
            # xywh conversion applied to COCO annotations below.
            ann = all_ann[k]
            bbox = np.array([
                float(ann[0]), float(ann[1]), float(ann[2]), float(ann[3])
            ], dtype=np.float32)
        else:
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = 0
        if flipped:
            # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the multi-pose (box + keypoint) sample for dataset entry ``index``.

    Loads the image and its annotations, warps the image to a square
    ``input_res`` x ``input_res`` network input and rasterises the centre
    and keypoint heatmaps plus regression targets on the ``output_res`` grid.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh' and either
        'hps'/'hps_mask' or, with ``opts.dense_hp``,
        'dense_hps'/'dense_hps_mask'. 'reg', 'hm_hp',
        'hp_offset'/'hp_ind'/'hp_mask' and 'meta' are added when the
        matching options are set.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    img_id = self.images[index]
    # loadImgs(ids=[img_id]) returns a list of length 1.
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    cropped = False
    if self.split == 'train':
        # The threshold of 1 makes this always true; the draw is kept so the
        # random stream is consumed exactly as before.
        if np.random.random() < 1:
            cropped = True
            # splitext only strips the real extension, so names or relative
            # paths that contain additional dots are no longer truncated.
            file_name = os.path.splitext(file_name)[0] + 'crop.jpg'
            img_path = os.path.join(self.img_dir, file_name)
    if self.split == 'val':
        cropped = True

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'cv2.imread could not read image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    # Augmentation switches; all of them are disabled in this variant.
    rot = 0
    flipped = False
    rotted = False

    # input_res is max(input_h, input_w); with probability keep_inp_res_prob
    # the network input follows the image size rounded up to a multiple of 128.
    # NOTE(review): this mutates the shared self.opts, so the chosen
    # resolution persists for later samples until it is overwritten again.
    if np.random.random() < self.opts.keep_inp_res_prob and max(
            (height | 127) + 1, (width | 127) + 1) < 1024:
        self.opts.input_h = (height | 127) + 1
        self.opts.input_w = (width | 127) + 1
        self.opts.output_h = self.opts.input_h // self.opts.down_ratio
        self.opts.output_w = self.opts.input_w // self.opts.down_ratio
        self.opts.input_res = max(self.opts.input_h, self.opts.input_w)
        self.opts.output_res = max(self.opts.output_h, self.opts.output_w)

    trans_input = get_affine_transform(
        c, s, rot, [self.opts.input_res, self.opts.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opts.input_res, self.opts.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    # Change data layout to [3, input_res, input_res].
    inp = inp.transpose(2, 0, 1)

    # output_res is max(output_h, output_w): the size after down-sampling.
    output_res = self.opts.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, 2 * num_joints), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opts.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        # Force a floating dtype: with integer annotations np.array() would
        # yield an int array and the affine-transformed coordinates written
        # back below would be truncated silently.
        bbox = np.array(ann['bbox'] if cropped else ann['org_bbox'],
                        dtype=np.float64)
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'] if cropped else ann['org_keypoints'],
                       np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            for joint_idx in self.flip_idx:
                # Copy first, otherwise the swap would alias the same rows.
                pts[joint_idx[0]], pts[joint_idx[1]] = \
                    pts[joint_idx[1]].copy(), pts[joint_idx[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        if rotted:
            pts_rot = np.zeros((num_joints, 2))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts_rot[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
            bbox[:2] = np.min(pts_rot, axis=0)
            bbox[2:] = np.max(pts_rot, axis=0)
        bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int  # sub-pixel error of the centre [x, y]
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()  # sum of the per-joint visibility flags
            if num_kpts == 0:
                # No visible joint: mark the centre and exclude the object
                # from the regression losses.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)),
                                        min_overlap=1)
            hp_radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:  # this joint is visible
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if 0 <= pts[j, 0] < output_res and \
                            0 <= pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opts.dense_hp:
                            # Must happen before the centre gaussian is drawn.
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            # gt_det row: x0, y0, x1, y1, score, joint_x/joint_y pairs, cls_id
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opts.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opts.reg_offset:
        ret.update({'reg': reg})
    if self.opts.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opts.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opts.debug > 0 or not self.split == 'train':
        # Width of the empty fallback follows the row layout above
        # (4 box values + score + 2 * num_joints + cls_id) instead of a
        # hard-coded 40, which is only right for 17 joints.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6 + 2 * num_joints), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the detection sample with 3x3 part masks for entry ``index``.

    Loads the image and its annotations, applies the training augmentations
    (random crop / scale / shift, horizontal flip, colour), and produces the
    centre heatmap, box regression targets and the ``allmask`` tensor on the
    down-sampled output grid.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh' and 'allmask'
        (float32, channels first). 'dense_wh'/'dense_wh_mask' or
        'cat_spec_wh'/'cat_spec_mask' replace 'wh' when those options are
        set; 'reg' is added with ``opt.reg_offset`` and 'meta' outside of
        the training split.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'cv2.imread could not read image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    if self.split == 'train':
        # Multi-scale training: the square patch size is picked from
        # self.patch_sizes and advances once every opt.batch_size fetches.
        # NOTE(review): the nesting of this override relative to the
        # keep_res branch was reconstructed from flattened text - confirm.
        input_w = self.patch_sizes[(self.getcount // self.opt.batch_size)
                                   % len(self.patch_sizes)]
        input_h = input_w
        self.getcount = 0 if self.getcount == self.num_samples \
            else self.getcount + 1

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # Channels [0, num_maskclasses) are written through assignroi below;
    # channels from num_maskclasses onwards hold the per-size-level masks.
    mask_base = self.opt.num_maskclasses
    allmask = np.zeros((output_h, output_w, mask_base + levelnum),
                       dtype=np.uint8)
    for k in range(num_objs):
        ann = anns[k]
        # Filter first so skipped categories cost no further work.
        if ann['category_id'] not in self._valid_ids:
            continue
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            # --- mask generation ---
            mask = self.coco.annToMask(ann)
            if flipped:
                mask = mask[:, ::-1]
            mask = cv2.warpAffine(mask, trans_output, (output_w, output_h),
                                  flags=cv2.INTER_LINEAR)
            roi = mask[y1:y2, x1:x2]
            roi_h, roi_w = roi.shape
            if roi_h < 6 or roi_w < 6:
                # Too small to split into a 3x3 grid: drop the object.
                continue

            level = size2level(output_w * output_h, roi_w * roi_h)
            # The instance mask is OR-ed into its size level and the next.
            # NOTE(review): assumes level + 1 < levelnum - confirm the range
            # returned by size2level.
            allmask[:, :, mask_base + level] = np.bitwise_or(
                allmask[:, :, mask_base + level], mask)
            allmask[:, :, mask_base + level + 1] = np.bitwise_or(
                allmask[:, :, mask_base + level + 1], mask)

            roi_cx = roi_w // 2
            roi_cy = roi_h // 2
            cell_w = (roi_w + 5) // 6
            cell_h = (roi_h + 5) // 6
            allmaskroi = allmask[y1:y2, x1:x2, :]
            ww = max(6, cell_w // 4)
            hh = max(6, cell_h // 4)
            # Overlapping column / row extents of the 3x3 part grid; parts
            # are numbered row-major (top, middle, bottom x left..right).
            col_spans = (
                (0, roi_cx - cell_w + ww),
                (roi_cx - cell_w - ww, roi_cx + cell_w + ww),
                (roi_cx + cell_w - ww, roi_w),
            )
            row_spans = (
                (0, roi_cy - cell_h + hh),
                (roi_cy - cell_h - hh, roi_cy + cell_h + hh),
                (roi_cy + cell_h - hh, roi_h),
            )
            for row, (top, bottom) in enumerate(row_spans):
                for col, (left, right) in enumerate(col_spans):
                    self.assignroi(row * 3 + col, allmaskroi, roi,
                                   left, top, right, bottom)
            # --- end of mask generation ---

            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if self.opt.mse_loss:
                radius = self.opt.hm_gauss
                draw_gaussian(hm[cls_id], ct_int, radius)
            else:
                # Separate radii per axis for the elliptical gaussian.
                xradius = int(gaussian_radius((math.ceil(w), math.ceil(w))))
                yradius = int(gaussian_radius((math.ceil(h), math.ceil(h))))
                draw_elipse_gaussian(hm[cls_id], ct_int, (xradius, yradius))
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    # Debug dump of the mask channels for every 30th sample.
    # NOTE(review): this writes to ./results on every such fetch, also
    # during normal training - consider gating it behind a debug option.
    if index % 30 == 0:
        cv2.imwrite("./results/top.jpg",
                    (allmask[:, :, 0:3] * 255).astype(np.uint8))
        cv2.imwrite("./results/middle.jpg",
                    (allmask[:, :, 3:6] * 255).astype(np.uint8))
        cv2.imwrite("./results/bottom.jpg",
                    (allmask[:, :, 6:9] * 255).astype(np.uint8))
        cv2.imwrite("./results/full.jpg",
                    (((allmask[:, :, 0:3] + allmask[:, :, 3:6]
                       + allmask[:, :, 6:9]) > 0) * 255).astype(np.uint8))
        cv2.imwrite("./results/large.jpg",
                    (((allmask[:, :, 9:12]) > 0) * 255).astype(np.uint8))
        cv2.imwrite("./results/small.jpg",
                    (((allmask[:, :, 12:15]) > 0) * 255).astype(np.uint8))

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'allmask': allmask.astype(np.float32).transpose(2, 0, 1)
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    # Meta is attached for every non-training split, regardless of opt.debug.
    if not self.split == 'train':
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the box + 6-keypoint sample for dataset entry ``index``.

    The boxes come from the COCO-style annotations, while the keypoints are
    read from a per-image ``<image stem>_kps.npy`` file. The image is warped
    (with optional random crop / scale / shift / rotation during training)
    and the heatmaps and regression targets are drawn on the output grid.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh', 'hps', 'hps_mask';
        option-dependent keys ('dense_hps', 'dense_wh', 'cat_spec_wh', 'reg',
        'hm_hp', 'hp_offset', 'hp_ind', 'hp_mask', 'meta') are added or
        substituted according to ``self.opt``.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'cv2.imread could not read image: {}'.format(img_path))

    num_kps = 6

    # Load the keypoint annotation file that matches the image file name.
    # TODO: the annotation directory is hard-coded; make it configurable.
    stem = os.path.splitext(os.path.basename(img_path))[0]
    kps_path = os.path.join('/media/srt/dataset/L_Shelf_0114/Kps_Ann',
                            stem + '_kps.npy')
    kps_raw = np.load(kps_path)
    # Append a third column of ones, giving a [6, 3] array; the third column
    # is read below as the per-keypoint "visible" flag.
    kps_ann = np.column_stack((kps_raw, np.ones(num_kps)))

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    input_h, input_w = self.opt.input_h, self.opt.input_w  # set in opt
    rot = 0

    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Random rotation, as in the multi-pose dataset.
        if np.random.random() < self.opt.aug_rot:
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

    # Apply the affine transform to the input image.
    trans_input = get_affine_transform(c, s, rot, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
    trans_output_rot = get_affine_transform(c, s, rot, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w),
                  dtype=np.float32)  # heatmap of object centres
    hm_hp = np.zeros((num_kps, output_h, output_w),
                     dtype=np.float32)  # heatmap of keypoints
    dense_kps = np.zeros((num_kps, 2, output_h, output_w), dtype=np.float32)
    dense_kps_mask = np.zeros((num_kps, output_h, output_w),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    # Row j holds the flattened vectors from keypoint j to every keypoint.
    kps = np.zeros((num_kps, num_kps * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_kps * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_kps, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_kps), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_kps), dtype=np.int64)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        # Keypoints come from the .npy file, not from the COCO json.
        pts = np.array(kps_ann, np.float32).reshape(num_kps, 3)

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))

            # Map every flagged keypoint onto the output grid first, so the
            # pairwise vectors below are all in one coordinate frame. The
            # rotated transform is used because the input image was warped
            # with `rot`; with rot == 0 it equals trans_output.
            for j in range(num_kps):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)

            for j in range(num_kps):
                if pts[j, 2] > 0:
                    if 0 <= pts[j, 0] < output_w and \
                            0 <= pts[j, 1] < output_h:
                        # Vectors from keypoint j to all keypoints. The
                        # (num_kps, 2) difference is flattened to fill the
                        # whole row; assigning it to a 2-element slice
                        # raises ValueError.
                        # NOTE(review): confirm this layout against the
                        # loss that consumes 'hps' / 'hps_mask'.
                        kps[j] = (pts[:, :2] - pts[j, :2]).reshape(-1)
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_kps + j] = pts[j, :2] - pt_int
                        # Flat index of the keypoint on the output grid.
                        hp_ind[k * num_kps + j] = \
                            pt_int[1] * output_w + pt_int[0]
                        hp_mask[k * num_kps + j] = 1
                        if self.opt.dense_hp:
                            # Must happen before the centre gaussian is drawn.
                            print('draw dense hp!!!')
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)

            # Draw the gaussian for the object centre.
            draw_gaussian(hm[cls_id], ct_int, radius)
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_kps * 2).tolist() + [cls_id])

    # Compared with plain detection, 'hps' and 'hps_mask' are added.
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_kps * 2, output_h, output_w)
        dense_kps_mask = dense_kps_mask.reshape(num_kps, 1, output_h,
                                                output_w)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_kps * 2, output_h,
                                                output_w)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        # The empty fallback has the same width as a real row:
        # 4 box values + score + 2 * num_kps + cls_id.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6 + 2 * num_kps), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return ``(image, targets)`` for the image at ``self.flist[index]``.

    The image is read and converted from BGR to RGB. Boxes are loaded from
    ``<box_root>/<image stem>.txt`` as centre-format ``x y w h`` rows and
    converted to ``x1 y1 x2 y2``; when no box file exists the box
    ``[0, 0, 1, 1]`` is used. After the optional ``self.transform`` the
    boxes are rasterised onto the ``configs.hh`` x ``configs.ww`` grid.

    Args:
        index: Position in ``self.flist``.

    Returns:
        Tuple ``(image, ret)`` where ``ret`` is a dict with 'hm', 'wh', 'xy',
        'ind', 'dense_xy', 'dense_wh', 'dense_mask' and 'boxes'.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    image_fn = self.flist[index]
    image = cv2.imread(image_fn)
    if image is None:
        # cv2.imread returns None on failure; fail here with the path rather
        # than with an opaque OpenCV error inside cvtColor.
        raise FileNotFoundError(
            'cv2.imread could not read image: {}'.format(image_fn))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    box_fn = str(Path(self.box_root) / (Path(image_fn).stem + '.txt'))
    if osp.exists(box_fn):
        # reshape(-1, 4) accepts one row (the only layout handled before)
        # as well as several rows of "x y w h".
        xywh = np.loadtxt(box_fn).reshape(-1, 4)
        half_wh = xywh[:, 2:4] / 2
        boxes = np.concatenate(
            [xywh[:, 0:2] - half_wh, xywh[:, 0:2] + half_wh],
            axis=1).astype('float32')
    else:
        boxes = np.array([[0.0, 0.0, 1.0, 1.0]]).astype('float32')

    if self.transform:
        image, boxes = self.transform(image, boxes)

    # Generate the box ground truth for the loss.
    # The centre is obtained by multiplying box coordinates by grid_wh.
    # NOTE(review): presumably boxes are normalised to [0, 1] - confirm.
    output_h, output_w, grid_wh = \
        self.configs.hh, self.configs.ww, self.configs.grid_wh

    hm = np.zeros((self.configs.num_classes, output_h, output_w),
                  dtype=np.float32)
    wh = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    dense_xy = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.configs.max_objs), dtype=np.int64)
    # NOTE(review): reg_mask is filled below but is not part of the
    # returned dict.
    reg_mask = np.zeros((self.configs.max_objs), dtype=np.uint8)

    num_objs = min(boxes.shape[0], self.configs.max_objs)
    for k in range(num_objs):
        bbox = boxes[k]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h * grid_wh),
                                      math.ceil(w * grid_wh)))
            radius = max(0, int(radius))
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2.0 * grid_wh,
                 (bbox[1] + bbox[3]) / 2.0 * grid_wh],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            # Both axes are clipped with grid_wh.
            # NOTE(review): this matches the map only if
            # grid_wh == output_w == output_h - confirm.
            ct_int = np.clip(ct_int, 0, grid_wh - 1)
            # NOTE(review): the heatmap channel is selected by the object
            # index k, not by a class id - confirm that this is intended.
            draw_umich_gaussian(hm[k], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            draw_dense_reg(dense_xy, hm.max(axis=0), ct_int, reg[k], radius)

    hm_a = hm.max(axis=0, keepdims=True)
    dense_mask = np.concatenate([hm_a, hm_a], axis=0)
    ret = {
        'hm': hm,
        'wh': wh,
        'xy': reg,
        'ind': ind,
        'dense_xy': dense_xy,
        'dense_wh': dense_wh,
        'dense_mask': dense_mask,
        'boxes': boxes
    }
    return image, ret
def _get_dota_item(self, index): img_id = self.images[index] img_info = self.coco.loadImgs(ids=[img_id])[0] file_name = img_info['file_name'] filename = osp.splitext(file_name)[0] suffix = osp.splitext(file_name)[1] crop_str = list(map(str, img_info['crop'])) crop_img_path = osp.join('/code/data/DOTA/crop800_80', '_'.join([filename] + crop_str) + suffix) if not osp.isfile(crop_img_path): img_path = os.path.join('/media/data/DOTA/trainval/images', file_name) img = cv2.imread(img_path) sx, sy, ex, ey = img_info['crop'] img = img[sy:ey + 1, sx:ex + 1] cv2.imwrite(crop_img_path, img) else: img = cv2.imread(crop_img_path) ann_ids = self.coco.getAnnIds(imgIds=[img_id]) # 如果不deep拷贝,修改了anns就修改了self.coco里的标签 anns = copy.deepcopy(self.coco.loadAnns(ids=ann_ids)) # if True: if self.opt.debug: segs = [ann['segmentation'][0] for ann in anns] cvtools.imwrite( cvtools.draw_boxes_texts(img.copy(), segs, box_format='polygon'), self.opt.debug_dir + '/{}'.format(file_name)) if self.opt.flip: try: hv_flip = cvtools.RandomMirror(both=False) segs = [ann['segmentation'][0].copy() for ann in anns] for i, seg in enumerate(segs): if len(seg) != 8: segm_hull = cv2.convexHull(np.array(seg).reshape( -1, 2).astype(np.float32), clockwise=False) xywha = cv2.minAreaRect(segm_hull) segs[i] = cv2.boxPoints(xywha).reshape(-1).tolist() img, segs = hv_flip(img, segs) # if True: if self.opt.debug: cvtools.imwrite( cvtools.draw_boxes_texts(img.copy(), segs, box_format='polygon'), self.opt.debug_dir + '/flip_{}'.format(file_name)) for i in range(len(anns)): anns[i]['segmentation'][0] = list(segs[i]) bbox = cv2.boundingRect( np.array(segs[i], dtype=np.float32).reshape(-1, 2)) anns[i]['bbox'] = list(bbox) except Exception as e: print(e) return [] if self.opt.rotate: rotate = cvtools.RandomRotate() segs = [ann['segmentation'][0].copy() for ann in anns] for i, seg in enumerate(segs): if len(seg) != 8: segm_hull = cv2.convexHull(np.array(seg).reshape( -1, 2).astype(np.float32), clockwise=False) xywha = 
cv2.minAreaRect(segm_hull) segs[i] = cv2.boxPoints(xywha).reshape(-1).tolist() img, segs = rotate(img, segs) # if True: if self.opt.debug: cvtools.imwrite( cvtools.draw_boxes_texts(img.copy(), segs, box_format='polygon'), self.opt.debug_dir + '/rotate_{}'.format(file_name)) for i in range(len(anns)): anns[i]['segmentation'][0] = list(segs[i]) bbox = cv2.boundingRect( np.array(segs[i], dtype=np.float32).reshape(-1, 2)) anns[i]['bbox'] = list(bbox) height, width = img.shape[0], img.shape[1] c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) # self.opt.input_h = self.opt.input_w = 32 * random.randint(12, 20) if self.opt.keep_res: input_h = (height | self.opt.pad) + 1 input_w = (width | self.opt.pad) + 1 s = np.array([input_w, input_h], dtype=np.float32) else: s = max(img.shape[0], img.shape[1]) * 1.0 input_h, input_w = self.opt.input_h, self.opt.input_w # flipped = False if 'train' in self.split: if not self.opt.not_rand_crop: s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) w_border = self._get_border(128, img.shape[1]) h_border = self._get_border(128, img.shape[0]) c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) else: sf = self.opt.scale cf = self.opt.shift c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf) s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) # if np.random.random() < self.opt.flip: # flipped = True # img = img[:, ::-1, :] # c[0] = width - c[0] - 1 trans_input = get_affine_transform(c, s, 0, [input_w, input_h]) inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR) gt_boxes = np.array( [cvtools.x1y1wh_to_x1y1x2y2(ann['bbox']) for ann in anns]) img_box = cvtools.xywh_to_x1y1x2y2(np.array([[c[0], c[1], s, s]])) img_box[0, 0::2] = np.clip(img_box[0, 0::2], 0, width - 1) img_box[0, 1::2] = np.clip(img_box[0, 1::2], 0, height - 1) iofs = 
cvtools.bbox_overlaps(gt_boxes, img_box, mode='iof') ids = np.where(iofs > 0.7)[0] if len(ids) == 0: return [] anns = [anns[ind] for ind in ids] # if True: if self.opt.debug: segs = [ann['segmentation'][0].copy() for ann in anns] # 复制一份,否则是原视图 inp_draw = inp.copy() for k in range(len(segs)): seg = segs[k] for i in range(0, len(seg), 2): seg[i:i + 2] = affine_transform(seg[i:i + 2], trans_input) # seg[i] = np.clip(seg[i], 0, input_w - 1) # seg[i + 1] = np.clip(seg[i + 1], 0, input_h - 1) segm_hull = cv2.convexHull(np.array(seg).reshape(-1, 2).astype( np.float32), clockwise=False) xy, _, _ = cv2.minAreaRect(segm_hull) cv2.circle(inp_draw, (int(xy[0]), int(xy[1])), radius=5, color=(0, 0, 255), thickness=-1) cvtools.imwrite( cvtools.draw_boxes_texts(inp_draw, segs, draw_start=False, box_format='polygon'), osp.join(self.opt.debug_dir, 'trans_' + file_name)) inp = (inp.astype(np.float32) / 255.) if 'train' in self.split and not self.opt.no_color_aug: color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) inp = (inp - self.mean) / self.std inp = inp.transpose(2, 0, 1) output_h = input_h // self.opt.down_ratio output_w = input_w // self.opt.down_ratio num_classes = self.num_classes trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) rets = [] # out_size = [] # for down_ratio in down_ratios: # output_h = input_h // down_ratio # output_w = input_w // down_ratio # num_classes = self.num_classes # out_size.append([output_w, output_h]) # # trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) # # hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) # wh = np.zeros((self.max_objs, 2), dtype=np.float32) # if self.opt.a_method == 2: # angle = np.full((self.max_objs, 1), 0.5, dtype=np.float32) # else: # angle = np.zeros((self.max_objs, 1), dtype=np.float32) # dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) # reg = np.zeros((self.max_objs, 2), dtype=np.float32) # ind = np.zeros((self.max_objs), dtype=np.int64) # reg_mask 
= np.zeros((self.max_objs), dtype=np.uint8) # cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) # cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) # # # for k in range(num_objs): # # cls_id = int(self.cat_ids[anns[k]['category_id']]) # # draw_heatmap(hm[cls_id], osp.join(self.opt.debug_dir, 'heatmap_' + str(cls_id) + '_' + file_name)) # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'a': angle} # if self.opt.dense_wh: # hm_a = hm.max(axis=0, keepdims=True) # dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) # ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) # del ret['wh'] # elif self.opt.cat_spec_wh: # ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask}) # del ret['wh'] # if self.opt.reg_offset: # ret.update({'reg': reg}) # # if self.opt.debug > 0 or not self.split == 'train': # # gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ # # np.zeros((1, 6), dtype=np.float32) # # meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} # # ret['meta'] = meta # rets.append(ret) # # draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ # draw_umich_gaussian # if not self.opt.fpn: # output_w, output_h = out_size[0] # trans_output = get_affine_transform(c, s, 0, out_size[0]) # # for k in range(num_objs): # ann = anns[k] # cls_id = int(self.cat_ids[ann['category_id']]) # # # 确定GT分配给哪个FPN层 # if self.opt.fpn: # bbox = ann['bbox'] # fpn_k = int(math.log(224. 
/ math.sqrt(bbox[2] * bbox[3]), 2)) # if fpn_k < 0: # fpn_k = 0 # if fpn_k > 2: # fpn_k = 2 # ret = rets[fpn_k] # output_w, output_h = out_size[fpn_k] # trans_output = get_affine_transform(c, s, 0, out_size[fpn_k]) # # segm = np.array(ann['segmentation'][0]) # # if flipped: # # for i in range(0, len(segm), 2): # # segm[i] = width - segm[i] - 1 # for i in range(0, len(segm), 2): # segm[i:i + 2] = affine_transform(segm[i:i + 2], trans_output) # segm[i] = np.clip(segm[i], 0, output_w - 1) # segm[i + 1] = np.clip(segm[i + 1], 0, output_h - 1) # # segm_hull = cv2.convexHull(segm.reshape(-1, 2).astype(np.float32), # clockwise=False) # xy, (w, h), a = cv2.minAreaRect(segm_hull) # hm = ret['hm'] # reg_mask = ret['reg_mask'] # ind = ret['ind'] # wh = ret['wh'] # angle = ret['a'] # if h > 0 and w > 0: # a, w, h = convert_angle(a, w, h, self.opt.a_method) # ct = np.array(xy, dtype=np.float32) # ct_int = ct.astype(np.int32) # radius = gaussian_radius((math.ceil(h), math.ceil(w))) # radius = max(0, int(radius)) # radius = self.opt.hm_gauss if self.opt.mse_loss else radius # # radius = np.array((h / 3., w / 3.), np.int32) # draw_gaussian(hm[cls_id], ct_int, radius) # wh[k] = 1. * w, 1. * h # gt_a = a / 90. # if self.opt.a_method == 2: # gt_a = (a + 90.) / 180. 
# angle[k] = gt_a # ind[k] = ct_int[1] * output_w + ct_int[0] # if 'reg' in ret: # ret['reg'][k] = ct - ct_int # reg_mask[k] = 1 # if 'cat_spec_wh' in ret: # ret['cat_spec_wh'][k, cls_id * 2: cls_id * 2 + 2] = wh[k] # if 'cat_spec_mask' in ret: # ret['cat_spec_mask'][k, cls_id * 2: cls_id * 2 + 2] = 1 # if self.opt.dense_wh: # draw_dense_reg(ret['dense_wh'], hm.max(axis=0), ct_int, # wh[k], radius) draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ draw_umich_gaussian hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) wh = np.zeros((self.max_objs, 2), dtype=np.float32) if self.opt.a_method == 2: angle = np.full((self.max_objs, 1), 0.5, dtype=np.float32) else: angle = np.zeros((self.max_objs, 1), dtype=np.float32) dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) reg = np.zeros((self.max_objs, 2), dtype=np.float32) ind = np.zeros((self.max_objs), dtype=np.int64) reg_mask = np.zeros((self.max_objs), dtype=np.uint8) cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) anns = re_anns(anns, trans_output, output_w, output_h) num_objs = min(len(anns), self.max_objs) # if True: if self.opt.debug: gt_img = cv2.warpAffine(img, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR) segs = [ann['segmentation'][0] for ann in anns] cvtools.imwrite( cvtools.draw_boxes_texts(gt_img, segs, draw_start=False, box_format='polygon'), osp.join(self.opt.debug_dir, 'gt_' + file_name)) bad_num = 0 gt_det = [] for k in range(num_objs): ann = anns[k] cls_id = int(self.cat_ids[ann['category_id']]) segm = np.array(ann['segmentation'][0]) # if flipped: # for i in range(0, len(segm), 2): # segm[i] = width - segm[i] - 1 # for i in range(0, len(segm), 2): # segm[i:i + 2] = affine_transform(segm[i:i + 2], trans_output) # segm[i] = np.clip(segm[i], 0, output_w - 1) # segm[i + 1] = np.clip(segm[i + 1], 0, output_h - 1) segm_hull = 
cv2.convexHull(segm.reshape(-1, 2).astype(np.float32), clockwise=False) xy, (w, h), a = cv2.minAreaRect(segm_hull) if xy[0] > output_w or xy[0] < 0 or xy[1] > output_h or xy[1] < 0: # TODO:查明为何会出现这种情况。P0750 # xy中y下出现负值或大于127 # print(file_name, ann, segm, xy) bad_num += 1 continue if h > 0 and w > 0: a, w, h = convert_angle(a, w, h, self.opt.a_method) ct = np.array(xy, dtype=np.float32) ct_int = ct.astype(np.int32) radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) radius = self.opt.hm_gauss if self.opt.mse_loss else radius # radius = np.array((h / 3., w / 3.), np.int32) draw_gaussian(hm[cls_id], ct_int, radius) wh[k] = 1. * w, 1. * h gt_a = a / 90. if self.opt.a_method == 2: gt_a = (a + 90.) / 180. angle[k] = gt_a ind[k] = ct_int[1] * output_w + ct_int[0] reg[k] = ct - ct_int reg_mask[k] = 1 cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k] cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1 if self.opt.dense_wh: draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) gt_det.append(segm + [cls_id]) else: bad_num += 1 if bad_num == num_objs: return [] ret = { 'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'a': angle } if self.opt.dense_wh: hm_a = hm.max(axis=0, keepdims=True) dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) del ret['wh'] elif self.opt.cat_spec_wh: ret.update({ 'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask }) del ret['wh'] if self.opt.reg_offset: ret.update({'reg': reg}) if self.opt.debug > 0 or 'train' not in self.split: gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ np.zeros((1, 6), dtype=np.float32) meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} ret['meta'] = meta rets.append(ret) return rets
def __getitem__(self, index):
    """Return the network input and training targets for sample ``index``.

    The image path is ``self.images[index]`` and the annotation text file is
    ``self.anno[index]``.  Each annotation row is ``class, cx, cy, w, h``;
    the four box columns are multiplied by the image width/height below
    (YOLO annotation layout, per the original comment) and rewritten in
    place as absolute ``x1, y1, x2, y2`` corners.

    When ``self.split == 'train'`` random shear, scale, horizontal/vertical
    flip and rotation are applied, each controlled by the matching
    ``self.<name>`` attribute.

    Returns:
        dict: for ``opt.task == 'fcos'`` the keys are ``input``, ``target``
        and ``mask``.  Otherwise ``input``, ``hm``, ``reg_mask``, ``ind``
        and ``wh`` (``wh`` is replaced by dense / category-specific
        variants when those options are set), plus optional ``reg``,
        ``obj`` and ``meta``.

    Raises:
        IOError: if the image cannot be read.
    """
    img_id = self.images[index]
    img = cv2.imread(img_id)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the path rather than on ``img.shape`` below.
        raise IOError('Failed to read image: {}'.format(img_id))
    height, width = img.shape[0], img.shape[1]

    # YOLO annotation conversion.
    # Warnings are silenced around loadtxt -- presumably the empty-file
    # warning; TODO confirm.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        anns = np.loadtxt(self.anno[index]).reshape(-1, 5)
    if anns.size:
        x1 = width * (anns[:, 1] - anns[:, 3] / 2)
        y1 = height * (anns[:, 2] - anns[:, 4] / 2)
        x2 = width * (anns[:, 1] + anns[:, 3] / 2)
        y2 = height * (anns[:, 2] + anns[:, 4] / 2)
        anns[:, 1] = x1
        anns[:, 2] = y1
        anns[:, 3] = x2
        anns[:, 4] = y2
    num_objs = min(len(anns), self.max_objs)

    # Data transformation parameters: centre, scale, rotation, shear.
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    rotation = 0
    shear = 0
    input_h, input_w = self.opt.input_h, self.opt.input_w

    hflipped = False
    vflipped = False
    if self.split == 'train':
        if self.shear:
            shear = np.clip(np.random.randn() * self.shear,
                            -self.shear, self.shear)
        if shear:
            # A negative shear is done as mirror -> positive shear -> mirror,
            # with the boxes mirrored alongside the image.
            if shear < 0:
                img = img[:, ::-1, :]
                anns[:, [1, 3]] = width - anns[:, [3, 1]] - 1
            M = np.array([[1, abs(shear), 0], [0, 1, 0]])
            nW = width + abs(shear * height)
            # x' = x + |shear| * y, applied to (x1, y1) and (x2, y2).
            anns[:, [1, 3]] += ((anns[:, [2, 4]]) * abs(shear)).astype(int)
            img = cv2.warpAffine(img, M, (int(nW), height))
            if shear < 0:
                img = img[:, ::-1, :]
                anns[:, [1, 3]] = nW - anns[:, [3, 1]] - 1
            # The sheared canvas is wider: re-centre and update the scale.
            c[0] = nW / 2.
            s = max(nW, s)
            width = nW

        sf = self.scale
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        # Only the image and centre are flipped here; the boxes are flipped
        # inside the per-object loop below.
        if self.hflip and np.random.random() < self.hflip:
            hflipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1
        if self.vflip and np.random.random() < self.vflip:
            vflipped = True
            img = img[::-1, :, :]
            c[1] = height - c[1] - 1

        # Rotation parameter.
        # NOTE(review): nesting was reconstructed from flattened source;
        # confirm rotation is meant to be train-only.
        if self.rotation:
            rotation = np.clip(np.random.randn() * self.rotation,
                               -self.rotation, self.rotation)

    trans_input = get_affine_transform(c, s, rotation, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, rotation,
                                        [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    obj = np.zeros((output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)
    target = np.zeros((self.max_objs, 5), dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    # FCOS targets are expressed at input resolution, not on the
    # down-sampled output map.
    is_fcos = self.opt.task in ('fcos',)  # , 'ttf'
    if is_fcos:
        trans_output = trans_input
        output_w, output_h = input_w, input_h

    for k in range(num_objs):
        bbox = anns[k, 1:]  # view into anns; edited in place
        cls_id = int(anns[k, 0])
        if hflipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        if vflipped:
            bbox[[1, 3]] = height - bbox[[3, 1]] - 1

        # Transform all four corners and take their axis-aligned hull, so
        # the box still encloses the object after rotation.
        lt = affine_transform(bbox[:2], trans_output)
        rb = affine_transform(bbox[2:], trans_output)
        rt = affine_transform(bbox[[2, 1]], trans_output)
        lb = affine_transform(bbox[[0, 3]], trans_output)
        bbox[:2] = np.min([lt, rb, rt, lb], axis=0)
        bbox[2:] = np.max([lt, rb, rt, lb], axis=0)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        if is_fcos:
            target[k] = cls_id, bbox[0], bbox[1], bbox[2], bbox[3]
            if h > 0 and w > 0:
                reg_mask[k] = 1
            continue

        if h > 0 and w > 0:
            obj[int(bbox[1]):int(bbox[3]) + 1,
                int(bbox[0]):int(bbox[2]) + 1] = 1
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = 2 * radius / 3 if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]  # flat index of centre
            reg[k] = ct - ct_int                       # sub-pixel offset
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id
            ])

    if is_fcos:
        return {'input': inp, 'target': target, 'mask': reg_mask}

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.reg_obj:
        ret.update({'obj': obj[np.newaxis]})
    if self.opt.debug > 0 or self.split != 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return the network input and rotated-box / keypoint targets.

    Annotations come from ``self.coco``.  ``ann['bbox']`` is indexed as four
    ``(x, y)`` corner pairs (8 values) and ``ann['segmentation']`` is
    reshaped to ``(opt.num_joints, 2)`` keypoints.  No geometric
    augmentation is applied in this variant (``rot`` stays 0); only colour
    augmentation, and only for the ``'train'`` split.

    Side effect: ``self.num_joints`` is overwritten with
    ``self.opt.num_joints`` on every call.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind``, ``hps``,
        ``hps_mask`` (the last two replaced by ``dense_hps`` /
        ``dense_hps_mask`` when ``opt.dense_hp``), plus optional ``reg``,
        ``hm_hp``, ``hp_offset``/``hp_ind``/``hp_mask`` and ``meta``.

    Raises:
        IOError: if the image cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(img_path))

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0  # random crop / scale / rotation augmentation is disabled here

    input_res = self.opt.input_res
    trans_input = get_affine_transform(c, s, rot, [input_res, input_res])
    inp = cv2.warpAffine(img, trans_input, (input_res, input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    output_res = self.opt.output_res
    self.num_joints = self.opt.num_joints
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = np.array(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['segmentation'],
                       np.float32).reshape(num_joints, 2)

        # Map the four polygon corners into output-map coordinates and
        # clamp every coordinate to the map.
        for i in range(0, 8, 2):
            bbox[i:i + 2] = affine_transform(bbox[i:i + 2], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        cx, cy, w, h, theta = self.polygonToRotRectangle(bbox)

        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, int(radius))
            # Keep the centre in floating point: casting it to int before
            # subtracting ct_int made the offset target ``reg`` always zero.
            ct = np.array([cx, cy], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            ind[k] = ct_int[1] * output_res + ct_int[0]  # flat centre index
            reg[k] = ct - ct_int                         # sub-pixel offset
            reg_mask[k] = 1

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))

            for j in range(num_joints):
                pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                # Keypoints that land outside the map get no targets.
                if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                        pts[j, 1] >= 0 and pts[j, 1] < output_res:
                    kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                    kps_mask[k, j * 2:j * 2 + 2] = 1
                    pt_int = pts[j, :2].astype(np.int32)
                    hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                    hp_ind[k * num_joints + j] = \
                        pt_int[1] * output_res + pt_int[0]
                    hp_mask[k * num_joints + j] = 1
                    if self.opt.dense_hp:
                        # must be before draw center hm gaussian
                        draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                       pts[j, :2] - ct_int, radius,
                                       is_offset=True)
                        draw_gaussian(dense_kps_mask[j], ct_int, radius)
                    draw_gaussian(hm_hp[j], pt_int, hp_radius)

            if not self.opt.ellipse:
                draw_gaussian(hm[cls_id], ct_int, radius)
            else:
                draw_ellipse_gaussian(hm[cls_id], ct_int, w, h, theta)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    # Zero out heat-map values at or below 1e-2.
    hm = np.where(hm > 1e-2, hm, 0)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                      output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or self.split != 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return the network input and 2D/3D keypoint targets for ``index``.

    Annotations come from ``self.coco``; each one is read for ``bbox``,
    ``category_id``, ``keypoints``, ``alpha``, ``rotation_y``, ``location``
    and ``dim``.  The camera matrix is taken from the first annotation's
    ``calib`` entry, so the image must have at least one annotation
    (otherwise ``anns[0]`` raises ``IndexError``).

    Random crop/scale augmentation is applied for the ``'train'`` split.
    Rotation and flip augmentation are disabled in this variant, so ``rot``
    stays 0 and ``flipped`` stays False; the flip handling below is kept
    for when it is re-enabled.

    Returns:
        dict: the targets assembled in ``ret`` at the end of the method,
        plus optional ``reg``, ``hm_hp`` and
        ``hp_offset``/``hp_ind``/``hp_mask``.  ``meta`` is
        ``{'file_name': ...}`` and is replaced by
        ``{'c', 's', 'gt_det', 'img_id'}`` when ``opt.debug > 0`` or the
        split is not ``'train'``.

    Raises:
        IOError: if the image cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    # label_sel is 0 for files whose 6-digit numeric prefix lies strictly
    # between 14961 and 22480, else 1.
    # NOTE(review): the meaning of this id range is not visible here.
    label_sel = np.array([1.], dtype=np.float32)
    name_in = int(file_name[:6])
    if 14961 < name_in < 22480:
        label_sel[0] = 0.

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(img_path))
    width = img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

    out_w, out_h = self.opt.output_w, self.opt.output_h
    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_w, self.opt.input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_w, self.opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    num_joints = self.num_joints
    trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
    trans_output_inv = get_affine_transform(c, s, 0, [out_w, out_h], inv=1)

    hm = np.zeros((self.num_classes, out_h, out_w), dtype=np.float32)
    hm_hp = np.zeros((num_joints, out_h, out_w), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, out_h, out_w), dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, out_h, out_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dim = np.zeros((self.max_objs, 3), dtype=np.float32)
    location = np.zeros((self.max_objs, 3), dtype=np.float32)
    dep = np.zeros((self.max_objs, 1), dtype=np.float32)
    ori = np.zeros((self.max_objs, 1), dtype=np.float32)
    rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
    rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
    rot_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    kps_cent = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    inv_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    coor_kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                             dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    rot_scalar = np.zeros((self.max_objs, 1), dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    calib = np.array(anns[0]['calib'], dtype=np.float32)
    calib = np.reshape(calib, (3, 4))

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'][:27],
                       np.float32).reshape(num_joints, 3)
        alpha1 = ann['alpha']
        orien = ann['rotation_y']
        # Work on a copy: the flip branch edits loc in place, which would
        # otherwise modify the annotation object returned by self.coco.
        loc = list(ann['location'])
        if flipped:
            alpha1 = np.sign(alpha1) * np.pi - alpha1
            orien = np.sign(orien) * np.pi - orien
            loc[0] = -loc[0]
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, out_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, out_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        if (h > 0 and w > 0) or (rot != 0):
            # Two overlapping orientation bins; each stores the residual
            # relative to its bin centre (-pi/2 and +pi/2).
            alpha = self._convert_alpha(alpha1)
            if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
                rotbin[k, 0] = 1
                rotres[k, 0] = alpha - (-0.5 * np.pi)
            if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
                rotbin[k, 1] = 1
                rotres[k, 1] = alpha - (0.5 * np.pi)
            rot_scalar[k] = alpha

            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * out_w + ct_int[0]  # flat centre index
            reg[k] = ct - ct_int                    # sub-pixel offset
            dim[k] = ann['dim']
            dep[k] = loc[2]
            ori[k] = orien
            location[k] = loc
            reg_mask[k] = 1

            # Objects with no flagged keypoints are excluded from
            # regression and get a single 0.9999 heat-map pixel.
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0
            # NOTE(review): nesting reconstructed from flattened source;
            # rot_mask is assumed to be set for every kept object.
            rot_mask[k] = 1

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))

            # Keypoint 8 is stored before the output transform is applied.
            kps_cent[k, :] = pts[8, :2]
            for j in range(num_joints):
                pts[j, :2] = affine_transform(pts[j, :2], trans_output)
                kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                kps_mask[k, j * 2:j * 2 + 2] = 1
                if pts[j, 2] > 0:
                    if pts[j, 0] >= 0 and pts[j, 0] < out_w and \
                            pts[j, 1] >= 0 and pts[j, 1] < out_h:
                        inv_mask[k, j * 2:j * 2 + 2] = 1
                        coor_kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * out_w + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)

            # Columns 16/17 belong to keypoint 8; if it was not marked
            # valid, the whole coordinate mask row is cleared.
            if coor_kps_mask[k, 16] == 0 or coor_kps_mask[k, 17] == 0:
                coor_kps_mask[k, :] = coor_kps_mask[k, :] * 0

            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    meta = {'file_name': file_name}
    if flipped:
        coor_kps_mask = coor_kps_mask * 0
        inv_mask = inv_mask * 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask,
        'dim': dim,
        'rotbin': rotbin,
        'rotres': rotres,
        'rot_mask': rot_mask,
        'dep': dep,
        'rotscalar': rot_scalar,
        'kps_cent': kps_cent,
        'calib': calib,
        'opinv': trans_output_inv,
        'meta': meta,
        "label_sel": label_sel,
        'location': location,
        'ori': ori,
        'coor_kps_mask': coor_kps_mask,
        'inv_mask': inv_mask
    }
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or self.split != 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return the network input and detection targets for sample ``index``.

    The image path is ``self.img_list[index]`` and the annotations are
    ``self.anno_list[index]``, a sequence of ``(label, bbox)`` pairs where
    ``bbox`` is ``x1, y1, x2, y2`` and ``label`` is mapped to a class id via
    ``self.dict2num``.  Random crop/scale, flip and colour augmentation are
    applied on every call; this method does not check the split before
    augmenting.

    When ``self.show`` is set the warped image with boxes and the heat maps
    are displayed in OpenCV windows for 500 ms.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh`` (``wh`` is
        replaced by dense / category-specific variants when those options
        are set), plus optional ``reg`` and ``meta``.  ``meta['img_id']``
        is the dataset ``index``.

    Raises:
        IOError: if the image cannot be read.
    """
    img_path = self.img_list[index]
    anns = self.anno_list[index]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(img_path))
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    input_h, input_w = self.input_h, self.input_w

    flipped = False
    if not self.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
    else:
        sf = self.scale
        cf = self.shift
        c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

    if np.random.random() < self.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    if self.show:
        input_img = inp.copy()  # uint8 copy kept for visualisation
    inp = (inp.astype(np.float32) / 255.)
    if not self.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    output_h = input_h // self.down_ratio
    output_w = input_w // self.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.mse_loss \
        else draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        label, bbox = anns[k]
        # Force a float array: with integer annotations np.array() would be
        # an int array and the affine-transformed coordinates assigned
        # below would be silently truncated.
        bbox = np.array(bbox, dtype=np.float32)
        cls_id = int(self.dict2num[label] - 1)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

        if self.show:
            # Boxes are in output-map coordinates; scale back by down_ratio
            # to draw them on the input-resolution image.
            top_left = (int(bbox[0] * self.down_ratio),
                        int(bbox[1] * self.down_ratio))
            bottom_right = (int(bbox[2] * self.down_ratio),
                            int(bbox[3] * self.down_ratio))
            cv2.putText(input_img, label, top_left,
                        cv2.FONT_HERSHEY_COMPLEX, 1,
                        self.voc_color[cls_id], 1)
            cv2.rectangle(input_img, top_left, bottom_right,
                          self.voc_color[cls_id], 1)

        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flat index of the centre pixel in the output map.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int  # sub-pixel offset
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2,
                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id
            ])

    if self.show:
        cv2.namedWindow("image", 0)
        cv2.imshow("image", input_img)
        cv2.namedWindow("heatmap", 0)
        cv2.imshow("heatmap", np.hstack(hm))
        cv2.waitKey(500)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.reg_offset:
        ret.update({'reg': reg})
    if self.debug > 0 or self.state != 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        # ``img_id`` was never assigned in this method (NameError on this
        # path); the dataset index identifies the sample instead.
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': index}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample (input tensor + rotated-box targets) for image `index`.

    The image is loaded, optionally augmented (random crop/scale/shift and
    horizontal flip when ``self.split`` is ``'train'`` or ``'debug1'``),
    warped to the network input size and normalised. Every annotation's
    rotated box (read from ``ann['rbbox']`` as ``cx, cy, w, h, ..., angle``;
    the angle is taken from the last element) is mapped to the output
    feature map and encoded into centre heatmap / size / angle / offset
    targets.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with at least ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``;
        ``'wh'`` / ``'angle'`` or their dense / category-specific
        replacements depending on ``self.opt``; ``'reg'`` when
        ``opt.reg_offset``; ``'meta'`` when debugging or not training.

    Raises:
        Exception: if ``self.opt.dataset`` is not ``'hrsc'``, ``'dota'`` or
            ``'rosku'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    # Load the image and derive the crop centre `c` and crop scale `s`.
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train' or self.split == 'debug1':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.7, 1.3, 0.1))
            w_border = self._get_border(512, img.shape[1])
            h_border = self._get_border(512, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # The same (c, s) pair defines the input warp and, below, the warp that
    # moves the boxes onto the output feature map.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    if DEBUG:
        raw_img = inp.copy()

    # Scale 0-255 to 0-1, colour-augment, then subtract mean / divide by std.
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    angle = np.zeros((self.max_objs, 1), dtype=np.float32)
    dense_angle = np.zeros((1, output_h, output_w), dtype=np.float32)
    # Sub-pixel offset lost when the centre is truncated to an integer.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    # Flattened feature-map index of each object's centre.
    ind = np.zeros((self.max_objs), dtype=np.int64)
    # Stays 0 for slots whose object is absent after the transform.
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)
    cat_spec_angle = np.zeros((self.max_objs, num_classes),
                              dtype=np.float32)
    cat_spec_angle_mask = np.zeros((self.max_objs, num_classes),
                                   dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        # Copy: loadAnns hands back the dataset's own annotation dicts, so
        # mirroring cx in place would corrupt the box for every later epoch.
        bbox = list(ann['rbbox'])
        cls_id = int(self.cat_ids[ann['category_id']])

        # Apply the same geometric changes to the box as to the image.
        if flipped:
            bbox[0] = width - bbox[0] - 1  # mirror the centre x

        # Four corner points, each mapped onto the output feature map.
        pt1, pt2, pt3, pt4 = self._get_four_points(
            (bbox[0], bbox[1]), bbox[-1], bbox[2], bbox[3])
        pt1 = affine_transform((pt1[0, 0], pt1[0, 1]), trans_output)
        pt2 = affine_transform((pt2[0, 0], pt2[0, 1]), trans_output)
        pt3 = affine_transform((pt3[0, 0], pt3[0, 1]), trans_output)
        pt4 = affine_transform((pt4[0, 0], pt4[0, 1]), trans_output)

        # Centre is the midpoint of the pt1-pt3 diagonal; side lengths come
        # from the two edges that meet at pt1.
        ct = np.array(
            [(pt1[0] + pt3[0]) / 2, (pt1[1] + pt3[1]) / 2],
            dtype=np.float32)
        w = np.linalg.norm(pt1 - pt2)
        h = np.linalg.norm(pt1 - pt4)

        # Angle target, mirrored per dataset convention when flipped.
        if self.opt.dataset == 'hrsc':
            a = bbox[-1]
            if flipped:
                a = np.pi - a
        elif self.opt.dataset == 'dota':
            # Original note: DOTA json angles lie in 0..2*pi.
            a = bbox[-1]
            if flipped:
                a = 2 * np.pi - a
        elif self.opt.dataset == 'rosku':
            # Original note: rosku json angles lie in -0.5*pi..0.5*pi;
            # they are rescaled to the 0..1 range here.
            a = bbox[-1] / math.pi
            if flipped:
                a = -1 * a
            a = np.clip(a, -0.5, 0.5)
            a = a + 0.5
        else:
            raise Exception('Wrong dataset.')

        if DEBUG:
            color = [255, 0, 0]
            line_width = 2
            temp_a = a
            npt1, npt2, npt3, npt4 = self._get_four_points(
                (ct[0], ct[1]), temp_a, w, h)
            npt1 = self._float_to_int(npt1)
            npt2 = self._float_to_int(npt2)
            npt3 = self._float_to_int(npt3)
            npt4 = self._float_to_int(npt4)
            cv2.line(raw_img, npt1, npt2, color, line_width)
            cv2.line(raw_img, npt2, npt3, color, line_width)
            cv2.line(raw_img, npt3, npt4, color, line_width)
            cv2.line(raw_img, npt4, npt1, color, line_width)

        # Only objects whose centre lands inside the feature map are encoded.
        if 0 <= ct[0] <= output_w - 1 and 0 <= ct[1] <= output_h - 1:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)

            wh[k] = 1. * w, 1. * h
            angle[k] = 1. * a
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)

            cat_spec_angle[k, cls_id] = angle[k]
            cat_spec_angle_mask[k, cls_id] = 1
            if self.opt.dense_angle or self.opt.fsm:
                draw_dense_reg(dense_angle, hm.max(axis=0), ct_int,
                               angle[k], radius)

            # Store the angle as a scalar: mixing a shape-(1,) array with
            # scalars makes np.array(gt_det) a ragged sequence.
            gt_det.append([ct[0], ct[1], w, h, float(angle[k, 0]), 1,
                           cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'angle': angle}

    # Size targets: dense or category-specific variants replace 'wh'.
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']

    # Angle targets: 'angle' is kept alongside the dense map in fsm mode.
    if self.opt.dense_angle or self.opt.fsm:
        dense_angle_mask = hm.max(axis=0, keepdims=True)
        ret.update({'dense_angle': dense_angle,
                    'dense_angle_mask': dense_angle_mask})
        if self.opt.dense_angle:
            del ret['angle']
    elif self.opt.cat_spec_angle:
        ret.update({'cat_spec_angle': cat_spec_angle,
                    'cat_spec_angle_mask': cat_spec_angle_mask})
        del ret['angle']

    if self.opt.reg_offset:
        ret.update({'reg': reg})

    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 7), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id,
                'img_name': file_name}
        ret['meta'] = meta

    if DEBUG:
        ret['raw_img'] = raw_img
        ret['gt_det'] = gt_det
        ret['img_id'] = img_id
        cv2.imwrite(os.path.join('./cache', '%s.jpg' % img_id), raw_img)
    return ret
def __getitem__(self, index):
    """Build one sample for centre + size + angle regression (no geometric aug).

    The image is warped to the network input size with a fixed centre/scale
    (random crop and flip are disabled in this variant), normalised, and
    each annotation's ``ann['bbox']`` (indexed here as
    ``[cx, cy, w, h, angle]``) is encoded into heatmap / size / angle /
    offset targets on the output feature map.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'angle'``; ``'wh'`` or its dense / category-specific replacement;
        ``'reg'`` when ``opt.reg_offset``; ``'meta'`` when debugging or not
        training.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    # Random crop / scale / shift and horizontal flip are intentionally not
    # applied in this variant; only the deterministic resize warp remains.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    # Per-object angle regression target.
    reg_angle = np.zeros((self.max_objs, 1), dtype=np.float32)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        # Copy: loadAnns returns the dataset's own dicts, and the slice
        # assignments below would otherwise re-transform the stored box on
        # every access to this image.
        bbox = list(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        # NOTE(review): (w, h) is pushed through the full affine map, so it
        # also picks up the translation term - confirm this is intended.
        bbox[2:4] = affine_transform(bbox[2:4], trans_output)
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        h, w = bbox[3], bbox[2]
        if h > 0 and w > 0:
            ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            reg_angle[k] = bbox[4]

            # Always compute the radius: draw_dense_reg below needs it even
            # when the heatmap itself is drawn with the ellipse kernel.
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            if not self.opt.ellipse:
                draw_gaussian(hm[cls_id], ct_int, radius)
            else:
                draw_ellipse_gaussian(hm[cls_id], ct_int, w, h,
                                      reg_angle[k])

            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    if self.opt.ellipse:
        # Zero out heatmap values at or below 1e-2.
        hm = np.where(hm > 1e-2, hm, 0)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'angle': reg_angle
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample with box, keypoint and per-object segmentation targets.

    Annotations are restricted to ``self._valid_ids`` and non-crowd entries.
    During training the image may be randomly cropped/scaled/shifted,
    rotated and flipped. Boxes, keypoints and instance masks are mapped to
    an ``OUTPUT_RES`` x ``OUTPUT_RES`` grid and encoded as centre heatmap,
    size, offset, keypoint offsets / heatmaps and a masked segmentation map
    per object.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'``, ``'seg'`` and either ``'hps'``/``'hps_mask'`` or their
        dense replacements, plus the optional ``'reg'``, ``'hm_hp'``,
        ``'hp_offset'``/``'hp_ind'``/``'hp_mask'`` and ``'meta'`` entries
        selected by ``self.cfg``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    anns = [
        ann for ann in anns
        if ann['category_id'] in self._valid_ids and ann['iscrowd'] != 1
    ]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if self.cfg.DATASET.RANDOM_CROP:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.cfg.DATASET.SCALE
            cf = self.cfg.DATASET.SHIFT
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.cfg.DATASET.AUG_ROT:
            rf = self.cfg.DATASET.ROTATE
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.cfg.DATASET.FLIP:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    input_res = self.cfg.MODEL.INPUT_RES
    trans_input = get_affine_transform(c, s, rot, [input_res, input_res])
    inp = cv2.warpAffine(img, trans_input, (input_res, input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - np.array(self.cfg.DATASET.MEAN).astype(
        np.float32)) / np.array(self.cfg.DATASET.STD).astype(np.float32)
    inp = inp.transpose(2, 0, 1)

    output_res = self.cfg.MODEL.OUTPUT_RES
    num_joints = self.num_joints
    # Keypoints follow the rotated warp; boxes and masks use the unrotated one.
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
    trans_seg_output = get_affine_transform(c, s, 0,
                                            [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    seg = np.zeros((self.max_objs, output_res, output_res),
                   dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        segment = self.coco.annToMask(ann)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Swap the keypoint pairs listed in self.flip_idx after mirroring.
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            segment = segment[:, ::-1]

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        segment = cv2.warpAffine(segment, trans_seg_output,
                                 (output_res, output_res),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            # NOTE(review): `hm_gauss` is read from the cfg root in lower
            # case, unlike the other options here - confirm the key exists.
            radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else max(
                0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            # Segmentation target: everything outside a padded window around
            # the box is set to 255.
            # NOTE(review): the clipped window bounds are doubled before
            # indexing an output_res-sized mask - confirm this is intended.
            pad_rate = 0.3
            segment_mask = np.ones_like(segment)
            half_w = (1 + pad_rate) * w / 2
            half_h = (1 + pad_rate) * h / 2
            # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
            x = (np.clip([ct[0] - half_w, ct[0] + half_w], 0,
                         output_res - 1) * 2).astype(int)
            y = (np.clip([ct[1] - half_h, ct[1] + half_h], 0,
                         output_res - 1) * 2).astype(int)
            segment_mask[y[0]:y[1], x[0]:x[1]] = 0
            segment[segment_mask == 1] = 255
            seg[k] = segment

            # Keypoints: objects with no visible joints get their centre
            # pinned to 0.9999 and are excluded from box regression.
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.cfg.hm_gauss \
                if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.cfg.LOSS.DENSE_HP:
                            # Must run before the centre gaussian is drawn
                            # into hm[cls_id] below.
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        # With rotation the box targets are disabled: the centre heatmap is
        # set to 0.9999 everywhere and both regression masks are cleared.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask,
        'seg': seg
    }
    if self.cfg.LOSS.DENSE_HP:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.cfg.LOSS.REG_OFFSET:
        ret.update({'reg': reg})
    if self.cfg.LOSS.HM_HP:
        ret.update({'hm_hp': hm_hp})
    if self.cfg.LOSS.REG_HP_OFFSET:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.cfg.DEBUG > 0 or not self.split == 'train':
        # Each row is 4 box coords + score + 2 * num_joints + class id
        # (40 columns for 17 joints).
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6 + num_joints * 2), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample with detection targets and per-instance masks.

    Crowd annotations are dropped. Instance masks are decoded with
    ``annToMask``, augmented together with the image (flip + affine warp to
    the input size) and binarised; boxes are encoded on the output feature
    map. Objects whose box or mask is empty after augmentation are skipped.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'masks'``, ``'gt_bbox_lbl'``; ``'wh'`` or its dense /
        category-specific replacement; ``'reg'`` when ``opt.reg_offset``;
        ``'meta'`` when debugging or not training.

    Raises:
        AssertionError: if the image file does not exist.
    """
    # `index` is used as given: a previously hard-coded debug override here
    # made every sample resolve to the same image.
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    assert os.path.exists(img_path), 'Image path does not exist: {}'.format(img_path)

    # Each annotation has {'segmentation', 'area', 'iscrowd', 'image_id',
    # 'bbox', 'category_id'}.
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    target = self.coco.loadAnns(ids=ann_ids)

    # Drop crowd annotations: they mark a large group of objects of a class
    # without an annotation for each individual object.
    target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]

    masks = None  # stays None when the image has no usable annotations
    if len(target) > 0:
        # Pool all masks into one (num_objects, height, width) array, then
        # move the object axis last so the masks can be flipped and warped
        # like an image.
        masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
        masks = np.vstack(masks)
        masks = masks.reshape(-1, height, width)
        masks = masks.transpose(1, 2, 0)

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            if masks is not None:
                masks = masks[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    if self.rgb:
        inp = inp[..., ::-1]
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    if masks is None:
        # No objects: return an empty mask stack instead of failing on an
        # unbound variable.
        masks = np.zeros((0, input_h, input_w), dtype=np.uint8)
    else:
        d1 = masks.shape[2]
        masks = cv2.warpAffine(masks, trans_input, (input_w, input_h),
                               flags=cv2.INTER_LINEAR)
        # warpAffine drops the channel axis for a single mask; restore it.
        masks = np.expand_dims(masks, 2) if masks.ndim != 3 else masks
        d2 = masks.shape[2]
        assert d1 == d2
        masks = masks.transpose(2, 0, 1)
        masks = (masks >= 0.5).astype(np.uint8)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

    segm_masks = []
    gt_det = []
    num_objs = min(len(target), self.max_objs)
    for k in range(num_objs):
        ann = target[k]
        # Convert the box to point form (xmin, ymin, xmax, ymax).
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # After augmentation some masks will be empty.
        if h > 0 and w > 0 and masks[k].sum() > 0.0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)

            det = [ct[0] - w / 2, ct[1] - h / 2,
                   ct[0] + w / 2, ct[1] + h / 2, cls_id]
            gt_det.append(det)
            segm_masks.append(masks[k])

    if len(segm_masks) > 0:
        # Keep only the masks and boxes of objects that survived.
        masks = np.stack(segm_masks)
        gt_det = np.stack(gt_det)

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'masks': masks, 'gt_bbox_lbl': gt_det}

    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # NOTE(review): rows appended above have 5 columns but this
        # fallback has 6 - confirm which width consumers expect.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one sample with box + angle targets: inp, hm, reg_mask, ind, wh, ang.

    The image is loaded, optionally augmented (random crop centre, or
    shift/scale jitter, plus horizontal flip during training), warped to the
    network input size and normalised. Each box, whose fifth element is
    read as the angle, is mapped to the output feature map and encoded as
    centre heatmap, size, angle and sub-pixel offset.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'ang'``; ``'wh'`` or its dense / category-specific replacement;
        ``'reg'`` when ``opt.reg_offset``; ``'meta'`` when debugging or not
        training.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)  # capped at max_objs

    img = cv2.imread(img_path)

    # Crop centre `c` and scale `s` (the longer image side unless keep_res).
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    # Augmentation; the result of this stage is the network input `inp`.
    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale jitter is disabled in this variant; only the crop
            # centre is randomised.
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # Target generation on the down-sampled output grid.
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    angs = np.zeros((self.max_objs, 1), dtype=np.float32)  # one angle each
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    # Fractional part lost when the centre is truncated to an integer.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[[4]] = 180 - bbox[[4]]  # mirror the angle as well
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:4] = affine_transform(bbox[2:4], trans_output)
        # The corners are deliberately NOT clipped to the feature map: the
        # original author notes that clipping them was what shifted the
        # centre points in ship detection. Only h and w are clipped below.
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        ang = bbox[4]
        h = np.clip(h, 0, output_h - 1)
        w = np.clip(w, 0, output_w - 1)
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            # Because the corners are unclipped the centre can fall outside
            # the feature map; skip such objects so `ind[k]` never indexes
            # outside output_h * output_w.
            if ct[0] < 0 or ct[0] > output_w - 1 or \
                    ct[1] < 0 or ct[1] > output_h - 1:
                continue
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h  # box size target
            angs[k] = 1. * ang
            # Flattened index of the centre on the output feature map.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Offset target, e.g. [98.97667, 2.3566666] - [98, 2]
            # = [0.97667, 0.3566666].
            reg[k] = ct - ct_int
            # Marks slot k as holding a real object; unused slots stay 0.
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                ang, 1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'ang': angs
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        # Rows carry 7 values (4 box coords, angle, score, class id), so the
        # empty fallback uses the same width.
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 7), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __next__(self):
    """Decode the next video frame and build its CenterNet training targets.

    Opens the next video (through ``skvideo`` or ``cv2.VideoCapture``,
    selected by ``self.opt.vidstream``) whenever no video is loaded or the
    current one is exhausted. It then reads one frame, applies the
    crop/scale/flip augmentation, normalizes the frame on the GPU and
    rasterizes the detections returned by ``self.mmdetect_pred2inst`` into
    heat-map, size and offset targets.

    Detections whose score lies in ``[0.3, 0.5)`` are drawn on a separate
    ``unconf_hm`` heat map and get ``reg_mask == 0``; every other detection
    is drawn on ``hm`` and gets ``reg_mask == 1``.

    Returns:
        dict: ``input`` (CUDA tensor, channels first), ``hm``, ``reg_mask``,
        ``ind``, ``wh`` and ``unconf_hm``; depending on the options also
        ``dense_wh``/``dense_wh_mask`` or ``cat_spec_wh``/``cat_spec_mask``
        (both replace ``wh``), ``reg`` and ``meta``.

    Raises:
        StopIteration: all videos were consumed and ``self.loop`` is falsy.
        NotImplementedError: ``self.opt.task == 'ctdet_semseg'``; the code
            that produced the segmentation masks is disabled, so this task
            previously crashed with a ``NameError``.
    """
    opt = self.opt

    # (Re)open a video when nothing is loaded or the current one is finished.
    if self.cap is None or self.count >= self.length:
        if self.cap is not None and self.vid_i == self.num_videos:
            if not self.loop:
                raise StopIteration
            self.vid_i = 0  # wrap around and start over
        video_path = self.video_paths[self.vid_i]
        if opt.vidstream == 'skvideo':
            # skvideo loads the whole clip; frames are indexed directly later.
            self.cap = skvideo.io.vread(video_path)
            metadata = skvideo.io.ffprobe(video_path)
            fr_lst = metadata['video']['@avg_frame_rate'].split('/')
            self.rate = int(fr_lst[0]) / int(fr_lst[1])
            self.length = int(metadata['video']['@nb_frames'])
        else:
            self.cap = cv2.VideoCapture(video_path)
            self.rate = self.cap.get(cv2.CAP_PROP_FPS)
            self.length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.frame_gen = self._frame_from_video(self.cap)
        self.count = 0
        self.vid_i += 1

    # Fetch the current frame from whichever backend is active.
    if opt.vidstream == 'skvideo':
        img = self.cap[self.count]
    else:
        img = next(self.frame_gen)

    anns = self.mmdetect_pred2inst(self.count)
    num_objs = min(len(anns), self.max_objs)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if not opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = opt.scale
            cf = opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Warp to the network input size, then normalize on the GPU.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = torch.from_numpy(inp).cuda()
    inp = (inp.float() / 255.)
    inp = (inp - torch.from_numpy(self.mean).cuda()) / torch.from_numpy(
        self.std).cuda()
    inp = inp.permute(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)
    unconfident_hm = np.zeros((num_classes, output_h, output_w),
                              dtype=np.float32)

    draw_gaussian = draw_msra_gaussian if opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        # Boxes are used as given (no xywh conversion is applied here).
        bbox = np.array(ann['bbox'], dtype=np.float32)
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping to the output map

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = opt.hm_gauss if opt.mse_loss else radius
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)

        # Mid-confidence detections go to their own heat map and are
        # excluded from the regression losses through reg_mask.
        if 0.3 <= ann['score'] < 0.5:
            draw_gaussian(unconfident_hm[cls_id], ct_int, radius)
            reg_mask[k] = 0
        else:
            draw_gaussian(hm[cls_id], ct_int, radius)
            reg_mask[k] = 1

        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    if opt.task == 'ctdet_semseg':
        # The mask generation this task depends on is disabled in this
        # method, so there is nothing valid to return for it.
        raise NotImplementedError(
            "task 'ctdet_semseg' is not supported by this loader: "
            "segmentation masks are not generated")

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'unconf_hm': unconfident_hm
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        # The frame index within the current video doubles as the image id.
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': self.count}
        ret['meta'] = meta

    self.count += 1
    return ret
def __getitem__(self, index):
    """Build one CenterNet sample from an image and its per-image label array.

    Labels come from ``self.labels[index]`` (only when the matching file in
    ``self.label_files`` exists). Each row is read as
    ``[class, cx, cy, w, h]`` with the box normalized to the image size.

    Args:
        index: position of the sample; also used as ``img_id`` in ``meta``.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and,
        depending on the options, ``dense_wh``/``dense_wh_mask`` or
        ``cat_spec_wh``/``cat_spec_mask`` (both replace ``wh``), ``reg``
        and ``meta``.
    """
    opt = self.opt
    img_id = index
    img_path = self.images[index]
    label_path = self.label_files[index]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]

    labels = []
    if os.path.isfile(label_path):
        raw = self.labels[index]
        if raw.size > 0:
            # Normalized centre-xywh -> pixel top-left-xywh; the conversion
            # to corner form happens per box via _coco_box_to_bbox below.
            labels = raw.copy()
            labels[:, 1] = width * (raw[:, 1] - raw[:, 3] / 2)
            labels[:, 2] = height * (raw[:, 2] - raw[:, 4] / 2)
            labels[:, 3] = width * (raw[:, 3])
            labels[:, 4] = height * (raw[:, 4])

    # The target arrays only hold max_objs rows; extra labels are dropped
    # instead of overflowing them.
    num_objs = min(len(labels), self.max_objs)

    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if not opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = opt.scale
            cf = opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp_ori = cv2.warpAffine(img, trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
    inp = (inp_ori.astype(np.float32) / 255.)
    if self.split == 'train' and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = labels[k]
        # presumably converts [x, y, w, h] to [x1, y1, x2, y2] -- the helper
        # is defined outside this block; confirm.
        bbox = self._coco_box_to_bbox(ann[1:5])
        cls_id = int(ann[0])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping to the output map

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = opt.hm_gauss if opt.mse_loss else radius
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one CenterNet sample from ``self.all_frames[index]``.

    Each entry of ``self.all_frames`` is read as ``(image_path,
    annotations)``; the annotations are accessed with ``.iloc`` and the
    columns ``topLeftX``, ``topLeftY``, ``bottomRightX``, ``bottomRightY``
    and ``name``.

    Args:
        index: position of the sample in ``self.all_frames``.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and,
        depending on the options, ``dense_wh``/``dense_wh_mask`` or
        ``cat_spec_wh``/``cat_spec_mask`` (both replace ``wh``), ``reg``
        and ``meta``.
    """
    opt = self.opt
    frame = self.all_frames[index]
    img_path, anns = frame[0], frame[1]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    if img is None:
        # Unreadable image: remember it and fall back to the last good one.
        # NOTE(review): the annotations of the failed image are still used
        # with the substituted picture -- confirm this is intended.
        print("Image '{}' failed!!!".format(img_path))
        self.failed_images.add(img_path)
        img = self.last_img
    else:
        self.last_img = img

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if opt.not_rand_crop:
            # Jitter centre and scale with clipped Gaussian noise.
            sf = opt.scale
            cf = opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        else:
            # Random scale plus a random centre inside the border margin.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    # One heat-map channel per entry of the category mapping.
    num_classes = len(self.cat_ids)
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns.iloc[k]
        bbox = np.asarray([
            ann["topLeftX"], ann["topLeftY"], ann['bottomRightX'],
            ann['bottomRightY']
        ], dtype=np.float32)
        cls_id = int(self.cat_ids[ann['name']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping to the output map

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = opt.hm_gauss if opt.mse_loss else radius
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cls_cols = slice(cls_id * 2, cls_id * 2 + 2)
        cat_spec_wh[k, cls_cols] = wh[k]
        cat_spec_mask[k, cls_cols] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or not self.split == 'train':
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        # The image id is the integer file stem of the image path.
        _id = int(img_path.split('/')[-1].split('.')[0])
        ret['meta'] = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': _id}
    return ret
def __getitem__(self, index):
    """Build one CenterNet sample whose key points are polygon centres.

    Unlike the box-centre variant, the heat-map peak and ``ind`` are placed
    at the centre of the first segmentation polygon of each annotation,
    while ``wh`` still holds the box size and ``reg`` holds the offset from
    the integer polygon centre to the (float) box centre.

    The scale ``s`` is always the per-axis ``[width, height]`` pair (or the
    padded size with ``keep_res``), so the image is not letter-boxed to a
    square.

    Args:
        index: position of the sample in ``self.images``.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and,
        depending on the options, ``dense_wh``/``dense_wh_mask`` or
        ``cat_spec_wh``/``cat_spec_mask`` (both replace ``wh``), ``reg``
        and ``meta``.
    """
    opt = self.opt
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # Start from the original image centre.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        # Per-axis scale: the full image is mapped onto the input size.
        s = np.array([width, height], dtype=np.float32)
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if not opt.not_rand_crop:
            # Random centre restricted to +/- 1/8 of the image size around
            # the middle; the scale is left untouched.
            w_range, h_range = img.shape[1] // 8, img.shape[0] // 8
            c[0] = np.random.randint(low=img.shape[1] // 2 - w_range,
                                     high=img.shape[1] // 2 + w_range)
            c[1] = np.random.randint(low=img.shape[0] // 2 - h_range,
                                     high=img.shape[0] // 2 + h_range)
        else:
            sf = opt.scale
            cf = opt.shift
            # s is a 2-vector here, so each axis is shifted by its own
            # extent; assigning the whole vector to one element of c fails.
            c[0] += s[0] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s[1] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Warp the original image to the network input size.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    # Maps original-image coordinates onto the down-sampled output map.
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # Only the first polygon of the annotation is used, as (x, y) rows.
        segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            segmentation[:, 0] = width - segmentation[:, 0] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping to the output map

        # Transforming the polygon centre alone is enough; the individual
        # vertices are not needed afterwards.
        polygon_center = self._get_polygon_center(segmentation)
        polygon_center = affine_transform(polygon_center, trans_output)
        ct = polygon_center
        ct_int = ct.astype(np.int32)
        # The polygon centre is not clipped like the box, so it can land
        # outside the output map; such a point has no valid flat index.
        if not (0 <= ct_int[0] < output_w and 0 <= ct_int[1] < output_h):
            continue

        radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
        # A third of the usual radius gives a tighter peak.
        radius = max(0, int(math.ceil(radius / 3)))
        radius = opt.hm_gauss if opt.mse_loss else radius

        box_center = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)

        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        # Offset from the integer polygon centre to the float box centre.
        reg[k] = box_center - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

        # The ground-truth box is rebuilt around the integer box centre.
        box_ct_int = box_center.astype(np.int32)
        gt_det.append([box_ct_int[0] - w / 2, box_ct_int[1] - h / 2,
                       box_ct_int[0] + w / 2, box_ct_int[1] + h / 2,
                       1, cls_id])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one CenterNet sample and dump a heat-map overlay for inspection.

    In addition to the usual targets, whenever at least one object is drawn
    this method writes ``visual_ann_<index>.png`` to the current working
    directory: channel 1 of the normalized input with the up-sampled heat
    map added on top.

    Args:
        index: position of the sample in ``self.images``; also used in the
            name of the overlay file.

    Returns:
        dict: ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and,
        depending on the options, ``dense_wh``/``dense_wh_mask`` or
        ``cat_spec_wh``/``cat_spec_mask`` (both replace ``wh``), ``reg``
        and ``meta``.
    """
    opt = self.opt
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
        if not opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = opt.scale
            cf = opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Apply the affine transform to the input image.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # One channel of the three-channel input is enough as a backdrop for
    # the overlay.
    test_image = inp[1]

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    drew_any = False
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue  # box collapsed after clipping to the output map

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = opt.hm_gauss if opt.mse_loss else radius
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        drew_any = True

        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    if drew_any:
        # Save the Gaussian heat map on top of the input. Written once per
        # sample, after all objects are drawn, so the file holds the
        # complete map. The class maximum and the actual input size are
        # used so this also works for several classes and any input
        # resolution.
        heatmap = cv2.resize(hm.max(axis=0), (input_w, input_h),
                             interpolation=cv2.INTER_CUBIC)
        new_image = test_image + heatmap * 2
        array_name = 'visual_ann_' + str(index) + '.png'
        matplotlib.image.imsave(array_name, new_image)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif opt.cat_spec_wh:
        ret.update({
            'cat_spec_wh': cat_spec_wh,
            'cat_spec_mask': cat_spec_mask
        })
        del ret['wh']
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build the keypoint-detection sample for dataset position ``index``.

    Loads the image and its COCO annotations, applies (on the training
    split) random crop/shift/scale, rotation and horizontal flip, warps the
    image to ``opt.input_res`` and renders the centre heatmap, keypoint
    heatmap and regression targets at ``opt.output_res``.

    When ``opt.angle_norm`` is set on the training split, the centre
    heatmap, keypoint heatmap and input image are additionally rotated by
    the mean of the most populated 20-degree bin of object angles.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh' and either
        'hps'/'hps_mask' or (with ``opt.dense_hp``) 'dense_hps'/
        'dense_hps_mask'; optionally 'reg', 'hm_hp', 'hp_offset',
        'hp_ind', 'hp_mask' and 'meta' depending on the options.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    width = img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # TODO: multi-scale training is disabled here; to re-enable it
            # multiply s by np.random.choice(np.arange(0.8, 1.5, 0.1)).
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            # Rotation augmentation.
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    angle_list = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        # Only the first (x, y, visibility) triplet of the annotation is
        # read, so the reshape only succeeds when num_joints == 1.
        pts = np.array(ann['keypoints'][0:3],
                       np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Left/right keypoint swapping via self.flip_idx is
            # intentionally not applied in this variant.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        # The box itself is not clipped; only its size and centre are.
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        h = np.clip(h, 0, output_res - 1)
        w = np.clip(w, 0, output_res - 1)
        if (h > 0 and w > 0) or (rot != 0):
            # Anisotropic radius: split the enlarged isotropic radius
            # between the two axes in proportion to sqrt(w) and sqrt(h).
            radius = gaussian_radius((math.ceil(h), math.ceil(w))) * 1.2
            sqrt_wh = np.sqrt(np.sqrt(h * w))
            radius_w = radius * np.sqrt(w) / sqrt_wh
            radius_h = radius * np.sqrt(h) / sqrt_wh
            radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, np.ceil(radius_w))
            radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, np.ceil(radius_h))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct[0] = np.clip(ct[0], 0, output_res - 1)
            ct[1] = np.clip(ct[1], 0, output_res - 1)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            # Object orientation: angle of the first keypoint around the
            # centre, measured with atan2(dx, dy).
            angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
            angle_list.append(angle)
            # NOTE(review): draw_gaussian is called with a
            # [radius_w, radius_h, angle] list here, so the imported
            # helpers must accept that form -- confirm against their
            # definitions.
            draw_gaussian(hm[cls_id], ct_int, [radius_w, radius_h, angle])
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    # Angle normalisation. This step previously sat before the input warp,
    # where angle_list, hm, hm_hp, inp and output_res were not yet assigned,
    # so enabling opt.angle_norm raised UnboundLocalError. It now runs once
    # the targets exist, and is skipped when no object contributed an angle.
    if self.opt.angle_norm and self.split == 'train' and angle_list:
        # Fold the angles into [0, pi) and histogram them in pi/9 bins.
        angle_list = np.array(angle_list) % np.pi
        angle_int = (angle_list // (np.pi / 9)).astype('int')
        angle_b = np.bincount(angle_int)
        index_rot = np.argmax(angle_b)
        ind_rot = (angle_list > (index_rot) * np.pi / 9) * \
            (angle_list <= (index_rot + 1) * np.pi / 9)
        # NOTE(review): an angle lying exactly on the lower bin edge is
        # excluded by the strict '>' above; if every angle in the winning
        # bin does so, this average is taken over an empty selection.
        angle_rot = np.average(angle_list[ind_rot])

        # Rotate the centre heatmap, the keypoint heatmap and the image.
        angle_img_rot = angle_rot * (-180) / np.pi
        hm_rotate = hm.transpose(1, 2, 0)
        M = cv2.getRotationMatrix2D(
            ((output_res) / 2.0, (output_res) / 2.0), angle_img_rot, 1)
        hm_rotate = cv2.warpAffine(hm_rotate, M, (output_res, output_res))
        # NOTE(review): this transpose needs a 3-D result, i.e. more than
        # one class channel surviving warpAffine -- confirm for the
        # single-class case.
        hm = hm_rotate.transpose(2, 0, 1)
        hp_rotate = hm_hp.transpose(1, 2, 0)
        hp_rotate = cv2.warpAffine(hp_rotate, M, (output_res, output_res))
        hm_hp = hp_rotate[np.newaxis, :]
        M = cv2.getRotationMatrix2D(
            ((self.opt.input_res) / 2.0, (self.opt.input_res) / 2.0),
            angle_img_rot, 1)
        inp = inp.transpose(1, 2, 0)
        inp = cv2.warpAffine(inp, M,
                             (self.opt.input_res, self.opt.input_res))
        inp = inp.transpose(2, 0, 1)

    if rot != 0:
        # With rotation augmentation the centre heatmap is overwritten
        # with a constant and the regression/keypoint masks are cleared.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0
    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 40), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build a multi-frame detection sample for dataset position ``index``.

    The annotated frame is stacked channel-wise with its neighbouring
    frames (11 frames in total, located by the numeric part of the file
    name; a missing neighbour is replaced by the annotated frame itself).
    A box-filled segmentation mask is rendered from the annotations and
    receives the same flip and affine warp as the image stack.

    Returns:
        dict with 'input' (33 channels), 'hm', 'reg_mask', 'ind', 'wh'
        and 'seg'; 'wh' is replaced by 'dense_wh'/'dense_wh_mask' or by
        'cat_spec_wh'/'cat_spec_mask' according to the options, and
        'reg' / 'meta' are added when requested.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)
    channel_counter = 1  # len(self.coco.getCatIds())

    N_FRAMES = 11
    middle = int(N_FRAMES / 2)
    # Numeric frame id taken from the file name (kept in its own variable
    # instead of overwriting the ``index`` argument).
    frame_id = os.path.basename(img_path).replace('.jpg', '') \
        .replace('img', '').replace('.JPEG', '')
    # Whatever sits between the directory and the frame number.
    rest = img_path.replace(frame_id + '.jpg', '') \
        .replace(os.path.dirname(img_path), '')
    length = len(frame_id)
    # Frame numbers wrap around at 10 ** (number of digits).
    modulo = 10 ** length

    img_paths = []
    for i in range(N_FRAMES):
        new_img_path = os.path.dirname(img_path) \
            + rest \
            + str((int(frame_id) - (i - middle)) % modulo).zfill(length) \
            + '.jpg'
        if not os.path.exists(new_img_path):
            new_img_path = img_path
        img_paths.append(new_img_path)

    imgs = [cv2.imread(path) for path in img_paths]
    img = np.concatenate(imgs, -1)

    # Group integer [x1, y1, x2, y2] boxes by category id (as a string).
    bboxes = {}
    for ann in anns:
        x, y, bw, bh = ann['bbox'][0], ann['bbox'][1], \
            ann['bbox'][2], ann['bbox'][3]
        bboxes.setdefault(str(ann['category_id']), []).append(
            [int(x), int(y), int(x + bw), int(y + bh)])

    seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]])
    for label in range(1, channel_counter + 1):
        for bbox in bboxes.get(str(label), ()):
            seg_img[label - 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            # Mirror the mask too; otherwise 'seg' no longer lines up with
            # the flipped image and the flipped boxes.
            seg_img = np.ascontiguousarray(seg_img[:, :, ::-1])
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])

    # cv2.warpAffine takes dsize as (width, height) and returns an array of
    # shape (height, width[, channels]); the buffers must be allocated in
    # that order or non-square inputs fail on assignment.
    seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))
    for channel in range(seg_img.shape[0]):
        seg_inp[channel, :, :] = cv2.warpAffine(seg_img[channel, :, :],
                                                trans_input,
                                                (input_w, input_h),
                                                flags=cv2.INTER_LINEAR)

    inp = np.zeros((input_h, input_w, N_FRAMES * 3))
    for i in range(N_FRAMES):
        inp[:, :, i * 3:i * 3 + 3] = cv2.warpAffine(
            img[:, :, i * 3:i * 3 + 3], trans_input, (input_w, input_h),
            flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    seg_inp = (seg_inp.astype(np.float32) / 255.)

    # Colour augmentation and normalisation are applied per 3-channel
    # frame; color_aug is handed a slice of inp for each frame.
    if self.split == 'train' and not self.opt.no_color_aug:
        for i in range(N_FRAMES):
            color_aug(self._data_rng, inp[:, :, i * 3:i * 3 + 3],
                      self._eig_val, self._eig_vec)
    for i in range(N_FRAMES):
        inp[:, :, i * 3:i * 3 + 3] = \
            (inp[:, :, i * 3:i * 3 + 3] - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh, 'seg': seg_inp}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Assemble the multi-pose sample stored at dataset position ``index``.

    The image is read, augmented on the training split (random
    crop/shift/scale, rotation, horizontal flip, colour jitter), warped to
    ``opt.input_res`` and normalised. Centre heatmaps, keypoint heatmaps
    and the per-object regression targets are rendered at
    ``opt.output_res``.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh' and either
        'hps'/'hps_mask' or (with ``opt.dense_hp``) 'dense_hps'/
        'dense_hps_mask'; 'reg', 'hm_hp', 'hp_offset'/'hp_ind'/'hp_mask'
        and 'meta' are added according to the options.
    """
    opt = self.opt
    is_train = self.split == 'train'

    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0
    rot = 0
    flipped = False

    if is_train:
        if opt.not_rand_crop:
            # Jitter centre and scale around the full image.
            scale_f = opt.scale
            shift_f = opt.shift
            center[0] += scale * np.clip(np.random.randn() * shift_f,
                                         -2 * shift_f, 2 * shift_f)
            center[1] += scale * np.clip(np.random.randn() * shift_f,
                                         -2 * shift_f, 2 * shift_f)
            scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                    1 - scale_f, 1 + scale_f)
        else:
            # Random scale plus a random crop centre inside the borders.
            scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            center[0] = np.random.randint(low=w_border,
                                          high=width - w_border)
            center[1] = np.random.randint(low=h_border,
                                          high=height - h_border)
        if np.random.random() < opt.aug_rot:
            rot_f = opt.rotate
            rot = np.clip(np.random.randn() * rot_f, -rot_f * 2, rot_f * 2)
        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    in_res = opt.input_res
    trans_input = get_affine_transform(center, scale, rot, [in_res, in_res])
    inp = cv2.warpAffine(img, trans_input, (in_res, in_res),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if is_train and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    out_res = opt.output_res
    n_joints = self.num_joints
    max_objs = self.max_objs
    # Keypoints follow the rotated transform, boxes the unrotated one.
    trans_output_rot = get_affine_transform(center, scale, rot,
                                            [out_res, out_res])
    trans_output = get_affine_transform(center, scale, 0,
                                        [out_res, out_res])

    hm = np.zeros((self.num_classes, out_res, out_res), dtype=np.float32)
    hm_hp = np.zeros((n_joints, out_res, out_res), dtype=np.float32)
    dense_kps = np.zeros((n_joints, 2, out_res, out_res), dtype=np.float32)
    dense_kps_mask = np.zeros((n_joints, out_res, out_res),
                              dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    kps = np.zeros((max_objs, n_joints * 2), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs), dtype=np.int64)
    reg_mask = np.zeros((max_objs), dtype=np.uint8)
    kps_mask = np.zeros((max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((max_objs * n_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((max_objs * n_joints), dtype=np.int64)
    hp_mask = np.zeros((max_objs * n_joints), dtype=np.int64)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(n_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Exchange the left/right keypoint pairs.
            for pair in self.flip_idx:
                pts[pair[0]], pts[pair[1]] = \
                    pts[pair[1]].copy(), pts[pair[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, out_res - 1)
        h = bbox[3] - bbox[1]
        w = bbox[2] - bbox[0]
        if not ((h > 0 and w > 0) or (rot != 0)):
            continue

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        if opt.mse_loss:
            radius = opt.hm_gauss
        else:
            radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * out_res + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        if pts[:, 2].sum() == 0:
            # No labelled keypoint: mark the centre and drop the object
            # from the regression mask.
            hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
            reg_mask[k] = 0

        hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        if opt.mse_loss:
            hp_radius = opt.hm_gauss
        else:
            hp_radius = max(0, int(hp_radius))

        for j in range(n_joints):
            if not pts[j, 2] > 0:
                continue
            pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
            inside = pts[j, 0] >= 0 and pts[j, 0] < out_res and \
                pts[j, 1] >= 0 and pts[j, 1] < out_res
            if not inside:
                continue
            slot = k * n_joints + j
            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
            kps_mask[k, j * 2:j * 2 + 2] = 1
            pt_int = pts[j, :2].astype(np.int32)
            hp_offset[slot] = pts[j, :2] - pt_int
            hp_ind[slot] = pt_int[1] * out_res + pt_int[0]
            hp_mask[slot] = 1
            if opt.dense_hp:
                # must be before draw center hm gaussian
                draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                               pts[j, :2] - ct_int, radius, is_offset=True)
                draw_gaussian(dense_kps_mask[j], ct_int, radius)
            draw_gaussian(hm_hp[j], pt_int, hp_radius)

        draw_gaussian(hm[cls_id], ct_int, radius)
        box_row = [ct[0] - w / 2, ct[1] - h / 2,
                   ct[0] + w / 2, ct[1] + h / 2, 1]
        gt_det.append(
            box_row + pts[:, :2].reshape(n_joints * 2).tolist() + [cls_id])

    if rot != 0:
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask,
    }
    if opt.dense_hp:
        dense_kps = dense_kps.reshape(n_joints * 2, out_res, out_res)
        dense_kps_mask = dense_kps_mask.reshape(n_joints, 1,
                                                out_res, out_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(n_joints * 2,
                                                out_res, out_res)
        ret['dense_hps'] = dense_kps
        ret['dense_hps_mask'] = dense_kps_mask
        del ret['hps'], ret['hps_mask']
    if opt.reg_offset:
        ret['reg'] = reg
    if opt.hm_hp:
        ret['hm_hp'] = hm_hp
    if opt.reg_hp_offset:
        ret['hp_offset'] = hp_offset
        ret['hp_ind'] = hp_ind
        ret['hp_mask'] = hp_mask
    if opt.debug > 0 or not is_train:
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 40), dtype=np.float32)
        ret['meta'] = {'c': center, 's': scale, 'gt_det': gt_det,
                       'img_id': img_id}
    return ret
def __getitem__(self, index):
    """Assemble the detection sample for the image path at ``index``.

    Annotations are read from the XML file obtained by replacing 'Data'
    with 'Annotations' and the image extension with '.xml' in the image
    path; objects whose name is not in ``self.id2idx`` are ignored. The
    image is augmented on the training split, warped to the network input
    size and normalised, and the heatmap / size / offset targets are
    rendered at the down-sampled output size.

    Returns:
        dict with 'input', 'hm', 'reg_mask', 'ind', 'wh'; 'wh' is replaced
        by 'dense_wh'/'dense_wh_mask' or 'cat_spec_wh'/'cat_spec_mask'
        according to the options, and 'reg' / 'meta' are added when
        requested ('meta' carries the image path as 'img_id').
    """
    opt = self.opt
    is_train = self.split == 'train'

    img_path = self.images[index]
    extension = os.path.splitext(img_path)[-1]
    ann_path = img_path.replace('Data', 'Annotations') \
        .replace(extension, '.xml')

    # Each annotation is a float32 row [class index, x1, y1, x2, y2].
    anns = []
    for obj in ET.parse(ann_path).getroot().findall('object'):
        name = obj.find('name').text
        if name not in self.id2idx:
            continue
        cls_idx = self.id2idx[name]
        x1 = int(obj.find('bndbox/xmin').text)
        y1 = int(obj.find('bndbox/ymin').text)
        x2 = int(obj.find('bndbox/xmax').text)
        y2 = int(obj.find('bndbox/ymax').text)
        anns.append(np.array([cls_idx, x1, y1, x2, y2], dtype=np.float32))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        scale = np.array([input_w, input_h], dtype=np.float32)
    else:
        scale = max(height, width) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if is_train:
        if opt.not_rand_crop:
            scale_f = opt.scale
            shift_f = opt.shift
            center[0] += scale * np.clip(np.random.randn() * shift_f,
                                         -2 * shift_f, 2 * shift_f)
            center[1] += scale * np.clip(np.random.randn() * shift_f,
                                         -2 * shift_f, 2 * shift_f)
            scale = scale * np.clip(np.random.randn() * scale_f + 1,
                                    1 - scale_f, 1 + scale_f)
        else:
            scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            center[0] = np.random.randint(low=w_border,
                                          high=width - w_border)
            center[1] = np.random.randint(low=h_border,
                                          high=height - h_border)
        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if is_train and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    max_objs = self.max_objs
    trans_output = get_affine_transform(center, scale, 0,
                                        [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs), dtype=np.int64)
    reg_mask = np.zeros((max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = ann[1:]
        cls_id = int(ann[0])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h = bbox[3] - bbox[1]
        w = bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            # Nothing of the box is left inside the output map.
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        if opt.mse_loss:
            radius = opt.hm_gauss
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        spec = slice(cls_id * 2, cls_id * 2 + 2)
        cat_spec_wh[k, spec] = wh[k]
        cat_spec_mask[k, spec] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh}
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret['dense_wh'] = dense_wh
        ret['dense_wh_mask'] = dense_wh_mask
        del ret['wh']
    elif opt.cat_spec_wh:
        ret['cat_spec_wh'] = cat_spec_wh
        ret['cat_spec_mask'] = cat_spec_mask
        del ret['wh']
    if opt.reg_offset:
        ret['reg'] = reg
    if opt.debug > 0 or not is_train:
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': center, 's': scale, 'gt_det': gt_det,
                       'img_id': img_path}
    return ret
def __getitem__(self, index):
    """Build the dense-regression detection sample for position ``index``.

    The augmented input and its annotations come from
    ``self.get_img_ann(index, scale_lv=2)``; each annotation is a
    ``(bbox, cls_id, bbox2)`` triple. Boxes are divided by
    ``opt.down_ratio``, clipped to the output map, and drawn into the class
    heatmap and a 4-channel dense regression map, whose first two channels
    are returned as 'dense_wh' and last two as 'dense_off'.

    Returns:
        dict with 'input', 'hm', 'dense_wh', 'dense_off' and
        'dense_wh_mask'; 'cat_spec_wh'/'cat_spec_mask', 'reg' and 'meta'
        are added according to the options.
    """
    img_id = self.images[index]
    inp, ann_list, output_w, output_h, meta = self.get_img_ann(index,
                                                               scale_lv=2)

    # TBD: Mosaic augmentation requires a large input image size.
    # Increase the input size from 512x512 to 800x800 or larger and adjust
    # the scale level so the mosaic seam does not become a dominant object
    # boundary.
    # inp, ann_list, output_w, output_h, meta = self.mosaic_mix(index)

    if False:
        # Augmentation visualisation (manually enabled for debugging).
        img = inp.transpose(1, 2, 0)
        img = (img * self.std + self.mean) * 255
        for an in ann_list:
            bbox, cls_id, bbox2 = an
            bbox = bbox.astype(np.int32)
            bbox2 = bbox2.astype(np.int32)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, img.shape[1])
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, img.shape[0])
            bbox2[[0, 2]] = np.clip(bbox2[[0, 2]], 0, img.shape[1])
            bbox2[[1, 3]] = np.clip(bbox2[[1, 3]], 0, img.shape[0])
            if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0:
                cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (255, 0, 0), 3)
                if bbox2.shape[0] > 0:
                    cv2.rectangle(img, (bbox2[0], bbox2[1]),
                                  (bbox2[2], bbox2[3]), (0, 255, 0), 2)
        cv2.imwrite('temp_%d.jpg' % (index), img)

    num_objs = min(len(ann_list), self.max_objs)
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_reg = np.zeros((4, output_h, output_w), dtype=np.float32)
    # NOTE(review): reg is never filled in this variant, so the 'reg'
    # entry returned under opt.reg_offset is all zeros -- confirm whether
    # any consumer still relies on it.
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh_mask = np.zeros((4, output_h, output_w), dtype=np.float32)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []

    # The sampled points are unused, but the draws are kept so that the
    # global NumPy random stream (and seeded runs) stay unchanged.
    np.random.randint(output_w, size=(self.max_objs, 1))
    np.random.randint(output_h, size=(self.max_objs, 1))

    for k in range(num_objs):
        bbox, cls_id, bbox2 = ann_list[k]
        # In-place scaling to output-map coordinates.
        bbox /= self.opt.down_ratio
        bbox2 /= self.opt.down_ratio
        # Size before clipping, used to measure how much was cut away.
        oh, ow = bbox[3] - bbox[1], bbox[2] - bbox[0]
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # If clipping removed more than ~10% along either axis, fall back
        # to the secondary box when one is provided.
        if (h / (oh + 0.01) < 0.9 or w / (ow + 0.01) < 0.9) \
                and bbox2.shape[0] > 0:
            bbox = bbox2
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Centre of the (possibly replaced) box.
        ct = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)
        ct_int = ct.astype(np.int32)
        if (h > 2 or h / (oh + 0.01) > 0.5) \
                and (w > 2 or w / (ow + 0.01) > 0.5):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            draw_dense_reg(dense_reg, dense_wh_mask, ct_int, bbox, radius)
            draw_gaussian(hm[cls_id], ct_int, radius)

            # wh[k] was never written before being copied into cat_spec_wh,
            # which therefore stayed all-zero; record the box size first.
            wh[k] = 1. * w, 1. * h
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    dense_wh = dense_reg[:2, :, :]
    dense_off = dense_reg[2:, :, :]
    ret = {'input': inp, 'hm': hm, 'dense_wh': dense_wh,
           'dense_off': dense_off, 'dense_wh_mask': dense_wh_mask[:2]}
    if self.opt.cat_spec_wh:
        # ret never holds a 'wh' entry, so the former ``del ret['wh']``
        # raised KeyError whenever this option was enabled.
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': meta[0], 's': meta[1], 'gt_det': gt_det,
                'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Build one face-detection + landmark training sample for ``index``.

    Loads the image and its COCO-style annotations, applies
    ``Data_anchor_sample`` plus random crop/scale/rotation/flip and colour
    augmentation, then rasterises the annotations into centre heat maps,
    box-size / offset regression targets and keypoint targets at
    ``opt.output_res`` resolution.

    Args:
        index: Position of the image in ``self.images``.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'``, ``'landmarks'``, ``'hps_mask'`` and ``'wight_mask'``.
        Depending on ``opt`` flags it may also contain ``'dense_hps'`` /
        ``'dense_hps_mask'`` (in which case ``'hps_mask'`` is removed),
        ``'hm_offset'``, ``'hm_hp'``, ``'hp_offset'`` / ``'hp_ind'`` /
        ``'hp_mask'`` and ``'meta'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    if len(anns) > self.max_objs:
        # Sub-sample WITHOUT replacement: the default (with replacement)
        # duplicated some annotations while silently dropping others.
        anns = np.random.choice(anns, self.max_objs, replace=False)

    img = cv2.imread(img_path)
    img, anns = Data_anchor_sample(img, anns)
    # Recount after sampling so the loop below cannot index past the end if
    # Data_anchor_sample returned fewer annotations than it was given.
    num_objs = min(len(anns), self.max_objs)

    # (A commented-out keypoint-order / flip visualisation used to live here.)

    width = img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Scale jitter is disabled here; only the crop centre is random.
            # s = s * np.random.choice(np.arange(0.8, 1.1, 0.1))
            # _border = np.random.randint(128*0.4, 128*1.4)
            _border = s * np.random.choice([0.1, 0.2, 0.25])
            w_border = self._get_border(_border, img.shape[1])
            h_border = self._get_border(_border, img.shape[0])
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        # Random colour augmentation of the image.
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        # inp = Randaugment(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    # Boxes use the un-rotated transform, keypoints the rotated one.
    trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    wight_mask = np.ones((self.max_objs), dtype=np.float32)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Swap the keypoint pairs listed in flip_idx after mirroring.
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # NOTE(review): when rot != 0 a degenerate box (h == 0 or w == 0)
        # still enters this branch and np.log below yields -inf; reg_mask is
        # zeroed for rotated samples at the end - confirm the loss tolerates it.
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, int(radius))
            # Face centre coordinates and their integer cell.
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)

            # wh[k] = 1. * w, 1. * h   # CenterNet-style raw size
            # Box width / height in the log encoding attributed to the
            # CenterFace paper by the original comment.
            wh[k] = np.log(1. * w / 4), np.log(1. * h / 4)
            # Flat index of the centre cell in the output feature map.
            ind[k] = ct_int[1] * output_res + ct_int[0]
            # Sub-cell offset lost when the centre was truncated to int.
            reg[k] = ct - ct_int
            # Mark this slot as contributing to the loss.
            reg_mask[k] = 1
            # if w*h <= 20:
            #     wight_mask[k] = 15

            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                # Faces without keypoint annotations are treated as hard
                # samples.
                # NOTE(review): draw_gaussian is applied to the same centre
                # further down; confirm this value is not overwritten.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                # reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        # Keypoint offset relative to the face centre cell.
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        # Integer keypoint cell, its sub-cell offset, its flat
                        # index, and the loss mask for that keypoint.
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        # Keypoint Gaussian heat map.
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
                        # Faces whose annotated area is at most 16 px^2 do
                        # not supervise keypoints.
                        if ann['bbox'][2] * ann['bbox'][3] <= 16.0:
                            kps_mask[k, j * 2:j * 2 + 2] = 0
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        # Rotated samples: fill the whole centre heat map with 0.9999 and
        # disable box / keypoint regression for every object.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'landmarks': kps,
        'hps_mask': kps_mask,
        'wight_mask': wight_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        # ret carries the sparse keypoints under 'landmarks', not 'hps', so
        # ``del ret['hps']`` raised KeyError whenever dense_hp was enabled.
        # Drop the legacy key only if present; 'landmarks' is left in place.
        ret.pop('hps', None)
        ret.pop('hps_mask', None)
    if self.opt.reg_offset:
        # Sub-cell offset of the face centre.
        ret.update({'hm_offset': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        # Each gt_det row is 4 box coords + score + 2 * num_joints keypoint
        # coords + class id; size the empty fallback to match (this equals
        # the previous hard-coded 40 when num_joints == 17).
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6 + num_joints * 2), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret