def _get_additional_inputs(self, dets, meta, with_hm=True):
    """Render the prior-detection heatmap and collect output-grid indices.

    Args:
        dets: iterable of dicts; every entry is read for ``'score'`` and
            ``'bbox'``.
        meta: dict providing ``trans_input``, ``trans_output``,
            ``inp_width``, ``inp_height``, ``out_width`` and ``out_height``.
        with_hm: when true, one Gaussian per kept detection is drawn and the
            heatmap is returned as a tensor on ``self.opt.device``.

    Returns:
        ``(input_hm, output_inds)``. ``output_inds`` is an int64 tensor of
        shape ``(1, K)`` holding ``y * out_width + x`` for each kept
        detection. ``input_hm`` is a tensor only when ``with_hm`` is true
        (with a mirrored copy stacked on axis 0 if ``self.opt.flip_test``);
        otherwise it is the untouched zero-filled numpy array.
    """
    trans_input, trans_output = meta['trans_input'], meta['trans_output']
    in_w, in_h = meta['inp_width'], meta['inp_height']
    out_w, out_h = meta['out_width'], meta['out_height']

    heat = np.zeros((1, in_h, in_w), dtype=np.float32)
    flat_inds = []
    for det in dets:
        # Low-confidence detections do not contribute to the prior.
        if det['score'] < self.opt.pre_thresh:
            continue

        box_in = self._trans_bbox(det['bbox'], trans_input, in_w, in_h)
        box_out = self._trans_bbox(det['bbox'], trans_output, out_w, out_h)
        box_h = box_in[3] - box_in[1]
        box_w = box_in[2] - box_in[0]
        if not (box_h > 0 and box_w > 0):
            continue

        radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)))
        radius = max(0, int(radius))
        center = np.array(
            [(box_in[0] + box_in[2]) / 2, (box_in[1] + box_in[3]) / 2],
            dtype=np.float32)
        center_int = center.astype(np.int32)
        if with_hm:
            draw_umich_gaussian(heat[0], center_int, radius)

        # The int32 dtype truncates the output-space centre to a grid cell.
        center_out = np.array(
            [(box_out[0] + box_out[2]) / 2, (box_out[1] + box_out[3]) / 2],
            dtype=np.int32)
        flat_inds.append(center_out[1] * out_w + center_out[0])

    if with_hm:
        heat = heat[np.newaxis]
        if self.opt.flip_test:
            mirrored = heat[:, :, :, ::-1]
            heat = np.concatenate((heat, mirrored), axis=0)
        heat = torch.from_numpy(heat).to(self.opt.device)

    inds = np.array(flat_inds, np.int64).reshape(1, -1)
    inds = torch.from_numpy(inds).to(self.opt.device)
    return heat, inds
def get_additional_inputs(dets, meta, with_hm=True, pre_thresh=0.2,
                          device=None):
    """Render input heatmap from previous trackings.

    Args:
        dets: iterable of dicts; each is read for ``'score'``, ``'active'``
            and ``'bbox'``.
        meta: dict providing ``trans_input``, ``trans_output``,
            ``inp_width``, ``inp_height``, ``out_width`` and ``out_height``.
        with_hm: when true, draw one Gaussian per kept detection and return
            the heatmap as a tensor.
        pre_thresh: detections scoring below this value are skipped
            (defaults to the previously hard-coded 0.2).
        device: target device for the returned tensors. ``None`` selects
            CUDA when it is available and falls back to the CPU otherwise,
            so the function no longer fails on CPU-only machines.

    Returns:
        ``(input_hm, output_inds)``. ``output_inds`` is an int64 tensor of
        shape ``(1, K)`` with ``y * out_width + x`` per kept detection.
        ``input_hm`` is a ``(1, 1, H, W)`` tensor when ``with_hm`` is true,
        otherwise the untouched zero-filled ``(1, H, W)`` numpy array.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    trans_input, trans_output = meta['trans_input'], meta['trans_output']
    inp_width, inp_height = meta['inp_width'], meta['inp_height']
    out_width, out_height = meta['out_width'], meta['out_height']
    input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

    output_inds = []
    for det in dets:
        # Skip weak detections and tracks flagged as inactive.
        if det['score'] < pre_thresh or det['active'] == 0:
            continue
        bbox = trans_bbox(det['bbox'], trans_input, inp_width, inp_height)
        bbox_out = trans_bbox(det['bbox'], trans_output,
                              out_width, out_height)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        if with_hm:
            draw_umich_gaussian(input_hm[0], ct_int, radius)

        # The int32 dtype truncates the output-space centre to a grid cell.
        ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2,
                           (bbox_out[1] + bbox_out[3]) / 2], dtype=np.int32)
        output_inds.append(ct_out[1] * out_width + ct_out[0])

    if with_hm:
        input_hm = input_hm[np.newaxis]
        input_hm = torch.from_numpy(input_hm).to(device)
    output_inds = np.array(output_inds, np.int64).reshape(1, -1)
    output_inds = torch.from_numpy(output_inds).to(device)
    return input_hm, output_inds
def _get_pre_dets(self, anns, trans_input, trans_output):
    """Build the (randomly disturbed) previous-frame heatmap and centres.

    For every kept annotation the box is mapped into input space with
    ``trans_input``, clipped to the input size, and its centre is jittered
    with Gaussian noise scaled by ``self.opt.hm_disturb``. With probability
    ``self.opt.lost_disturb`` the confidence is set to 0, and with
    probability ``self.opt.fp_disturb`` an extra, slightly shifted Gaussian
    is drawn.

    Args:
        anns: iterable of annotation dicts (``category_id``, ``bbox`` and
            optionally ``iscrowd`` / ``track_id`` are read).
        trans_input: affine transform passed to ``affine_transform``.
        trans_output: not used by this method.

    Returns:
        ``(pre_hm, pre_cts, track_ids)``. ``pre_hm`` is a
        ``(1, input_h, input_w)`` float32 array, or ``None`` when
        ``self.opt.pre_hm`` is falsy. ``pre_cts`` holds centres divided by
        ``self.opt.down_ratio``; ``track_ids`` holds the matching track id,
        or -1 when the annotation has none.

    The order of the ``np.random`` calls determines the output for a given
    seed, so statements must not be reordered.
    """
    hm_h, hm_w = self.opt.input_h, self.opt.input_w
    down_ratio = self.opt.down_ratio
    trans = trans_input
    reutrn_hm = self.opt.pre_hm
    pre_hm = np.zeros(
        (1, hm_h, hm_w), dtype=np.float32) if reutrn_hm else None
    pre_cts, track_ids = [], []
    for ann in anns:
        cls_id = int(self.cat_ids[ann['category_id']])
        # Skip out-of-range classes, ids <= -99 and crowd regions.
        if cls_id > self.opt.num_classes or cls_id <= -99 or \
                ('iscrowd' in ann and ann['iscrowd'] > 0):
            continue
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # Map both corners into input space, then clip to the input size.
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # NOTE(review): max_rad is written but never read in this method.
        max_rad = 1
        if (h > 0 and w > 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            max_rad = max(max_rad, radius)
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            # ct0 keeps the undisturbed centre; ct is jittered below.
            ct0 = ct.copy()
            conf = 1
            ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
            ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
            # conf drops to 0 with probability lost_disturb.
            conf = 1 if np.random.random() > self.opt.lost_disturb else 0
            ct_int = ct.astype(np.int32)
            # NOTE(review): the earlier condition was `conf == 0` (kept
            # below as a comment); confirm the flip to `conf == 1` is
            # intended.
            # if conf == 0:
            if conf == 1:
                pre_cts.append(ct / down_ratio)
            else:
                pre_cts.append(ct0 / down_ratio)
            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)
            if reutrn_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)
            # Optionally draw an extra Gaussian near the true centre.
            if np.random.random() < self.opt.fp_disturb and reutrn_hm:
                ct2 = ct0.copy()
                # Hard code heatmap disturb ratio, haven't tried other numbers.
                ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                ct2_int = ct2.astype(np.int32)
                draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)
    return pre_hm, pre_cts, track_ids
def visualize_centers(self, im_blob, keep, node0_neighbor_idx, attn, output,
                      p_img):
    """Debug helper: write two visualisation PNGs, then terminate the process.

    Writes ``heatmap.png`` (the current frame overlaid with Gaussians at the
    kept neighbour centres) and ``p_img_vis.png`` (the previous frame with
    ``output['node0_box']`` drawn on it) into the working directory, and
    finally raises ``SystemExit``.

    Args:
        im_blob: image tensor; squeezed and transposed from (C, H, W) to
            (H, W, C) for display.
        keep: index/mask applied to ``node0_neighbor_idx``.
        node0_neighbor_idx: tensor of flat indices on a grid 272 cells wide.
        attn: accepted for interface compatibility; not used.
        output: dict whose ``'node0_box'`` entry holds a box that is scaled
            by (1088, 608, 1088, 608) before drawing.
        p_img: previous-frame image tensor, handled like ``im_blob`` but with
            the channel order reversed.

    Raises:
        SystemExit: always, after both images are written.
    """
    from utils.image import draw_umich_gaussian, gaussian_radius

    # Select the headless backend before pyplot is imported so the choice
    # takes effect on machines without a display.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    from matplotlib import patches

    grid_w, stride = 272, 4          # index grid width and its stride
    img_h, img_w = 608, 1088         # network input resolution

    kept_idx = node0_neighbor_idx[keep].cpu().numpy()
    # Flat grid index -> (x, y) in input pixels; the `+ 1` row offset is
    # kept as in the original mapping.
    centres = np.array(
        [[idx % grid_w, idx // grid_w + 1] for idx in kept_idx]) * stride

    radius = gaussian_radius(
        (self.opt.viz_heatmap_radius, self.opt.viz_heatmap_radius))
    radius = max(0, int(radius))
    hm = np.zeros((img_h, img_w))
    for centre in centres:
        draw_umich_gaussian(hm, centre.astype(np.int32), radius)

    fig, ax = plt.subplots()
    im_blob_plt = im_blob.squeeze().cpu().numpy().transpose(1, 2, 0)
    ax.imshow(im_blob_plt)
    ax.imshow(hm, cmap='plasma', alpha=0.3)
    ax.axis('off')
    fig.savefig("heatmap.png")
    plt.close(fig)

    # Draw the previous box. Multiplying out of place avoids scaling the
    # caller's tensor, which shares memory with the numpy view on CPU.
    fig, ax = plt.subplots()
    node0_box = output['node0_box'].cpu().numpy() * np.array(
        [img_w, img_h, img_w, img_h])
    p_img_np = p_img.squeeze().cpu().numpy()
    p_img_np = p_img_np.transpose(1, 2, 0)[:, :, ::-1]
    ax.imshow(p_img_np)
    rect = patches.Rectangle((node0_box[0], node0_box[1]),
                             node0_box[2] - node0_box[0],
                             node0_box[3] - node0_box[1],
                             linewidth=1, edgecolor='g', facecolor='none')
    ax.add_patch(rect)
    ax.axis('off')
    fig.savefig("p_img_vis.png")
    plt.close(fig)

    # Same effect as the former `exit()`, without depending on the `site`
    # module's interactive helper being installed.
    raise SystemExit
def process_data(line, use_aug):
    """Parse one annotation line and build the CenterNet training targets.

    The line is whitespace separated:
    ``<line_id> <image_path> <ori_w> <ori_h> <x1,y1,x2,y2,cls> ...``.

    Args:
        line: annotation line as ``str`` or ``bytes``.
        use_aug: when true, apply random flip, crop and translate.

    Returns:
        ``(image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id)``.
        ``ori_w``, ``ori_h`` and ``line_id`` are returned as the raw strings
        from the line.

    Raises:
        KeyError: if the image path does not exist or the image cannot be
            decoded.
    """
    if not isinstance(line, str):
        line = line.decode()
    s = line.split()
    line_id = s[0]
    image_path = s[1]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)
    raw = cv2.imread(image_path)
    if raw is None:
        # cv2.imread signals an undecodable file by returning None; fail
        # here instead of deep inside the preprocessing code.
        raise KeyError("%s could not be read ... " % image_path)
    image = np.array(raw)
    ori_w = s[2]
    ori_h = s[3]
    labels = np.array(
        [[int(float(v)) for v in box.split(',')] for box in s[4:]])

    if use_aug:
        image, labels = random_horizontal_flip(image, labels)
        image, labels = random_crop(image, labels)
        image, labels = random_translate(image, labels)
    image, labels = image_preporcess(
        np.copy(image), [cfg.input_image_h, cfg.input_image_w],
        np.copy(labels))

    output_h = cfg.input_image_h // cfg.down_ratio
    output_w = cfg.input_image_w // cfg.down_ratio
    hm = np.zeros((output_h, output_w, cfg.num_classes), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros((cfg.max_objs), dtype=np.float32)
    reg_mask = np.zeros((cfg.max_objs), dtype=np.float32)

    # The target arrays have cfg.max_objs rows; extra labels are dropped
    # rather than overflowing them.
    for idx, label in enumerate(labels[:cfg.max_objs]):
        bbox = label[:4] / cfg.down_ratio
        class_id = label[4]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
        wh[idx] = 1. * w, 1. * h
        ind[idx] = ct_int[1] * output_w + ct_int[0]
        reg[idx] = ct - ct_int  # sub-pixel offset lost by the int cast
        reg_mask[idx] = 1
    return image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id
def __getitem__(self, idx):
    """Return one face sample: cropped image, landmarks and heatmaps.

    The face box is cropped, the crop is resized so its shorter side equals
    ``self.CROP_SIZE`` (aspect ratio preserved) and then centre-cropped to a
    square. Landmarks are scaled by the same factor and shifted by the
    centre-crop margins. One Gaussian heatmap per landmark is drawn, and
    ``M`` is the 3x3 grey-dilated heatmap thresholded at 0.5.

    Returns:
        dict with keys ``file_name``, ``image``, ``landmarks``,
        ``crop_margin_x``, ``crop_margin_y``, ``scale_coef``, ``top_x``,
        ``top_y``, ``hmap`` and ``M``, passed through ``self.transforms``
        when that is set.
    """
    box_row = self.df.loc[idx]
    points = np.array(self.df_landmarks.loc[idx].tolist())
    image = Image.open(os.path.join(self.root, box_row['filename']))

    # Crop the face.
    face_box = [box_row['top_x'], box_row['top_y'],
                box_row['bottom_x'], box_row['bottom_y']]
    image = image.crop(face_box)

    # Resize the crop so the shorter side equals CROP_SIZE, keeping the
    # aspect ratio.
    width, height = image.size
    scale = self.CROP_SIZE / min(width, height)
    image = image.resize((int(width * scale), int(height * scale)))
    points = points * scale

    # Centre crop to CROP_SIZE x CROP_SIZE.
    width, height = image.size
    margin_h = (height - self.CROP_SIZE) // 2
    margin_w = (width - self.CROP_SIZE) // 2
    image = image.crop([margin_w, margin_h,
                        self.CROP_SIZE + margin_w,
                        self.CROP_SIZE + margin_h])
    points = points.astype(np.int16).reshape(-1, 2)
    points -= np.array((margin_w, margin_h), dtype=np.int16)[None, :]

    map_shape = (self.NUM_PTS, self.hmap_size, self.hmap_size)
    hmap = np.zeros(map_shape, dtype=np.float32)
    M = np.zeros(map_shape, dtype=np.float32)
    for ind, xy in enumerate(points):
        # Landmark position rescaled from crop pixels to heatmap cells.
        hmap[ind] = draw_umich_gaussian(
            hmap[ind], xy / self.CROP_SIZE * self.hmap_size, 7)
    for ind, channel in enumerate(hmap):
        M[ind] = grey_dilation(channel, size=(3, 3))
    M = np.where(M >= 0.5, 1, 0)

    sample = {
        "file_name": box_row['filename'],
        "image": image,
        "landmarks": torch.from_numpy(points.astype(np.float32)),
        "crop_margin_x": margin_w,
        "crop_margin_y": margin_h,
        "scale_coef": scale,
        "top_x": box_row['top_x'],
        "top_y": box_row['top_y'],
        "hmap": hmap,
        "M": M,
    }
    if self.transforms is None:
        return sample
    return self.transforms(sample)
def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
    """Fill the keypoint targets of object ``k`` and record them in ``gt_det``.

    Keypoints come from ``ann["keypoints"]`` reshaped to
    ``(num_joints, 3)``, or zeros when the key is absent. Each joint is
    mapped with ``trans_output``:

    * third value not > 0: coordinates are zeroed and the object's box
      region of ``ret["hm_hp"][j]`` is passed to ``self._ignore_region``;
    * inside the output map: offsets, indices, masks and a Gaussian are
      written; a third value of exactly 1 additionally writes
      ``self.ignore_val`` at the peak and clears the offset/heatmap masks;
    * outside the output map: coordinates are zeroed.

    The flattened (x, y) coordinates are appended to ``gt_det["hps"]``.
    """
    num_joints = self.num_joints
    if "keypoints" in ann:
        pts = np.array(ann["keypoints"], np.float32).reshape(num_joints, 3)
    else:
        pts = np.zeros((self.num_joints, 3), np.float32)

    if self.opt.simple_radius > 0:
        hp_radius = int(
            simple_radius(h, w, min_overlap=self.opt.simple_radius))
    else:
        hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        hp_radius = max(0, int(hp_radius))

    for j in range(num_joints):
        pts[j, :2] = affine_transform(pts[j, :2], trans_output)

        if not pts[j, 2] > 0:
            pts[j, :2] *= 0
            self._ignore_region(
                ret["hm_hp"][j,
                             int(bbox[1]):int(bbox[3]) + 1,
                             int(bbox[0]):int(bbox[2]) + 1])
            continue

        px, py = pts[j, 0], pts[j, 1]
        inside = (0 <= px < self.opt.output_w
                  and 0 <= py < self.opt.output_h)
        if not inside:
            pts[j, :2] *= 0
            continue

        slot = k * num_joints + j
        pair = slice(j * 2, j * 2 + 2)
        ret["hps"][k, pair] = pts[j, :2] - ct_int
        ret["hps_mask"][k, pair] = 1
        pt_int = pts[j, :2].astype(np.int32)
        ret["hp_offset"][slot] = pts[j, :2] - pt_int
        ret["hp_ind"][slot] = pt_int[1] * self.opt.output_w + pt_int[0]
        ret["hp_offset_mask"][slot] = 1
        ret["hm_hp_mask"][slot] = 1
        ret["joint"][slot] = j
        draw_umich_gaussian(ret["hm_hp"][j], pt_int, hp_radius)
        if pts[j, 2] == 1:
            # The Gaussian stays, but the peak and both masks are reset.
            ret["hm_hp"][j, pt_int[1], pt_int[0]] = self.ignore_val
            ret["hp_offset_mask"][slot] = 0
            ret["hm_hp_mask"][slot] = 0

    gt_det["hps"].append(pts[:, :2].reshape(num_joints * 2))
# Scratch check: build the CenterNet targets for one hand-written box and
# dump the resulting class heatmap to disk.
hm = np.zeros((output_h, output_w, num_classes), dtype=np.float32)
wh = np.zeros((max_objs, 2), dtype=np.float32)
reg = np.zeros((max_objs, 2), dtype=np.float32)
ind = np.zeros((max_objs), dtype=np.float32)
reg_mask = np.zeros((max_objs), dtype=np.float32)

down_ratio = 4
label = np.array([10, 30, 50, 100, 1])  # x1, y1, x2, y2, class id
idx = 0

bbox = label[:4] / down_ratio
class_id = label[4]
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
print("w h is", w, h)
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
print("radius is", radius)
radius = max(0, int(radius))
print(radius)
ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
              dtype=np.float32)
ct_int = ct.astype(np.int32)
print("ct is", ct)
draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
print(hm)
# hm is laid out (H, W, C): save the drawn class plane. `hm[0]` would be
# the first image row across all classes, not a heatmap.
cv2.imwrite("/home/pcl/tf_work/TF_CenterNet/single_heatmap.jpg",
            hm[:, :, class_id] * 255)
wh[idx] = 1. * w, 1. * h
ind[idx] = ct_int[1] * output_w + ct_int[0]
reg[idx] = ct - ct_int
reg_mask[idx] = 1
print("ind is", ind)
print("wh is", wh)
print("reg is", reg)
def _get_additional_inputs(self, tracks, meta, age_images, with_hm=True, with_kmf=False, with_sch=False): ''' Render input heatmap from previous trackings. ''' trans_input, trans_output = meta['trans_input'], meta['trans_output'] inp_width, inp_height = meta['inp_width'], meta['inp_height'] out_width, out_height = meta['out_width'], meta['out_height'] input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32) output_inds = [] track_ids = [] kmf_inds = [] sch_weights = [] for track in tracks: if track['score'] < self.opt.pre_thresh[ track['class'] - 1]: #or det['active'] == 0: continue bbox = self._trans_bbox(track['bbox'], trans_input, inp_width, inp_height) bbox_out = self._trans_bbox(track['bbox'], trans_output, out_width, out_height) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if (h > 0 and w > 0): if 'seg' in self.opt.task and self.opt.seg_center: seg_mask = self.get_masks_as_input(track, trans_input) ct = np.array([ np.mean(np.where(seg_mask >= 0.5)[1]), np.mean(np.where(seg_mask >= 0.5)[0]) ], dtype=np.float32) else: ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2, (bbox_out[1] + bbox_out[3]) / 2], dtype=np.int32) output_inds.append(ct_out[1] * out_width + ct_out[0]) track_ids.append(track['tracking_id']) if with_sch: sch_weights.append(track['sch_weight']) if with_hm: radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) draw_umich_gaussian(input_hm[0], ct_int, radius) if with_kmf: if track['active'] >= self.opt.kmf_confirm_age: p_bbox_ = track['kmf'].predict()[0] p_bbox = self._trans_bbox(p_bbox_, trans_input, inp_width, inp_height) # kmf_ind: trans to output p_bbox_out = self._trans_bbox(p_bbox_, trans_output, out_width, out_height) p_ct_out = np.array( [(p_bbox_out[0] + p_bbox_out[2]) / 2, (p_bbox_out[1] + p_bbox_out[3]) / 2], dtype=np.int32) kmf_inds.append(p_ct_out[1] * out_width + p_ct_out[0]) else: # 
unconfirm kmf tracker kmf_inds.append(ct_out[1] * out_width + ct_out[0]) if with_hm: input_hm = input_hm[np.newaxis] if self.opt.flip_test: input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]), axis=0) input_hm = torch.from_numpy(input_hm).to(self.opt.device) if with_kmf: assert (len(output_inds) == len(kmf_inds)) num_pre = len(output_inds) output_inds = np.array(output_inds, np.int64).reshape(1, -1) output_inds = torch.from_numpy(output_inds).to(self.opt.device) kmf_inds = np.array(kmf_inds, np.int64).reshape(1, -1) kmf_inds = torch.from_numpy(kmf_inds).to( self.opt.device) if with_kmf else None track_ids = np.array(track_ids, np.int64).reshape(1, -1) sch_weights = np.array(sch_weights)[None, :] sch_weights = torch.from_numpy(sch_weights).to(self.opt.device) return input_hm, output_inds, None, track_ids, kmf_inds, sch_weights
def __getitem__(self, index):
    """Load one COCO image and build its CenterNet training targets.

    In the ``'train'`` split the image is randomly scaled, re-centred,
    optionally mirrored and colour-augmented; otherwise the input size is
    derived from the image size and ``self.opt.pad``. Boxes are mapped to
    the down-sampled output grid, where a class heatmap, size (``wh``),
    sub-pixel offset (``reg``), flat index (``ind``) and validity mask
    (``reg_mask``) are filled for at most ``cfg.max_objs`` annotations.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg'``, ``'reg_mask'``,
        ``'ind'`` and ``'wh'``; plus ``'meta'`` (centre, scale, ground-truth
        boxes, image id) when ``self.opt.debug > 0`` or outside training.

    The order of the ``np.random`` calls determines the augmentation for a
    given seed, so statements must not be reordered.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Annotations beyond cfg.max_objs are ignored.
    num_objs = min(len(anns), cfg.max_objs)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # c: crop centre (x, y); s: crop scale.
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.split == 'train':
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
    else:
        # `(x | pad) + 1` rounds up to a multiple of pad + 1 when pad has
        # the form 2**n - 1 — presumably the case here; TODO confirm.
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    flipped = False
    if self.split == 'train':
        # Random scale in [0.6, 1.4) and a random centre inside the borders.
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = get_border(128, img.shape[1])
        h_border = get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1
    trans_matrix = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_matrix, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    # TODO: inp contains values below 0 after color_aug (author's note).
    if self.split == 'train':
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - cfg.mean) / cfg.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW
    output_h = input_h // cfg.down_ratio
    output_w = input_w // cfg.down_ratio
    # Same crop as above, now mapped onto the down-sampled output grid.
    trans_matrix = get_affine_transform(c, s, 0, [output_w, output_h])
    hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros(cfg.max_objs, dtype=np.int64)
    reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)
    gt_box = []
    for i in range(num_objs):
        ann = anns[i]
        bbox = coco2x1y1x2y2(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            # Mirror the x-coordinates, swapping left and right edges.
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_matrix)
        bbox[2:] = affine_transform(bbox[2:], trans_matrix)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            # Get an object-size-adaptive radius.
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[i] = 1. * w, 1. * h
            ind[i] = ct_int[1] * output_w + ct_int[0]
            reg[i] = ct - ct_int  # sub-pixel offset lost by the int cast
            reg_mask[i] = 1
            gt_box.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])
    ret = {
        'input': inp,
        'hm': hm,
        'reg': reg,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.debug > 0 or not self.split == 'train':
        # A single all-zero row stands in when no box survived.
        gt_box = np.array(
            gt_box, dtype=np.float32) if len(gt_box) > 0 else np.zeros(
                (1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_box, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def _add_instance_tl(self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann,
                     trans_output, aug_s, calib, pre_cts=None,
                     track_ids=None):
    """Write the ``*_tl`` targets of object ``k`` into ``ret`` and ``gt_det``.

    Degenerate boxes (non-positive height or width) are ignored. Otherwise
    the class, mask, size, flat index, sub-pixel offset and a Gaussian on
    ``ret['hm_tl'][cls_id - 1]`` are written, followed by the optional
    ``ltrb``, ``ltrb_amodal``, ``nuscenes_att`` and ``velocity`` heads when
    they are listed in ``self.opt.heads``.

    No tracking-offset targets are produced here: ``pre_cts`` and
    ``track_ids`` are accepted but unused, as are ``trans_output``,
    ``aug_s`` and ``calib``.
    """
    box_h = bbox[3] - bbox[1]
    box_w = bbox[2] - bbox[0]
    if box_h <= 0 or box_w <= 0:
        return

    radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)))
    radius = max(0, int(radius))
    center = np.array(
        [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
    center_int = center.astype(np.int32)
    cx, cy = center_int[0], center_int[1]

    ret['cat_tl'][k] = cls_id - 1
    ret['mask_tl'][k] = 1
    if 'wh_tl' in ret:
        ret['wh_tl'][k] = 1. * box_w, 1. * box_h
        ret['wh_tl_mask'][k] = 1
    ret['ind_tl'][k] = cy * self.opt.output_w + cx
    ret['reg_tl'][k] = center - center_int
    ret['reg_tl_mask'][k] = 1
    draw_umich_gaussian(ret['hm_tl'][cls_id - 1], center_int, radius)

    half_w, half_h = box_w / 2, box_h / 2
    gt_det['bboxes'].append(
        np.array([center[0] - half_w, center[1] - half_h,
                  center[0] + half_w, center[1] + half_h],
                 dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(cls_id - 1)
    gt_det['cts'].append(center)

    if 'ltrb' in self.opt.heads:
        # Box edges relative to the integer centre.
        ret['ltrb'][k] = (bbox[0] - cx, bbox[1] - cy,
                          bbox[2] - cx, bbox[3] - cy)
        ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in self.opt.heads:
        ret['ltrb_amodal'][k] = (bbox_amodal[0] - cx, bbox_amodal[1] - cy,
                                 bbox_amodal[2] - cx, bbox_amodal[3] - cy)
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in self.opt.heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in self.opt.heads:
        # -1000 or lower in any component marks the velocity as unusable.
        # NOTE(review): nothing is appended to gt_det['velocity'] in the
        # visible code — confirm this is intended.
        if ('velocity' in ann) and min(ann['velocity']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
            ret['velocity_mask'][k] = 1
def __getitem__(self, index):
    """Return ``(image, targets)`` for one image and its single box.

    The box is read from ``<box_root>/<image stem>.txt`` as centre-x,
    centre-y, width, height and converted to corner form; when the file is
    missing the box defaults to ``[0, 0, 1, 1]``. Coordinates are multiplied
    by ``configs.grid_wh`` to place centres on the output grid, which is
    consistent with boxes normalised to [0, 1].

    Returns:
        ``image`` (RGB, after ``self.transform`` if set) and a dict with
        ``'hm'``, ``'wh'``, ``'xy'``, ``'ind'``, ``'dense_xy'``,
        ``'dense_wh'``, ``'dense_mask'`` and ``'boxes'``.
    """
    image_fn = self.flist[index]
    image = cv2.cvtColor(cv2.imread(image_fn), cv2.COLOR_BGR2RGB)

    box_fn = str(Path(self.box_root) / (Path(image_fn).stem + '.txt'))
    if osp.exists(box_fn):
        xx, yy, ww, hh = np.loadtxt(box_fn)
        corners = [xx - ww / 2, yy - hh / 2, xx + ww / 2, yy + hh / 2]
    else:
        corners = [0.0, 0.0, 1.0, 1.0]
    boxes = np.array([corners]).astype('float32')

    if self.transform:
        image, boxes = self.transform(image, boxes)

    # Generate the box ground truth for the loss; boxes are x1, y1, x2, y2.
    cfgs = self.configs
    output_h, output_w, grid_wh = cfgs.hh, cfgs.ww, cfgs.grid_wh
    hin, win = cfgs.image_size  # unpacked as before; not used below
    hm = np.zeros((cfgs.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((cfgs.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    dense_xy = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((cfgs.max_objs, 2), dtype=np.float32)
    ind = np.zeros((cfgs.max_objs), dtype=np.int64)
    reg_mask = np.zeros((cfgs.max_objs), dtype=np.uint8)  # not returned

    num_objs = min(boxes.shape[0], cfgs.max_objs)
    for k, bbox in enumerate(boxes[:num_objs]):
        h = bbox[3] - bbox[1]
        w = bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius(
            (math.ceil(h * grid_wh), math.ceil(w * grid_wh)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2.0 * grid_wh,
                       (bbox[1] + bbox[3]) / 2.0 * grid_wh],
                      dtype=np.float32)
        ct_int = np.clip(ct.astype(np.int32), 0, grid_wh - 1)
        # The heatmap channel is selected by object index k.
        draw_umich_gaussian(hm[k], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        draw_dense_reg(dense_xy, hm.max(axis=0), ct_int, reg[k], radius)

    # Channel-wise maximum of the heatmap, duplicated for both dense planes.
    hm_a = hm.max(axis=0, keepdims=True)
    dense_mask = np.concatenate([hm_a, hm_a], axis=0)
    ret = {
        'hm': hm,
        'wh': wh,
        'xy': reg,
        'ind': ind,
        'dense_xy': dense_xy,
        'dense_wh': dense_wh,
        'dense_mask': dense_mask,
        'boxes': boxes,
    }
    return image, ret
def _add_kmf_att(self, ret, trans_input, ann=None, bbox=None, init=False,
                 conf=1, draw=True):
    """Draw one (randomly disturbed) attention Gaussian on ``ret['kmf_att']``.

    Args:
        ret: target dict; ``ret['kmf_att'][0]`` is drawn on.
        trans_input: affine transform into input space.
        ann: annotation dict used when ``bbox`` is None; a missing
            ``'bbox'`` is derived from ``ann['segmentation']`` and stored
            back into ``ann``.
        bbox: optional box as x1, y1, x2, y2. It is transformed and clipped
            IN PLACE, so the caller's array is modified.
        init: selects the initialisation settings when ``opt.guss_rad`` is
            on (``min_overlap`` 0.2 instead of 0.6, confidence
            ``opt.init_conf`` instead of 1).
        conf: peak value of the Gaussian; overridden when ``opt.guss_rad``
            is on and set to 0 with probability ``opt.att_lost_disturb``.
        draw: when false nothing is drawn, but the random numbers are still
            consumed.

    Returns:
        The jittered integer centre, or ``None`` for a degenerate box.

    The order of the ``np.random`` calls determines the output for a given
    seed and is kept as is.
    """
    trans = trans_input
    hm_h, hm_w = self.opt.input_h, self.opt.input_w
    if bbox is None and ann is not None:
        if 'bbox' not in ann.keys():
            ann['bbox'] = mask_utils.toBbox(ann['segmentation'])
        bbox = self._coco_box_to_bbox(ann['bbox'])

    # Map both corners into input space and clip to the input size.
    bbox[:2] = affine_transform(bbox[:2], trans)
    bbox[2:] = affine_transform(bbox[2:], trans)
    bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
    bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    if not (h > 0 and w > 0):
        return None

    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    if self.opt.guss_rad:
        min_overlap = 0.2 if init else 0.6
        conf = self.opt.init_conf if init else 1
        # Pass the selected overlap on; it used to be computed and then
        # ignored in favour of a literal 0.2.
        radius = gaussian_radius_center((math.ceil(h), math.ceil(w)),
                                        min_overlap=min_overlap)
    else:
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))

    ct0 = ct.copy()  # undisturbed centre, reused for the false positive
    ct[0] = ct[0] + np.random.randn() * self.opt.att_hm_disturb * w
    ct[1] = ct[1] + np.random.randn() * self.opt.att_hm_disturb * h
    conf = conf if np.random.random() > self.opt.att_lost_disturb else 0
    ct_int = ct.astype(np.int32)

    if self.opt.guss_oval and draw:
        # The oval covers half the box, padded by the radius only when the
        # radius mode applies to this call.
        pad = self.opt.guss_rad and (init or self.opt.guss_rad_always)
        radius = radius if pad else 0
        draw_umich_gaussian_oval(ret['kmf_att'][0], ct_int,
                                 radius_h=h // 2 + radius,
                                 radius_w=w // 2 + radius, k=conf)
    elif draw:
        draw_umich_gaussian(ret['kmf_att'][0], ct_int, radius, k=conf)

    if np.random.random() < self.opt.att_fp_disturb:
        # Generate a false positive near the undisturbed centre.
        ct2 = ct0.copy()
        ct2[0] = ct2[0] + np.random.randn() * self.opt.att_disturb_dist * w
        ct2[1] = ct2[1] + np.random.randn() * self.opt.att_disturb_dist * h
        ct2_int = ct2.astype(np.int32)
        if self.opt.guss_oval and draw:
            draw_umich_gaussian_oval(ret['kmf_att'][0], ct2_int,
                                     radius_h=h // 2, radius_w=w // 2,
                                     k=conf)
        elif draw:
            draw_umich_gaussian(ret['kmf_att'][0], ct2_int, radius, k=conf)
    return ct_int
def __getitem__(self, index):
    """Load one COCO image and build heatmap, box and contour-code targets.

    Each non-crowd annotation is turned into a contour of
    ``self.n_vertices`` points clipped to its box; contours whose spread is
    below 1e-6 or not finite are discarded. In the ``'train'`` split the
    image is randomly scaled, re-centred, mirrored with probability 0.5 and
    colour-augmented. Boxes and contours are mapped onto the feature map,
    and every contour is re-indexed to start at its left-most vertex,
    normalised by its mass centre and spread, and sparse-coded against
    ``self.dictionary``.

    Returns:
        dict with ``'image'``, ``'shapes'``, ``'codes'``, ``'offsets'``,
        ``'std'``, ``'hmap'``, ``'w_h_'``, ``'regs'``, ``'inds'``,
        ``'ind_masks'``, ``'c'``, ``'s'`` and ``'img_id'``.

    The order of the ``np.random`` calls determines the augmentation for a
    given seed and is kept as is.
    """
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    w_img = int(img_info['width'])
    h_img = int(img_info['height'])

    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        if anno['iscrowd'] == 1:  # excludes crowd objects
            continue
        polygons = get_connected_polygon_using_mask(
            anno['segmentation'], (h_img, w_img),
            n_vertices=self.n_vertices, closing_max_kernel=50)
        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))

        # Bring the contour to a fixed number of vertices.
        if len(contour) > self.n_vertices:
            fixed_contour = resample(contour, num=self.n_vertices)
        else:
            fixed_contour = turning_angle_resample(contour, self.n_vertices)
        fixed_contour[:, 0] = np.clip(fixed_contour[:, 0],
                                      gt_x1, gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(fixed_contour[:, 1],
                                      gt_y1, gt_y1 + gt_h)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # `x == np.nan` is always False, so NaN spreads used to slip
        # through; isfinite rejects both NaN and inf.
        if contour_std < 1e-6 or not np.isfinite(contour_std):
            continue  # invalid shape

        updated_bbox = [
            np.min(fixed_contour[:, 0]),
            np.min(fixed_contour[:, 1]),
            np.max(fixed_contour[:, 0]),
            np.max(fixed_contour[:, 1])
        ]
        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder object; its zero-size box is skipped further down.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(160, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # normalised gt contours
    center_offsets = np.zeros(
        (self.max_objs, 2), dtype=np.float32)  # mass centre - bbox centre
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    contour_std_ = np.zeros((self.max_objs, 1),
                            dtype=np.float32)  # contour spread per object
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # sub-pixel offset of the centre
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
        if k >= self.max_objs:
            # The target arrays have max_objs rows; writing further would
            # raise IndexError.
            break
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-coordinates (even positions).
            shape[0::2] = width - shape[0::2] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply the same scale-and-crop transform to every vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)
        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, fmap_w - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, fmap_h - 1)

        # Reverse the vertex order when the helper reports the polygon as
        # not clockwise.
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()

        # Index from the left-most vertex (argmin over x).
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)
        mass_center = np.mean(indexed_shape, axis=0)
        contour_std = np.std(indexed_shape, axis=0) + 1e-4

        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue

        spread = np.sqrt(np.sum(contour_std ** 2))
        norm_shape = (indexed_shape - mass_center) / spread
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha, max_iter=60)
            contour_std_[k] = spread
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'std': contour_std_,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def _add_instance( self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann, trans_output, aug_s, calib, pre_cts=None, track_ids=None, ): h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if h <= 0 or w <= 0: return radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) ret["cat"][k] = cls_id - 1 ret["mask"][k] = 1 if "wh" in ret: ret["wh"][k] = 1.0 * w, 1.0 * h ret["wh_mask"][k] = 1 ret["ind"][k] = ct_int[1] * self.opt.output_w + ct_int[0] ret["reg"][k] = ct - ct_int ret["reg_mask"][k] = 1 draw_umich_gaussian(ret["hm"][cls_id - 1], ct_int, radius) gt_det["bboxes"].append( np.array( [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2], dtype=np.float32, )) gt_det["scores"].append(1) gt_det["clses"].append(cls_id - 1) gt_det["cts"].append(ct) if "tracking" in self.opt.heads: if ann["track_id"] in track_ids: pre_ct = pre_cts[track_ids.index(ann["track_id"])] ret["tracking_mask"][k] = 1 ret["tracking"][k] = 0 * (pre_ct - ct_int) gt_det["tracking"].append(ret["tracking"][k]) else: gt_det["tracking"].append(np.zeros(2, np.float32)) if "ltrb" in self.opt.heads: ret["ltrb"][k] = ( bbox[0] - ct_int[0], bbox[1] - ct_int[1], bbox[2] - ct_int[0], bbox[3] - ct_int[1], ) ret["ltrb_mask"][k] = 1 if "ltrb_amodal" in self.opt.heads: ret["ltrb_amodal"][k] = ( bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1], ) ret["ltrb_amodal_mask"][k] = 1 gt_det["ltrb_amodal"].append(bbox_amodal) if "nuscenes_att" in self.opt.heads: if ("attributes" in ann) and ann["attributes"] > 0: att = int(ann["attributes"] - 1) ret["nuscenes_att"][k][att] = 1 ret["nuscenes_att_mask"][k][self.nuscenes_att_range[att]] = 1 gt_det["nuscenes_att"].append(ret["nuscenes_att"][k]) if "velocity" in self.opt.heads: if ("velocity" in ann) and min(ann["velocity"]) > -1000: ret["velocity"][k] = np.array(ann["velocity"], 
np.float32)[:3] ret["velocity_mask"][k] = 1 gt_det["velocity"].append(ret["velocity"][k]) if "hps" in self.opt.heads: self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w) if "rot" in self.opt.heads: self._add_rot(ret, ann, k, gt_det) if "dep" in self.opt.heads: if "depth" in ann: ret["dep_mask"][k] = 1 ret["dep"][k] = ann["depth"] * aug_s gt_det["dep"].append(ret["dep"][k]) else: gt_det["dep"].append(2) if "dim" in self.opt.heads: if "dim" in ann: ret["dim_mask"][k] = 1 ret["dim"][k] = ann["dim"] gt_det["dim"].append(ret["dim"][k]) else: gt_det["dim"].append([1, 1, 1]) if "amodel_offset" in self.opt.heads: if "amodel_center" in ann: amodel_center = affine_transform(ann["amodel_center"], trans_output) ret["amodel_offset_mask"][k] = 1 ret["amodel_offset"][k] = amodel_center - ct_int gt_det["amodel_offset"].append(ret["amodel_offset"][k]) else: gt_det["amodel_offset"].append([0, 0])
def _get_pre_dets(self, anns, trans_input, trans_output): hm_h, hm_w = self.opt.input_h, self.opt.input_w down_ratio = self.opt.down_ratio trans = trans_input reutrn_hm = self.opt.pre_hm pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32) if reutrn_hm else None pre_cts, pre_whs, track_ids, pre_bboxes, pre_bbox_amodals= [], [], [],[],[] ignore_regions =[] for ann in anns: cls_id = int(ann['category_id']) if cls_id > self.opt.num_classes or cls_id <= -999 or cls_id <= 0 or ( 'iscrowd' in ann and ann['iscrowd'] > 0): bbox, _ = self._get_bbox_output( ann['bbox'], trans_output, hm_h, hm_w) ignore_regions.append(bbox) for ann in anns: cls_id = int(ann['category_id']) if cls_id > self.opt.num_classes or cls_id <= -99 or \ ('iscrowd' in ann and ann['iscrowd'] > 0): continue ## bbox input bbox = self._coco_box_to_bbox(ann['bbox']) bbox[:2] = affine_transform(bbox[:2], trans) bbox[2:] = affine_transform(bbox[2:], trans) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] max_rad = 1 ignored = False if (h > 0 and w > 0): radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) max_rad = max(max_rad, radius) ct = np.array( [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct0 = ct.copy() conf = 1 for area in ignore_regions: if (area[0] <= ct[0] and ct[0] <= area[2]) and (area[1] <= ct[1] and ct[1] <= area[3]): ignored = True break if ignored: continue else: ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h conf = 1 if np.random.random() > self.opt.lost_disturb else 0 ct_int = ct.astype(np.int32) if conf == 0: pre_cts.append(ct / down_ratio) ### output ct else: pre_cts.append(ct0 / down_ratio) track_ids.append(ann['track_id'] if 'track_id' in ann else -1) if reutrn_hm: draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf) if np.random.random() < self.opt.fp_disturb and 
reutrn_hm: ct2 = ct0.copy() # Hard code heatmap disturb ratio, haven't tried other numbers. ct2[0] = ct2[0] + np.random.randn() * 0.05 * w ct2[1] = ct2[1] + np.random.randn() * 0.05 * h ct2_int = ct2.astype(np.int32) draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf) ## get the bbox out bbox_out, bbox_amodal = self._get_bbox_output(ann['bbox'], trans_output) pre_bboxes.append(np.array(bbox_out)) pre_bbox_amodals.append(np.array(bbox_amodal)) h_out, w_out = bbox_out[3] - bbox_out[1], bbox_out[2] - bbox_out[0] pre_wh = np.array( [w_out, h_out], dtype=np.float32) pre_whs.append(pre_wh) return pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes,pre_bbox_amodals
def __getitem__(self, index):
    """Load sample ``index`` and build heat-map and 3-D regression targets.

    Compared with the previous version, the object centre is now scaled and
    clipped with the matching axis (x with ``output_w``, y with
    ``output_h``). The old code swapped them, which only gave correct
    targets for square output maps.

    Args:
        index: position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'dim'``, ``'rot'`` and ``'loc'``, plus ``'reg'`` when
        ``opt.reg_offset`` is set.
    """
    img_id = self.images[index]
    video_info = self.coco.loadImgs(ids=[img_id])[0]
    image_path = os.path.join(self.img_dir, video_info['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    input_h, input_w = self.opt.input_h, self.opt.input_w

    # Every 9th key point starting at 0, x/y only -- presumably the box
    # centre of each object; verify against the annotation format.
    centers = np.array([ann['keypoints_2d'] for ann in anns])[:, 0::9, :2]
    centers = centers.reshape(-1, 2)
    # Keep only objects whose normalised centre lies strictly inside (0, 1).
    keep = np.where(np.all((0 < centers) & (1 > centers), axis=1))
    centers = centers[keep]
    anns = [anns[i] for i in keep[0]]

    img = cv2.imread(image_path)
    # Normalised -> pixel coordinates before the resize/pad/colour augs.
    centers[:, 0], centers[:, 1] = (
        centers[:, 0] * img.shape[1], centers[:, 1] * img.shape[0])
    augmented = self.augs(image=img, keypoints=centers)
    inp, centers = augmented['image'], np.array(augmented['keypoints'])

    num_objs = min(len(centers), self.max_objs)
    wh_ratio = img.shape[1] / img.shape[0]
    c = np.array([inp.shape[1] / 2., inp.shape[0] / 2.], dtype=np.float32)
    s = max(inp.shape[0], inp.shape[1]) * 1.0

    aug = False
    depth_scale = 1.0
    if (self.split == 'train' and np.random.random() < self.opt.aug_ddd
            and num_objs > 0):
        aug = True
        sf = self.opt.scale
        scale_rand = np.random.random()
        # The same factor rescales the crop here and the depth further down.
        depth_scale = np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)
        s = s * depth_scale

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(
        inp, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # Apply the same affine map to the centres (homogeneous coordinates).
    centers = np.concatenate(
        [centers, np.ones((centers.shape[0], 1))], axis=1)
    centers = np.matmul(trans_input, centers.T).T
    if num_objs > 0:
        # Back to normalised coordinates: x by width, y by height.
        centers[:, 0], centers[:, 1] = (
            centers[:, 0] / inp.shape[1], centers[:, 1] / inp.shape[0])

    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio

    # Empty targets.
    heat_map = np.zeros(
        [self.num_classes, output_h, output_w], dtype=np.float32)
    scales = np.zeros([self.max_objs, 3], dtype=np.float32)
    translations = np.zeros([self.max_objs, 3], dtype=np.float32)
    rotvecs = np.zeros([self.max_objs, 3], dtype=np.float32)
    reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)

    for k in range(num_objs):
        ann = anns[k]
        bbox = np.array(ann['bbox'])
        scale = np.array(ann['scale'])
        rot_angles = np.array(ann['rot'])
        translation = np.array(ann['translation'])
        if aug:
            translation[2] *= depth_scale

        ct = centers[k][:2]
        # Fix: ct[0] is x (normalised by width) and ct[1] is y (normalised
        # by height), so each is scaled and clipped with its own extent.
        ct[0], ct[1] = ct[0] * output_w, ct[1] * output_h
        ct[0] = np.clip(ct[0], 0, output_w - 1)
        ct[1] = np.clip(ct[1], 0, output_h - 1)

        cls_id = int(self.cat_ids[ann['category_id']])
        bbox[[0, 2]] *= output_w
        bbox[[1, 3]] *= output_h
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius / 2))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(heat_map[cls_id], ct_int, radius)
            scales[k] = scale
            translations[k] = translation
            rotvecs[k] = rot_angles
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            if DEBUG:
                lines = (
                    [1, 5], [2, 6], [3, 7], [4, 8],  # lines along x-axis
                    [1, 3], [5, 7], [2, 4], [6, 8],  # lines along y-axis
                    [1, 2], [3, 4], [5, 6], [7, 8]   # lines along z-axis
                )
                plt.scatter(ct_int[0], ct_int[1])
                r = R.from_euler('zyx', rot_angles).as_matrix()
                box_3d = Box.from_transformation(
                    r, translation, scale).vertices
                points_2d = project_points(
                    box_3d, np.array(video_info['projection_matrix']))
                points_2d[:, 0] = (points_2d[:, 0] * (128 * wh_ratio)
                                   + 128 * (1 - wh_ratio) / 2)
                points_2d[:, 1] *= 128
                points_2d = points_2d.astype(int)
                for ids in lines:
                    plt.plot(
                        (points_2d[ids[0]][0], points_2d[ids[1]][0]),
                        (points_2d[ids[0]][1], points_2d[ids[1]][1]),
                        color='r',
                    )

    ret = {
        'input': inp,
        'hm': heat_map,
        'reg_mask': reg_mask,
        'ind': ind,
        'dim': scales,
        'rot': rotvecs,
        'loc': translations
    }
    if self.opt.reg_offset:
        ret.update({'reg': reg})

    if DEBUG:
        if inp.shape[0] == 3:
            plot_img = inp.transpose(1, 2, 0)
            plot_img = (plot_img * self.std) + self.mean
        else:
            plot_img = inp.copy()
        plot_img = cv2.resize(plot_img, (output_w, output_h))
        plot_img = cv2.cvtColor(plot_img, cv2.COLOR_BGR2RGB)
        plt.imshow(plot_img)
        plt.show()
        plt.imshow(heat_map[0])
        plt.show()
    return ret
def __getitem__(self, index):
    """Load sample ``index`` and build centre-heat-map detection targets.

    Compared with the previous version, at most ``self.max_objs`` boxes are
    encoded. The target buffers have exactly ``max_objs`` rows, so an image
    with more annotations used to raise ``IndexError``.

    Args:
        index: position in ``self.images``.

    Returns:
        dict with ``'image'`` (C, H, W float32), ``'hmap'``, ``'w_h_'``,
        ``'regs'``, ``'inds'``, ``'ind_masks'``, ``'c'`` (crop centre),
        ``'s'`` (crop scale) and ``'img_id'``.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array(
        [anno['bbox'] for anno in annotations], dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder zero-size box; it is dropped by the h/w check below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale, random crop centre, random horizontal flip.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(
        img, trans_img, (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros(
        (self.num_classes, fmap_h, fmap_w), dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width, height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # centre offset
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    # Fix: never index past the pre-allocated target rows.
    limit = self.max_objs
    for k, (bbox, label) in enumerate(zip(bboxes[:limit], labels[:limit])):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            obj_c = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(gaussian_radius(
                    (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

    return {
        'image': img,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load sample ``index`` and build heat-map, shape-code and vote targets.

    Changes compared with the previous version:
      * invalid contours are rejected with ``np.isfinite``; the old
        ``contour_std == np.nan`` test is always False, so NaN shapes
        slipped through;
      * the segmentation type check uses ``isinstance``;
      * at most ``self.max_objs`` objects are encoded, so the fixed-size
        target buffers can no longer be indexed out of range.

    Args:
        index: position in ``self.images``.

    Returns:
        dict with ``'image'``, ``'shapes'``, ``'codes'``, ``'offsets'``,
        ``'votes'``, ``'hmap'``, ``'w_h_'``, ``'regs'``, ``'inds'``,
        ``'ind_masks'``, ``'c'``, ``'s'`` and ``'img_id'``.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        # Skip crowd objects and non-polygon segmentations.
        if anno['iscrowd'] == 1 or not isinstance(anno['segmentation'], list):
            continue
        if len(anno['segmentation']) > 1:
            # Several parts: keep the one with the largest contour area.
            obj_contours = [
                np.array(s).reshape((-1, 2)).astype(np.int32)
                for s in anno['segmentation']
            ]
            obj_contours = sorted(obj_contours, key=cv2.contourArea)
            polygons = obj_contours[-1]
        else:
            polygons = anno['segmentation'][0]

        _, _, gt_w, gt_h = anno['bbox']
        if gt_w < 5 or gt_h < 5:
            continue
        contour = np.array(polygons).reshape((-1, 2))
        if cv2.contourArea(contour.astype(np.int32)) < 35:
            continue

        # Resample the contour to a fixed number of vertices.
        fixed_contour = uniformsample(contour, self.n_vertices)
        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # Fix: NaN never compares equal to itself, so test finiteness.
        if contour_std < 1e-6 or not np.isfinite(contour_std):
            continue

        # The box is recomputed from the resampled contour (xyxy format).
        updated_bbox = [
            np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
            np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])
        ]
        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder zero-size object; dropped by the size check below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale, random crop centre, random horizontal flip.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(150, width)
        h_border = get_border(150, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(
        img, trans_img, (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    votes_ = np.zeros(
        (self.max_objs, self.vote_length), dtype=np.float32)  # binary masks
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # bbox width/height
    shapes_ = np.zeros(
        (self.max_objs, self.n_vertices * 2), dtype=np.float32)
    center_offsets = np.zeros(
        (self.max_objs, 2), dtype=np.float32)  # mass centre - bbox centre
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # centre offset
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    # Fix: never index past the pre-allocated target rows.
    limit = self.max_objs
    objects = zip(bboxes[:limit], labels[:limit], shapes[:limit])
    for k, (bbox, label, shape) in enumerate(objects):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-coordinates as well.
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply the same scale/crop transform to every contour vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)

        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, fmap_w - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, fmap_h - 1)

        # Reverse the vertex order when the helper reports the contour is
        # not clockwise.
        if check_clockwise_polygon(shape_clipped):
            fixed_contour = shape_clipped.copy()
        else:
            fixed_contour = np.flip(shape_clipped, axis=0)

        # Start the vertex list at the left-most vertex (argmin of x).
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)
        mass_center = np.mean(indexed_shape, axis=0)

        # Drop degenerate boxes; written so that NaN sizes are skipped too.
        if not (h >= 1e-6 and w >= 1e-6):
            continue

        # Centre on the mass centre and scale by the half box size.
        norm_shape = (indexed_shape - mass_center) / np.array(
            [w / 2., h / 2.])

        obj_c = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)
        obj_c_int = obj_c.astype(np.int32)
        radius = max(
            0,
            int(gaussian_radius(
                (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
        draw_umich_gaussian(hmap[label], obj_c_int, radius)
        shapes_[k] = norm_shape.reshape((1, -1))
        center_offsets[k] = mass_center - obj_c
        codes_[k], _ = fast_ista(
            norm_shape.reshape((1, -1)), self.dictionary,
            lmbda=self.sparse_alpha, max_iter=80)
        w_h_[k] = 1. * w, 1. * h
        regs[k] = obj_c - obj_c_int  # discretization error
        inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
        ind_masks[k] = 1

        # Vote target: rasterise the contour inside its box (1 px padding),
        # resize to the vote grid and binarise at 20 % intensity.
        shifted_poly = indexed_shape - np.array([bbox[0], bbox[1]]) + 1
        obj_mask = np.zeros((int(h) + 3, int(w) + 3), dtype=np.uint8)
        cv2.drawContours(
            obj_mask, shifted_poly[None, :, :].astype(np.int32),
            color=255, contourIdx=-1, thickness=-1)
        obj_mask = cv2.resize(
            obj_mask.astype(np.uint8),
            dsize=(self.vote_vec_dim, self.vote_vec_dim),
            interpolation=cv2.INTER_LINEAR)
        votes_[k] = (obj_mask.reshape((1, -1)) > 0.2 * 255) * 1.0

    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'votes': votes_,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load sample ``index`` and build heat-map and shape-code targets.

    Changes compared with the previous version:
      * invalid contours are rejected with ``np.isfinite``; the old
        ``contour_std == np.nan`` test is always False, so NaN shapes
        slipped through;
      * at most ``self.max_objs`` objects are encoded, so the fixed-size
        target buffers can no longer be indexed out of range;
      * the image record is fetched from the COCO index once, not twice.

    Args:
        index: position in ``self.images``.

    Returns:
        dict with ``'image'``, ``'codes'``, ``'hmap'``, ``'w_h_std'``,
        ``'regs'``, ``'inds'``, ``'ind_masks'``, ``'c'``, ``'s'`` and
        ``'img_id'``.
    """
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    w_img = int(img_info['width'])
    h_img = int(img_info['height'])

    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        if anno['iscrowd'] == 1:  # excludes crowd objects
            continue

        polygons = anno['segmentation']
        if len(polygons) > 1:
            # Several parts: rasterise them all, then grow a morphological
            # closing kernel until they merge into a single outer contour.
            bg = np.zeros((h_img, w_img, 1), dtype=np.uint8)
            for poly in polygons:
                n_pts = len(poly) // 2
                vertices = np.array(poly[:2 * n_pts]).reshape(
                    1, n_pts, 2).astype(np.int32)
                cv2.drawContours(
                    bg, vertices, color=(255), contourIdx=-1, thickness=-1)
            pads = 5
            while True:
                kernel = np.ones((pads, pads), np.uint8)
                bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel)
                obj_contours, _ = cv2.findContours(
                    bg_closed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                if len(obj_contours) > 1:
                    pads += 5
                else:
                    polygons = obj_contours[0]
                    break
        else:
            polygons = anno['segmentation'][0]

        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))

        # Resample the contour to a fixed number of vertices and keep it
        # inside the annotated box.
        fixed_contour = resample(contour, num=self.n_vertices)
        fixed_contour[:, 0] = np.clip(
            fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(
            fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # Fix: NaN never compares equal to itself, so test finiteness.
        if contour_std < 1e-6 or not np.isfinite(contour_std):
            continue

        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(anno['bbox'])

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder zero-size object; its contour has zero spread and is
        # therefore skipped in the loop below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale, random crop centre, random horizontal flip.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(
        img, trans_img, (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    w_h_std = np.zeros(
        (self.max_objs, 2), dtype=np.float32)  # per-axis contour std
    codes_ = np.zeros(
        (self.max_objs, self.n_codes), dtype=np.float32)  # shape codes
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # centre offset
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    # Fix: never index past the pre-allocated target rows.
    limit = self.max_objs
    objects = zip(bboxes[:limit], labels[:limit], shapes[:limit])
    for k, (bbox, label, shape) in enumerate(objects):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-coordinates as well.
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply the same scale/crop transform to every contour vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)
        contour = np.reshape(shape, (self.n_vertices, 2))

        # Start the vertex list at the left-most vertex (argmin of x).
        idx = np.argmin(contour[:, 0])
        indexed_shape = np.concatenate(
            (contour[idx:, :], contour[:idx, :]), axis=0)

        # Reverse the vertex order when the helper reports the contour is
        # not clockwise.
        if check_clockwise_polygon(indexed_shape):
            fixed_contour = indexed_shape.copy()
        else:
            fixed_contour = np.flip(indexed_shape, axis=0)

        contour[:, 0] = np.clip(fixed_contour[:, 0], 0, fmap_w - 1)
        contour[:, 1] = np.clip(fixed_contour[:, 1], 0, fmap_h - 1)

        contour_mean = np.mean(contour, axis=0)
        contour_std = np.std(contour, axis=0)
        std_norm = np.sqrt(np.sum(contour_std ** 2))
        # Skip contours with (near) zero spread, and NaN spreads as well.
        if not (std_norm > 1e-6):
            continue
        if not (h > 0 and w > 0):
            continue

        norm_shape = (contour - contour_mean) / std_norm

        # The object centre is the contour mean, not the box centre.
        obj_c = contour_mean
        obj_c_int = obj_c.astype(np.int32)
        radius = max(
            0,
            int(gaussian_radius(
                (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
        draw_umich_gaussian(hmap[label], obj_c_int, radius)
        w_h_std[k] = contour_std
        temp_codes, _ = fast_ista(
            norm_shape.reshape((1, -1)), self.dictionary,
            lmbda=self.sparse_alpha, max_iter=80)
        codes_[k] = np.exp(temp_codes)
        regs[k] = obj_c - obj_c_int  # discretization error
        inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
        ind_masks[k] = 1

    return {
        'image': img,
        'codes': codes_,
        'hmap': hmap,
        'w_h_std': w_h_std,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Build one training/evaluation sample for the image at ``index``.

    The image id is taken from ``self.images`` and its annotations are read
    through ``self.coco``. Only annotations whose ``category_id`` is in
    ``KINS_IDS`` are kept. For each kept annotation the first polygon of
    ``anno['segmentation']`` and of ``anno['i_segm']`` is resampled to
    ``self.n_vertices`` points with ``uniformsample``.

    When ``self.split == 'train'`` the sample is randomly rescaled,
    re-centred and flipped horizontally with probability 0.5, and
    ``color_aug`` is applied to the warped image.

    Returns:
        dict with keys ``image``, ``shapes``, ``codes``, ``offsets``,
        ``occ_map``, ``hmap``, ``w_h_``, ``regs``, ``inds``, ``ind_masks``,
        ``votes``, ``c``, ``s`` and ``img_id``. Per-object arrays have
        ``self.max_objs`` rows; rows that were never filled stay zero and
        have ``ind_masks == 0``.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir,
        self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    img = self.coco.loadImgs(ids=[img_id])[0]
    # NOTE(review): w_img / h_img are never read again in this method.
    w_img = int(img['width'])
    h_img = int(img['height'])

    labels = []
    bboxes = []
    a_bboxes = []
    shapes = []
    a_shapes = []
    for anno in annotations:
        if anno['category_id'] not in KINS_IDS:
            continue  # excludes 3: person-sitting class for evaluation
        a_polygons = anno['segmentation'][
            0]  # only one mask for each instance
        polygons = anno['i_segm'][0]
        a_contour = np.array(a_polygons).reshape((-1, 2))
        contour = np.array(polygons).reshape((-1, 2))

        # Drop objects whose 'i_segm' contour encloses fewer than 5 px.
        if cv2.contourArea(contour.astype(
                np.int32)) < 5:  # remove tiny objects
            continue

        # Resample both contours to a fixed number of vertices.
        fixed_contour = uniformsample(a_contour, self.n_vertices)
        i_contour = uniformsample(contour, self.n_vertices)

        shapes.append(np.ndarray.flatten(i_contour).tolist())
        a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(anno['bbox'])
        a_bboxes.append(anno['a_bbox'])

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    a_bboxes = np.array(a_bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    a_shapes = np.array(a_shapes, dtype=np.float32)

    # With no usable annotation, substitute one all-zero dummy object so the
    # array arithmetic below still works; its zero-size box is skipped later.
    if len(bboxes) == 0:
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
    a_bboxes[:, 2:] += a_bboxes[:, :2]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale, random crop centre, then a coin-flip mirror.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(360, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    # Warp the image to the network input size.
    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))

    img = img.astype(np.float32) / 255.

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Same centre/scale, but targeting the feature-map resolution; used for
    # all box and contour coordinates below.
    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap of centers
    occ_map = np.zeros(
        (1, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # grayscale map for occlusion levels
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of inmodal bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros(
        (self.max_objs, 2),
        dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes),
                      dtype=np.float32)  # gt amodal coefficients
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # regression for quantization error
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    votes_ = np.zeros((self.max_objs, self.vote_length),
                      dtype=np.float32)  # voting for heatmaps

    # NOTE(review): k is not bounded by self.max_objs here; an image with
    # more kept annotations than max_objs would index past the arrays above.
    for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
            zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
        # bbox / a_bbox / shape / a_shape are row views, so the edits below
        # modify the per-image arrays in place.
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1

            # Flip the contour x-axis
            for m in range(self.n_vertices):
                a_shape[2 * m] = width - a_shape[2 * m] - 1
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[
            0]  # This box is the inmodal boxes

        a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
        a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
        a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0, self.fmap_size['h'] - 1)

        # generate gt shape mean and std from contours
        for m in range(self.n_vertices
                       ):  # apply scale and crop transform to shapes
            a_shape[2 * m:2 * m + 2] = affine_transform(
                a_shape[2 * m:2 * m + 2], trans_fmap)
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)

        # Clip both contours to the feature map.
        shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                      self.fmap_size['w'] - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                      self.fmap_size['h'] - 1)

        i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                        self.fmap_size['w'] - 1)
        i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                        self.fmap_size['h'] - 1)

        # Reverse the vertex order when check_clockwise_polygon is falsy.
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()

        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

        mass_center = np.mean(indexed_shape, axis=0)
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue

        centered_shape = indexed_shape - mass_center  # these are amodal mask shapes

        # Always true here: the check above already skipped h or w < 1e-6.
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = centered_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha,
                                     max_iter=60)

            # NOTE(review): a_bbox[3] / a_bbox[2] are the clipped bottom and
            # right coordinates, not a box height and width -- confirm this
            # is what polys_to_mask expects for its size arguments.
            a_shifted_poly = indexed_shape - np.array([
                a_bbox[0], a_bbox[1]
            ])  # crop amodal shapes to the amodal bboxes
            amodal_obj_mask = self.polys_to_mask(
                [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
                a_bbox[3], a_bbox[2])

            i_shifted_poly = i_shape_clipped - np.array([
                a_bbox[0], a_bbox[1]
            ])  # crop inmodal shapes to the same amodal bboxes
            inmodal_obj_mask = self.polys_to_mask(
                [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
                a_bbox[3], a_bbox[2])

            obj_mask = (
                amodal_obj_mask + inmodal_obj_mask
            ) * 255. / 2  # convert to float type in image scale
            obj_mask = cv2.resize(
                obj_mask.astype(np.uint8),
                dsize=(self.vote_vec_dim, self.vote_vec_dim),
                interpolation=cv2.INTER_LINEAR) * 1.
            votes_[k] = obj_mask.reshape((1, -1)) / 255.

            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

            # occlusion level map gt
            occ_map[0] += self.polys_to_mask(
                [np.ndarray.flatten(indexed_shape).tolist()],
                self.fmap_size['h'], self.fmap_size['w']) * 1.

    # Saturate the accumulated overlap count at max_occ and scale to [0, 1].
    occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ

    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'occ_map': occ_map,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'votes': votes_,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def _add_instance(
        self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann, trans_output,
        aug_s, calib, pre_cts=None, track_ids=None):
    """Write the training targets of one object into slot ``k`` of ``ret``.

    Objects whose box has non-positive height or width are ignored. For
    every other object the centre heatmap, class, size, index and offset
    targets are filled, followed by the targets of each optional head listed
    in ``self.opt.heads``. A matching entry is appended to the ``gt_det``
    lists for every head that is handled.
    """
    box_h = bbox[3] - bbox[1]
    box_w = bbox[2] - bbox[0]
    if box_h <= 0 or box_w <= 0:
        return

    radius = max(0, int(gaussian_radius((math.ceil(box_h),
                                         math.ceil(box_w)))))
    center = np.array(
        [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
        dtype=np.float32)
    center_int = center.astype(np.int32)
    cx_int, cy_int = center_int[0], center_int[1]
    class_index = cls_id - 1
    heads = self.opt.heads

    # Targets shared by every configuration.
    ret['cat'][k] = class_index
    ret['mask'][k] = 1
    if 'wh' in ret:
        ret['wh'][k] = 1. * box_w, 1. * box_h
        ret['wh_mask'][k] = 1
    ret['ind'][k] = cy_int * self.opt.output_w + cx_int
    ret['reg'][k] = center - center_int
    ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][class_index], center_int, radius)

    half_w, half_h = box_w / 2, box_h / 2
    gt_det['bboxes'].append(
        np.array([center[0] - half_w, center[1] - half_h,
                  center[0] + half_w, center[1] + half_h],
                 dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(class_index)
    gt_det['cts'].append(center)

    if 'tracking' in heads:
        # Offset from this centre to the same track in the previous frame.
        if ann['track_id'] in track_ids:
            previous_center = pre_cts[track_ids.index(ann['track_id'])]
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = previous_center - center_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if 'ltrb' in heads:
        ret['ltrb'][k] = (bbox[0] - cx_int, bbox[1] - cy_int,
                          bbox[2] - cx_int, bbox[3] - cy_int)
        ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in heads:
        ret['ltrb_amodal'][k] = (
            bbox_amodal[0] - cx_int, bbox_amodal[1] - cy_int,
            bbox_amodal[2] - cx_int, bbox_amodal[3] - cy_int)
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in heads:
        if ('velocity' in ann) and min(ann['velocity']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
            ret['velocity_mask'][k] = 1
        gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, center_int, bbox,
                      box_h, box_w)

    if 'rot' in heads:
        self._add_rot(ret, ann, k, gt_det)

    if 'dep' in heads:
        if 'depth' in ann:
            ret['dep_mask'][k] = 1
            ret['dep'][k] = ann['depth'] * aug_s
            gt_det['dep'].append(ret['dep'][k])
        else:
            gt_det['dep'].append(2)

    if 'dim' in heads:
        if 'dim' in ann:
            ret['dim_mask'][k] = 1
            ret['dim'][k] = ann['dim']
            gt_det['dim'].append(ret['dim'][k])
        else:
            gt_det['dim'].append([1, 1, 1])

    if 'amodel_offset' in heads:
        if 'amodel_center' in ann:
            amodel_center = affine_transform(ann['amodel_center'],
                                             trans_output)
            ret['amodel_offset_mask'][k] = 1
            ret['amodel_offset'][k] = amodel_center - center_int
            gt_det['amodel_offset'].append(ret['amodel_offset'][k])
        else:
            gt_det['amodel_offset'].append([0, 0])
def _add_instance(self,
                  ret,
                  gt_det,
                  k,
                  cls_id,
                  bbox,
                  bbox_amodal,
                  ann,
                  trans_output,
                  aug_s,
                  calib,
                  pre_cts=None,
                  track_ids=None,
                  flipped=False):
    """Write the training targets of one object into slot ``k`` of ``ret``.

    Objects whose box has non-positive height or width are ignored. For
    every other object the centre heatmap, class, size, index and offset
    targets are filled, followed by the targets of each optional head listed
    in ``self.opt.heads``. A matching entry is appended to the ``gt_det``
    lists for every head that is handled.

    Args:
        ret: dict of pre-allocated target arrays, updated in place.
        gt_det: dict of lists collecting per-object ground truth.
        k: slot index of this object inside the target arrays.
        cls_id: 1-based class id (``cls_id - 1`` selects the heatmap).
        bbox: box as ``(x1, y1, x2, y2)`` in output coordinates.
        bbox_amodal: unclipped box, used by the ``ltrb_amodal`` head.
        ann: annotation dict of the object.
        trans_output: 2x3 affine matrix passed to ``affine_transform`` and
            ``cv2.warpAffine``.
        aug_s: scale factor multiplied into the depth target.
        calib: not used by this method.
        pre_cts: previous-frame centres, parallel to ``track_ids``.
        track_ids: previous-frame track ids.
        flipped: whether the image was mirrored; the segmentation mask is
            mirrored the same way before warping.
    """
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    if h <= 0 or w <= 0:
        return
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    ct_int = ct.astype(np.int32)

    ret['cat'][k] = cls_id - 1
    ret['mask'][k] = 1
    if 'wh' in ret:
        ret['wh'][k] = 1. * w, 1. * h
        ret['wh_mask'][k] = 1
    # Flattened index of the integer centre in the output map.
    ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
    ret['reg'][k] = ct - ct_int  # sub-pixel offset lost by truncation
    ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

    gt_det['bboxes'].append(
        np.array(
            [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
            dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(cls_id - 1)
    gt_det['cts'].append(ct)

    if 'tracking' in self.opt.heads:
        # Offset from this centre to the same track in the previous frame.
        if ann['track_id'] in track_ids:
            pre_ct = pre_cts[track_ids.index(ann['track_id'])]
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = pre_ct - ct_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if 'ltrb' in self.opt.heads:
        ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
            bbox[2] - ct_int[0], bbox[3] - ct_int[1]
        ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in self.opt.heads:
        ret['ltrb_amodal'][k] = \
            bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
            bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in self.opt.heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in self.opt.heads:
        if ('velocity' in ann) and min(ann['velocity']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
            ret['velocity_mask'][k] = 1
        gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in self.opt.heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w)

    if 'rot' in self.opt.heads:
        self._add_rot(ret, ann, k, gt_det)

    if 'dep' in self.opt.heads:
        if 'depth' in ann:
            ret['dep_mask'][k] = 1
            ret['dep'][k] = ann['depth'] * aug_s
            gt_det['dep'].append(ret['dep'][k])
        else:
            gt_det['dep'].append(2)

    if 'dim' in self.opt.heads:
        if 'dim' in ann:
            ret['dim_mask'][k] = 1
            ret['dim'][k] = ann['dim']
            gt_det['dim'].append(ret['dim'][k])
        else:
            gt_det['dim'].append([1, 1, 1])

    if 'amodel_offset' in self.opt.heads:
        if 'amodel_center' in ann:
            amodel_center = affine_transform(ann['amodel_center'],
                                             trans_output)
            ret['amodel_offset_mask'][k] = 1
            ret['amodel_offset'][k] = amodel_center - ct_int
            gt_det['amodel_offset'].append(ret['amodel_offset'][k])
        else:
            gt_det['amodel_offset'].append([0, 0])

    # Segmentation target. Everything is guarded by a single check so that
    # `segment` is never used when the annotation carries no mask.
    if 'seg' in self.opt.heads and ann['segmentation'] is not None:
        segment = self.coco.annToMask(ann)
        if flipped:
            segment = segment[:, ::-1]
        segment = cv2.warpAffine(segment,
                                 trans_output,
                                 (self.opt.output_w, self.opt.output_h),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)

        # Pixels outside the box enlarged by pad_rate are marked 255.
        segment_mask = np.ones_like(segment)
        pad_rate = 0.1
        half_w = (1 + pad_rate) * w / 2
        half_h = (1 + pad_rate) * h / 2
        # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
        x = np.clip([ct[0] - half_w, ct[0] + half_w], 0,
                    self.opt.output_w - 1).astype(int)
        y = np.clip([ct[1] - half_h, ct[1] + half_h], 0,
                    self.opt.output_h - 1).astype(int)
        segment_mask[y[0]:y[1], x[0]:x[1]] = 0
        segment[segment > 0] = 1
        segment[segment_mask == 1] = 255
        ret['seg'][k] = segment
def __getitem__(self, index):
    """Build one sample of line-segment targets for the image at ``index``.

    Each annotation supplies a segment through ``ann['cord']``. Segments are
    ordered left-to-right, mirrored when the image is flipped, mapped to the
    output resolution with ``trans_output`` and clipped to the output map.
    Every segment that survives contributes a Gaussian to its class heatmap
    plus size, direction, index and offset targets.

    Returns:
        dict with keys ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and
        ``direct``; ``reg`` when ``self.opt.reg_offset`` is set;
        ``grad_magnitude`` when ``self.opt.loss_hm_magnitude`` is set; and
        ``meta`` when ``self.opt.debug > 0`` or the split is ``'test'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    # to fit for any given global data path:
    # NOTE(review): this rewrites self.img_dir on the dataset object itself
    # the first time it is reached.
    if 'images' not in self.img_dir:
        img_folder = self.img_dir.split('/')[-1]
        self.img_dir = self.img_dir.replace(img_folder,
                                            "images/" + img_folder)
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    # NOTE(review): when the image cannot be read, the path is printed but
    # execution continues and the next statement accesses img.shape again.
    try:
        height, width = img.shape[0], img.shape[1]
    except AttributeError:
        print("None type image! path: {}".format(img_path))
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
        # (x | pad) + 1 rounds the size up using the bits set in opt.pad.
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random scale and random crop centre.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            # Gaussian jitter of centre and scale, clipped to fixed ranges.
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            # A two-dimensional image has no third axis to slice.
            try:
                img = img[:, ::-1, :]
            except IndexError:
                img = img[:, ::-1]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec,
                  self.opt.color_aug_var)
    # Normalised with the scalar means of self.mean and self.std.
    inp = (inp - np.mean(self.mean)) / np.mean(self.std)
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    # Get ground truth gradient magnitude
    if self.opt.loss_hm_magnitude:
        img_mag_path = os.path.join(self.img_dir + '_mag',
                                    file_name.replace('.png', '_mag.png'))
        inp_grad_magnitude = cv2.imread(img_mag_path, 0)
        inp_grad_magnitude = cv2.warpAffine(inp_grad_magnitude,
                                            trans_output,
                                            (output_w, output_h),
                                            flags=cv2.INTER_LINEAR)
        inp_grad_magnitude = (inp_grad_magnitude.astype(np.float32) / 255.)

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    direct = np.zeros((self.max_objs, 1), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    gt_line = []
    for k in range(num_objs):
        ann = anns[k]
        # presumably a NumPy array (x0, y0, x1, y1); it is indexed with
        # index lists below -- verify against _semline_cord_to_box.
        cord = self._semline_cord_to_box(ann['cord'])
        try:
            cls_id = int(self.cat_ids[ann['category_id']])
        except KeyError:
            print("Wrong label!!! ", file_name)
            continue

        # Order the two end points so that the first one is the left one.
        if cord[0] <= cord[2]:
            x_left = cord[0]
            x_right = cord[2]
            y_left = cord[1]
            y_right = cord[3]
        else:  # cord[0] > cord[2]:
            x_left = cord[2]
            x_right = cord[0]
            y_left = cord[3]
            y_right = cord[1]
        cord[0] = x_left
        cord[1] = y_left
        cord[2] = x_right
        cord[3] = y_right

        if flipped:
            # Mirroring swaps which end point is the left one, so the y
            # values are swapped along with the mirrored x values.
            cord[[0, 2]] = width - cord[[2, 0]] - 1
            cord[[1, 3]] = cord[[3, 1]]
        cord[:2] = affine_transform(cord[:2], trans_output)
        cord[2:] = affine_transform(cord[2:], trans_output)
        # 'lt2rb' when both x and y increase from the first to the second
        # end point, 'lb2rt' otherwise.
        direct_str = 'lt2rb' if cord[0] < cord[2] and cord[1] < cord[
            3] else 'lb2rt'

        if 0 < cord[0] < output_w and 0 < cord[2] < output_w \
                and 0 < cord[1] < output_h and 0 < cord[3] < output_h:
            # Both end points are strictly inside the map: nothing to clip.
            # NOTE(review): `angle` is computed here but never read.
            if cord[0] == cord[2]:  # vertical line
                angle = 90
            else:
                a = (cord[1] - cord[3]) / (cord[0] - cord[2])
                angle = np.arctan(a) * 180 / 3.14159265359
            pass
        else:
            if cord[0] == cord[2]:  # vertical line
                if cord[0] < 0 or cord[0] >= output_w:
                    continue
                cord[[1, 3]] = np.clip(cord[[1, 3]], 0, output_h - 1)
                if cord[1] == cord[3]:
                    continue
            elif cord[1] == cord[3]:  # horizontal line
                if cord[1] < 0 or cord[1] >= output_h:
                    continue
                cord[[0, 2]] = np.clip(cord[[0, 2]], 0, output_w - 1)
                if cord[0] == cord[2]:
                    continue
            else:
                # General line y = a * x + b through the two end points.
                a = (cord[1] - cord[3]) / (cord[0] - cord[2])
                b = (cord[0] * cord[3] -
                     cord[2] * cord[1]) / (cord[0] - cord[2])
                # Clip y first, then update x.
                x0, y0, x1, y1 = cord[[0, 1, 2, 3]]
                (y0, y1) = np.clip((y0, y1), 0, output_h - 1)
                if y0 == y1:
                    continue
                if y0 != cord[1]:
                    x0 = (y0 - b) / a
                if y1 != cord[3]:
                    x1 = (y1 - b) / a
                # Then clip x, then update y:
                (x0, x1) = np.clip((x0, x1), 0, output_w - 1)
                if x0 == x1:
                    continue
                if x0 != cord[0]:
                    y0 = a * x0 + b
                if x1 != cord[2]:
                    y1 = a * x1 + b
                # Copy back to cord:
                if direct_str == 'lt2rb':
                    cord[[0, 1, 2, 3]] = min(x0, x1), min(y0, y1), max(
                        x0, x1), max(y0, y1)
                else:
                    cord[[0, 1, 2, 3]] = min(x0, x1), max(y0, y1), max(
                        x0, x1), min(y0, y1)

        h, w = abs(cord[3] - cord[1]), abs(cord[2] - cord[0])
        # Give axis-aligned segments a small non-zero extent.
        w = 0.25 if w == 0 else w
        h = 0.25 if h == 0 else h
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            ct = np.array([(cord[0] + cord[2]) / 2,
                           (cord[1] + cord[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            radius = max(0, int(radius))
            hm[cls_id] = draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            direct[k] = 1 if direct_str == 'lt2rb' else 0
            # NOTE(review): direct[k] is a length-1 array, not a scalar.
            direct2append = direct[k]
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_line.append([
                cord[0], cord[1], cord[2], cord[3], 1, cls_id, direct2append
            ])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind}
    ret.update({'wh': wh})
    ret.update({'direct': direct})
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.loss_hm_magnitude:
        ret.update({'grad_magnitude': inp_grad_magnitude})
    if self.opt.debug > 0 or self.split == 'test':
        gt_line = np.array(gt_line, dtype=np.float32) if len(gt_line) > 0 else \
            np.zeros((1, 7), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_line': gt_line, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def _get_pre_dets(self, anns, trans_input, trans_output, ret):
    """Collect previous-frame centres and optionally render their heatmap.

    Every annotation with a valid class and a non-degenerate box (after
    mapping with ``trans_input`` and clipping to the input size) yields one
    centre and one track id. Centres are randomly jittered
    (``opt.hm_disturb``), randomly "lost" (``opt.lost_disturb``) and random
    false positives are drawn (``opt.fp_disturb``).

    Returns:
        ``(pre_hm, pre_cts, track_ids)``. ``pre_hm`` is a
        ``(1, input_h, input_w)`` float32 map, or ``None`` when
        ``opt.pre_hm`` is falsy. ``pre_cts`` holds centres divided by
        ``opt.down_ratio``; ``track_ids`` is parallel to it, using ``-1``
        for annotations without a ``track_id``.
    """
    map_h = self.opt.input_h
    map_w = self.opt.input_w
    stride = self.opt.down_ratio
    want_hm = self.opt.pre_hm

    pre_hm = None
    if want_hm:
        pre_hm = np.zeros((1, map_h, map_w), dtype=np.float32)
    pre_cts = []
    track_ids = []

    for ann in anns:
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id > self.opt.num_classes or cls_id <= -99 or \
                ('iscrowd' in ann and ann['iscrowd'] > 0) or cls_id == 0:
            continue

        # Derive the box from the mask when the annotation has none.
        if 'bbox' not in ann:
            ann['bbox'] = mask_utils.toBbox(ann['segmentation'])

        box = self._coco_box_to_bbox(ann['bbox'])
        box[:2] = affine_transform(box[:2], trans_input)
        box[2:] = affine_transform(box[2:], trans_input)
        box[[0, 2]] = np.clip(box[[0, 2]], 0, map_w - 1)
        box[[1, 3]] = np.clip(box[[1, 3]], 0, map_h - 1)
        box_h = box[3] - box[1]
        box_w = box[2] - box[0]
        if not (box_h > 0 and box_w > 0):
            continue

        if 'seg' in self.opt.task and self.opt.seg_center:
            # Centre is the mean position of the mask foreground.
            seg_mask = self.get_masks_as_input(ann, trans_input)
            if np.sum(seg_mask) <= 0:
                continue
            foreground = np.where(seg_mask >= 0.5)
            center = np.array(
                [np.mean(foreground[1]), np.mean(foreground[0])],
                dtype=np.float32)
        else:
            center = np.array(
                [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                dtype=np.float32)

        radius = max(
            0, int(gaussian_radius((math.ceil(box_h), math.ceil(box_w)))))

        clean_center = center.copy()
        # Jitter the rendered centre proportionally to the box size.
        center[0] = center[0] + np.random.randn() * self.opt.hm_disturb * box_w
        center[1] = center[1] + np.random.randn() * self.opt.hm_disturb * box_h
        # conf == 0 simulates a detection lost in the previous frame.
        if np.random.random() > self.opt.lost_disturb:
            conf = 1
        else:
            conf = 0

        center_int = center.astype(np.int32)
        reported = center if conf == 0 else clean_center
        pre_cts.append(reported / stride)
        track_ids.append(ann['track_id'] if 'track_id' in ann else -1)

        if want_hm:
            draw_umich_gaussian(pre_hm[0], center_int, radius, k=conf)

        # The random draw happens whether or not a heatmap is requested.
        if np.random.random() < self.opt.fp_disturb and want_hm:
            # Extra peak near the clean centre; the 0.05 ratio is fixed.
            fp_center = clean_center.copy()
            fp_center[0] = fp_center[0] + np.random.randn() * 0.05 * box_w
            fp_center[1] = fp_center[1] + np.random.randn() * 0.05 * box_h
            fp_center_int = fp_center.astype(np.int32)
            draw_umich_gaussian(pre_hm[0], fp_center_int, radius, k=conf)

    return pre_hm, pre_cts, track_ids
def _add_instance(self,
                  ret,
                  gt_det,
                  k,
                  cls_id,
                  bbox,
                  bbox_amodal,
                  ann,
                  trans_output,
                  aug_s,
                  calib,
                  pre_cts=None,
                  track_ids=None):
    """Write the training targets of one object into slot ``k`` of ``ret``.

    Objects whose box has non-positive height or width are ignored. For
    every other object the centre heatmap, class, size, index and offset
    targets are filled, followed by the targets of each optional head listed
    in ``self.opt.heads`` and, when ``self.opt.pointcloud`` is set, the
    point-cloud heatmap.

    Args:
        ret: dict of pre-allocated target arrays, updated in place.
        gt_det: dict of lists collecting per-object ground truth.
        k: slot index of this object inside the target arrays.
        cls_id: 1-based class id (``cls_id - 1`` selects the heatmap).
        bbox: box as ``(x1, y1, x2, y2)`` in output coordinates.
        bbox_amodal: unclipped box, used by the ``ltrb_amodal`` head.
        ann: annotation dict of the object.
        trans_output: affine matrix passed to ``affine_transform``.
        aug_s: scale factor multiplied into the depth target.
        calib: calibration passed to ``get_dist_thresh``.
        pre_cts: previous-frame centres, parallel to ``track_ids``.
        track_ids: previous-frame track ids.
    """
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    if h <= 0 or w <= 0:
        return
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    ct_int = ct.astype(np.int32)

    ret['cat'][k] = cls_id - 1
    ret['mask'][k] = 1
    if 'wh' in ret:
        ret['wh'][k] = 1. * w, 1. * h
        ret['wh_mask'][k] = 1
    # Flattened index of the integer centre in the output map.
    ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
    ret['reg'][k] = ct - ct_int  # sub-pixel offset lost by truncation
    ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

    gt_det['bboxes'].append(
        np.array(
            [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
            dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(cls_id - 1)
    gt_det['cts'].append(ct)

    if 'tracking' in self.opt.heads:
        # Offset from this centre to the same track in the previous frame.
        if ann['track_id'] in track_ids:
            pre_ct = pre_cts[track_ids.index(ann['track_id'])]
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = pre_ct - ct_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if 'ltrb' in self.opt.heads:
        ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
            bbox[2] - ct_int[0], bbox[3] - ct_int[1]
        ret['ltrb_mask'][k] = 1

    # ltrb_amodal uses the left, top, right, bottom box representation to
    # allow boxes that extend outside the image (important for MOT data).
    if 'ltrb_amodal' in self.opt.heads:
        ret['ltrb_amodal'][k] = \
            bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
            bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in self.opt.heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in self.opt.heads:
        if ('velocity_cam' in ann) and min(ann['velocity_cam']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity_cam'],
                                          np.float32)[:3]
            ret['velocity_mask'][k] = 1
        gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in self.opt.heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w)

    if 'rot' in self.opt.heads:
        self._add_rot(ret, ann, k, gt_det)

    if 'dep' in self.opt.heads:
        if 'depth' in ann:
            ret['dep_mask'][k] = 1
            ret['dep'][k] = ann['depth'] * aug_s
            gt_det['dep'].append(ret['dep'][k])
        else:
            gt_det['dep'].append(2)

    if 'dim' in self.opt.heads:
        if 'dim' in ann:
            ret['dim_mask'][k] = 1
            ret['dim'][k] = ann['dim']
            gt_det['dim'].append(ret['dim'][k])
        else:
            gt_det['dim'].append([1, 1, 1])

    if 'amodel_offset' in self.opt.heads:
        if 'amodel_center' in ann:
            amodel_center = affine_transform(ann['amodel_center'],
                                             trans_output)
            ret['amodel_offset_mask'][k] = 1
            ret['amodel_offset'][k] = amodel_center - ct_int
            gt_det['amodel_offset'].append(ret['amodel_offset'][k])
        else:
            gt_det['amodel_offset'].append([0, 0])

    if self.opt.pointcloud:
        # get pointcloud heatmap
        if self.opt.disable_frustum:
            # pc_hm aliases pc_dep, so the division below is in place on both.
            ret['pc_hm'] = ret['pc_dep']
            # Options are read from self.opt; a bare `opt` is not defined
            # in this method.
            if self.opt.normalize_depth:
                ret['pc_hm'][self.opt.pc_feat_channels[
                    'pc_dep']] /= self.opt.max_pc_dist
        else:
            dist_thresh = get_dist_thresh(calib, ct, ann['dim'],
                                          ann['alpha'])
            pc_dep_to_hm(ret['pc_hm'], ret['pc_dep'], ann['depth'], bbox,
                         dist_thresh, self.opt)
def __getitem__(self, idx):
    """Load one face image with its 68 landmarks and build heatmap targets.

    The image is cropped around the landmarks, optionally pasted centred on
    a square black canvas (``self.frame``) and optionally resized to
    ``self.resize`` x ``self.resize``. Landmarks are shifted and scaled along
    with the image.

    Returns:
        ``(frame, hmap, M, rows)``, or ``None`` when the points file does
        not hold exactly 68 landmarks.

        * ``frame``: the processed image.
        * ``hmap``: ``(69, 64, 64)`` float32; one Gaussian map per landmark
          plus a final map produced by ``draw_boundary``.
        * ``M``: ``(69, 64, 64)`` array of 0/1, the 3x3 grey dilation of
          ``hmap`` thresholded at 0.5.
        * ``rows``: ``(68, 2)`` landmark coordinates in ``frame`` space.
    """
    imgPath = self.root + "/" + self.imgPath[idx]
    ptsPath = self.root + "/" + self.ptsPath[idx]

    img = plt.imread(imgPath)
    if len(img.shape) == 2:  # gray to rgb
        img = img.reshape(img.shape[0], img.shape[1], 1)
        img = np.repeat(img, 3, axis=2)
    # Image arrays are (rows, cols, channels), i.e. height comes first.
    img_h, img_w, _ = img.shape

    # The first three lines and the last line of the file are not points.
    with open(ptsPath) as ptsf:
        lines = [line.strip() for line in ptsf][3:-1]
    if len(lines) != 68:
        print("points are not 68")
        return None
    rows = np.array([[float(v) for v in line.split(' ')] for line in lines])

    minx, maxx = rows[:, 0].min(), rows[:, 0].max()
    miny, maxy = rows[:, 1].min(), rows[:, 1].max()
    # The landmark extent along x is used as the margin kept above the
    # landmarks; the other three sides use self.crop_pad.
    top_margin = maxx - minx
    top = max(0, miny - top_margin)
    left = max(0, minx - self.crop_pad)
    img = img[int(top):int(min(maxy + self.crop_pad, img_h)),
              int(left):int(min(maxx + self.crop_pad, img_w)), :]
    rows[:, 1] -= top
    rows[:, 0] -= left

    # NOTE(review): `frame` is only assigned when self.frame is truthy, yet
    # it is used by the resize step and returned -- confirm self.frame is
    # always enabled.
    if self.frame:
        csh = img.shape
        side = max(csh[0], csh[1])
        frame = np.zeros((side, side, 3))
        frame_ctr = np.array([side // 2, side // 2])
        frame[math.ceil(frame_ctr[0] - csh[0] / 2.):
              math.ceil(frame_ctr[0] + csh[0] / 2.),
              math.ceil(frame_ctr[1] - csh[1] / 2.):
              math.ceil(frame_ctr[1] + csh[1] / 2.), :] = img
        if csh[1] != frame.shape[1]:  # padded horizontally
            rows[:, 0] += (frame.shape[0] - csh[1]) / 2.
        else:  # padded vertically
            rows[:, 1] += (frame.shape[0] - csh[0]) / 2.

    if self.resize is not None:
        rows /= frame.shape[0]
        frame = cv2.resize(frame,
                           dsize=(self.resize, self.resize),
                           interpolation=cv2.INTER_LINEAR)
        rows *= float(self.resize)

    # The 256 -> 64 scaling below is hard-coded.
    hmap = np.zeros((68 + 1, 64, 64), dtype=np.float32)
    M = np.zeros((68 + 1, 64, 64), dtype=np.float32)
    for ind, xy in enumerate(rows):
        hmap[ind] = draw_umich_gaussian(hmap[ind], xy / 256. * 64, 7)
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    hmap[-1] = draw_boundary(
        hmap[-1], np.clip((rows / 256. * 64).astype(int), 0, 63))

    for i, channel in enumerate(hmap):
        M[i] = grey_dilation(channel, size=(3, 3))
    M = np.where(M >= 0.5, 1, 0)

    return frame, hmap, M, rows
def __getitem__(self, indices):
    """Build one four-image mosaic training sample.

    Args:
        indices: either a single integer index, in which case three more
            images are drawn uniformly at random from the whole dataset, or
            an iterable of indices to combine.

    Returns:
        dict holding ``image`` and ``pre_img`` (two independent mosaics of
        the same source images), the detection and tracking targets set up
        by ``self._init_ret``, and ``meta`` when ``self.opt.debug > 0``.
    """
    if isinstance(indices, (int, np.integer)):
        # np.random.randint excludes its upper bound, so the bound must be
        # len(self.images) for the last image to be selectable.
        indices = [indices] + [
            np.random.randint(0, len(self.images)) for _ in range(3)
        ]

    img_list, anns_list = [], []
    for index in indices:
        img, anns, _, _ = self._load_data(index)
        _, ori_w, _ = img.shape

        if np.random.random() < self.opt.flip:
            img = img[:, ::-1, :]
            anns = self._flip_anns(anns, ori_w)

        img_list.append(img)
        anns_list.append(anns)

    # Two mosaics of the same images: the first supplies the current frame
    # and its annotations, the second the "previous" frame with its track
    # ids and centres.
    img4_norm, _, anns_img4, _, _ = self._mosaic(img_list, anns_list)
    pre_img4_norm, _, _, track_ids, pre_cts = self._mosaic(
        img_list, anns_list)

    ret = {'image': img4_norm, 'pre_img': pre_img4_norm}
    gt_det = {'bboxes': [], 'scores': [], 'clses': [], 'cts': []}
    self._init_ret(ret, gt_det)

    num_objs = min(len(anns_img4), self.max_objs)
    for k in range(num_objs):
        ann = anns_img4[k]
        # Dividing by down_ratio gives coordinates on the output map.
        bbox = ann['bbox'] / self.opt.down_ratio
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id > self.opt.num_classes or cls_id <= -999:
            continue

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)

        ret['cat'][k] = cls_id - 1
        ret['mask'][k] = 1
        if 'wh' in ret:
            ret['wh'][k] = 1. * w, 1. * h
            ret['wh_mask'][k] = 1
        # Flattened index of the integer centre in the output map.
        ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        ret['reg'][k] = ct - ct_int  # sub-pixel offset lost by truncation
        ret['reg_mask'][k] = 1
        draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

        gt_det['bboxes'].append(
            np.array([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2
            ],
                     dtype=np.float32))
        gt_det['scores'].append(1)
        gt_det['clses'].append(cls_id - 1)
        gt_det['cts'].append(ct)

        # Offset from this centre to the same track in the second mosaic.
        if ann['track_id'] in track_ids:
            pre_ct = pre_cts[track_ids.index(
                ann['track_id'])] / self.opt.down_ratio
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = pre_ct - ct_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if self.opt.debug > 0:
        gt_det = self._format_gt_det(gt_det)
        meta = {'gt_det': gt_det}
        ret['meta'] = meta

    return ret
def __getitem__(self, index):
    """Load one COCO image and build its CenterNet training targets.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with keys ``'image'`` (float32, CHW, normalised), ``'hmap'``
        (per-class centre heatmaps), ``'w_h_'`` (box width/height on the
        feature map), ``'regs'`` (sub-pixel centre offsets), ``'inds'``
        (flattened centre indices), ``'ind_masks'`` (1 for filled slots),
        ``'c'`` (crop centre), ``'s'`` (crop scale) and ``'img_id'``.

    Raises:
        OSError: If the image file cannot be read by ``cv2.imread``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array(
        [anno['bbox'] for anno in annotations], dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder degenerate box so the array ops below still work; it
        # has zero area and is therefore skipped in the target loop.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('Failed to read image: {}'.format(img_path))
    height, width = img.shape[0], img.shape[1]
    # Centre point of the image.
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    # Random scale / translation / horizontal flip (training only).
    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    img_w, img_h = self.img_size['w'], self.img_size['h']
    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']

    # Apply the affine transform that maps the crop to the network input.
    trans_img = get_affine_transform(center, scale, 0, [img_w, img_h])
    img = cv2.warpAffine(img, trans_img, (img_w, img_h))

    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Same crop, but mapped to feature-map resolution for the box targets.
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    # The main training targets.
    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    # The target arrays only have max_objs slots, so any annotations beyond
    # that are dropped rather than indexing out of bounds.
    for k, (bbox, label) in enumerate(zip(bboxes[:self.max_objs], labels)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            obj_c = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            # Gaussian radius derived from the box size and the IoU setting.
            radius = max(0, int(gaussian_radius(
                (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            # Splat the Gaussian onto this class's heatmap.
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            # Record the offset lost to integer truncation.
            regs[k] = obj_c - obj_c_int  # discretization error
            # Flattened feature-map index of object k: fmap_w * cy + cx.
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            # Mark slot k as holding a valid object.
            ind_masks[k] = 1

    return {'image': img,
            'hmap': hmap, 'w_h_': w_h_, 'regs': regs,
            'inds': inds, 'ind_masks': ind_masks,
            'c': center, 's': scale, 'img_id': img_id}
def __getitem__(self, index):
    """Load one image with rotated-box XML annotations and build targets.

    Args:
        index: Position into ``self.ids``.

    Returns:
        dict with keys ``'image'`` (float32, CHW, scaled to [0, 1]),
        ``'hmap'`` (per-class centre heatmaps), ``'w_h_'`` (box size divided
        by ``self.img_size``), ``'regs'`` (sub-pixel centre offsets),
        ``'inds'`` (flattened centre indices), ``'ind_masks'`` (1 for filled
        slots), ``'c'`` / ``'s'`` (centre and scale; randomised for the
        train split), ``'img_id'``, ``'theta'`` (box angles) and
        ``'center'`` (box centres divided by ``self.down_ratio``).

    Raises:
        OSError: If the image file cannot be read by ``cv2.imread``.
    """
    img_id = self.ids[index]
    img_path = self.data_dir + "/images/" + img_id + ".jpeg"
    annot_path = self.data_dir + "/annotations/" + img_id + ".xml"

    # Each <object> carries a <robndbox> with cx, cy, w, h and angle.
    tree = elemTree.parse(annot_path)
    rbox_fields = ('cx', 'cy', 'w', 'h', 'angle')
    annotations = []
    for obj in tree.findall('./object'):
        rbox_node = obj.find('robndbox')
        annotations.append(
            [float(rbox_node.find(field).text) for field in rbox_fields])

    # Every annotated object gets label 1.
    labels = np.ones(len(annotations))
    bboxes = np.array(annotations, dtype=np.float32)
    if len(bboxes) == 0:
        # Placeholder zero-size box; skipped by the h > 0 and w > 0 check.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('Failed to read image: {}'.format(img_path))
    # Reverses the channel order of the loaded image.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    if self.split == 'train':
        # Only the returned 'c' and 's' are randomised; the image itself is
        # not warped in this method.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(self.img_size['w'], width)
        h_border = get_border(self.img_size['h'], height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

    img = img.astype(np.float32) / 255.

    # Colour augmentation and mean/std normalisation are disabled:
    # if self.split == 'train':
    #     color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    # img -= self.mean
    # img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    thetas = np.zeros((self.max_objs, 1), dtype=np.float32)
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    objCnt = np.zeros((self.max_objs, 2), dtype=np.float32)

    # The target arrays only have max_objs slots, so any annotations beyond
    # that are dropped rather than indexing out of bounds.
    for k, (rbox, label) in enumerate(zip(bboxes[:self.max_objs], labels)):
        w, h, angle = rbox[2], rbox[3], rbox[-1]
        if h > 0 and w > 0:
            obj_c = np.array([rbox[0], rbox[1]], dtype=np.float32) / float(
                self.down_ratio)
            objCnt[k] = obj_c
            obj_c_int = obj_c.astype(np.int32)

            radius = max(0, int(gaussian_radius(
                (math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[int(label) - 1], obj_c_int, radius)
            w_h_[k] = w / self.img_size['w'], h / self.img_size['h']
            thetas[k] = angle
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

    # Disabled: ground-truth boxes with class, formerly collected per object
    # inside the loop above.
    # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2,
    #                    obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label])
    # detections = np.array(detections, dtype=np.float32) \
    #     if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32)

    return {
        'image': img,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id,
        'theta': thetas,
        'center': objCnt
    }