def _get_pre_dets(self, anns, trans_input, trans_output):
    """Collect (optionally jittered) centres of previous-frame annotations.

    Each annotation box is warped with ``trans_input`` and clipped to the
    ``opt.input_w`` x ``opt.input_h`` canvas. Boxes with positive height and
    width contribute one centre (divided by ``opt.down_ratio``) and one track
    id. When ``opt.pre_hm`` is truthy a single-channel heatmap is rendered too.

    Args:
        anns: iterable of annotation dicts with ``category_id`` and ``bbox``
            (optionally ``iscrowd`` and ``track_id``).
        trans_input: affine matrix passed to ``affine_transform``.
        trans_output: unused; kept for interface compatibility.

    Returns:
        ``(pre_hm, pre_cts, track_ids)`` where ``pre_hm`` is ``None`` when
        ``opt.pre_hm`` is falsy.
    """
    opt = self.opt
    hm_h, hm_w = opt.input_h, opt.input_w
    down_ratio = opt.down_ratio
    want_hm = opt.pre_hm
    pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32) if want_hm else None
    pre_cts = []
    track_ids = []

    for ann in anns:
        cls_id = int(self.cat_ids[ann['category_id']])
        if (cls_id > opt.num_classes or cls_id <= -99
                or ('iscrowd' in ann and ann['iscrowd'] > 0)):
            continue

        box = self._coco_box_to_bbox(ann['bbox'])
        box[:2] = affine_transform(box[:2], trans_input)
        box[2:] = affine_transform(box[2:], trans_input)
        box[[0, 2]] = np.clip(box[[0, 2]], 0, hm_w - 1)
        box[[1, 3]] = np.clip(box[[1, 3]], 0, hm_h - 1)
        h = box[3] - box[1]
        w = box[2] - box[0]
        if not (h > 0 and w > 0):
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))

        # ``noisy`` is perturbed in place; ``clean`` keeps the exact centre.
        noisy = np.array(
            [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2], dtype=np.float32)
        clean = noisy.copy()

        # The order of the random draws below is part of the behaviour.
        noisy[0] = noisy[0] + np.random.randn() * opt.hm_disturb * w
        noisy[1] = noisy[1] + np.random.randn() * opt.hm_disturb * h
        conf = 1 if np.random.random() > opt.lost_disturb else 0

        noisy_int = noisy.astype(np.int32)
        # A "kept" detection reports the jittered centre, a "lost" one the
        # clean centre.
        chosen = noisy if conf == 1 else clean
        pre_cts.append(chosen / down_ratio)
        track_ids.append(ann.get('track_id', -1))

        if want_hm:
            draw_umich_gaussian(pre_hm[0], noisy_int, radius, k=conf)

        # Optionally render an extra, slightly shifted peak.
        if np.random.random() < opt.fp_disturb and want_hm:
            extra = clean.copy()
            # Hard-coded heatmap disturb ratio; other values were not tried.
            extra[0] = extra[0] + np.random.randn() * 0.05 * w
            extra[1] = extra[1] + np.random.randn() * 0.05 * h
            extra_int = extra.astype(np.int32)
            draw_umich_gaussian(pre_hm[0], extra_int, radius, k=conf)

    return pre_hm, pre_cts, track_ids
def _gen_kmf_att_hm(self, ret, pre_anns, trans_input):
    """Run one Kalman tracker per track id over a sequence of past frames.

    Args:
        ret: sample dict forwarded to ``self._add_kmf_att``.
        pre_anns: list of per-frame annotation lists, iterated in order
            (the last entry is always used to update existing trackers).
        trans_input: affine matrix applied to every box.

    Returns:
        Dict keyed by track id. Each value holds ``kmf`` (the tracker),
        ``age`` (number of updates after creation), ``cts_history`` (list of
        observed box centres) and ``ct`` (the centre returned by
        ``_add_kmf_att``, or the last observed centre when that is ``None``).
    """
    opt = self.opt
    hm_h, hm_w = opt.input_h, opt.input_w
    final_idx = len(pre_anns) - 1
    trackers = {}

    for frame_idx, frame_anns in enumerate(pre_anns):  # [..., n-2, n-1]
        for ann in frame_anns:
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id > opt.num_classes or cls_id <= -999 or cls_id == 0:
                continue

            # Derive a box from the mask when the annotation has none.
            if 'bbox' not in ann:
                ann['bbox'] = mask_utils.toBbox(ann['segmentation'])

            bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox[:2] = affine_transform(bbox[:2], trans_input)
            bbox[2:] = affine_transform(bbox[2:], trans_input)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h = bbox[3] - bbox[1]
            w = bbox[2] - bbox[0]
            if h <= 0 or w <= 0:
                continue

            track_id = ann['track_id']
            if track_id not in trackers:
                trackers[track_id] = {}
                entry = trackers[track_id]
                entry['kmf'] = KalmanBoxTracker(bbox)
                entry['age'] = 0
                entry['cts_history'] = [
                    np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ]
                continue

            # Randomly drop an observation, except on the final frame.
            # The random draw happens first, exactly as in the original.
            if (np.random.random() > opt.att_track_lost_disturb
                    or frame_idx == final_idx):
                entry = trackers[track_id]
                entry['kmf'].predict()
                entry['kmf'].update(bbox)
                entry['age'] += 1
                entry['cts_history'].append(
                    np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2], dtype=np.float32))

    for entry in trackers.values():
        predicted_box = entry['kmf'].predict()[0]
        pred_ct = self._add_kmf_att(ret=ret,
                                    bbox=predicted_box,
                                    trans_input=trans_input,
                                    init=(entry['age'] <= 0),
                                    draw=(opt.kmf_att))
        if pred_ct is None:
            entry['ct'] = entry['cts_history'][-1]
        else:
            entry['ct'] = pred_ct
    return trackers
def re_anns(anns, trans_output, output_w, output_h):
    """Warp the first polygon of every annotation in place.

    The flat ``[x0, y0, x1, y1, ...]`` list stored at
    ``ann['segmentation'][0]`` is rewritten pair by pair with
    ``affine_transform(pair, trans_output)``. Only the first polygon of each
    annotation is touched.

    Args:
        anns: sequence of annotation dicts (mutated).
        trans_output: affine matrix handed to ``affine_transform``.
        output_w: unused; kept for interface compatibility.
        output_h: unused; kept for interface compatibility.

    Returns:
        The same ``anns`` object.
    """
    for ann in anns:
        polygon = ann['segmentation'][0]
        for start in range(0, len(polygon), 2):
            stop = start + 2
            polygon[start:stop] = affine_transform(polygon[start:stop],
                                                   trans_output)
    return anns
def __getitem__(self, index):
    """Return one sample: normalised image, joint heatmaps and depth targets.

    Fix over the previous version: ``cv2.warpAffine`` takes ``dsize`` as
    ``(width, height)``, and the other ``get_affine_transform`` call sites
    pass ``[width, height]``. The sizes were passed as ``(h, w)`` here, which
    only works for square resolutions. They are now passed width first, so the
    result is unchanged for square configs and correct for non-square ones.

    Returns:
        Dict with ``input`` (C, input_h, input_w), ``target``
        (num_joints, output_h, output_w), ``reg_target``, ``reg_ind``,
        ``reg_mask`` and ``meta``.
    """
    # Reshuffle the index permutation near the start of a training pass.
    if index < 10 and self.split == 'train':
        self.idxs = np.random.choice(self.num_samples,
                                     self.num_samples,
                                     replace=False)

    img = self._load_image(index)
    gt_3d, pts, c, s = self._get_part_info(index)
    r = 0
    s = np.array([s, s])
    s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

    input_h, input_w = self.opt.input_h, self.opt.input_w
    output_h, output_w = self.opt.output_h, self.opt.output_w
    num_joints = self.num_joints

    # Sizes are given width first (OpenCV dsize convention).
    trans_input = get_affine_transform(c, s, r, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    trans_output = get_affine_transform(c, s, r, [output_w, output_h])
    out = np.zeros((num_joints, output_h, output_w), dtype=np.float32)
    reg_target = np.zeros((num_joints, 1), dtype=np.float32)
    reg_ind = np.zeros(num_joints, dtype=np.int64)
    reg_mask = np.zeros(num_joints, dtype=np.uint8)
    pts_crop = np.zeros((num_joints, 2), dtype=np.int32)

    for i in range(num_joints):
        pt = affine_transform(pts[i, :2], trans_output).astype(np.int32)
        # Only joints that land inside the output map get targets.
        if 0 <= pt[0] < output_w and 0 <= pt[1] < output_h:
            pts_crop[i] = pt
            out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)
            reg_target[i] = pts[i, 2] / s[0]  # assert not fit_short
            # Flat index into a (output_h, output_w, num_joints) layout.
            reg_ind[i] = (pt[1] * output_w * num_joints
                          + pt[0] * num_joints + i)  # note transposed
            reg_mask[i] = 1

    meta = {
        'index': self.idxs[index],
        'center': c,
        'scale': s,
        'gt_3d': gt_3d,
        'pts_crop': pts_crop,
    }
    return {
        'input': inp,
        'target': out,
        'meta': meta,
        'reg_target': reg_target,
        'reg_ind': reg_ind,
        'reg_mask': reg_mask,
    }
def _get_data_AFE(self, anns, trans_input, trans_output):
    """Return warped, clipped boxes and their track ids.

    Annotations are skipped when their mapped class id is above
    ``opt.num_classes``, is ``<= -99``, or when ``iscrowd`` is positive.
    Remaining boxes are warped with ``trans_input``, clipped to the
    ``opt.input_w`` x ``opt.input_h`` canvas, and kept only if both their
    height and width stay positive.

    Args:
        anns: iterable of annotation dicts.
        trans_input: affine matrix handed to ``affine_transform``.
        trans_output: unused; kept for interface compatibility.

    Returns:
        ``(bboxes, track_ids)``: a list of ``[x1, y1, x2, y2]`` lists and a
        parallel list of track ids (``-1`` when the annotation has none).
    """
    max_x = self.opt.input_w - 1
    max_y = self.opt.input_h - 1
    num_classes = self.opt.num_classes
    bboxes = []
    track_ids = []

    for ann in anns:
        cls_id = int(self.cat_ids[ann["category_id"]])
        if (cls_id > num_classes or cls_id <= -99
                or ("iscrowd" in ann and ann["iscrowd"] > 0)):
            continue

        box = self._coco_box_to_bbox(ann["bbox"])
        box[:2] = affine_transform(box[:2], trans_input)
        box[2:] = affine_transform(box[2:], trans_input)
        box[[0, 2]] = np.clip(box[[0, 2]], 0, max_x)
        box[[1, 3]] = np.clip(box[[1, 3]], 0, max_y)

        height = box[3] - box[1]
        width = box[2] - box[0]
        if height > 0 and width > 0:
            bboxes.append([box[0], box[1], box[2], box[3]])
            track_ids.append(ann.get("track_id", -1))

    return bboxes, track_ids
def __getitem__(self, index):
    """Return one (optionally augmented) 2D-pose sample.

    In the ``'train'`` split the scale is jittered, a random rotation is
    applied with probability 0.6, and the image may be mirrored (joints listed
    in ``self.shuffle_ref`` are swapped in that case).

    Fix over the previous version: ``cv2.warpAffine`` takes ``dsize`` as
    ``(width, height)``, and the other ``get_affine_transform`` call sites
    pass ``[width, height]``. The sizes were passed as ``(h, w)`` here, which
    only works for square resolutions. They are now passed width first.

    Returns:
        Dict with ``input`` (C, input_h, input_w), ``target``
        (num_joints, output_h, output_w) and ``meta``.
    """
    img = self._load_image(index)
    _, pts, c, s = self._get_part_info(index)
    r = 0

    is_train = self.split == 'train'
    if is_train:
        sf = self.opt.scale
        rf = self.opt.rotate
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # The coin flip is evaluated before the rotation draw.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if np.random.random() <= 0.6 else 0

    # Never let the crop exceed the longer image side.
    s = min(s, max(img.shape[0], img.shape[1])) * 1.0
    s = np.array([s, s])
    s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

    flipped = is_train and np.random.random() < self.opt.flip
    if flipped:
        # NOTE(review): ``c`` and ``pts`` are modified in place; this assumes
        # ``_get_part_info`` returns fresh arrays. Confirm against that helper.
        img = img[:, ::-1, :]
        c[0] = img.shape[1] - 1 - c[0]
        pts[:, 0] = img.shape[1] - 1 - pts[:, 0]
        for left, right in self.shuffle_ref:
            pts[left], pts[right] = pts[right].copy(), pts[left].copy()

    input_h, input_w = self.opt.input_h, self.opt.input_w
    output_h, output_w = self.opt.output_h, self.opt.output_w
    num_joints = self.num_joints

    # Sizes are given width first (OpenCV dsize convention).
    trans_input = get_affine_transform(c, s, r, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    trans_output = get_affine_transform(c, s, r, [output_w, output_h])
    out = np.zeros((num_joints, output_h, output_w), dtype=np.float32)
    pts_crop = np.zeros((num_joints, 2), dtype=np.int32)
    for i in range(num_joints):
        # A joint at the origin (both coordinates <= 0) gets no heatmap.
        if pts[i, 0] > 0 or pts[i, 1] > 0:
            pts_crop[i] = affine_transform(pts[i], trans_output)
            out[i] = draw_gaussian(out[i], pts_crop[i], self.opt.hm_gauss)

    meta = {'index': index, 'center': c, 'scale': s, 'pts_crop': pts_crop}
    return {'input': inp, 'target': out, 'meta': meta}
def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
    """Write the keypoint targets of object ``k`` into ``ret`` and ``gt_det``.

    Keypoints come from ``ann["keypoints"]`` reshaped to ``(num_joints, 3)``;
    an all-zero array is used when the annotation has none. Every keypoint is
    warped with ``trans_output``. The third column decides what happens next:

    * ``> 0`` and inside the output map: offsets, indices, masks and a
      heatmap peak are written. When the value is exactly ``1`` the peak pixel
      is set to ``self.ignore_val`` and the heatmap/offset masks are cleared.
    * ``> 0`` but outside the map: the coordinates are zeroed.
    * otherwise: the coordinates are zeroed and the object's box region of
      that joint's heatmap is passed to ``self._ignore_region``.

    The warped ``(x, y)`` pairs are appended, flattened, to ``gt_det["hps"]``.
    """
    num_joints = self.num_joints
    out_w = self.opt.output_w
    out_h = self.opt.output_h

    if "keypoints" in ann:
        pts = np.array(ann["keypoints"], np.float32).reshape(num_joints, 3)
    else:
        pts = np.zeros((self.num_joints, 3), np.float32)

    if self.opt.simple_radius > 0:
        hp_radius = int(
            simple_radius(h, w, min_overlap=self.opt.simple_radius))
    else:
        hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        hp_radius = max(0, int(hp_radius))

    for j in range(num_joints):
        pts[j, :2] = affine_transform(pts[j, :2], trans_output)

        if not pts[j, 2] > 0:
            pts[j, :2] *= 0
            self._ignore_region(ret["hm_hp"][j,
                                             int(bbox[1]):int(bbox[3]) + 1,
                                             int(bbox[0]):int(bbox[2]) + 1])
            continue

        inside = (pts[j, 0] >= 0 and pts[j, 0] < out_w
                  and pts[j, 1] >= 0 and pts[j, 1] < out_h)
        if not inside:
            pts[j, :2] *= 0
            continue

        slot = k * num_joints + j  # flat (object, joint) index
        lo, hi = j * 2, j * 2 + 2
        ret["hps"][k, lo:hi] = pts[j, :2] - ct_int
        ret["hps_mask"][k, lo:hi] = 1

        pt_int = pts[j, :2].astype(np.int32)
        ret["hp_offset"][slot] = pts[j, :2] - pt_int
        ret["hp_ind"][slot] = pt_int[1] * out_w + pt_int[0]
        ret["hp_offset_mask"][slot] = 1
        ret["hm_hp_mask"][slot] = 1
        ret["joint"][slot] = j
        draw_umich_gaussian(ret["hm_hp"][j], pt_int, hp_radius)

        if pts[j, 2] == 1:
            ret["hm_hp"][j, pt_int[1], pt_int[0]] = self.ignore_val
            ret["hp_offset_mask"][slot] = 0
            ret["hm_hp_mask"][slot] = 0

    gt_det["hps"].append(pts[:, :2].reshape(num_joints * 2))
def pre_process(self, image, scale, meta=None, anns=None):
    """Resize, warp and normalise ``image`` into a network input batch.

    The fixed ``opt.input_h`` x ``opt.input_w`` resolution is used when
    ``opt.fix_res`` is set or when either side of ``image`` exceeds 1024
    pixels. Otherwise the scaled size is padded via ``(x | opt.pad) + 1``.

    Args:
        image: H x W x 3 array.
        scale: factor applied to the image size before warping.
        meta: ignored; the returned meta dict is always rebuilt.
        anns: optional annotations. The first polygon of each entry
            (``ann['segmentation'][0]``) is warped in place with the input
            transform.

    Returns:
        ``(images, meta)``: a torch tensor of shape
        ``(1 or 2, 3, inp_height, inp_width)`` (2 when ``opt.flip_test`` adds
        the mirrored copy) and a dict with ``c``, ``s``, ``out_height`` and
        ``out_width``.
    """
    height, width = image.shape[0:2]
    new_height = int(height * scale)
    new_width = int(width * scale)
    too_large = height > 1024 or width > 1024

    if self.opt.fix_res or too_large:
        inp_height, inp_width = self.opt.input_h, self.opt.input_w
        c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
        s = max(height, width) * 1.0
    else:
        # Original note: guarantees the size is divisible by 32
        # (presumably with opt.pad == 31 - confirm against the options).
        inp_height = (new_height | self.opt.pad) + 1
        inp_width = (new_width | self.opt.pad) + 1
        c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
        s = np.array([inp_width, inp_height], dtype=np.float32)

    trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
    resized_image = cv2.resize(image, (new_width, new_height))
    inp_image = cv2.warpAffine(resized_image,
                               trans_input, (inp_width, inp_height),
                               flags=cv2.INTER_LINEAR)

    if anns is not None:
        for ann in anns:
            polygon = ann['segmentation'][0]
            for start in range(0, len(polygon), 2):
                polygon[start:start + 2] = affine_transform(
                    polygon[start:start + 2], trans_input)

    inp_image = ((inp_image / 255. - self.mean) / self.std).astype(
        np.float32)
    images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                  inp_width)
    if self.opt.flip_test:
        mirrored = images[:, :, :, ::-1]
        images = np.concatenate((images, mirrored), axis=0)
    images = torch.from_numpy(images)

    down_ratio = self.opt.down_ratio
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // down_ratio,
        'out_width': inp_width // down_ratio,
    }
    return images, meta
def __getitem__(self, index, debug=False):
    """Return one siamese training sample with keypoint targets.

    Changes over the previous version:

    * ``np.float`` (removed in NumPy 1.24) is replaced by ``np.float64``,
      which is the type the alias resolved to.
    * The never-called ``crop_hwc1`` helper, the never-read
      ``joints_3d_vis`` array and a needless ``search.copy()`` are removed.

    Args:
        index: position in ``self.pick``.
        debug: when true, annotated crops are written under ``debug/``.

    Returns:
        ``(template, search, cls, delta, delta_weight, bbox_reg, kp_weight,
        ret, joints_3d_out)``. ``template`` and ``search`` are CHW float32,
        ``ret`` holds the keypoint targets and ``joints_3d_out`` is the
        ``(3, num_joints)`` keypoint array in search-crop coordinates.

    NOTE(review): the keypoint path reads ``search_kp``, which is only set
    when ``dataset.has_mask`` is false. Mask datasets do not appear to be
    supported by this method - confirm.
    """
    index = self.pick[index]
    dataset, index = self.find_dataset(index)

    gray = self.gray and self.gray > random.random()
    neg = self.neg and self.neg > random.random()

    if neg:
        template = dataset.get_random_target(index)
        if self.inner_neg and self.inner_neg > random.random():
            search = dataset.get_random_target()
        else:
            search = random.choice(self.all_data).get_random_target()
    else:
        template, search = dataset.get_positive_pair(index)

    def center_crop(img, size):
        """Cut a centred square window (size taken from ``img.shape[1]``)."""
        shape = img.shape[1]
        if shape == size:
            return img
        c = shape // 2
        l = c - size // 2
        r = c + size // 2 + 1
        return img[l:r, l:r]

    template_image, scale_z = self.imread(template[0])
    if self.template_small:
        template_image = center_crop(template_image, self.template_size)
    search_image, scale_x = self.imread(search[0])

    if dataset.has_mask:
        # Kept for parity with the original; the mask is not read below.
        if not neg:
            search_mask = (cv2.imread(search[2], 0) > 0).astype(np.float32)
        else:
            search_mask = np.zeros(search_image.shape[:2], dtype=np.float32)
    else:
        if not neg:
            search_kp = np.array(search[2], dtype=np.float32)
        else:
            search_kp = np.zeros(51, dtype=np.float32)

    if self.crop_size > 0:
        search_image = center_crop(search_image, self.crop_size)

    def toBBox(image, shape):
        """Centre a box of the exemplar-scaled target size in ``image``."""
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            w, h = shape
        context_amount = 0.5
        exemplar_size = self.template_size  # 127
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z
        cx, cy = imw // 2, imh // 2
        bbox = center2corner(Center(cx, cy, w, h))
        return bbox

    template_box = toBBox(template_image, template[1])
    search_box = toBBox(search_image, search[1])

    # ``_`` ends up bound to the third value returned by template_aug and is
    # what the debug drawing below receives.
    template, _, _ = self.template_aug(template_image,
                                       template_box,
                                       self.template_size,
                                       gray=gray)
    search, bbox, mask = self.search_aug(search_image,
                                         search_box,
                                         self.search_size,
                                         gray=gray)

    def draw(image, box, name):
        """Write ``image`` with ``box`` drawn on it to ``name``."""
        image = image.copy()
        x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.imwrite(name, image)

    def crop_hwc(bbox, out_sz=255):
        """Return the 2x3 affine matrix mapping ``bbox`` onto an out_sz crop."""
        a = (out_sz - 1) / (bbox[2] - bbox[0])
        b = (out_sz - 1) / (bbox[3] - bbox[1])
        c = -a * bbox[0]
        d = -b * bbox[1]
        # np.float no longer exists in NumPy >= 1.24; it meant float64.
        mapping = np.array([[a, 0, c], [0, b, d]]).astype(np.float64)
        return mapping

    def pos_s_2_bbox(pos, s):
        """Square box of side ``s`` centred on ``pos``."""
        return [
            pos[0] - s / 2, pos[1] - s / 2, pos[0] + s / 2, pos[1] + s / 2
        ]

    def crop_like_SiamFCx(bbox,
                          exemplar_size=127,
                          context_amount=0.5,
                          search_size=255):
        """Return the target centre and the side of the search window."""
        target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.]
        target_size = [bbox[2] - bbox[0] + 1, bbox[3] - bbox[1] + 1]
        wc_z = target_size[1] + context_amount * sum(target_size)
        hc_z = target_size[0] + context_amount * sum(target_size)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        d_search = (search_size - exemplar_size) / 2
        pad = d_search / scale_z
        s_x = s_z + 2 * pad
        return target_pos, s_x

    def kp_conversion(KeyPoints, matrix):
        """Apply a 2x3 affine ``matrix`` to flat ``[x, y, v, ...]`` keypoints.

        Keypoints whose ``v`` is 0 come out as ``0, 0, 0``.
        """
        count = int(len(KeyPoints) / 3)
        homogeneous = np.array(
            [[KeyPoints[i * 3 + 0], KeyPoints[i * 3 + 1], 1]
             for i in range(count)])
        converted = []
        for i in range(count):
            visibility = KeyPoints[i * 3 + 2]
            if visibility != 0:
                xy = np.matmul(matrix, homogeneous[i, :]).tolist()
                converted.extend([xy[0], xy[1], visibility])
            else:
                converted.extend([0, 0, 0])
        return converted

    if debug:
        draw(template_image, template_box,
             "debug/{:06d}_ot.jpg".format(index))
        draw(search_image, search_box, "debug/{:06d}_os.jpg".format(index))
        draw(template, _, "debug/{:06d}_t.jpg".format(index))
        draw(search, bbox, "debug/{:06d}_s.jpg".format(index))

    cls, delta, delta_weight = self.anchor_target(self.anchors, bbox,
                                                  self.size, neg)

    if not dataset.has_mask:
        # Map the annotated keypoints into search-crop coordinates.
        pos, window = crop_like_SiamFCx(search_box,
                                        exemplar_size=127,
                                        context_amount=0.5,
                                        search_size=255)
        mapping_bbox = pos_s_2_bbox(pos, window)
        mapping = crop_hwc(mapping_bbox, out_sz=255)
        keypoints = kp_conversion(search_kp.tolist(), mapping)
        joints_3d = np.zeros((self.num_joints, 3), dtype=np.float64)
        for ipt in range(self.num_joints):
            joints_3d[ipt, 0] = keypoints[ipt * 3 + 0]
            joints_3d[ipt, 1] = keypoints[ipt * 3 + 1]
            joints_3d[ipt, 2] = keypoints[ipt * 3 + 2]

    if not neg:
        kp_weight = cls.max(axis=0, keepdims=True)
    else:
        kp_weight = np.zeros([1, cls.shape[1], cls.shape[2]],
                             dtype=np.float32)

    # Keypoint targets on an output_res x output_res grid.
    c = np.array([search.shape[1] / 2., search.shape[0] / 2.],
                 dtype=np.float32)
    s = max(search.shape[0], search.shape[1]) * 1.0
    rot = 0  # rotation augmentation is not used here
    output_res = self.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    ind = np.zeros(1, dtype=np.int64)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    kps = np.zeros(num_joints * 2, dtype=np.float32)
    kps_mask = np.zeros((self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros(num_joints, dtype=np.int64)
    hp_mask = np.zeros(num_joints, dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.mse_loss else \
        draw_umich_gaussian

    pts = joints_3d.copy()
    bbox = np.array(bbox, np.float32)
    bbox_reg = np.array(bbox, np.float32)  # untouched copy that is returned
    bbox[:2] = affine_transform(bbox[:2], trans_output)
    bbox[2:] = affine_transform(bbox[2:], trans_output)
    bbox = np.clip(bbox, 0, output_res - 1)
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

    if (h > 0 and w > 0):
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        hp_radius = gaussian_radius(
            (math.ceil(h) * 2.3, math.ceil(w) * 2.3))
        hp_radius = self.hm_gauss \
            if self.mse_loss else max(0, int(hp_radius))
        ind[0] = ct_int[1] * output_res + ct_int[0]

        for j in range(num_joints):
            if pts[j, 2] > 0:
                pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                        pts[j, 1] >= 0 and pts[j, 1] < output_res:
                    kps[j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                    kps_mask[j * 2:j * 2 + 2] = 1
                    pt_int = pts[j, :2].astype(np.int32)
                    hp_offset[j] = pts[j, :2] - pt_int
                    hp_ind[j] = pt_int[1] * output_res + pt_int[0]
                    hp_mask[j] = 1
                    draw_gaussian(hm_hp[j], pt_int, hp_radius)

    ret = {
        'hps': kps,
        'hm_hp': hm_hp,
        'hp_mask': hp_mask,
        'hp_offset': hp_offset,
        'hp_ind': hp_ind,
        'hps_mask': kps_mask,
        'ind': ind,
    }

    joints_3d_out = joints_3d.transpose(1, 0)
    template, search = map(
        lambda x: np.transpose(x, (2, 0, 1)).astype(np.float32),
        [template, search])

    return template, search, cls, delta, \
        delta_weight, bbox_reg, \
        np.array(kp_weight, np.float32), ret, joints_3d_out
def _add_instance(
    self,
    ret,
    gt_det,
    k,
    cls_id,
    bbox,
    bbox_amodal,
    ann,
    trans_output,
    aug_s,
    calib,
    pre_cts=None,
    track_ids=None,
):
    """Fill slot ``k`` of the training targets for one object.

    Nothing is written when the box has non-positive height or width.
    Otherwise the class, centre index, sub-pixel offset and heatmap peak are
    always written, followed by one section per head listed in
    ``self.opt.heads`` (``tracking``, ``ltrb``, ``ltrb_amodal``,
    ``nuscenes_att``, ``velocity``, ``hps``, ``rot``, ``dep``, ``dim``,
    ``amodel_offset``). Each section also appends to the matching list in
    ``gt_det``.

    Args:
        ret: dict of pre-allocated target arrays (mutated).
        gt_det: dict of lists collecting per-object ground truth (mutated).
        k: object slot index.
        cls_id: 1-based class id (stored as ``cls_id - 1``).
        bbox: ``[x1, y1, x2, y2]`` in output coordinates.
        bbox_amodal: amodal box used by the ``ltrb_amodal`` head.
        ann: annotation dict.
        trans_output: affine matrix for the ``hps`` / ``amodel_offset`` heads.
        aug_s: scale factor multiplied into the depth target.
        calib: unused in this method.
        pre_cts: previous-frame centres, indexed like ``track_ids``.
        track_ids: previous-frame track ids (required by ``tracking``).
    """
    h = bbox[3] - bbox[1]
    w = bbox[2] - bbox[0]
    if h <= 0 or w <= 0:
        return

    heads = self.opt.heads
    class_index = cls_id - 1
    radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    ct_int = ct.astype(np.int32)

    ret["cat"][k] = class_index
    ret["mask"][k] = 1
    if "wh" in ret:
        ret["wh"][k] = 1.0 * w, 1.0 * h
        ret["wh_mask"][k] = 1
    ret["ind"][k] = ct_int[1] * self.opt.output_w + ct_int[0]
    ret["reg"][k] = ct - ct_int
    ret["reg_mask"][k] = 1
    draw_umich_gaussian(ret["hm"][class_index], ct_int, radius)

    half_w, half_h = w / 2, h / 2
    gt_det["bboxes"].append(
        np.array(
            [ct[0] - half_w, ct[1] - half_h, ct[0] + half_w, ct[1] + half_h],
            dtype=np.float32,
        ))
    gt_det["scores"].append(1)
    gt_det["clses"].append(class_index)
    gt_det["cts"].append(ct)

    if "tracking" in heads:
        if ann["track_id"] in track_ids:
            pre_ct = pre_cts[track_ids.index(ann["track_id"])]
            ret["tracking_mask"][k] = 1
            # The offset is multiplied by zero, so the stored target is 0.
            ret["tracking"][k] = 0 * (pre_ct - ct_int)
            gt_det["tracking"].append(ret["tracking"][k])
        else:
            gt_det["tracking"].append(np.zeros(2, np.float32))

    if "ltrb" in heads:
        ret["ltrb"][k] = (
            bbox[0] - ct_int[0],
            bbox[1] - ct_int[1],
            bbox[2] - ct_int[0],
            bbox[3] - ct_int[1],
        )
        ret["ltrb_mask"][k] = 1

    if "ltrb_amodal" in heads:
        ret["ltrb_amodal"][k] = (
            bbox_amodal[0] - ct_int[0],
            bbox_amodal[1] - ct_int[1],
            bbox_amodal[2] - ct_int[0],
            bbox_amodal[3] - ct_int[1],
        )
        ret["ltrb_amodal_mask"][k] = 1
        gt_det["ltrb_amodal"].append(bbox_amodal)

    if "nuscenes_att" in heads:
        if ("attributes" in ann) and ann["attributes"] > 0:
            att = int(ann["attributes"] - 1)
            ret["nuscenes_att"][k][att] = 1
            ret["nuscenes_att_mask"][k][self.nuscenes_att_range[att]] = 1
        gt_det["nuscenes_att"].append(ret["nuscenes_att"][k])

    if "velocity" in heads:
        # Velocity components at or below -1000 are treated as missing.
        if ("velocity" in ann) and min(ann["velocity"]) > -1000:
            ret["velocity"][k] = np.array(ann["velocity"], np.float32)[:3]
            ret["velocity_mask"][k] = 1
        gt_det["velocity"].append(ret["velocity"][k])

    if "hps" in heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w)

    if "rot" in heads:
        self._add_rot(ret, ann, k, gt_det)

    if "dep" in heads:
        if "depth" in ann:
            ret["dep_mask"][k] = 1
            ret["dep"][k] = ann["depth"] * aug_s
            gt_det["dep"].append(ret["dep"][k])
        else:
            gt_det["dep"].append(2)

    if "dim" in heads:
        if "dim" in ann:
            ret["dim_mask"][k] = 1
            ret["dim"][k] = ann["dim"]
            gt_det["dim"].append(ret["dim"][k])
        else:
            gt_det["dim"].append([1, 1, 1])

    if "amodel_offset" in heads:
        if "amodel_center" in ann:
            amodel_center = affine_transform(ann["amodel_center"],
                                             trans_output)
            ret["amodel_offset_mask"][k] = 1
            ret["amodel_offset"][k] = amodel_center - ct_int
            gt_det["amodel_offset"].append(ret["amodel_offset"][k])
        else:
            gt_det["amodel_offset"].append([0, 0])
def __getitem__(self, index):
    """Return one detection sample whose heatmap peak sits on the polygon centre.

    The heatmap peak and the flat index ``ind`` use the centre of the first
    segmentation polygon. ``wh`` holds the box size, and ``reg`` holds the
    offset from the integer polygon centre to the float box centre.

    Changes over the previous version:

    * With ``opt.not_rand_crop`` set, the centre shift multiplied a scalar
      jitter by the 2-element scale array and assigned it to a single
      element of ``c``, which raises ``ValueError``. The shift now uses the
      matching component of ``s``.
    * Debug ``print`` calls were removed from the per-object loop.
    * Objects whose polygon centre lands outside the output map are skipped,
      because ``ind`` cannot address a pixel outside the map.

    Returns:
        Dict with ``input``, ``hm``, ``reg_mask``, ``ind`` and ``wh`` (or its
        dense / class-specific replacements), plus ``reg`` when
        ``opt.reg_offset`` is set and ``meta`` when debugging or outside the
        ``'train'`` split.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    # All annotations of this image.
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)

    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        # Keep the source aspect ratio: scale is (width, height).
        s = np.array([width, height], dtype=np.float32)
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            # Random centre within 1/8 of the image size around the middle.
            w_range, h_range = width // 8, height // 8
            c[0] = np.random.randint(low=width // 2 - w_range,
                                     high=width // 2 + w_range)
            c[1] = np.random.randint(low=height // 2 - h_range,
                                     high=height // 2 + h_range)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            # ``s`` is (width, height): shift each axis by its own extent.
            c[0] += s[0] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s[1] * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # First polygon as an (N, 2) array of x, y points.
        segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            segmentation[:, 0] = width - segmentation[:, 0] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Only the polygon centre is transformed, not every vertex.
        polygon_center = self._get_polygon_center(segmentation)
        polygon_center = affine_transform(polygon_center, trans_output)

        if not (h > 0 and w > 0):
            continue

        # Radius derived from the box extent, then shrunk by a factor of 3.
        radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
        radius = max(0, int(math.ceil(radius / 3)))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius

        box_center = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)
        ct_int = polygon_center.astype(np.int32)
        # The polygon centre is not clipped; a centre outside the map would
        # make the flat index below point at a wrong (or negative) location.
        if not (0 <= ct_int[0] < output_w and 0 <= ct_int[1] < output_h):
            continue

        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        # Offset from the integer polygon centre to the float box centre.
        reg[k] = box_center - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

        # Ground-truth box is rebuilt around the integer box centre.
        ct = box_center.astype(np.int32)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
    }
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return one human-object-interaction sample.

    Object targets (``hm``, ``wh``, ``ind``, ``reg``) are built from the
    first ``max_objs`` annotations. Relation targets (``hm_rel``,
    ``sub_offset``, ``obj_offset``, ``rel_ind``) are built from the first
    ``max_rels`` entries of ``hoi_annotation``; each interaction point is the
    midpoint of the subject and object integer centres.

    Changes over the previous version:

    * Boxes are copied into a float64 array. ``np.asarray`` kept integer
      annotations as integers, so the affine-transformed coordinates were
      truncated on assignment. It also returned the stored array itself when
      the annotation was already an ndarray, so the flip/warp mutated the
      dataset in place.
    * With ``keep_res`` and ``not_rand_crop`` both set, ``s`` is a 2-element
      array and the scalar centre shift raised ``ValueError``. The shift now
      broadcasts ``s`` to one value per axis (identical for a scalar ``s``).
    * The ``gt_det`` list, which was built but never returned, is removed.

    NOTE(review): ``subject_id`` / ``object_id`` index into the centres of
    the first ``max_objs`` annotations only; larger ids raise ``IndexError``.
    """
    img_id = self.ids[index]
    record = self.hoi_annotations[img_id]
    img_path = os.path.join(self.root, self.image_dir, record['file_name'])
    anns = record['annotations']
    hoi_anns = record['hoi_annotation']
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)

    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            # Works for both a scalar and a (w, h) scale.
            s_x, s_y = np.broadcast_to(s, (2,))
            c[0] += s_x * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s_y * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    hm_rel = np.zeros((self.num_classes_verb, output_h, output_w),
                      dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
    obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
    offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
    rel_ind = np.zeros((self.max_rels), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    bbox_ct = []  # integer centre of every processed object, by position
    num_rels = min(len(hoi_anns), self.max_rels)

    for k in range(num_objs):
        ann = anns[k]
        # Always a fresh float copy; see the docstring.
        bbox = np.array(ann['bbox'], dtype=np.float64)
        if isinstance(ann['category_id'], str):
            # Normalised once and stored back on the annotation.
            ann['category_id'] = int(ann['category_id'].replace('\n', ''))
        cls_id = int(self.cat_ids[ann['category_id']])

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # Recorded even for degenerate boxes so relation ids stay aligned.
        bbox_ct.append(ct_int.tolist())

        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            draw_gaussian(hm[cls_id], ct_int, radius)

    for k in range(num_rels):
        hoi = hoi_anns[k]
        if isinstance(hoi['category_id'], str):
            hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
        hoi_cate = int(self.cat_ids_verb[hoi['category_id']])
        sub_ct = bbox_ct[hoi['subject_id']]
        obj_ct = bbox_ct[hoi['object_id']]
        offset_mask[k] = 1

        rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                           (sub_ct[1] + obj_ct[1]) / 2], dtype=np.float32)
        # Radius from the extent spanned by the two centres.
        radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                  math.ceil(abs(sub_ct[1] - obj_ct[1]))))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        rel_ct_int = rel_ct.astype(np.int32)
        draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)

        sub_offset[k] = (rel_ct_int[0] - sub_ct[0],
                         rel_ct_int[1] - sub_ct[1])
        obj_offset[k] = (rel_ct_int[0] - obj_ct[0],
                         rel_ct_int[1] - obj_ct[1])
        rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hm_rel': hm_rel,
        'sub_offset': sub_offset,
        'obj_offset': obj_offset,
        'offset_mask': offset_mask,
        'rel_ind': rel_ind,
    }
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    return ret
def __getitem__(self, index):
    """Load one image and build heatmap, box-size and depth targets for it.

    The image for ``self.images[index]`` is read from ``self.img_dir``,
    warped to ``(opt.input_w, opt.input_h)``, scaled to [0, 1], normalized
    with ``self.mean`` / ``self.std`` and transposed with ``(2, 0, 1)``.
    Each annotation (at most ``self.max_objs``) is projected into the
    ``(opt.output_w, opt.output_h)`` output grid and written into the
    target arrays.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'dep'``, ``'wh'``, ``'ind'``
        and ``'reg_mask'``. A ``'meta'`` entry (``c``, ``s``, ``gt_det``,
        ``image_path``, ``img_id``) is added when ``opt.debug > 0`` or the
        split name does not contain ``'train'``.

    Raises:
        FileNotFoundError: If the image file cannot be read.
    """
    # TODO: calibration is not wired into this loader yet and may need to be
    # adapted; the 'calib' entry is therefore not exported in `meta` below.
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of on `img.shape` further down.
        raise FileNotFoundError(
            'Could not read image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.])
    if self.opt.keep_res:
        s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
    else:
        s = np.array([width, height], dtype=np.int32)

    # Random scale/shift augmentation, applied to training samples only.
    aug = False
    if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
        aug = True
        sf = self.opt.scale
        cf = self.opt.shift
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        c[0] += width * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += height * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)

    input_w, input_h = self.opt.input_w, self.opt.input_h
    output_w, output_h = self.opt.output_w, self.opt.output_h

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    num_classes = self.opt.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    # Per-sample targets. `dim` is intentionally absent: it is not an
    # output of the ctdet head used here.
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dep = np.zeros((self.max_objs, 1), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    # Nothing is appended to gt_det in this loader (the 3D ground-truth
    # export is disabled), so `meta['gt_det']` is always the placeholder.
    gt_det = []
    for k, ann in enumerate(anns[:num_objs]):
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id <= -99:
            continue
        # Horizontal flipping is not applied by this loader.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue

        radius = max(0, int(gaussian_radius((h, w))))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)

        if cls_id < 0:
            # Negative ids mark ignore regions: -1 covers every class,
            # any other value maps to the single class `-cls_id - 2`.
            ignore_id = list(range(num_classes)) \
                if cls_id == -1 else [-cls_id - 2]
            if self.opt.rect_mask:
                hm[ignore_id,
                   int(bbox[1]):int(bbox[3]) + 1,
                   int(bbox[0]):int(bbox[2]) + 1] = 0.9999
            else:
                for cc in ignore_id:
                    draw_gaussian(hm[cc], ct, radius)
                hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
            continue

        draw_gaussian(hm[cls_id], ct, radius)
        wh[k] = 1. * w, 1. * h
        dep[k] = ann['depth']
        ind[k] = ct_int[1] * output_w + ct_int[0]
        # Flag the slot only once its targets exist, so skipped, ignored and
        # degenerate objects keep reg_mask == 0 instead of pointing at a
        # zero-filled slot. Augmented samples stay masked out, as before
        # (presumably because scaling invalidates the depth label - confirm).
        reg_mask[k] = 0 if aug else 1

    # `dim` was removed from the returned targets (see note above).
    ret = {
        'input': inp,
        'hm': hm,
        'dep': dep,
        'wh': wh,
        'ind': ind,
        'reg_mask': reg_mask
    }
    if self.opt.reg_bbox:
        # 'wh' is already present above; kept so the option is still read.
        ret.update({'wh': wh})
    if self.opt.debug > 0 or not ('train' in self.split):
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 18), dtype=np.float32)
        # 'calib' is left out of meta until calibration is supported; it is
        # not used for COCO-style data by default.
        meta = {
            'c': c,
            's': s,
            'gt_det': gt_det,
            'image_path': img_path,
            'img_id': img_id
        }
        ret['meta'] = meta
    return ret
def _add_instance(self,
                  ret,
                  gt_det,
                  k,
                  cls_id,
                  bbox,
                  bbox_amodal,
                  ann,
                  trans_output,
                  aug_s,
                  calib,
                  pre_cts=None,
                  track_ids=None,
                  flipped=False):
    """Write the training targets of one object into slot ``k`` of ``ret``.

    Objects whose box has non-positive height or width are ignored. For
    every other object the center heatmap, class, index and offset targets
    are always written; the remaining targets depend on which heads are
    listed in ``self.opt.heads``. Matching ground-truth entries are appended
    to the lists in ``gt_det``.

    Args:
        ret: dict of pre-allocated target arrays, modified in place.
        gt_det: dict of lists collecting per-object ground truth.
        k: Slot index of this object inside the target arrays.
        cls_id: Class id; ``cls_id - 1`` is used as heatmap channel and
            stored category.
        bbox: Box as ``[x1, y1, x2, y2]`` in output-map coordinates.
        bbox_amodal: Box used for the ``'ltrb_amodal'`` head.
        ann: Annotation dict of the object.
        trans_output: Affine transform to the output map, used for the
            amodal center and the segmentation mask.
        aug_s: Factor multiplied onto ``ann['depth']``.
        calib: Unused here; kept for interface compatibility.
        pre_cts: Centers of the previous frame, indexed like ``track_ids``.
        track_ids: Track ids of the previous frame (needed when the
            ``'tracking'`` head is enabled).
        flipped: Whether the sample was horizontally flipped; only affects
            the segmentation mask.

    Returns:
        None. All results are written into ``ret`` and ``gt_det``.
    """
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    if h <= 0 or w <= 0:
        return
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                  dtype=np.float32)
    ct_int = ct.astype(np.int32)
    heads = self.opt.heads

    ret['cat'][k] = cls_id - 1
    ret['mask'][k] = 1
    if 'wh' in ret:
        ret['wh'][k] = 1. * w, 1. * h
        ret['wh_mask'][k] = 1
    ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
    ret['reg'][k] = ct - ct_int
    ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

    gt_det['bboxes'].append(
        np.array([ct[0] - w / 2, ct[1] - h / 2,
                  ct[0] + w / 2, ct[1] + h / 2], dtype=np.float32))
    gt_det['scores'].append(1)
    gt_det['clses'].append(cls_id - 1)
    gt_det['cts'].append(ct)

    if 'tracking' in heads:
        if ann['track_id'] in track_ids:
            # Offset from this center to the same track's previous center.
            pre_ct = pre_cts[track_ids.index(ann['track_id'])]
            ret['tracking_mask'][k] = 1
            ret['tracking'][k] = pre_ct - ct_int
            gt_det['tracking'].append(ret['tracking'][k])
        else:
            gt_det['tracking'].append(np.zeros(2, np.float32))

    if 'ltrb' in heads:
        ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
            bbox[2] - ct_int[0], bbox[3] - ct_int[1]
        ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in heads:
        ret['ltrb_amodal'][k] = \
            bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
            bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
        ret['ltrb_amodal_mask'][k] = 1
        gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in heads:
        if ('attributes' in ann) and ann['attributes'] > 0:
            att = int(ann['attributes'] - 1)
            ret['nuscenes_att'][k][att] = 1
            ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
        # Appended even without attributes so gt_det stays aligned.
        gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in heads:
        if ('velocity' in ann) and min(ann['velocity']) > -1000:
            ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
            ret['velocity_mask'][k] = 1
        gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in heads:
        self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w)

    if 'rot' in heads:
        self._add_rot(ret, ann, k, gt_det)

    if 'dep' in heads:
        if 'depth' in ann:
            ret['dep_mask'][k] = 1
            ret['dep'][k] = ann['depth'] * aug_s
            gt_det['dep'].append(ret['dep'][k])
        else:
            gt_det['dep'].append(2)

    if 'dim' in heads:
        if 'dim' in ann:
            ret['dim_mask'][k] = 1
            ret['dim'][k] = ann['dim']
            gt_det['dim'].append(ret['dim'][k])
        else:
            gt_det['dim'].append([1, 1, 1])

    if 'amodel_offset' in heads:
        if 'amodel_center' in ann:
            amodel_center = affine_transform(ann['amodel_center'],
                                             trans_output)
            ret['amodel_offset_mask'][k] = 1
            ret['amodel_offset'][k] = amodel_center - ct_int
            gt_det['amodel_offset'].append(ret['amodel_offset'][k])
        else:
            gt_det['amodel_offset'].append([0, 0])

    # Segmentation target for the 'seg' head. Annotations without a mask
    # get no seg target; ret['seg'][k] keeps whatever the caller allocated.
    if 'seg' in heads and ann['segmentation'] is not None:
        segment = self.coco.annToMask(ann)
        if flipped:
            segment = segment[:, ::-1]
        segment = cv2.warpAffine(segment,
                                 trans_output,
                                 (self.opt.output_w, self.opt.output_h),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)

        # Box enlarged by `pad_rate` around the center, clipped to the map.
        # Builtin int replaces the np.int alias removed in NumPy 1.24.
        pad_rate = 0.1
        half_w = (1 + pad_rate) * w / 2
        half_h = (1 + pad_rate) * h / 2
        x = np.clip([ct[0] - half_w, ct[0] + half_w],
                    0, self.opt.output_w - 1).astype(int)
        y = np.clip([ct[1] - half_h, ct[1] + half_h],
                    0, self.opt.output_h - 1).astype(int)

        # 1 = object, 0 = background inside the padded box, 255 = every
        # pixel outside the padded box.
        segment_mask = np.ones_like(segment)
        segment_mask[y[0]:y[1], x[0]:x[1]] = 0
        segment[segment > 0] = 1
        segment[segment_mask == 1] = 255
        ret['seg'][k] = segment