def __getitem__(self, index):
    img = cv2.imread(self.image_path_list[index])
    idx = self.index_list[index]
    meta_data = self.get_anno(self.data[idx], self.instance_info_list[index])

    # Apply the augmentation pipeline: scale, rotate, crop/pad, flip.
    augmentations = [
        partial(aug_meth, params_transform=params_transform)
        for aug_meth in [aug_scale_bbox, aug_rotate_bbox,
                         aug_croppad_bbox, aug_flip_bbox]
    ]
    meta_data, img = reduce(lambda md_i, f: f(*md_i),
                            augmentations, (meta_data, img))

    extracted_bbox = self.get_ground_truth(meta_data,
                                           self.instance_info_list[index])

    # Image preprocessing to comply with the backbone
    # trained on the ImageNet dataset.
    if self.preprocess == 'resnet':
        img = resnet_preprocess(img)

    img = torch.from_numpy(img)
    bbox = torch.from_numpy(np.array(extracted_bbox).astype(np.float32))
    return img, bbox
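
# A minimal usage sketch, added for illustration (not part of the original
# code): since __getitem__ above returns an (img, bbox) tensor pair, a
# standard PyTorch DataLoader can batch it directly. `dataset` stands for an
# instance of the Dataset subclass this method belongs to.
def _example_bbox_loader(dataset):
    from torch.utils.data import DataLoader
    loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
    # img: (B, C, H, W) float tensor; bbox: batched ground-truth boxes.
    img, bbox = next(iter(loader))
    return img, bbox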
def test(self):
    img_list = os.listdir(self.params.testdata_dir)
    multipose_results = []

    for img_name in tqdm(img_list):
        img = cv2.imread(
            os.path.join(self.params.testdata_dir, img_name)).astype(np.float32)

        # Pad the image to a square, then resize to the network input size.
        shape_dst = np.max(img.shape)
        scale = float(shape_dst) / self.params.inp_size
        pad_size = np.abs(img.shape[1] - img.shape[0])
        img_resized = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]),
                             'constant')[:shape_dst, :shape_dst]
        img_resized = cv2.resize(img_resized,
                                 (self.params.inp_size, self.params.inp_size))
        img_input = resnet_preprocess(img_resized)
        img_input = torch.from_numpy(np.expand_dims(img_input, 0))

        with torch.no_grad():
            img_input = img_input.cuda(device=self.params.gpus[0])
            heatmaps, [scores, classification, transformed_anchors] = \
                self.model([img_input, self.params.subnet_name])

        heatmaps = heatmaps.cpu().detach().numpy()
        heatmaps = np.squeeze(heatmaps, 0)
        heatmaps = np.transpose(heatmaps, (1, 2, 0))
        heatmap_max = np.max(heatmaps[:, :, :17], 2)
        # segment_map = heatmaps[:, :, 17]
        param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5}
        joint_list = get_joint_list(img_resized, param, heatmaps[:, :, :17], scale)
        del img_resized

        # Bounding boxes from the RetinaNet detection subnet.
        scores = scores.cpu().detach().numpy()
        classification = classification.cpu().detach().numpy()
        transformed_anchors = transformed_anchors.cpu().detach().numpy()
        idxs = np.where(scores > 0.5)
        bboxs = []
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :] * scale
            if int(classification[idxs[0][j]]) == 0:  # class 0 = person
                bboxs.append(bbox.tolist())

        prn_result = self.prn_process(joint_list.tolist(), bboxs, img_name)
        for result in prn_result:
            multipose_results.append(result)

        if self.params.testresult_write_image:
            canvas = plot_result(img, prn_result)
            cv2.imwrite(
                os.path.join(self.params.testresult_dir,
                             img_name.split('.', 1)[0] + '_1heatmap.png'),
                heatmap_max * 256)
            cv2.imwrite(
                os.path.join(self.params.testresult_dir,
                             img_name.split('.', 1)[0] + '_2canvas.png'),
                canvas)

    if self.params.testresult_write_json:
        with open(os.path.join(self.params.testresult_dir,
                               'multipose_results.json'), "w") as f:
            json.dump(multipose_results, f)
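
# Sketch added for clarity (not in the original): the pad-to-square step above
# pads both axes by |H - W| and then crops to the larger side, so it squares
# the image whether it is taller or wider. Factored out as a helper:
def _pad_to_square(img, inp_size):
    import numpy as np
    import cv2
    shape_dst = np.max(img.shape)
    scale = float(shape_dst) / inp_size
    pad_size = np.abs(img.shape[1] - img.shape[0])
    # Padding both axes by pad_size, then slicing, squares either orientation.
    img_square = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]),
                        'constant')[:shape_dst, :shape_dst]
    return cv2.resize(img_square, (inp_size, inp_size)), scale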
def infer(self, img):
    # Pad the image to a square, then resize to the network input size.
    shape_dst = np.max(img.shape)
    scale = float(shape_dst) / self.params.inp_size
    pad_size = np.abs(img.shape[1] - img.shape[0])
    img_resized = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]),
                         'constant')[:shape_dst, :shape_dst]
    img_resized = cv2.resize(img_resized,
                             (self.params.inp_size, self.params.inp_size))
    img_input = resnet_preprocess(img_resized)
    img_input = torch.from_numpy(np.expand_dims(img_input, 0))

    with torch.no_grad():
        img_input = img_input.cuda(device=self.params.gpus[0])
        heatmaps, [scores, classification, transformed_anchors] = \
            self.model([img_input, self.params.subnet_name])

    heatmaps = heatmaps.cpu().detach().numpy()
    heatmaps = np.squeeze(heatmaps, 0)
    heatmaps = np.transpose(heatmaps, (1, 2, 0))
    heatmap_max = np.max(heatmaps[:, :, :18], 2)
    # segment_map = heatmaps[:, :, 17]
    param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5}
    joint_list = get_joint_list(img_resized, param, heatmaps[:, :, :18], scale)
    joint_list = joint_list.tolist()
    del img_resized

    # Drop the neck joint (id 1) and shift the remaining joint ids down by one.
    joints = []
    for joint in joint_list:
        if int(joint[-1]) != 1:
            joint[-1] = max(0, int(joint[-1]) - 1)
            joints.append(joint)
    joint_list = joints

    # Bounding boxes from the RetinaNet detection subnet.
    scores = scores.cpu().detach().numpy()
    classification = classification.cpu().detach().numpy()
    transformed_anchors = transformed_anchors.cpu().detach().numpy()
    idxs = np.where(scores > 0.5)
    bboxs = []
    for j in range(idxs[0].shape[0]):
        bbox = transformed_anchors[idxs[0][j], :] * scale
        if int(classification[idxs[0][j]]) == 0:  # class 0 = person
            bboxs.append(bbox.tolist())

    prn_result = self.prn_process(joint_list, bboxs, None)
    results = []
    for prn in prn_result:
        results.append(np.array(prn['keypoints']).reshape(-1, 3))
    return results
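
# Hedged usage sketch (added, not from the original repo): infer() returns one
# (K, 3) array of (x, y, score) rows per detected person, so a caller can draw
# the keypoints directly. `net` stands for an instance of the class above; the
# drawing loop is illustrative.
def _demo_infer(net, image_path):
    import cv2
    import numpy as np
    img = cv2.imread(image_path).astype(np.float32)
    for person in net.infer(img):
        for x, y, score in person:
            if score > 0:  # skip joints that were not detected
                cv2.circle(img, (int(x), int(y)), 3, (0, 255, 0), -1)
    return img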
def __getitem__(self, index):
    idx = self.index_list[index]
    img = cv2.imread(os.path.join(self.root, self.data[idx]['img_paths']))
    # e.g. '...COCO_val2014_000000000123.jpg'[-16:-3] -> '000000000123.'
    # (the trailing dot is kept, so only 'png' is appended below).
    img_idx = self.data[idx]['img_paths'][-16:-3]

    if "COCO_val" in self.data[idx]['dataset']:
        mask_miss = cv2.imread(
            self.mask_dir + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0)
        # mask_all = cv2.imread(
        #     self.mask_dir + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0)
    elif "COCO" in self.data[idx]['dataset']:
        mask_miss = cv2.imread(
            self.mask_dir + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0)
        # mask_all = cv2.imread(
        #     self.mask_dir + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0)

    meta_data = self.get_anno(self.data[idx])
    meta_data = self.add_neck(meta_data)

    # Apply the augmentation pipeline: scale, rotate, crop/pad, flip.
    augmentations = [
        partial(aug_meth, params_transform=params_transform)
        for aug_meth in [aug_scale, aug_rotate, aug_croppad, aug_flip]
    ]
    meta_data, img, mask_miss = reduce(
        lambda md_i_mm, f: f(*md_i_mm), augmentations,
        (meta_data, img, mask_miss))
    meta_data = self.remove_illegal_joint(meta_data)

    heat_mask, heatmaps = self.get_ground_truth(meta_data, mask_miss)

    # Image preprocessing to comply with the backbone
    # trained on the ImageNet dataset.
    if self.preprocess == 'resnet':
        img = resnet_preprocess(img)

    img = torch.from_numpy(img)
    heatmaps = torch.from_numpy(
        heatmaps.transpose((2, 0, 1)).astype(np.float32))
    heat_mask = torch.from_numpy(
        heat_mask.transpose((2, 0, 1)).astype(np.float32))
    # mask_all = torch.from_numpy(
    #     mask_all.transpose((2, 0, 1)).astype(np.float32))
    return img, heatmaps, heat_mask  # , mask_all
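
# Hedged sketch (an assumption, not the repo's verbatim loss): heat_mask marks
# regions where annotations are missing (mask_miss), and a typical masked MSE
# loss multiplies both prediction and target by the mask so unlabeled people
# do not penalize the network.
def _masked_heatmap_loss(pred, heatmaps, heat_mask):
    import torch.nn.functional as F
    return F.mse_loss(pred * heat_mask, heatmaps * heat_mask)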
def coco_eval(self):
    coco_val = os.path.join(self.params.coco_root,
                            'annotations/person_keypoints_val2017.json')
    coco = COCO(coco_val)
    img_ids = coco.getImgIds(catIds=[1])

    multipose_results = []
    # Mapping from the model's joint order to the COCO keypoint order.
    coco_order = [0, 14, 13, 16, 15, 4, 1, 5, 2, 6, 3, 10, 7, 11, 8, 12, 9]

    for img_id in tqdm(img_ids):
        img_name = coco.loadImgs(img_id)[0]['file_name']
        img = cv2.imread(os.path.join(self.params.coco_root, 'images/val2017/',
                                      img_name)).astype(np.float32)

        # Pad the image to a square, then resize to the network input size.
        shape_dst = np.max(img.shape)
        scale = float(shape_dst) / self.params.inp_size
        pad_size = np.abs(img.shape[1] - img.shape[0])
        img_resized = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]),
                             'constant')[:shape_dst, :shape_dst]
        img_resized = cv2.resize(img_resized,
                                 (self.params.inp_size, self.params.inp_size))
        img_input = resnet_preprocess(img_resized)
        img_input = torch.from_numpy(np.expand_dims(img_input, 0))

        with torch.no_grad():
            img_input = img_input.cuda(device=self.params.gpus[0])
            heatmaps, [scores, classification, transformed_anchors] = \
                self.model([img_input, self.params.subnet_name])

        heatmaps = heatmaps.cpu().detach().numpy()
        heatmaps = np.squeeze(heatmaps, 0)
        heatmaps = np.transpose(heatmaps, (1, 2, 0))
        heatmap_max = np.max(heatmaps[:, :, :17], 2)
        # segment_map = heatmaps[:, :, 17]
        param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5}
        joint_list = get_joint_list(img_resized, param, heatmaps[:, :, :17], scale)
        del img_resized

        # Bounding boxes from the RetinaNet detection subnet.
        scores = scores.cpu().detach().numpy()
        classification = classification.cpu().detach().numpy()
        transformed_anchors = transformed_anchors.cpu().detach().numpy()
        idxs = np.where(scores > 0.5)
        bboxs = []
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :] * scale
            if int(classification[idxs[0][j]]) == 0:  # class 0 = person
                bboxs.append(bbox.tolist())

        prn_result = self.prn_process(joint_list.tolist(), bboxs,
                                      img_name, img_id)
        for result in prn_result:
            # Permute the (x, y, score) triplets into the COCO joint order.
            keypoints = result['keypoints']
            coco_keypoint = []
            for i in range(17):
                coco_keypoint.append(keypoints[coco_order[i] * 3])
                coco_keypoint.append(keypoints[coco_order[i] * 3 + 1])
                coco_keypoint.append(keypoints[coco_order[i] * 3 + 2])
            result['keypoints'] = coco_keypoint
            multipose_results.append(result)

    ann_filename = self.params.coco_result_filename
    with open(ann_filename, "w") as f:
        json.dump(multipose_results, f, indent=4)

    # Load the results into the COCO evaluation tool and run keypoint evaluation.
    coco_pred = coco.loadRes(ann_filename)
    coco_eval = COCOeval(coco, coco_pred, 'keypoints')
    coco_eval.params.imgIds = img_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    if not self.params.testresult_write_json:
        os.remove(ann_filename)
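
# For reference (added): coco.loadRes() expects each result entry to follow
# the standard COCO keypoint result format, i.e. a dict with "image_id",
# "category_id" (1 for person), "keypoints" as 17 flattened (x, y, score)
# triplets, and an overall "score". prn_process is assumed to emit entries of
# this shape; the reordering loop above only permutes the 17 triplets. A small
# sanity check one might run before json.dump:
def _check_coco_result(entry):
    assert set(entry) >= {"image_id", "category_id", "keypoints", "score"}
    assert entry["category_id"] == 1
    assert len(entry["keypoints"]) == 17 * 3  # 51 numbers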
def _get_outputs(self, multiplier, img):
    """Compute the scale-averaged heatmap and per-scale bounding boxes.

    :param multiplier: list of scales at which the image is processed
    :param img: numpy array, the image being processed
    :returns: the averaged heatmap and a list of bounding boxes per scale
    """
    heatmap_avg = np.zeros((img.shape[0], img.shape[1], 17))
    bbox_all = []
    max_scale = multiplier[-1]
    max_size = max_scale * img.shape[0]
    # Pad so the largest scale fits a multiple of the network stride (32).
    max_cropped, _, _ = crop_with_factor(img, max_size, factor=32)

    for m in range(len(multiplier)):
        scale = multiplier[m]
        inp_size = scale * img.shape[0]

        # Pad to a multiple of 32 at this scale.
        im_cropped, im_scale, real_shape = crop_with_factor(
            img, inp_size, factor=32, pad_val=128)
        im_data = resnet_preprocess(im_cropped)
        im_data = np.expand_dims(im_data, 0)

        with torch.no_grad():
            im_data = torch.from_numpy(im_data).type(
                torch.FloatTensor).cuda(device=self.params.gpus[0])
            heatmaps, [scores, classification, transformed_anchors] = \
                self.model([im_data, self.params.subnet_name])

        heatmaps = heatmaps.cpu().detach().numpy().transpose(0, 2, 3, 1)
        scores = scores.cpu().detach().numpy()
        classification = classification.cpu().detach().numpy()
        transformed_anchors = transformed_anchors.cpu().detach().numpy()

        # Upsample the heatmap (the network output stride is 4), crop away
        # the padding, and resize back to the original image size.
        heatmap = heatmaps[0, :int(im_cropped.shape[0] / 4),
                           :int(im_cropped.shape[1] / 4), :]
        heatmap = cv2.resize(heatmap, None, fx=4, fy=4,
                             interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[0:real_shape[0], 0:real_shape[1], :]
        heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]),
                             interpolation=cv2.INTER_CUBIC)
        heatmap_avg = heatmap_avg + heatmap / len(multiplier)

        # Bounding boxes from the RetinaNet detection subnet,
        # mapped back to the original image scale.
        idxs = np.where(scores > 0.5)
        bboxs = []
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :] / im_scale
            if int(classification[idxs[0][j]]) == 0:  # class 0 = person
                bboxs.append(bbox.tolist())
        bbox_all.append(bboxs)

    return heatmap_avg, bbox_all
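
# Hedged sketch (an assumption, not confirmed by this code): `multiplier` is a
# list of relative scales for multi-scale testing. A common recipe builds it
# from a scale_search list normalized by the image height, e.g.:
def _make_multiplier(img, inp_size, scale_search=(0.5, 1.0, 1.5, 2.0)):
    # Each entry scales the image so its height is scale * inp_size.
    return [s * inp_size / img.shape[0] for s in scale_search]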