def __init__(self, checkpoint_path, device,
             img_mean=np.array([128, 128, 128], dtype=np.float32),
             img_scale=np.float32(1 / 255), use_tensorrt=False):
    """Load a pose-estimation network from a checkpoint.

    Args:
        checkpoint_path: Path to a PyTorch checkpoint, or to a torch2trt
            ``TRTModule`` state dict when ``use_tensorrt`` is True.
        device: The literal string 'CPU' forces CPU inference; any other
            value requests CUDA, falling back to CPU (with a console
            warning) when no CUDA device is available.
        img_mean: Per-channel mean subtracted during preprocessing.
            NOTE(review): this mutable default array is shared across all
            instances that rely on the default — do not mutate
            ``self.img_mean`` in place.
        img_scale: Multiplicative scale applied after mean subtraction.
        use_tensorrt: Select the torch2trt loading path.
    """
    from models.with_mobilenet import PoseEstimationWithMobileNet
    from modules.load_state import load_state

    self.img_mean = img_mean
    self.img_scale = img_scale

    # Resolve the inference device; default to CPU.
    self.device = 'cpu'
    if device != 'CPU':
        if torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            print('No CUDA device found, inferring on CPU')

    # Checkpoints may have been saved from a GPU run; always materialize
    # on CPU first and move to the target device afterwards.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    if use_tensorrt:
        # torch2trt modules restore directly from their state dict, so
        # building the regular PyTorch model here would be wasted work.
        from torch2trt import TRTModule
        net = TRTModule()
        net.load_state_dict(checkpoint)
    else:
        net = PoseEstimationWithMobileNet()
        load_state(net, checkpoint)
    net = net.to(self.device)
    net.eval()
    self.net = net
class PoseDetector(object):
    """Multi-person 2D pose detector based on Lightweight OpenPose.

    Runs a MobileNet-backed network that produces keypoint heatmaps and
    part-affinity fields (PAFs), then groups peaks into per-person poses.
    Two inference paths exist: a fast single-scale path used by the public
    API, and a slower (optionally multi-scale) path kept for reference.
    The most recent input image and its heatmap/PAF outputs are cached on
    the instance so repeated calls with the same image skip inference.
    """

    def __init__(self, model_path: str):
        """Load the pose network from *model_path* and set defaults."""
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.net = PoseEstimationWithMobileNet()
        self.net.to(self.device)
        checkpoint = torch.load(model_path)
        load_state(self.net, checkpoint)
        self.net.eval()
        # Cache of the last processed image and its network outputs.
        self.image = None
        self.avg_heatmap = None
        self.avg_paf = None
        self.track = True
        self.stride = 8          # network output stride
        self.upsample_ratio = 4  # heatmap/PAF upsampling before peak search
        self.height_size = 256   # network input height for the fast path
        self.smooth = 1
        self.num_keypoints = Pose.num_kpts

    def __inference(self, image, multiscale=False):
        """Run (optionally multi-scale) inference on *image*.

        Stores the scale-averaged 19-channel heatmap and 38-channel PAF,
        resized back to the input resolution, in ``self.avg_heatmap`` and
        ``self.avg_paf``.
        """
        img = image.copy()
        base_height = 368
        scales = [1]
        if multiscale:
            scales = [0.5, 1.0, 1.5, 2.0]
        stride = 8

        normed_img = self.__normalize(img)
        height, width, _ = normed_img.shape
        scales_ratios = [
            scale * base_height / float(height) for scale in scales
        ]
        avg_heatmap = np.zeros((height, width, 19), dtype=np.float32)
        avg_paf = np.zeros((height, width, 38), dtype=np.float32)

        for ratio in scales_ratios:
            scaled_img = cv2.resize(normed_img, (0, 0),
                                    fx=ratio,
                                    fy=ratio,
                                    interpolation=cv2.INTER_CUBIC)
            min_dims = [base_height, max(scaled_img.shape[1], base_height)]
            padded_img, pad = self.__pad_width(scaled_img, stride, min_dims)

            tensor_img = torch.from_numpy(padded_img).permute(
                2, 0, 1).unsqueeze(0).float().to(self.device)
            # Inference only: no_grad avoids building the autograd graph.
            with torch.no_grad():
                stages_output = self.net(tensor_img)

            # Heatmaps: upsample by the stride, strip the padding, resize
            # to the original resolution, and fold into the running mean.
            stage2_heatmap = stages_output[-2]
            heatmap = np.transpose(
                stage2_heatmap.squeeze().cpu().data.numpy(), (1, 2, 0))
            heatmap = cv2.resize(heatmap, (0, 0),
                                 fx=stride,
                                 fy=stride,
                                 interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[pad[0]:heatmap.shape[0] - pad[2],
                              pad[1]:heatmap.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (width, height),
                                 interpolation=cv2.INTER_CUBIC)
            # Accumulate so every scale contributes to the average
            # (assigning `zeros + heatmap/len` here would keep only the
            # last scale).
            avg_heatmap += heatmap / len(scales_ratios)

            # PAFs: same post-processing as the heatmaps.
            stage2_paf = stages_output[-1]
            paf = np.transpose(stage2_paf.squeeze().cpu().data.numpy(),
                               (1, 2, 0))
            paf = cv2.resize(paf, (0, 0),
                             fx=stride,
                             fy=stride,
                             interpolation=cv2.INTER_CUBIC)
            paf = paf[pad[0]:paf.shape[0] - pad[2],
                      pad[1]:paf.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (width, height),
                             interpolation=cv2.INTER_CUBIC)
            avg_paf += paf / len(scales_ratios)

        self.avg_heatmap = avg_heatmap
        self.avg_paf = avg_paf

    def __inference_fast(self, img, net_input_height_size, stride,
                         upsample_ratio):
        """Single-scale inference plus keypoint extraction and grouping.

        Populates ``self.avg_heatmap``, ``self.avg_paf``,
        ``self.pose_entries`` and ``self.all_keypoints`` (keypoints are
        mapped back to original-image coordinates).
        """
        height, width, _ = img.shape
        self.scale = net_input_height_size / height

        scaled_img = cv2.resize(img, (0, 0),
                                fx=self.scale,
                                fy=self.scale,
                                interpolation=cv2.INTER_LINEAR)
        scaled_img = self.__normalize(scaled_img)
        min_dims = [
            net_input_height_size,
            max(scaled_img.shape[1], net_input_height_size)
        ]
        padded_img, self.pad = self.__pad_width(scaled_img, stride, min_dims)

        tensor_img = torch.from_numpy(padded_img).permute(
            2, 0, 1).unsqueeze(0).float()
        tensor_img = tensor_img.to(self.device)
        # Inference only: no_grad avoids building the autograd graph.
        with torch.no_grad():
            stages_output = self.net(tensor_img)

        stage2_heatmaps = stages_output[-2]
        heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(),
                                (1, 2, 0))
        self.avg_heatmap = cv2.resize(heatmaps, (0, 0),
                                      fx=upsample_ratio,
                                      fy=upsample_ratio,
                                      interpolation=cv2.INTER_CUBIC)

        stage2_pafs = stages_output[-1]
        pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(),
                            (1, 2, 0))
        self.avg_paf = cv2.resize(pafs, (0, 0),
                                  fx=upsample_ratio,
                                  fy=upsample_ratio,
                                  interpolation=cv2.INTER_CUBIC)

        # Peak extraction per keypoint channel (channel 19 is background).
        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(self.num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(
                self.avg_heatmap[:, :, kpt_idx], all_keypoints_by_type,
                total_keypoints_num)

        self.pose_entries, self.all_keypoints = group_keypoints(
            all_keypoints_by_type, self.avg_paf)

        # Map keypoints from network-output coordinates back to the
        # original image: undo upsampling/stride, padding, and resize.
        for kpt_id in range(self.all_keypoints.shape[0]):
            self.all_keypoints[kpt_id, 0] = \
                int((self.all_keypoints[kpt_id, 0] *
                     self.stride / self.upsample_ratio - self.pad[1]) /
                    self.scale)
            self.all_keypoints[kpt_id, 1] = \
                int((self.all_keypoints[kpt_id, 1] *
                     self.stride / self.upsample_ratio - self.pad[0]) /
                    self.scale)

    def visualize_prediction(self, image):
        """Draw the detected poses onto *image* and return the blend.

        Note: *image* is modified in place by ``pose.draw``; the returned
        frame is a weighted blend of the original and the drawn image.
        """
        orig_img = image.copy()
        # Skip inference when the cached results already match this image.
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference_fast(self.image, self.height_size, self.stride,
                                  self.upsample_ratio)

        current_poses = []
        for n in range(len(self.pose_entries)):
            if len(self.pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones(
                (self.num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(self.num_keypoints):
                if self.pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(
                        self.all_keypoints[int(self.pose_entries[n][kpt_id]),
                                           0])
                    pose_keypoints[kpt_id, 1] = int(
                        self.all_keypoints[int(self.pose_entries[n][kpt_id]),
                                           1])
            pose = Pose(pose_keypoints, self.pose_entries[n][18])
            current_poses.append(pose)

        # NOTE: cross-frame pose tracking (track_poses) is intentionally
        # disabled here.
        for pose in current_poses:
            pose.draw(image)
        image = cv2.addWeighted(orig_img, 0.6, image, 0.4, 0)
        return image

    def __get_auto_grading_outputs(self, image):
        """Return ``(all_keypoints, pose_entries)`` for *image* (cached)."""
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference_fast(self.image, self.height_size, self.stride,
                                  self.upsample_ratio)
        return self.all_keypoints, self.pose_entries

    def __visualize_prediction_slow(self, image):
        """Slow-path visualization: draw COCO-format keypoints via pyplot."""
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference(self.image)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(18):  # 19th for bg
            total_keypoints_num += extract_keypoints(
                self.avg_heatmap[:, :, kpt_idx], all_keypoints_by_type,
                total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type,
                                                      self.avg_paf)
        coco_keypoints, scores = convert_to_coco_format(
            pose_entries, all_keypoints)

        # COCO keypoints come as flat (x, y, visibility) triples.
        for keypoints in coco_keypoints:
            for idx in range(len(keypoints) // 3):
                cv2.circle(
                    image,
                    (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])),
                    3, (255, 0, 255), -1)
        plt.imshow(image)
        plt.show()

    def __get_auto_grading_outputs_slow(self, image):
        """Slow-path grouping (original OpenPose greedy matching).

        Returns:
            candidate: (n, 4) array of all peaks as (x, y, score, id).
            subset: (m, 20) array; columns 0-17 index into ``candidate``
                per body part, column 18 is the total score, column 19 the
                number of parts assigned to that person.
        """
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference(self.image)

        all_peaks = []
        peak_counter = 0
        thre1 = 0.1   # heatmap peak threshold
        thre2 = 0.05  # PAF integral-sample threshold

        # --- Peak detection: local maxima of each smoothed heatmap. ---
        for part in range(18):
            map_ori = self.avg_heatmap[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right,
                 one_heatmap >= map_up, one_heatmap >= map_down,
                 one_heatmap > thre1))
            peaks = list(
                zip(np.nonzero(peaks_binary)[1],
                    np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]], ) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [
                peaks_with_score[i] + (peak_id[i], )
                for i in range(len(peak_id))
            ]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9],
                   [9, 10], [10, 11], [2, 12], [12, 13], [13, 14], [2, 1],
                   [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
                  [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
                  [47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38],
                  [45, 46]]

        # --- Limb scoring: integrate the PAF along each candidate limb. ---
        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = self.avg_paf[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            if nA != 0 and nB != 0:
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        vec = np.divide(vec, norm)

                        # Sample mid_num points along the limb and dot the
                        # PAF vectors with the limb direction.
                        startend = list(
                            zip(
                                np.linspace(candA[i][0], candB[j][0],
                                            num=mid_num),
                                np.linspace(candA[i][1], candB[j][1],
                                            num=mid_num)))
                        vec_x = np.array([
                            score_mid[int(round(startend[I][1])),
                                      int(round(startend[I][0])), 0]
                            for I in range(len(startend))
                        ])
                        vec_y = np.array([
                            score_mid[int(round(startend[I][1])),
                                      int(round(startend[I][0])), 1]
                            for I in range(len(startend))
                        ])

                        score_midpts = np.multiply(
                            vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        # Distance prior penalizes limbs longer than half
                        # the image height.
                        score_with_dist_prior = sum(score_midpts) / len(
                            score_midpts) + min(
                                0.5 * image.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(
                            score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append([
                                i, j, score_with_dist_prior,
                                score_with_dist_prior + candA[i][2] +
                                candB[j][2]
                            ])

                # Greedy assignment: best-scoring limbs first, each peak
                # used at most once.
                connection_candidate = sorted(connection_candidate,
                                              key=lambda x: x[2],
                                              reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if i not in connection[:, 3] and j not in connection[:, 4]:
                        connection = np.vstack(
                            [connection, [candA[i][3], candB[j][3], s, i, j]])
                        if len(connection) >= min(nA, nB):
                            break
                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # --- Person assembly: merge limbs into per-person subsets. ---
        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array(
            [item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][
                                indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[
                                partBs[i].astype(int),
                                2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) +
                                      (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[
                                partBs[i].astype(int),
                                2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(
                            candidate[connection_all[k][i, :2].astype(int),
                                      2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])

        # delete some rows of subset which has few parts occur
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

    @staticmethod
    def __normalize(img, img_mean=(128, 128, 128), img_scale=1 / 256):
        """Mean-subtract and scale *img* to the network's input range.

        NOTE(review): the 1/256 scale here differs from the 1/255 used by
        the TensorRT loader elsewhere in this file — confirm which the
        checkpoint was trained with.
        """
        img = np.array(img, dtype=np.float32)
        img = (img - img_mean) * img_scale
        return img

    @staticmethod
    def __pad_width(img, stride, min_dims, pad_value=(0, 0, 0)):
        """Pad *img* so both dimensions are multiples of *stride*.

        ``min_dims`` is mutated in place. Returns the padded image and
        the per-side padding as [top, left, bottom, right].
        """
        h, w, _ = img.shape
        h = min(min_dims[0], h)
        min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride
        min_dims[1] = max(min_dims[1], w)
        min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride
        pad = []
        pad.append(int(math.floor((min_dims[0] - h) / 2.0)))
        pad.append(int(math.floor((min_dims[1] - w) / 2.0)))
        pad.append(int(min_dims[0] - h - pad[0]))
        pad.append(int(min_dims[1] - w - pad[1]))
        padded_img = cv2.copyMakeBorder(img,
                                        pad[0],
                                        pad[2],
                                        pad[1],
                                        pad[3],
                                        cv2.BORDER_CONSTANT,
                                        value=pad_value)
        return padded_img, pad

    def __call__(self, image):
        """Shorthand for the auto-grading outputs of *image*."""
        return self.__get_auto_grading_outputs(image)