def play(self, is_train, video_index):
    self.scd = PgdSkeleton(Path.home() / 'intentlong', is_train, self.img_size)
    res = self.scd[video_index]
    coord_norm = res[PG.COORD_NORM]
    coord_norm = np.transpose(coord_norm, (0, 2, 1))
    coord = coord_norm * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape)
           for coord_JX in coord]
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / v_fps)
    for n in range(v_size):
        ret, img = cap.read()
        re_img = cv2.resize(img, self.img_size)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()
def play_custom_video(self, video_path):
    rkr = ResizeKeepRatio((512, 512))
    if video_path is None:
        # No path given: fall back to the default camera
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            raise IOError('Failed to open camera.')
    else:
        cap = cv2.VideoCapture(str(video_path))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    if v_fps != 15:
        warn('Recommended video frame rate is 15 FPS; got %d, which may reduce accuracy' % v_fps)
    duration = 10
    while True:
        ret, img = cap.read()
        if not ret:
            break
        re_img, _, _ = rkr.resize(img, np.zeros((2,)), np.zeros((4,)))
        gdict = self.gpred.from_img(re_img)
        gesture = gdict[PG.OUT_ARGMAX]
        coord_norm_FXJ = gdict[PG.COORD_NORM]
        coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # (F, J, X)
        coord_FJX = coord_norm_FJX * np.array(self.img_size)
        koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
        re_img = koi.draw_on_image(re_img)
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", re_img)
        cv2.waitKey(duration)
    cap.release()
def play_dataset_video(self, is_train, video_index, show=True):
    self.scd = PgdSkeleton(Path.home() / 'PoliceGestureLong', is_train, self.img_size)
    res = self.scd[video_index]
    print('Playing %s' % res[PG.VIDEO_NAME])
    coord_norm_FXJ = res[PG.COORD_NORM]  # Shape: (F, X, J)
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # (F, J, X)
    coord = coord_norm_FJX * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape)
           for coord_JX in coord]  # One KeypointsOnImage per frame
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / (v_fps * 4))  # Play back at 4x speed
    gestures = []  # Gesture recognition results for the full video
    for n in range(v_size):
        gdict = self.gpred.from_skeleton(coord_norm_FXJ[n][np.newaxis])
        gesture = gdict[PG.OUT_ARGMAX]
        gestures.append(gesture)
        if not show:
            continue
        ret, img = cap.read()
        re_img = cv2.resize(img, self.img_size)
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()
    gestures = np.array(gestures, np.int64)  # np.int was removed in NumPy 1.24
    res[PG.PRED_GESTURES] = gestures
    print('Prediction for video %s completed' % res[PG.VIDEO_NAME])
    return res
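
# Usage sketch for play_dataset_video: run recognition without display and
# persist the per-frame predictions. `player` stands in for an instance of
# the class these methods belong to (its construction is not shown in this
# excerpt), and the output path is illustrative.
def save_video_predictions(player, video_index, out_path='pred_gestures.npy'):
    # show=False skips decoding/drawing; PRED_GESTURES holds one id per frame
    res = player.play_dataset_video(is_train=False, video_index=video_index, show=False)
    np.save(out_path, res[PG.PRED_GESTURES])
    return res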
def _augment_batch_(self, batch, random_state, parents, hooks):
    batch = super()._augment_batch_(batch, random_state, parents, hooks)
    keypoints = []
    for kpts in batch.keypoints:
        kpts_ = list(kpts)
        n_kpts = len(kpts_)
        # Swap each left/right pair; step by n_keypoints so every person
        # (group of keypoints) in the flat list is handled.
        for i1, i2 in self.symmetric_pairs:
            for j in range(0, n_kpts, self.n_keypoints):
                kpts_[i1 + j], kpts_[i2 + j] = kpts_[i2 + j], kpts_[i1 + j]
        keypoints.append(KeypointsOnImage(kpts_, kpts.shape))
    batch.keypoints = keypoints
    return batch
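
# Why _augment_batch_ swaps indices: a horizontal flip mirrors keypoint
# coordinates, but a mirrored "left shoulder" is now a right shoulder, so
# the left/right joint labels must be exchanged as well. A minimal sketch,
# assuming the method above lives on a subclass of imgaug's Fliplr (the
# class name FliplrKeypointAware and the COCO pair list are illustrative):
import imgaug.augmenters as iaa

class FliplrKeypointAware(iaa.Fliplr):
    def __init__(self, p, symmetric_pairs, n_keypoints):
        super().__init__(p)
        self.symmetric_pairs = symmetric_pairs  # (left_idx, right_idx) pairs
        self.n_keypoints = n_keypoints          # joints per person
    # _augment_batch_ as defined above

# COCO order: 1/2 eyes, 3/4 ears, 5/6 shoulders, 7/8 elbows, 9/10 wrists, ...
pairs = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)]
# p=1.0, because the swap above runs unconditionally rather than only on
# batches that were actually flipped
aug = FliplrKeypointAware(1.0, symmetric_pairs=pairs, n_keypoints=17)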
def _distort(self, image, ground_truths, aug_pipe):
    if not ground_truths:
        return image, ground_truths
    # Freeze the pipeline so image and keypoints get identical transforms
    det_aug = aug_pipe.to_deterministic()
    image = det_aug.augment_image(image)
    image_shape = image.shape
    keypoints_on_image = []
    keypoints = []
    bbox_class_labels = []
    result_gts = []
    for label in ground_truths:
        keypoints.append(Keypoint(x=label[0], y=label[1]))  # top left (xmin, ymin)
        keypoints.append(Keypoint(x=label[2], y=label[3]))  # bottom right (xmax, ymax)
        keypoints.append(Keypoint(x=label[0], y=label[3]))  # bottom left (xmin, ymax)
        keypoints.append(Keypoint(x=label[2], y=label[1]))  # top right (xmax, ymin)
        bbox_class_labels.append(label[4])
    keypoints_on_image.append(KeypointsOnImage(keypoints, shape=image_shape))
    keypoints_on_image = det_aug.augment_keypoints(keypoints_on_image)
    index = 0
    for keypoint in keypoints_on_image[0].keypoints:
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        if index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        if index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        if index % 4 == 3:
            x4, y4 = keypoint.x, keypoint.y
            # top left, bottom right, bottom left, top right and class_name
            result_gts.append([x1, y1, x2, y2, x3, y3, x4, y4,
                               bbox_class_labels[index // 4]])
        index += 1
    return image, result_gts
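
# A minimal aug_pipe to feed _distort, assuming imgaug as the backend (which
# the Keypoint/KeypointsOnImage usage above implies). Geometric augmenters
# are the interesting case: the four corner keypoints make each box follow
# exactly the same transform as the image.
import imgaug.augmenters as iaa

aug_pipe = iaa.Sequential([
    iaa.Fliplr(0.5),                            # horizontal flip
    iaa.Affine(scale=(0.9, 1.1),
               translate_percent=(-0.1, 0.1)),  # mild zoom and shift
])
# Inside the owning class, with image an HxWx3 uint8 array and
# ground_truths like [[xmin, ymin, xmax, ymax, cls_name], ...]:
# image, gts = self._distort(image, ground_truths, aug_pipe)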
def play_custom_image(self, img_path):
    rkr = ResizeKeepRatio((512, 512))
    img = cv2.imread(img_path)
    re_img, _, _ = rkr.resize(img, np.zeros((2,)), np.zeros((4,)))
    gdict = self.gpred.from_img(re_img)
    gesture = gdict[PG.OUT_ARGMAX]
    coord_norm_FXJ = gdict[PG.COORD_NORM]
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # (F, J, X)
    coord_FJX = coord_norm_FJX * np.array(self.img_size)
    koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
    re_img = koi.draw_on_image(re_img)
    ges_name = self.gesture_dict[gesture]
    re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
    cv2.imshow("Play saved keypoint results", re_img)
    cv2.waitKey(0)  # Block until a key press; imshow alone never refreshes the window
def play_custom_video(self, video_path):
    cap = cv2.VideoCapture(str(video_path))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = 10
    for n in range(v_size):
        ret, img = cap.read()
        if not ret:  # Stop if the stream ends before the reported frame count
            break
        re_img = cv2.resize(img, self.img_size)
        gdict = self.gpred.from_img(re_img)
        gesture = gdict[PG.OUT_ARGMAX]
        # Keypoints on image
        coord_norm_FXJ = gdict[PG.COORD_NORM]
        coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # (F, J, X)
        coord_FJX = coord_norm_FJX * np.array(self.img_size)
        koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
        re_img = koi.draw_on_image(re_img)
        # Gesture name on image
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        cv2.imshow("Play saved keypoint results", re_img)
        cv2.waitKey(duration)
    cap.release()
def play_dataset_video(self, is_train, video_index):
    self.scd = SkeletonCoordsDataset(Path.home() / 'PoliceGestureLong', is_train, self.img_size)
    res = self.scd[video_index]
    coord_norm_FXJ = res[PG.COORD_NORM]  # Shape: (F, X, J)
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # (F, J, X)
    coord = coord_norm_FJX * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape)
           for coord_JX in coord]  # One KeypointsOnImage per frame
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / (v_fps * 4))  # Play back at 4x speed
    for n in range(v_size):
        ret, img = cap.read()
        re_img = cv2.resize(img, self.img_size)
        gdict = self.gpred.from_skeleton(coord_norm_FXJ[n][np.newaxis])
        gesture = gdict[PG.OUT_ARGMAX]
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()  # Release the capture once playback finishes
def distort(image, ground_truths, aug_pipe):
    truncated_box = False
    # Freeze the pipeline so image and keypoints get identical transforms
    det_aug = aug_pipe.to_deterministic()
    image = det_aug.augment_image(image)
    if not ground_truths:
        return image, ground_truths, truncated_box
    image_shape = image.shape
    keypoints_on_image = []
    keypoints = []
    bbox_class_labels = []
    result_gts = []
    for label in ground_truths:
        keypoints.append(Keypoint(x=label[0], y=label[1]))  # top left (xmin, ymin)
        keypoints.append(Keypoint(x=label[2], y=label[3]))  # bottom right (xmax, ymax)
        keypoints.append(Keypoint(x=label[0], y=label[3]))  # bottom left (xmin, ymax)
        keypoints.append(Keypoint(x=label[2], y=label[1]))  # top right (xmax, ymin)
        bbox_class_labels.append(label[4])
    keypoints_on_image.append(KeypointsOnImage(keypoints, shape=image_shape))
    keypoints_on_image = det_aug.augment_keypoints(keypoints_on_image)
    index = 0
    image_h, image_w = image_shape[0:2]
    for keypoint in keypoints_on_image[0].keypoints:
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        if index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        if index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        if index % 4 == 3:
            x4, y4 = keypoint.x, keypoint.y
            # Clip every corner into the image; flag the box if any corner moved
            xs = [min(max(x, 0), image_w) for x in (x1, x2, x3, x4)]
            ys = [min(max(y, 0), image_h) for y in (y1, y2, y3, y4)]
            if xs != [x1, x2, x3, x4] or ys != [y1, y2, y3, y4]:
                truncated_box = True
            _x1, _x2, _x3, _x4 = xs
            _y1, _y2, _y3, _y4 = ys
            xmin, ymin = min(xs), min(ys)
            xmax, ymax = max(xs), max(ys)
            box_width = xmax - xmin
            box_height = ymax - ymin
            box_area = box_width * box_height  # Only used by the disabled filter below
            # if box_area < ((image_w * image_h) * 0.01):
            #     print('Found a box with less than 0.01 of the image area... skipping')
            #     continue
            # top left, bottom right, bottom left, top right and class_name
            result_gts.append([_x1, _y1, _x2, _y2, _x3, _y3, _x4, _y4,
                               bbox_class_labels[index // 4]])
        index += 1
    return image, result_gts, truncated_box
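
# Usage sketch: distort returns truncated_box so the caller can decide what
# to do with clipped boxes. One option (illustrative, not from the original
# code) is to retry a few times until no box is cut off at the border:
def distort_keep_whole_boxes(image, gts, aug_pipe, max_tries=5):
    for _ in range(max_tries):
        # distort does not modify its inputs, so retrying is safe
        image_aug, gts_aug, truncated = distort(image, gts, aug_pipe)
        if not truncated:
            break
    return image_aug, gts_aug  # last attempt is accepted if all were truncated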
def _augment_batch(self, images, ground_truths):
    # to_deterministic() freezes the random state so the keypoints below
    # receive exactly the same transform as the image
    det_aug = self._aug_pipe.to_deterministic()
    images = det_aug.augment_image(images)  # a single HxWxC image, despite the plural name
    image_shape = images.shape
    keypoints_on_images = []
    keypoints = []
    # For each image, ground_truths = [ann1 -> [xmin, ymin, xmax, ymax, cls_index], ...]
    im_w = image_shape[1]
    im_h = image_shape[0]
    for label in ground_truths:
        if np.sum(label) == 0:
            # Possible negative example: [[0, 0, 0, 0, 0, 0]]
            continue
        _label = [label[0] * im_w, label[1] * im_h,
                  label[2] * im_w, label[3] * im_h]
        keypoints.append(Keypoint(x=_label[0], y=_label[1]))  # top left (xmin, ymin)
        keypoints.append(Keypoint(x=_label[2], y=_label[3]))  # bottom right (xmax, ymax)
        keypoints.append(Keypoint(x=_label[0], y=_label[3]))  # bottom left (xmin, ymax)
        keypoints.append(Keypoint(x=_label[2], y=_label[1]))  # top right (xmax, ymin)
    keypoints_on_images.append(KeypointsOnImage(keypoints, shape=image_shape))
    keypoints_on_images = det_aug.augment_keypoints(keypoints_on_images)
    index = 0
    for keypoint in keypoints_on_images[0].keypoints:
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        if index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        if index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        if index % 4 == 3:
            x4, y4 = keypoint.x, keypoint.y
            xmin = min(x1, x2, x3, x4)
            xmax = max(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            ymax = max(y1, y2, y3, y4)
            # NOTE: index // 4 assumes no all-zero labels were skipped above,
            # and coordinates are written back in pixels, not normalized
            ground_truths[index // 4][0] = xmin
            ground_truths[index // 4][1] = ymin
            ground_truths[index // 4][2] = xmax
            ground_truths[index // 4][3] = ymax
        index += 1
    return images, ground_truths
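
# Caveat worth knowing when calling _augment_batch: boxes come in normalized
# to [0, 1] but are written back in pixels. If the caller expects normalized
# coordinates again, rescale afterwards (a sketch; the helper name is ours):
def renormalize_boxes(ground_truths, im_w, im_h):
    for gt in ground_truths:
        if np.sum(gt) == 0:  # skip negative examples, as above
            continue
        gt[0] /= im_w; gt[2] /= im_w
        gt[1] /= im_h; gt[3] /= im_h
    return ground_truths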