def play(self, is_train, video_index):
    self.scd = PgdSkeleton(Path.home() / 'intentlong', is_train,
                           self.img_size)
    res = self.scd[video_index]
    coord_norm = res[PG.COORD_NORM]  # Shape: (frames, axes, joints)
    coord_norm = np.transpose(coord_norm, (0, 2, 1))  # -> (frames, joints, axes)
    coord = coord_norm * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [
        KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape)
        for coord_JX in coord
    ]
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / v_fps)
    for n in range(v_size):
        ret, img = cap.read()
        if not ret:  # stop early if the stream ends before the reported frame count
            break
        re_img = cv2.resize(img, self.img_size)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()
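The (frames, axes, joints) layout for COORD_NORM is an assumption read off the transpose above; KeypointsOnImage.from_xy_array expects one (N, 2) array of x/y pairs per frame. A minimal, self-contained sketch of that conversion with made-up shapes:

import numpy as np

# Hypothetical normalized skeleton: 2 frames, 2 axes (x then y), 3 joints.
coord_norm_FXJ = np.random.rand(2, 2, 3)

# Reorder axes to (frames, joints, xy) so each row becomes an (x, y) pair.
coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))

# Scale normalized [0, 1] coordinates to pixels; img_size is (width, height).
img_size = (512, 512)
coord_FJX = coord_norm_FJX * np.array(img_size)
print(coord_FJX.shape)  # (2, 3, 2)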
def play_custom_video(self, video_path):
    rkr = ResizeKeepRatio((512, 512))
    if video_path is None:
        cap = cv2.VideoCapture(0)  # fall back to the default camera
        if not cap.isOpened():
            raise IOError('Failed to open camera.')
    else:
        cap = cv2.VideoCapture(str(video_path))
        v_fps = int(cap.get(cv2.CAP_PROP_FPS))
        if v_fps != 15:
            warn('Suggested video frame rate is 15, got %d, '
                 'which may reduce accuracy.' % v_fps)
    duration = 10
    while True:
        ret, img = cap.read()
        if not ret:
            break
        re_img, _, _ = rkr.resize(img, np.zeros((2,)), np.zeros((4,)))
        gdict = self.gpred.from_img(re_img)
        gesture = gdict[PG.OUT_ARGMAX]

        coord_norm_FXJ = gdict[PG.COORD_NORM]
        coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # F,J,X
        coord_FJX = coord_norm_FJX * np.array(self.img_size)
        koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
        re_img = koi.draw_on_image(re_img)
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", re_img)
        cv2.waitKey(duration)
    cap.release()
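A hypothetical call site for the method above: None selects the webcam branch, while a path plays a recorded clip (ideally at the suggested 15 fps). Here `player` stands in for an instance of the surrounding class.

player.play_custom_video(None)                     # live webcam feed
player.play_custom_video('demo/gesture_clip.mp4')  # hypothetical recorded clip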
def play_dataset_video(self, is_train, video_index, show=True):
    self.scd = PgdSkeleton(Path.home() / 'PoliceGestureLong', is_train, self.img_size)
    res = self.scd[video_index]
    print('Playing %s' % res[PG.VIDEO_NAME])
    coord_norm_FXJ = res[PG.COORD_NORM]  # Shape: F,X,J
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # F,J,X
    coord = coord_norm_FJX * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape) for coord_JX in coord]  # (frames, KeypointsOnImage)
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / (v_fps * 4))  # play back at 4x speed
    gestures = []  # full-video gesture recognition results
    for n in range(v_size):
        gdict = self.gpred.from_skeleton(coord_norm_FXJ[n][np.newaxis])
        gesture = gdict[PG.OUT_ARGMAX]
        gestures.append(gesture)
        if not show:
            continue  # skip decoding and drawing when only predictions are needed
        ret, img = cap.read()
        if not ret:
            break
        re_img = cv2.resize(img, self.img_size)
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        if self.is_unittest:
            break
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()
    gestures = np.array(gestures, int)
    res[PG.PRED_GESTURES] = gestures
    print('The prediction of video %s is complete.' % res[PG.VIDEO_NAME])
    return res
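Since the method returns the per-frame predictions, a hypothetical evaluation loop can run it headless with show=False; `player` again stands in for an instance of the surrounding class.

res = player.play_dataset_video(is_train=False, video_index=0, show=False)
pred = res[PG.PRED_GESTURES]  # int array, one gesture label per frame
print('Predicted %d frames' % len(pred))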
def _augment_batch_(self, batch, random_state, parents, hooks):
    batch = super()._augment_batch_(batch, random_state, parents, hooks)
    # After the underlying augmentation (e.g. a horizontal flip), swap each
    # symmetric left/right keypoint pair so joint indices keep their meaning.
    keypoints = []
    for kpts in batch.keypoints:
        kpts_ = list(kpts)
        n_kpts = len(kpts_)
        for i1, i2 in self.symmetric_pairs:
            # Stride by n_keypoints so several people per image are all handled.
            for j in range(0, n_kpts, self.n_keypoints):
                kpts_[i1 + j], kpts_[i2 + j] = kpts_[i2 + j], kpts_[i1 + j]
        keypoints.append(KeypointsOnImage(kpts_, kpts.shape))
    batch.keypoints = keypoints
    return batch
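For context, this override is the kind of hook used after a horizontal flip: Fliplr mirrors keypoint positions but not their identities, so left/right joints must be re-paired. A minimal standalone sketch of the same swap, with made-up pair indices:

import numpy as np
from imgaug.augmentables.kps import Keypoint, KeypointsOnImage

# Hypothetical person with 5 joints; joints 1/2 and 3/4 are left/right twins.
symmetric_pairs = [(1, 2), (3, 4)]
n_keypoints = 5

kpts_ = [Keypoint(x=float(i), y=0.0) for i in range(n_keypoints)]
for i1, i2 in symmetric_pairs:
    # Stride by n_keypoints so several people per image are all handled.
    for j in range(0, len(kpts_), n_keypoints):
        kpts_[i1 + j], kpts_[i2 + j] = kpts_[i2 + j], kpts_[i1 + j]

koi = KeypointsOnImage(kpts_, shape=(64, 64, 3))
print([(kp.x, kp.y) for kp in koi.keypoints])  # joints 1<->2 and 3<->4 swapped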
def _distort(self, image, ground_truths, aug_pipe):
    if not ground_truths:
        return image, ground_truths

    # Freeze the sampled parameters so the image and its keypoints
    # receive exactly the same transform.
    det_aug = aug_pipe.to_deterministic()
    image = det_aug.augment_image(image)
    image_shape = image.shape

    keypoints = []
    bbox_class_labels = []
    result_gts = []

    for label in ground_truths:
        keypoints.append(Keypoint(x=label[0], y=label[1]))  # top left: xmin, ymin
        keypoints.append(Keypoint(x=label[2], y=label[3]))  # bottom right: xmax, ymax
        keypoints.append(Keypoint(x=label[0], y=label[3]))  # bottom left: xmin, ymax
        keypoints.append(Keypoint(x=label[2], y=label[1]))  # top right: xmax, ymin
        bbox_class_labels.append(label[4])

    # A single KeypointsOnImage holds every box corner; [0] below indexes it.
    keypoints_on_image = det_aug.augment_keypoints(
        [KeypointsOnImage(keypoints, shape=image_shape)])

    for index, keypoint in enumerate(keypoints_on_image[0].keypoints):
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        elif index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        elif index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        else:  # index % 4 == 3: last corner of the box, emit the ground truth
            x4, y4 = keypoint.x, keypoint.y
            # top left, bottom right, bottom left, top right, class_name
            result_gts.append([x1, y1, x2, y2, x3, y3, x4, y4,
                               bbox_class_labels[index // 4]])

    return image, result_gts
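The to_deterministic() call is what keeps image and keypoints in sync: it freezes the sampled random parameters so augment_image and augment_keypoints apply the identical geometric transform. A minimal sketch of that contract:

import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.kps import Keypoint, KeypointsOnImage

aug = iaa.Affine(rotate=(-30, 30))
det = aug.to_deterministic()  # freeze one sampled rotation angle

image = np.zeros((100, 100, 3), dtype=np.uint8)
koi = KeypointsOnImage([Keypoint(x=10, y=10)], shape=image.shape)

aug_image = det.augment_image(image)
aug_koi = det.augment_keypoints([koi])[0]  # rotated by the same angle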
def play_custom_image(self, img_path):
    rkr = ResizeKeepRatio((512, 512))
    img = cv2.imread(img_path)
    re_img, _, _ = rkr.resize(img, np.zeros((2,)), np.zeros((4,)))
    gdict = self.gpred.from_img(re_img)
    gesture = gdict[PG.OUT_ARGMAX]
    coord_norm_FXJ = gdict[PG.COORD_NORM]
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # F,J,X
    coord_FJX = coord_norm_FJX * np.array(self.img_size)
    koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
    re_img = koi.draw_on_image(re_img)
    ges_name = self.gesture_dict[gesture]
    re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
    cv2.imshow("Play saved keypoint results", re_img)
    cv2.waitKey(0)  # block until a key press; without this the window never renders
Example #7
def play_custom_video(self, video_path):
    cap = cv2.VideoCapture(str(video_path))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = 10
    for n in range(v_size):
        ret, img = cap.read()
        if not ret:
            break
        re_img = cv2.resize(img, self.img_size)
        gdict = self.gpred.from_img(re_img)
        gesture = gdict[PG.OUT_ARGMAX]
        # Keypoints on image
        coord_norm_FXJ = gdict[PG.COORD_NORM]
        coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # F,J,X
        coord_FJX = coord_norm_FJX * np.array(self.img_size)
        koi = KeypointsOnImage.from_xy_array(coord_FJX[0], shape=re_img.shape)
        re_img = koi.draw_on_image(re_img)
        # Gesture name on image
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        cv2.imshow("Play saved keypoint results", re_img)
        cv2.waitKey(duration)
    cap.release()
Example #8
def play_dataset_video(self, is_train, video_index):
    self.scd = SkeletonCoordsDataset(Path.home() / 'PoliceGestureLong', is_train, self.img_size)
    res = self.scd[video_index]
    coord_norm_FXJ = res[PG.COORD_NORM]  # Shape: F,X,J
    coord_norm_FJX = np.transpose(coord_norm_FXJ, (0, 2, 1))  # F,J,X
    coord = coord_norm_FJX * np.array(self.img_size)
    img_shape = self.img_size[::-1] + (3,)
    kps = [KeypointsOnImage.from_xy_array(coord_JX, shape=img_shape) for coord_JX in coord]  # (frames, KeypointsOnImage)
    cap = cv2.VideoCapture(str(res[PG.VIDEO_PATH]))
    v_size = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    v_fps = int(cap.get(cv2.CAP_PROP_FPS))
    duration = int(1000 / (v_fps * 4))  # play back at 4x speed
    for n in range(v_size):
        ret, img = cap.read()
        if not ret:
            break
        re_img = cv2.resize(img, self.img_size)
        gdict = self.gpred.from_skeleton(coord_norm_FXJ[n][np.newaxis])
        gesture = gdict[PG.OUT_ARGMAX]
        ges_name = self.gesture_dict[gesture]
        re_img = draw_text(re_img, 50, 100, ges_name, (255, 50, 50), size=40)
        pOnImg = kps[n]
        img_kps = pOnImg.draw_on_image(re_img)
        cv2.imshow("Play saved keypoint results", img_kps)
        cv2.waitKey(duration)
    cap.release()
Example #9
def distort(image, ground_truths, aug_pipe):
    truncated_box = False

    # Freeze the sampled parameters so the image and its keypoints
    # receive exactly the same transform.
    det_aug = aug_pipe.to_deterministic()
    image = det_aug.augment_image(image)

    if not ground_truths:
        return image, ground_truths, truncated_box

    image_shape = image.shape
    keypoints = []
    bbox_class_labels = []
    result_gts = []

    for label in ground_truths:
        keypoints.append(Keypoint(x=label[0], y=label[1]))  # top left: xmin, ymin
        keypoints.append(Keypoint(x=label[2], y=label[3]))  # bottom right: xmax, ymax
        keypoints.append(Keypoint(x=label[0], y=label[3]))  # bottom left: xmin, ymax
        keypoints.append(Keypoint(x=label[2], y=label[1]))  # top right: xmax, ymin
        bbox_class_labels.append(label[4])

    # A single KeypointsOnImage holds every box corner; [0] below indexes it.
    keypoints_on_image = det_aug.augment_keypoints(
        [KeypointsOnImage(keypoints, shape=image_shape)])

    image_h, image_w = image_shape[0:2]

    for index, keypoint in enumerate(keypoints_on_image[0].keypoints):
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        elif index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        elif index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        else:  # index % 4 == 3: last corner, clamp the box and emit it
            x4, y4 = keypoint.x, keypoint.y

            # Clamp every corner to the image; flag the box if anything moved.
            xs = [min(image_w, max(v, 0)) for v in (x1, x2, x3, x4)]
            ys = [min(image_h, max(v, 0)) for v in (y1, y2, y3, y4)]
            if xs != [x1, x2, x3, x4] or ys != [y1, y2, y3, y4]:
                truncated_box = True
            _x1, _x2, _x3, _x4 = xs
            _y1, _y2, _y3, _y4 = ys

            # Axis-aligned extent of the clamped box (kept for the
            # commented-out small-box filter below).
            xmin, ymin = min(xs), min(ys)
            xmax, ymax = max(xs), max(ys)
            box_width = xmax - xmin
            box_height = ymax - ymin
            box_area = box_width * box_height
            # if box_area < ((image_w * image_h) * 0.01):
            #     print('Found a box with less than 0.01 of the image area... skipping')
            #     continue

            # top left, bottom right, bottom left, top right, class_name
            result_gts.append([_x1, _y1, _x2, _y2, _x3, _y3, _x4, _y4,
                               bbox_class_labels[index // 4]])

    return image, result_gts, truncated_box
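A hypothetical call site, assuming ground truths arrive as [xmin, ymin, xmax, ymax, class_name] in pixel coordinates and that 'sample.jpg' exists:

import cv2
import imgaug.augmenters as iaa

aug_pipe = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Affine(translate_percent={"x": (-0.1, 0.1)}),
])

image = cv2.imread('sample.jpg')  # hypothetical input image
ground_truths = [[30, 40, 120, 200, 'person']]

aug_image, aug_gts, truncated = distort(image, ground_truths, aug_pipe)
if truncated:
    print('At least one box was clipped to the image border.')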
Example #10
def _augment_batch(self, images, ground_truths):
    # Freeze the sampled parameters so the image and its keypoints
    # receive exactly the same transform.
    det_aug = self._aug_pipe.to_deterministic()
    images = det_aug.augment_image(images)
    image_shape = images.shape

    keypoints_on_images = []
    keypoints = []
    kept_labels = []  # labels that actually produced keypoints

    # For each image, ground_truths = [ann1 -> [xmin, ymin, xmax, ymax, cls_index], ...]
    im_w = image_shape[1]
    im_h = image_shape[0]

    for label in ground_truths:
        if np.sum(label) == 0:  # possible negative example [[0, 0, 0, 0, 0, 0]]
            continue
        # Labels are normalized; convert to pixel coordinates for imgaug.
        _label = [label[0] * im_w, label[1] * im_h, label[2] * im_w, label[3] * im_h]
        keypoints.append(Keypoint(x=_label[0], y=_label[1]))  # top left: xmin, ymin
        keypoints.append(Keypoint(x=_label[2], y=_label[3]))  # bottom right: xmax, ymax
        keypoints.append(Keypoint(x=_label[0], y=_label[3]))  # bottom left: xmin, ymax
        keypoints.append(Keypoint(x=_label[2], y=_label[1]))  # top right: xmax, ymin
        kept_labels.append(label)

    keypoints_on_images.append(KeypointsOnImage(keypoints, shape=image_shape))
    keypoints_on_images = det_aug.augment_keypoints(keypoints_on_images)

    for index, keypoint in enumerate(keypoints_on_images[0].keypoints):
        if index % 4 == 0:
            x1, y1 = keypoint.x, keypoint.y
        elif index % 4 == 1:
            x2, y2 = keypoint.x, keypoint.y
        elif index % 4 == 2:
            x3, y3 = keypoint.x, keypoint.y
        else:  # index % 4 == 3: last corner, write the augmented box back
            x4, y4 = keypoint.x, keypoint.y
            # Index into the kept labels, not ground_truths: skipped negative
            # examples would otherwise shift the mapping.
            label = kept_labels[index // 4]
            label[0] = min(x1, x2, x3, x4)  # xmin
            label[1] = min(y1, y2, y3, y4)  # ymin
            label[2] = max(x1, x2, x3, x4)  # xmax
            label[3] = max(y1, y2, y3, y4)  # ymax

    return images, ground_truths
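A hypothetical call site inside the generator that owns _aug_pipe; MyBatchGenerator is a made-up stand-in for that class. Note the asymmetry: labels come in normalized, but the augmented corners are written back in pixels, so a caller expecting [0, 1] boxes would need to divide by the image size again.

import numpy as np
import imgaug.augmenters as iaa

gen = MyBatchGenerator()  # hypothetical owner of _aug_pipe and _augment_batch
gen._aug_pipe = iaa.Sequential([iaa.Fliplr(0.5)])

image = np.zeros((416, 416, 3), dtype=np.uint8)
labels = [[0.1, 0.2, 0.5, 0.6, 3]]  # normalized box + class index

aug_image, aug_labels = gen._augment_batch(image, labels)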