def get_input_clip(self, keyframe_idx):
        """
        Get input clip from the video/folder of images for a given
        keyframe index.
        Args:
            keyframe_idx (int): index of the current keyframe.
        Returns:
            clip (list of tensors): formatted input clip(s) corresponding to
                the current keyframe.
        Raises:
            RuntimeError: if the very first frame of the sequence cannot be
                decoded (there is no previous frame to duplicate).
        """
        # Frame indices centered on the keyframe, spaced by SAMPLING_RATE
        # and clipped to the video length.
        seq = get_sequence(
            keyframe_idx,
            self.seq_length // 2,
            self.cfg.DATA.SAMPLING_RATE,
            self.total_frames,
        )
        clip = []
        for frame_idx in seq:
            # Seek to the requested frame before decoding it.
            self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            was_read, frame = self.cap.read()
            if was_read:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
                clip.append(frame)
            elif clip:
                # Decode failed mid-sequence: reuse the last good frame so
                # the clip keeps the expected temporal length.
                logger.error(
                    "Unable to read frame. Duplicating previous frame.")
                clip.append(clip[-1])
            else:
                # Decode failed on the first frame: the original
                # clip[-1] would raise an opaque IndexError here, so fail
                # with an explicit message instead.
                raise RuntimeError(
                    "Unable to read frame {} and no previous frame exists "
                    "to duplicate.".format(frame_idx)
                )

        clip = process_cv2_inputs(clip, self.cfg)
        return clip
Example #2
0
File: predictor.py  Project: AK391/X3D
    def __call__(self, task):
        """
        Returns the prediction results for the current task.
        Args:
            task (TaskInfo object): task object that contain
                the necessary information for action prediction. (e.g. frames, boxes)
        Returns:
            task (TaskInfo object): the same task info object but filled with
                prediction values (a tensor) and the corresponding boxes for
                action detection task.
        """
        # For detection tasks, first populate the task with person boxes.
        if self.cfg.DETECTION.ENABLE:
            task = self.object_detector(task)

        raw_frames = task.frames
        boxes = task.bboxes
        if boxes is not None:
            # Rescale detection boxes from the source resolution to the
            # test crop resolution used by the model.
            boxes = cv2_transform.scale_boxes(
                self.cfg.DATA.TEST_CROP_SIZE,
                boxes,
                task.img_height,
                task.img_width,
            )

        # The model expects RGB input; convert if frames arrive as BGR.
        if self.cfg.DEMO.INPUT_FORMAT == "BGR":
            raw_frames = [
                cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in raw_frames
            ]

        resized = [
            cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, img)
            for img in raw_frames
        ]
        inputs = process_cv2_inputs(resized, self.cfg)

        if boxes is not None:
            # Prepend a zero "frame index" column to every box, as the
            # detection head expects [idx, x1, y1, x2, y2] rows.
            zero_col = torch.full(
                size=(boxes.shape[0], 1),
                fill_value=float(0),
                device=boxes.device,
            )
            boxes = torch.cat([zero_col, boxes], axis=1)

        if self.cfg.NUM_GPUS > 0:
            # Transfer the data to the current GPU device.
            gpu = torch.device(self.gpu_id)
            if isinstance(inputs, (list, )):
                inputs = [
                    tensor.cuda(device=gpu, non_blocking=True)
                    for tensor in inputs
                ]
            else:
                inputs = inputs.cuda(device=gpu, non_blocking=True)

        if self.cfg.DETECTION.ENABLE and not boxes.shape[0]:
            # No detected boxes: skip the forward pass entirely.
            preds = torch.tensor([])
        else:
            preds = self.model(inputs, boxes)

        if self.cfg.NUM_GPUS:
            # Bring results back to the host for downstream consumers.
            preds = preds.cpu()
            if boxes is not None:
                boxes = boxes.detach().cpu()

        preds = preds.detach()
        task.add_action_preds(preds)
        if boxes is not None:
            # Drop the padded frame-index column before storing.
            task.add_bboxes(boxes[:, 1:])

        return task