class VideoCutRange(object):
    """
    A closed range of frame ids ``[start, end]`` inside one video, together
    with the similarity metrics (ssim / mse / psnr) collected for it.

    Both ends are inclusive: ``contain`` accepts ``end`` and ``get_length``
    counts it. Two ranges can be concatenated with ``merge``; the metric
    lists are then joined.
    """

    def __init__(
        self,
        # TODO why can it be a dict?
        video: typing.Union["VideoObject", typing.Dict],
        start: int,
        end: int,
        # TODO need refactored ?
        ssim: typing.List[float],
        mse: typing.List[float],
        psnr: typing.List[float],
        start_time: float,
        end_time: float,
    ):
        """
        :param video: the video this range belongs to; a dict is expanded
            into ``VideoObject(**video)``
        :param start: id of the first frame of the range
        :param end: id of the last frame of the range (inclusive)
        :param ssim: ssim values collected for this range
        :param mse: mse values collected for this range
        :param psnr: psnr values collected for this range
        :param start_time: timestamp associated with ``start``
        :param end_time: timestamp associated with ``end``
        """
        if isinstance(video, dict):
            self.video = VideoObject(**video)
        else:
            self.video = video

        self.start = start
        self.end = end
        self.ssim = ssim
        self.mse = mse
        self.psnr = psnr
        self.start_time = start_time
        self.end_time = end_time

        # if length is 1
        # https://github.com/williamfzc/stagesepx/issues/9
        # a reversed range is normalised so that start <= end always holds
        if start > end:
            self.start, self.end = self.end, self.start
            self.start_time, self.end_time = self.end_time, self.start_time

        logger.debug(
            f"new a range: {self.start}({self.start_time}) - {self.end}({self.end_time})"
        )

    def can_merge(self, another: "VideoCutRange", offset: int = None, **_):
        """
        Tell whether ``another`` can be appended to this range.

        Without ``offset`` (``None`` or ``0``) the ranges must touch exactly
        (``self.end == another.start``). With an ``offset`` a gap of up to
        ``offset`` frames is tolerated. Both ranges must also point to the
        same video path.
        """
        if not offset:
            is_continuous = self.end == another.start
        else:
            is_continuous = self.end + offset >= another.start
        return is_continuous and self.video.path == another.video.path

    def merge(self, another: "VideoCutRange", **kwargs) -> "VideoCutRange":
        """
        Return a new range spanning from ``self.start`` to ``another.end``
        with the metric lists of both ranges concatenated.

        :param kwargs: forwarded to ``can_merge`` (e.g. ``offset``)
        :raises AssertionError: if ``can_merge`` rejects ``another``
        """
        # explicit raise instead of a bare `assert`, so the check is still
        # enforced when python runs with -O
        if not self.can_merge(another, **kwargs):
            raise AssertionError(f"can not merge {self} with {another}")
        return __class__(
            self.video,
            self.start,
            another.end,
            self.ssim + another.ssim,
            self.mse + another.mse,
            self.psnr + another.psnr,
            self.start_time,
            another.end_time,
        )

    def contain(self, frame_id: int) -> bool:
        """ Tell whether ``frame_id`` lies inside this range (ends included). """
        # in python:
        # range(0, 10) => [0, 10)
        # range(0, 10 + 1) => [0, 10]
        return frame_id in range(self.start, self.end + 1)

    # alias
    contain_frame_id = contain

    def contain_image(
        self,
        image_path: str = None,
        image_object: np.ndarray = None,
        *args,
        **kwargs,
    ) -> typing.Dict[str, typing.Any]:
        """
        Look for an image inside one frame of this range.

        The first frame id returned by ``pick(*args, **kwargs)`` is loaded
        and its ``contain_image`` is called with the given image and
        ``kwargs``; that call's result is returned unchanged.
        """
        # todo pick only one picture?
        target_id = self.pick(*args, **kwargs)[0]
        video_operator = self.video.get_operator()
        frame = video_operator.get_frame_by_id(target_id)
        return frame.contain_image(
            image_path=image_path, image_object=image_object, **kwargs
        )

    def pick(
        self, frame_count: int = None, is_random: bool = None, *_, **__
    ) -> typing.List[int]:
        """
        Choose some frame ids from this range.

        :param frame_count: how many ids to pick; falsy values mean 3
        :param is_random: pick distinct ids at random instead of spreading
            them evenly over the range
        :return: list of frame ids, all inside ``[start, end]``. In random
            mode at most ``get_length()`` ids are returned, because a range
            can not provide more distinct frames than it has.
        """
        if not frame_count:
            frame_count = 3
        logger.debug(
            f"pick {frame_count} frames "
            f"from {self.start}({self.start_time}) "
            f"to {self.end}({self.end_time}) "
            f"on video {self.video.path}"
        )

        if is_random:
            # the range is closed (see `contain`), so `end` is a valid
            # candidate; the sample size is capped because random.sample
            # raises ValueError when asked for more than the population,
            # which always happened for single-frame ranges
            candidates = range(self.start, self.end + 1)
            return random.sample(candidates, min(frame_count, len(candidates)))

        length = self.get_length()

        # https://github.com/williamfzc/stagesepx/issues/37
        # dividing by frame_count + 1 keeps every picked id strictly inside
        # the range instead of landing on its far edge
        frame_count += 1
        return [
            int(self.start + length / frame_count * index)
            for index in range(1, frame_count)
        ]

    def get_frames(
        self, frame_id_list: typing.List[int], *_, **__
    ) -> typing.List["VideoFrame"]:
        """
        Load the frames with the given ids from the video, in the given
        order. Usually fed with the result of ``pick``.
        """
        video_operator = self.video.get_operator()
        return [
            video_operator.get_frame_by_id(each_id) for each_id in frame_id_list
        ]

    def pick_and_get(self, *args, **kwargs) -> typing.List["VideoFrame"]:
        """ ``pick`` frame ids and load them; arguments go to both calls. """
        picked = self.pick(*args, **kwargs)
        return self.get_frames(picked, *args, **kwargs)

    def get_length(self):
        """ Number of frame ids covered by the range, both ends included. """
        return self.end - self.start + 1

    def is_stable(
        self, threshold: float = None, psnr_threshold: float = None, **_
    ) -> bool:
        """
        Tell whether this range is stable.

        :param threshold: the mean ssim must be greater than this value;
            falsy values mean 0.95
        :param psnr_threshold: when given (and truthy), the mean psnr must
            additionally be greater than this value
        """
        # IMPORTANT function!
        # it decided whether a range is stable => everything is based on it!
        if not threshold:
            threshold = 0.95

        # ssim
        res = np.mean(self.ssim) > threshold
        # psnr (double check if stable)
        if res and psnr_threshold:
            res = np.mean(self.psnr) > psnr_threshold

        # numpy comparisons yield numpy.bool_; hand back a plain bool as
        # promised by the annotation
        return bool(res)

    def is_loop(self, threshold: float = None, **_) -> bool:
        """
        Tell whether the first and the last frame of the range look alike,
        i.e. their ssim (``toolbox.compare_ssim``) is greater than
        ``threshold`` (falsy values mean 0.95).
        """
        if not threshold:
            threshold = 0.95
        video_operator = self.video.get_operator()
        start_frame = video_operator.get_frame_by_id(self.start)
        end_frame = video_operator.get_frame_by_id(self.end)
        similarity = toolbox.compare_ssim(start_frame.data, end_frame.data)
        return bool(similarity > threshold)

    def diff(self, another: "VideoCutRange", *args, **kwargs) -> typing.List[float]:
        """
        Compare frames picked from this range with frames picked from
        ``another`` (same ``pick`` arguments for both) and return the
        result of ``toolbox.multi_compare_ssim``.
        """
        self_picked = self.pick_and_get(*args, **kwargs)
        another_picked = another.pick_and_get(*args, **kwargs)
        return toolbox.multi_compare_ssim(self_picked, another_picked)

    def __str__(self):
        return f"<VideoCutRange [{self.start}({self.start_time})-{self.end}({self.end_time})] ssim={self.ssim}>"

    __repr__ = __str__
def _convert_video_into_range_list(self,
                                   video: VideoObject,
                                   block: int = None,
                                   *args,
                                   **kwargs) -> typing.List[VideoCutRange]:
    """
    Walk through the video with a stride of ``self.step`` frames, compare
    each visited frame with the next visited one and wrap every comparison
    in a ``VideoCutRange``.

    For every pair both frames are split with ``self.pic_split(data, block)``
    and the parts are compared one by one; the lowest ssim and the highest
    mse / psnr over all parts are stored in the range.

    :param video: the video to analyse
    :param block: passed to ``self.pic_split``; falsy values mean 2, and it
        falls back to 1 when ``self.is_block_valid`` rejects it
    :param args: forwarded to ``self._apply_hook``
    :param kwargs: forwarded to ``self._apply_hook``
    :return: one range per compared pair, in video order; empty when the
        video does not provide two frames to compare
    """
    range_list: typing.List[VideoCutRange] = list()
    logger.info(
        f"total frame count: {video.frame_count}, size: {video.frame_size}"
    )

    # load the first two frames
    video_operator = video.get_operator()
    cur_frame = video_operator.get_frame_by_id(1)
    next_frame = video_operator.get_frame_by_id(1 + self.step)

    # get_frame_by_id answers None for a missing frame (the loop below
    # relies on that to stop), so a video shorter than 1 + step frames has
    # no pair to compare; bail out instead of failing on `None.data`
    if cur_frame is None or next_frame is None:
        logger.warning(
            f"not enough frames to compare with step {self.step}, no range created"
        )
        return range_list

    # hook
    # NOTE(review): unlike the call inside the loop, the first frame's hook
    # does not receive *args / **kwargs - confirm this is intended
    cur_frame.data = self._apply_hook(cur_frame.frame_id, cur_frame.data)

    # check block
    if not block:
        block = 2
    if not self.is_block_valid(cur_frame.data, block):
        logger.warning(
            "array split does not result in an equal division, set block to 1"
        )
        block = 1

    while next_frame is not None:
        # hook
        next_frame.data = self._apply_hook(next_frame.frame_id,
                                           next_frame.data, *args, **kwargs)

        logger.debug(
            f"computing {cur_frame.frame_id}({cur_frame.timestamp}) & {next_frame.frame_id}({next_frame.timestamp}) ..."
        )
        start_part_list = self.pic_split(cur_frame.data, block)
        end_part_list = self.pic_split(next_frame.data, block)

        # find the min ssim and the max mse / psnr
        # NOTE(review): ssim and mse keep the value of the most different
        # part, while psnr keeps the maximum; presumably a higher psnr
        # means more similar parts, so confirm that max is wanted here
        ssim = 1.0
        mse = 0.0
        psnr = 0.0
        for part_index, (each_start, each_end) in enumerate(
                zip(start_part_list, end_part_list)):
            part_ssim = toolbox.compare_ssim(each_start, each_end)
            if part_ssim < ssim:
                ssim = part_ssim

            # mse is very sensitive
            part_mse = toolbox.calc_mse(each_start, each_end)
            if part_mse > mse:
                mse = part_mse

            part_psnr = toolbox.calc_psnr(each_start, each_end)
            if part_psnr > psnr:
                psnr = part_psnr
            logger.debug(
                f"part {part_index}: ssim={part_ssim}; mse={part_mse}; psnr={part_psnr}"
            )
        logger.debug(
            f"between {cur_frame.frame_id} & {next_frame.frame_id}: ssim={ssim}; mse={mse}; psnr={psnr}"
        )

        range_list.append(
            VideoCutRange(
                video,
                start=cur_frame.frame_id,
                end=next_frame.frame_id,
                ssim=[ssim],
                mse=[mse],
                psnr=[psnr],
                start_time=cur_frame.timestamp,
                end_time=next_frame.timestamp,
            ))

        # load the next one; the already hooked frame becomes the new
        # left-hand side, so every frame is hooked exactly once
        cur_frame = next_frame
        next_frame = video_operator.get_frame_by_id(next_frame.frame_id +
                                                    self.step)

    return range_list