def __call__(self, results):
    """Open the video with ``mmcv.VideoReader`` and register it.

    With the ``'disk'`` backend the path in ``results['filename']`` is
    opened directly. With any other backend the bytes are fetched through
    the file client and written to a per-thread temporary file under
    ``self.tmp_folder`` first.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with "new_path", "video_reader" and
            "total_frames" set.
    """
    if self.io_backend != 'disk':
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)
        # The file name depends only on the thread id, so successive calls
        # from the same thread overwrite the same temporary file.
        tmp_name = f'tmp_{get_thread_id()}.mp4'
        new_path = osp.join(self.tmp_folder, tmp_name)
        with open(new_path, 'wb') as tmp_file:
            tmp_file.write(self.file_client.get(results['filename']))
    else:
        new_path = results['filename']

    reader = mmcv.VideoReader(new_path)
    results['new_path'] = new_path
    results['video_reader'] = reader
    results['total_frames'] = len(reader)
    return results
def __call__(self, results):
    """Open the video with PyAV and register the container.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with "video_reader" set, and "total_frames" set
            when the first video stream can be read.

    Raises:
        ImportError: If PyAV is not installed.
    """
    try:
        import av
    except ImportError:
        raise ImportError('Please run "conda install av -c conda-forge" '
                          'or "pip install av" to install PyAV first.')

    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    video_bytes = self.file_client.get(results['filename'])
    container = av.open(io.BytesIO(video_bytes))
    results['video_reader'] = container

    try:
        results['total_frames'] = container.streams.video[0].frames
    except (KeyError, IndexError):
        # Best effort: report the offending file and leave "total_frames"
        # unset instead of aborting the pipeline.
        print(results['filename'])
    return results
def __call__(self, results):
    """Load the image referenced by ``results[f'{key}_path']``.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information. The
            image is stored under ``key``, its shape under
            ``f'{key}_ori_shape'``, and optionally a copy under
            ``f'ori_{key}'``.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    path_key = f'{self.key}_path'
    filepath = str(results[path_key])
    raw = self.file_client.get(filepath)
    img = mmcv.imfrombytes(
        raw, flag=self.flag, channel_order=self.channel_order)  # HWC

    results[self.key] = img
    # The path is written back in its ``str`` form.
    results[path_key] = filepath
    results[f'{self.key}_ori_shape'] = img.shape
    if self.save_original_img:
        results[f'ori_{self.key}'] = img.copy()
    return results
def __call__(self, results):
    """Load the audio with librosa, or synthesize a 10 second dummy clip.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with "length", "sample_rate" and "audios" set.

    Raises:
        ImportError: If librosa is not installed.
    """
    try:
        import librosa
    except ImportError:
        raise ImportError('Please install librosa first.')

    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    if not osp.exists(results['audio_path']):
        # No audio file on the local filesystem: fall back to 10 seconds
        # of padding produced by the configured ``_<pad_method>_pad``.
        pad_func = getattr(self, f'_{self.pad_method}_pad')
        y = pad_func(int(round(10.0 * self.sample_rate)))
        sr = self.sample_rate
    else:
        audio_bytes = self.file_client.get(results['audio_path'])
        y, sr = librosa.load(io.BytesIO(audio_bytes), sr=self.sample_rate)

    results['length'] = y.shape[0]
    results['sample_rate'] = sr
    results['audios'] = y
    return results
def __call__(self, results):
    """Load every image listed in ``results[f'{key}_path']``.

    Each image is decoded, optionally converted to the Y channel, and
    promoted to three dimensions (HWC) if it is two-dimensional.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information. The
            list of images is stored under ``key``, the list of shapes
            under ``f'{key}_ori_shape'``, and optionally copies of the
            images under ``f'ori_{key}'``.

    Raises:
        TypeError: If the path entry is not a list.
        ValueError: If ``convert_to`` is set but no supported conversion
            matches ``channel_order``.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    filepaths = results[f'{self.key}_path']
    if not isinstance(filepaths, list):
        raise TypeError(
            f'filepath should be list, but got {type(filepaths)}')
    filepaths = [str(v) for v in filepaths]

    imgs = []
    shapes = []
    if self.save_original_img:
        ori_imgs = []

    for filepath in filepaths:
        img_bytes = self.file_client.get(filepath)
        img = mmcv.imfrombytes(
            img_bytes, flag=self.flag,
            channel_order=self.channel_order)  # HWC

        # convert to y-channel, if specified
        if self.convert_to is not None:
            if (self.channel_order == 'bgr'
                    and self.convert_to.lower() == 'y'):
                img = mmcv.bgr2ycbcr(img, y_only=True)
            elif self.channel_order == 'rgb':
                # NOTE(review): this branch does not check the value of
                # ``convert_to``; kept as-is to preserve behaviour.
                img = mmcv.rgb2ycbcr(img, y_only=True)
            else:
                # The message previously listed "bgr2ycbcr" twice.
                raise ValueError('Currently support only "bgr2ycbcr" or '
                                 '"rgb2ycbcr".')

        if img.ndim == 2:
            img = np.expand_dims(img, axis=2)

        imgs.append(img)
        shapes.append(img.shape)
        if self.save_original_img:
            # The saved copy is taken after conversion and promotion.
            ori_imgs.append(img.copy())

    results[self.key] = imgs
    results[f'{self.key}_path'] = filepaths
    results[f'{self.key}_ori_shape'] = shapes
    if self.save_original_img:
        results[f'ori_{self.key}'] = ori_imgs
    return results
def _get_mask_from_file(self, path):
    """Load a mask image from ``path`` as a single-channel HxWx1 array.

    Every strictly positive pixel is set to 1.

    Args:
        path: Location of the mask, passed to the file client.

    Returns:
        The mask array with one trailing channel.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend,
                                      **self.file_client_kwargs)

    raw = self.file_client.get(path)
    mask = mmcv.imfrombytes(raw, flag=self.flag)  # HWC, BGR

    # Keep exactly one channel: add one to a 2-D mask, otherwise keep
    # only the first channel.
    if mask.ndim != 2:
        mask = mask[:, :, 0:1]
    else:
        mask = np.expand_dims(mask, axis=2)

    mask[mask > 0] = 1.
    return mask
def __call__(self, results):
    """Load one image, optionally through an in-memory cache.

    When ``use_cache`` is true, decoded images are kept in ``self.cache``
    keyed by file path and reused on later calls. The cached array is the
    decoded image before any Y-channel conversion.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information. The
            image is stored under ``key``, its shape under
            ``f'{key}_ori_shape'``, and optionally a copy under
            ``f'ori_{key}'``.

    Raises:
        ValueError: If ``convert_to`` is set but no supported conversion
            matches ``channel_order``.
    """
    filepath = str(results[f'{self.key}_path'])
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    img = None
    if self.use_cache:
        if self.cache is None:
            self.cache = dict()
        img = self.cache.get(filepath)

    if img is None:
        # Cache miss or caching disabled: a single decode path for both.
        img_bytes = self.file_client.get(filepath)
        img = mmcv.imfrombytes(
            img_bytes,
            flag=self.flag,
            channel_order=self.channel_order,
            backend=self.backend)  # HWC
        if self.use_cache:
            self.cache[filepath] = img

    if self.convert_to is not None:
        if self.channel_order == 'bgr' and self.convert_to.lower() == 'y':
            img = mmcv.bgr2ycbcr(img, y_only=True)
        elif self.channel_order == 'rgb':
            # NOTE(review): this branch does not check the value of
            # ``convert_to``; kept as-is to preserve behaviour.
            img = mmcv.rgb2ycbcr(img, y_only=True)
        else:
            # The message previously listed "bgr2ycbcr" twice.
            raise ValueError('Currently support only "bgr2ycbcr" or '
                             '"rgb2ycbcr".')
        # Restore the channel axis if the conversion returned a 2-D array.
        if img.ndim == 2:
            img = np.expand_dims(img, axis=2)

    results[self.key] = img
    results[f'{self.key}_path'] = filepath
    results[f'{self.key}_ori_shape'] = img.shape
    if self.save_original_img:
        results[f'ori_{self.key}'] = img.copy()
    return results
def __call__(self, results):
    """Load a list of images and zero-pad the bottom of each one.

    Every image is promoted to three dimensions (HWC) and then padded
    with zero rows at the bottom so that its height becomes a multiple
    of 16.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information. The
            list of padded images is stored under ``key``, their (padded)
            shapes under ``f'{key}_ori_shape'``, and optionally copies
            under ``f'ori_{key}'``.

    Raises:
        TypeError: If the path entry is not a list.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    filepaths = results[f'{self.key}_path']
    if not isinstance(filepaths, list):
        raise TypeError(
            f'filepath should be list, but got {type(filepaths)}')
    filepaths = [str(v) for v in filepaths]

    imgs = []
    shapes = []
    if self.save_original_img:
        ori_imgs = []

    for filepath in filepaths:
        img_bytes = self.file_client.get(filepath)
        img = mmcv.imfrombytes(img_bytes, flag=self.flag)  # HWC, BGR

        # Promote 2-D images first: the pad spec below names three axes
        # and ``np.pad`` would raise on a 2-D array.
        if img.ndim == 2:
            img = np.expand_dims(img, axis=2)

        # NOTE(review): a height that is already a multiple of 16 gets a
        # full extra 16 rows; kept to preserve existing output shapes.
        pad = 16 - img.shape[0] % 16
        img = np.pad(
            img, ((0, pad), (0, 0), (0, 0)),
            'constant',
            constant_values=0)

        imgs.append(img)
        shapes.append(img.shape)
        if self.save_original_img:
            ori_imgs.append(img.copy())

    results[self.key] = imgs
    results[f'{self.key}_path'] = filepaths
    results[f'{self.key}_ori_shape'] = shapes
    if self.save_original_img:
        results[f'ori_{self.key}'] = ori_imgs
    return results
def __call__(self, results):
    """Decode the frames selected by ``results['frame_inds']``.

    For the "RGB" modality one image per index is read; for "Flow" an
    x-image and a y-image are read per index, both as grayscale.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with "imgs", "original_shape" and "img_shape"
            set.

    Raises:
        NotImplementedError: If the modality is neither "RGB" nor "Flow".
    """
    mmcv.use_backend(self.decoding_backend)

    directory = results['frame_dir']
    filename_tmpl = results['filename_tmpl']
    modality = results['modality']

    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    # Collapse singleton axes of a non 1-D index array before iterating.
    if results['frame_inds'].ndim != 1:
        results['frame_inds'] = np.squeeze(results['frame_inds'])

    offset = results.get('offset', 0)

    imgs = []
    for frame_idx in results['frame_inds']:
        frame_idx += offset
        if modality == 'RGB':
            frame_path = osp.join(directory,
                                  filename_tmpl.format(frame_idx))
            # Get frame with channel order RGB directly.
            imgs.append(
                mmcv.imfrombytes(
                    self.file_client.get(frame_path),
                    channel_order='rgb'))
        elif modality == 'Flow':
            x_path = osp.join(directory,
                              filename_tmpl.format('x', frame_idx))
            y_path = osp.join(directory,
                              filename_tmpl.format('y', frame_idx))
            x_frame = mmcv.imfrombytes(
                self.file_client.get(x_path), flag='grayscale')
            y_frame = mmcv.imfrombytes(
                self.file_client.get(y_path), flag='grayscale')
            imgs.extend([x_frame, y_frame])
        else:
            raise NotImplementedError

    results['imgs'] = imgs
    # Both shape entries are taken from the first decoded frame.
    results['original_shape'] = imgs[0].shape[:2]
    results['img_shape'] = imgs[0].shape[:2]
    return results
def _get_random_mask_from_set(self):
    """Load a randomly chosen mask from ``self.mask_list``.

    Every strictly positive pixel is set to 1.

    Returns:
        The mask array with one trailing channel (HxWx1).
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend,
                                      **self.file_client_kwargs)

    # ``randint`` excludes the upper bound, so the index is always within
    # ``[0, mask_set_size)``.
    chosen = np.random.randint(0, self.mask_set_size)
    raw = self.file_client.get(self.mask_list[chosen])
    mask = mmcv.imfrombytes(raw, flag=self.flag)  # HWC, BGR

    # Keep exactly one channel.
    if mask.ndim != 2:
        mask = mask[:, :, 0:1]
    else:
        mask = np.expand_dims(mask, axis=2)

    mask[mask > 0] = 1.
    return mask
class LoadImageFromFile(object):
    """Load a single image from file.

    Args:
        io_backend (str): io backend where images are stored.
            Default: 'disk'.
        key (str): Keys in results to find corresponding path.
            Default: 'gt'.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and
            'rgb'. Default: 'bgr'.
        save_original_img (bool): If True, maintain a copy of the image in
            `results` dict with name of `f'ori_{key}'`. Default: False.
        kwargs (dict): Args for file client.
    """

    def __init__(self,
                 io_backend='disk',
                 key='gt',
                 flag='color',
                 channel_order='bgr',
                 save_original_img=False,
                 **kwargs):
        # The file client is created lazily on the first call.
        self.file_client = None
        self.io_backend = io_backend
        self.kwargs = kwargs
        self.key = key
        self.flag = flag
        self.channel_order = channel_order
        self.save_original_img = save_original_img

    def __call__(self, results):
        """Load the image referenced by ``results[f'{key}_path']``.

        Args:
            results (dict): A dict containing the necessary information
                and data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        path_key = f'{self.key}_path'
        filepath = str(results[path_key])
        raw = self.file_client.get(filepath)
        img = mmcv.imfrombytes(
            raw, flag=self.flag, channel_order=self.channel_order)  # HWC

        results[self.key] = img
        results[path_key] = filepath
        results[f'{self.key}_ori_shape'] = img.shape
        if self.save_original_img:
            results[f'ori_{self.key}'] = img.copy()
        return results

    def __repr__(self):
        fields = (f'(io_backend={self.io_backend}, key={self.key}, '
                  f'flag={self.flag}, '
                  f'save_original_img={self.save_original_img})')
        return self.__class__.__name__ + fields
class DecordInit(object):
    """Using decord to initialize the video_reader.

    Decord: https://github.com/dmlc/decord

    Required keys are "filename",
    added or modified keys are "video_reader" and "total_frames".

    Args:
        io_backend (str): Backend name handed to ``FileClient``.
            Default: 'disk'.
        num_threads (int): Forwarded to ``decord.VideoReader``.
            Default: 1.
        kwargs (dict): Args for file client.
    """

    def __init__(self, io_backend='disk', num_threads=1, **kwargs):
        # The file client is created lazily on the first call.
        self.file_client = None
        self.io_backend = io_backend
        self.kwargs = kwargs
        self.num_threads = num_threads

    def __call__(self, results):
        """Open the video with Decord and register the reader.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "video_reader" and "total_frames" set.

        Raises:
            ImportError: If decord is not installed.
        """
        try:
            import decord
        except ImportError:
            raise ImportError(
                'Please run "pip install decord" to install Decord first.')

        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        video_bytes = self.file_client.get(results['filename'])
        reader = decord.VideoReader(
            io.BytesIO(video_bytes), num_threads=self.num_threads)
        results['video_reader'] = reader
        results['total_frames'] = len(reader)
        return results
def before_run(self, runner):
    """Resolve the checkpoint output directory and the symlink option.

    Args:
        runner: The runner about to start. Its ``work_dir`` and
            ``logger`` attributes are used.
    """
    if not self.out_dir:
        self.out_dir = runner.work_dir

    self.file_client = FileClient.infer_client(self.file_client_args,
                                               self.out_dir)

    # if `self.out_dir` is not equal to `runner.work_dir`, it means that
    # `self.out_dir` is set so the final `self.out_dir` is the
    # concatenation of `self.out_dir` and the last level directory of
    # `runner.work_dir`
    if self.out_dir != runner.work_dir:
        basename = osp.basename(runner.work_dir.rstrip(osp.sep))
        self.out_dir = self.file_client.join_path(self.out_dir, basename)

    runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by '
                        f'{self.file_client.name}.'))

    # disable the create_symlink option because some file backends do not
    # allow to create a symlink
    if 'create_symlink' in self.args:
        if (self.args['create_symlink']
                and not self.file_client.allow_symlink):
            self.args['create_symlink'] = False
            # A space was missing between the first two fragments, so the
            # message read "is changedto be False".
            warnings.warn(
                ('create_symlink is set as True by the user but is changed '
                 'to be False because creating symbolic link is not '
                 f'allowed in {self.file_client.name}'))
    else:
        self.args['create_symlink'] = self.file_client.allow_symlink
def before_run(self, runner):
    """Resolve the output directory and restore the best-checkpoint path.

    Args:
        runner: The runner about to start. Its ``work_dir``, ``logger``
            and ``meta`` attributes are used.
    """
    work_dir = runner.work_dir
    if not self.out_dir:
        self.out_dir = work_dir

    self.file_client = FileClient.infer_client(self.file_client_args,
                                               self.out_dir)

    # A user-supplied `out_dir` differs from the work dir; in that case
    # the final directory is `out_dir` joined with the last path component
    # of the work dir.
    if self.out_dir != work_dir:
        last_level = osp.basename(work_dir.rstrip(osp.sep))
        self.out_dir = self.file_client.join_path(self.out_dir, last_level)
        runner.logger.info(
            (f'The best checkpoint will be saved to {self.out_dir} by '
             f'{self.file_client.name}'))

    if self.save_best is None:
        return

    if runner.meta is None:
        warnings.warn('runner.meta is None. Creating an empty one.')
        runner.meta = dict()
    hook_msgs = runner.meta.setdefault('hook_msgs', dict())
    self.best_ckpt_path = hook_msgs.get('best_ckpt', None)
class LoadImageFromFileList(LoadImageFromFile):
    """Load image from file list.

    It accepts a list of path and read each frame from each path. A list
    of frames will be returned.

    Args:
        io_backend (str): io backend where images are stored.
            Default: 'disk'.
        key (str): Keys in results to find corresponding path.
            Default: 'gt'.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and
            'rgb'. Default: 'bgr'.
        save_original_img (bool): If True, maintain a copy of the image in
            `results` dict with name of `f'ori_{key}'`. Default: False.
        kwargs (dict): Args for file client.
    """

    def __call__(self, results):
        """Load every image listed in ``results[f'{key}_path']``.

        Args:
            results (dict): A dict containing the necessary information
                and data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.

        Raises:
            TypeError: If the path entry is not a list.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        path_key = f'{self.key}_path'
        filepaths = results[path_key]
        if not isinstance(filepaths, list):
            raise TypeError(
                f'filepath should be list, but got {type(filepaths)}')
        filepaths = [str(item) for item in filepaths]

        imgs, shapes = [], []
        if self.save_original_img:
            ori_imgs = []

        for filepath in filepaths:
            raw = self.file_client.get(filepath)
            img = mmcv.imfrombytes(
                raw, flag=self.flag,
                channel_order=self.channel_order)  # HWC
            # Give 2-D images an explicit channel axis.
            if img.ndim == 2:
                img = np.expand_dims(img, axis=2)
            imgs.append(img)
            shapes.append(img.shape)
            if self.save_original_img:
                ori_imgs.append(img.copy())

        results[self.key] = imgs
        results[path_key] = filepaths
        results[f'{self.key}_ori_shape'] = shapes
        if self.save_original_img:
            results[f'ori_{self.key}'] = ori_imgs
        return results
class AudioDecodeInit(object):
    """Using librosa to initialize the audio reader.

    Required keys are "audio_path", added or modified keys are "length",
    "sample_rate", "audios".

    Args:
        io_backend (str): io backend where frames are stored.
            Default: 'disk'.
        sample_rate (int): Audio sampling times per second. Default: 16000.
        pad_method (str): How the dummy clip is filled when the audio file
            does not exist, either 'zero' or 'random'. Default: 'zero'.
        kwargs (dict): Args for file client.
    """

    def __init__(self,
                 io_backend='disk',
                 sample_rate=16000,
                 pad_method='zero',
                 **kwargs):
        if pad_method not in ['random', 'zero']:
            raise NotImplementedError
        self.io_backend = io_backend
        self.sample_rate = sample_rate
        self.pad_method = pad_method
        self.kwargs = kwargs
        # The file client is created lazily on the first call.
        self.file_client = None

    def _zero_pad(self, shape):
        """Return a float32 array of zeros with the given shape."""
        return np.zeros(shape, dtype=np.float32)

    def _random_pad(self, shape):
        """Return ``shape`` float32 samples drawn uniformly from -1~+1."""
        # librosa load raw audio file into a distribution of -1~+1
        noise = np.random.rand(shape).astype(np.float32)
        return noise * 2 - 1

    def __call__(self, results):
        """Load the audio with librosa, or synthesize a 10 second clip.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "length", "sample_rate" and "audios"
                set.

        Raises:
            ImportError: If librosa is not installed.
        """
        try:
            import librosa
        except ImportError:
            raise ImportError('Please install librosa first.')

        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        if not osp.exists(results['audio_path']):
            # Generate a dummy 10s input with the configured pad method.
            pad_func = getattr(self, f'_{self.pad_method}_pad')
            y = pad_func(int(round(10.0 * self.sample_rate)))
            sr = self.sample_rate
        else:
            audio_bytes = self.file_client.get(results['audio_path'])
            y, sr = librosa.load(
                io.BytesIO(audio_bytes), sr=self.sample_rate)

        results['length'] = y.shape[0]
        results['sample_rate'] = sr
        results['audios'] = y
        return results
def __call__(self, results):
    """Load a side-by-side image pair and split it into halves.

    The image is loaded like a regular single image and then cut in the
    middle of its width into ``img_a`` (left) and ``img_b`` (right).

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation.

    Returns:
        dict: A dict containing the processed data and information.

    Raises:
        ValueError: If the width of the loaded image is odd.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    path_key = f'{self.key}_path'
    filepath = str(results[path_key])
    raw = self.file_client.get(filepath)
    img = mmcv.imfrombytes(
        raw, flag=self.flag, channel_order=self.channel_order)  # HWC
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)

    results[self.key] = img
    results[path_key] = filepath
    results[f'{self.key}_ori_shape'] = img.shape
    if self.save_original_img:
        results[f'ori_{self.key}'] = img.copy()

    # crop pair into a and b
    w = img.shape[1]
    if w % 2 != 0:
        raise ValueError(
            f'The width of image pair must be even number, but got {w}.')
    half_w = w // 2
    halves = {'a': img[:, :half_w, :], 'b': img[:, half_w:, :]}

    for tag, half in halves.items():
        results[f'img_{tag}'] = half
    for tag in halves:
        results[f'img_{tag}_path'] = filepath
    for tag, half in halves.items():
        results[f'img_{tag}_ori_shape'] = half.shape
    if self.save_original_img:
        for tag, half in halves.items():
            results[f'ori_img_{tag}'] = half.copy()
    return results
def __call__(self, results):
    """Composite the sample foreground with a randomly chosen one.

    With probability 0.5 a second foreground/alpha pair is loaded, resized
    to the current foreground size and blended in. The merged alpha is
    ``1 - (1 - alpha) * (1 - alpha2)``.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation. Reads "fg" and "alpha_f"; presumably
            "alpha_f" is a float matte in [0, 1] -- TODO confirm against
            the producer of that key.

    Returns:
        dict: A dict containing the processed data and information, with
            "fg" updated and "alpha" written as uint8 in [0, 255].
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    fg = results['fg']
    alpha = results['alpha_f']
    h, w = results['fg'].shape[:2]

    # randomly select fg
    if np.random.rand() < 0.5:
        idx = np.random.randint(len(self.fg_list))
        fg2_bytes = self.file_client.get(self.fg_list[idx])
        fg2 = mmcv.imfrombytes(fg2_bytes)
        alpha2_bytes = self.file_client.get(self.alpha_list[idx])
        alpha2 = mmcv.imfrombytes(alpha2_bytes, flag='grayscale')
        alpha2 = alpha2.astype(np.float32) / 255.

        fg2 = mmcv.imresize(fg2, (w, h), interpolation=self.interpolation)
        alpha2 = mmcv.imresize(
            alpha2, (w, h), interpolation=self.interpolation)

        # the overlap of two 50% transparency will be 75%
        alpha_tmp = 1 - (1 - alpha) * (1 - alpha2)
        # if the result alpha is all-one, then we avoid composition
        if np.any(alpha_tmp < 1):
            # composite fg with fg2
            fg = fg.astype(np.float32) * alpha[..., None] \
                + fg2.astype(np.float32) * (1 - alpha[..., None])
            alpha = alpha_tmp
            # ``astype`` returns a new array; the result used to be
            # discarded, leaving ``fg`` as float32.
            fg = fg.astype(np.uint8)

    results['fg'] = fg
    results['alpha'] = (alpha * 255).astype(np.uint8)
    return results
def __call__(self, results):
    """Load a random background and resize it to the foreground size.

    Args:
        results (dict): A dict containing the necessary information and
            data for augmentation. Reads "fg".

    Returns:
        dict: A dict containing the processed data and information, with
            "bg" added.
    """
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    h, w = results['fg'].shape[:2]
    chosen = np.random.randint(len(self.bg_list))
    bg_path = Path(self.bg_dir).joinpath(self.bg_list[chosen])
    raw = self.file_client.get(bg_path)
    loaded = mmcv.imfrombytes(raw, flag=self.flag)  # HWC, BGR
    # ``imresize`` takes the target size as (width, height).
    results['bg'] = mmcv.imresize(loaded, (w, h), interpolation='bicubic')
    return results
def __call__(self, results):
    """Open the video with Decord and register the reader.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with "video_reader" and "total_frames" set.

    Raises:
        ImportError: If decord is not installed.
    """
    try:
        import decord
    except ImportError:
        raise ImportError(
            'Please run "pip install decord" to install Decord first.')

    if self.file_client is None:
        self.file_client = FileClient(self.io_backend, **self.kwargs)

    video_bytes = self.file_client.get(results['filename'])
    reader = decord.VideoReader(
        io.BytesIO(video_bytes), num_threads=self.num_threads)
    results['video_reader'] = reader
    results['total_frames'] = len(reader)
    return results
class RandomLoadResizeBg:
    """Randomly load a background image and resize it.

    Required key is "fg", added key is "bg".

    Args:
        bg_dir (str): Path of directory to load background images from.
        io_backend (str): io backend where images are stored.
            Default: 'disk'.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and
            'rgb'. Default: 'bgr'.
        kwargs (dict): Args for file client.
    """

    def __init__(self,
                 bg_dir,
                 io_backend='disk',
                 flag='color',
                 channel_order='bgr',
                 **kwargs):
        self.bg_dir = bg_dir
        # The directory is scanned once, at construction time.
        self.bg_list = list(mmcv.scandir(bg_dir))
        self.io_backend = io_backend
        self.flag = flag
        self.channel_order = channel_order
        self.kwargs = kwargs
        # The file client is created lazily on the first call.
        self.file_client = None

    def __call__(self, results):
        """Load a random background and resize it to the foreground size.

        Args:
            results (dict): A dict containing the necessary information
                and data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        h, w = results['fg'].shape[:2]
        chosen = np.random.randint(len(self.bg_list))
        bg_path = Path(self.bg_dir).joinpath(self.bg_list[chosen])
        raw = self.file_client.get(bg_path)
        loaded = mmcv.imfrombytes(
            raw, flag=self.flag, channel_order=self.channel_order)  # HWC
        # ``imresize`` takes the target size as (width, height).
        results['bg'] = mmcv.imresize(
            loaded, (w, h), interpolation='bicubic')
        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return cls_name + f"(bg_dir='{self.bg_dir}')"
def imwrite(img,
            file_path,
            params=None,
            auto_mkdir=None,
            file_client_args=None):
    """Write image to file.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Warning:
        The parameter `auto_mkdir` will be deprecated in the future and
        every file clients will make directory automatically.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Image file path.
        params (None or list): Same as opencv :func:`imwrite` interface.
        auto_mkdir (bool): If the parent folder of `file_path` does not
            exist, whether to create it automatically. It will be
            deprecated.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        bool: Successful or not.

    Examples:
        >>> # write to hard disk client
        >>> ret = mmcv.imwrite(img, '/path/to/img.jpg')
        >>> # infer the file backend by the prefix s3
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg')
        >>> # manually set the file backend petrel
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg', file_client_args={
        ...     'backend': 'petrel'})
    """
    assert is_filepath(file_path)
    file_path = str(file_path)

    if auto_mkdir is not None:
        warnings.warn(
            'The parameter `auto_mkdir` will be deprecated in the future and '
            'every file clients will make directory automatically.')

    client = FileClient.infer_client(file_client_args, file_path)

    # The encode format is taken from the path suffix, e.g. '.jpg' for
    # '/path/your/img.jpg'.
    suffix = osp.splitext(file_path)[-1]
    success, encoded = cv2.imencode(suffix, img, params)
    client.put(encoded.tobytes(), file_path)
    return success
class PyAVInit(object):
    """Using pyav to initialize the video.

    PyAV: https://github.com/mikeboers/PyAV

    Required keys are "filename",
    added or modified keys are "video_reader", and "total_frames".

    Args:
        io_backend (str): io backend where frames are stored.
            Default: 'disk'.
        kwargs (dict): Args for file client.
    """

    def __init__(self, io_backend='disk', **kwargs):
        # The file client is created lazily on the first call.
        self.file_client = None
        self.io_backend = io_backend
        self.kwargs = kwargs

    def __call__(self, results):
        """Open the video with PyAV and register the container.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "video_reader" set, and "total_frames"
                set when the first video stream can be read.

        Raises:
            ImportError: If PyAV is not installed.
        """
        try:
            import av
        except ImportError:
            raise ImportError('Please run "conda install av -c conda-forge" '
                              'or "pip install av" to install PyAV first.')

        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        video_bytes = self.file_client.get(results['filename'])
        container = av.open(io.BytesIO(video_bytes))
        results['video_reader'] = container

        try:
            results['total_frames'] = container.streams.video[0].frames
        except (KeyError, IndexError):
            # Best effort: report the offending file and leave
            # "total_frames" unset instead of aborting the pipeline.
            print(results['filename'])
        return results
class OpenCVInit(object):
    """Using OpenCV to initialize the video_reader.

    Required keys are "filename", added or modified keys are "new_path",
    "video_reader" and "total_frames".

    A private temporary folder is created on construction; it is used to
    stage videos fetched from non-disk backends and is removed when the
    instance is destroyed.

    Args:
        io_backend (str): Backend name handed to ``FileClient``.
            Default: 'disk'.
        kwargs (dict): Args for file client.
    """

    def __init__(self, io_backend='disk', **kwargs):
        self.io_backend = io_backend
        self.kwargs = kwargs
        # The file client is created lazily on the first non-disk call.
        self.file_client = None
        random_string = get_random_string()
        thread_id = get_thread_id()
        self.tmp_folder = osp.join(get_shm_dir(),
                                   f'{random_string}_{thread_id}')
        os.mkdir(self.tmp_folder)

    def __call__(self, results):
        """Open the video with ``mmcv.VideoReader`` and register it.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "new_path", "video_reader" and
                "total_frames" set.
        """
        if self.io_backend == 'disk':
            new_path = results['filename']
        else:
            if self.file_client is None:
                self.file_client = FileClient(self.io_backend, **self.kwargs)

            thread_id = get_thread_id()
            # save the file of same thread at the same place
            new_path = osp.join(self.tmp_folder, f'tmp_{thread_id}.mp4')
            with open(new_path, 'wb') as f:
                f.write(self.file_client.get(results['filename']))

        container = mmcv.VideoReader(new_path)
        results['new_path'] = new_path
        results['video_reader'] = container
        results['total_frames'] = len(container)
        return results

    def __del__(self):
        # ``__del__`` also runs when ``__init__`` failed before
        # ``tmp_folder`` was assigned, and the folder may already be gone;
        # cleanup is best effort and must never raise.
        tmp_folder = getattr(self, 'tmp_folder', None)
        if tmp_folder is not None:
            shutil.rmtree(tmp_folder, ignore_errors=True)
def load_fileclient_dist(filename, backend, map_location):
    """Load a checkpoint through a file client in a distributed setting.

    Local rank 0 downloads the checkpoint first; when more than one
    process is running, the remaining ranks wait on a barrier and load it
    afterwards.

    Args:
        filename: Location of the checkpoint, passed to the file client.
        backend (str): File client backend; only 'ceph' is accepted.
        map_location: Forwarded to ``torch.load``.

    Returns:
        The object returned by ``torch.load``.

    Raises:
        ValueError: If ``backend`` is not supported.
    """
    rank, world_size = get_dist_info()
    # Prefer the per-node rank when the launcher exports it.
    rank = int(os.environ.get('LOCAL_RANK', rank))

    allowed_backends = ['ceph']
    if backend not in allowed_backends:
        raise ValueError(f'Load from Backend {backend} is not supported.')

    def _fetch():
        client = FileClient(backend=backend)
        stream = io.BytesIO(client.get(filename))
        return torch.load(stream, map_location=map_location)

    if rank == 0:
        checkpoint = _fetch()
    if world_size > 1:
        torch.distributed.barrier()
    if rank > 0:
        checkpoint = _fetch()
    return checkpoint
def imread(img_or_path,
           flag='color',
           channel_order='bgr',
           backend=None,
           file_client_args=None):
    """Read an image.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Args:
        img_or_path (ndarray or str or Path): Either a numpy array or str
            or pathlib.Path. If it is a numpy array (loaded image), then
            it will be returned as is.
        flag (str): Flags specifying the color type of a loaded image,
            candidates are `color`, `grayscale`, `unchanged`,
            `color_ignore_orientation` and `grayscale_ignore_orientation`.
            By default, `cv2` and `pillow` backend would rotate the image
            according to its EXIF info unless called with `unchanged` or
            `*_ignore_orientation` flags. `turbojpeg` and `tifffile`
            backend always ignore image's EXIF info regardless of the
            flag. The `turbojpeg` backend only supports `color` and
            `grayscale`.
        channel_order (str): Order of channel, candidates are `bgr` and
            `rgb`.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`.
            If backend is None, the global imread_backend specified by
            ``mmcv.use_backend()`` will be used. Default: None.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        ndarray: Loaded image array.

    Raises:
        TypeError: If ``img_or_path`` is not an array, a str or a Path.

    Examples:
        >>> import mmcv
        >>> img_path = '/path/to/img.jpg'
        >>> img = mmcv.imread(img_path)
        >>> img = mmcv.imread(img_path, flag='color', channel_order='rgb',
        ...     backend='cv2')
        >>> img = mmcv.imread(img_path, flag='color', channel_order='bgr',
        ...     backend='pillow')
        >>> s3_img_path = 's3://bucket/img.jpg'
        >>> # infer the file backend by the prefix s3
        >>> img = mmcv.imread(s3_img_path)
        >>> # manually set the file backend petrel
        >>> img = mmcv.imread(s3_img_path, file_client_args={
        ...     'backend': 'petrel'})
        >>> http_img_path = 'http://path/to/img.jpg'
        >>> img = mmcv.imread(http_img_path)
        >>> img = mmcv.imread(http_img_path, file_client_args={
        ...     'backend': 'http'})
    """
    # Normalize Path objects so the string branch below handles them.
    if isinstance(img_or_path, Path):
        img_or_path = str(img_or_path)

    # Already-loaded images are passed through untouched.
    if isinstance(img_or_path, np.ndarray):
        return img_or_path

    if not is_str(img_or_path):
        raise TypeError('"img" must be a numpy array or a str or '
                        'a pathlib.Path object')

    client = FileClient.infer_client(file_client_args, img_or_path)
    raw = client.get(img_or_path)
    return imfrombytes(raw, flag, channel_order, backend)
class CompositeFg:
    """Composite foreground with a random foreground.

    This class composites the current training sample with additional data
    randomly (could be from the same dataset). With probability 0.5, the
    sample will be composited with a random sample from the specified
    directory. The composition is performed as:

    .. math::
        fg_{new} = \\alpha_1 * fg_1 + (1 - \\alpha_1) * fg_2

        \\alpha_{new} = 1 - (1 - \\alpha_1) * (1 - \\alpha_2)

    where :math:`(fg_1, \\alpha_1)` is from the current sample and
    :math:`(fg_2, \\alpha_2)` is the randomly loaded sample. With the above
    composition, :math:`\\alpha_{new}` is still in `[0, 1]`.

    Required keys are "alpha" and "fg". Modified keys are "alpha" and "fg".

    Args:
        fg_dirs (str | list[str]): Path of directories to load foreground
            images from.
        alpha_dirs (str | list[str]): Path of directories to load alpha
            mattes from.
        interpolation (str): Interpolation method of `mmcv.imresize` to
            resize the randomly loaded images. Default: 'nearest'.
        io_backend (str): Backend name handed to ``FileClient``.
            Default: 'disk'.
        kwargs (dict): Args for file client.
    """

    def __init__(self,
                 fg_dirs,
                 alpha_dirs,
                 interpolation='nearest',
                 io_backend='disk',
                 **kwargs):
        self.fg_dirs = fg_dirs if isinstance(fg_dirs, list) else [fg_dirs]
        self.alpha_dirs = alpha_dirs if isinstance(
            alpha_dirs, list) else [alpha_dirs]
        self.interpolation = interpolation

        self.fg_list, self.alpha_list = self._get_file_list(
            self.fg_dirs, self.alpha_dirs)

        self.io_backend = io_backend
        # The file client is created lazily on the first call.
        self.file_client = None
        self.kwargs = kwargs

    def __call__(self, results):
        """Composite the sample foreground with a randomly chosen one.

        Args:
            results (dict): A dict containing the necessary information
                and data for augmentation.

        Returns:
            dict: A dict containing the processed data and information,
                with "fg" updated and "alpha" rewritten as uint8.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        fg = results['fg']
        alpha = results['alpha'].astype(np.float32) / 255.
        h, w = results['fg'].shape[:2]

        # randomly select fg
        if np.random.rand() < 0.5:
            idx = np.random.randint(len(self.fg_list))
            fg2_bytes = self.file_client.get(self.fg_list[idx])
            fg2 = mmcv.imfrombytes(fg2_bytes)
            alpha2_bytes = self.file_client.get(self.alpha_list[idx])
            alpha2 = mmcv.imfrombytes(alpha2_bytes, flag='grayscale')
            alpha2 = alpha2.astype(np.float32) / 255.

            fg2 = mmcv.imresize(
                fg2, (w, h), interpolation=self.interpolation)
            alpha2 = mmcv.imresize(
                alpha2, (w, h), interpolation=self.interpolation)

            # the overlap of two 50% transparency will be 75%
            alpha_tmp = 1 - (1 - alpha) * (1 - alpha2)
            # if the result alpha is all-one, then we avoid composition
            if np.any(alpha_tmp < 1):
                # composite fg with fg2
                fg = fg.astype(np.float32) * alpha[..., None] \
                    + fg2.astype(np.float32) * (1 - alpha[..., None])
                alpha = alpha_tmp
                # ``astype`` returns a new array; the result used to be
                # discarded, leaving ``fg`` as float32.
                fg = fg.astype(np.uint8)

        results['fg'] = fg
        results['alpha'] = (alpha * 255).astype(np.uint8)
        return results

    @staticmethod
    def _get_file_list(fg_dirs, alpha_dirs):
        """Collect matching foreground and alpha paths from the directories.

        Args:
            fg_dirs (list[str]): Foreground directories.
            alpha_dirs (list[str]): Alpha directories, paired with
                ``fg_dirs`` by position.

        Returns:
            tuple[list[str], list[str]]: Foreground paths and alpha paths,
                index-aligned.
        """
        all_fg_list = list()
        all_alpha_list = list()
        for fg_dir, alpha_dir in zip(fg_dirs, alpha_dirs):
            fg_list = sorted(mmcv.scandir(fg_dir))
            alpha_list = sorted(mmcv.scandir(alpha_dir))
            # we assume the file names for fg and alpha are the same
            assert len(fg_list) == len(alpha_list), (
                f'{fg_dir} and {alpha_dir} should have the same number of '
                f'images ({len(fg_list)} differs from ({len(alpha_list)})')
            fg_list = [osp.join(fg_dir, fg) for fg in fg_list]
            alpha_list = [osp.join(alpha_dir, alpha) for alpha in alpha_list]

            all_fg_list.extend(fg_list)
            all_alpha_list.extend(alpha_list)
        return all_fg_list, all_alpha_list

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(fg_dirs={self.fg_dirs}, alpha_dirs={self.alpha_dirs}, '
                     f"interpolation='{self.interpolation}')")
        return repr_str
class LoadPairedImageFromFile(LoadImageFromFile):
    """Load a pair of images from file.

    Each sample contains a pair of images, which are concatenated in the w
    dimension (a|b). This is a special loading class for generation paired
    dataset. It loads a pair of images as the common loader does and crops
    it into two images with the same shape in different domains.

    Required key is "pair_path". Added or modified keys are "pair",
    "pair_ori_shape", "ori_pair", "img_a", "img_b", "img_a_path",
    "img_b_path", "img_a_ori_shape", "img_b_ori_shape", "ori_img_a" and
    "ori_img_b".

    Args:
        io_backend (str): io backend where images are store. Default: 'disk'.
        key (str): Keys in results to find corresponding path. Default: 'gt'.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
            Default: 'bgr'.
        save_original_img (bool): If True, maintain a copy of the image in
            `results` dict with name of `f'ori_{key}'`. Default: False.
        kwargs (dict): Args for file client.
    """

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.

        Raises:
            ValueError: If the width of the loaded image is odd, so it cannot
                be split into two halves of equal width.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        filepath = str(results[f'{self.key}_path'])
        img_bytes = self.file_client.get(filepath)
        # Honour the documented ``channel_order`` argument; with the default
        # 'bgr' this decodes exactly as before.
        img = mmcv.imfrombytes(
            img_bytes, flag=self.flag,
            channel_order=self.channel_order)  # HWC
        if img.ndim == 2:
            # Give single-channel images an explicit channel axis.
            img = np.expand_dims(img, axis=2)

        results[self.key] = img
        results[f'{self.key}_path'] = filepath
        results[f'{self.key}_ori_shape'] = img.shape
        if self.save_original_img:
            results[f'ori_{self.key}'] = img.copy()

        # crop pair into a and b
        w = img.shape[1]
        if w % 2 != 0:
            raise ValueError(
                f'The width of image pair must be even number, but got {w}.')
        new_w = w // 2
        # Left half is domain a, right half is domain b (both are slices of
        # ``img``).
        img_a = img[:, :new_w, :]
        img_b = img[:, new_w:, :]

        results['img_a'] = img_a
        results['img_b'] = img_b
        results['img_a_path'] = filepath
        results['img_b_path'] = filepath
        results['img_a_ori_shape'] = img_a.shape
        results['img_b_ori_shape'] = img_b.shape
        if self.save_original_img:
            results['ori_img_a'] = img_a.copy()
            results['ori_img_b'] = img_b.copy()

        return results
class LoadMask:
    """Load Mask for multiple types.

    For different types of mask, users need to provide the corresponding
    config dict.

    Example config for bbox:

    .. code-block:: python

        config = dict(img_shape=(256, 256), max_bbox_shape=128)

    Example config for irregular:

    .. code-block:: python

        config = dict(
            img_shape=(256, 256),
            num_vertexes=(4, 12),
            max_angle=4.,
            length_range=(10, 100),
            brush_width=(10, 40),
            area_ratio_range=(0.15, 0.5))

    Example config for ff:

    .. code-block:: python

        config = dict(
            img_shape=(256, 256),
            num_vertexes=(4, 12),
            mean_angle=1.2,
            angle_range=0.4,
            brush_width=(12, 40))

    Example config for set:

    .. code-block:: python

        config = dict(
            mask_list_file='xxx/xxx/ooxx.txt',
            prefix='/xxx/xxx/ooxx/',
            io_backend='disk',
            flag='unchanged',
            file_client_kwargs=dict()
        )

        The mask_list_file contains the list of mask file name like this:
            test1.jpeg
            test2.jpeg
            ...
            ...

        The prefix gives the data path.

    Args:
        mask_mode (str): Mask mode in ['bbox', 'irregular', 'ff', 'set',
            'file'].
            * bbox: square bounding box masks.
            * irregular: irregular holes.
            * ff: free-form holes from DeepFillv2.
            * set: randomly get a mask from a mask set.
            * file: get mask from 'mask_path' in results.
        mask_config (dict): Params for creating masks. Each type of mask needs
            different configs.
    """

    def __init__(self, mask_mode='bbox', mask_config=None):
        self.mask_mode = mask_mode
        self.mask_config = dict() if mask_config is None else mask_config
        assert isinstance(self.mask_config, dict)

        # set init info if needed in some modes
        self._init_info()

    def _init_info(self):
        """Prepare the per-mode state needed to read masks from storage.

        Only the 'set' and 'file' modes read mask images; the other modes
        need no extra state.
        """
        if self.mask_mode == 'set':
            # get mask list information
            self.mask_list = []
            mask_list_file = self.mask_config['mask_list_file']
            prefix = Path(self.mask_config['prefix'])
            with open(mask_list_file, 'r') as f:
                for line in f:
                    # Only the first space-separated field is the file name.
                    mask_name = line.strip().split(' ')[0]
                    if not mask_name:
                        # A blank line would otherwise register the prefix
                        # directory itself as a mask file.
                        continue
                    self.mask_list.append(prefix.joinpath(mask_name))
            self.mask_set_size = len(self.mask_list)
            self.io_backend = self.mask_config['io_backend']
            self.flag = self.mask_config['flag']
            self.file_client_kwargs = self.mask_config['file_client_kwargs']
            self.file_client = None
        elif self.mask_mode == 'file':
            self.io_backend = 'disk'
            self.flag = 'unchanged'
            self.file_client_kwargs = dict()
            self.file_client = None

    def _read_mask(self, path):
        """Read one mask image and binarize it.

        Args:
            path (str | Path): Location handed to the file client.

        Returns:
            ndarray: Mask with a single trailing channel axis in which every
                positive value has been set to 1.
        """
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend,
                                          **self.file_client_kwargs)
        mask_bytes = self.file_client.get(path)
        mask = mmcv.imfrombytes(mask_bytes, flag=self.flag)  # HWC, BGR
        if mask.ndim == 2:
            mask = np.expand_dims(mask, axis=2)
        else:
            # Keep only the first channel of multi-channel masks.
            mask = mask[:, :, 0:1]

        mask[mask > 0] = 1.
        return mask

    def _get_random_mask_from_set(self):
        """Load a uniformly sampled mask from the configured mask set."""
        # ``np.random.randint`` excludes the upper bound, so the index is
        # always within ``[0, mask_set_size - 1]``.
        mask_idx = np.random.randint(0, self.mask_set_size)
        return self._read_mask(self.mask_list[mask_idx])

    def _get_mask_from_file(self, path):
        """Load the mask stored at ``path``."""
        return self._read_mask(path)

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.

        Raises:
            NotImplementedError: If ``mask_mode`` is not one of the supported
                modes.
        """
        if self.mask_mode == 'bbox':
            mask_bbox = random_bbox(**self.mask_config)
            mask = bbox2mask(self.mask_config['img_shape'], mask_bbox)
            results['mask_bbox'] = mask_bbox
        elif self.mask_mode == 'irregular':
            mask = get_irregular_mask(**self.mask_config)
        elif self.mask_mode == 'set':
            mask = self._get_random_mask_from_set()
        elif self.mask_mode == 'ff':
            mask = brush_stroke_mask(**self.mask_config)
        elif self.mask_mode == 'file':
            mask = self._get_mask_from_file(results['mask_path'])
        else:
            raise NotImplementedError(
                f'Mask mode {self.mask_mode} has not been implemented.')
        results['mask'] = mask
        return results

    def __repr__(self):
        return self.__class__.__name__ + f"(mask_mode='{self.mask_mode}')"
class LoadImageFromFile:
    """Load image from file.

    Args:
        io_backend (str): io backend where images are store. Default: 'disk'.
        key (str): Keys in results to find corresponding path. Default: 'gt'.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'.
            Default: 'bgr'.
        convert_to (str | None): The color space of the output image. Only
            'y' (case-insensitive) is supported, which keeps the Y channel of
            YCbCr. If None, no conversion is conducted. Default: None.
        save_original_img (bool): If True, maintain a copy of the image in
            `results` dict with name of `f'ori_{key}'`. Default: False.
        use_cache (bool): If True, keep every decoded image in an in-memory
            dict keyed by file path and reuse it on later calls.
            Default: False.
        backend (str): The image loading backend type. Options are `cv2`,
            `pillow`, and 'turbojpeg'. Default: None.
        kwargs (dict): Args for file client.
    """

    def __init__(self,
                 io_backend='disk',
                 key='gt',
                 flag='color',
                 channel_order='bgr',
                 convert_to=None,
                 save_original_img=False,
                 use_cache=False,
                 backend=None,
                 **kwargs):
        self.io_backend = io_backend
        self.key = key
        self.flag = flag
        self.save_original_img = save_original_img
        self.channel_order = channel_order
        self.convert_to = convert_to
        self.kwargs = kwargs
        # The client and the cache dict are created lazily in ``__call__``.
        self.file_client = None
        self.use_cache = use_cache
        self.cache = None
        self.backend = backend

    def _read_image(self, filepath):
        """Fetch ``filepath`` through the file client and decode it."""
        img_bytes = self.file_client.get(filepath)
        return mmcv.imfrombytes(
            img_bytes,
            flag=self.flag,
            channel_order=self.channel_order,
            backend=self.backend)  # HWC

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.

        Raises:
            ValueError: If ``convert_to`` is set to anything other than 'y',
                or ``channel_order`` is neither 'bgr' nor 'rgb' while a
                conversion is requested.
        """
        filepath = str(results[f'{self.key}_path'])
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        if self.use_cache:
            if self.cache is None:
                self.cache = dict()
            if filepath in self.cache:
                # NOTE(review): the cached array is handed out without a
                # copy, so a downstream transform that edits the image in
                # place would also alter the cached entry -- confirm whether
                # a copy is needed here.
                img = self.cache[filepath]
            else:
                img = self._read_image(filepath)
                self.cache[filepath] = img
        else:
            img = self._read_image(filepath)

        if self.convert_to is not None:
            # The target is validated for both channel orders; previously an
            # 'rgb' image was converted to Y whatever ``convert_to`` held.
            to_y = self.convert_to.lower() == 'y'
            if to_y and self.channel_order == 'bgr':
                img = mmcv.bgr2ycbcr(img, y_only=True)
            elif to_y and self.channel_order == 'rgb':
                img = mmcv.rgb2ycbcr(img, y_only=True)
            else:
                raise ValueError('Currently support only "bgr2ycbcr" or '
                                 '"rgb2ycbcr".')
            if img.ndim == 2:
                # The Y-only result has no channel axis; add one back.
                img = np.expand_dims(img, axis=2)

        results[self.key] = img
        results[f'{self.key}_path'] = filepath
        results[f'{self.key}_ori_shape'] = img.shape
        if self.save_original_img:
            results[f'ori_{self.key}'] = img.copy()

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (
            f'(io_backend={self.io_backend}, key={self.key}, '
            f'flag={self.flag}, save_original_img={self.save_original_img}, '
            f'channel_order={self.channel_order}, use_cache={self.use_cache})')
        return repr_str