def get_local_path(input_file, dest_dir):
    """
    If the user specified copying data to a local directory, get the local
    path where the data files were copied.

    - If input_file is a file, we return dest_dir/filename.
    - If input_file is a directory, we check whether the environment is SLURM
      (using the slurm_dir in that case, otherwise dest_dir) and look for a
      copy_complete file. If it exists, we return the directory.
    - If both of the above fail, we return the input_file as is.
    """
    out = ""
    if g_pathmgr.isfile(input_file):
        out = os.path.join(dest_dir, os.path.basename(input_file))
    elif g_pathmgr.isdir(input_file):
        data_name = input_file.strip("/").split("/")[-1]
        if "SLURM_JOBID" in os.environ:
            dest_dir = get_slurm_dir(dest_dir)
        dest_dir = os.path.join(dest_dir, data_name)
        complete_flag = os.path.join(dest_dir, "copy_complete")
        if g_pathmgr.isfile(complete_flag):
            out = dest_dir
    if g_pathmgr.exists(out):
        return out
    else:
        return input_file
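# A minimal usage sketch (not part of the library); the paths are hypothetical
# and assume the dataset was copied earlier with copy_data/copy_dir below.
#
#   local_path = get_local_path("/datasets/imagenet", dest_dir="/scratch/data")
#   # -> "/scratch/data/imagenet" if that directory holds a copy_complete file,
#   #    otherwise the original "/datasets/imagenet" is returned unchanged.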
def __init__(self, cfg, data_source, path, split, dataset_name):
    super(DiskImageDataset, self).__init__(
        queue_size=cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"]
    )
    assert data_source in [
        "disk_filelist",
        "disk_folder",
        "disk_roi_annotations",
    ], "data_source must be disk_filelist, disk_folder or disk_roi_annotations"
    if data_source == "disk_filelist":
        assert g_pathmgr.isfile(path), f"File {path} does not exist"
    elif data_source == "disk_folder":
        assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
    elif data_source == "disk_roi_annotations":
        assert g_pathmgr.isfile(path), f"File {path} does not exist"
        assert path.endswith("json"), "Annotations must be in json format"
    self.cfg = cfg
    self.split = split
    self.dataset_name = dataset_name
    self.data_source = data_source
    self._path = path
    self.image_dataset = []
    self.image_roi_bbox = []
    self.is_initialized = False
    self._load_data(path)
    self._num_samples = len(self.image_dataset)
    self._remove_prefix = cfg["DATA"][self.split]["REMOVE_IMG_PATH_PREFIX"]
    self._new_prefix = cfg["DATA"][self.split]["NEW_IMG_PATH_PREFIX"]
    if self.data_source in ["disk_filelist", "disk_roi_annotations"]:
        # Set the dataset to null so that workers don't need to pickle this
        # file. This saves memory when disk_filelist is large, especially
        # when memory mapping.
        self.image_dataset = []
        self.image_roi_bbox = []
    # whether to use the QueueDataset class to handle invalid images or not
    self.enable_queue_dataset = cfg["DATA"][self.split]["ENABLE_QUEUE_DATASET"]
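# For reference, the config keys this constructor reads (a sketch of the shape,
# not the full schema): cfg["DATA"][split] must provide BATCHSIZE_PER_REPLICA,
# REMOVE_IMG_PATH_PREFIX, NEW_IMG_PATH_PREFIX and ENABLE_QUEUE_DATASET.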
def copy_data(input_file, destination_dir, num_threads, tmp_destination_dir):
    """
    Copy data from one source to the other using num_threads. The data to copy
    can be a single file or a directory. We check the type of the data and
    call the relevant function.

    Returns:
        output_file (str): the new path of the data (could be a file or a dir)
        destination_dir (str): the destination dir that was actually used
    """
    # If destination_dir is "" or None, normalize it to None; otherwise
    # create the destination directory.
    if not (destination_dir is None or destination_dir == ""):
        logging.info(f"Creating directory: {destination_dir}")
        makedir(destination_dir)
    else:
        destination_dir = None
    if g_pathmgr.isfile(input_file):
        output_file, output_dir = copy_file(
            input_file, destination_dir, tmp_destination_dir
        )
    elif g_pathmgr.isdir(input_file):
        output_file, output_dir = copy_dir(input_file, destination_dir, num_threads)
    else:
        raise RuntimeError("The input_file is neither a file nor a directory")
    return output_file, output_dir
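# A minimal usage sketch with hypothetical paths; copy_file and copy_dir are
# the helpers dispatched to above.
#
#   output_path, used_dir = copy_data(
#       "/datasets/imagenet",            # file or directory to copy
#       destination_dir="/scratch/data",
#       num_threads=20,
#       tmp_destination_dir=None,
#   )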
def video_from_path(self, filepath, decode_audio=False, decoder="pyav", fps=30): try: is_file = g_pathmgr.isfile(filepath) is_dir = g_pathmgr.isdir(filepath) except NotImplementedError: # Not all PathManager handlers support is{file,dir} functions, when this is the # case, we default to assuming the path is a file. is_file = True is_dir = False if is_file: from pytorchvideo.data.encoded_video import EncodedVideo return EncodedVideo.from_path(filepath, decode_audio, decoder) elif is_dir: from pytorchvideo.data.frame_video import FrameVideo assert not decode_audio, "decode_audio must be False when using FrameVideo" return FrameVideo.from_directory( filepath, fps, path_order_cache=self.path_order_cache) else: raise FileNotFoundError(f"{filepath} not found.")
def load_single_label_file(self, path: str):
    """
    Load the single data file. We only support numpy label files when the
    user specifies a data_filelist source of labels.

    To save memory, if mmap_mode is set to True for loading, we try to load
    the labels in mmap_mode. If that fails, we simply load the labels
    without mmap.
    """
    assert g_pathmgr.isfile(path), f"Path to labels {path} is not a file"
    assert path.endswith("npy"), "Please specify a numpy file for labels"
    if self.cfg["DATA"][self.split].MMAP_MODE:
        try:
            with g_pathmgr.open(path, "rb") as fopen:
                labels = np.load(fopen, allow_pickle=True, mmap_mode="r")
        except ValueError as e:
            logging.info(f"Could not mmap {path}: {e}. Trying without g_pathmgr")
            labels = np.load(path, allow_pickle=True, mmap_mode="r")
            logging.info("Successfully loaded without g_pathmgr")
        except Exception:
            logging.info("Could not mmap without g_pathmgr. Trying without mmap")
            with g_pathmgr.open(path, "rb") as fopen:
                labels = np.load(fopen, allow_pickle=True)
    else:
        with g_pathmgr.open(path, "rb") as fopen:
            labels = np.load(fopen, allow_pickle=True)
    return labels
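# Why the ValueError fallback above works: numpy cannot memory-map an already
# open file handle (np.load raises ValueError when mmap_mode is combined with
# a file object), so the code retries np.load directly on the path, which can
# be memory-mapped. This only succeeds when the path is a plain local file
# rather than one that needs a g_pathmgr handler.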
def from_path(cls, data_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object depending on the
    path type.
    - If it is a directory path it uses the LabeledVideoPaths.from_directory
      function.
    - If it is a file path it uses the LabeledVideoPaths.from_csv function.

    Args:
        data_path (str): The path to the file or directory to be read.
    """
    if g_pathmgr.isfile(data_path):
        return LabeledVideoPaths.from_csv(data_path)
    elif g_pathmgr.isdir(data_path):
        return LabeledVideoPaths.from_directory(data_path)
    else:
        raise FileNotFoundError(f"{data_path} not found.")
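# A usage sketch with hypothetical paths: a csv of (path, label) rows or a
# directory laid out as <dir>/<class_name>/<video>.mp4 both work.
#
#   paths = LabeledVideoPaths.from_path("kinetics/train.csv")  # -> from_csv
#   paths = LabeledVideoPaths.from_path("kinetics/train/")     # -> from_directory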
def copy_dir(input_dir, destination_dir, num_threads):
    """
    Copy the contents of one directory to the specified destination directory
    using num_threads to speed up the copy. When the data is copied
    successfully, we create a copy_complete file in the destination_dir folder
    to mark the completion. If the destination_dir folder already exists and
    has the copy_complete file, we don't copy again.

    Useful for copying datasets like ImageNet to speed up the dataloader.
    Using 20 threads for ImageNet, the copy takes about 20 minutes.

    Returns:
        (destination_dir, destination_dir): the directory where the contents
        were copied, returned twice to match the (output_file, output_dir)
        contract expected by copy_data.
    """
    # remove the trailing slash if the user added it
    data_name = input_dir.strip("/").split("/")[-1]
    if "SLURM_JOBID" in os.environ:
        destination_dir = get_slurm_dir(destination_dir)
    destination_dir = f"{destination_dir}/{data_name}"
    makedir(destination_dir)
    complete_flag = f"{destination_dir}/copy_complete"
    if g_pathmgr.isfile(complete_flag):
        logging.info(f"Found data already copied: {destination_dir}...")
        return destination_dir, destination_dir
    logging.info(
        f"Copying {input_dir} to dir {destination_dir} using {num_threads} threads"
    )
    # We do a multi-threaded rsync to speed up the copy.
    cmd = (
        f"ls -d {input_dir}/* | parallel -j {num_threads} --will-cite "
        f"rsync -ruW --inplace {{}} {destination_dir}"
    )
    os.system(cmd)
    g_pathmgr.open(complete_flag, "a").close()
    logging.info("Copied to local directory")
    return destination_dir, destination_dir
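# For reference, with the hypothetical inputs input_dir="/datasets/imagenet",
# destination_dir="/scratch/data" and num_threads=20, the generated command is:
#
#   ls -d /datasets/imagenet/* | parallel -j 20 --will-cite \
#       rsync -ruW --inplace {} /scratch/data/imagenet
#
# i.e. each top-level entry of the input dir is handed to one of 20 parallel
# rsync workers, which is what makes the copy run in parallel.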
def isfile(path: str) -> bool:
    if IOPathManager:
        return IOPathManager.isfile(path)
    return os.path.isfile(path)
def clip_recognition_dataset(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
):
    """
    Builds a LabeledVideoDataset with noun, verb annotations from a json file
    with the following format:

    .. code-block:: text

        {
          "video_name1": {
            "benchmarks": {
              "forecasting_hands_objects": [
                {
                  "critical_frame_selection_parent_start_sec": <start_sec>,
                  "critical_frame_selection_parent_end_sec": <end_sec>,
                  "taxonomy": {
                    "noun": <label>,
                    "verb": <label>,
                    "noun_unsure": <bool>,
                    "verb_unsure": <bool>
                  }
                },
                {
                  ...
                }
              ]
            }
          },
          "video_name2": {...},
          ...
          "video_nameN": {...}
        }

    Args:
        data_path (str): Path to the json annotation file described above.

        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split.

        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations on the clips. The clip output format is described in
            __next__().

        video_path_prefix (str): Path to the root directory that the video names
            in the annotation file are joined against.

        decode_audio (bool): If True, also decode audio from video.

        decoder (str): Defines what type of decoder is used to decode a video.
            Not used for frame videos.
    """
    if g_pathmgr.isfile(data_path):
        try:
            with g_pathmgr.open(data_path, "r") as f:
                annotations = json.load(f)
        except Exception:
            raise FileNotFoundError(f"{data_path} must be json for Ego4D dataset")

        # LabeledVideoDataset requires the data to be a list of tuples with format:
        # (video_path, annotation_dict). Clips with missing or unsure labels are
        # skipped.
        untrimmed_clip_annotations = []
        for video_name, child in annotations.items():
            video_path = os.path.join(video_path_prefix, video_name)
            for clip_annotation in child["benchmarks"]["forecasting_hands_objects"]:
                clip_start = clip_annotation[
                    "critical_frame_selection_parent_start_sec"
                ]
                clip_end = clip_annotation[
                    "critical_frame_selection_parent_end_sec"
                ]
                taxonomy = clip_annotation["taxonomy"]
                noun_label = taxonomy["noun"]
                verb_label = taxonomy["verb"]
                verb_unsure = taxonomy["verb_unsure"]
                noun_unsure = taxonomy["noun_unsure"]
                if (
                    noun_label is None
                    or verb_label is None
                    or verb_unsure
                    or noun_unsure
                ):
                    continue
                untrimmed_clip_annotations.append(
                    (
                        video_path,
                        {
                            "clip_start_sec": clip_start,
                            "clip_end_sec": clip_end,
                            "noun_label": noun_label,
                            "verb_label": verb_label,
                        },
                    )
                )
    else:
        raise FileNotFoundError(f"{data_path} not found.")

    # Map noun and verb key words to unique indices.
    def map_labels_to_index(label_name):
        labels = list({info[label_name] for _, info in untrimmed_clip_annotations})
        label_to_idx = {label: i for i, label in enumerate(labels)}
        for i in range(len(untrimmed_clip_annotations)):
            label = untrimmed_clip_annotations[i][1][label_name]
            untrimmed_clip_annotations[i][1][label_name] = label_to_idx[label]

    map_labels_to_index("noun_label")
    map_labels_to_index("verb_label")

    dataset = LabeledVideoDataset(
        untrimmed_clip_annotations,
        UntrimmedClipSampler(clip_sampler),
        video_sampler,
        transform,
        decode_audio=decode_audio,
        decoder=decoder,
    )
    return dataset
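# A usage sketch with hypothetical paths, assuming the standard pytorchvideo
# clip sampler factory:
#
#   from pytorchvideo.data.clip_sampling import make_clip_sampler
#
#   dataset = clip_recognition_dataset(
#       data_path="ego4d/annotations.json",
#       clip_sampler=make_clip_sampler("uniform", 2.0),  # 2-second clips
#       video_path_prefix="ego4d/videos",
#   )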
def video_only_dataset(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
):
    """
    Builds a LabeledVideoDataset with no annotations from a json file with the
    following format:

    .. code-block:: text

        {
          "video_name1": {...},
          "video_name2": {...},
          ...
          "video_nameN": {...}
        }

    Args:
        data_path (str): Path to the json file described above.

        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split.

        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations on the clips. The clip output format is described in
            __next__().

        video_path_prefix (str): Path to the root directory that the video names
            in the json file are joined against.

        decode_audio (bool): If True, also decode audio from video.

        decoder (str): Defines what type of decoder is used to decode a video.
            Not used for frame videos.
    """
    torch._C._log_api_usage_once(
        "PYTORCHVIDEO.dataset.json_dataset.video_only_dataset"
    )

    if g_pathmgr.isfile(data_path):
        try:
            with g_pathmgr.open(data_path, "r") as f:
                annotations = json.load(f)
        except Exception:
            raise FileNotFoundError(f"{data_path} must be json for Ego4D dataset")

        # LabeledVideoDataset requires the data to be a list of tuples with format:
        # (video_path, annotation_dict); since there are no annotations we pass in
        # an empty dict for each video.
        video_paths = [
            (os.path.join(video_path_prefix, x), {}) for x in annotations.keys()
        ]
    else:
        raise FileNotFoundError(f"{data_path} not found.")

    dataset = LabeledVideoDataset(
        video_paths,
        clip_sampler,
        video_sampler,
        transform,
        decode_audio=decode_audio,
        decoder=decoder,
    )
    return dataset
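# A usage sketch with hypothetical paths, mirroring the example above but with
# no labels attached to each clip:
#
#   from pytorchvideo.data.clip_sampling import make_clip_sampler
#
#   dataset = video_only_dataset(
#       data_path="ego4d/videos.json",
#       clip_sampler=make_clip_sampler("random", 2.0),  # one random 2s clip per video
#       video_path_prefix="ego4d/videos",
#   )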