Example #1
def get_local_path(input_file, dest_dir):
    """
    If user specified copying data to a local directory,
    get the local path where the data files were copied.

    - If input_file is a file, we return dest_dir/filename.
    - If input_file is a directory, we check whether the environment
      is SLURM (using the slurm dir in that case, otherwise dest_dir)
      and look for a copy_complete file there. If it exists, we
      return the directory.
    - If both of the above fail, we return input_file as is.
    """
    out = ""
    if g_pathmgr.isfile(input_file):
        out = os.path.join(dest_dir, os.path.basename(input_file))
    elif g_pathmgr.isdir(input_file):
        data_name = input_file.strip("/").split("/")[-1]
        if "SLURM_JOBID" in os.environ:
            dest_dir = get_slurm_dir(dest_dir)
        dest_dir = os.path.join(dest_dir, data_name)
        complete_flag = os.path.join(dest_dir, "copy_complete")
        if g_pathmgr.isfile(complete_flag):
            out = dest_dir
    if g_pathmgr.exists(out):
        return out
    else:
        return input_file
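
A brief usage sketch (paths hypothetical, for illustration only):

# Assumes copy_dir previously copied the dataset and dropped a
# copy_complete marker into /scratch/cache/imagenet.
path = get_local_path("/datasets/imagenet", "/scratch/cache")
# -> "/scratch/cache/imagenet" if the copy_complete marker exists,
#    otherwise the original "/datasets/imagenet" unchanged.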
Example #2
 def __init__(self, cfg, data_source, path, split, dataset_name):
     super(DiskImageDataset, self).__init__(
         queue_size=cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"])
     assert data_source in [
         "disk_filelist",
         "disk_folder",
         "disk_roi_annotations",
     ], "data_source must be one of: disk_filelist, disk_folder, disk_roi_annotations"
     if data_source == "disk_filelist":
         assert g_pathmgr.isfile(path), f"File {path} does not exist"
     elif data_source == "disk_folder":
         assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
     elif data_source == "disk_roi_annotations":
         assert g_pathmgr.isfile(path), f"File {path} does not exist"
         assert path.endswith("json"), "Annotations must be in json format"
     self.cfg = cfg
     self.split = split
     self.dataset_name = dataset_name
     self.data_source = data_source
     self._path = path
     self.image_dataset = []
     self.image_roi_bbox = []
     self.is_initialized = False
     self._load_data(path)
     self._num_samples = len(self.image_dataset)
     self._remove_prefix = cfg["DATA"][self.split]["REMOVE_IMG_PATH_PREFIX"]
     self._new_prefix = cfg["DATA"][self.split]["NEW_IMG_PATH_PREFIX"]
     if self.data_source in ["disk_filelist", "disk_roi_annotations"]:
         # Set dataset to null so that workers don't need to pickle this file.
         # This saves memory when disk_filelist is large, especially when memory mapping.
         self.image_dataset = []
         self.image_roi_bbox = []
     # whether to use the QueueDataset class to handle invalid images
     self.enable_queue_dataset = cfg["DATA"][self.split]["ENABLE_QUEUE_DATASET"]
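
A minimal sketch of the config keys the constructor reads, assuming a plain
nested dict can stand in for the real cfg object (all values illustrative):

cfg = {"DATA": {"TRAIN": {
    "BATCHSIZE_PER_REPLICA": 32,
    "REMOVE_IMG_PATH_PREFIX": "",
    "NEW_IMG_PATH_PREFIX": "",
    "ENABLE_QUEUE_DATASET": False,
}}}
dataset = DiskImageDataset(cfg, "disk_folder", "/datasets/imagenet/train",
                           "TRAIN", "imagenet")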
Example #3
def copy_data(input_file, destination_dir, num_threads, tmp_destination_dir):
    """
    Copy data from one source to the other using num_threads. The data to copy
    can be a single file or a directory. We check what type of data and
    call the relevant functions.

    Returns:
        output_file (str): the new path of the data (could be a file or a dir)
        output_dir (str): the destination dir that was actually used
    """
    # Normalize the destination: an empty string or None means no copy dir.
    if destination_dir is None or destination_dir == "":
        destination_dir = None
    else:
        logging.info(f"Creating directory: {destination_dir}")
        makedir(destination_dir)
    if g_pathmgr.isfile(input_file):
        output_file, output_dir = copy_file(
            input_file, destination_dir, tmp_destination_dir
        )
    elif g_pathmgr.isdir(input_file):
        output_file, output_dir = copy_dir(input_file, destination_dir, num_threads)
    else:
        raise RuntimeError("The input_file is neither a file nor a directory")
    return output_file, output_dir
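
A hedged usage sketch (paths hypothetical):

# Copy a directory with 20 threads; output_file is the new data path and
# output_dir the destination directory that was actually used.
output_file, output_dir = copy_data(
    input_file="/datasets/imagenet",
    destination_dir="/scratch/cache",
    num_threads=20,
    tmp_destination_dir="/tmp/cache",
)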
Example #4
    def video_from_path(self,
                        filepath,
                        decode_audio=False,
                        decoder="pyav",
                        fps=30):
        try:
            is_file = g_pathmgr.isfile(filepath)
            is_dir = g_pathmgr.isdir(filepath)
        except NotImplementedError:
            # Not all PathManager handlers support the is{file,dir} functions;
            # when this is the case, we default to assuming the path is a file.
            is_file = True
            is_dir = False

        if is_file:
            from pytorchvideo.data.encoded_video import EncodedVideo

            return EncodedVideo.from_path(filepath, decode_audio, decoder)
        elif is_dir:
            from pytorchvideo.data.frame_video import FrameVideo

            assert not decode_audio, "decode_audio must be False when using FrameVideo"
            return FrameVideo.from_directory(
                filepath, fps, path_order_cache=self.path_order_cache)
        else:
            raise FileNotFoundError(f"{filepath} not found.")
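
A usage sketch, assuming dataset is an object exposing this method and the
paths are hypothetical:

video = dataset.video_from_path("/videos/clip_01.mp4")      # encoded video file
video = dataset.video_from_path("/videos/clip_01_frames/")  # directory of frames
clip = video.get_clip(start_sec=0.0, end_sec=2.0)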
Example #5
    def load_single_label_file(self, path: str):
        """
        Load a single label file. We only support numpy label files, and only
        when the user specifies a data_filelist source of labels.

        To save memory, if MMAP_MODE is set for loading, we try to load the
        labels in mmap_mode. If that fails, we simply load the labels without mmap.
        """
        assert g_pathmgr.isfile(path), f"Path to labels {path} is not a file"
        assert path.endswith("npy"), "Please specify a numpy file for labels"
        if self.cfg["DATA"][self.split].MMAP_MODE:
            try:
                with g_pathmgr.open(path, "rb") as fopen:
                    labels = np.load(fopen, allow_pickle=True, mmap_mode="r")
            except ValueError as e:
                logging.info(
                    f"Could not mmap {path}: {e}. Trying without g_pathmgr")
                labels = np.load(path, allow_pickle=True, mmap_mode="r")
                logging.info("Successfully loaded without g_pathmgr")
            except Exception:
                logging.info(
                    "Could not mmap without g_pathmgr. Trying without mmap")
                with g_pathmgr.open(path, "rb") as fopen:
                    labels = np.load(fopen, allow_pickle=True)
        else:
            with g_pathmgr.open(path, "rb") as fopen:
                labels = np.load(fopen, allow_pickle=True)
        return labels
Example #6
    @classmethod
    def from_path(cls, data_path: str) -> LabeledVideoPaths:
        """
        Factory function that creates a LabeledVideoPaths object depending on the path
        type.
        - If it is a directory path, it uses the LabeledVideoPaths.from_directory function.
        - If it is a file path, it uses the LabeledVideoPaths.from_csv function.
        Args:
            data_path (str): The path to the file or directory to be read.
        """

        if g_pathmgr.isfile(data_path):
            return LabeledVideoPaths.from_csv(data_path)
        elif g_pathmgr.isdir(data_path):
            return LabeledVideoPaths.from_directory(data_path)
        else:
            raise FileNotFoundError(f"{data_path} not found.")
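
Usage follows directly from the dispatch above (paths hypothetical):

paths = LabeledVideoPaths.from_path("/data/train.csv")  # file -> from_csv
paths = LabeledVideoPaths.from_path("/data/train/")     # directory -> from_directory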
Example #7
def copy_dir(input_dir, destination_dir, num_threads):
    """
    Copy contents of one directory to the specified destination directory
    using the number of threads to speed up the copy. When the data is
    copied successfully, we create a copy_complete file in the
    destination_dir folder to mark the completion. If the destination_dir
    folder already exists and contains a copy_complete file, we skip
    the copy.

    Useful for copying datasets like ImageNet to speed up the dataloader.
    With 20 threads, ImageNet takes about 20 minutes to copy.

    Returns:
        destination_dir (str): the directory the contents were copied to,
            returned twice to match the (output_file, output_dir) pair that
            copy_data unpacks
    """
    # remove the trailing slash if the user added one
    data_name = input_dir.strip("/").split("/")[-1]
    if "SLURM_JOBID" in os.environ:
        destination_dir = get_slurm_dir(destination_dir)
    destination_dir = f"{destination_dir}/{data_name}"
    makedir(destination_dir)
    complete_flag = f"{destination_dir}/copy_complete"
    if g_pathmgr.isfile(complete_flag):
        logging.info(f"Found Data already copied: {destination_dir}...")
        return destination_dir, destination_dir
    logging.info(
        f"Copying {input_dir} to dir {destination_dir} using {num_threads} threads"
    )
    # We have to do multi-threaded rsync to speed up copy.
    cmd = (
        f"ls -d {input_dir}/* | parallel -j {num_threads} --will-cite "
        f"rsync -ruW --inplace {{}} {destination_dir}"
    )
    os.system(cmd)
    g_pathmgr.open(complete_flag, "a").close()
    logging.info("Copied to local directory")
    return destination_dir, destination_dir
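
For concreteness, with the hypothetical arguments input_dir="/datasets/imagenet",
a destination that resolved to "/scratch/job_123", and num_threads=20, the
command built above expands to:

ls -d /datasets/imagenet/* | parallel -j 20 --will-cite rsync -ruW --inplace {} /scratch/job_123/imagenet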
Example #8
 def isfile(path: str) -> bool:
     if IOPathManager:
         return IOPathManager.isfile(path)
     return os.path.isfile(path)
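
The same guard extends naturally to other predicates; a sketch of a matching
isdir wrapper, under the same assumption that IOPathManager may be unset:

def isdir(path: str) -> bool:
    # Prefer the configured iopath manager; otherwise fall back to the
    # local filesystem check.
    if IOPathManager:
        return IOPathManager.isdir(path)
    return os.path.isdir(path)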
Example #9
def clip_recognition_dataset(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[
        torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
):
    """
    Builds a LabeledVideoDataset with noun, verb annotations from a json file with the following
    format:

        .. code-block:: text

            {
              "video_name1": {
                  "benchmarks": {
                      "forecasting_hands_objects": [
                          {
                              "critical_frame_selection_parent_start_sec": <start_sec>,
                              "critical_frame_selection_parent_end_sec": <end_sec>,
                              "taxonomy": {
                                  "noun": <label>,
                                  "verb": <label>,
                              }
                          },
                          {
                              ...
                          }
                      ]
                  }
              },
              "video_name2": {...},
              ....
              "video_nameN": {...}
            }

    Args:
        data_path (str): Path to the json annotation file in the format
            described above.

        video_path_prefix (str): Prefix joined onto each video name to form
            the full video path.

        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split.

        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations on the clips. The clip output format is described in __next__().

        decode_audio (bool): If True, also decode audio from video.

        decoder (str): Defines what type of decoder used to decode a video. Not used for
            frame videos.
    """
    if g_pathmgr.isfile(data_path):
        try:
            with g_pathmgr.open(data_path, "r") as f:
                annotations = json.load(f)
        except Exception:
            raise FileNotFoundError(
                f"{data_path} must be json for Ego4D dataset")

        # LabeledVideoDataset requires the data to be a list of tuples with format:
        # (video_path, annotation_dict).
        untrimmed_clip_annotations = []
        for video_name, child in annotations.items():
            video_path = os.path.join(video_path_prefix, video_name)
            for clip_annotation in child["benchmarks"][
                    "forecasting_hands_objects"]:
                clip_start = clip_annotation[
                    "critical_frame_selection_parent_start_sec"]
                clip_end = clip_annotation[
                    "critical_frame_selection_parent_end_sec"]
                taxonomy = clip_annotation["taxonomy"]
                noun_label = taxonomy["noun"]
                verb_label = taxonomy["verb"]
                verb_unsure = taxonomy["verb_unsure"]
                noun_unsure = taxonomy["noun_unsure"]
                if (noun_label is None or verb_label is None or verb_unsure
                        or noun_unsure):
                    continue

                untrimmed_clip_annotations.append((
                    video_path,
                    {
                        "clip_start_sec": clip_start,
                        "clip_end_sec": clip_end,
                        "noun_label": noun_label,
                        "verb_label": verb_label,
                    },
                ))
    else:
        raise FileNotFoundError(f"{data_path} not found.")

    # Map noun and verb keywords to unique indices.
    def map_labels_to_index(label_name):
        # Sort for a deterministic label -> index assignment across runs.
        labels = sorted({info[label_name] for _, info in untrimmed_clip_annotations})
        label_to_idx = {label: i for i, label in enumerate(labels)}
        for i in range(len(untrimmed_clip_annotations)):
            label = untrimmed_clip_annotations[i][1][label_name]
            untrimmed_clip_annotations[i][1][label_name] = label_to_idx[label]

    map_labels_to_index("noun_label")
    map_labels_to_index("verb_label")

    dataset = LabeledVideoDataset(
        untrimmed_clip_annotations,
        UntrimmedClipSampler(clip_sampler),
        video_sampler,
        transform,
        decode_audio=decode_audio,
        decoder=decoder,
    )
    return dataset
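
A hedged construction sketch (paths hypothetical; make_clip_sampler is
pytorchvideo's clip-sampler factory):

from pytorchvideo.data import make_clip_sampler

dataset = clip_recognition_dataset(
    data_path="/annotations/fho_annotations.json",
    clip_sampler=make_clip_sampler("uniform", 2.0),  # 2-second clips
    video_path_prefix="/videos",
    decode_audio=False,
)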
Example #10
def video_only_dataset(
    data_path: str,
    clip_sampler: ClipSampler,
    video_sampler: Type[
        torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
    video_path_prefix: str = "",
    decode_audio: bool = True,
    decoder: str = "pyav",
):
    """
    Builds a LabeledVideoDataset with no annotations from a json file with the following
    format:

        .. code-block:: text

            {
              "video_name1": {...}
              "video_name2": {...}
              ....
              "video_nameN": {...}
            }

    Args:
        data_path (str): Path to the json file listing the video names in the
            format described above.

        video_path_prefix (str): Prefix joined onto each video name to form
            the full video path.

        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.

        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split.

        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations on the clips. The clip output format is described in __next__().

        decode_audio (bool): If True, also decode audio from video.

        decoder (str): Defines what type of decoder used to decode a video. Not used for
            frame videos.
    """

    torch._C._log_api_usage_once(
        "PYTORCHVIDEO.dataset.json_dataset.video_only_dataset")

    if g_pathmgr.isfile(data_path):
        try:
            with g_pathmgr.open(data_path, "r") as f:
                annotations = json.load(f)
        except Exception:
            raise FileNotFoundError(
                f"{data_path} must be json for Ego4D dataset")

        # LabeledVideoDataset requires the data to be a list of tuples with format:
        # (video_path, annotation_dict); with no annotations we pass an empty dict.
        video_paths = [(os.path.join(video_path_prefix, x), {})
                       for x in annotations.keys()]
    else:
        raise FileNotFoundError(f"{data_path} not found.")

    dataset = LabeledVideoDataset(
        video_paths,
        clip_sampler,
        video_sampler,
        transform,
        decode_audio=decode_audio,
        decoder=decoder,
    )
    return dataset
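
And a matching sketch for this unannotated variant; iterating the returned
LabeledVideoDataset yields clip dicts (paths hypothetical):

from pytorchvideo.data import make_clip_sampler

dataset = video_only_dataset(
    data_path="/annotations/video_names.json",
    clip_sampler=make_clip_sampler("random", 2.0),
    video_path_prefix="/videos",
    decode_audio=False,
)
sample = next(iter(dataset))  # dict with "video", "video_name", ...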