Ejemplo n.º 1
0
def get_local_path(input_file, dest_dir):
    """
    Resolve the local copy of `input_file`, falling back to `input_file` itself.

    - When `input_file` is a file, the candidate is
      dest_dir/<basename of input_file>.
    - When `input_file` is a directory, the candidate is
      <base>/<last path component of input_file>, where <base> is
      get_slurm_dir(dest_dir) if SLURM_JOBID is set in the environment and
      dest_dir otherwise. The candidate is only kept when it contains a
      "copy_complete" marker file.
    - If no candidate was selected, or the candidate does not exist,
      `input_file` is returned unchanged.
    """
    candidate = ""
    if g_pathmgr.isfile(input_file):
        candidate = os.path.join(dest_dir, os.path.basename(input_file))
    elif g_pathmgr.isdir(input_file):
        # Last path component of the directory, ignoring surrounding slashes.
        dir_name = input_file.strip("/").split("/")[-1]
        base_dir = get_slurm_dir(dest_dir) if "SLURM_JOBID" in os.environ else dest_dir
        local_dir = os.path.join(base_dir, dir_name)
        # Only trust the local directory once the marker file is present.
        if g_pathmgr.isfile(os.path.join(local_dir, "copy_complete")):
            candidate = local_dir
    return candidate if g_pathmgr.exists(candidate) else input_file
Ejemplo n.º 2
0
    def from_directory(
        cls,
        path: str,
        fps: float = 30.0,
        multithreaded_io=False,
        path_order_cache: Optional[Dict[str, List[str]]] = None,
    ):
        """
        Build a video from the frames found in a directory, in natural order.

        Args:
            path (str): path to frame video directory.
            fps (float): the target fps for the video. This is needed to link the frames
                to a second timestamp in the video.
            multithreaded_io (bool):  controls whether parallelizable io operations are
                performed across multiple threads.
            path_order_cache (dict): An optional mapping from directory-path to list
                of frames in the directory in numerical order. Used for speedup by
                caching the frame paths. When given and `path` is missing from it,
                the computed frame list is stored under `path`.

        Returns:
            The result of ``cls.from_frame_paths`` for the ordered frame paths.

        Raises:
            AssertionError: if `path` is not cached and is not a directory.
        """
        # Cache hit: skip the directory listing and the sort entirely.
        if path_order_cache is not None and path in path_order_cache:
            return cls.from_frame_paths(path_order_cache[path], fps,
                                        multithreaded_io)

        assert g_pathmgr.isdir(path), f"{path} is not a directory"
        rel_frame_paths = g_pathmgr.ls(path)

        def natural_keys(text):
            # Splitting on a captured digit run alternates text and number
            # chunks, so keys compare str-with-str and int-with-int.
            # The pattern is a raw string to avoid the invalid "\d" escape, and
            # isdecimal() (not isdigit()) is used because it agrees with both
            # "\d" and int(): isdigit() also accepts characters such as "²"
            # that int() rejects.
            return [
                int(c) if c.isdecimal() else c for c in re.split(r"(\d+)", text)
            ]

        rel_frame_paths.sort(key=natural_keys)
        frame_paths = [os.path.join(path, f) for f in rel_frame_paths]
        if path_order_cache is not None:
            path_order_cache[path] = frame_paths
        return cls.from_frame_paths(frame_paths, fps, multithreaded_io)
Ejemplo n.º 3
0
Archivo: io.py Proyecto: zlapp/vissl
def copy_data(input_file, destination_dir, num_threads, tmp_destination_dir):
    """
    Copy data from one source to the other using num_threads. The data to copy
    can be a single file or a directory. We check what type of data and
    call the relevant functions.

    Args:
        input_file (str): path of the file or directory to copy.
        destination_dir (str): directory to copy into. It is created when
            set; None or "" is forwarded to the copy helpers as None.
        num_threads (int): forwarded to copy_dir (directory input only).
        tmp_destination_dir (str): forwarded to copy_file (file input only).

    Returns:
        output_file (str): the new path of the data (could be file or dir)
        destination_dir (str): the destination dir that was actually used

    Raises:
        RuntimeError: if input_file is neither a file nor a directory.
    """
    if destination_dir is None or destination_dir == "":
        # Normalize "no destination" to None for the copy helpers.
        destination_dir = None
    else:
        # Log only when a directory is actually going to be created.
        logging.info(f"Creating directory: {destination_dir}")
        makedir(destination_dir)

    if g_pathmgr.isfile(input_file):
        output_file, output_dir = copy_file(
            input_file, destination_dir, tmp_destination_dir
        )
    elif g_pathmgr.isdir(input_file):
        output_file, output_dir = copy_dir(input_file, destination_dir, num_threads)
    else:
        raise RuntimeError("The input_file is neither a file nor a directory")
    return output_file, output_dir
Ejemplo n.º 4
0
    def video_from_path(self,
                        filepath,
                        decode_audio=False,
                        decoder="pyav",
                        fps=30):
        """
        Open the video stored at `filepath`, either an encoded file or a frame directory.

        Args:
            filepath: location of the video file or of the directory of frames.
            decode_audio: forwarded to EncodedVideo.from_path; must be falsy
                when `filepath` is a directory.
            decoder: forwarded to EncodedVideo.from_path.
            fps: forwarded to FrameVideo.from_directory.

        Returns:
            The object built by EncodedVideo.from_path (file) or by
            FrameVideo.from_directory (directory).

        Raises:
            FileNotFoundError: if `filepath` is neither a file nor a directory.
            AssertionError: if `filepath` is a directory and decode_audio is truthy.
        """
        try:
            path_is_file, path_is_dir = (
                g_pathmgr.isfile(filepath),
                g_pathmgr.isdir(filepath),
            )
        except NotImplementedError:
            # Some PathManager handlers do not implement isfile/isdir; in that
            # case the path is treated as a file.
            path_is_file, path_is_dir = True, False

        if path_is_file:
            from pytorchvideo.data.encoded_video import EncodedVideo

            return EncodedVideo.from_path(filepath, decode_audio, decoder)

        if not path_is_dir:
            raise FileNotFoundError(f"{filepath} not found.")

        from pytorchvideo.data.frame_video import FrameVideo

        assert not decode_audio, "decode_audio must be False when using FrameVideo"
        return FrameVideo.from_directory(
            filepath, fps, path_order_cache=self.path_order_cache)
Ejemplo n.º 5
0
 def __init__(self, cfg, data_source, path, split, dataset_name):
     """
     Validate the data source, load the dataset from `path` and cache settings.

     Args:
         cfg: config mapping; the entries read here are
             cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"] (passed to the parent
             as queue_size), "REMOVE_IMG_PATH_PREFIX", "NEW_IMG_PATH_PREFIX"
             and "ENABLE_QUEUE_DATASET".
         data_source (str): one of "disk_filelist", "disk_folder" or
             "disk_roi_annotations".
         path (str): a file for "disk_filelist" / "disk_roi_annotations"
             (the latter must end with "json"), a directory for "disk_folder".
         split (str): key used to index cfg["DATA"].
         dataset_name (str): stored on the instance as-is.

     Raises:
         AssertionError: if data_source is unsupported or `path` does not
             match what the data source requires.
     """
     super(DiskImageDataset, self).__init__(
         queue_size=cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"])
     # The message lists every accepted source, including disk_roi_annotations.
     assert data_source in [
         "disk_filelist",
         "disk_folder",
         "disk_roi_annotations",
     ], "data_source must be one of disk_filelist, disk_folder or disk_roi_annotations"
     if data_source == "disk_filelist":
         assert g_pathmgr.isfile(path), f"File {path} does not exist"
     elif data_source == "disk_folder":
         assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
     elif data_source == "disk_roi_annotations":
         assert g_pathmgr.isfile(path), f"File {path} does not exist"
         assert path.endswith("json"), "Annotations must be in json format"
     self.cfg = cfg
     self.split = split
     self.dataset_name = dataset_name
     self.data_source = data_source
     self._path = path
     self.image_dataset = []
     self.image_roi_bbox = []
     self.is_initialized = False
     # The sample count is taken from image_dataset right after this call, so
     # _load_data presumably fills it (defined outside this block - confirm).
     self._load_data(path)
     self._num_samples = len(self.image_dataset)
     self._remove_prefix = cfg["DATA"][self.split]["REMOVE_IMG_PATH_PREFIX"]
     self._new_prefix = cfg["DATA"][self.split]["NEW_IMG_PATH_PREFIX"]
     if self.data_source in ["disk_filelist", "disk_roi_annotations"]:
         # Set dataset to null so that workers dont need to pickle this file.
         # This saves memory when disk_filelist is large, especially when memory mapping.
         self.image_dataset = []
         self.image_roi_bbox = []
     # whether to use QueueDataset class to handle invalid images or not
     self.enable_queue_dataset = cfg["DATA"][
         self.split]["ENABLE_QUEUE_DATASET"]
Ejemplo n.º 6
0
 def from_directory(
     cls,
     path: str,
     fps: float = 30.0,
     multithreaded_io=False,
 ):
     """
     Build a video from every non-hidden entry of a frame directory.

     Frames are ordered by natural sort of their file names (digit runs
     compare as numbers), so the frame order is deterministic instead of
     following the arbitrary order returned by glob.

     Args:
         path (str): path to the directory holding the frames.
         fps (float): forwarded to ``cls.from_frame_paths``.
         multithreaded_io (bool): forwarded to ``cls.from_frame_paths``.

     Returns:
         The result of ``cls.from_frame_paths`` for the ordered frame paths.

     Raises:
         AssertionError: if `path` is not a directory.
     """
     import re  # local import: only needed for the natural-sort key below

     assert g_pathmgr.isdir(path), f"{path} is not a directory"

     def natural_key(frame_path):
         # Splitting on a captured digit run alternates text and number
         # chunks, so keys compare str-with-str and int-with-int.
         chunks = re.split(r"(\d+)", os.path.basename(frame_path))
         return [int(c) if c.isdecimal() else c for c in chunks]

     # Escape the directory so glob metacharacters in its name are literal.
     pattern = os.path.join(glob.escape(path), "*")
     frame_paths = sorted(glob.glob(pattern), key=natural_key)
     return cls.from_frame_paths(frame_paths, fps, multithreaded_io)
Ejemplo n.º 7
0
 def __init__(
     self, cfg: AttrDict, data_source: str, path: str, split: str, dataset_name: str
 ):
     """
     Check that `path` is a directory, record the settings and load the dataset.

     Args:
         cfg (AttrDict): accepted but not read in this constructor.
         data_source (str): accepted but not read in this constructor.
         path (str): directory of the dataset; must exist.
         split (str): split name, stored lower-cased.
         dataset_name (str): stored as-is.

     Raises:
         AssertionError: if `path` is not a directory.
     """
     super().__init__()
     assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
     normalized_split = split.lower()
     self.dataset_name = dataset_name
     self.path = path
     self.split = normalized_split
     # Load last, once every attribute above is in place.
     self.dataset = self._load_dataset()
Ejemplo n.º 8
0
    def from_path(cls, data_path: str) -> LabeledVideoPaths:
        """
        Create a LabeledVideoPaths object from either a file or a directory.

        - A file path is delegated to LabeledVideoPaths.from_csv.
        - A directory path is delegated to LabeledVideoPaths.from_directory.

        Args:
            data_path (str): The path to the file or directory to be read.

        Raises:
            FileNotFoundError: if data_path is neither a file nor a directory.
        """
        # The file check runs first, then the directory check.
        if g_pathmgr.isfile(data_path):
            return LabeledVideoPaths.from_csv(data_path)
        if not g_pathmgr.isdir(data_path):
            raise FileNotFoundError(f"{data_path} not found.")
        return LabeledVideoPaths.from_directory(data_path)
Ejemplo n.º 9
0
    def __init__(self, cfg):
        """
        Open the demo input with OpenCV and cache its basic properties.

        The input is resolved from cfg.DEMO.INPUT_VIDEO. When it is a
        directory, the capture source becomes "<dir>/<dirname>_%06d.jpg" and
        the fps comes from cfg.DEMO.FPS; otherwise the path is opened as-is and
        the fps is read from the capture.

        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py

        Raises:
            IOError: if the capture source cannot be opened.
        """
        self.cfg = cfg

        local_source = g_pathmgr.get_local_path(path=cfg.DEMO.INPUT_VIDEO)
        source_is_dir = g_pathmgr.isdir(local_source)
        last_component = local_source.split("/")[-1]
        if source_is_dir:
            self.fps = cfg.DEMO.FPS
            self.video_name = last_component
            # Frame pattern handed to cv2.VideoCapture: <dirname>_<6-digit index>.jpg
            self.source = os.path.join(local_source,
                                       "{}_%06d.jpg".format(last_component))
        else:
            self.fps = None
            # Keep only the text before the first "." of the file name.
            self.video_name = last_component.split(".")[0]
            self.source = local_source

        capture = cv2.VideoCapture(self.source)
        self.cap = capture
        if self.fps is None:
            self.fps = capture.get(cv2.CAP_PROP_FPS)

        self.total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        self.display_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.display_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if not capture.isOpened():
            raise IOError("Video {} cannot be opened".format(self.source))

        self.output_file = None

        requested_output = cfg.DEMO.OUTPUT_FILE
        if requested_output != "":
            self.output_file = self.get_output_file(requested_output)

        self.pred_boxes, self.gt_boxes = load_boxes_labels(
            cfg,
            self.video_name,
            self.fps,
            self.display_width,
            self.display_height,
        )

        self.seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
        self.no_frames_repeat = cfg.DEMO.SLOWMO