Example #1
def local(team: Optional[str] = None):
    """Lists synced datasets, stored in the specified path. """
    table = Table(["name", "images", "sync_date", "size"], [Table.L, Table.R, Table.R, Table.R])
    client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets(team=team):
        table.add_row(
            {
                "name": f"{dataset_path.parent.name}/{dataset_path.name}",
                "images": sum(1 for _ in find_files([dataset_path])),
                "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
                "size": humanize.naturalsize(sum(p.stat().st_size for p in find_files([dataset_path]))),
            }
        )
    # List deprecated datasets
    deprecated_local_datasets = list(client.list_deprecated_local_datasets())
    if deprecated_local_datasets:
        for dataset_path in deprecated_local_datasets:
            table.add_row(
                {
                    "name": dataset_path.name + " (deprecated format)",
                    "images": sum(1 for _ in find_files([dataset_path])),
                    "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
                    "size": humanize.naturalsize(sum(p.stat().st_size for p in find_files([dataset_path]))),
                }
            )

    print(table)
    if deprecated_local_datasets:
        print(
            f"\nWARNING: found some local datasets that use a deprecated format "
            f"not supported by the recent version of darwin-py. "
            f"Run `darwin dataset migrate team_slug/dataset_slug` "
            "if you want to be able to use them in darwin-py."
        )
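
For reference, the humanize helpers used above are what turn raw byte counts and timestamps into the readable strings shown in the table. A minimal standalone sketch (the printed values are illustrative):

import datetime
import humanize

# naturalsize formats byte counts using decimal (SI) units by default.
print(humanize.naturalsize(2_500_000))                       # e.g. "2.5 MB"

# naturaldate renders a datetime as a short, human-friendly date.
print(humanize.naturaldate(datetime.datetime(2021, 3, 7)))   # e.g. "Mar 07 2021"
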
Example #2
def local(team: Optional[str] = None) -> None:
    """
    Lists synced datasets, stored in the specified path.

    Parameters
    ----------
    team: Optional[str]
        The name of the team whose datasets to list, or the default one if no team is given. Defaults to None.
    """
    table: Table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Name")
    table.add_column("Image Count", justify="right")
    table.add_column("Sync Date", justify="right")
    table.add_column("Size", justify="right")

    client: Client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets(team_slug=team):
        files_in_dataset_path = find_files([dataset_path])
        table.add_row(
            f"{dataset_path.parent.name}/{dataset_path.name}",
            str(len(files_in_dataset_path)),
            humanize.naturaldate(
                datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
            humanize.naturalsize(
                sum(p.stat().st_size for p in files_in_dataset_path)),
        )

    Console().print(table)
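
These local() functions are CLI helpers, so they are normally reached through the darwin command line, but they can also be called directly. A hedged sketch, assuming the function lives in darwin.cli_functions and that credentials have already been configured (both assumptions based on darwin-py's layout):

# Equivalent shell invocation (the `darwin dataset local` subcommand is assumed
# from the `darwin dataset ...` CLI pattern referenced in Example #1):
#   darwin dataset local

from darwin.cli_functions import local  # assumed module path

local(team="my-team")  # prints the table of locally synced datasets for "my-team"
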
Example #3
def local():
    """Lists synced datasets, stored in the specified path. """
    table = Table(["name", "images", "sync_date", "size"],
                  [Table.L, Table.R, Table.R, Table.R])
    client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets():
        table.add_row({
            "name": dataset_path.name,
            "images": sum(1 for _ in find_files([dataset_path])),
            "sync_date": humanize.naturaldate(
                datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
            "size": humanize.naturalsize(
                sum(p.stat().st_size for p in find_files([dataset_path]))),
        })
    print(table)
Example #4
    def push(
        self,
        files_to_upload: List[str],
        blocking: bool = True,
        multi_threaded: bool = True,
        fps: int = 1,
        as_frames: bool = False,
        files_to_exclude: Optional[List[str]] = None,
        resume: bool = False,
        path: Optional[str] = None,
    ):
        """Uploads a local dataset (images ONLY) in the datasets directory.

        Parameters
        ----------
        files_to_upload : List[str]
            List of files to upload. Entries can also be folders.
        blocking : bool
            If False, the dataset is not uploaded and a generator function is returned instead.
        multi_threaded : bool
            Uses multiprocessing to upload the dataset in parallel.
            If blocking is False this has no effect.
        files_to_exclude : List[str]
            List of files to exclude from the file scan.
        fps : int
            Number of frames per second to upload (for videos).
        as_frames : bool
            Whether to upload a video as a list of frames.
        resume : bool
            Flag for signalling the resuming of a push.
        path : str
            Optional path to put the files into.

        Returns
        -------
        generator : function
            Generator for doing the actual uploads. This is None if blocking is True
        count : int
            The files count
        """

        # path needs to start with /
        if path and path[0] != "/":
            path = f"/{path}"

        # This is where the responses from the upload function will be saved to / loaded from for resume
        self.local_path.parent.mkdir(exist_ok=True)
        responses_path = self.local_path.parent / ".upload_responses.json"
        # Init optional parameters
        if files_to_exclude is None:
            files_to_exclude = []
        if files_to_upload is None:
            raise NotFound("Dataset location not found. Check your path.")

        if resume:
            if not responses_path.exists():
                raise NotFound("Dataset location not found. Check your path.")
            with responses_path.open() as f:
                logged_responses = json.load(f)
            files_to_exclude.extend([
                response["file_path"] for response in logged_responses
                if response["s3_response_status_code"].startswith("2")
            ])

        files_to_upload = find_files(files=files_to_upload,
                                     recursive=True,
                                     files_to_exclude=files_to_exclude)

        if not files_to_upload:
            raise ValueError(
                "No files to upload, check your path, exclusion filters and resume flag"
            )

        progress, count = add_files_to_dataset(
            client=self.client,
            dataset_id=str(self.dataset_id),
            filenames=files_to_upload,
            fps=fps,
            as_frames=as_frames,
            team=self.team,
            path=path,
        )

        # If blocking is selected, upload the dataset remotely
        if blocking:
            responses = exhaust_generator(progress=progress,
                                          count=count,
                                          multi_threaded=multi_threaded)
            # Log responses to file
            if responses:
                responses = [{k: str(v)
                              for k, v in response.items()}
                             for response in responses]
                if resume:
                    responses.extend(logged_responses)
                with responses_path.open("w") as f:
                    json.dump(responses, f)
            return None, count
        else:
            return progress, count
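
With this older signature, passing blocking=False returns the upload generator and file count instead of uploading, so the caller can drive the upload itself. A hedged sketch mirroring the helpers used inside push (dataset is a hypothetical RemoteDataset instance; the exhaust_generator import path is an assumption):

from darwin.utils import exhaust_generator  # assumed import path for the helper push() uses

# Non-blocking call: nothing is uploaded yet, we just get the work generator.
progress, count = dataset.push(["./images"], blocking=False)
print(f"{count} files queued for upload")

# Drive the upload ourselves, in parallel, when ready.
responses = exhaust_generator(progress=progress, count=count, multi_threaded=True)
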
Example #5
    def push(
        self,
        files_to_upload: Optional[List[Union[PathLike, LocalFile]]],
        *,
        blocking: bool = True,
        multi_threaded: bool = True,
        fps: int = 0,
        as_frames: bool = False,
        files_to_exclude: Optional[List[PathLike]] = None,
        path: Optional[str] = None,
        preserve_folders: bool = False,
        progress_callback: Optional[ProgressCallback] = None,
        file_upload_callback: Optional[FileUploadCallback] = None,
    ) -> UploadHandler:
        """Uploads a local dataset (images ONLY) in the datasets directory.

        Parameters
        ----------
        files_to_upload : Optional[List[Union[PathLike, LocalFile]]]
            List of files to upload. Those can be folders.
        blocking : bool
            If False, the dataset is not uploaded and a generator function is returned instead.
        multi_threaded : bool
            Uses multiprocessing to upload the dataset in parallel.
            If blocking is False this has no effect.
        files_to_exclude : Optional[List[PathLike]]
            Optional list of files to exclude from the file scan. Those can be folders.
        fps : int
            When the file being uploaded is a video, specify its framerate.
        as_frames : bool
            When the file being uploaded is a video, specify whether it is going to be uploaded as a list of frames.
        path: Optional[str]
            Optional path to store the files in.
        preserve_folders : bool
            Specify whether or not to preserve folder paths when uploading.
        progress_callback : Optional[ProgressCallback]
            Optional callback, called every time the progress of an uploading file is reported.
        file_upload_callback: Optional[FileUploadCallback]
            Optional callback, called every time a file chunk is uploaded.

        Returns
        -------
        handler : UploadHandler
           Class for handling uploads, progress and error messages
        """

        if files_to_exclude is None:
            files_to_exclude = []

        if files_to_upload is None:
            raise ValueError("No files or directory specified.")

        uploading_files = [
            item for item in files_to_upload if isinstance(item, LocalFile)
        ]
        search_files = [
            item for item in files_to_upload
            if not isinstance(item, LocalFile)
        ]

        generic_parameters_specified = path is not None or fps != 0 or as_frames is not False
        if uploading_files and generic_parameters_specified:
            raise ValueError(
                "Cannot specify a path when uploading a LocalFile object.")

        for found_file in find_files(search_files,
                                     files_to_exclude=files_to_exclude):
            local_path = path
            if preserve_folders:
                source_files = [
                    source_file for source_file in search_files
                    if is_relative_to(found_file, source_file)
                ]
                if source_files:
                    local_path = str(
                        found_file.relative_to(source_files[0]).parent)
            uploading_files.append(
                LocalFile(found_file,
                          fps=fps,
                          as_frames=as_frames,
                          path=local_path))

        if not uploading_files:
            raise ValueError(
                "No files to upload, check your path, exclusion filters and resume flag"
            )

        handler = UploadHandler(self, uploading_files)
        if blocking:
            handler.upload(
                multi_threaded=multi_threaded,
                progress_callback=progress_callback,
                file_upload_callback=file_upload_callback,
            )
        else:
            handler.prepare_upload()

        return handler
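
A hedged end-to-end sketch of this newer push. Client.local(), get_remote_dataset() and the "my-team/my-dataset" identifier format are assumptions based on darwin-py's public client API, not taken from the snippet above:

from darwin.client import Client

# Assumes `darwin authenticate` has already stored credentials locally,
# and that Client.local() / get_remote_dataset() exist as in darwin-py.
client = Client.local()
dataset = client.get_remote_dataset("my-team/my-dataset")  # assumed identifier format

# Blocking upload of a folder, preserving its sub-folder structure remotely.
handler = dataset.push(["./images"], preserve_folders=True)

# Non-blocking alternative: prepare the upload now and run it later,
# using the same upload() signature shown in the snippet above.
# handler = dataset.push(["./images"], blocking=False)
# handler.upload(multi_threaded=True)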