def _upload_segment(
        self,
        segment: Segment,
        *,
        jobs: int = 1,
        skip_uploaded_files: bool = False,
        pbar: Tqdm,
    ) -> SegmentClient:
        """Upload one local segment's data to its remote segment client.

        Entries that are already :class:`RemoteData` are skipped (advancing
        the progress bar for them); when ``skip_uploaded_files`` is True,
        entries whose remote path already exists on the server are skipped
        as well.

        Arguments:
            segment: The local segment whose data should be uploaded.
            jobs: The number of the max workers in multi-thread uploading.
            skip_uploaded_files: True for skipping the uploaded files.
            pbar: The progress bar to update during the upload.

        Returns:
            The segment client bound to the created or existing segment.

        """
        segment_client = self.get_or_create_segment(segment.name)

        def _is_local(data: Union[AuthData, Data]) -> bool:
            # update_for_skip advances the bar when the entry is skipped.
            return pbar.update_for_skip(not isinstance(data, RemoteData))

        local_data: Iterator[Union[AuthData, Data]] = filter(
            _is_local,
            segment,  # type: ignore[arg-type]
        )
        if skip_uploaded_files:
            uploaded_paths = set(segment_client.list_data_paths())

            def _is_pending(data: Union[AuthData, Data]) -> bool:
                return pbar.update_for_skip(
                    data.target_remote_path not in uploaded_paths)

            pending_data = filter(_is_pending, local_data)
        else:
            pending_data = local_data

        multithread_upload(
            # pylint: disable=protected-access
            segment_client._upload_or_import_data,
            pending_data,
            callback=segment_client._synchronize_upload_info,
            jobs=jobs,
            pbar=pbar,
        )
        return segment_client
    def _extract_all_data(
        source_frames: Iterator[Tuple[Frame, ULID]],
        pbar: Tqdm,
    ) -> FrameDataGenerator:
        """Yield every local data entry from the given frames.

        Remote data entries are not yielded; the progress bar is advanced
        for each of them instead.

        Arguments:
            source_frames: Pairs of frame and its frame id.
            pbar: The progress bar to update for skipped remote data.

        Yields:
            Tuples of (data, sensor name, frame id string).

        """
        for frame, ulid in source_frames:
            frame_key = ulid.str
            for sensor_name, data in frame.items():
                if not isinstance(data, RemoteData):
                    yield data, sensor_name, frame_key
                else:
                    # Already on the server: count it as done.
                    pbar.update()
# Example #3
    def test__extract_all_data(self):
        """_extract_all_data yields every local datum with its sensor and frame id."""
        source_frames = []
        ulids = []
        for index in range(5):
            frame = Frame()
            # Even frames carry a camera datum, odd frames a lidar datum.
            sensor = "camera" if index % 2 == 0 else "lidar"
            frame[sensor] = Data(f"{index}.png")
            ulid = from_timestamp(10 * index + 10)
            frame.frame_id = ulid
            source_frames.append((frame, ulid))
            ulids.append(ulid)

        with Tqdm(5, disable=False) as pbar:
            extracted = self.fusion_dataset_client._extract_all_data(
                source_frames, pbar)
            for index, (data, sensor_name, frame_id) in enumerate(extracted):
                assert data.path == f"{index}.png"
                expected_sensor = "camera" if index % 2 == 0 else "lidar"
                assert sensor_name == expected_sensor
                assert frame_id == ulids[index].str
    def test__extract_unuploaded_data(self):
        """_extract_unuploaded_data yields only data absent from done_frames."""
        source_frames = []
        ulids = []
        done_frames = {}
        for index in range(5):
            frame = Frame()
            ulid = from_timestamp(10 * index + 10)
            if index % 2:
                # Odd frames are still pending upload.
                frame["lidar"] = Data(f"{index}.png")
                ulids.append(ulid)
            else:
                # Even frames are already uploaded.
                frame["camera"] = Data(f"{index}.png")
                done_frames[ulid.timestamp().timestamp] = frame
            frame.frame_id = ulid
            source_frames.append((frame, ulid))

        with Tqdm(5, disable=False) as pbar:
            generator = self.fusion_dataset_client._extract_unuploaded_data(
                source_frames, pbar, done_frames=done_frames
            )
            for index, (data, sensor_name, frame_id) in enumerate(generator):
                assert data.path == f"{index * 2 + 1}.png"
                assert sensor_name == "lidar"
                assert frame_id == ulids[index].str
# Example #5
    def test__upload_segment(self, mocker):
        """_upload_segment filters data and forwards them to multithread_upload.

        Phase 1 (skip_uploaded_files=True): data1/data2 are reported as
        already uploaded, so only data0/data3/data4 reach the uploader.
        Phase 2 (skip_uploaded_files=False): all five files are uploaded.
        """
        # A local segment holding five data entries, data0.png..data4.png.
        segment_test = Segment(name="test1")
        for i in range(5):
            segment_test.append(Data(f"data{i}.png"))
        segment_client = SegmentClient(name="test1",
                                       data_client=self.dataset_client)
        # Avoid network calls: stub segment creation, the remote path
        # listing, and the uploader itself.
        get_or_create_segment = mocker.patch(
            f"{dataset.__name__}.DatasetClient.get_or_create_segment",
            return_value=segment_client)
        list_data_paths = mocker.patch(
            f"{segment.__name__}.SegmentClient.list_data_paths",
            return_value=["data1.png", "data2.png"],
        )
        multithread_upload = mocker.patch(
            f"{dataset.__name__}.multithread_upload")

        with Tqdm(5, disable=False) as pbar:
            self.dataset_client._upload_segment(segment_test,
                                                skip_uploaded_files=True,
                                                pbar=pbar)
            get_or_create_segment.assert_called_once_with(segment_test.name)
            list_data_paths.assert_called_once_with()
            args, keywords = multithread_upload.call_args
            assert args[0] == segment_client._upload_or_import_data
            # Only the files not in the remote listing are forwarded.
            assert [item.path for item in args[1]
                    ] == ["data0.png", "data3.png", "data4.png"]
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 1
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called_once()
        with Tqdm(5, disable=False) as pbar:
            self.dataset_client._upload_segment(segment_test,
                                                skip_uploaded_files=False,
                                                pbar=pbar)
            get_or_create_segment.assert_called_with(segment_test.name)
            list_data_paths.assert_called_with()
            args, keywords = multithread_upload.call_args
            assert args[0] == segment_client._upload_or_import_data
            # With skipping disabled, every file is forwarded.
            assert [item.path
                    for item in args[1]] == [f"data{i}.png" for i in range(5)]
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 1
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called()
    def _extract_unuploaded_data(
            source_frames: Iterator[Tuple[Frame, ULID]], pbar: Tqdm, *,
            done_frames: Dict[float, Frame]) -> FrameDataGenerator:
        """Yield data entries that still need uploading.

        A datum is skipped (advancing the progress bar) when it is already
        remote, or when the frame with the same timestamp in ``done_frames``
        holds the same sensor with a matching remote path.

        Arguments:
            source_frames: Pairs of frame and its frame id.
            pbar: The progress bar to update for each skipped datum.
            done_frames: Already-uploaded frames keyed by timestamp.

        Yields:
            Tuples of (data, sensor name, frame id string).

        """
        for frame, frame_id in source_frames:
            done_frame = done_frames.get(frame_id.timestamp().timestamp)
            # Reuse the server-side frame id when the frame already exists.
            frame_key = (done_frame.frame_id if done_frame else frame_id).str
            for sensor_name, data in frame.items():
                is_remote = isinstance(data, RemoteData)
                already_uploaded = (
                    not is_remote and done_frame and sensor_name in done_frame
                    and done_frame[sensor_name].path == data.target_remote_path)
                if is_remote or already_uploaded:
                    pbar.update()
                else:
                    yield data, sensor_name, frame_key
    def upload_segment(
        self,
        segment: Segment,
        *,
        jobs: int = 1,
        skip_uploaded_files: bool = False,
        quiet: bool = False,
        _is_cli: bool = False,
    ) -> SegmentClient:
        """Upload a :class:`~tensorbay.dataset.segment.Segment` to the dataset.

        All information carried by the input
        :class:`~tensorbay.dataset.segment.Segment` is uploaded:

            - A segment is created from the name of the input Segment.
            - Every Data in the Segment is uploaded to the dataset.

        Arguments:
            segment: The :class:`~tensorbay.dataset.segment.Segment`
                contains the information needs to be upload.
            jobs: The number of the max workers in multi-thread uploading method.
            skip_uploaded_files: True for skipping the uploaded files.
            quiet: Set to True to stop showing the upload process bar.
            _is_cli: Whether the method is called by CLI.

        Raises:
            Exception: When the upload got interrupted by Exception.

        Returns:
            The :class:`~tensorbay.client.segment.SegmentClient`
            used for uploading the data in the segment.

        """
        self._status.check_authority_for_draft()
        try:
            with Tqdm(len(segment), disable=quiet) as pbar:
                return self._upload_segment(
                    segment,
                    jobs=jobs,
                    skip_uploaded_files=skip_uploaded_files,
                    pbar=pbar,
                )
        except Exception:
            # Tell the user how to resume; the message differs for CLI vs SDK.
            draft_number = self._status.draft_number
            if _is_cli:
                logger.error(UPLOAD_SEGMENT_RESUME_TEMPLATE_CLI, draft_number)
            else:
                logger.error(
                    UPLOAD_SEGMENT_RESUME_TEMPLATE_SDK,
                    draft_number,
                    draft_number,
                )
            raise
    def upload_segment(
        self,
        segment: FusionSegment,
        *,
        jobs: int = 1,
        skip_uploaded_files: bool = False,
        quiet: bool = False,
    ) -> FusionSegmentClient:
        """Upload a fusion segment object to the draft.

        All information carried by the input
        :class:`~tensorbay.dataset.segment.FusionSegment` is uploaded:

            - A segment is created from the name of the input fusion segment.
            - Every sensor in the segment is uploaded to the dataset.
            - Every frame in the segment is uploaded to the dataset.

        Arguments:
            segment: The :class:`~tensorbay.dataset.segment.FusionSegment`.
            jobs: The number of the max workers in multi-thread upload.
            skip_uploaded_files: Set it to True to skip the uploaded files.
            quiet: Set to True to stop showing the upload process bar.

        Raises:
            Exception: When the upload got interrupted by Exception.

        Returns:
            The :class:`~tensorbay.client.segment.FusionSegmentClient`
                used for uploading the data in the segment.

        """
        self._status.check_authority_for_draft()
        try:
            # The progress bar tracks individual data, not frames.
            data_count = sum(len(frame) for frame in segment)
            with Tqdm(data_count, disable=quiet) as pbar:
                return self._upload_segment(
                    segment,
                    jobs=jobs,
                    skip_uploaded_files=skip_uploaded_files,
                    pbar=pbar)
        except Exception:
            # Tell the user how to resume the interrupted upload.
            logger.error(
                UPLOAD_SEGMENT_RESUME_TEMPLATE_SDK,
                self._status.draft_number,
                self._status.draft_number,
            )
            raise
    def upload_dataset(
        self,
        dataset: Union[Dataset, FusionDataset],
        draft_number: Optional[int] = None,
        *,
        branch_name: Optional[str] = None,
        jobs: int = 1,
        skip_uploaded_files: bool = False,
        quiet: bool = False,
    ) -> DatasetClientType:
        """Upload a local dataset to TensorBay.

        This function will upload all information contains
        in the :class:`~tensorbay.dataset.dataset.Dataset`
        or :class:`~tensorbay.dataset.dataset.FusionDataset`, which includes:

            - Create a TensorBay dataset with the name and type of input local dataset.
            - Upload all :class:`~tensorbay.dataset.segment.Segment`
              or :class:`~tensorbay.dataset.segment.FusionSegment` in the dataset to TensorBay.

        Arguments:
            dataset: The :class:`~tensorbay.dataset.dataset.Dataset` or
                :class:`~tensorbay.dataset.dataset.FusionDataset` needs to be uploaded.
            draft_number: The draft number.
            branch_name: The branch name.
            jobs: The number of the max workers in multi-thread upload.
            skip_uploaded_files: Set it to True to skip the uploaded files.
            quiet: Set to True to stop showing the upload process bar.

        Returns:
            The :class:`~tensorbay.client.dataset.DatasetClient` or
            :class:`~tensorbay.client.dataset.FusionDatasetClient`
            bound with the uploaded dataset.

        Raises:
            ValueError: When uploading the dataset based on both draft number
                and branch name is not allowed.
            Exception: When Exception was raised during uploading dataset.

        """
        # Fusion datasets need a fusion-aware client.
        dataset_client = self.get_dataset(dataset.name,
                                          isinstance(dataset, FusionDataset))

        if draft_number and branch_name:
            raise ValueError(
                "Uploading the dataset based on both draft number and branch name is not allowed"
            )

        if draft_number:
            dataset_client.checkout(draft_number=draft_number)

        else:
            # No explicit draft: reuse the first open draft on the target
            # branch, or create a fresh autogenerated one.
            target_branch_name = branch_name if branch_name else dataset_client.status.branch_name

            drafts = dataset_client.list_drafts(branch_name=target_branch_name)
            if drafts:
                dataset_client.checkout(draft_number=drafts[0].number)
            else:
                dataset_client.create_draft(
                    'Draft autogenerated by "GAS.upload_dataset"',
                    branch_name=target_branch_name)

        try:
            if dataset.catalog:
                dataset_client.upload_catalog(dataset.catalog)

            dataset_client.update_notes(
                **dataset.notes)  # type: ignore[arg-type]

            # One shared progress bar across all segments, sized by the
            # total number of data (frames' data for fusion datasets).
            if isinstance(dataset, Dataset):
                data_count = sum(len(segment) for segment in dataset)
            else:
                data_count = sum(
                    sum(len(frame) for frame in fusion_segment)
                    for fusion_segment in dataset)

            with Tqdm(data_count, disable=quiet) as pbar:
                for segment in dataset:
                    dataset_client._upload_segment(  # pylint: disable=protected-access
                        segment,  # type: ignore[arg-type]
                        jobs=jobs,
                        skip_uploaded_files=skip_uploaded_files,
                        pbar=pbar,
                    )
        except Exception:
            # NOTE(review): the resume hint is only logged when the caller
            # supplied draft_number explicitly — confirm whether failures on
            # autogenerated drafts should also be reported here.
            if draft_number:
                logger.error(
                    UPLOAD_DATASET_RESUME_TEMPLATE,
                    dataset_client.status.draft_number,
                    dataset_client.status.draft_number,
                )
            raise

        return dataset_client
# Example #10
    def test__upload_segment(self, mocker):
        segment_test = FusionSegment(name="test1")
        ulids = []
        done_frames = []
        for i in range(5):
            temp_frame = Frame()
            temp_ulid = from_timestamp(10 * i + 10)
            temp_frame.frame_id = temp_ulid
            if i % 2 == 0:
                temp_frame["camera"] = Data(f"{i}.png")
                done_frames.append(temp_frame)
            else:
                temp_frame["lidar"] = Data(f"{i}.png")
            ulids.append(temp_ulid)
            segment_test.append(temp_frame)

        segment_client = FusionSegmentClient(
            name="test1", data_client=self.fusion_dataset_client)
        get_or_create_segment = mocker.patch(
            f"{dataset.__name__}.FusionDatasetClient.get_or_create_segment",
            return_value=segment_client,
        )

        list_frames = mocker.patch(
            f"{segment.__name__}.FusionSegmentClient.list_frames",
            return_value=done_frames,
        )

        multithread_upload = mocker.patch(
            f"{dataset.__name__}.multithread_upload")

        with Tqdm(5, disable=False) as pbar:
            self.fusion_dataset_client._upload_segment(
                segment_test, jobs=8, skip_uploaded_files=True, pbar=pbar)
            get_or_create_segment.assert_called_once_with(segment_test.name)
            list_frames.assert_called_once_with()
            args, keywords = multithread_upload.call_args
            for index, values in enumerate(args[1]):
                data, sensor_name, frame_id = values
                assert data.path == f"{index * 2 + 1}.png"
                assert sensor_name == "lidar"
                assert frame_id == ulids[index * 2 + 1].str
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 8
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called_once()
        with Tqdm(5, disable=False) as pbar:
            self.fusion_dataset_client._upload_segment(
                segment_test, jobs=8, skip_uploaded_files=False, pbar=pbar)
            get_or_create_segment.assert_called_with(segment_test.name)
            list_frames.assert_called_with()
            args, keywords = multithread_upload.call_args
            for index, values in enumerate(args[1]):
                data, sensor_name, frame_id = values
                assert data.path == f"{index}.png"
                if index % 2 == 0:
                    assert sensor_name == "camera"
                else:
                    assert sensor_name == "lidar"
                assert frame_id == ulids[index].str
            assert keywords[
                "callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 8
            assert keywords["pbar"] == pbar